() {
- @Override public int compare(Rectangle o1, Rectangle o2) {
- if (o1.equals(o2)) return 0;
- if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
- return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
- ? - java.lang.Double.compare(o1.getX(), o2.getX())
- : java.lang.Double.compare(o1.getX(), o2.getX());
- } else {
- return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
- }
- }
- };
-
- protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
+ protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
+ /**
+ * Ill-defined comparator, from when Rectangle was Comparable.
+ *
+ * see https://github.com/tabulapdf/tabula-java/issues/116
+ *
+ * @deprecated with no replacement
+ */
+ @Deprecated
+ public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
+ @Override
+ public int compare(Rectangle o1, Rectangle o2) {
+ if (o1.equals(o2)) return 0;
+ if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
+ return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
+ ? -java.lang.Double.compare(o1.getX(), o2.getX())
+ : java.lang.Double.compare(o1.getX(), o2.getX());
+ } else {
+ return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
+ }
+ }
+ };
- public Rectangle() {
- super();
- }
+ public Rectangle() {
+ super();
+ }
- public Rectangle(float top, float left, float width, float height) {
- super();
- this.setRect(left, top, width, height);
- }
+ public Rectangle(float top, float left, float width, float height) {
+ super();
+ this.setRect(left, top, width, height);
+ }
- public int compareTo(Rectangle other) {
- return ILL_DEFINED_ORDER.compare(this, other);
- }
+ /**
+ * @param rectangles
+ * @return minimum bounding box that contains all the rectangles
+ */
+ public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
+ float minx = java.lang.Float.MAX_VALUE;
+ float miny = java.lang.Float.MAX_VALUE;
+ float maxx = java.lang.Float.MIN_VALUE;
+ float maxy = java.lang.Float.MIN_VALUE;
- // I'm bad at Java and need this for fancy sorting in
- // technology.tabula.TextChunk.
- public int isLtrDominant() {
- return 0;
- }
+ for (Rectangle r : rectangles) {
+ minx = (float) Math.min(r.getMinX(), minx);
+ miny = (float) Math.min(r.getMinY(), miny);
+ maxx = (float) Math.max(r.getMaxX(), maxx);
+ maxy = (float) Math.max(r.getMaxY(), maxy);
+ }
+ return new Rectangle(miny, minx, maxx - minx, maxy - miny);
+ }
- public float getArea() {
- return this.width * this.height;
- }
+ public int compareTo(Rectangle other) {
+ return ILL_DEFINED_ORDER.compare(this, other);
+ }
- public float verticalOverlap(Rectangle other) {
- return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
- }
+ // I'm bad at Java and need this for fancy sorting in
+ // technology.tabula.TextChunk.
+ public int isLtrDominant() {
+ return 0;
+ }
- public boolean verticallyOverlaps(Rectangle other) {
- return verticalOverlap(other) > 0;
- }
+ public float getArea() {
+ return this.width * this.height;
+ }
- public float horizontalOverlap(Rectangle other) {
- return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
- }
+ public float verticalOverlap(Rectangle other) {
+ return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
+ }
- public boolean horizontallyOverlaps(Rectangle other) {
- return horizontalOverlap(other) > 0;
- }
+ public boolean verticallyOverlaps(Rectangle other) {
+ return verticalOverlap(other) > 0;
+ }
- public float verticalOverlapRatio(Rectangle other) {
- float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
+ public float horizontalOverlap(Rectangle other) {
+ return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
+ }
- if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
- && other.getBottom() <= this.getBottom()) {
- rv = (other.getBottom() - this.getTop()) / delta;
- } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
- && this.getBottom() <= other.getBottom()) {
- rv = (this.getBottom() - other.getTop()) / delta;
- } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
- && other.getBottom() <= this.getBottom()) {
- rv = (other.getBottom() - other.getTop()) / delta;
- } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
- && this.getBottom() <= other.getBottom()) {
- rv = (this.getBottom() - this.getTop()) / delta;
- }
+ public boolean horizontallyOverlaps(Rectangle other) {
+ return horizontalOverlap(other) > 0;
+ }
- return rv;
+ public float verticalOverlapRatio(Rectangle other) {
+ float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
- }
+ if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
+ && other.getBottom() <= this.getBottom()) {
+ rv = (other.getBottom() - this.getTop()) / delta;
+ } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
+ && this.getBottom() <= other.getBottom()) {
+ rv = (this.getBottom() - other.getTop()) / delta;
+ } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
+ && other.getBottom() <= this.getBottom()) {
+ rv = (other.getBottom() - other.getTop()) / delta;
+ } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
+ && this.getBottom() <= other.getBottom()) {
+ rv = (this.getBottom() - this.getTop()) / delta;
+ }
- public float overlapRatio(Rectangle other) {
- double intersectionWidth = Math.max(0,
- Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
- double intersectionHeight = Math.max(0,
- Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
- double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
- double unionArea = this.getArea() + other.getArea() - intersectionArea;
+ return rv;
- return (float) (intersectionArea / unionArea);
- }
+ }
- public Rectangle merge(Rectangle other) {
- this.setRect(this.createUnion(other));
- return this;
- }
+ public float overlapRatio(Rectangle other) {
+ double intersectionWidth = Math.max(0,
+ Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
+ double intersectionHeight = Math.max(0,
+ Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
+ double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
+ double unionArea = this.getArea() + other.getArea() - intersectionArea;
- public float getTop() {
- return (float) this.getMinY();
- }
+ return (float) (intersectionArea / unionArea);
+ }
- public void setTop(float top) {
- float deltaHeight = top - this.y;
- this.setRect(this.x, top, this.width, this.height - deltaHeight);
- }
+ public Rectangle merge(Rectangle other) {
+ this.setRect(this.createUnion(other));
+ return this;
+ }
- public float getRight() {
- return (float) this.getMaxX();
- }
+ public float getTop() {
+ return (float) this.getMinY();
+ }
- public void setRight(float right) {
- this.setRect(this.x, this.y, right - this.x, this.height);
- }
+ public void setTop(float top) {
+ float deltaHeight = top - this.y;
+ this.setRect(this.x, top, this.width, this.height - deltaHeight);
+ }
- public float getLeft() {
- return (float) this.getMinX();
- }
+ public float getRight() {
+ return (float) this.getMaxX();
+ }
- public void setLeft(float left) {
- float deltaWidth = left - this.x;
- this.setRect(left, this.y, this.width - deltaWidth, this.height);
- }
+ public void setRight(float right) {
+ this.setRect(this.x, this.y, right - this.x, this.height);
+ }
- public float getBottom() {
- return (float) this.getMaxY();
- }
+ public float getLeft() {
+ return (float) this.getMinX();
+ }
- public void setBottom(float bottom) {
- this.setRect(this.x, this.y, this.width, bottom - this.y);
- }
+ public void setLeft(float left) {
+ float deltaWidth = left - this.x;
+ this.setRect(left, this.y, this.width - deltaWidth, this.height);
+ }
- public Point2D[] getPoints() {
- return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()),
- new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
- new Point2D.Float(this.getLeft(), this.getBottom()) };
- }
+ public float getBottom() {
+ return (float) this.getMaxY();
+ }
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- String s = super.toString();
- sb.append(s.substring(0, s.length() - 1));
- sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
- return sb.toString();
- }
+ public void setBottom(float bottom) {
+ this.setRect(this.x, this.y, this.width, bottom - this.y);
+ }
- /**
- * @param rectangles
- * @return minimum bounding box that contains all the rectangles
- */
- public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
- float minx = java.lang.Float.MAX_VALUE;
- float miny = java.lang.Float.MAX_VALUE;
- float maxx = java.lang.Float.MIN_VALUE;
- float maxy = java.lang.Float.MIN_VALUE;
+ public Point2D[] getPoints() {
+ return new Point2D[]{new Point2D.Float(this.getLeft(), this.getTop()),
+ new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
+ new Point2D.Float(this.getLeft(), this.getBottom())};
+ }
- for (Rectangle r : rectangles) {
- minx = (float) Math.min(r.getMinX(), minx);
- miny = (float) Math.min(r.getMinY(), miny);
- maxx = (float) Math.max(r.getMaxX(), maxx);
- maxy = (float) Math.max(r.getMaxY(), maxy);
- }
- return new Rectangle(miny, minx, maxx - minx, maxy - miny);
- }
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ String s = super.toString();
+ sb.append(s.substring(0, s.length() - 1));
+ sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
+ return sb.toString();
+ }
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java
index 79f08ec4..404b66e9 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java
@@ -1,12 +1,11 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
-import java.util.ArrayList;
-import java.util.List;
-
+import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.index.strtree.STRtree;
-import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import java.util.ArrayList;
+import java.util.List;
@SuppressWarnings("all")
public class RectangleSpatialIndex {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java
index 98e3b300..e90c52b2 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java
@@ -1,20 +1,13 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
+import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
+import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import lombok.extern.slf4j.Slf4j;
+
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Formatter;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
-import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
-
-import lombok.extern.slf4j.Slf4j;
+import java.util.*;
@Slf4j
@@ -23,13 +16,127 @@ public class Ruling extends Line2D.Float {
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
- private enum SOType {VERTICAL, HRIGHT, HLEFT}
-
-
public Ruling(Point2D p1, Point2D p2) {
super(p1, p2);
}
+ public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
+ ArrayList<Ruling> rv = new ArrayList<>();
+ for (Ruling r : rulings) {
+ if (r.intersects(area)) {
+ rv.add(r.intersect(area));
+ }
+ }
+ return rv;
+ }
+
+ // log(n) implementation of find_intersections
+ // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
+ public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {
+
+ class SortObject {
+ protected SOType type;
+ protected float position;
+ protected Ruling ruling;
+
+ public SortObject(SOType type, float position, Ruling ruling) {
+ this.type = type;
+ this.position = position;
+ this.ruling = ruling;
+ }
+ }
+
+ List<SortObject> sos = new ArrayList<>();
+
+ TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
+ @Override
+ public int compare(Ruling o1, Ruling o2) {
+ return java.lang.Double.compare(o1.getTop(), o2.getTop());
+ }
+ });
+
+ TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
+ @Override
+ public int compare(Point2D o1, Point2D o2) {
+ if (o1.getY() > o2.getY()) {
+ return 1;
+ }
+ if (o1.getY() < o2.getY()) {
+ return -1;
+ }
+ if (o1.getX() > o2.getX()) {
+ return 1;
+ }
+ if (o1.getX() < o2.getX()) {
+ return -1;
+ }
+ return 0;
+ }
+ });
+
+ for (Ruling h : horizontals) {
+ sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
+ sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
+ }
+
+ for (Ruling v : verticals) {
+ sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
+ }
+
+ Collections.sort(sos, new Comparator<SortObject>() {
+ @Override
+ public int compare(SortObject a, SortObject b) {
+ int rv;
+ if (Utils.feq(a.position, b.position)) {
+ if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
+ rv = 1;
+ } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
+ rv = -1;
+ } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
+ rv = -1;
+ } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
+ rv = 1;
+ } else {
+ rv = java.lang.Double.compare(a.position, b.position);
+ }
+ } else {
+ return java.lang.Double.compare(a.position, b.position);
+ }
+ return rv;
+ }
+ });
+
+ for (SortObject so : sos) {
+ switch (so.type) {
+ case VERTICAL:
+ for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
+ try {
+ Point2D i = h.getKey().intersectionPoint(so.ruling);
+ if (i == null) {
+ continue;
+ }
+ rv.put(i,
+ new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
+ so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
+ } catch (UnsupportedOperationException e) {
+ log.info("Some line are oblique, ignoring...");
+ continue;
+ }
+ }
+ break;
+ case HRIGHT:
+ tree.remove(so.ruling);
+ break;
+ case HLEFT:
+ tree.put(so.ruling, true);
+ break;
+ }
+ }
+
+ return rv;
+
+ }
+
public boolean vertical() {
return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
}
@@ -38,13 +145,13 @@ public class Ruling extends Line2D.Float {
return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
}
+ // attributes that make sense only for non-oblique lines
+ // these are used to have a single collapse method (in page, currently)
+
public boolean oblique() {
return !(this.vertical() || this.horizontal());
}
- // attributes that make sense only for non-oblique lines
- // these are used to have a single collapse method (in page, currently)
-
public float getPosition() {
if (this.oblique()) {
throw new UnsupportedOperationException();
@@ -52,7 +159,6 @@ public class Ruling extends Line2D.Float {
return this.vertical() ? this.getLeft() : this.getTop();
}
-
public float getStart() {
if (this.oblique()) {
throw new UnsupportedOperationException();
@@ -102,12 +208,10 @@ public class Ruling extends Line2D.Float {
}
}
-
public boolean perpendicularTo(Ruling other) {
return this.vertical() == other.horizontal();
}
-
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
if (this.intersectsLine(another)) {
return true;
@@ -238,7 +342,6 @@ public class Ruling extends Line2D.Float {
return angle;
}
-
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -248,122 +351,7 @@ public class Ruling extends Line2D.Float {
return rv;
}
- public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
- ArrayList<Ruling> rv = new ArrayList<>();
- for (Ruling r : rulings) {
- if (r.intersects(area)) {
- rv.add(r.intersect(area));
- }
- }
- return rv;
- }
-
- // log(n) implementation of find_intersections
- // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
- public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {
-
- class SortObject {
- protected SOType type;
- protected float position;
- protected Ruling ruling;
-
- public SortObject(SOType type, float position, Ruling ruling) {
- this.type = type;
- this.position = position;
- this.ruling = ruling;
- }
- }
-
- List<SortObject> sos = new ArrayList<>();
-
- TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
- @Override
- public int compare(Ruling o1, Ruling o2) {
- return java.lang.Double.compare(o1.getTop(), o2.getTop());
- }
- });
-
- TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
- @Override
- public int compare(Point2D o1, Point2D o2) {
- if (o1.getY() > o2.getY()) {
- return 1;
- }
- if (o1.getY() < o2.getY()) {
- return -1;
- }
- if (o1.getX() > o2.getX()) {
- return 1;
- }
- if (o1.getX() < o2.getX()) {
- return -1;
- }
- return 0;
- }
- });
-
- for (Ruling h : horizontals) {
- sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
- sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
- }
-
- for (Ruling v : verticals) {
- sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
- }
-
- Collections.sort(sos, new Comparator<SortObject>() {
- @Override
- public int compare(SortObject a, SortObject b) {
- int rv;
- if (Utils.feq(a.position, b.position)) {
- if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
- rv = 1;
- } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
- rv = -1;
- } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
- rv = -1;
- } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
- rv = 1;
- } else {
- rv = java.lang.Double.compare(a.position, b.position);
- }
- } else {
- return java.lang.Double.compare(a.position, b.position);
- }
- return rv;
- }
- });
-
- for (SortObject so : sos) {
- switch (so.type) {
- case VERTICAL:
- for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
- try {
- Point2D i = h.getKey().intersectionPoint(so.ruling);
- if (i == null) {
- continue;
- }
- rv.put(i,
- new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
- so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
- } catch(UnsupportedOperationException e){
- log.info("Some line are oblique, ignoring...");
- continue;
- }
- }
- break;
- case HRIGHT:
- tree.remove(so.ruling);
- break;
- case HLEFT:
- tree.put(so.ruling, true);
- break;
- }
- }
-
- return rv;
-
- }
+ private enum SOType {VERTICAL, HRIGHT, HLEFT}
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
index 8f55b482..6abc086e 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
@@ -1,22 +1,13 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.commons.collections4.CollectionUtils;
-
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
-
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+
+import java.util.*;
@Slf4j
public class Table extends AbstractTextContainer {
@@ -24,21 +15,14 @@ public class Table extends AbstractTextContainer {
private final TreeMap cells = new TreeMap<>();
private final RectangleSpatialIndex si = new RectangleSpatialIndex<>();
-
+ private final int rotation;
@Getter
@Setter
private String headline;
-
private int unrotatedRowCount;
-
private int unrotatedColCount;
-
private int rowCount = -1;
-
private int colCount = -1;
-
- private final int rotation;
-
private List<List<Cell>> rows;
@@ -62,8 +46,8 @@ public class Table extends AbstractTextContainer {
// Ignore rows that does not contain any cells and values.
List<List<Cell>> rowsToRemove = new ArrayList<>();
- for (List row: rows){
- if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()){
+ for (List row : rows) {
+ if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()) {
rowsToRemove.add(row);
}
}
@@ -110,7 +94,7 @@ public class Table extends AbstractTextContainer {
// we move from left to right and top to bottom
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
List rowCells = rows.get(rowIndex);
- if(rowCells.size() == 1){
+ if (rowCells.size() == 1) {
continue;
}
@@ -275,7 +259,7 @@ public class Table extends AbstractTextContainer {
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2),
Utils.round(arg1
- .getBottom(), 2))));
+ .getBottom(), 2))));
Iterator iter = cells.iterator();
Cell c = iter.next();
@@ -367,4 +351,4 @@ public class Table extends AbstractTextContainer {
return sb.toString();
}
-}
\ No newline at end of file
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java
index 82ca3bb7..6f6ea80a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java
@@ -1,19 +1,13 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
-import java.awt.geom.Line2D;
-import java.awt.geom.Point2D;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.springframework.stereotype.Service;
-
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import org.springframework.stereotype.Service;
+
+import java.awt.geom.Line2D;
+import java.awt.geom.Point2D;
+import java.util.*;
@Service
public class RulingCleaningService {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java
index 3dddd34a..682eb03e 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java
@@ -1,31 +1,57 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
-import java.awt.geom.Point2D;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import org.springframework.stereotype.Service;
-
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.*;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import org.springframework.stereotype.Service;
+
+import java.awt.geom.Point2D;
+import java.util.*;
+import java.util.stream.Collectors;
@Service
public class TableExtractionService {
+ private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
+
+ int rv = 0;
+ float arg0X = Utils.round(arg0.getX(), 2);
+ float arg0Y = Utils.round(arg0.getY(), 2);
+ float arg1X = Utils.round(arg1.getX(), 2);
+ float arg1Y = Utils.round(arg1.getY(), 2);
+
+ if (arg0X > arg1X) {
+ rv = 1;
+ } else if (arg0X < arg1X) {
+ rv = -1;
+ } else if (arg0Y > arg1Y) {
+ rv = 1;
+ } else if (arg0Y < arg1Y) {
+ rv = -1;
+ }
+ return rv;
+ };
+ private static final Comparator<Point2D> POINT_COMPARATOR = (arg0, arg1) -> {
+
+ int rv = 0;
+ float arg0X = Utils.round(arg0.getX(), 2);
+ float arg0Y = Utils.round(arg0.getY(), 2);
+ float arg1X = Utils.round(arg1.getX(), 2);
+ float arg1Y = Utils.round(arg1.getY(), 2);
+
+ if (arg0Y > arg1Y) {
+ rv = 1;
+ } else if (arg0Y < arg1Y) {
+ rv = -1;
+ } else if (arg0X > arg1X) {
+ rv = 1;
+ } else if (arg0X < arg1X) {
+ rv = -1;
+ }
+ return rv;
+ };
+
public void extractTables(CleanRulings cleanRulings, Page page) {
List cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
@@ -80,7 +106,6 @@ public class TableExtractionService {
page.getTextBlocks().removeAll(toBeRemoved);
}
-
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
List<Cell> cellsFound = new ArrayList<>();
@@ -133,7 +158,6 @@ public class TableExtractionService {
return cellsFound;
}
-
private List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
List<Rectangle> rectangles = new ArrayList<>();
@@ -233,47 +257,6 @@ public class TableExtractionService {
return rectangles;
}
-
- private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
-
- int rv = 0;
- float arg0X = Utils.round(arg0.getX(), 2);
- float arg0Y = Utils.round(arg0.getY(), 2);
- float arg1X = Utils.round(arg1.getX(), 2);
- float arg1Y = Utils.round(arg1.getY(), 2);
-
- if (arg0X > arg1X) {
- rv = 1;
- } else if (arg0X < arg1X) {
- rv = -1;
- } else if (arg0Y > arg1Y) {
- rv = 1;
- } else if (arg0Y < arg1Y) {
- rv = -1;
- }
- return rv;
- };
-
- private static final Comparator<Point2D> POINT_COMPARATOR = (arg0, arg1) -> {
-
- int rv = 0;
- float arg0X = Utils.round(arg0.getX(), 2);
- float arg0Y = Utils.round(arg0.getY(), 2);
- float arg1X = Utils.round(arg1.getX(), 2);
- float arg1Y = Utils.round(arg1.getY(), 2);
-
- if (arg0Y > arg1Y) {
- rv = 1;
- } else if (arg0Y < arg1Y) {
- rv = -1;
- } else if (arg0X > arg1X) {
- rv = 1;
- } else if (arg0X < arg1X) {
- rv = -1;
- }
- return rv;
- };
-
private enum Direction {
HORIZONTAL, VERTICAL
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java
index d1f9ab06..bd4b9d0c 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java
@@ -19,21 +19,24 @@ import java.awt.geom.Rectangle2D;
* clipping algorithm (line against clip rectangle).
*/
@SuppressWarnings("all")
-public final class CohenSutherlandClipping
-{
+public final class CohenSutherlandClipping {
+ private static final int INSIDE = 0;
+ private static final int LEFT = 1;
+ private static final int RIGHT = 2;
+ private static final int BOTTOM = 4;
+ private static final int TOP = 8;
private double xMin;
private double yMin;
private double xMax;
private double yMax;
-
/**
* Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0).
*/
public CohenSutherlandClipping() {
}
-
/**
* Creates a Cohen Sutherland clipper with the given clip rectangle.
+ *
* @param clip the clip rectangle to use
*/
public CohenSutherlandClipping(Rectangle2D clip) {
@@ -42,6 +45,7 @@ public final class CohenSutherlandClipping
/**
* Sets the clip rectangle.
+ *
* @param clip the clip rectangle
*/
public void setClip(Rectangle2D clip) {
@@ -51,19 +55,13 @@ public final class CohenSutherlandClipping
yMax = yMin + clip.getHeight();
}
- private static final int INSIDE = 0;
- private static final int LEFT = 1;
- private static final int RIGHT = 2;
- private static final int BOTTOM = 4;
- private static final int TOP = 8;
-
private final int regionCode(double x, double y) {
- int code = x < xMin
- ? LEFT
- : x > xMax
+ int code = x < xMin
+ ? LEFT
+ : x > xMax
? RIGHT
: INSIDE;
- if (y < yMin) code |= BOTTOM;
+ if (y < yMin) code |= BOTTOM;
else if (y > yMax) code |= TOP;
return code;
}
@@ -71,6 +69,7 @@ public final class CohenSutherlandClipping
/**
* Clips a given line against the clip rectangle.
* The modification (if needed) is done in place.
+ *
* @param line the line to clip
* @return true if line is clipped, false if line is
* totally outside the clip rect.
@@ -87,9 +86,9 @@ public final class CohenSutherlandClipping
boolean vertical = p1x == p2x;
- double slope = vertical
- ? 0d
- : (p2y-p1y)/(p2x-p1x);
+ double slope = vertical
+ ? 0d
+ : (p2y - p1y) / (p2x - p1x);
int c1 = regionCode(p1x, p1y);
int c2 = regionCode(p2x, p2y);
@@ -103,31 +102,27 @@ public final class CohenSutherlandClipping
if ((c & LEFT) != INSIDE) {
qx = xMin;
- qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
- }
- else if ((c & RIGHT) != INSIDE) {
+ qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y;
+ } else if ((c & RIGHT) != INSIDE) {
qx = xMax;
- qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
- }
- else if ((c & BOTTOM) != INSIDE) {
+ qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y;
+ } else if ((c & BOTTOM) != INSIDE) {
qy = yMin;
qx = vertical
- ? p1x
- : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
- }
- else if ((c & TOP) != INSIDE) {
+ ? p1x
+ : (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x;
+ } else if ((c & TOP) != INSIDE) {
qy = yMax;
qx = vertical
- ? p1x
- : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
+ ? p1x
+ : (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x;
}
if (c == c1) {
p1x = qx;
p1y = qy;
- c1 = regionCode(p1x, p1y);
- }
- else {
+ c1 = regionCode(p1x, p1y);
+ } else {
p2x = qx;
p2y = qy;
c2 = regionCode(p2x, p2y);
@@ -137,4 +132,4 @@ public final class CohenSutherlandClipping
return true;
}
}
-// end of file
\ No newline at end of file
+// end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java
index 5b9c3b6c..909de599 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java
@@ -10,11 +10,6 @@ import java.util.List;
*/
public final class QuickSort {
- private QuickSort() {
-
- }
-
-
private static final Comparator<? extends Comparable> OBJCOMP = new Comparator<Comparable>() {
@Override
public int compare(Comparable object1, Comparable object2) {
@@ -24,6 +19,10 @@ public final class QuickSort {
};
+ private QuickSort() {
+
+ }
+
/**
* Sorts the given list using the given comparator.
*
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java
index 62f72434..2a95ec3b 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java
@@ -1,11 +1,11 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
+import lombok.extern.slf4j.Slf4j;
+
import java.math.BigDecimal;
import java.util.Comparator;
import java.util.List;
-import lombok.extern.slf4j.Slf4j;
-
@Slf4j
@SuppressWarnings("all")
public class Utils {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
index 43e2cf13..06ccb399 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
@@ -1,15 +1,5 @@
package com.iqser.red.service.redaction.v1.server.visualization.service;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDPageContentStream;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.springframework.stereotype.Service;
-
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
@@ -17,9 +7,17 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
-
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.springframework.stereotype.Service;
+
+import java.awt.Color;
+import java.io.IOException;
+import java.util.List;
@Slf4j
@Service
@@ -34,7 +32,7 @@ public class PdfVisualisationService {
PDPage pdPage = document.getPage(page - 1);
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
- for(Paragraph paragraph : classifiedDoc.getParagraphs()) {
+ for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
@@ -44,10 +42,10 @@ public class PdfVisualisationService {
continue;
}
if (textBlock instanceof TextBlock) {
- textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
+ textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
visualizeTextBlock((TextBlock) textBlock, contentStream);
} else if (textBlock instanceof Table) {
- textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
+ textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
visualizeTable((Table) textBlock, contentStream);
}
@@ -59,7 +57,6 @@ public class PdfVisualisationService {
}
-
public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException {
for (int page = 1; page <= document.getNumberOfPages(); page++) {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml
index 302d198a..e19b1e65 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml
@@ -1,4 +1,11 @@
server:
port: 8083
-configuration-service.url: "http://localhost:8081"
\ No newline at end of file
+configuration-service.url: "http://localhost:8081"
+
+
+storage:
+ bucket-name: 'redaction'
+ endpoint: 'http://localhost:9000'
+ key: minioadmin
+ secret: minioadmin
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml
index efb01d6f..15ff3651 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml
@@ -17,4 +17,11 @@ management:
prometheus.enabled: ${monitoring.enabled:false}
health.enabled: true
endpoints.web.exposure.include: prometheus, health
- metrics.export.prometheus.enabled: ${monitoring.enabled:false}
\ No newline at end of file
+ metrics.export.prometheus.enabled: ${monitoring.enabled:false}
+
+
+storage:
+ signer-type: 'AWSS3V4SignerType'
+ bucket-name: 'redaction'
+ region: 'us-east-1'
+ endpoint: 'https://s3.amazonaws.com'
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/InMemoryStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/InMemoryStorageService.java
new file mode 100644
index 00000000..e73acb8b
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/InMemoryStorageService.java
@@ -0,0 +1,34 @@
+package com.iqser.red.service.redaction.v1.server;
+
+import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
+import com.iqser.red.storage.commons.service.StorageService;
+import org.springframework.core.io.InputStreamResource;
+
+import java.io.ByteArrayInputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+/** In-memory {@link StorageService} for tests: objects are held in a map keyed by object id. */
+public class InMemoryStorageService extends StorageService {
+    // Backing store: object id -> stored bytes.
+    private final Map<String, byte[]> dataMap = new HashMap<>();
+
+    public InMemoryStorageService() {
+        // NOTE(review): parent arguments are null - presumably unused because both accessors are overridden; confirm.
+        super(null, null);
+    }
+
+    @Override
+    public InputStreamResource getObject(String objectId) {
+        var res = dataMap.get(objectId);
+        if (res == null) {
+            throw new StorageObjectDoesNotExist(new RuntimeException("No object stored under id " + objectId));
+        }
+        return new InputStreamResource(new ByteArrayInputStream(res));
+    }
+
+    @Override
+    public void storeObject(String objectId, byte[] data) {
+        dataMap.put(objectId, data);
+    }
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index 0c459750..75384b34 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -1,28 +1,20 @@
package com.iqser.red.service.redaction.v1.server;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.mockito.Mockito.when;
-import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.URL;
-import java.nio.charset.StandardCharsets;
-import java.time.OffsetDateTime;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-import java.util.stream.Collectors;
-
+import com.amazonaws.services.s3.AmazonS3;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.configuration.v1.api.model.*;
+import com.iqser.red.service.file.management.v1.api.model.FileType;
+import com.iqser.red.service.redaction.v1.model.*;
+import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
+import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
+import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
+import com.iqser.red.service.redaction.v1.server.client.RulesClient;
+import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+import com.iqser.red.storage.commons.service.StorageService;
+import lombok.SneakyThrows;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
@@ -37,40 +29,20 @@ import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.iqser.red.service.configuration.v1.api.model.Colors;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
-import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResult;
-import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
-import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
-import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
-import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
-import com.iqser.red.service.redaction.v1.model.Comment;
-import com.iqser.red.service.redaction.v1.model.IdRemoval;
-import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
-import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
-import com.iqser.red.service.redaction.v1.model.ManualRedactions;
-import com.iqser.red.service.redaction.v1.model.Point;
-import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
-import com.iqser.red.service.redaction.v1.model.Rectangle;
-import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
-import com.iqser.red.service.redaction.v1.model.RedactionRequest;
-import com.iqser.red.service.redaction.v1.model.RedactionResult;
-import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
-import com.iqser.red.service.redaction.v1.model.SectionText;
-import com.iqser.red.service.redaction.v1.model.Status;
-import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
-import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
-import com.iqser.red.service.redaction.v1.server.client.RulesClient;
-import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
-import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
-import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import java.io.*;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.time.OffsetDateTime;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.when;
+import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = RANDOM_PORT)
@@ -116,6 +88,15 @@ public class RedactionIntegrationTest {
@MockBean
private ImageClassificationClient imageClassificationClient;
+ @Autowired
+ private RedactionStorageService redactionStorageService;
+
+ @Autowired
+ private StorageService storageService;
+
+ @MockBean
+ private AmazonS3 amazonS3;
+
private final Map> dictionary = new HashMap<>();
private final Map typeColorMap = new HashMap<>();
private final Map hintTypeMap = new HashMap<>();
@@ -126,6 +107,8 @@ public class RedactionIntegrationTest {
private final Map reanlysisVersions = new HashMap<>();
private final static String TEST_RULESET_ID = "123";
+ private final static String TEST_PROJECT_ID = "123";
+ private final static String TEST_FILE_ID = "123";
@TestConfiguration
public static class RedactionIntegrationTestConfiguration {
@@ -146,6 +129,12 @@ public class RedactionIntegrationTest {
return kieServices.newKieContainer(kieModule.getReleaseId());
}
+ @Bean
+ @Primary
+ public StorageService inmemoryStorage() {
+ return new InMemoryStorageService();
+ }
+
}
@@ -464,15 +453,16 @@ public class RedactionIntegrationTest {
input.addAll(getPathsRecursively(file));
}
for (File path : input) {
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(new FileInputStream(path)))
- .build();
+
+ AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
System.out.println("Redacting file : " + path.getName());
AnalyzeResult result = redactionController.analyze(request);
Map> duplicates = new HashMap<>();
- result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
+
+ var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+
+ redactionLog.getRedactionLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
@@ -484,13 +474,7 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
long rstart = System.currentTimeMillis();
- ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
- .redactionLog(result.getRedactionLog())
- .document(IOUtils.toByteArray(new FileInputStream(path)))
- .manualRedactions(null)
- .text(result.getText())
- .ruleSetId(TEST_RULESET_ID)
- .build());
+ redactionController.reanalyze(request);
long rend = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (rend - rstart));
@@ -528,15 +512,14 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
-
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = redactionController.analyze(request);
- result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
+ var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+ var text = redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID);
+
+ redactionLog.getRedactionLogEntry().forEach(entry -> {
if (entry.isImage()) {
System.out.println("---->" + entry.getType());
}
@@ -547,13 +530,13 @@ public class RedactionIntegrationTest {
System.out.println("first analysis duration: " + (end - start));
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
- fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
+ fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID)));
}
int correctFound = 0;
loop:
- for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
- for (SectionText sectionText : result.getText().getSectionTexts()) {
+ for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
+ for (SectionText sectionText : text.getSectionTexts()) {
if (redactionLogEntry.isImage()) {
correctFound++;
continue loop;
@@ -569,7 +552,7 @@ public class RedactionIntegrationTest {
}
}
}
- assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
+ assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
dictionary.get(AUTHOR).add("properties");
reanlysisVersions.put("properties", 1L);
@@ -585,20 +568,14 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
start = System.currentTimeMillis();
- ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
- .redactionLog(result.getRedactionLog())
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .text(result.getText())
- .ruleSetId(TEST_RULESET_ID)
- .build());
+ AnalyzeResult reanalyzeResult = redactionController.reanalyze(request);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(reanalyzeResult.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
@@ -613,19 +590,13 @@ public class RedactionIntegrationTest {
System.out.println("testTableRedaction");
long start = System.currentTimeMillis();
- ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
-
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
+ AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
AnalyzeResult result = redactionController.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(result.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
@@ -680,12 +651,9 @@ public class RedactionIntegrationTest {
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .manualRedactions(manualRedactions)
- .build();
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+ request.setManualRedactions(manualRedactions);
AnalyzeResult result = redactionController.analyze(request);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
@@ -694,20 +662,15 @@ public class RedactionIntegrationTest {
.status(Status.APPROVED)
.build()));
- ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
- .redactionLog(result.getRedactionLog())
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .manualRedactions(manualRedactions)
- .text(result.getText())
- .ruleSetId(TEST_RULESET_ID)
- .build());
+ redactionController.reanalyze(request);
+
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(reanalyzeResult.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
+
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
@@ -724,11 +687,16 @@ public class RedactionIntegrationTest {
System.out.println("classificationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.classify(request);
+ RedactionResult result = redactionController.classify(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) {
fileOutputStream.write(result.getDocument());
@@ -742,11 +710,15 @@ public class RedactionIntegrationTest {
System.out.println("sectionsTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.sections(request);
+ RedactionResult result = redactionController.sections(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) {
fileOutputStream.write(result.getDocument());
@@ -760,11 +732,15 @@ public class RedactionIntegrationTest {
System.out.println("htmlTablesTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.htmlTables(request);
+ RedactionResult result = redactionController.htmlTables(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
fileOutputStream.write(result.getDocument());
@@ -778,11 +754,15 @@ public class RedactionIntegrationTest {
System.out.println("htmlTableRotationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.htmlTables(request);
+ RedactionResult result = redactionController.htmlTables(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
fileOutputStream.write(result.getDocument());
@@ -795,20 +775,45 @@ public class RedactionIntegrationTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = redactionController.analyze(request);
- result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
+ var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+
+ redactionLog.getRedactionLogEntry().forEach(entry -> {
if (!entry.isHint()) {
assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study");
}
});
}
+    /** Opens the classpath resource {@code file} and delegates to {@link #prepareStorage(InputStream)}. */
+    @SneakyThrows
+    private AnalyzeRequest prepareStorage(String file) {
+        ClassPathResource pdfFileResource = new ClassPathResource(file);
+        return prepareStorage(pdfFileResource.getInputStream());
+    }
+
+    /**
+     * Builds an AnalyzeRequest for the fixed test rule set / project / file ids and stores the bytes of
+     * {@code stream} under the storage id derived for FileType.ORIGIN. The stream is read fully, then closed.
+     */
+    @SneakyThrows
+    private AnalyzeRequest prepareStorage(InputStream stream) {
+        AnalyzeRequest request = AnalyzeRequest.builder()
+                .ruleSetId(TEST_RULESET_ID)
+                .projectId(TEST_PROJECT_ID)
+                .fileId(TEST_FILE_ID)
+                .lastProcessed(OffsetDateTime.now())
+                .build();
+        // try-with-resources: callers hand over freshly opened streams and never close them themselves.
+        try (InputStream in = stream) {
+            storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_PROJECT_ID, TEST_FILE_ID, FileType.ORIGIN), IOUtils.toByteArray(in));
+        }
+        return request;
+    }
+
@Test
public void sponsorCompanyTest() throws IOException {
@@ -816,17 +821,14 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
+
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = redactionController.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(result.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
@@ -857,4 +859,4 @@ public class RedactionIntegrationTest {
}
}
-}
\ No newline at end of file
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
index b7efed93..95c8ebf8 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
@@ -1,12 +1,8 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
-import com.iqser.red.service.configuration.v1.api.model.Colors;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
-import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResult;
-import com.iqser.red.service.redaction.v1.model.RedactionRequest;
+import com.amazonaws.services.s3.AmazonS3;
+import com.iqser.red.service.configuration.v1.api.model.*;
+import com.iqser.red.service.redaction.v1.server.InMemoryStorageService;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
@@ -14,7 +10,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
-import org.apache.commons.io.IOUtils;
+import com.iqser.red.storage.commons.service.StorageService;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.Before;
import org.junit.Ignore;
@@ -30,6 +26,7 @@ import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
@@ -40,15 +37,8 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
@@ -80,6 +70,9 @@ public class EntityRedactionServiceTest {
@Autowired
private DroolsExecutionService droolsExecutionService;
+ @MockBean
+ private AmazonS3 amazonS3;
+
private final static String TEST_RULESET_ID = "123";
@TestConfiguration
@@ -101,6 +94,13 @@ public class EntityRedactionServiceTest {
return kieServices.newKieContainer(kieModule.getReleaseId());
}
+
+ @Bean
+ @Primary
+ public StorageService inmemoryStorage() {
+ return new InMemoryStorageService();
+ }
+
}
@@ -125,10 +125,6 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
- RedactionRequest redactionRequest = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
-
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
@@ -144,7 +140,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
+ try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
@@ -158,10 +154,6 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf");
- RedactionRequest redactionRequest = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
-
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
@@ -176,7 +168,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
+ try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
@@ -526,4 +518,4 @@ public class EntityRedactionServiceTest {
return dictionaryEntries;
}
-}
\ No newline at end of file
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java
index 4f58b26d..4d83412a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java
@@ -1,16 +1,14 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.io.ByteArrayOutputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import javax.imageio.ImageIO;
-
+import com.amazonaws.services.s3.AmazonS3;
+import com.iqser.red.service.redaction.v1.server.classification.model.Document;
+import com.iqser.red.service.redaction.v1.server.classification.model.Page;
+import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
+import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
+import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
+import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.Ignore;
import org.junit.Test;
@@ -22,15 +20,15 @@ import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
-import com.iqser.red.service.redaction.v1.server.classification.model.Document;
-import com.iqser.red.service.redaction.v1.server.classification.model.Page;
-import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
-import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
-import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
-import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
-import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
+import javax.imageio.ImageIO;
+import java.io.ByteArrayOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.assertj.core.api.Assertions.assertThat;
@SpringBootTest
@RunWith(SpringRunner.class)
@@ -51,6 +49,8 @@ public class PdfSegmentationServiceTest {
@MockBean
private KieContainer kieContainer;
+ @MockBean
+ private AmazonS3 amazonS3;
@Test
@Ignore
@@ -76,6 +76,29 @@ public class PdfSegmentationServiceTest {
}
+ @Test
+ public void testPDFSegmentationWithComplexTable() throws IOException {
+
+ ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
+
+ try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
+ Document document = pdfSegmentationService.parseDocument(pdDocument);
+ assertThat(document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())).isNotEmpty();
+ Table table = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(0);
+ assertThat(table.getColCount()).isEqualTo(6);
+ assertThat(table.getRowCount()).isEqualTo(13);
+ assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
+ }
+ }
+
+
@Test
public void testTableExtraction() throws IOException {
| | | | | | | |