() {
- @Override public int compare(Rectangle o1, Rectangle o2) {
- if (o1.equals(o2)) return 0;
- if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
- return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
- ? - java.lang.Double.compare(o1.getX(), o2.getX())
- : java.lang.Double.compare(o1.getX(), o2.getX());
- } else {
- return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
- }
- }
- };
-
- protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
+ protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
+ /**
+ * Ill-defined comparator, from when Rectangle was Comparable.
+ *
+ * see https://github.com/tabulapdf/tabula-java/issues/116
+ *
+ * @deprecated with no replacement
+ */
+ @Deprecated
+ public static final Comparator ILL_DEFINED_ORDER = new Comparator() {
+ @Override
+ public int compare(Rectangle o1, Rectangle o2) {
+ if (o1.equals(o2)) return 0;
+ if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
+ return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
+ ? -java.lang.Double.compare(o1.getX(), o2.getX())
+ : java.lang.Double.compare(o1.getX(), o2.getX());
+ } else {
+ return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
+ }
+ }
+ };
- public Rectangle() {
- super();
- }
+ public Rectangle() {
+ super();
+ }
- public Rectangle(float top, float left, float width, float height) {
- super();
- this.setRect(left, top, width, height);
- }
+ public Rectangle(float top, float left, float width, float height) {
+ super();
+ this.setRect(left, top, width, height);
+ }
- public int compareTo(Rectangle other) {
- return ILL_DEFINED_ORDER.compare(this, other);
- }
+ /**
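+ * @param rectangles rectangles to enclose; NOTE(review): maxx/maxy are seeded with Float.MIN_VALUE (the smallest positive float, not the most negative), so an empty list or all-negative extents look mishandled - confirm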
+ * @return minimum bounding box that contains all the rectangles
+ */
+ public static Rectangle boundingBoxOf(List extends Rectangle> rectangles) {
+ float minx = java.lang.Float.MAX_VALUE;
+ float miny = java.lang.Float.MAX_VALUE;
+ float maxx = java.lang.Float.MIN_VALUE;
+ float maxy = java.lang.Float.MIN_VALUE;
- // I'm bad at Java and need this for fancy sorting in
- // technology.tabula.TextChunk.
- public int isLtrDominant() {
- return 0;
- }
+ for (Rectangle r : rectangles) {
+ minx = (float) Math.min(r.getMinX(), minx);
+ miny = (float) Math.min(r.getMinY(), miny);
+ maxx = (float) Math.max(r.getMaxX(), maxx);
+ maxy = (float) Math.max(r.getMaxY(), maxy);
+ }
+ return new Rectangle(miny, minx, maxx - minx, maxy - miny);
+ }
- public float getArea() {
- return this.width * this.height;
- }
+ public int compareTo(Rectangle other) {
+ return ILL_DEFINED_ORDER.compare(this, other);
+ }
- public float verticalOverlap(Rectangle other) {
- return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
- }
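+ // Direction flag read by ILL_DEFINED_ORDER: when both rectangles return -1, the x-ordering is reversed.
+ // This base implementation always returns 0; presumably overridden in technology.tabula.TextChunk - confirm.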
+ public int isLtrDominant() {
+ return 0;
+ }
- public boolean verticallyOverlaps(Rectangle other) {
- return verticalOverlap(other) > 0;
- }
+ public float getArea() {
+ return this.width * this.height;
+ }
- public float horizontalOverlap(Rectangle other) {
- return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
- }
+ public float verticalOverlap(Rectangle other) {
+ return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
+ }
- public boolean horizontallyOverlaps(Rectangle other) {
- return horizontalOverlap(other) > 0;
- }
+ public boolean verticallyOverlaps(Rectangle other) {
+ return verticalOverlap(other) > 0;
+ }
- public float verticalOverlapRatio(Rectangle other) {
- float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
+ public float horizontalOverlap(Rectangle other) {
+ return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
+ }
- if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
- && other.getBottom() <= this.getBottom()) {
- rv = (other.getBottom() - this.getTop()) / delta;
- } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
- && this.getBottom() <= other.getBottom()) {
- rv = (this.getBottom() - other.getTop()) / delta;
- } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
- && other.getBottom() <= this.getBottom()) {
- rv = (other.getBottom() - other.getTop()) / delta;
- } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
- && this.getBottom() <= other.getBottom()) {
- rv = (this.getBottom() - this.getTop()) / delta;
- }
+ public boolean horizontallyOverlaps(Rectangle other) {
+ return horizontalOverlap(other) > 0;
+ }
- return rv;
+ public float verticalOverlapRatio(Rectangle other) {
+ float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
- }
+ if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
+ && other.getBottom() <= this.getBottom()) {
+ rv = (other.getBottom() - this.getTop()) / delta;
+ } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
+ && this.getBottom() <= other.getBottom()) {
+ rv = (this.getBottom() - other.getTop()) / delta;
+ } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
+ && other.getBottom() <= this.getBottom()) {
+ rv = (other.getBottom() - other.getTop()) / delta;
+ } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
+ && this.getBottom() <= other.getBottom()) {
+ rv = (this.getBottom() - this.getTop()) / delta;
+ }
- public float overlapRatio(Rectangle other) {
- double intersectionWidth = Math.max(0,
- Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
- double intersectionHeight = Math.max(0,
- Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
- double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
- double unionArea = this.getArea() + other.getArea() - intersectionArea;
+ return rv;
- return (float) (intersectionArea / unionArea);
- }
+ }
- public Rectangle merge(Rectangle other) {
- this.setRect(this.createUnion(other));
- return this;
- }
+ public float overlapRatio(Rectangle other) {
+ double intersectionWidth = Math.max(0,
+ Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
+ double intersectionHeight = Math.max(0,
+ Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
+ double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
+ double unionArea = this.getArea() + other.getArea() - intersectionArea;
- public float getTop() {
- return (float) this.getMinY();
- }
+ return (float) (intersectionArea / unionArea);
+ }
- public void setTop(float top) {
- float deltaHeight = top - this.y;
- this.setRect(this.x, top, this.width, this.height - deltaHeight);
- }
+ public Rectangle merge(Rectangle other) {
+ this.setRect(this.createUnion(other));
+ return this;
+ }
- public float getRight() {
- return (float) this.getMaxX();
- }
+ public float getTop() {
+ return (float) this.getMinY();
+ }
- public void setRight(float right) {
- this.setRect(this.x, this.y, right - this.x, this.height);
- }
+ public void setTop(float top) {
+ float deltaHeight = top - this.y;
+ this.setRect(this.x, top, this.width, this.height - deltaHeight);
+ }
- public float getLeft() {
- return (float) this.getMinX();
- }
+ public float getRight() {
+ return (float) this.getMaxX();
+ }
- public void setLeft(float left) {
- float deltaWidth = left - this.x;
- this.setRect(left, this.y, this.width - deltaWidth, this.height);
- }
+ public void setRight(float right) {
+ this.setRect(this.x, this.y, right - this.x, this.height);
+ }
- public float getBottom() {
- return (float) this.getMaxY();
- }
+ public float getLeft() {
+ return (float) this.getMinX();
+ }
- public void setBottom(float bottom) {
- this.setRect(this.x, this.y, this.width, bottom - this.y);
- }
+ public void setLeft(float left) {
+ float deltaWidth = left - this.x;
+ this.setRect(left, this.y, this.width - deltaWidth, this.height);
+ }
- public Point2D[] getPoints() {
- return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()),
- new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
- new Point2D.Float(this.getLeft(), this.getBottom()) };
- }
+ public float getBottom() {
+ return (float) this.getMaxY();
+ }
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- String s = super.toString();
- sb.append(s.substring(0, s.length() - 1));
- sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
- return sb.toString();
- }
+ public void setBottom(float bottom) {
+ this.setRect(this.x, this.y, this.width, bottom - this.y);
+ }
- /**
- * @param rectangles
- * @return minimum bounding box that contains all the rectangles
- */
- public static Rectangle boundingBoxOf(List extends Rectangle> rectangles) {
- float minx = java.lang.Float.MAX_VALUE;
- float miny = java.lang.Float.MAX_VALUE;
- float maxx = java.lang.Float.MIN_VALUE;
- float maxy = java.lang.Float.MIN_VALUE;
+ public Point2D[] getPoints() {
+ return new Point2D[]{new Point2D.Float(this.getLeft(), this.getTop()),
+ new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
+ new Point2D.Float(this.getLeft(), this.getBottom())};
+ }
- for (Rectangle r : rectangles) {
- minx = (float) Math.min(r.getMinX(), minx);
- miny = (float) Math.min(r.getMinY(), miny);
- maxx = (float) Math.max(r.getMaxX(), maxx);
- maxy = (float) Math.max(r.getMaxY(), maxy);
- }
- return new Rectangle(miny, minx, maxx - minx, maxy - miny);
- }
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ String s = super.toString();
+ sb.append(s.substring(0, s.length() - 1));
+ sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
+ return sb.toString();
+ }
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java
index 79f08ec4..404b66e9 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/RectangleSpatialIndex.java
@@ -1,12 +1,11 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
-import java.util.ArrayList;
-import java.util.List;
-
+import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.index.strtree.STRtree;
-import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import java.util.ArrayList;
+import java.util.List;
@SuppressWarnings("all")
public class RectangleSpatialIndex {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java
index 98e3b300..e90c52b2 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Ruling.java
@@ -1,20 +1,13 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
+import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
+import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import lombok.extern.slf4j.Slf4j;
+
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Formatter;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
-import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
-
-import lombok.extern.slf4j.Slf4j;
+import java.util.*;
@Slf4j
@@ -23,13 +16,127 @@ public class Ruling extends Line2D.Float {
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
- private enum SOType {VERTICAL, HRIGHT, HLEFT}
-
-
public Ruling(Point2D p1, Point2D p2) {
super(p1, p2);
}
+ public static List cropRulingsToArea(List rulings, Rectangle2D area) {
+ ArrayList rv = new ArrayList<>();
+ for (Ruling r : rulings) {
+ if (r.intersects(area)) {
+ rv.add(r.intersect(area));
+ }
+ }
+ return rv;
+ }
+
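+ // sweep implementation of find_intersections: events are sorted by x position, and each vertical is tested against the currently open horizontals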
+ // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
+ public static Map findIntersections(List horizontals, List verticals) {
+
+ class SortObject {
+ protected SOType type;
+ protected float position;
+ protected Ruling ruling;
+
+ public SortObject(SOType type, float position, Ruling ruling) {
+ this.type = type;
+ this.position = position;
+ this.ruling = ruling;
+ }
+ }
+
+ List sos = new ArrayList<>();
+
+ TreeMap tree = new TreeMap<>(new Comparator() {
+ @Override
+ public int compare(Ruling o1, Ruling o2) {
+ return java.lang.Double.compare(o1.getTop(), o2.getTop());
+ }
+ });
+
+ TreeMap rv = new TreeMap<>(new Comparator() {
+ @Override
+ public int compare(Point2D o1, Point2D o2) {
+ if (o1.getY() > o2.getY()) {
+ return 1;
+ }
+ if (o1.getY() < o2.getY()) {
+ return -1;
+ }
+ if (o1.getX() > o2.getX()) {
+ return 1;
+ }
+ if (o1.getX() < o2.getX()) {
+ return -1;
+ }
+ return 0;
+ }
+ });
+
+ for (Ruling h : horizontals) {
+ sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
+ sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
+ }
+
+ for (Ruling v : verticals) {
+ sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
+ }
+
+ Collections.sort(sos, new Comparator() {
+ @Override
+ public int compare(SortObject a, SortObject b) {
+ int rv;
+ if (Utils.feq(a.position, b.position)) {
+ if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
+ rv = 1;
+ } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
+ rv = -1;
+ } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
+ rv = -1;
+ } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
+ rv = 1;
+ } else {
+ rv = java.lang.Double.compare(a.position, b.position);
+ }
+ } else {
+ return java.lang.Double.compare(a.position, b.position);
+ }
+ return rv;
+ }
+ });
+
+ for (SortObject so : sos) {
+ switch (so.type) {
+ case VERTICAL:
+ for (Map.Entry h : tree.entrySet()) {
+ try {
+ Point2D i = h.getKey().intersectionPoint(so.ruling);
+ if (i == null) {
+ continue;
+ }
+ rv.put(i,
+ new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
+ so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
+ } catch (UnsupportedOperationException e) {
+ log.info("Some line are oblique, ignoring...");
+ continue;
+ }
+ }
+ break;
+ case HRIGHT:
+ tree.remove(so.ruling);
+ break;
+ case HLEFT:
+ tree.put(so.ruling, true);
+ break;
+ }
+ }
+
+ return rv;
+
+ }
+
public boolean vertical() {
return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
}
@@ -38,13 +145,13 @@ public class Ruling extends Line2D.Float {
return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
}
+ // the attributes after oblique() make sense only for non-oblique lines (getPosition/getStart throw UnsupportedOperationException when oblique);
+ // these are used to have a single collapse method (in page, currently)
+
public boolean oblique() {
return !(this.vertical() || this.horizontal());
}
- // attributes that make sense only for non-oblique lines
- // these are used to have a single collapse method (in page, currently)
-
public float getPosition() {
if (this.oblique()) {
throw new UnsupportedOperationException();
@@ -52,7 +159,6 @@ public class Ruling extends Line2D.Float {
return this.vertical() ? this.getLeft() : this.getTop();
}
-
public float getStart() {
if (this.oblique()) {
throw new UnsupportedOperationException();
@@ -102,12 +208,10 @@ public class Ruling extends Line2D.Float {
}
}
-
public boolean perpendicularTo(Ruling other) {
return this.vertical() == other.horizontal();
}
-
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
if (this.intersectsLine(another)) {
return true;
@@ -238,7 +342,6 @@ public class Ruling extends Line2D.Float {
return angle;
}
-
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -248,122 +351,7 @@ public class Ruling extends Line2D.Float {
return rv;
}
- public static List cropRulingsToArea(List rulings, Rectangle2D area) {
- ArrayList rv = new ArrayList<>();
- for (Ruling r : rulings) {
- if (r.intersects(area)) {
- rv.add(r.intersect(area));
- }
- }
- return rv;
- }
-
- // log(n) implementation of find_intersections
- // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
- public static Map findIntersections(List horizontals, List verticals) {
-
- class SortObject {
- protected SOType type;
- protected float position;
- protected Ruling ruling;
-
- public SortObject(SOType type, float position, Ruling ruling) {
- this.type = type;
- this.position = position;
- this.ruling = ruling;
- }
- }
-
- List sos = new ArrayList<>();
-
- TreeMap tree = new TreeMap<>(new Comparator() {
- @Override
- public int compare(Ruling o1, Ruling o2) {
- return java.lang.Double.compare(o1.getTop(), o2.getTop());
- }
- });
-
- TreeMap rv = new TreeMap<>(new Comparator() {
- @Override
- public int compare(Point2D o1, Point2D o2) {
- if (o1.getY() > o2.getY()) {
- return 1;
- }
- if (o1.getY() < o2.getY()) {
- return -1;
- }
- if (o1.getX() > o2.getX()) {
- return 1;
- }
- if (o1.getX() < o2.getX()) {
- return -1;
- }
- return 0;
- }
- });
-
- for (Ruling h : horizontals) {
- sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
- sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
- }
-
- for (Ruling v : verticals) {
- sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
- }
-
- Collections.sort(sos, new Comparator() {
- @Override
- public int compare(SortObject a, SortObject b) {
- int rv;
- if (Utils.feq(a.position, b.position)) {
- if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
- rv = 1;
- } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
- rv = -1;
- } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
- rv = -1;
- } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
- rv = 1;
- } else {
- rv = java.lang.Double.compare(a.position, b.position);
- }
- } else {
- return java.lang.Double.compare(a.position, b.position);
- }
- return rv;
- }
- });
-
- for (SortObject so : sos) {
- switch (so.type) {
- case VERTICAL:
- for (Map.Entry h : tree.entrySet()) {
- try {
- Point2D i = h.getKey().intersectionPoint(so.ruling);
- if (i == null) {
- continue;
- }
- rv.put(i,
- new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
- so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
- } catch(UnsupportedOperationException e){
- log.info("Some line are oblique, ignoring...");
- continue;
- }
- }
- break;
- case HRIGHT:
- tree.remove(so.ruling);
- break;
- case HLEFT:
- tree.put(so.ruling, true);
- break;
- }
- }
-
- return rv;
-
- }
+ private enum SOType {VERTICAL, HRIGHT, HLEFT}
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
index 8f55b482..6abc086e 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Table.java
@@ -1,22 +1,13 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.commons.collections4.CollectionUtils;
-
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
-
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+
+import java.util.*;
@Slf4j
public class Table extends AbstractTextContainer {
@@ -24,21 +15,14 @@ public class Table extends AbstractTextContainer {
private final TreeMap cells = new TreeMap<>();
private final RectangleSpatialIndex si = new RectangleSpatialIndex<>();
-
+ private final int rotation;
@Getter
@Setter
private String headline;
-
private int unrotatedRowCount;
-
private int unrotatedColCount;
-
private int rowCount = -1;
-
private int colCount = -1;
-
- private final int rotation;
-
private List> rows;
@@ -62,8 +46,8 @@ public class Table extends AbstractTextContainer {
// Ignore rows that does not contain any cells and values.
List> rowsToRemove = new ArrayList<>();
- for (List row: rows){
- if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()){
+ for (List row : rows) {
+ if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()) {
rowsToRemove.add(row);
}
}
@@ -110,7 +94,7 @@ public class Table extends AbstractTextContainer {
// we move from left to right and top to bottom
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
List rowCells = rows.get(rowIndex);
- if(rowCells.size() == 1){
+ if (rowCells.size() == 1) {
continue;
}
@@ -275,7 +259,7 @@ public class Table extends AbstractTextContainer {
cells.sort(Collections.reverseOrder((arg0, arg1) -> Float.compare(Utils.round(arg0.getBottom(), 2),
Utils.round(arg1
- .getBottom(), 2))));
+ .getBottom(), 2))));
Iterator iter = cells.iterator();
Cell c = iter.next();
@@ -367,4 +351,4 @@ public class Table extends AbstractTextContainer {
return sb.toString();
}
-}
\ No newline at end of file
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java
index 82ca3bb7..6f6ea80a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/RulingCleaningService.java
@@ -1,19 +1,13 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
-import java.awt.geom.Line2D;
-import java.awt.geom.Point2D;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.springframework.stereotype.Service;
-
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import org.springframework.stereotype.Service;
+
+import java.awt.geom.Line2D;
+import java.awt.geom.Point2D;
+import java.util.*;
@Service
public class RulingCleaningService {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java
index 3dddd34a..682eb03e 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/service/TableExtractionService.java
@@ -1,31 +1,57 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
-import java.awt.geom.Point2D;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import org.springframework.stereotype.Service;
-
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.*;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
+import org.springframework.stereotype.Service;
+
+import java.awt.geom.Point2D;
+import java.util.*;
+import java.util.stream.Collectors;
@Service
public class TableExtractionService {
+ private static final Comparator X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
+
+ int rv = 0;
+ float arg0X = Utils.round(arg0.getX(), 2);
+ float arg0Y = Utils.round(arg0.getY(), 2);
+ float arg1X = Utils.round(arg1.getX(), 2);
+ float arg1Y = Utils.round(arg1.getY(), 2);
+
+ if (arg0X > arg1X) {
+ rv = 1;
+ } else if (arg0X < arg1X) {
+ rv = -1;
+ } else if (arg0Y > arg1Y) {
+ rv = 1;
+ } else if (arg0Y < arg1Y) {
+ rv = -1;
+ }
+ return rv;
+ };
+ private static final Comparator POINT_COMPARATOR = (arg0, arg1) -> {
+
+ int rv = 0;
+ float arg0X = Utils.round(arg0.getX(), 2);
+ float arg0Y = Utils.round(arg0.getY(), 2);
+ float arg1X = Utils.round(arg1.getX(), 2);
+ float arg1Y = Utils.round(arg1.getY(), 2);
+
+ if (arg0Y > arg1Y) {
+ rv = 1;
+ } else if (arg0Y < arg1Y) {
+ rv = -1;
+ } else if (arg0X > arg1X) {
+ rv = 1;
+ } else if (arg0X < arg1X) {
+ rv = -1;
+ }
+ return rv;
+ };
+
public void extractTables(CleanRulings cleanRulings, Page page) {
List cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
@@ -80,7 +106,6 @@ public class TableExtractionService {
page.getTextBlocks().removeAll(toBeRemoved);
}
-
public List findCells(List horizontalRulingLines, List verticalRulingLines) {
List cellsFound = new ArrayList<>();
@@ -133,7 +158,6 @@ public class TableExtractionService {
return cellsFound;
}
-
private List findSpreadsheetsFromCells(List extends Rectangle> cells) {
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
List rectangles = new ArrayList<>();
@@ -233,47 +257,6 @@ public class TableExtractionService {
return rectangles;
}
-
- private static final Comparator X_FIRST_POINT_COMPARATOR = (arg0, arg1) -> {
-
- int rv = 0;
- float arg0X = Utils.round(arg0.getX(), 2);
- float arg0Y = Utils.round(arg0.getY(), 2);
- float arg1X = Utils.round(arg1.getX(), 2);
- float arg1Y = Utils.round(arg1.getY(), 2);
-
- if (arg0X > arg1X) {
- rv = 1;
- } else if (arg0X < arg1X) {
- rv = -1;
- } else if (arg0Y > arg1Y) {
- rv = 1;
- } else if (arg0Y < arg1Y) {
- rv = -1;
- }
- return rv;
- };
-
- private static final Comparator POINT_COMPARATOR = (arg0, arg1) -> {
-
- int rv = 0;
- float arg0X = Utils.round(arg0.getX(), 2);
- float arg0Y = Utils.round(arg0.getY(), 2);
- float arg1X = Utils.round(arg1.getX(), 2);
- float arg1Y = Utils.round(arg1.getY(), 2);
-
- if (arg0Y > arg1Y) {
- rv = 1;
- } else if (arg0Y < arg1Y) {
- rv = -1;
- } else if (arg0X > arg1X) {
- rv = 1;
- } else if (arg0X < arg1X) {
- rv = -1;
- }
- return rv;
- };
-
private enum Direction {
HORIZONTAL, VERTICAL
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java
index d1f9ab06..bd4b9d0c 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/CohenSutherlandClipping.java
@@ -19,21 +19,24 @@ import java.awt.geom.Rectangle2D;
* clipping algorithm (line against clip rectangle).
*/
@SuppressWarnings("all")
-public final class CohenSutherlandClipping
-{
+public final class CohenSutherlandClipping {
+ private static final int INSIDE = 0;
+ private static final int LEFT = 1;
+ private static final int RIGHT = 2;
+ private static final int BOTTOM = 4;
+ private static final int TOP = 8;
private double xMin;
private double yMin;
private double xMax;
private double yMax;
-
/**
* Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0).
*/
public CohenSutherlandClipping() {
}
-
/**
* Creates a Cohen Sutherland clipper with the given clip rectangle.
+ *
* @param clip the clip rectangle to use
*/
public CohenSutherlandClipping(Rectangle2D clip) {
@@ -42,6 +45,7 @@ public final class CohenSutherlandClipping
/**
* Sets the clip rectangle.
+ *
* @param clip the clip rectangle
*/
public void setClip(Rectangle2D clip) {
@@ -51,19 +55,13 @@ public final class CohenSutherlandClipping
yMax = yMin + clip.getHeight();
}
- private static final int INSIDE = 0;
- private static final int LEFT = 1;
- private static final int RIGHT = 2;
- private static final int BOTTOM = 4;
- private static final int TOP = 8;
-
private final int regionCode(double x, double y) {
- int code = x < xMin
- ? LEFT
- : x > xMax
+ int code = x < xMin
+ ? LEFT
+ : x > xMax
? RIGHT
: INSIDE;
- if (y < yMin) code |= BOTTOM;
+ if (y < yMin) code |= BOTTOM;
else if (y > yMax) code |= TOP;
return code;
}
@@ -71,6 +69,7 @@ public final class CohenSutherlandClipping
/**
* Clips a given line against the clip rectangle.
* The modification (if needed) is done in place.
+ *
* @param line the line to clip
* @return true if line is clipped, false if line is
* totally outside the clip rect.
@@ -87,9 +86,9 @@ public final class CohenSutherlandClipping
boolean vertical = p1x == p2x;
- double slope = vertical
- ? 0d
- : (p2y-p1y)/(p2x-p1x);
+ double slope = vertical
+ ? 0d
+ : (p2y - p1y) / (p2x - p1x);
int c1 = regionCode(p1x, p1y);
int c2 = regionCode(p2x, p2y);
@@ -103,31 +102,27 @@ public final class CohenSutherlandClipping
if ((c & LEFT) != INSIDE) {
qx = xMin;
- qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
- }
- else if ((c & RIGHT) != INSIDE) {
+ qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y;
+ } else if ((c & RIGHT) != INSIDE) {
qx = xMax;
- qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
- }
- else if ((c & BOTTOM) != INSIDE) {
+ qy = (Utils.feq(qx, p1x) ? 0 : qx - p1x) * slope + p1y;
+ } else if ((c & BOTTOM) != INSIDE) {
qy = yMin;
qx = vertical
- ? p1x
- : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
- }
- else if ((c & TOP) != INSIDE) {
+ ? p1x
+ : (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x;
+ } else if ((c & TOP) != INSIDE) {
qy = yMax;
qx = vertical
- ? p1x
- : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
+ ? p1x
+ : (Utils.feq(qy, p1y) ? 0 : qy - p1y) / slope + p1x;
}
if (c == c1) {
p1x = qx;
p1y = qy;
- c1 = regionCode(p1x, p1y);
- }
- else {
+ c1 = regionCode(p1x, p1y);
+ } else {
p2x = qx;
p2y = qy;
c2 = regionCode(p2x, p2y);
@@ -137,4 +132,4 @@ public final class CohenSutherlandClipping
return true;
}
}
-// end of file
\ No newline at end of file
+// end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java
index 5b9c3b6c..909de599 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/QuickSort.java
@@ -10,11 +10,6 @@ import java.util.List;
*/
public final class QuickSort {
- private QuickSort() {
-
- }
-
-
private static final Comparator extends Comparable> OBJCOMP = new Comparator() {
@Override
public int compare(Comparable object1, Comparable object2) {
@@ -24,6 +19,10 @@ public final class QuickSort {
};
+ private QuickSort() {
+
+ }
+
/**
* Sorts the given list using the given comparator.
*
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java
index 62f72434..2a95ec3b 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/utils/Utils.java
@@ -1,11 +1,11 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
+import lombok.extern.slf4j.Slf4j;
+
import java.math.BigDecimal;
import java.util.Comparator;
import java.util.List;
-import lombok.extern.slf4j.Slf4j;
-
@Slf4j
@SuppressWarnings("all")
public class Utils {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
index 43e2cf13..06ccb399 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/visualization/service/PdfVisualisationService.java
@@ -1,15 +1,5 @@
package com.iqser.red.service.redaction.v1.server.visualization.service;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDPageContentStream;
-import org.apache.pdfbox.pdmodel.font.PDType1Font;
-import org.springframework.stereotype.Service;
-
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
@@ -17,9 +7,17 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
-
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.springframework.stereotype.Service;
+
+import java.awt.Color;
+import java.io.IOException;
+import java.util.List;
@Slf4j
@Service
@@ -34,7 +32,7 @@ public class PdfVisualisationService {
PDPage pdPage = document.getPage(page - 1);
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
- for(Paragraph paragraph : classifiedDoc.getParagraphs()) {
+ for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
@@ -44,10 +42,10 @@ public class PdfVisualisationService {
continue;
}
if (textBlock instanceof TextBlock) {
- textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
+ textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
visualizeTextBlock((TextBlock) textBlock, contentStream);
} else if (textBlock instanceof Table) {
- textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
+ textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
visualizeTable((Table) textBlock, contentStream);
}
@@ -59,7 +57,6 @@ public class PdfVisualisationService {
}
-
public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException {
for (int page = 1; page <= document.getNumberOfPages(); page++) {
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml
index 302d198a..2d266963 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml
@@ -1,4 +1,11 @@
server:
port: 8083
-configuration-service.url: "http://localhost:8081"
\ No newline at end of file
+configuration-service.url: "http://localhost:8081"
+file-management-service.url: "http://localhost:8085"
+
+storage:
+ bucket-name: 'redaction'
+ endpoint: 'http://localhost:9000'
+ key: minioadmin
+ secret: minioadmin
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml
index efb01d6f..671d3b20 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application.yml
@@ -2,6 +2,7 @@ info:
description: Redaction Service Server V1
configuration-service.url: "http://configuration-service-v1:8080"
+file-management-service.url: "http://file-management-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
server:
@@ -10,6 +11,20 @@ server:
spring:
profiles:
active: kubernetes
+ rabbitmq:
+ host: ${RABBITMQ_HOST:localhost}
+ port: ${RABBITMQ_PORT:5672}
+ username: ${RABBITMQ_USERNAME:user}
+ password: ${RABBITMQ_PASSWORD:rabbitmq}
+ listener:
+ simple:
+ acknowledge-mode: AUTO
+ concurrency: 2
+ retry:
+ enabled: true
+ max-attempts: 3
+ max-interval: 15000
+ prefetch: 1
management:
endpoint:
@@ -17,4 +32,11 @@ management:
prometheus.enabled: ${monitoring.enabled:false}
health.enabled: true
endpoints.web.exposure.include: prometheus, health
- metrics.export.prometheus.enabled: ${monitoring.enabled:false}
\ No newline at end of file
+ metrics.export.prometheus.enabled: ${monitoring.enabled:false}
+
+
+storage:
+ signer-type: 'AWSS3V4SignerType'
+ bucket-name: 'redaction'
+ region: 'us-east-1'
+ endpoint: 'https://s3.amazonaws.com'
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/FileSystemBackedStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/FileSystemBackedStorageService.java
new file mode 100644
index 00000000..e37034ce
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/FileSystemBackedStorageService.java
@@ -0,0 +1,51 @@
+package com.iqser.red.service.redaction.v1.server;
+
+import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
+import com.iqser.red.storage.commons.service.StorageService;
+import lombok.SneakyThrows;
+import org.apache.commons.io.IOUtils;
+import org.springframework.core.io.InputStreamResource;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Test double for StorageService that keeps every stored object in its own temp file on local disk. */
+public class FileSystemBackedStorageService extends StorageService {
+    /** Maps an object id to the temp file holding that object's bytes. */
+    private final Map<String, File> dataMap = new HashMap<>();
+
+    public FileSystemBackedStorageService() {
+        super(null, null);
+    }
+
+    /** Opens the file registered under objectId; throws StorageObjectDoesNotExist when the id is unknown. */
+    @SneakyThrows
+    @Override
+    public InputStreamResource getObject(String objectId) {
+        File stored = dataMap.get(objectId);
+        if (stored == null) {
+            throw new StorageObjectDoesNotExist(new RuntimeException());
+        }
+        return new InputStreamResource(new FileInputStream(stored));
+    }
+
+    /** Writes data to a new temp file (stream closed by try-with-resources, no leaked handle) and registers it under objectId. */
+    @SneakyThrows
+    @Override
+    public void storeObject(String objectId, byte[] data) {
+        File tempFile = File.createTempFile("test", ".tmp");
+        try (FileOutputStream out = new FileOutputStream(tempFile)) {
+            IOUtils.write(data, out);
+        }
+        dataMap.put(objectId, tempFile);
+    }
+
+    /** Deletes every temp file registered so far (delete results are ignored) and then forgets all object ids. */
+    public void clearStorage() {
+        this.dataMap.values().forEach(File::delete);
+        this.dataMap.clear();
+    }
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
index be77a283..1608a982 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java
@@ -1,30 +1,27 @@
package com.iqser.red.service.redaction.v1.server;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.mockito.Mockito.when;
-import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.RANDOM_PORT;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.URL;
-import java.nio.charset.StandardCharsets;
-import java.time.OffsetDateTime;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-import java.util.stream.Collectors;
-
+import com.amazonaws.services.s3.AmazonS3;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.configuration.v1.api.model.*;
+import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
+import com.iqser.red.service.file.management.v1.api.model.FileType;
+import com.iqser.red.service.redaction.v1.model.*;
+import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
+import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
+import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
+import com.iqser.red.service.redaction.v1.server.client.RulesClient;
+import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
+import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
+import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
+import com.iqser.red.storage.commons.service.StorageService;
+import lombok.SneakyThrows;
import org.apache.commons.io.IOUtils;
+import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
@@ -32,48 +29,32 @@ import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
+import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.iqser.red.service.configuration.v1.api.model.Colors;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
-import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResult;
-import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
-import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
-import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
-import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
-import com.iqser.red.service.redaction.v1.model.Comment;
-import com.iqser.red.service.redaction.v1.model.IdRemoval;
-import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
-import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
-import com.iqser.red.service.redaction.v1.model.ManualRedactions;
-import com.iqser.red.service.redaction.v1.model.Point;
-import com.iqser.red.service.redaction.v1.model.ReanalyzeResult;
-import com.iqser.red.service.redaction.v1.model.Rectangle;
-import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
-import com.iqser.red.service.redaction.v1.model.RedactionRequest;
-import com.iqser.red.service.redaction.v1.model.RedactionResult;
-import com.iqser.red.service.redaction.v1.model.RenalyzeRequest;
-import com.iqser.red.service.redaction.v1.model.SectionText;
-import com.iqser.red.service.redaction.v1.model.Status;
-import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
-import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
-import com.iqser.red.service.redaction.v1.server.client.RulesClient;
-import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
-import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
-import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
+import java.io.*;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.time.OffsetDateTime;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.when;
@RunWith(SpringRunner.class)
-@SpringBootTest(webEnvironment = RANDOM_PORT)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(RedactionIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class RedactionIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
@@ -93,6 +74,7 @@ public class RedactionIntegrationTest {
private static final String SIGNATURE = "signature";
private static final String FORMULA = "formula";
private static final String OCR = "ocr";
+ private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author";
private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address";
@@ -101,9 +83,13 @@ public class RedactionIntegrationTest {
private static final String PII = "PII";
+
@Autowired
private RedactionController redactionController;
+ @Autowired
+ private ReanalyzeService reanalyzeService;
+
@Autowired
private ObjectMapper objectMapper;
@@ -116,7 +102,20 @@ public class RedactionIntegrationTest {
@MockBean
private ImageClassificationClient imageClassificationClient;
+ @Autowired
+ private RedactionStorageService redactionStorageService;
+
+ @Autowired
+ private StorageService storageService;
+
+ @MockBean
+ private AmazonS3 amazonS3;
+
+ @MockBean
+ private RabbitTemplate rabbitTemplate;
+
private final Map> dictionary = new HashMap<>();
+ private final Map> dossierDictionary = new HashMap<>();
private final Map typeColorMap = new HashMap<>();
private final Map hintTypeMap = new HashMap<>();
private final Map caseInSensitiveMap = new HashMap<>();
@@ -126,8 +125,11 @@ public class RedactionIntegrationTest {
private final Map reanlysisVersions = new HashMap<>();
private final static String TEST_RULESET_ID = "123";
+ private final static String TEST_PROJECT_ID = "123";
+ private final static String TEST_FILE_ID = "123";
- @TestConfiguration
+ @Configuration
+ @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
public static class RedactionIntegrationTestConfiguration {
@Bean
@@ -146,6 +148,21 @@ public class RedactionIntegrationTest {
return kieServices.newKieContainer(kieModule.getReleaseId());
}
+ @Bean
+ @Primary // marks this bean as the preferred StorageService candidate when one is injected
+ public StorageService inmemoryStorage() { // despite the name, the returned implementation keeps objects in temp files
+ return new FileSystemBackedStorageService();
+ }
+
+
+ }
+
+
+ @After
+ public void cleanupStorage() { // JUnit 4 runs this after every test method
+ if (this.storageService instanceof FileSystemBackedStorageService) { // any other StorageService implementation is left untouched
+ ((FileSystemBackedStorageService) this.storageService).clearStorage(); // delete the temp files and forget the stored ids
+ }
}
@@ -158,30 +175,45 @@ public class RedactionIntegrationTest {
loadDictionaryForTest();
loadTypeForTest();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
- when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(TypeResponse.builder()
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
+ when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
.types(getTypeResponse())
.build());
- when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
- when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(ADDRESS));
- when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(AUTHOR));
- when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SPONSOR));
- when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR));
- when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR));
- when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(HINT_ONLY));
- when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(MUST_REDACT));
- when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION));
- when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(TEST_METHOD));
- when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PII));
- when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR));
- when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS));
- when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FALSE_POSITIVE));
- when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PURITY));
- when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(IMAGE));
- when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(OCR));
- when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(LOGO));
- when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(SIGNATURE));
- when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FORMULA));
+
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(0L);
+ when(dictionaryClient.getAllTypes(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(TypeResponse.builder()
+ .types(List.of(TypeResult.builder()
+ .type(DOSSIER_REDACTIONS)
+ .ruleSetId(TEST_RULESET_ID)
+ .hexColor( "#ffe187")
+ .isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
+ .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
+ .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
+ .rank(rankTypeMap.get(DOSSIER_REDACTIONS))
+ .build()))
+ .build());
+
+ when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
+ when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
+ when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
+ when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
+ when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
+ when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
+ when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
+ when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
+ when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
+ when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
+ when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
+ when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
+ when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
+ when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
+ when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
+ when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
+ when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
+ when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
+ when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
+ when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
+ when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
}
@@ -288,6 +320,11 @@ public class RedactionIntegrationTest {
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
+ dossierDictionary.computeIfAbsent(DOSSIER_REDACTIONS, v -> new ArrayList<>())
+ .addAll(ResourceLoader.load("dictionaries/dossier_redactions.txt")
+ .stream()
+ .map(this::cleanDictionaryEntry)
+ .collect(Collectors.toSet()));
}
@@ -340,6 +377,7 @@ public class RedactionIntegrationTest {
hintTypeMap.put(FORMULA, false);
hintTypeMap.put(LOGO, false);
hintTypeMap.put(SIGNATURE, false);
+ hintTypeMap.put(DOSSIER_REDACTIONS, false);
caseInSensitiveMap.put(VERTEBRATE, true);
caseInSensitiveMap.put(ADDRESS, false);
@@ -361,6 +399,7 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(SIGNATURE, true);
caseInSensitiveMap.put(LOGO, true);
caseInSensitiveMap.put(FORMULA, true);
+ caseInSensitiveMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(VERTEBRATE, false);
recommendationTypeMap.put(ADDRESS, false);
@@ -382,6 +421,7 @@ public class RedactionIntegrationTest {
recommendationTypeMap.put(FORMULA, false);
recommendationTypeMap.put(SIGNATURE, false);
recommendationTypeMap.put(LOGO, false);
+ recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
rankTypeMap.put(FALSE_POSITIVE, 160);
rankTypeMap.put(PURITY, 155);
@@ -403,6 +443,7 @@ public class RedactionIntegrationTest {
rankTypeMap.put(LOGO, 28);
rankTypeMap.put(SIGNATURE, 27);
rankTypeMap.put(FORMULA, 26);
+ rankTypeMap.put(DOSSIER_REDACTIONS, 200);
colors.setDefaultColor("#acfc00");
colors.setNotRedacted("#cccccc");
@@ -429,11 +470,11 @@ public class RedactionIntegrationTest {
}
- private DictionaryResponse getDictionaryResponse(String type) {
+ private DictionaryResponse getDictionaryResponse(String type, boolean isDossierDictionary) {
return DictionaryResponse.builder()
.hexColor(typeColorMap.get(type))
- .entries(toDictionaryEntry(dictionary.get(type)))
+ .entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
@@ -453,6 +494,71 @@ public class RedactionIntegrationTest {
@Test
+    public void test270Rotated() {
+        // Smoke test: analysing the 270Rotated sample must hand back a non-null result.
+        AnalyzeRequest rotatedPdf = prepareStorage("files/Minimal Examples/270Rotated.pdf");
+        MemoryStats.printMemoryStats();
+        assertThat(reanalyzeService.analyze(rotatedPdf)).isNotNull();
+    }
+
+
+    @Test
+    @Ignore
+    public void testLargeScannedFileOOM() {
+        // Disabled via @Ignore; when enabled it only checks that analysing the scanned sample returns a result.
+        AnalyzeRequest scannedPdf = prepareStorage("scanned/VV-377031.pdf");
+        MemoryStats.printMemoryStats();
+        assertThat(reanalyzeService.analyze(scannedPdf)).isNotNull();
+    }
+
+    /**
+     * Analyses the merge_images sample, checks that every redaction log entry id occurs only once,
+     * then raises the mocked dictionary version, writes the annotated PDF to the temp directory
+     * and times a reanalysis of the same request.
+     */
+    @Test
+    public void testMergedImages() throws IOException {
+
+        long start = System.currentTimeMillis();
+        ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
+
+        AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+        // The returned AnalyzeResult is not needed; the redaction log is read back from storage below.
+        reanalyzeService.analyze(request);
+
+        var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+
+        // Set.add returns false for an id that was already seen, i.e. for a duplicated entry.
+        Set<Object> seenIds = new HashSet<>();
+        redactionLog.getRedactionLogEntry().forEach(entry ->
+                assertThat(seenIds.add(entry.getId())).as("unique redaction log entry id %s", entry.getId()).isTrue());
+
+        // Extend the author dictionary and report a newer version through the mocked client.
+        dictionary.get(AUTHOR).add("Drinking water");
+        when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
+
+        AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
+                .projectId(TEST_PROJECT_ID)
+                .fileId(TEST_FILE_ID)
+                .build());
+
+        // Resolve the platform temp directory instead of hard-coding "/tmp", which only exists on Unix-like systems.
+        File annotatedPdf = new File(System.getProperty("java.io.tmpdir"), "Annotated3.pdf");
+        try (FileOutputStream fileOutputStream = new FileOutputStream(annotatedPdf)) {
+            fileOutputStream.write(annotateResponse.getDocument());
+        }
+
+        long rstart = System.currentTimeMillis();
+        reanalyzeService.reanalyze(request);
+        long rend = System.currentTimeMillis();
+        System.out.println("reanalysis analysis duration: " + (rend - rstart));
+
+        long end = System.currentTimeMillis();
+        System.out.println("duration: " + (end - start));
+    }
+
+ @Test
+ @Ignore
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
long start = System.currentTimeMillis();
@@ -465,15 +571,16 @@ public class RedactionIntegrationTest {
input.addAll(getPathsRecursively(file));
}
for (File path : input) {
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(new FileInputStream(path)))
- .build();
+
+ AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
System.out.println("Redacting file : " + path.getName());
- AnalyzeResult result = redactionController.analyze(request);
+ AnalyzeResult result = reanalyzeService.analyze(request);
Map> duplicates = new HashMap<>();
- result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
+
+ var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+
+ redactionLog.getRedactionLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
});
@@ -482,16 +589,10 @@ public class RedactionIntegrationTest {
});
dictionary.get(AUTHOR).add("Drinking water");
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
long rstart = System.currentTimeMillis();
- ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
- .redactionLog(result.getRedactionLog())
- .document(IOUtils.toByteArray(new FileInputStream(path)))
- .manualRedactions(null)
- .text(result.getText())
- .ruleSetId(TEST_RULESET_ID)
- .build());
+ reanalyzeService.reanalyze(request);
long rend = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (rend - rstart));
@@ -526,18 +627,16 @@ public class RedactionIntegrationTest {
@Test
public void redactionTest() throws IOException {
- System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
+ AnalyzeResult result = reanalyzeService.analyze(request);
- AnalyzeResult result = redactionController.analyze(request);
+ var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+ var text = redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID);
- result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
+ redactionLog.getRedactionLogEntry().forEach(entry -> {
if (entry.isImage()) {
System.out.println("---->" + entry.getType());
}
@@ -548,13 +647,13 @@ public class RedactionIntegrationTest {
System.out.println("first analysis duration: " + (end - start));
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
- fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
+ fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID)));
}
int correctFound = 0;
loop:
- for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
- for (SectionText sectionText : result.getText().getSectionTexts()) {
+ for (RedactionLogEntry redactionLogEntry : redactionLog.getRedactionLogEntry()) {
+ for (SectionText sectionText : text.getSectionTexts()) {
if (redactionLogEntry.isImage()) {
correctFound++;
continue loop;
@@ -570,7 +669,7 @@ public class RedactionIntegrationTest {
}
}
}
- assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
+ assertThat(correctFound).isEqualTo(redactionLog.getRedactionLogEntry().size());
dictionary.get(AUTHOR).add("properties");
reanlysisVersions.put("properties", 1L);
@@ -581,25 +680,19 @@ public class RedactionIntegrationTest {
dictionary.get(VERTEBRATE).add("s-metolachlor");
reanlysisVersions.put("s-metolachlor", 3L);
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(3L);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
- when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(VERTEBRATE));
+ when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
start = System.currentTimeMillis();
- ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
- .redactionLog(result.getRedactionLog())
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .text(result.getText())
- .ruleSetId(TEST_RULESET_ID)
- .build());
+ AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(reanalyzeResult.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
@@ -614,19 +707,13 @@ public class RedactionIntegrationTest {
System.out.println("testTableRedaction");
long start = System.currentTimeMillis();
- ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
-
- AnalyzeResult result = redactionController.analyze(request);
+ AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
+ AnalyzeResult result = reanalyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(result.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
@@ -681,13 +768,10 @@ public class RedactionIntegrationTest {
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .manualRedactions(manualRedactions)
- .build();
- AnalyzeResult result = redactionController.analyze(request);
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+ request.setManualRedactions(manualRedactions);
+ AnalyzeResult result = reanalyzeService.analyze(request);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
@@ -695,20 +779,15 @@ public class RedactionIntegrationTest {
.status(Status.APPROVED)
.build()));
- ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
- .redactionLog(result.getRedactionLog())
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .manualRedactions(manualRedactions)
- .text(result.getText())
- .ruleSetId(TEST_RULESET_ID)
- .build());
+ reanalyzeService.reanalyze(request);
+
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(reanalyzeResult.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
+
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
@@ -725,11 +804,16 @@ public class RedactionIntegrationTest {
System.out.println("classificationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.classify(request);
+ RedactionResult result = redactionController.classify(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) {
fileOutputStream.write(result.getDocument());
@@ -743,11 +827,15 @@ public class RedactionIntegrationTest {
System.out.println("sectionsTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.sections(request);
+ RedactionResult result = redactionController.sections(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) {
fileOutputStream.write(result.getDocument());
@@ -761,11 +849,15 @@ public class RedactionIntegrationTest {
System.out.println("htmlTablesTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.htmlTables(request);
+ RedactionResult result = redactionController.htmlTables(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
fileOutputStream.write(result.getDocument());
@@ -779,11 +871,15 @@ public class RedactionIntegrationTest {
System.out.println("htmlTableRotationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
- RedactionRequest request = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ RedactionRequest redactionRequest = RedactionRequest.builder()
+ .projectId(request.getProjectId())
+ .fileId(request.getFileId())
+ .ruleSetId(request.getRuleSetId())
.build();
- RedactionResult result = redactionController.htmlTables(request);
+ RedactionResult result = redactionController.htmlTables(redactionRequest);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
fileOutputStream.write(result.getDocument());
@@ -796,20 +892,45 @@ public class RedactionIntegrationTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Phantom Cells.pdf");
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
- AnalyzeResult result = redactionController.analyze(request);
+ AnalyzeResult result = reanalyzeService.analyze(request);
- result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
+ var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
+
+ redactionLog.getRedactionLogEntry().forEach(entry -> {
if (!entry.isHint()) {
assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study");
}
});
}
+ /**
+ * Convenience overload: opens the classpath resource at the given location and
+ * hands its stream to prepareStorage(InputStream).
+ *
+ * @param file classpath location of the document to store
+ * @return the request returned by prepareStorage(InputStream)
+ */
+ @SneakyThrows
+ private AnalyzeRequest prepareStorage(String file) {
+ InputStream resourceStream = new ClassPathResource(file).getInputStream();
+ return prepareStorage(resourceStream);
+ }
+
+
+ /**
+ * Stores the given document bytes in the test storage as the ORIGIN file of
+ * TEST_PROJECT_ID / TEST_FILE_ID and returns an analyze request carrying the
+ * same rule set, project and file ids.
+ *
+ * The stream is read completely and then closed by this method, so callers may
+ * pass a freshly opened stream (for example a FileInputStream) without leaking it.
+ *
+ * @param stream content of the document to store; consumed and closed here
+ * @return request built with TEST_RULESET_ID, TEST_PROJECT_ID, TEST_FILE_ID and the current time as lastProcessed
+ */
+ @SneakyThrows
+ private AnalyzeRequest prepareStorage(InputStream stream) {
+
+ AnalyzeRequest request = AnalyzeRequest.builder()
+ .ruleSetId(TEST_RULESET_ID)
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
+ .lastProcessed(OffsetDateTime.now())
+ .build();
+
+ // IOUtils.toByteArray drains its argument but does not close it, so close it here.
+ byte[] bytes;
+ try (InputStream in = stream) {
+ bytes = IOUtils.toByteArray(in);
+ }
+
+ storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_PROJECT_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
+
+ return request;
+
+ }
+
@Test
public void sponsorCompanyTest() throws IOException {
@@ -817,17 +938,14 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
- AnalyzeRequest request = AnalyzeRequest.builder()
- .ruleSetId(TEST_RULESET_ID)
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
- AnalyzeResult result = redactionController.analyze(request);
+ AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
+
+ AnalyzeResult result = reanalyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .redactionLog(result.getRedactionLog())
- .sectionGrid(result.getSectionGrid())
+ .projectId(TEST_PROJECT_ID)
+ .fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
@@ -858,4 +976,4 @@ public class RedactionIntegrationTest {
}
}
-}
\ No newline at end of file
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
index b7efed93..b6cb00ca 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java
@@ -1,12 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
-import com.iqser.red.service.configuration.v1.api.model.Colors;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
-import com.iqser.red.service.configuration.v1.api.model.DictionaryResponse;
-import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
-import com.iqser.red.service.configuration.v1.api.model.TypeResult;
-import com.iqser.red.service.redaction.v1.model.RedactionRequest;
+import com.amazonaws.services.s3.AmazonS3;
+import com.iqser.red.service.configuration.v1.api.model.*;
+import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
+import com.iqser.red.service.redaction.v1.server.Application;
+import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
@@ -14,8 +12,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
-import org.apache.commons.io.IOUtils;
-import org.apache.pdfbox.pdmodel.PDDocument;
+import com.iqser.red.storage.commons.service.StorageService;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
@@ -26,10 +23,14 @@ import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
@@ -40,21 +41,15 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
-@SpringBootTest
@RunWith(SpringRunner.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(EntityRedactionServiceTest.RedactionIntegrationTestConfiguration.class)
public class EntityRedactionServiceTest {
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
@@ -80,9 +75,13 @@ public class EntityRedactionServiceTest {
@Autowired
private DroolsExecutionService droolsExecutionService;
+ @MockBean
+ private AmazonS3 amazonS3;
+
private final static String TEST_RULESET_ID = "123";
- @TestConfiguration
+ @Configuration
+ @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
public static class RedactionIntegrationTestConfiguration {
@Bean
@@ -101,6 +100,13 @@ public class EntityRedactionServiceTest {
return kieServices.newKieContainer(kieModule.getReleaseId());
}
+
+ /**
+ * Supplies the StorageService bean for this test context and marks it as the
+ * primary candidate. Despite the bean name, the instance returned is a
+ * FileSystemBackedStorageService.
+ */
+ @Bean
+ @Primary
+ public StorageService inmemoryStorage() {
+ StorageService fileBackedStorage = new FileSystemBackedStorageService();
+ return fileBackedStorage;
+ }
+
}
@@ -108,8 +114,8 @@ public class EntityRedactionServiceTest {
public void testNestedEntitiesRemoval() {
Set entities = new HashSet<>();
- Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false);
- Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false);
+ Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false);
+ Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false);
entities.add(nested);
entities.add(nesting);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
@@ -125,31 +131,25 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
- RedactionRequest redactionRequest = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
-
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
- }
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@@ -158,30 +158,24 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf");
- RedactionRequest redactionRequest = RedactionRequest.builder()
- .document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
- .build();
-
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "O’Loughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
- }
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@@ -190,64 +184,58 @@ public class EntityRedactionServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
" Supplement - Identity of the active substance - Reference list.pdf");
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()
- .entrySet()
- .stream()
- .noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
- }
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()
+ .entrySet()
+ .stream()
+ .noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
"the plant protection product.pdf");
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()
- .entrySet()
- .stream()
- .noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
- }
+ classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()
+ .entrySet()
+ .stream()
+ .noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
}
@Test
public void testFalsePositiveInWrongCell() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1).stream()
- .filter(entity -> entity.getMatchedRule() == 9)
- .count()).isEqualTo(10);
- }
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1).stream()
+ .filter(entity -> entity.getMatchedRule() == 9)
+ .count()).isEqualTo(10);
}
@@ -296,27 +284,25 @@ public class EntityRedactionServiceTest {
droolsExecutionService.updateRules(TEST_RULESET_ID);
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1).stream()
- .filter(entity -> entity.getMatchedRule() == 6)
- .count()).isEqualTo(13);
- }
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1).stream()
+ .filter(entity -> entity.getMatchedRule() == 6)
+ .count()).isEqualTo(13);
}
@@ -337,27 +323,25 @@ public class EntityRedactionServiceTest {
droolsExecutionService.updateRules(TEST_RULESET_ID);
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf");
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse authorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(authorResponse);
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authorResponse);
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1).stream()
- .filter(entity -> entity.getMatchedRule() == 11)
- .count()).isEqualTo(1);
- }
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1).stream()
+ .filter(entity -> entity.getMatchedRule() == 11)
+ .count()).isEqualTo(1);
}
@@ -371,24 +355,22 @@ public class EntityRedactionServiceTest {
.entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
.build();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
- assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
- assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
- }
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
+ assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
+ assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation2.pdf");
@@ -396,20 +378,18 @@ public class EntityRedactionServiceTest {
.entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
.build();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
- assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
- }
+ classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
+ assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
}
@@ -423,23 +403,21 @@ public class EntityRedactionServiceTest {
.entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
.build();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
- entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null);
- assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
- assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
- }
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
+ Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
+ assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
+ assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
}
@@ -476,19 +454,19 @@ public class EntityRedactionServiceTest {
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
.build();
- when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
- when(dictionaryClient.getAllTypes(TEST_RULESET_ID)).thenReturn(typeResponse);
+ when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
+ when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(typeResponse);
// Default empty return to prevent NPEs
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.build();
- when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID)).thenReturn(dictionaryResponse);
+ when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.build();
- when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID)).thenReturn(addressResponse);
+ when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.build();
- when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID)).thenReturn(sponsorResponse);
+ when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Colors colors = new Colors();
colors.setDefaultColor("#acfc00");
@@ -518,7 +496,7 @@ public class EntityRedactionServiceTest {
}
}
- private List<DictionaryEntry> toDictionaryEntry(List<String> entries){
+ private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> {
dictionaryEntries.add(new DictionaryEntry(entry, 1L, false));
@@ -526,4 +504,4 @@ public class EntityRedactionServiceTest {
return dictionaryEntries;
}
-}
\ No newline at end of file
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java
index 4f58b26d..44842b7d 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java
@@ -1,7 +1,31 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
-import static org.assertj.core.api.Assertions.assertThat;
+import com.amazonaws.services.s3.AmazonS3;
+import com.iqser.red.service.redaction.v1.server.Application;
+import com.iqser.red.service.redaction.v1.server.classification.model.Document;
+import com.iqser.red.service.redaction.v1.server.classification.model.Page;
+import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
+import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
+import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
+import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
+import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.kie.api.runtime.KieContainer;
+import org.springframework.amqp.rabbit.core.RabbitTemplate;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.boot.test.mock.mockito.MockBean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.test.context.junit4.SpringRunner;
+import javax.imageio.ImageIO;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
@@ -9,31 +33,12 @@ import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
-import javax.imageio.ImageIO;
+import static org.assertj.core.api.Assertions.assertThat;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.kie.api.runtime.KieContainer;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.boot.test.mock.mockito.MockBean;
-import org.springframework.core.io.ClassPathResource;
-import org.springframework.test.context.junit4.SpringRunner;
-import com.iqser.red.service.redaction.v1.server.classification.model.Document;
-import com.iqser.red.service.redaction.v1.server.classification.model.Page;
-import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
-import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
-import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
-import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
-import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
-import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
-
-@SpringBootTest
@RunWith(SpringRunner.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(PdfSegmentationServiceTest.TestConfiguration.class)
public class PdfSegmentationServiceTest {
@Autowired
@@ -51,6 +56,28 @@ public class PdfSegmentationServiceTest {
@MockBean
private KieContainer kieContainer;
+ @MockBean
+ private AmazonS3 amazonS3;
+
+ @MockBean
+ private RabbitTemplate rabbitTemplate;
+
+ @Configuration
+ @EnableAutoConfiguration(exclude = { RabbitAutoConfiguration.class})
+ public static class TestConfiguration {
+
+ }
+
+ @Test
+ public void testMergeImages() throws IOException {
+
+ ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/270Rotated.pdf");
+
+ Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ assertThat(document.getPages().get(0).getImages().size()).isEqualTo(1);
+ assertThat(document.getPages().get(1).getImages().size()).isEqualTo(0);
+
+ }
@Test
@Ignore
@@ -58,61 +85,78 @@ public class PdfSegmentationServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document document = pdfSegmentationService.parseDocument(pdDocument);
- int i = 0;
- for (Page page : document.getPages()) {
- for (PdfImage image : page.getImages()) {
- try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
- ImageIO.write(image.getImage(), "png", baos);
- try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Image " + i + ".png")) {
- fileOutputStream.write(baos.toByteArray());
- }
+ Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ int i = 0;
+ for (Page page : document.getPages()) {
+ for (PdfImage image : page.getImages()) {
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+ ImageIO.write(image.getImage(), "png", baos);
+ try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Image " + i + ".png")) {
+ fileOutputStream.write(baos.toByteArray());
}
- i++;
}
+ i++;
}
}
}
+ @Test
+ public void testPDFSegmentationWithComplexTable() throws IOException {
+
+ ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Spanning Cells.pdf");
+
+ Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ assertThat(document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())).isNotEmpty();
+ Table table = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(0);
+ assertThat(table.getColCount()).isEqualTo(6);
+ assertThat(table.getRowCount()).isEqualTo(13);
+ assertThat(table.getRows().stream().mapToInt(List::size).sum()).isEqualTo(6 * 13);
+ }
+
+
@Test
public void testTableExtraction() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document document = pdfSegmentationService.parseDocument(pdDocument);
- assertThat(document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())).isNotEmpty();
- Table firstTable = document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())
- .get(0);
- assertThat(firstTable.getColCount()).isEqualTo(8);
- assertThat(firstTable.getRowCount()).isEqualTo(1);
- Table secondTable = document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())
- .get(1);
- assertThat(secondTable.getColCount()).isEqualTo(8);
- assertThat(secondTable.getRowCount()).isEqualTo(2);
- List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
- .get(0)
- .stream()
- .map(Collections::singletonList)
- .collect(Collectors.toList());
- assertThat(secondTable.getRows()
- .stream()
- .allMatch(row -> row.stream()
- .map(Cell::getHeaderCells)
- .collect(Collectors.toList())
- .equals(firstTableHeaderCells))).isTrue();
- }
+ Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ assertThat(document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())).isNotEmpty();
+ Table firstTable = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(0);
+ assertThat(firstTable.getColCount()).isEqualTo(8);
+ assertThat(firstTable.getRowCount()).isEqualTo(1);
+ Table secondTable = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(1);
+ assertThat(secondTable.getColCount()).isEqualTo(8);
+ assertThat(secondTable.getRowCount()).isEqualTo(2);
+ List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
+ .get(0)
+ .stream()
+ .map(Collections::singletonList)
+ .collect(Collectors.toList());
+ assertThat(secondTable.getRows()
+ .stream()
+ .allMatch(row -> row.stream()
+ .map(Cell::getHeaderCells)
+ .collect(Collectors.toList())
+ .equals(firstTableHeaderCells))).isTrue();
}
@@ -121,38 +165,36 @@ public class PdfSegmentationServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Multi Page Table.pdf");
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document document = pdfSegmentationService.parseDocument(pdDocument);
- assertThat(document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())).isNotEmpty();
- Table firstTable = document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())
- .get(0);
- assertThat(firstTable.getColCount()).isEqualTo(9);
- assertThat(firstTable.getRowCount()).isEqualTo(5);
- Table secondTable = document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())
- .get(1);
- assertThat(secondTable.getColCount()).isEqualTo(9);
- assertThat(secondTable.getRowCount()).isEqualTo(6);
- List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
- .get(firstTable.getRowCount() - 1)
- .stream()
- .map(Cell::getHeaderCells)
- .collect(Collectors.toList());
- assertThat(secondTable.getRows()
- .stream()
- .allMatch(row -> row.stream()
- .map(Cell::getHeaderCells)
- .collect(Collectors.toList())
- .equals(firstTableHeaderCells))).isTrue();
- }
+ Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ assertThat(document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())).isNotEmpty();
+ Table firstTable = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(0);
+ assertThat(firstTable.getColCount()).isEqualTo(9);
+ assertThat(firstTable.getRowCount()).isEqualTo(5);
+ Table secondTable = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(1);
+ assertThat(secondTable.getColCount()).isEqualTo(9);
+ assertThat(secondTable.getRowCount()).isEqualTo(6);
+ List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
+ .get(firstTable.getRowCount() - 1)
+ .stream()
+ .map(Cell::getHeaderCells)
+ .collect(Collectors.toList());
+ assertThat(secondTable.getRows()
+ .stream()
+ .allMatch(row -> row.stream()
+ .map(Cell::getHeaderCells)
+ .collect(Collectors.toList())
+ .equals(firstTableHeaderCells))).isTrue();
}
@@ -161,38 +203,36 @@ public class PdfSegmentationServiceTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Rotated Table Headers.pdf");
- try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
- Document document = pdfSegmentationService.parseDocument(pdDocument);
- assertThat(document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())).isNotEmpty();
- Table firstTable = document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())
- .get(0);
- assertThat(firstTable.getColCount()).isEqualTo(8);
- assertThat(firstTable.getRowCount()).isEqualTo(1);
- Table secondTable = document.getParagraphs()
- .stream()
- .flatMap(paragraph -> paragraph.getTables().stream())
- .collect(Collectors.toList())
- .get(1);
- assertThat(secondTable.getColCount()).isEqualTo(8);
- assertThat(secondTable.getRowCount()).isEqualTo(6);
- List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
- .get(0)
- .stream()
- .map(Collections::singletonList)
- .collect(Collectors.toList());
- assertThat(secondTable.getRows()
- .stream()
- .allMatch(row -> row.stream()
- .map(Cell::getHeaderCells)
- .collect(Collectors.toList())
- .equals(firstTableHeaderCells))).isTrue();
- }
+ Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
+ assertThat(document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())).isNotEmpty();
+ Table firstTable = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(0);
+ assertThat(firstTable.getColCount()).isEqualTo(8);
+ assertThat(firstTable.getRowCount()).isEqualTo(1);
+ Table secondTable = document.getParagraphs()
+ .stream()
+ .flatMap(paragraph -> paragraph.getTables().stream())
+ .collect(Collectors.toList())
+ .get(1);
+ assertThat(secondTable.getColCount()).isEqualTo(8);
+ assertThat(secondTable.getRowCount()).isEqualTo(6);
+ List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
+ .get(0)
+ .stream()
+ .map(Collections::singletonList)
+ .collect(Collectors.toList());
+ assertThat(secondTable.getRows()
+ .stream()
+ .allMatch(row -> row.stream()
+ .map(Cell::getHeaderCells)
+ .collect(Collectors.toList())
+ .equals(firstTableHeaderCells))).isTrue();
}
}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml
index 23e59464..4b511179 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/application.yml
@@ -1,5 +1,6 @@
configuration-service.url: "http://configuration-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
+file-management-service.url: "http://file-management-service-v1:8080"
ribbon:
ConnectTimeout: 600000
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/dossier_redactions.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/dossier_redactions.txt
new file mode 100644
index 00000000..3840a8ac
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/dossier_redactions.txt
@@ -0,0 +1 @@
+Difenoconazole
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
index a2a78200..5f7e24f2 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl
@@ -328,4 +328,12 @@ rule "28: Redact Logos"
Section(matchesImageType("logo"))
then
section.redactImage("logo", 28, "Logo found", "Reg (EC) No 1107/2009 Art. 63 (2g)");
- end
\ No newline at end of file
+ end
+
+
+rule "29: Redact Dossier Redactions"
+ when
+ Section(matchesType("dossier_redactions"))
+ then
+ section.redact("dossier_redactions", 29, "Dossier Redaction found", "Article 39(1)(2) of Regulation (EC) No 178/2002");
+ end
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/merge_images.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/merge_images.pdf
new file mode 100644
index 00000000..a2decc1a
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/merge_images.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/scanned/VV-377031.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/scanned/VV-377031.pdf
new file mode 100644
index 00000000..0fe661c4
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/scanned/VV-377031.pdf differ
| | | | | | | |