diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java index e4cdd6e4..89c81de5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/model/Page.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.classification.model; +import java.awt.geom.Rectangle2D; import java.util.List; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; @@ -16,6 +17,8 @@ public class Page { @NonNull private List textBlocks; + private List imageBounds; + private Rectangle bodyTextFrame; private boolean landscape; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java index 53b66b25..2e493797 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java @@ -1,6 +1,8 @@ package com.iqser.red.service.redaction.v1.server.parsing; +import java.awt.geom.AffineTransform; import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -28,10 +30,16 @@ import org.apache.pdfbox.contentstream.operator.state.SetLineWidth; import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent; import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize; import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; +import org.apache.pdfbox.util.Matrix; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; @@ -43,6 +51,9 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class PDFLinesTextStripper extends PDFTextStripper { + @Setter + protected PDPage pdpage; + @Getter private int maxCharWidths; @@ -57,13 +68,18 @@ public class PDFLinesTextStripper extends PDFTextStripper { private final List graphicsPath = new ArrayList<>(); + @Getter + private List imageBounds = new ArrayList<>(); + private float path_x; private float path_y; @Setter private int pageNumber; + public PDFLinesTextStripper() throws IOException { + super(); this.addOperator(new SetStrokingColorSpace()); this.addOperator(new SetNonStrokingColorSpace()); @@ -87,9 +103,9 @@ public class PDFLinesTextStripper extends PDFTextStripper { this.addOperator(new SetLineWidth()); } + @Override - protected void processOperator(Operator operator, List arguments) - throws IOException { + protected void processOperator(Operator operator, List arguments) throws IOException { String operation = operator.getName(); @@ -110,9 +126,11 @@ public class PDFLinesTextStripper extends PDFTextStripper { // The direction of vertical lines must always be from bottom to top for the table extraction algorithm. if (pos.getY() > path_y) { - graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos + .getY()))); } else { - graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos.getX(), path_y))); + graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos + .getX(), path_y))); } path_x = (float) pos.getX(); @@ -133,19 +151,25 @@ public class PDFLinesTextStripper extends PDFTextStripper { Point2D p2 = transformPosition(x + width, y + height); // Horizontal lines - graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY()))); - graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2 + .getX(), (float) p1.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2 + .getX(), (float) p2.getY()))); // Vertical lines, direction must always be from bottom to top for the table extraction algorithm. if (p2.getY() > p1.getY()) { - graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2 + .getX(), (float) p2.getY()))); } else { - graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2 + .getX(), (float) p1.getY()))); } if (p2.getY() > p1.getY()) { - graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1.getX(), (float) p2.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1 + .getX(), (float) p2.getY()))); } else { - graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1.getX(), (float) p1.getY()))); + graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1 + .getX(), (float) p1.getY()))); } } break; @@ -168,12 +192,80 @@ public class PDFLinesTextStripper extends PDFTextStripper { case OperatorName.ENDPATH: graphicsPath.clear(); break; + + case OperatorName.DRAW_OBJECT: + processImageOperation(arguments); + break; + } super.processOperator(operator, arguments); } + + protected void processImageOperation(List arguments) { + + try { + COSName objectName = (COSName) arguments.get(0); + PDXObject xobject = getResources().getXObject(objectName); + if (xobject instanceof PDImageXObject) { + PDImageXObject pdfImage = (PDImageXObject) xobject; + + Rectangle2D imageBounds = calculateImagePosition(pdfImage); + + Rectangle2D rect = new Rectangle2D.Float((float) imageBounds.getX(), (float) imageBounds.getY(), (float) imageBounds + .getWidth(), (float) imageBounds.getHeight()); + + this.imageBounds.add(rect); + } + } catch (Exception e) { + log.warn("Problem during image extraction: {}", e.getMessage()); + } + } + + + private Rectangle2D calculateImagePosition(PDImageXObject pdfImage) throws IOException { + + Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); + + Rectangle2D imageBounds = pdfImage.getImage().getRaster().getBounds(); + + AffineTransform imageTransform = new AffineTransform(ctm.createAffineTransform()); + imageTransform.scale(1.0 / pdfImage.getWidth(), -1.0 / pdfImage.getHeight()); + imageTransform.translate(0, -pdfImage.getHeight()); + + AffineTransform pageTransform = createCurrentPageTransformation(); + pageTransform.concatenate(imageTransform); + + return pageTransform.createTransformedShape(imageBounds).getBounds2D(); + } + + + protected AffineTransform createCurrentPageTransformation() { + + PDRectangle cb = pdpage.getCropBox(); + AffineTransform pageTransform = new AffineTransform(); + + switch (pdpage.getRotation()) { + case 90: + pageTransform.translate(cb.getHeight(), 0); + break; + case 180: + pageTransform.translate(cb.getWidth(), cb.getHeight()); + break; + case 270: + pageTransform.translate(0, cb.getWidth()); + break; + } + + pageTransform.rotate(Math.toRadians(pdpage.getRotation())); + + return pageTransform; + } + + private float floatValue(COSBase value) { + if (value instanceof COSNumber) { return ((COSNumber) value).floatValue(); } else { @@ -181,21 +273,31 @@ public class PDFLinesTextStripper extends PDFTextStripper { } } + private Point2D.Float transformPosition(float x, float y) { + return super.transformedPoint(x, y); } + private void addVisibleRulings(List path, boolean stroke) throws IOException { try { - if (stroke && !getGraphicsState().getStrokingColor().isPattern() && getGraphicsState().getStrokingColor().toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor().isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) { + if (stroke && !getGraphicsState().getStrokingColor().isPattern() && getGraphicsState().getStrokingColor() + .toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor() + .isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) { rulings.addAll(path); } } catch (UnsupportedOperationException e) { - log.error("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor().getColorSpace().getName() + " does not support toRGB"); + log.error("UnsupportedOperationException: " + getGraphicsState().getStrokingColor() + .getColorSpace() + .getName() + " or " + getGraphicsState().getNonStrokingColor() + .getColorSpace() + .getName() + " does not support toRGB"); } } + @Override public void writeString(String text, List textPositions) throws IOException { @@ -203,16 +305,18 @@ public class PDFLinesTextStripper extends PDFTextStripper { for (int i = 0; i <= textPositions.size() - 1; i++) { int charHeight = (int) textPositions.get(i).getHeightDir(); - if(charHeight > maxCharHeight){ + if (charHeight > maxCharHeight) { maxCharHeight = charHeight; } int charWidth = (int) textPositions.get(i).getWidthDirAdj(); - if(charWidth > maxCharWidths){ + if (charWidth > maxCharWidths) { maxCharWidths = charWidth; } - if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0"))) { + if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i) + .getUnicode() + .equals("\u00A0"))) { startIndex++; continue; } @@ -220,15 +324,21 @@ public class PDFLinesTextStripper extends PDFTextStripper { // Strange but sometimes this is happening, for example: Metolachlor2.pdf if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) { List sublist = textPositions.subList(startIndex, i); - if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { + if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0) + .getUnicode() + .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); } startIndex = i; } - if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0")) && i <= textPositions.size() - 2) { + if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i) + .getUnicode() + .equals("\u00A0")) && i <= textPositions.size() - 2) { List sublist = textPositions.subList(startIndex, i); - if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { + if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0) + .getUnicode() + .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); } startIndex = i + 1; @@ -236,21 +346,27 @@ public class PDFLinesTextStripper extends PDFTextStripper { } List sublist = textPositions.subList(startIndex, textPositions.size()); - if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1).getUnicode().equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) { + if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1) + .getUnicode() + .equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) { sublist = sublist.subList(0, sublist.size() - 1); } - if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { + if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0) + .getUnicode() + .equals("\u00A0")))) { textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); } super.writeString(text); } + @Override public String getText(PDDocument doc) throws IOException { maxCharWidths = 0; maxCharWidths = 0; textPositionSequences.clear(); + imageBounds = new ArrayList<>(); rulings.clear(); graphicsPath.clear(); path_x = 0.0f; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java index 28983e69..3a0eede7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/ParsedElements.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.parsing.model; +import java.awt.geom.Rectangle2D; import java.util.List; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; @@ -13,6 +14,7 @@ public class ParsedElements { private List sequences; private List rulings; + private List imageBounds; private boolean landscape; private boolean rotated; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java index cddc37bd..9af17d2f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/AnnotationService.java @@ -53,25 +53,24 @@ public class AnnotationService { List logEntries = redactionLogPerPage.get(page); if (logEntries != null && !logEntries.isEmpty()) { - addAnnotations(logEntries, pdPage, page, redactionLog.getRuleSetId()); + addAnnotations(logEntries, pdPage, page); } } } - private void addAnnotations(List logEntries, PDPage pdPage, int page, - String ruleSetId) throws IOException { + private void addAnnotations(List logEntries, PDPage pdPage, int page) throws IOException { List annotations = pdPage.getAnnotations(); for (RedactionLogEntry entry : logEntries) { - annotations.addAll(createAnnotation(entry, page, ruleSetId, pdPage.getMediaBox(), pdPage.getCropBox())); + annotations.addAll(createAnnotation(entry, page, pdPage.getMediaBox(), pdPage.getCropBox())); } } - private List createAnnotation(RedactionLogEntry redactionLogEntry, int page, String ruleSetId, - PDRectangle mediaBox, PDRectangle cropBox) { + private List createAnnotation(RedactionLogEntry redactionLogEntry, int page, PDRectangle mediaBox, + PDRectangle cropBox) { List annotations = new ArrayList<>(); @@ -89,7 +88,7 @@ public class AnnotationService { PDRectangle pdRectangle = toPDRectangle(rectangles, mediaBox, cropBox); annotation.setRectangle(pdRectangle); annotation.setQuadPoints(toQuadPoints(rectangles, mediaBox, cropBox)); - if (!dictionaryService.isHint(redactionLogEntry.getType(), ruleSetId)) { + if (!redactionLogEntry.isHint()) { annotation.setContents(createAnnotationContent(redactionLogEntry)); } annotation.setTitlePopup(redactionLogEntry.getId()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 15a73224..01eea643 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.service; +import java.awt.geom.Rectangle2D; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -27,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.model.Entity; import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell; import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; @@ -37,6 +39,8 @@ import lombok.RequiredArgsConstructor; @RequiredArgsConstructor public class RedactionLogCreatorService { + private static final String IMAGE = "image"; + private final DictionaryService dictionaryService; @@ -56,6 +60,30 @@ public class RedactionLogCreatorService { if (manualRedactionPages.contains(page)) { addManualEntries(classifiedDoc, manualRedactions, page, ruleSetId); } + + if (!classifiedDoc.getPages().get(page - 1).getImageBounds().isEmpty()) { + addImageEntries(classifiedDoc, page, ruleSetId); + } + } + } + + + private void addImageEntries(Document classifiedDoc, int pageNumber, String ruleSetId) { + + for (Rectangle2D imageBounds : classifiedDoc.getPages().get(pageNumber - 1).getImageBounds()) { + RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder() + .id(IdBuilder.buildId(imageBounds, pageNumber)) + .color(getColor(IMAGE, ruleSetId)) + .type(IMAGE) + .redacted(false) + .isHint(true) + .manual(false) + .isDictionaryEntry(false) + .isRecommendation(false) + .positions(List.of(new Rectangle(new Point((float) imageBounds.getX(), (float) imageBounds.getY()), (float) imageBounds + .getWidth(), (float) imageBounds.getHeight(), pageNumber))) + .build(); + classifiedDoc.getRedactionLogEntities().add(redactionLogEntry); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java index 70885daa..ce3c7540 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/utils/IdBuilder.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils; +import java.awt.geom.Rectangle2D; import java.nio.charset.StandardCharsets; import java.util.List; @@ -23,4 +24,16 @@ public class IdBuilder { return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString(); } + + + public String buildId(Rectangle2D rectangle2D, int page){ + + StringBuilder sb = new StringBuilder(); + sb.append("x").append(rectangle2D.getX()).append("y").append(rectangle2D.getY()).append("h").append(rectangle2D.getHeight()).append("w").append(rectangle2D.getWidth()).append("p").append(page); + + return hashFunction.hashString(sb.toString(), StandardCharsets.UTF_8).toString(); + } + + + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index 8106d5eb..b19b22b3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -48,6 +48,7 @@ public class PdfSegmentationService { stripper.setPageNumber(pageNumber); stripper.setStartPage(pageNumber); stripper.setEndPage(pageNumber); + stripper.setPdpage(pdPage); stripper.getText(pdDocument); PDRectangle pdr = pdPage.getMediaBox(); @@ -56,10 +57,10 @@ public class PdfSegmentationService { int rotation = pdPage.getRotation(); boolean isRotated = rotation != 0 && rotation != 360; - ParsedElements parsedElements = ParsedElements.builder() .rulings(stripper.getRulings()) .sequences(stripper.getTextPositionSequences()) + .imageBounds(stripper.getImageBounds()) .maxCharWidth(stripper.getMaxCharWidths()) .maxCharHeight(stripper.getMaxCharWidths()) .landscape(isLandscape) @@ -81,8 +82,10 @@ public class PdfSegmentationService { page.setPageNumber(pageNumber); increaseDocumentStatistics(page, document); + page.setImageBounds(parsedElements.getImageBounds()); pages.add(page); } + document.setPages(pages); classificationService.classifyDocument(document); @@ -90,11 +93,9 @@ public class PdfSegmentationService { sectionsBuilderService.buildSections(document); return document; - } - private void increaseDocumentStatistics(Page page, Document document) { if (!page.isLandscape()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index a8fc0c77..3d1bb993 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -81,6 +81,7 @@ public class RedactionIntegrationTest { private static final String PUBLISHED_INFORMATION = "published_information"; private static final String TEST_METHOD = "test_method"; private static final String PURITY = "purity"; + private static final String IMAGE = "image"; private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author"; private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address"; @@ -157,6 +158,7 @@ public class RedactionIntegrationTest { when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS)); when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(FALSE_POSITIVE)); when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(PURITY)); + when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID)).thenReturn(getDictionaryResponse(IMAGE)); when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors); } @@ -238,6 +240,11 @@ public class RedactionIntegrationTest { .stream() .map(this::cleanDictionaryEntry) .collect(Collectors.toSet())); + dictionary.computeIfAbsent(IMAGE, v -> new ArrayList<>()) + .addAll(ResourceLoader.load("dictionaries/image.txt") + .stream() + .map(this::cleanDictionaryEntry) + .collect(Collectors.toSet())); } @@ -264,6 +271,7 @@ public class RedactionIntegrationTest { typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c"); typeColorMap.put(FALSE_POSITIVE, "#ffffff"); typeColorMap.put(PURITY, "#ffe187"); + typeColorMap.put(IMAGE, "#fcc5fb"); hintTypeMap.put(VERTEBRATE, true); hintTypeMap.put(ADDRESS, false); @@ -280,6 +288,7 @@ public class RedactionIntegrationTest { hintTypeMap.put(RECOMMENDATION_ADDRESS, false); hintTypeMap.put(FALSE_POSITIVE, true); hintTypeMap.put(PURITY, false); + hintTypeMap.put(IMAGE, true); caseInSensitiveMap.put(VERTEBRATE, true); caseInSensitiveMap.put(ADDRESS, false); @@ -296,6 +305,7 @@ public class RedactionIntegrationTest { caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false); caseInSensitiveMap.put(FALSE_POSITIVE, false); caseInSensitiveMap.put(PURITY, false); + caseInSensitiveMap.put(IMAGE, true); recommendationTypeMap.put(VERTEBRATE, false); recommendationTypeMap.put(ADDRESS, false); @@ -312,6 +322,8 @@ public class RedactionIntegrationTest { recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true); recommendationTypeMap.put(FALSE_POSITIVE, false); recommendationTypeMap.put(PURITY, false); + recommendationTypeMap.put(IMAGE, false); + rankTypeMap.put(FALSE_POSITIVE, 160); rankTypeMap.put(PURITY, 155); @@ -328,6 +340,8 @@ public class RedactionIntegrationTest { rankTypeMap.put(HINT_ONLY, 50); rankTypeMap.put(RECOMMENDATION_AUTHOR, 40); rankTypeMap.put(RECOMMENDATION_ADDRESS, 30); + rankTypeMap.put(IMAGE, 30); + colors.setDefaultColor("#acfc00"); colors.setNotRedacted("#cccccc"); @@ -427,7 +441,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_08_Volume_3CA_B-6_2018-09-06.pdf"); AnalyzeRequest request = AnalyzeRequest.builder() .ruleSetId(TEST_RULESET_ID) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/image.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/image.txt new file mode 100644 index 00000000..e69de29b