diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java index abe13409..3a089db4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/classification/service/BlockificationService.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.server.classification.service; +import static java.util.stream.Collectors.toSet; + import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter; import com.iqser.red.service.redaction.v1.server.classification.model.Orientation; import com.iqser.red.service.redaction.v1.server.classification.model.Page; @@ -16,6 +18,7 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table; import org.springframework.stereotype.Service; import java.util.ArrayList; +import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -25,9 +28,12 @@ public class BlockificationService { static final float THRESHOLD = 1f; + public Page blockify(List textPositions, List horizontalRulingLines, List verticalRulingLines) { + sortRotatedSequences(textPositions); + List chunkWords = new ArrayList<>(); List chunkBlockList1 = new ArrayList<>(); @@ -50,7 +56,7 @@ public class BlockificationService { if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) { Orientation prevOrientation = null; - if(!chunkBlockList1.isEmpty()) { + if (!chunkBlockList1.isEmpty()) { prevOrientation = chunkBlockList1.get(chunkBlockList1.size() - 1).getOrientation(); } @@ -62,15 +68,11 @@ public class BlockificationService { wasSplitted = true; cb1.setOrientation(Orientation.LEFT); splitX1 = word.getX1(); - } else - - if (newLineAfterSplit && !splittedByRuling) { + } else if (newLineAfterSplit && !splittedByRuling) { wasSplitted = false; cb1.setOrientation(Orientation.RIGHT); splitX1 = null; - } else - - if(prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)){ + } else if (prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)) { cb1.setOrientation(Orientation.LEFT); } @@ -110,16 +112,18 @@ public class BlockificationService { while (itty.hasNext()) { TextBlock block = (TextBlock) itty.next(); - if(previousLeft != null && block.getOrientation().equals(Orientation.LEFT)){ - if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()){ + if (previousLeft != null && block.getOrientation().equals(Orientation.LEFT)) { + if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft + .getMinY()) { previousLeft.add(block); itty.remove(); continue; } } - if(previousRight != null && block.getOrientation().equals(Orientation.RIGHT)){ - if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()){ + if (previousRight != null && block.getOrientation().equals(Orientation.RIGHT)) { + if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight + .getMinY()) { previousRight.add(block); itty.remove(); continue; @@ -133,16 +137,16 @@ public class BlockificationService { } } - itty = chunkBlockList1.iterator(); TextBlock previous = null; while (itty.hasNext()) { TextBlock block = (TextBlock) itty.next(); - if(previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous - .getMaxY())|| - previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous - .getMaxY())){ + if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation() + .equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY()) || previous != null && previous + .getOrientation() + .equals(Orientation.LEFT) && block.getOrientation() + .equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) { previous.add(block); itty.remove(); continue; @@ -151,11 +155,12 @@ public class BlockificationService { previous = block; } - return new Page(chunkBlockList1); } - private boolean equalsWithThreshold(float f1, float f2){ + + private boolean equalsWithThreshold(float f1, float f2) { + return Math.abs(f1 - f2) < THRESHOLD; } @@ -197,6 +202,13 @@ public class BlockificationService { textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest()); } + if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences() + .stream() + .map(t -> round(t.getY1(), 3)) + .collect(toSet()) + .size() == 1) { + textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getX1)); + } return textBlock; } @@ -291,4 +303,30 @@ public class BlockificationService { return new Rectangle(minY, minX, maxX - minX, maxY - minY); } + + private void sortRotatedSequences(List sequences) { + + List rotatedWords = new ArrayList<>(); + Iterator itty = sequences.iterator(); + while (itty.hasNext()) { + var pos = itty.next(); + if (pos.getTextPositions().get(0).getDir() == 270) { + rotatedWords.add(pos); + itty.remove(); + } + } + + if (!rotatedWords.isEmpty() && !sequences.isEmpty()) { + rotatedWords.sort(Comparator.comparing(TextPositionSequence::getX1)); + } + sequences.addAll(rotatedWords); + } + + + private double round(float value, int decimalPoints) { + + var d = Math.pow(10, decimalPoints); + return Math.round(value * d) / d; + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java index d3f6f2a0..95fdad6a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/PDFLinesTextStripper.java @@ -1,16 +1,33 @@ package com.iqser.red.service.redaction.v1.server.parsing; -import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; -import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; -import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; -import lombok.Getter; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.contentstream.operator.OperatorName; -import org.apache.pdfbox.contentstream.operator.color.*; -import org.apache.pdfbox.contentstream.operator.state.*; +import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor; +import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN; +import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace; +import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor; +import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor; +import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor; +import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor; +import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN; +import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace; +import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor; +import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor; +import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor; +import org.apache.pdfbox.contentstream.operator.state.SetFlatness; +import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle; +import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern; +import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle; +import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit; +import org.apache.pdfbox.contentstream.operator.state.SetLineWidth; +import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent; import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSName; @@ -23,12 +40,14 @@ import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.util.Matrix; -import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; -import java.awt.geom.Rectangle2D; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition; +import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; +import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling; + +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; @Slf4j public class PDFLinesTextStripper extends PDFTextStripper { @@ -189,16 +208,19 @@ public class PDFLinesTextStripper extends PDFTextStripper { COSName objectName = (COSName) arguments.get(0); PDXObject xobject = getResources().getXObject(objectName); if (xobject instanceof PDImageXObject) { - PDImageXObject image = (PDImageXObject)xobject; + PDImageXObject image = (PDImageXObject) xobject; Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix(); - Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew.getScaleY()); + Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew + .getScaleY()); // Memory Hack - sofReference kills me FieldUtils.writeField(image, "cachedImageSubsampling", -1, true); if (rect.getHeight() > 2 && rect.getWidth() > 2) { - this.images.add(new PdfImage(image.getImage(), rect, pageNumber, image.getImage().getColorModel().hasAlpha())); + this.images.add(new PdfImage(image.getImage(), rect, pageNumber, image.getImage() + .getColorModel() + .hasAlpha())); } } } catch (Exception e) { @@ -207,8 +229,6 @@ public class PDFLinesTextStripper extends PDFTextStripper { } - - private float floatValue(COSBase value) { if (value instanceof COSNumber) { @@ -247,8 +267,16 @@ public class PDFLinesTextStripper extends PDFTextStripper { public void writeString(String text, List textPositions) throws IOException { int startIndex = 0; + RedTextPosition previous = null; + for (int i = 0; i <= textPositions.size() - 1; i++) { + if (!textPositionSequences.isEmpty()) { + previous = textPositionSequences.get(textPositionSequences.size() - 1) + .getTextPositions() + .get(textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions().size() - 1); + } + int charWidth = (int) textPositions.get(i).getWidthDirAdj(); if (charWidth < minCharWidth) { minCharWidth = charWidth; @@ -267,42 +295,54 @@ public class PDFLinesTextStripper extends PDFTextStripper { if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i) .getUnicode() - .equals("\u00A0"))) { + .equals("\u00A0") || textPositions.get(i) + .getUnicode() + .equals("\t"))) { startIndex++; continue; } // Strange but sometimes this is happening, for example: Metolachlor2.pdf - if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) { + if (i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) { List sublist = textPositions.subList(startIndex, i); if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0) .getUnicode() - .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { + .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) { textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); } startIndex = i; } - - if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) { + if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i) + .getX() > textPositions.get(i - 1).getEndX() + 1) { List sublist = textPositions.subList(startIndex, i); if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0) .getUnicode() - .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { + .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) { textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); } startIndex = i; } - if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i) .getUnicode() - .equals("\u00A0")) && i <= textPositions.size() - 2) { + .equals("\u00A0") || textPositions.get(i) + .getUnicode() + .equals("\t")) && i <= textPositions.size() - 2) { List sublist = textPositions.subList(startIndex, i); if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0) .getUnicode() - .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) { - textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); + .equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) { + + // Remove false sequence ends (whitespaces) + if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0) + .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) { + for (TextPosition t : sublist) { + textPositionSequences.get(textPositionSequences.size() - 1).add(t); + } + } else { + textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); + } } startIndex = i + 1; } @@ -311,13 +351,23 @@ public class PDFLinesTextStripper extends PDFTextStripper { List sublist = textPositions.subList(startIndex, textPositions.size()); if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1) .getUnicode() - .equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) { + .equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) { sublist = sublist.subList(0, sublist.size() - 1); } + if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0) .getUnicode() - .equals("\u00A0")))) { - textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); + .equals("\u00A0") || sublist.get(0) + .getUnicode() + .equals("\t")))) { + if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0) + .getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) { + for (TextPosition t : sublist) { + textPositionSequences.get(textPositionSequences.size() - 1).add(t); + } + } else { + textPositionSequences.add(new TextPositionSequence(sublist, pageNumber)); + } } super.writeString(text); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java index a0a9375b..f9d9b9e2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/RedTextPosition.java @@ -22,6 +22,7 @@ public class RedTextPosition { private float width; private float heightDir; private float widthDirAdj; + private float dir; // not used in reanalysis @JsonIgnore diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java index bf13f391..dd9d1026 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/parsing/model/TextPositionSequence.java @@ -138,6 +138,13 @@ public class TextPositionSequence implements CharSequence { } + @JsonIgnore + public float getRotationAdjustedX() { + + return textPositions.get(0).getXDirAdj(); + } + + @JsonIgnore public float getY1() { @@ -235,7 +242,7 @@ public class TextPositionSequence implements CharSequence { float posYInit; float posYEnd; - if (textPositions.get(0).getRotation() == 90) { + if (textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() != 0.0f) { posXEnd = textPositions.get(0).getYDirAdj() + 2; posYInit = getY1(); posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4; @@ -246,17 +253,24 @@ public class TextPositionSequence implements CharSequence { posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1) .getYDirAdj() + height; - } else if(textPositions.get(0).getRotation() == 0 && textPositions.stream().map(t -> t.getY()).collect(toSet()).size() > 1) { + } else if(textPositions.get(0).getRotation() == 0 && textPositions.get(0).getDir() == 270f) { posYInit = textPositions.get(0).getPageHeight() - getX1(); posYEnd = textPositions.get(0).getPageHeight() - getX2() - textPositions.get(0) .getWidthDirAdj() - textPositions.get(textPositions.size() - 1).getWidthDirAdj() - 3; posXInit = textPositions.get(0).getPageWidth() - textPositions.get(0).getYDirAdj() - 2; posXEnd = textPositions.get(0).getPageWidth() - textPositions.get(textPositions.size() - 1) .getYDirAdj() + height; + } else if(textPositions.get(0).getRotation() == 90 && textPositions.get(0).getDir() == 0.0f){ + posXInit = textPositions.get(textPositions.size() - 1) + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getHeightDir(); + posXEnd = textPositions.get(0).getXDirAdj(); + posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; + posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1) + .getYDirAdj() + 2; } else { posXEnd = textPositions.get(textPositions.size() - 1) - .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1; + .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + 1; posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2; posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1) .getYDirAdj() + 2; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 2a340bcd..7754cf8b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -133,7 +133,7 @@ public class RedactionLogCreatorService { int startIndex = 0; for (int i = 1; i < textPositions.size(); i++) { float yDirAdj = textPositions.get(i).getYDirAdj(); - if (yDirAdj != y) { + if (round(yDirAdj,3) != round(y, 3)) { rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page) .getRectangle()); y = yDirAdj; @@ -149,6 +149,11 @@ public class RedactionLogCreatorService { return rectangles; } + private double round(float value, int decimalPoints) { + var d = Math.pow(10, decimalPoints); + return Math.round(value * d) / d; + } + private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 2190b031..e7ade382 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -653,7 +653,7 @@ public class RedactionIntegrationTest { public void redactionTest() throws IOException { long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/new/VV-919901.pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); request.setExcludedPages(Set.of(1)); @@ -886,7 +886,7 @@ public class RedactionIntegrationTest { public void classificationTest() throws IOException { System.out.println("classificationTest"); - ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); @@ -908,7 +908,7 @@ public class RedactionIntegrationTest { public void sectionsTest() throws IOException { System.out.println("sectionsTest"); - ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); @@ -930,7 +930,7 @@ public class RedactionIntegrationTest { public void htmlTablesTest() throws IOException { System.out.println("htmlTablesTest"); - ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); @@ -1195,10 +1195,10 @@ public class RedactionIntegrationTest { private static String getTemporaryDirectory() { - String tmpdir = System.getProperty("java.io.tmpdir"); - if (StringUtils.isNotBlank(tmpdir)) { - return tmpdir; - } +// String tmpdir = System.getProperty("java.io.tmpdir"); +// if (StringUtils.isNotBlank(tmpdir)) { +// return tmpdir; +// } return "/tmp"; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII.txt index e69de29b..6c81517d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/PII.txt @@ -0,0 +1,5 @@ +Amendment 1 +Report Number: 33168 +Page +Report Number: BFI0714 +Tesh Consultants International diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/S11.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/S11.pdf deleted file mode 100644 index 42d37883..00000000 Binary files a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/S11.pdf and /dev/null differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/S16.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/S16.pdf deleted file mode 100644 index 013951cc..00000000 Binary files a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/S16.pdf and /dev/null differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/270 rotated text on non rotated pages.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/270 rotated text on non rotated pages.pdf new file mode 100644 index 00000000..1ccf7d41 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/270 rotated text on non rotated pages.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/VV-919901.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/VV-919901.pdf new file mode 100644 index 00000000..48345622 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/VV-919901.pdf differ