Pull request #134: RED-1260

Merge in RED/redaction-service from RED-1260 to master

* commit 'ae28555bf4c740d9872e26fb27615fbf7402f002':
  RED-1260: First steps for image classification
  Integrate image classification
This commit is contained in:
Dominique Eiflaender 2021-04-09 15:11:49 +02:00
commit 9696a421fc
18 changed files with 300 additions and 191 deletions

View File

@ -43,4 +43,6 @@ public class RedactionLogEntry {
private int startOffset;
private int endOffset;
private boolean isImage;
}

View File

@ -1,8 +1,8 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.awt.geom.Rectangle2D;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
@ -17,7 +17,7 @@ public class Page {
@NonNull
private List<AbstractTextContainer> textBlocks;
private List<Rectangle2D> imageBounds;
private List<PdfImage> images;
private Rectangle bodyTextFrame;
@ -31,7 +31,9 @@ public class Page {
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
public boolean isRotated() {
return rotation != 0;
}

View File

@ -0,0 +1,15 @@
package com.iqser.red.service.redaction.v1.server.client;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.multipart.MultipartFile;
/**
 * Feign client for the external image classification service.
 * The base URL is read from the {@code image-service.url} property.
 */
@FeignClient(name = "ImageClassificationResource", url = "${image-service.url}")
public interface ImageClassificationClient {
/**
 * Posts a single file as {@code multipart/form-data} to {@code /process_full_img}
 * and maps the JSON answer to an {@link ImageClassificationResponse}.
 *
 * NOTE(review): the file is bound with {@code @RequestBody}; multipart parts are commonly bound with
 * {@code @RequestPart}. Presumably the configured Feign encoder handles this - confirm.
 *
 * @param file the file to send to the service
 * @return the response returned by the service
 */
@PostMapping(value = "/process_full_img", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
ImageClassificationResponse classify(@RequestBody MultipartFile file);
}

View File

@ -0,0 +1,13 @@
package com.iqser.red.service.redaction.v1.server.client;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Response body of the image classification service.
 * Accessors and constructors are generated by Lombok.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class ImageClassificationResponse {
// Category name reported by the service for the submitted image.
private String category;
}

View File

@ -0,0 +1,102 @@
package com.iqser.red.service.redaction.v1.server.client;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import org.springframework.lang.NonNull;
import org.springframework.lang.Nullable;
import org.springframework.util.Assert;
import org.springframework.util.FileCopyUtils;
import org.springframework.web.multipart.MultipartFile;
public class MockMultipartFile implements MultipartFile {
private final String name;
private final String originalFilename;
@Nullable
private final String contentType;
private final byte[] content;
public MockMultipartFile(String name, @Nullable byte[] content) {
this(name, "", (String) null, (byte[]) content);
}
public MockMultipartFile(String name, InputStream contentStream) throws IOException {
this(name, "", (String) null, (byte[]) FileCopyUtils.copyToByteArray(contentStream));
}
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
@Nullable byte[] content) {
Assert.hasLength(name, "Name must not be empty");
this.name = name;
this.originalFilename = originalFilename != null ? originalFilename : "";
this.contentType = contentType;
this.content = content != null ? content : new byte[0];
}
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
InputStream contentStream) throws IOException {
this(name, originalFilename, contentType, FileCopyUtils.copyToByteArray(contentStream));
}
public String getName() {
return this.name;
}
@NonNull
public String getOriginalFilename() {
return this.originalFilename;
}
@Nullable
public String getContentType() {
return this.contentType;
}
public boolean isEmpty() {
return this.content.length == 0;
}
public long getSize() {
return (long) this.content.length;
}
public byte[] getBytes() throws IOException {
return this.content;
}
public InputStream getInputStream() throws IOException {
return new ByteArrayInputStream(this.content);
}
public void transferTo(File dest) throws IOException, IllegalStateException {
FileCopyUtils.copy(this.content, dest);
}
}

View File

@ -20,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationSer
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService;
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
@ -53,6 +54,7 @@ public class RedactionController implements RedactionResource {
private final DictionaryService dictionaryService;
private final AnnotationService annotationService;
private final ReanalyzeService reanalyzeService;
private final ImageClassificationService imageClassificationService;
@Override
@ -66,6 +68,7 @@ public class RedactionController implements RedactionResource {
log.info("Document structure analysis successful, starting redaction analysis...");
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
imageClassificationService.classifyImages(classifiedDoc);
redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest
.getRuleSetId());

View File

@ -41,6 +41,7 @@ import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import lombok.Getter;
@ -58,7 +59,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
@Getter
private int maxCharWidth;
@Getter
private int minCharHeight;
@ -74,7 +75,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
private final List<Ruling> graphicsPath = new ArrayList<>();
@Getter
private List<Rectangle2D> imageBounds = new ArrayList<>();
private List<PdfImage> images = new ArrayList<>();
private float path_x;
private float path_y;
@ -222,7 +223,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
.getWidth(), (float) imageBounds.getHeight());
if (rect.getHeight() > 2 && rect.getWidth() > 2) {
this.imageBounds.add(rect);
this.images.add(new PdfImage(pdfImage.getImage(), rect));
}
}
} catch (Exception e) {
@ -358,7 +359,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
minCharHeight = Integer.MAX_VALUE;
maxCharHeight = 0;
textPositionSequences.clear();
imageBounds = new ArrayList<>();
images = new ArrayList<>();
rulings.clear();
graphicsPath.clear();
path_x = 0.0f;

View File

@ -1,24 +0,0 @@
package com.iqser.red.service.redaction.v1.server.parsing.model;
import java.awt.geom.Rectangle2D;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import lombok.Builder;
import lombok.Data;
/**
 * Holder for the elements parsed from a page: text sequences, rulings and image bounds,
 * plus orientation flags and character width bounds.
 * Accessors and the builder are generated by Lombok.
 */
@Data
@Builder
public class ParsedElements {
// Text position sequences found on the page.
private List<TextPositionSequence> sequences;
// Rulings found on the page.
private List<Ruling> rulings;
// Bounding rectangles of the images found on the page.
private List<Rectangle2D> imageBounds;
// Orientation flags of the page.
private boolean landscape;
private boolean rotated;
// Smallest and largest character width observed.
private float minCharWidth;
private float maxCharWidth;
}

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
/**
 * Categories that can be assigned to an image found in a document.
 * The declaration order of the constants is kept as is.
 */
public enum ImageType {
    LOGO,
    FORMULA,
    SIGNATURE,
    OTHER,
    OCR
}

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * An image taken from a PDF page together with its position and its (optional) classification.
 * Accessors and constructors are generated by Lombok.
 *
 * NOTE(review): the no-args constructor leaves the {@code @NonNull} fields unset - confirm it is needed.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@RequiredArgsConstructor
public class PdfImage {
// The image content.
@NonNull
private BufferedImage image;
// Bounding rectangle of the image.
@NonNull
private Rectangle2D position;
// Not marked @NonNull, so it is not part of the required-args constructor and stays null until it is set.
private ImageType imageType;
}

View File

@ -0,0 +1,62 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import javax.imageio.ImageIO;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Assigns an {@link ImageType} to every image of a classified document.
 * <p>
 * If image classification is enabled in the settings, each image is sent as PNG to the image
 * classification service. Otherwise, or whenever no usable category can be obtained, the image is
 * typed as {@link ImageType#OTHER}. An image typed {@code OTHER} whose bounds fully contain at
 * least one text block of the same page is re-typed as {@link ImageType#OCR}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ImageClassificationService {

    private final ImageClassificationClient imageClassificationClient;
    private final RedactionServiceSettings settings;


    /**
     * Sets the image type on all images of all pages of the given document.
     * After this method returns normally, no image has a {@code null} type.
     *
     * @param classifiedDoc the document whose page images are classified in place
     */
    public void classifyImages(Document classifiedDoc) {

        long start = System.currentTimeMillis();
        boolean classificationEnabled = settings.isEnableImageClassification();

        classifiedDoc.getPages().forEach(page -> page.getImages().forEach(image -> {

            image.setImageType(classificationEnabled ? classify(image.getImage()) : ImageType.OTHER);

            if (image.getImageType() == ImageType.OTHER) {
                // An unspecific image that completely covers a text block is treated as OCR content.
                boolean coversTextBlock = page.getTextBlocks()
                        .stream()
                        .anyMatch(textBlock -> image.getPosition()
                                .contains(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight()));
                if (coversTextBlock) {
                    image.setImageType(ImageType.OCR);
                }
            }
        }));

        log.info("Image classification took: {}", System.currentTimeMillis() - start);
    }


    /**
     * Encodes the image as PNG and asks the classification service for its category.
     * Falls back to {@link ImageType#OTHER} if encoding fails, so callers never see a null type.
     */
    private ImageType classify(java.awt.image.BufferedImage bufferedImage) {

        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
            ImageIO.write(bufferedImage, "png", baos);
            ImageClassificationResponse response = imageClassificationClient.classify(new MockMultipartFile("file", "Image.png", "image/png", baos
                    .toByteArray()));
            return toImageType(response);
        } catch (IOException e) {
            log.error("Could not classify image", e);
            return ImageType.OTHER;
        }
    }


    /**
     * Maps the service response to an {@link ImageType}.
     * A missing response, a missing category or an unknown category name maps to {@link ImageType#OTHER}.
     */
    private static ImageType toImageType(ImageClassificationResponse response) {

        if (response == null || response.getCategory() == null) {
            log.warn("Image classification returned no category, using {}", ImageType.OTHER);
            return ImageType.OTHER;
        }
        try {
            return ImageType.valueOf(response.getCategory());
        } catch (IllegalArgumentException e) {
            // valueOf is exact-match; anything the enum does not know must not abort the analysis.
            log.warn("Unknown image category '{}', using {}", response.getCategory(), ImageType.OTHER);
            return ImageType.OTHER;
        }
    }

}

View File

@ -248,7 +248,7 @@ public class ReanalyzeService {
Iterator<RedactionLogEntry> itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
while (itty.hasNext()) {
RedactionLogEntry entry = itty.next();
if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.getType().equals("image") || entry.getSectionNumber() == 0 && !entry.getType().equals("image")) {
if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage()) {
itty.remove();
}
}

View File

@ -1,9 +1,9 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@ -30,6 +30,8 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
@ -41,8 +43,6 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class RedactionLogCreatorService {
private static final String IMAGE = "image";
private final DictionaryService dictionaryService;
@ -56,14 +56,16 @@ public class RedactionLogCreatorService {
addSectionGrid(classifiedDoc, page);
if (classifiedDoc.getEntities().get(page) != null) {
classifiedDoc.getRedactionLogEntities().addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
classifiedDoc.getRedactionLogEntities()
.addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
}
if (manualRedactionPages.contains(page)) {
classifiedDoc.getRedactionLogEntities().addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
classifiedDoc.getRedactionLogEntities()
.addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
}
if (!classifiedDoc.getPages().get(page - 1).getImageBounds().isEmpty()) {
if (!classifiedDoc.getPages().get(page - 1).getImages().isEmpty()) {
addImageEntries(classifiedDoc, page, ruleSetId);
}
}
@ -72,24 +74,41 @@ public class RedactionLogCreatorService {
private void addImageEntries(Document classifiedDoc, int pageNumber, String ruleSetId) {
for (Rectangle2D imageBounds : classifiedDoc.getPages().get(pageNumber - 1).getImageBounds()) {
for (PdfImage image : classifiedDoc.getPages().get(pageNumber - 1).getImages()) {
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
.id(IdBuilder.buildId(imageBounds, pageNumber))
.color(getColor(IMAGE, ruleSetId))
.type(IMAGE)
.redacted(false)
.isHint(true)
.id(IdBuilder.buildId(image.getPosition(), pageNumber))
.color(getColor(image.getImageType().name().toLowerCase(Locale.ROOT), ruleSetId))
.isImage(true)
.type(image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().name().toLowerCase(Locale.ROOT))
.redacted(isImageRedactionType(image.getImageType()))
.isHint(!isImageRedactionType(image.getImageType()))
.manual(false)
.isDictionaryEntry(false)
.isRecommendation(false)
.positions(List.of(new Rectangle(new Point((float) imageBounds.getX(), (float) imageBounds.getY()), (float) imageBounds
.getWidth(), (float) imageBounds.getHeight(), pageNumber)))
.positions(List.of(new Rectangle(new Point((float) image.getPosition()
.getX(), (float) image.getPosition().getY()), (float) image.getPosition()
.getWidth(), (float) image.getPosition().getHeight(), pageNumber)))
.build();
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
}
}
/**
 * Tells whether the given image type is LOGO, FORMULA or SIGNATURE.
 *
 * @param imageType the type to check, must not be null
 * @return {@code true} for LOGO, FORMULA and SIGNATURE, {@code false} for every other type
 */
private boolean isImageRedactionType(ImageType imageType) {

    switch (imageType) {
        case LOGO:
        case FORMULA:
        case SIGNATURE:
            return true;
        default:
            return false;
    }
}
private Set<Integer> getManualRedactionPages(ManualRedactions manualRedactions) {
Set<Integer> manualRedactionPages = new HashSet<>();
@ -107,7 +126,8 @@ public class RedactionLogCreatorService {
}
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions, int page, String ruleSetId) {
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions,
int page, String ruleSetId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
@ -238,8 +258,9 @@ public class RedactionLogCreatorService {
}
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds, Map<String, List<Comment>> comments, int page,
String ruleSetId) {
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds,
Map<String, List<Comment>> comments, int page,
String ruleSetId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();

View File

@ -15,7 +15,6 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.parsing.model.ParsedElements;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
@ -57,19 +56,10 @@ public class PdfSegmentationService {
int rotation = pdPage.getRotation();
boolean isRotated = rotation != 0 && rotation != 360;
ParsedElements parsedElements = ParsedElements.builder()
.rulings(stripper.getRulings())
.sequences(stripper.getTextPositionSequences())
.imageBounds(stripper.getImageBounds())
.minCharWidth(stripper.getMinCharWidth())
.maxCharWidth(stripper.getMaxCharWidth())
.landscape(isLandscape)
.rotated(isRotated)
.build();
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(stripper.getRulings(), stripper.getMinCharWidth(), stripper
.getMaxCharHeight());
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(parsedElements.getRulings(), stripper.getMinCharWidth(), stripper.getMaxCharHeight());
Page page = blockificationService.blockify(parsedElements.getSequences(), cleanRulings.getHorizontal(), cleanRulings
Page page = blockificationService.blockify(stripper.getTextPositionSequences(), cleanRulings.getHorizontal(), cleanRulings
.getVertical());
page.setRotation(rotation);
@ -77,11 +67,11 @@ public class PdfSegmentationService {
buildPageStatistics(page);
page.setLandscape(parsedElements.isLandscape() || parsedElements.isRotated());
page.setLandscape(isLandscape || isRotated);
page.setPageNumber(pageNumber);
increaseDocumentStatistics(page, document);
page.setImageBounds(parsedElements.getImageBounds());
page.setImages(stripper.getImages());
pages.add(page);
}

View File

@ -12,4 +12,6 @@ public class RedactionServiceSettings {
private int surroundingWordsOffsetWindow = 100;
private boolean enableImageClassification = true;
}

View File

@ -2,6 +2,7 @@ info:
description: Redaction Service Server V1
configuration-service.url: "http://configuration-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
server:
port: 8080

View File

@ -83,6 +83,7 @@ import com.iqser.red.service.redaction.v1.model.Status;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
@ -131,6 +132,9 @@ public class RedactionIntegrationTest {
@MockBean
private DictionaryClient dictionaryClient;
@MockBean
private ImageClassificationClient imageClassificationClient;
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
@ -412,7 +416,8 @@ public class RedactionIntegrationTest {
}
private List<DictionaryEntry> toDictionaryEntry(List<String> entries){
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> {
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
@ -450,7 +455,6 @@ public class RedactionIntegrationTest {
assertThat(entry.getValue().size()).isEqualTo(1);
});
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
@ -498,7 +502,7 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_13_Volume_3CP_A9396G_B-1_2018-09-06.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
AnalyzeRequest request = AnalyzeRequest.builder()
.ruleSetId(TEST_RULESET_ID)
@ -507,6 +511,12 @@ public class RedactionIntegrationTest {
AnalyzeResult result = redactionController.analyze(request);
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
if (entry.isImage()) {
System.out.println("---->" + entry.getType());
}
});
long end = System.currentTimeMillis();
System.out.println("first analysis duration: " + (end - start));
@ -519,7 +529,7 @@ public class RedactionIntegrationTest {
loop:
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
for (SectionText sectionText : result.getText().getSectionTexts()) {
if (redactionLogEntry.getType().equals("image")) {
if (redactionLogEntry.isImage()) {
correctFound++;
continue loop;
}
@ -536,7 +546,6 @@ public class RedactionIntegrationTest {
}
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
dictionary.get(AUTHOR).add("properties");
reanlysisVersions.put("properties", 1L);
@ -575,127 +584,6 @@ public class RedactionIntegrationTest {
}
/**
 * Ignored manual test: analyzes and annotates {@code files/S5.pdf}, writes the results to {@code /tmp},
 * checks every redaction log entry against the extracted section text and finally paints one section
 * area red into a copy of the document for visual inspection.
 */
@Test
@Ignore
public void fillRecanTest() throws IOException {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/S5.pdf");
AnalyzeRequest request = AnalyzeRequest.builder()
.ruleSetId(TEST_RULESET_ID)
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.build();
AnalyzeResult result = redactionController.analyze(request);
// Annotate the same document with the redaction log and section grid of the analysis.
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.redactionLog(result.getRedactionLog())
.sectionGrid(result.getSectionGrid())
.build());
// Write the annotated PDF and the extracted text as JSON to /tmp.
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
}
// Entries of type "image" count as correct right away; for all others the text between the entry's
// offsets in its section must equal the entry value (ignoring case), otherwise the test aborts.
int correctFound = 0;
loop:
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
for (SectionText sectionText : result.getText().getSectionTexts()) {
if (redactionLogEntry.getType().equals("image")) {
correctFound++;
continue loop;
}
if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) {
String value = sectionText.getText()
.substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset());
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
correctFound++;
} else {
throw new RuntimeException("WTF");
}
}
}
}
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
System.out.println("correctFound " + correctFound);
long end = System.currentTimeMillis();
System.out.println("duration: " + (end - start));
System.out.println("numberOfPages: " + result.getNumberOfPages());
// Hard-coded pick: sixth area of the fourth section text.
SectionArea sectionArea = result.getText().getSectionTexts().get(3).getSectionAreas().get(5);
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(IOUtils.toByteArray(pdfFileResource.getInputStream())))) {
PDPage docPage = pdDocument.getPage(0);
PDFTextStripperByArea textStripper = new PDFTextStripperByArea();
// NOTE(review): cropBox and mediaBox are read but not used below.
PDRectangle cropBox = docPage.getCropBox();
PDRectangle mediaBox = docPage.getMediaBox();
// if (textPositions.get(0).getRotation() == 90) {
// posXEnd = textPositions.get(0).getYDirAdj() + 2;
// posYInit = getY1();
// posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
// } else {
// posXEnd = textPositions.get(textPositions.size() - 1)
// .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
// posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
// posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1)
// .getYDirAdj() + 2;
// }
// NOTE(review): x/y and width/height are passed in swapped order here - presumably on purpose
// for the coordinate system of the text stripper; confirm.
Rectangle2D rect = new Rectangle2D.Float(sectionArea.getTopLeft()
.getY(), sectionArea.getTopLeft()
.getX() , sectionArea.getHeight(), sectionArea
.getWidth() + 0.001f);
textStripper.addRegion("region", rect);
textStripper.extractRegions(docPage);
String textForRegion = textStripper.getTextForRegion("region");
System.out.println(textForRegion);
// fill a rectangle
PDPageContentStream contents = new PDPageContentStream (pdDocument, docPage, PDPageContentStream.AppendMode.APPEND, false, false);
contents.setNonStrokingColor (Color.RED);
contents.addRect (sectionArea.getTopLeft().getX(), sectionArea.getTopLeft().getY(), sectionArea.getWidth(), sectionArea.getHeight());
contents.fill ();
contents.close ();
// Save the modified document to /tmp/Annotated2.pdf.
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
pdDocument.save(byteArrayOutputStream);
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated2.pdf")) {
fileOutputStream.write(byteArrayOutputStream.toByteArray());
}
}
} catch (Exception e) {
throw new RedactionException(e);
}
}
@Test
public void testTableRedaction() throws IOException {
@ -782,7 +670,6 @@ public class RedactionIntegrationTest {
.status(Status.APPROVED)
.build()));
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
.redactionLog(result.getRedactionLog())
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -791,7 +678,6 @@ public class RedactionIntegrationTest {
.ruleSetId(TEST_RULESET_ID)
.build());
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
.redactionLog(reanalyzeResult.getRedactionLog())

View File

@ -1,4 +1,5 @@
configuration-service.url: "http://configuration-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
ribbon:
ConnectTimeout: 600000
@ -12,3 +13,6 @@ processing.kafkastreams: false
platform.multi-tenancy:
enabled: false
redaction-service:
enable-image-classification: false