RED-1260: First steps for image classification
This commit is contained in:
parent
2558b3cab8
commit
ae28555bf4
@ -43,4 +43,6 @@ public class RedactionLogEntry {
|
|||||||
private int startOffset;
|
private int startOffset;
|
||||||
private int endOffset;
|
private int endOffset;
|
||||||
|
|
||||||
|
private boolean isImage;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import org.springframework.web.bind.annotation.PostMapping;
|
|||||||
import org.springframework.web.bind.annotation.RequestBody;
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
@FeignClient(name = "ImageClassificationResource", url = "http://localhost:8080")
|
@FeignClient(name = "ImageClassificationResource", url = "${image-service.url}")
|
||||||
public interface ImageClassificationClient {
|
public interface ImageClassificationClient {
|
||||||
|
|
||||||
@PostMapping(value = "/process_full_img", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
|
@PostMapping(value = "/process_full_img", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
|||||||
@ -0,0 +1,102 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.client;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import org.springframework.lang.NonNull;
|
||||||
|
import org.springframework.lang.Nullable;
|
||||||
|
import org.springframework.util.Assert;
|
||||||
|
import org.springframework.util.FileCopyUtils;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
public class MockMultipartFile implements MultipartFile {
|
||||||
|
|
||||||
|
private final String name;
|
||||||
|
private final String originalFilename;
|
||||||
|
@Nullable
|
||||||
|
private final String contentType;
|
||||||
|
private final byte[] content;
|
||||||
|
|
||||||
|
|
||||||
|
public MockMultipartFile(String name, @Nullable byte[] content) {
|
||||||
|
|
||||||
|
this(name, "", (String) null, (byte[]) content);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public MockMultipartFile(String name, InputStream contentStream) throws IOException {
|
||||||
|
|
||||||
|
this(name, "", (String) null, (byte[]) FileCopyUtils.copyToByteArray(contentStream));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
|
||||||
|
@Nullable byte[] content) {
|
||||||
|
|
||||||
|
Assert.hasLength(name, "Name must not be empty");
|
||||||
|
this.name = name;
|
||||||
|
this.originalFilename = originalFilename != null ? originalFilename : "";
|
||||||
|
this.contentType = contentType;
|
||||||
|
this.content = content != null ? content : new byte[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
|
||||||
|
InputStream contentStream) throws IOException {
|
||||||
|
|
||||||
|
this(name, originalFilename, contentType, FileCopyUtils.copyToByteArray(contentStream));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
|
||||||
|
return this.name;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@NonNull
|
||||||
|
public String getOriginalFilename() {
|
||||||
|
|
||||||
|
return this.originalFilename;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
public String getContentType() {
|
||||||
|
|
||||||
|
return this.contentType;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
|
||||||
|
return this.content.length == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public long getSize() {
|
||||||
|
|
||||||
|
return (long) this.content.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public byte[] getBytes() throws IOException {
|
||||||
|
|
||||||
|
return this.content;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public InputStream getInputStream() throws IOException {
|
||||||
|
|
||||||
|
return new ByteArrayInputStream(this.content);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void transferTo(File dest) throws IOException, IllegalStateException {
|
||||||
|
|
||||||
|
FileCopyUtils.copy(this.content, dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -20,6 +20,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationSer
|
|||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
|
||||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||||
@ -53,6 +54,7 @@ public class RedactionController implements RedactionResource {
|
|||||||
private final DictionaryService dictionaryService;
|
private final DictionaryService dictionaryService;
|
||||||
private final AnnotationService annotationService;
|
private final AnnotationService annotationService;
|
||||||
private final ReanalyzeService reanalyzeService;
|
private final ReanalyzeService reanalyzeService;
|
||||||
|
private final ImageClassificationService imageClassificationService;
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -66,6 +68,7 @@ public class RedactionController implements RedactionResource {
|
|||||||
log.info("Document structure analysis successful, starting redaction analysis...");
|
log.info("Document structure analysis successful, starting redaction analysis...");
|
||||||
|
|
||||||
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
|
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions());
|
||||||
|
imageClassificationService.classifyImages(classifiedDoc);
|
||||||
redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest
|
redactionLogCreatorService.createRedactionLog(classifiedDoc, pdDocument.getNumberOfPages(), analyzeRequest.getManualRedactions(), analyzeRequest
|
||||||
.getRuleSetId());
|
.getRuleSetId());
|
||||||
|
|
||||||
|
|||||||
@ -6,13 +6,18 @@ import java.awt.image.BufferedImage;
|
|||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.NonNull;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
@RequiredArgsConstructor
|
||||||
public class PdfImage {
|
public class PdfImage {
|
||||||
|
|
||||||
|
@NonNull
|
||||||
private BufferedImage image;
|
private BufferedImage image;
|
||||||
|
@NonNull
|
||||||
private Rectangle2D position;
|
private Rectangle2D position;
|
||||||
private ImageType imageType;
|
private ImageType imageType;
|
||||||
|
|
||||||
|
|||||||
@ -1,22 +1,18 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.UUID;
|
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
import org.apache.commons.fileupload.FileItem;
|
|
||||||
import org.apache.commons.fileupload.disk.DiskFileItem;
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
|
||||||
import org.springframework.web.multipart.commons.CommonsMultipartFile;
|
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
|
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationResponse;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.client.MockMultipartFile;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
@ -26,28 +22,41 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class ImageClassificationService {
|
public class ImageClassificationService {
|
||||||
|
|
||||||
private ImageClassificationClient imageClassificationClient;
|
private final ImageClassificationClient imageClassificationClient;
|
||||||
private File repository = new File(System.getProperty("java.io.tmpdir"));
|
private final RedactionServiceSettings settings;
|
||||||
|
|
||||||
|
|
||||||
public void classifyImages(Document classifiedDoc) {
|
public void classifyImages(Document classifiedDoc) {
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
classifiedDoc.getPages().forEach(page -> {
|
classifiedDoc.getPages().forEach(page -> {
|
||||||
page.getImages().forEach(image -> {
|
page.getImages().forEach(image -> {
|
||||||
|
|
||||||
|
if(settings.isEnableImageClassification()) {
|
||||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
ImageIO.write(image.getImage(), "png", baos);
|
ImageIO.write(image.getImage(), "png", baos);
|
||||||
String fileName = UUID.randomUUID().toString() + ".png";
|
ImageClassificationResponse response = imageClassificationClient.classify(new MockMultipartFile("file", "Image.png", "image/png", baos
|
||||||
FileItem fileItem = new DiskFileItem(fileName, "image/png", true, fileName, 100000000, repository);
|
.toByteArray()));
|
||||||
MultipartFile multipartFile = new CommonsMultipartFile(fileItem);
|
|
||||||
ImageClassificationResponse response = imageClassificationClient.classify(multipartFile);
|
|
||||||
image.setImageType(ImageType.valueOf(response.getCategory()));
|
image.setImageType(ImageType.valueOf(response.getCategory()));
|
||||||
fileItem.delete();
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
log.error("Could not classify image", e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
image.setImageType(ImageType.OTHER);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (image.getImageType().equals(ImageType.OTHER)) {
|
||||||
|
page.getTextBlocks().forEach(textblock -> {
|
||||||
|
if (image.getPosition()
|
||||||
|
.contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) {
|
||||||
|
image.setImageType(ImageType.OCR);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
log.info("Image classification took: " + (System.currentTimeMillis() - start));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -248,7 +248,7 @@ public class ReanalyzeService {
|
|||||||
Iterator<RedactionLogEntry> itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
|
Iterator<RedactionLogEntry> itty = renalyzeRequest.getRedactionLog().getRedactionLogEntry().iterator();
|
||||||
while (itty.hasNext()) {
|
while (itty.hasNext()) {
|
||||||
RedactionLogEntry entry = itty.next();
|
RedactionLogEntry entry = itty.next();
|
||||||
if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.getType().equals("image") || entry.getSectionNumber() == 0 && !entry.getType().equals("image")) {
|
if (sectionsToReanaylse.contains(entry.getSectionNumber()) && !entry.isImage() || entry.getSectionNumber() == 0 && !entry.isImage()) {
|
||||||
itty.remove();
|
itty.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,9 +1,9 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -30,6 +30,8 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
|||||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
@ -41,8 +43,6 @@ import lombok.RequiredArgsConstructor;
|
|||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class RedactionLogCreatorService {
|
public class RedactionLogCreatorService {
|
||||||
|
|
||||||
private static final String IMAGE = "image";
|
|
||||||
|
|
||||||
private final DictionaryService dictionaryService;
|
private final DictionaryService dictionaryService;
|
||||||
|
|
||||||
|
|
||||||
@ -56,11 +56,13 @@ public class RedactionLogCreatorService {
|
|||||||
addSectionGrid(classifiedDoc, page);
|
addSectionGrid(classifiedDoc, page);
|
||||||
|
|
||||||
if (classifiedDoc.getEntities().get(page) != null) {
|
if (classifiedDoc.getEntities().get(page) != null) {
|
||||||
classifiedDoc.getRedactionLogEntities().addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
|
classifiedDoc.getRedactionLogEntities()
|
||||||
|
.addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (manualRedactionPages.contains(page)) {
|
if (manualRedactionPages.contains(page)) {
|
||||||
classifiedDoc.getRedactionLogEntities().addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
|
classifiedDoc.getRedactionLogEntities()
|
||||||
|
.addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!classifiedDoc.getPages().get(page - 1).getImages().isEmpty()) {
|
if (!classifiedDoc.getPages().get(page - 1).getImages().isEmpty()) {
|
||||||
@ -72,24 +74,41 @@ public class RedactionLogCreatorService {
|
|||||||
|
|
||||||
private void addImageEntries(Document classifiedDoc, int pageNumber, String ruleSetId) {
|
private void addImageEntries(Document classifiedDoc, int pageNumber, String ruleSetId) {
|
||||||
|
|
||||||
for (Rectangle2D imageBounds : classifiedDoc.getPages().get(pageNumber - 1).getImageBounds()) {
|
for (PdfImage image : classifiedDoc.getPages().get(pageNumber - 1).getImages()) {
|
||||||
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
|
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
|
||||||
.id(IdBuilder.buildId(imageBounds, pageNumber))
|
.id(IdBuilder.buildId(image.getPosition(), pageNumber))
|
||||||
.color(getColor(IMAGE, ruleSetId))
|
.color(getColor(image.getImageType().name().toLowerCase(Locale.ROOT), ruleSetId))
|
||||||
.type(IMAGE)
|
.isImage(true)
|
||||||
.redacted(false)
|
.type(image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().name().toLowerCase(Locale.ROOT))
|
||||||
.isHint(true)
|
.redacted(isImageRedactionType(image.getImageType()))
|
||||||
|
.isHint(!isImageRedactionType(image.getImageType()))
|
||||||
.manual(false)
|
.manual(false)
|
||||||
.isDictionaryEntry(false)
|
.isDictionaryEntry(false)
|
||||||
.isRecommendation(false)
|
.isRecommendation(false)
|
||||||
.positions(List.of(new Rectangle(new Point((float) imageBounds.getX(), (float) imageBounds.getY()), (float) imageBounds
|
.positions(List.of(new Rectangle(new Point((float) image.getPosition()
|
||||||
.getWidth(), (float) imageBounds.getHeight(), pageNumber)))
|
.getX(), (float) image.getPosition().getY()), (float) image.getPosition()
|
||||||
|
.getWidth(), (float) image.getPosition().getHeight(), pageNumber)))
|
||||||
.build();
|
.build();
|
||||||
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
classifiedDoc.getRedactionLogEntities().add(redactionLogEntry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private boolean isImageRedactionType(ImageType imageType) {
|
||||||
|
|
||||||
|
if (imageType.equals(ImageType.LOGO)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (imageType.equals(ImageType.FORMULA)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (imageType.equals(ImageType.SIGNATURE)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private Set<Integer> getManualRedactionPages(ManualRedactions manualRedactions) {
|
private Set<Integer> getManualRedactionPages(ManualRedactions manualRedactions) {
|
||||||
|
|
||||||
Set<Integer> manualRedactionPages = new HashSet<>();
|
Set<Integer> manualRedactionPages = new HashSet<>();
|
||||||
@ -107,7 +126,8 @@ public class RedactionLogCreatorService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions, int page, String ruleSetId) {
|
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions,
|
||||||
|
int page, String ruleSetId) {
|
||||||
|
|
||||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||||
|
|
||||||
@ -238,7 +258,8 @@ public class RedactionLogCreatorService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds, Map<String, List<Comment>> comments, int page,
|
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds,
|
||||||
|
Map<String, List<Comment>> comments, int page,
|
||||||
String ruleSetId) {
|
String ruleSetId) {
|
||||||
|
|
||||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||||
|
|||||||
@ -12,4 +12,6 @@ public class RedactionServiceSettings {
|
|||||||
|
|
||||||
private int surroundingWordsOffsetWindow = 100;
|
private int surroundingWordsOffsetWindow = 100;
|
||||||
|
|
||||||
|
private boolean enableImageClassification = true;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -2,6 +2,7 @@ info:
|
|||||||
description: Redaction Service Server V1
|
description: Redaction Service Server V1
|
||||||
|
|
||||||
configuration-service.url: "http://configuration-service-v1:8080"
|
configuration-service.url: "http://configuration-service-v1:8080"
|
||||||
|
image-service.url: "http://image-service-v1:8080"
|
||||||
|
|
||||||
server:
|
server:
|
||||||
port: 8080
|
port: 8080
|
||||||
|
|||||||
@ -83,6 +83,7 @@ import com.iqser.red.service.redaction.v1.model.Status;
|
|||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.client.ImageClassificationClient;
|
||||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||||
@ -131,6 +132,9 @@ public class RedactionIntegrationTest {
|
|||||||
@MockBean
|
@MockBean
|
||||||
private DictionaryClient dictionaryClient;
|
private DictionaryClient dictionaryClient;
|
||||||
|
|
||||||
|
@MockBean
|
||||||
|
private ImageClassificationClient imageClassificationClient;
|
||||||
|
|
||||||
private final Map<String, List<String>> dictionary = new HashMap<>();
|
private final Map<String, List<String>> dictionary = new HashMap<>();
|
||||||
private final Map<String, String> typeColorMap = new HashMap<>();
|
private final Map<String, String> typeColorMap = new HashMap<>();
|
||||||
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
|
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
|
||||||
@ -413,6 +417,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
|
|
||||||
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
|
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
|
||||||
|
|
||||||
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
|
||||||
entries.forEach(entry -> {
|
entries.forEach(entry -> {
|
||||||
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
|
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
|
||||||
@ -450,7 +455,6 @@ public class RedactionIntegrationTest {
|
|||||||
assertThat(entry.getValue().size()).isEqualTo(1);
|
assertThat(entry.getValue().size()).isEqualTo(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
dictionary.get(AUTHOR).add("Drinking water");
|
dictionary.get(AUTHOR).add("Drinking water");
|
||||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
|
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
|
||||||
|
|
||||||
@ -498,7 +502,7 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
System.out.println("redactionTest");
|
System.out.println("redactionTest");
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_13_Volume_3CP_A9396G_B-1_2018-09-06.pdf");
|
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
AnalyzeRequest request = AnalyzeRequest.builder()
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
.ruleSetId(TEST_RULESET_ID)
|
||||||
@ -507,6 +511,12 @@ public class RedactionIntegrationTest {
|
|||||||
|
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
AnalyzeResult result = redactionController.analyze(request);
|
||||||
|
|
||||||
|
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||||
|
if (entry.isImage()) {
|
||||||
|
System.out.println("---->" + entry.getType());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
long end = System.currentTimeMillis();
|
long end = System.currentTimeMillis();
|
||||||
|
|
||||||
System.out.println("first analysis duration: " + (end - start));
|
System.out.println("first analysis duration: " + (end - start));
|
||||||
@ -519,7 +529,7 @@ public class RedactionIntegrationTest {
|
|||||||
loop:
|
loop:
|
||||||
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
|
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
|
||||||
for (SectionText sectionText : result.getText().getSectionTexts()) {
|
for (SectionText sectionText : result.getText().getSectionTexts()) {
|
||||||
if (redactionLogEntry.getType().equals("image")) {
|
if (redactionLogEntry.isImage()) {
|
||||||
correctFound++;
|
correctFound++;
|
||||||
continue loop;
|
continue loop;
|
||||||
}
|
}
|
||||||
@ -536,7 +546,6 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
|
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
|
||||||
|
|
||||||
|
|
||||||
dictionary.get(AUTHOR).add("properties");
|
dictionary.get(AUTHOR).add("properties");
|
||||||
reanlysisVersions.put("properties", 1L);
|
reanlysisVersions.put("properties", 1L);
|
||||||
|
|
||||||
@ -575,127 +584,6 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
@Ignore
|
|
||||||
public void fillRecanTest() throws IOException {
|
|
||||||
|
|
||||||
System.out.println("redactionTest");
|
|
||||||
long start = System.currentTimeMillis();
|
|
||||||
ClassPathResource pdfFileResource = new ClassPathResource("files/S5.pdf");
|
|
||||||
|
|
||||||
AnalyzeRequest request = AnalyzeRequest.builder()
|
|
||||||
.ruleSetId(TEST_RULESET_ID)
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.build();
|
|
||||||
|
|
||||||
AnalyzeResult result = redactionController.analyze(request);
|
|
||||||
|
|
||||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
|
||||||
.redactionLog(result.getRedactionLog())
|
|
||||||
.sectionGrid(result.getSectionGrid())
|
|
||||||
.build());
|
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
|
|
||||||
fileOutputStream.write(annotateResponse.getDocument());
|
|
||||||
}
|
|
||||||
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
|
|
||||||
fileOutputStream.write(objectMapper.writeValueAsBytes(result.getText()));
|
|
||||||
}
|
|
||||||
|
|
||||||
int correctFound = 0;
|
|
||||||
loop:
|
|
||||||
for (RedactionLogEntry redactionLogEntry : result.getRedactionLog().getRedactionLogEntry()) {
|
|
||||||
for (SectionText sectionText : result.getText().getSectionTexts()) {
|
|
||||||
if (redactionLogEntry.getType().equals("image")) {
|
|
||||||
correctFound++;
|
|
||||||
continue loop;
|
|
||||||
}
|
|
||||||
if (redactionLogEntry.getSectionNumber() == sectionText.getSectionNumber()) {
|
|
||||||
String value = sectionText.getText()
|
|
||||||
.substring(redactionLogEntry.getStartOffset(), redactionLogEntry.getEndOffset());
|
|
||||||
if (redactionLogEntry.getValue().equalsIgnoreCase(value)) {
|
|
||||||
correctFound++;
|
|
||||||
} else {
|
|
||||||
throw new RuntimeException("WTF");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assertThat(correctFound).isEqualTo(result.getRedactionLog().getRedactionLogEntry().size());
|
|
||||||
|
|
||||||
System.out.println("correctFound " + correctFound);
|
|
||||||
|
|
||||||
long end = System.currentTimeMillis();
|
|
||||||
|
|
||||||
System.out.println("duration: " + (end - start));
|
|
||||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
|
||||||
|
|
||||||
SectionArea sectionArea = result.getText().getSectionTexts().get(3).getSectionAreas().get(5);
|
|
||||||
|
|
||||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(IOUtils.toByteArray(pdfFileResource.getInputStream())))) {
|
|
||||||
|
|
||||||
PDPage docPage = pdDocument.getPage(0);
|
|
||||||
|
|
||||||
PDFTextStripperByArea textStripper = new PDFTextStripperByArea();
|
|
||||||
|
|
||||||
PDRectangle cropBox = docPage.getCropBox();
|
|
||||||
PDRectangle mediaBox = docPage.getMediaBox();
|
|
||||||
|
|
||||||
|
|
||||||
// if (textPositions.get(0).getRotation() == 90) {
|
|
||||||
// posXEnd = textPositions.get(0).getYDirAdj() + 2;
|
|
||||||
// posYInit = getY1();
|
|
||||||
// posYEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() - height + 4;
|
|
||||||
// } else {
|
|
||||||
// posXEnd = textPositions.get(textPositions.size() - 1)
|
|
||||||
// .getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
|
|
||||||
// posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() - 2;
|
|
||||||
// posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1)
|
|
||||||
// .getYDirAdj() + 2;
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
Rectangle2D rect = new Rectangle2D.Float(sectionArea.getTopLeft()
|
|
||||||
.getY(), sectionArea.getTopLeft()
|
|
||||||
.getX() , sectionArea.getHeight(), sectionArea
|
|
||||||
.getWidth() + 0.001f);
|
|
||||||
|
|
||||||
textStripper.addRegion("region", rect);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
textStripper.extractRegions(docPage);
|
|
||||||
|
|
||||||
String textForRegion = textStripper.getTextForRegion("region");
|
|
||||||
|
|
||||||
System.out.println(textForRegion);
|
|
||||||
|
|
||||||
// fill a rectangle
|
|
||||||
PDPageContentStream contents = new PDPageContentStream (pdDocument, docPage, PDPageContentStream.AppendMode.APPEND, false, false);
|
|
||||||
contents.setNonStrokingColor (Color.RED);
|
|
||||||
contents.addRect (sectionArea.getTopLeft().getX(), sectionArea.getTopLeft().getY(), sectionArea.getWidth(), sectionArea.getHeight());
|
|
||||||
contents.fill ();
|
|
||||||
contents.close ();
|
|
||||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
|
||||||
pdDocument.save(byteArrayOutputStream);
|
|
||||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated2.pdf")) {
|
|
||||||
fileOutputStream.write(byteArrayOutputStream.toByteArray());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new RedactionException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTableRedaction() throws IOException {
|
public void testTableRedaction() throws IOException {
|
||||||
|
|
||||||
@ -782,7 +670,6 @@ public class RedactionIntegrationTest {
|
|||||||
.status(Status.APPROVED)
|
.status(Status.APPROVED)
|
||||||
.build()));
|
.build()));
|
||||||
|
|
||||||
|
|
||||||
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
ReanalyzeResult reanalyzeResult = redactionController.reanalyze(RenalyzeRequest.builder()
|
||||||
.redactionLog(result.getRedactionLog())
|
.redactionLog(result.getRedactionLog())
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||||
@ -791,7 +678,6 @@ public class RedactionIntegrationTest {
|
|||||||
.ruleSetId(TEST_RULESET_ID)
|
.ruleSetId(TEST_RULESET_ID)
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
|
|
||||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||||
.redactionLog(reanalyzeResult.getRedactionLog())
|
.redactionLog(reanalyzeResult.getRedactionLog())
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
configuration-service.url: "http://configuration-service-v1:8080"
|
configuration-service.url: "http://configuration-service-v1:8080"
|
||||||
|
image-service.url: "http://image-service-v1:8080"
|
||||||
|
|
||||||
ribbon:
|
ribbon:
|
||||||
ConnectTimeout: 600000
|
ConnectTimeout: 600000
|
||||||
@ -12,3 +13,6 @@ processing.kafkastreams: false
|
|||||||
|
|
||||||
platform.multi-tenancy:
|
platform.multi-tenancy:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
||||||
|
redaction-service:
|
||||||
|
enable-image-classification: false
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user