Pull request #141: fix: merge images into one
Merge in RED/redaction-service from classimg3 to master * commit '4d1ad3b5a56915e9e5df9191a535c0e5cd83972c': RED-1351: merge images into one
This commit is contained in:
commit
b83250f161
@ -8,6 +8,7 @@ import com.iqser.red.service.redaction.v1.server.classification.service.Classifi
|
||||
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.ImageClassificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
@ -22,6 +23,9 @@ import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Graphics;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
@ -92,7 +96,8 @@ public class PdfSegmentationService {
|
||||
page.setRotation(rotation);
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setPageNumber(pageNumber);
|
||||
page.setImages(stripper.getImages());
|
||||
List<PdfImage> mergedList = processImages(stripper.getImages());
|
||||
page.setImages(mergedList);
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
buildPageStatistics(page);
|
||||
@ -105,6 +110,7 @@ public class PdfSegmentationService {
|
||||
|
||||
pages.add(page);
|
||||
|
||||
|
||||
}
|
||||
|
||||
document.setPages(pages);
|
||||
@ -142,6 +148,110 @@ public class PdfSegmentationService {
|
||||
return newPDDocument;
|
||||
}
|
||||
|
||||
//merge images, if they are separated during pdf import, return new list of Pdfimages
|
||||
private List<PdfImage> processImages(List<PdfImage> imageList){
|
||||
if (imageList.size() >1) {
|
||||
List<PdfImage> mergedList = new ArrayList<>();
|
||||
int countElementsInList = 0;
|
||||
boolean beginImage = true;
|
||||
|
||||
// a List of Boolean, true = candidate for merging, false = no merging
|
||||
ArrayList<Boolean> candidatesList = getCandidatesList(imageList);
|
||||
|
||||
// loop through list, if there are candidates for merging (true), merge images and add it to mergedList
|
||||
for (int i = 0; i < candidatesList.size(); i++) {
|
||||
if (candidatesList.get(i)) {
|
||||
if (beginImage) {
|
||||
//begin of image, merge two parts of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1));
|
||||
mergedList.add(mergedImage);
|
||||
countElementsInList++;
|
||||
} else {
|
||||
//middle of an image, merge current piece auf mergedList with image of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1));
|
||||
mergedList.set(countElementsInList - 1, mergedImage);
|
||||
}
|
||||
beginImage = false;
|
||||
} else {
|
||||
// if the last candidate is false, then both images i and i+1 must be added
|
||||
if (i == candidatesList.size()-1){
|
||||
mergedList.add(imageList.get(i));
|
||||
mergedList.add(imageList.get(i+1));
|
||||
}else {
|
||||
//first image is not splitted, add i to resultlist
|
||||
if (beginImage){
|
||||
mergedList.add(imageList.get(i));
|
||||
countElementsInList++;
|
||||
}else {
|
||||
// i is the end of an image, add begin of new image
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
countElementsInList++;
|
||||
beginImage = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}return mergedList;
|
||||
}else {
|
||||
return imageList;
|
||||
}
|
||||
}
|
||||
|
||||
private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2){
|
||||
PdfImage newPdfImage = null;
|
||||
// diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten
|
||||
double width = image1.getPosition().getWidth();
|
||||
double height1 = image1.getPosition().getHeight();
|
||||
double height2 = image2.getPosition().getHeight();
|
||||
// mit den Werten, die unter Image gespeichert sind, funktioniert es
|
||||
double img1height = image1.getImage().getHeight();
|
||||
double img1width = image1.getImage().getWidth();
|
||||
double img2height = image2.getImage().getHeight();
|
||||
|
||||
BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height+ img2height), BufferedImage.TYPE_INT_RGB);
|
||||
Graphics g = mergedImage.getGraphics();
|
||||
try {
|
||||
g.drawImage(image1.getImage(),0,0,null);
|
||||
g.drawImage(image2.getImage(),0, (int) (img1height),null);
|
||||
|
||||
// set Image, Position and type for merged Image
|
||||
//set position for merged image with values of image1 and the height of both
|
||||
Rectangle2D pos = new Rectangle2D.Float();
|
||||
pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), width,height1+height2);
|
||||
newPdfImage = new PdfImage(mergedImage,pos,0);
|
||||
// Graphics need to be disposed
|
||||
newPdfImage.getImage().flush();
|
||||
newPdfImage.getImage().getGraphics().dispose();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
newPdfImage.setPage(image1.getPage());
|
||||
return newPdfImage;
|
||||
}
|
||||
|
||||
//make a list of true and false, if the image is a candidate for merging
|
||||
private ArrayList<Boolean> getCandidatesList(List<PdfImage> imageList) {
|
||||
ArrayList<Boolean> candidatesList = new ArrayList<>();
|
||||
for (int i = 0; i < imageList.size(); i++) {
|
||||
if (i >= 1) {
|
||||
candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i)));
|
||||
}
|
||||
}return candidatesList;
|
||||
}
|
||||
|
||||
// evaluate if two images are candidates for merging, depending on their coordinates, width and height
|
||||
private boolean isCandidateForMerging(PdfImage image1, PdfImage image2){
|
||||
double x1 = image1.getPosition().getX();
|
||||
double y1 = image1.getPosition().getY();
|
||||
double width1 = image1.getPosition().getWidth();
|
||||
double x2 = image2.getPosition().getX();
|
||||
double y2 = image2.getPosition().getY();
|
||||
double width2 = image2.getPosition().getWidth();
|
||||
double height2 = image2.getPosition().getHeight();
|
||||
//if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates,
|
||||
// then it is the same picture and has to be merged -> return true
|
||||
return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(y1 - y2) && width2 > (height2 / 6);
|
||||
}
|
||||
|
||||
|
||||
private void increaseDocumentStatistics(Page page, Document document) {
|
||||
|
||||
|
||||
@ -482,6 +482,51 @@ public class RedactionIntegrationTest {
|
||||
assertThat(result).isNotNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergedImages() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
AnalyzeResult result = reanalyzeService.analyze(request);
|
||||
|
||||
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
|
||||
|
||||
redactionLog.getRedactionLogEntry().forEach(entry -> {
|
||||
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
|
||||
});
|
||||
|
||||
duplicates.entrySet().forEach(entry -> {
|
||||
assertThat(entry.getValue().size()).isEqualTo(1);
|
||||
});
|
||||
|
||||
dictionary.get(AUTHOR).add("Drinking water");
|
||||
when(dictionaryClient.getVersion(TEST_RULESET_ID)).thenReturn(1L);
|
||||
|
||||
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
|
||||
.projectId(TEST_PROJECT_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated3.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long rstart = System.currentTimeMillis();
|
||||
reanalyzeService.reanalyze(request);
|
||||
|
||||
long rend = System.currentTimeMillis();
|
||||
System.out.println("reanalysis analysis duration: " + (rend - rstart));
|
||||
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
|
||||
@ -68,6 +68,17 @@ public class PdfSegmentationServiceTest {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeImages() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/270Rotated.pdf");
|
||||
|
||||
Document document = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
|
||||
assertThat(document.getPages().get(0).getImages().size()).isEqualTo(1);
|
||||
assertThat(document.getPages().get(1).getImages().size()).isEqualTo(0);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void testExtractImages() throws IOException {
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user