Pull request #171: RED-1472: Fixed image merging in rotated pages
Merge in RED/redaction-service from RED-1472 to master * commit 'b8dc0e448d9101db8099aaf638ac929a16c87c3a': RED-1472: Fixed image merging in rotated pages
This commit is contained in:
commit
925b2a274c
@ -0,0 +1,165 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.awt.Graphics;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ImageMergeService {
|
||||
|
||||
|
||||
public List<PdfImage> mergeImages(List<PdfImage> images, int rotation){
|
||||
|
||||
List<PdfImage> mergedList = processImages(images, rotation);
|
||||
|
||||
List<PdfImage> imagesInImage = new ArrayList<>();
|
||||
for(PdfImage image: mergedList){
|
||||
for (PdfImage inner: mergedList){
|
||||
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
|
||||
imagesInImage.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
mergedList.removeAll(imagesInImage);
|
||||
|
||||
return mergedList;
|
||||
}
|
||||
|
||||
|
||||
//merge images, if they are separated during pdf import, return new list of Pdfimages
|
||||
private List<PdfImage> processImages(List<PdfImage> imageList, int rotation) {
|
||||
if (imageList.size() > 1) {
|
||||
List<PdfImage> mergedList = new ArrayList<>();
|
||||
int countElementsInList = 0;
|
||||
boolean beginImage = true;
|
||||
|
||||
// a List of Boolean, true = candidate for merging, false = no merging
|
||||
List<Boolean> candidatesList = getCandidatesList(imageList, rotation);
|
||||
|
||||
// loop through list, if there are candidates for merging (true), merge images and add it to mergedList
|
||||
for (int i = 0; i < candidatesList.size(); i++) {
|
||||
if (candidatesList.get(i)) {
|
||||
if (beginImage) {
|
||||
//begin of image, merge two parts of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1), rotation);
|
||||
// image merge successful
|
||||
if (mergedImage != null) {
|
||||
mergedList.add(mergedImage);
|
||||
countElementsInList++;
|
||||
}
|
||||
} else {
|
||||
//middle of an image, merge current piece auf mergedList with image of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1), rotation);
|
||||
// image merge successful
|
||||
if (mergedImage != null) {
|
||||
mergedList.set(countElementsInList - 1, mergedImage);
|
||||
}
|
||||
}
|
||||
beginImage = false;
|
||||
} else {
|
||||
// if the last candidate is false, then both images i and i+1 must be added
|
||||
if (i == candidatesList.size() - 1) {
|
||||
if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) {
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
} else {
|
||||
mergedList.add(imageList.get(i));
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
}
|
||||
} else {
|
||||
//first image is not splitted, add i to resultlist
|
||||
if (beginImage) {
|
||||
mergedList.add(imageList.get(i));
|
||||
countElementsInList++;
|
||||
} else {
|
||||
// i is the end of an image, add begin of new image
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
countElementsInList++;
|
||||
beginImage = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return mergedList;
|
||||
} else {
|
||||
return imageList;
|
||||
}
|
||||
}
|
||||
|
||||
private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) {
|
||||
|
||||
// diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten
|
||||
double width = image1.getPosition().getWidth();
|
||||
double width2 = image2.getPosition().getWidth();
|
||||
double height1 = image1.getPosition().getHeight();
|
||||
double height2 = image2.getPosition().getHeight();
|
||||
// mit den Werten, die unter Image gespeichert sind, funktioniert es
|
||||
double img1height = image1.getImage().getHeight();
|
||||
double img1width = image1.getImage().getWidth();
|
||||
double img2height = image2.getImage().getHeight();
|
||||
|
||||
BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
|
||||
Graphics mergedImageGraphics = mergedImage.getGraphics();
|
||||
try {
|
||||
mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
|
||||
mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);
|
||||
|
||||
// set Image, Position and type for merged Image
|
||||
//set position for merged image with values of image1 and the height of both
|
||||
Rectangle2D pos = new Rectangle2D.Float();
|
||||
pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), rotation == 90 ? width + width2: width, rotation == 90 ? height1 : height1 + height2);
|
||||
PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage());
|
||||
// Graphics need to be disposed
|
||||
|
||||
image1.getImage().flush();
|
||||
image2.getImage().flush();
|
||||
|
||||
mergedImage.flush();
|
||||
mergedImageGraphics.dispose();
|
||||
|
||||
return newPdfImage;
|
||||
} catch (Exception e) {
|
||||
// failed to merge image
|
||||
log.error("Failed to merge image", e);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//make a list of true and false, if the image is a candidate for merging
|
||||
private List<Boolean> getCandidatesList(List<PdfImage> imageList, int rotation) {
|
||||
List<Boolean> candidatesList = new ArrayList<>();
|
||||
for (int i = 0; i < imageList.size(); i++) {
|
||||
if (i >= 1) {
|
||||
candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation));
|
||||
}
|
||||
}
|
||||
return candidatesList;
|
||||
}
|
||||
|
||||
// evaluate if two images are candidates for merging, depending on their coordinates, width and height
|
||||
private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) {
|
||||
double x1 = rotation == 90 ? image1.getPosition().getY() : image1.getPosition().getX();
|
||||
double y1 = rotation == 90 ? image1.getPosition().getX() : image1.getPosition().getY();
|
||||
double width1 = rotation == 90 ? image1.getPosition().getHeight() : image1.getPosition().getWidth();
|
||||
double x2 = rotation == 90 ? image2.getPosition().getY() : image2.getPosition().getX();
|
||||
double y2 = rotation == 90 ? image2.getPosition().getX() : image2.getPosition().getY();
|
||||
double width2 = rotation == 90 ? image2.getPosition().getHeight() : image2.getPosition().getWidth();
|
||||
double height2 = rotation == 90 ? image2.getPosition().getWidth() : image2.getPosition().getHeight();
|
||||
//if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates,
|
||||
// then it is the same picture and has to be merged -> return true
|
||||
return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(rotation == 90 ? y2 - y1 : y1 - y2) && width2 > (height2 / 6);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,6 +1,19 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Rectangle;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.pdfbox.io.MemoryUsageSetting;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
@ -15,24 +28,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.pdfbox.io.MemoryUsageSetting;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.awt.Graphics;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ -47,13 +45,17 @@ public class PdfSegmentationService {
|
||||
private final ClassificationService classificationService;
|
||||
private final SectionsBuilderService sectionsBuilderService;
|
||||
private final ImageClassificationService imageClassificationService;
|
||||
private final ImageMergeService imageMergeService;
|
||||
|
||||
|
||||
public Document parseDocument(InputStream documentInputStream) throws IOException {
|
||||
|
||||
return parseDocument(documentInputStream, false);
|
||||
}
|
||||
|
||||
|
||||
public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException {
|
||||
|
||||
PDDocument pdDocument = null;
|
||||
try {
|
||||
//create tempFile
|
||||
@ -64,7 +66,6 @@ public class PdfSegmentationService {
|
||||
Document document = new Document();
|
||||
List<Page> pages = new ArrayList<>();
|
||||
|
||||
|
||||
pdDocument = reinitializePDDocument(tempFile, null);
|
||||
long pageCount = pdDocument.getNumberOfPages();
|
||||
|
||||
@ -101,32 +102,19 @@ public class PdfSegmentationService {
|
||||
page.setRotation(rotation);
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setPageNumber(pageNumber);
|
||||
List<PdfImage> mergedList = processImages(stripper.getImages());
|
||||
|
||||
List<PdfImage> imagesInImage = new ArrayList<>();
|
||||
for(PdfImage image: mergedList){
|
||||
for (PdfImage inner: mergedList){
|
||||
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
|
||||
imagesInImage.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
mergedList.removeAll(imagesInImage);
|
||||
|
||||
List<PdfImage> mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation);
|
||||
page.setImages(mergedList);
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
buildPageStatistics(page);
|
||||
increaseDocumentStatistics(page, document);
|
||||
|
||||
|
||||
if (!ignoreImages) {
|
||||
imageClassificationService.classifyImages(page);
|
||||
}
|
||||
|
||||
pages.add(page);
|
||||
|
||||
|
||||
}
|
||||
|
||||
document.setPages(pages);
|
||||
@ -149,7 +137,9 @@ public class PdfSegmentationService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private PDDocument reinitializePDDocument(File tempFile, PDDocument pdDocument) throws IOException {
|
||||
|
||||
if (pdDocument != null) {
|
||||
pdDocument.close();
|
||||
}
|
||||
@ -164,130 +154,6 @@ public class PdfSegmentationService {
|
||||
return newPDDocument;
|
||||
}
|
||||
|
||||
//merge images, if they are separated during pdf import, return new list of Pdfimages
|
||||
private List<PdfImage> processImages(List<PdfImage> imageList) {
|
||||
if (imageList.size() > 1) {
|
||||
List<PdfImage> mergedList = new ArrayList<>();
|
||||
int countElementsInList = 0;
|
||||
boolean beginImage = true;
|
||||
|
||||
// a List of Boolean, true = candidate for merging, false = no merging
|
||||
List<Boolean> candidatesList = getCandidatesList(imageList);
|
||||
|
||||
// loop through list, if there are candidates for merging (true), merge images and add it to mergedList
|
||||
for (int i = 0; i < candidatesList.size(); i++) {
|
||||
if (candidatesList.get(i)) {
|
||||
if (beginImage) {
|
||||
//begin of image, merge two parts of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1));
|
||||
// image merge successful
|
||||
if (mergedImage != null) {
|
||||
mergedList.add(mergedImage);
|
||||
countElementsInList++;
|
||||
}
|
||||
} else {
|
||||
//middle of an image, merge current piece auf mergedList with image of imageList
|
||||
PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1));
|
||||
// image merge successful
|
||||
if (mergedImage != null) {
|
||||
mergedList.set(countElementsInList - 1, mergedImage);
|
||||
}
|
||||
}
|
||||
beginImage = false;
|
||||
} else {
|
||||
// if the last candidate is false, then both images i and i+1 must be added
|
||||
if (i == candidatesList.size() - 1) {
|
||||
if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) {
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
} else {
|
||||
mergedList.add(imageList.get(i));
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
}
|
||||
} else {
|
||||
//first image is not splitted, add i to resultlist
|
||||
if (beginImage) {
|
||||
mergedList.add(imageList.get(i));
|
||||
countElementsInList++;
|
||||
} else {
|
||||
// i is the end of an image, add begin of new image
|
||||
mergedList.add(imageList.get(i + 1));
|
||||
countElementsInList++;
|
||||
beginImage = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return mergedList;
|
||||
} else {
|
||||
return imageList;
|
||||
}
|
||||
}
|
||||
|
||||
private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2) {
|
||||
|
||||
// diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten
|
||||
double width = image1.getPosition().getWidth();
|
||||
double height1 = image1.getPosition().getHeight();
|
||||
double height2 = image2.getPosition().getHeight();
|
||||
// mit den Werten, die unter Image gespeichert sind, funktioniert es
|
||||
double img1height = image1.getImage().getHeight();
|
||||
double img1width = image1.getImage().getWidth();
|
||||
double img2height = image2.getImage().getHeight();
|
||||
|
||||
BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
|
||||
Graphics mergedImageGraphics = mergedImage.getGraphics();
|
||||
try {
|
||||
mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
|
||||
mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);
|
||||
|
||||
// set Image, Position and type for merged Image
|
||||
//set position for merged image with values of image1 and the height of both
|
||||
Rectangle2D pos = new Rectangle2D.Float();
|
||||
pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), width, height1 + height2);
|
||||
PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage());
|
||||
// Graphics need to be disposed
|
||||
|
||||
image1.getImage().flush();
|
||||
image2.getImage().flush();
|
||||
|
||||
mergedImage.flush();
|
||||
mergedImageGraphics.dispose();
|
||||
|
||||
return newPdfImage;
|
||||
} catch (Exception e) {
|
||||
// failed to merge image
|
||||
log.error("Failed to merge image", e);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//make a list of true and false, if the image is a candidate for merging
|
||||
private List<Boolean> getCandidatesList(List<PdfImage> imageList) {
|
||||
List<Boolean> candidatesList = new ArrayList<>();
|
||||
for (int i = 0; i < imageList.size(); i++) {
|
||||
if (i >= 1) {
|
||||
candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i)));
|
||||
}
|
||||
}
|
||||
return candidatesList;
|
||||
}
|
||||
|
||||
// evaluate if two images are candidates for merging, depending on their coordinates, width and height
|
||||
private boolean isCandidateForMerging(PdfImage image1, PdfImage image2) {
|
||||
double x1 = image1.getPosition().getX();
|
||||
double y1 = image1.getPosition().getY();
|
||||
double width1 = image1.getPosition().getWidth();
|
||||
double x2 = image2.getPosition().getX();
|
||||
double y2 = image2.getPosition().getY();
|
||||
double width2 = image2.getPosition().getWidth();
|
||||
double height2 = image2.getPosition().getHeight();
|
||||
//if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates,
|
||||
// then it is the same picture and has to be merged -> return true
|
||||
return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(y1 - y2) && width2 > (height2 / 6);
|
||||
}
|
||||
|
||||
|
||||
private void increaseDocumentStatistics(Page page, Document document) {
|
||||
|
||||
@ -319,5 +185,4 @@ public class PdfSegmentationService {
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user