diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java new file mode 100644 index 00000000..73a94909 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/ImageMergeService.java @@ -0,0 +1,165 @@ +package com.iqser.red.service.redaction.v1.server.segmentation; + +import java.awt.Graphics; +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; +import java.util.ArrayList; +import java.util.List; + +import org.springframework.stereotype.Service; + +import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class ImageMergeService { + + + public List mergeImages(List images, int rotation){ + + List mergedList = processImages(images, rotation); + + List imagesInImage = new ArrayList<>(); + for(PdfImage image: mergedList){ + for (PdfImage inner: mergedList){ + if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){ + imagesInImage.add(inner); + } + } + } + mergedList.removeAll(imagesInImage); + + return mergedList; + } + + + //merge images, if they are separated during pdf import, return new list of Pdfimages + private List processImages(List imageList, int rotation) { + if (imageList.size() > 1) { + List mergedList = new ArrayList<>(); + int countElementsInList = 0; + boolean beginImage = true; + + // a List of Boolean, true = candidate for merging, false = no merging + List candidatesList = getCandidatesList(imageList, rotation); + + // loop through list, if there are 
candidates for merging (true), merge images and add it to mergedList + for (int i = 0; i < candidatesList.size(); i++) { + if (candidatesList.get(i)) { + if (beginImage) { + //begin of image, merge two parts of imageList + PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1), rotation); + // image merge successful + if (mergedImage != null) { + mergedList.add(mergedImage); + countElementsInList++; + } + } else { + //middle of an image, merge current piece of mergedList with image of imageList + PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1), rotation); + // image merge successful + if (mergedImage != null) { + mergedList.set(countElementsInList - 1, mergedImage); + } + } + beginImage = false; + } else { + // if the last candidate is false, then both images i and i+1 must be added + if (i == candidatesList.size() - 1) { + if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) { + mergedList.add(imageList.get(i + 1)); + } else { + mergedList.add(imageList.get(i)); + mergedList.add(imageList.get(i + 1)); + } + } else { + //first image is not split, add i to result list + if (beginImage) { + mergedList.add(imageList.get(i)); + countElementsInList++; + } else { + // i is the end of an image, add begin of new image + mergedList.add(imageList.get(i + 1)); + countElementsInList++; + beginImage = false; + } + } + } + } + return mergedList; + } else { + return imageList; + } + } + + private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) { + + // the values from getPosition do not seem to be correct; using them cuts off parts of the image + double width = image1.getPosition().getWidth(); + double width2 = image2.getPosition().getWidth(); + double height1 = image1.getPosition().getHeight(); + double height2 = image2.getPosition().getHeight(); + // it works with the values stored in the Image + double 
img1height = image1.getImage().getHeight(); + double img1width = image1.getImage().getWidth(); + double img2height = image2.getImage().getHeight(); + + BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB); + Graphics mergedImageGraphics = mergedImage.getGraphics(); + try { + mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null); + mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null); + + // set Image, Position and type for merged Image + //set position for merged image with values of image1 and the height of both + Rectangle2D pos = new Rectangle2D.Float(); + pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), rotation == 90 ? width + width2: width, rotation == 90 ? height1 : height1 + height2); + PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage()); + // Graphics need to be disposed + + image1.getImage().flush(); + image2.getImage().flush(); + + mergedImage.flush(); + mergedImageGraphics.dispose(); + + return newPdfImage; + } catch (Exception e) { + // failed to merge image + log.error("Failed to merge image", e); + return null; + } + + + } + + //make a list of true and false, if the image is a candidate for merging + private List getCandidatesList(List imageList, int rotation) { + List candidatesList = new ArrayList<>(); + for (int i = 0; i < imageList.size(); i++) { + if (i >= 1) { + candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation)); + } + } + return candidatesList; + } + + // evaluate if two images are candidates for merging, depending on their coordinates, width and height + private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) { + double x1 = rotation == 90 ? image1.getPosition().getY() : image1.getPosition().getX(); + double y1 = rotation == 90 ? image1.getPosition().getX() : image1.getPosition().getY(); + double width1 = rotation == 90 ? 
image1.getPosition().getHeight() : image1.getPosition().getWidth(); + double x2 = rotation == 90 ? image2.getPosition().getY() : image2.getPosition().getX(); + double y2 = rotation == 90 ? image2.getPosition().getX() : image2.getPosition().getY(); + double width2 = rotation == 90 ? image2.getPosition().getHeight() : image2.getPosition().getWidth(); + double height2 = rotation == 90 ? image2.getPosition().getWidth() : image2.getPosition().getHeight(); + //if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates, + // then it is the same picture and has to be merged -> return true + return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(rotation == 90 ? y2 - y1 : y1 - y2) && width2 > (height2 / 6); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java index a33a009d..22643b0b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationService.java @@ -1,6 +1,19 @@ package com.iqser.red.service.redaction.v1.server.segmentation; -import com.iqser.red.service.redaction.v1.model.Rectangle; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.apache.pdfbox.io.MemoryUsageSetting; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import 
org.springframework.stereotype.Service; + import com.iqser.red.service.redaction.v1.server.classification.model.Document; import com.iqser.red.service.redaction.v1.server.classification.model.Page; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; @@ -15,24 +28,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings; import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService; import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.io.IOUtils; -import org.apache.pdfbox.io.MemoryUsageSetting; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.springframework.stereotype.Service; - -import java.awt.Graphics; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; @Slf4j @Service @@ -47,13 +45,17 @@ public class PdfSegmentationService { private final ClassificationService classificationService; private final SectionsBuilderService sectionsBuilderService; private final ImageClassificationService imageClassificationService; + private final ImageMergeService imageMergeService; public Document parseDocument(InputStream documentInputStream) throws IOException { + return parseDocument(documentInputStream, false); } + public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException { + PDDocument pdDocument = null; try { //create tempFile @@ -64,7 +66,6 @@ public class PdfSegmentationService { Document document = new Document(); List pages 
= new ArrayList<>(); - pdDocument = reinitializePDDocument(tempFile, null); long pageCount = pdDocument.getNumberOfPages(); @@ -101,32 +102,19 @@ public class PdfSegmentationService { page.setRotation(rotation); page.setLandscape(isLandscape || isRotated); page.setPageNumber(pageNumber); - List mergedList = processImages(stripper.getImages()); - - List imagesInImage = new ArrayList<>(); - for(PdfImage image: mergedList){ - for (PdfImage inner: mergedList){ - if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){ - imagesInImage.add(inner); - } - } - } - mergedList.removeAll(imagesInImage); + List mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation); page.setImages(mergedList); tableExtractionService.extractTables(cleanRulings, page); buildPageStatistics(page); increaseDocumentStatistics(page, document); - if (!ignoreImages) { imageClassificationService.classifyImages(page); } pages.add(page); - - } document.setPages(pages); @@ -149,7 +137,9 @@ public class PdfSegmentationService { } } + private PDDocument reinitializePDDocument(File tempFile, PDDocument pdDocument) throws IOException { + if (pdDocument != null) { pdDocument.close(); } @@ -164,130 +154,6 @@ public class PdfSegmentationService { return newPDDocument; } - //merge images, if they are separated during pdf import, return new list of Pdfimages - private List processImages(List imageList) { - if (imageList.size() > 1) { - List mergedList = new ArrayList<>(); - int countElementsInList = 0; - boolean beginImage = true; - - // a List of Boolean, true = candidate for merging, false = no merging - List candidatesList = getCandidatesList(imageList); - - // loop through list, if there are candidates for merging (true), merge images and add it to mergedList - for (int i = 0; i < candidatesList.size(); i++) { - if (candidatesList.get(i)) { - if (beginImage) { - //begin of 
image, merge two parts of imageList - PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1)); - // image merge successful - if (mergedImage != null) { - mergedList.add(mergedImage); - countElementsInList++; - } - } else { - //middle of an image, merge current piece auf mergedList with image of imageList - PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1)); - // image merge successful - if (mergedImage != null) { - mergedList.set(countElementsInList - 1, mergedImage); - } - } - beginImage = false; - } else { - // if the last candidate is false, then both images i and i+1 must be added - if (i == candidatesList.size() - 1) { - if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) { - mergedList.add(imageList.get(i + 1)); - } else { - mergedList.add(imageList.get(i)); - mergedList.add(imageList.get(i + 1)); - } - } else { - //first image is not splitted, add i to resultlist - if (beginImage) { - mergedList.add(imageList.get(i)); - countElementsInList++; - } else { - // i is the end of an image, add begin of new image - mergedList.add(imageList.get(i + 1)); - countElementsInList++; - beginImage = false; - } - } - } - } - return mergedList; - } else { - return imageList; - } - } - - private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2) { - - // diese Angaben von getPosition scheinen nicht richtig zu sein, damit werden teile des Bildes abgeschnitten - double width = image1.getPosition().getWidth(); - double height1 = image1.getPosition().getHeight(); - double height2 = image2.getPosition().getHeight(); - // mit den Werten, die unter Image gespeichert sind, funktioniert es - double img1height = image1.getImage().getHeight(); - double img1width = image1.getImage().getWidth(); - double img2height = image2.getImage().getHeight(); - - BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), 
BufferedImage.TYPE_INT_RGB); - Graphics mergedImageGraphics = mergedImage.getGraphics(); - try { - mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null); - mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null); - - // set Image, Position and type for merged Image - //set position for merged image with values of image1 and the height of both - Rectangle2D pos = new Rectangle2D.Float(); - pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), width, height1 + height2); - PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage()); - // Graphics need to be disposed - - image1.getImage().flush(); - image2.getImage().flush(); - - mergedImage.flush(); - mergedImageGraphics.dispose(); - - return newPdfImage; - } catch (Exception e) { - // failed to merge image - log.error("Failed to merge image", e); - return null; - } - - - } - - //make a list of true and false, if the image is a candidate for merging - private List getCandidatesList(List imageList) { - List candidatesList = new ArrayList<>(); - for (int i = 0; i < imageList.size(); i++) { - if (i >= 1) { - candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i))); - } - } - return candidatesList; - } - - // evaluate if two images are candidates for merging, depending on their coordinates, width and height - private boolean isCandidateForMerging(PdfImage image1, PdfImage image2) { - double x1 = image1.getPosition().getX(); - double y1 = image1.getPosition().getY(); - double width1 = image1.getPosition().getWidth(); - double x2 = image2.getPosition().getX(); - double y2 = image2.getPosition().getY(); - double width2 = image2.getPosition().getWidth(); - double height2 = image2.getPosition().getHeight(); - //if the x-coordinates and widths of images are equal and the height is equal to difference between y-coordinates, - // then it is the same picture and has to be merged -> return true - return x1 == x2 && width1 == width2 && 
Math.ceil(height2) == Math.ceil(y1 - y2) && width2 > (height2 / 6); - } - private void increaseDocumentStatistics(Page page, Document document) { @@ -319,5 +185,4 @@ public class PdfSegmentationService { } - }