Merge branch 'RED-3813' into 'main'
RED-3813: Recategorize same image as experimental feature See merge request fforesight/layout-parser!155
This commit is contained in:
commit
a2f559af51
@ -43,6 +43,8 @@ public class DocumentStructure implements Serializable {
|
||||
public static final String POSITION = "position";
|
||||
public static final String ID = "id";
|
||||
|
||||
public static final String REPRESENTATION_HASH = "representationHash";
|
||||
|
||||
}
|
||||
|
||||
@Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
|
||||
@ -67,11 +69,25 @@ public class DocumentStructure implements Serializable {
|
||||
|
||||
public static Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER)).map(Float::parseFloat).toList();
|
||||
List<Float> floats = Arrays.stream(bBox.split(RECTANGLE_DELIMITER))
|
||||
.map(Float::parseFloat)
|
||||
.toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
}
|
||||
|
||||
|
||||
public static double[] parseRepresentationVector(String representationHash) {
|
||||
|
||||
String[] stringArray = representationHash.split("[,\\s]+");
|
||||
double[] doubleArray = new double[stringArray.length];
|
||||
for (int i = 0; i < stringArray.length; i++) {
|
||||
doubleArray[i] = Double.parseDouble(stringArray[i]);
|
||||
}
|
||||
|
||||
return doubleArray;
|
||||
}
|
||||
|
||||
|
||||
public EntryData get(List<Integer> tocId) {
|
||||
|
||||
if (tocId.isEmpty()) {
|
||||
@ -87,19 +103,24 @@ public class DocumentStructure implements Serializable {
|
||||
|
||||
public Stream<EntryData> streamAllEntries() {
|
||||
|
||||
return Stream.concat(Stream.of(root), root.children.stream()).flatMap(DocumentStructure::flatten);
|
||||
return Stream.concat(Stream.of(root), root.children.stream())
|
||||
.flatMap(DocumentStructure::flatten);
|
||||
}
|
||||
|
||||
|
||||
public String toString() {
|
||||
|
||||
return String.join("\n", streamAllEntries().map(EntryData::toString).toList());
|
||||
return String.join("\n",
|
||||
streamAllEntries().map(EntryData::toString)
|
||||
.toList());
|
||||
}
|
||||
|
||||
|
||||
private static Stream<EntryData> flatten(EntryData entry) {
|
||||
|
||||
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentStructure::flatten));
|
||||
return Stream.concat(Stream.of(entry),
|
||||
entry.children.stream()
|
||||
.flatMap(DocumentStructure::flatten));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -148,14 +148,17 @@ public class LayoutParsingPipeline {
|
||||
visualLayoutParsingResponse,
|
||||
layoutParsingRequest.identifier());
|
||||
|
||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||
|
||||
Document documentGraph = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
|
||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
|
||||
|
||||
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
|
||||
|
||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false, layoutParsingRequest.visualLayoutParsingFileId().isPresent());
|
||||
layoutGridService.addLayoutGrid(viewerDocumentFile,
|
||||
documentGraph,
|
||||
viewerDocumentFile,
|
||||
false,
|
||||
layoutParsingRequest.visualLayoutParsingFileId()
|
||||
.isPresent());
|
||||
|
||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||
|
||||
@ -312,7 +315,11 @@ public class LayoutParsingPipeline {
|
||||
|
||||
pdfImages.computeIfAbsent(pageNumber, x -> new ArrayList<>())
|
||||
.addAll(graphics.stream()
|
||||
.map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()), ImageType.GRAPHIC, false, stripper.getPageNumber()))
|
||||
.map(g -> new ClassifiedImage(new Rectangle2D.Double(g.x1, g.y1, g.width(), g.height()),
|
||||
ImageType.GRAPHIC,
|
||||
false,
|
||||
stripper.getPageNumber(),
|
||||
""))
|
||||
.toList());
|
||||
|
||||
ClassificationPage classificationPage = switch (layoutParsingType) {
|
||||
|
||||
@ -28,6 +28,8 @@ public class Image extends AbstractSemanticNode {
|
||||
|
||||
String id;
|
||||
|
||||
String representationHash;
|
||||
|
||||
ImageType imageType;
|
||||
boolean transparent;
|
||||
Rectangle2D position;
|
||||
|
||||
@ -24,5 +24,7 @@ public class ClassifiedImage {
|
||||
private boolean hasTransparency;
|
||||
@NonNull
|
||||
private int page;
|
||||
@NonNull
|
||||
private String representation;
|
||||
|
||||
}
|
||||
|
||||
@ -32,7 +32,7 @@ public class ImageServiceResponseAdapter {
|
||||
.add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
|
||||
imageMetadata.getPosition().getY1(),
|
||||
imageMetadata.getGeometry().getWidth(),
|
||||
imageMetadata.getGeometry().getHeight()), classification, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber()));
|
||||
imageMetadata.getGeometry().getHeight()), classification, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber(),imageMetadata.getRepresentation()));
|
||||
});
|
||||
|
||||
// Currently this is a copy, but it will be changed later because I don't think that we should unclassified images.
|
||||
@ -44,7 +44,7 @@ public class ImageServiceResponseAdapter {
|
||||
.add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
|
||||
imageMetadata.getPosition().getY1(),
|
||||
imageMetadata.getGeometry().getWidth(),
|
||||
imageMetadata.getGeometry().getHeight()), classification, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber()));
|
||||
imageMetadata.getGeometry().getHeight()), classification, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber(),imageMetadata.getRepresentation()));
|
||||
});
|
||||
|
||||
return images;
|
||||
|
||||
@ -79,7 +79,7 @@ public class VisualLayoutParsingAdapter {
|
||||
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),
|
||||
t.getBox().getY1(),
|
||||
t.getBox().getX2() - t.getBox().getX1(),
|
||||
t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber);
|
||||
t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber,"");
|
||||
|
||||
signatures.add(signature);
|
||||
}
|
||||
|
||||
@ -12,6 +12,7 @@ import lombok.NoArgsConstructor;
|
||||
public class ImageMetadata {
|
||||
|
||||
private Classification classification;
|
||||
private String representation;
|
||||
private Position position;
|
||||
private Geometry geometry;
|
||||
private Filters filters;
|
||||
|
||||
@ -45,8 +45,10 @@ import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@UtilityClass
|
||||
@Slf4j
|
||||
public class DocumentGraphFactory {
|
||||
|
||||
public Document buildDocumentGraph(LayoutParsingType layoutParsingType, ClassificationDocument document) {
|
||||
@ -145,6 +147,7 @@ public class DocumentGraphFactory {
|
||||
.position(position)
|
||||
.transparent(image.isHasTransparency())
|
||||
.page(page)
|
||||
.representationHash(image.getRepresentation())
|
||||
.documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
page.getMainBody().add(imageNode);
|
||||
|
||||
@ -24,6 +24,7 @@ public class PropertiesMapper {
|
||||
properties.put(DocumentStructure.ImageProperties.TRANSPARENT, String.valueOf(image.isTransparent()));
|
||||
properties.put(DocumentStructure.ImageProperties.POSITION, toString(image.getPosition()));
|
||||
properties.put(DocumentStructure.ImageProperties.ID, image.getId());
|
||||
properties.put(DocumentStructure.ImageProperties.REPRESENTATION_HASH, image.getRepresentationHash());
|
||||
return properties;
|
||||
}
|
||||
|
||||
|
||||
@ -190,7 +190,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
imageMetadata.getGeometry().getHeight()),
|
||||
ImageType.valueOf(imageMetadata.getClassification().getLabel().toUpperCase(Locale.ROOT)),
|
||||
imageMetadata.isAlpha(),
|
||||
imageMetadata.getPosition().getPageNumber())));
|
||||
imageMetadata.getPosition().getPageNumber(), "")));
|
||||
|
||||
System.out.println("object");
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user