From 4f1c926a172cac410e86da5f8a13683b671a3b04 Mon Sep 17 00:00:00 2001 From: yhampe Date: Mon, 13 May 2024 15:06:25 +0200 Subject: [PATCH] RED-3813: image recategorization added first simple clustering service --- .../build.gradle.kts | 1 + .../service/image/Classification.java | 20 ++++++ .../service/image/FilterGeometry.java | 17 ++++++ .../v1/processor/service/image/Filters.java | 18 ++++++ .../v1/processor/service/image/Geometry.java | 17 ++++++ .../v1/processor/service/image/Image.java | 25 ++++++++ .../service/image/ImageClusteringService.java | 61 +++++++++++++++++++ .../processor/service/image/ImageFormat.java | 18 ++++++ .../service/image/ImageMetadata.java | 21 +++++++ .../service/image/ImageServiceResponse.java | 34 +++++++++++ .../v1/processor/service/image/ImageSize.java | 18 ++++++ .../v1/processor/service/image/Position.java | 20 ++++++ .../processor/service/image/Probability.java | 16 +++++ .../service/queue/ImageMessageReceiver.java | 10 ++- .../service/ImageClusteringTest.java | 5 ++ 15 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Classification.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/FilterGeometry.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Filters.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Geometry.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Image.java create mode 100644 
persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageClusteringService.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageFormat.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageMetadata.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageServiceResponse.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageSize.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Position.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Probability.java create mode 100644 persistence-service-v1/persistence-service-processor-v1/src/test/java/com/iqser/red/service/persistence/management/v1/processor/service/ImageClusteringTest.java diff --git a/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts b/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts index ced472f2b..b564912f0 100644 --- a/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts +++ b/persistence-service-v1/persistence-service-processor-v1/build.gradle.kts @@ -61,6 +61,7 @@ dependencies { api("commons-validator:commons-validator:1.7") implementation("org.mapstruct:mapstruct:1.5.5.Final") + implementation("org.apache.commons:commons-math3:3.6.1") 
annotationProcessor("org.mapstruct:mapstruct-processor:1.5.5.Final") testImplementation("org.springframework.amqp:spring-rabbit-test:3.0.2")
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Classification.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Classification.java
new file mode 100644
index 000000000..42ac1de40
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Classification.java
@@ -0,0 +1,26 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Classification attached to an ImageMetadata entry: a label plus a probabilities map.
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class Classification {
+
+    // NOTE(review): the type arguments were missing in the patch text; String -> Float is assumed, confirm against the image-service payload.
+    // @Builder.Default keeps the empty-map initializer when the instance is created through the Lombok builder.
+    @Builder.Default
+    private Map<String, Float> probabilities = new HashMap<>();
+    private String label;
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/FilterGeometry.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/FilterGeometry.java
new file mode 100644
index 000000000..7a8f248c8
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/FilterGeometry.java
@@ -0,0 +1,17 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public
class FilterGeometry { /* Holds an ImageSize and an ImageFormat; used as the type of Filters.geometry. */ + + private ImageSize imageSize; + private ImageFormat imageFormat; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Filters.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Filters.java new file mode 100644 index 000000000..7c31d6b07 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Filters.java @@ -0,0 +1,18 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.image; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Data holder with a FilterGeometry, a Probability and an allPassed flag; used as the type of ImageMetadata.filters. Accessors, builder and constructors come from the Lombok annotations. */ @Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Filters { + + private FilterGeometry geometry; + private Probability probability; + private boolean allPassed; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Geometry.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Geometry.java new file mode 100644 index 000000000..78ac75668 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Geometry.java @@ -0,0 +1,17 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.image; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Width and height as floats; used as the type of ImageMetadata.geometry. NOTE(review): the unit is not stated in this patch - confirm. */ @Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Geometry { + + private float width; + private float height; + +} diff --git
a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Image.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Image.java
new file mode 100644
index 000000000..c1a8ab78e
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Image.java
@@ -0,0 +1,42 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import java.util.Objects;
+
+import org.apache.commons.math3.ml.clustering.Clusterable;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Clusterable view of an image that turns its hash string into a numeric feature vector of fixedLength entries.
+ */
+@Data
+@Builder
+@AllArgsConstructor
+public class Image implements Clusterable {
+
+    private String hash;
+    private int fixedLength;
+
+    /**
+     * Builds the feature vector: element i holds the char code of hash character i.
+     * Hashes shorter than fixedLength are zero-padded; longer hashes are truncated so the
+     * loop never writes past the end of the array and every point has the same dimension.
+     *
+     * @return a new array of length fixedLength
+     * @throws NullPointerException if hash is null
+     */
+    @Override
+    public double[] getPoint() {
+        Objects.requireNonNull(hash, "hash must not be null");
+        double[] featureVector = new double[fixedLength];
+        int limit = Math.min(hash.length(), fixedLength);
+        for (int i = 0; i < limit; i++) {
+            featureVector[i] = hash.charAt(i);
+        }
+        return featureVector;
+    }
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageClusteringService.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageClusteringService.java
new file mode 100644
index 000000000..62bd9ed33
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageClusteringService.java
@@ -0,0 +1,61 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.math3.ml.clustering.Cluster;
+import org.apache.commons.math3.ml.clustering.DBSCANClusterer;
+import org.apache.commons.math3.ml.distance.DistanceMeasure;
+import org.apache.commons.math3.ml.distance.ManhattanDistance;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.persistence.management.v1.processor.service.FileManagementStorageService;
+import com.knecon.fforesight.tenantcommons.TenantContext;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+/** Clusters the images of a stored image-service response with DBSCAN over their representation hashes. */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class ImageClusteringService {
+
+    private final ObjectMapper objectMapper;
+    private final FileManagementStorageService fileManagementStorageService; // injected via the Lombok constructor
+    private final double eps = 26;
+    private final int minPts = 3;
+    private final int fixedLength = 25;
+    private final DistanceMeasure distanceMeasure = new ManhattanDistance();
+
+    /**
+     * Loads the response stored under {@code storageId}, clusters its images and logs the first image of each cluster.
+     * @param storageId id passed to the storage service together with the current tenant id
+     * @throws Exception if the stored object cannot be read or deserialized
+     */
+    public void clusterImages(String storageId) throws Exception {
+
+        DBSCANClusterer<Image> dbscanClusterer = new DBSCANClusterer<>(eps, minPts, distanceMeasure);
+        try (InputStream inputStream = fileManagementStorageService.getObject(TenantContext.getTenantId(), storageId)) {
+            ImageServiceResponse imageServiceResponse = objectMapper.readValue(inputStream, ImageServiceResponse.class);
+            List<Image> imageList = new ArrayList<>();
+            for (ImageMetadata metadata : imageServiceResponse.getData()) {
+                if (metadata.getRepresentation() != null) { // no hash, no feature vector
+                    imageList.add(new Image(metadata.getRepresentation(), fixedLength));
+                }
+            }
+            if (imageList.isEmpty()) {
+                // A file without images is valid input; throwing here would fail the whole image-response message.
+                log.info("No images to cluster for storageId {}", storageId);
+                return;
+            }
+            for (Cluster<Image> cluster : dbscanClusterer.cluster(imageList)) {
+                log.info("cluster: {}", cluster.getPoints().get(0));
+            }
+        }
+    }
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageFormat.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageFormat.java
new file mode 100644
index 000000000..89bd37ad2
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageFormat.java
@@ -0,0 +1,18 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class ImageFormat {
+
+    private float quotient;
+    private boolean tooTall;
+    private boolean tooWide;
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageMetadata.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageMetadata.java
new file mode 100644
index 000000000..5e7c59d50
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageMetadata.java
@@ -0,0 +1,21 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class ImageMetadata {
+
+    private Classification classification;
+    private String representation;
+    private Position position;
+    private Geometry geometry;
+    private Filters filters;
+    private boolean alpha;
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageServiceResponse.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageServiceResponse.java
new file mode 100644
index 000000000..e2e548002
--- /dev/null
+++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageServiceResponse.java
@@ -0,0 +1,50 @@
+package com.iqser.red.service.persistence.management.v1.processor.service.image;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonAlias;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Image-service response for one file (dossierId, fileId and the per-image metadata), read from JSON.
+ * The image list is accepted under the JSON names "imageMetadata" and "data".
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class ImageServiceResponse {
+
+    private String dossierId;
+    private String fileId;
+
+    // @Builder.Default: without it the Lombok builder ignores the initializer and leaves the list null.
+    @Builder.Default
+    @JsonProperty(value = "imageMetadata")
+    @JsonAlias("data")
+    private List<ImageMetadata> data = new ArrayList<>();
+
+    // NOTE(review): element type assumed to match data; the type argument was missing in the patch text.
+    @Builder.Default
+    private List<ImageMetadata> dataCV = new ArrayList<>();
+
+
+    /**
+     * Sets the image list; null is stored as an empty list so callers can always iterate getData().
+     *
+     * @param data image metadata entries, may be null
+     */
+    @JsonProperty(value = "imageMetadata")
+    @JsonAlias("data")
+    public void setData(List<ImageMetadata> data) {
+
+        this.data = data == null ? new ArrayList<>() : data;
+    }
+
+}
diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageSize.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageSize.java
new file mode
100644 index 000000000..40343e0e3 --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/ImageSize.java @@ -0,0 +1,18 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.image; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Data holder with a float quotient and the flags tooLarge and tooSmall; used as the type of FilterGeometry.imageSize. NOTE(review): what the quotient is relative to is not visible in this patch - confirm. */ @Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ImageSize { + + private float quotient; + private boolean tooLarge; + private boolean tooSmall; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Position.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Position.java new file mode 100644 index 000000000..31b347f0e --- /dev/null +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Position.java @@ -0,0 +1,20 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.image; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Two x and two y float coordinates plus a page number; used as the type of ImageMetadata.position. NOTE(review): presumably the corners of a bounding box on that page - confirm. */ @Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Position { + + private float x1; + private float x2; + private float y1; + private float y2; + private int pageNumber; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Probability.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Probability.java new file mode 100644 index 000000000..daec73d1d --- /dev/null +++
b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/image/Probability.java @@ -0,0 +1,16 @@ +package com.iqser.red.service.persistence.management.v1.processor.service.image; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Data holder with a single boolean flag, unconfident; used as the type of Filters.probability. */ @Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Probability { + + private boolean unconfident; + +} diff --git a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/ImageMessageReceiver.java b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/ImageMessageReceiver.java index 00597e9b2..8c3c31728 100644 --- a/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/ImageMessageReceiver.java +++ b/persistence-service-v1/persistence-service-processor-v1/src/main/java/com/iqser/red/service/persistence/management/v1/processor/service/queue/ImageMessageReceiver.java @@ -1,5 +1,6 @@ package com.iqser.red.service.persistence.management.v1.processor.service.queue; +import java.io.File; import java.io.IOException; import java.time.OffsetDateTime; import java.time.temporal.ChronoUnit; @@ -15,7 +16,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration; import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService; import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService; +import com.iqser.red.service.persistence.management.v1.processor.service.image.ImageClusteringService; import
com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings; +import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; @@ -33,6 +36,7 @@ public class ImageMessageReceiver { private final ObjectMapper objectMapper; private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService; private final ObservationRegistry observationRegistry; + private final ImageClusteringService imageClusteringService; @SneakyThrows @@ -41,8 +45,13 @@ public class ImageMessageReceiver { JsonNode imageResponse = objectMapper.readTree(message.getBody()); + log.info("image response"+imageResponse); + String dossierId = imageResponse.path("dossierId").asText(); String fileId = imageResponse.path("fileId").asText(); + imageClusteringService.clusterImages(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO)); + + addFileIdToTrace(fileId); fileStatusService.setStatusAnalyse(dossierId, fileId, false); @@ -59,7 +68,6 @@ public class ImageMessageReceiver { String dossierId = (String) imageResponse.get("dossierId"); String fileId = (String) imageResponse.get("fileId"); addFileIdToTrace(fileId); - log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.IMAGE_SERVICE_DLQ, dossierId, fileId); fileStatusProcessingUpdateService.analysisFailed(dossierId, fileId, diff --git a/persistence-service-v1/persistence-service-processor-v1/src/test/java/com/iqser/red/service/persistence/management/v1/processor/service/ImageClusteringTest.java b/persistence-service-v1/persistence-service-processor-v1/src/test/java/com/iqser/red/service/persistence/management/v1/processor/service/ImageClusteringTest.java new file mode 100644 index 000000000..dad72ea06 --- /dev/null +++ 
b/persistence-service-v1/persistence-service-processor-v1/src/test/java/com/iqser/red/service/persistence/management/v1/processor/service/ImageClusteringTest.java @@ -0,0 +1,5 @@ +package com.iqser.red.service.persistence.management.v1.processor.service; + +/** Placeholder test class: it declares no test methods yet. */ public class ImageClusteringTest { + +}