RED-3813: image recategorization

added first simple clustering service
This commit is contained in:
yhampe 2024-05-13 15:06:25 +02:00
parent 63dbc073c7
commit 4f1c926a17
15 changed files with 300 additions and 1 deletions

View File

@ -61,6 +61,7 @@ dependencies {
api("commons-validator:commons-validator:1.7")
implementation("org.mapstruct:mapstruct:1.5.5.Final")
implementation("org.apache.commons:commons-math3:3.6.1")
annotationProcessor("org.mapstruct:mapstruct-processor:1.5.5.Final")
testImplementation("org.springframework.amqp:spring-rabbit-test:3.0.2")

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import java.util.HashMap;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Classification {
private Map<String, Float> probabilities = new HashMap<>();
private String label;
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Groups the size-related and format-related filter data of an image.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class FilterGeometry {

    /** Size-related filter data. */
    private ImageSize imageSize;

    /** Format-related filter data. */
    private ImageFormat imageFormat;
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Container for the filter data of an image: geometry filters, probability
 * filter and an overall flag.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class Filters {

    /** Geometry-related filter data. */
    private FilterGeometry geometry;

    /** Probability-related filter data. */
    private Probability probability;

    /** Overall flag (presumably true when every filter passed - confirm with the producer). */
    private boolean allPassed;
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Width and height of an image.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class Geometry {

    /** Image width (unit not specified here). */
    private float width;

    /** Image height (unit not specified here). */
    private float height;
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import org.apache.commons.math3.ml.clustering.Clusterable;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A clusterable image, described by a hash string that is turned into a
 * numeric feature vector of a fixed dimension.
 *
 * <p>Accessors, value semantics, the builder and the all-args constructor
 * {@code (hash, fixedLength)} are generated by Lombok.
 */
@Data
@Builder
@AllArgsConstructor
public class Image implements Clusterable {

    /** Hash string of the image; may be shorter or longer than {@link #fixedLength}. */
    private String hash;

    /** Dimension of the feature vector returned by {@link #getPoint()}. */
    private int fixedLength;

    /**
     * Converts the hash into a feature vector with exactly {@code fixedLength}
     * entries.
     *
     * <p>Entry {@code i} holds the numeric value of the {@code i}-th character of
     * the hash. A hash shorter than {@code fixedLength} is padded with
     * {@code 0.0}; a longer hash is truncated so that the array is never indexed
     * out of bounds. A {@code null} hash is treated like an empty one and yields
     * an all-zero vector.
     *
     * @return a newly allocated vector of length {@code fixedLength}
     */
    @Override
    public double[] getPoint() {
        double[] featureVector = new double[fixedLength];
        if (hash == null) {
            return featureVector;
        }
        // Never write past the end of the vector, even for over-long hashes.
        int usableLength = Math.min(hash.length(), fixedLength);
        for (int i = 0; i < usableLength; i++) {
            featureVector[i] = hash.charAt(i);
        }
        return featureVector;
    }
}

View File

@ -0,0 +1,61 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.DBSCANClusterer;
import org.apache.commons.math3.ml.distance.DistanceMeasure;
import org.apache.commons.math3.ml.distance.ManhattanDistance;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.service.FileManagementStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Clusters the images described by a stored image-service response using
 * DBSCAN over the per-image representation strings.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ImageClusteringService {

    /** DBSCAN neighbourhood radius. */
    private static final double EPS = 26;

    /** DBSCAN minimum number of neighbouring points. */
    private static final int MIN_PTS = 3;

    /** Dimension of the feature vector built for every image. */
    private static final int FIXED_LENGTH = 25;

    /** Distance used to compare feature vectors. */
    private static final DistanceMeasure DISTANCE_MEASURE = new ManhattanDistance();

    private final ObjectMapper objectMapper;

    // NOTE(review): field injection next to constructor injection; kept as is so the
    // Lombok-generated constructor signature stays unchanged for existing callers.
    @Autowired
    FileManagementStorageService fileManagementStorageService;


    /**
     * Loads the object stored under {@code storageId} for the current tenant,
     * deserializes it into an {@link ImageServiceResponse}, clusters the contained
     * images and logs the first image of every cluster found.
     *
     * @param storageId storage id of the serialized image-service response
     * @throws IllegalArgumentException if the response contains no usable image
     * @throws Exception if the object cannot be read or deserialized
     */
    public void clusterImages(String storageId) throws Exception {

        DBSCANClusterer<Image> dbscanClusterer = new DBSCANClusterer<>(EPS, MIN_PTS, DISTANCE_MEASURE);

        try (InputStream inputStream = fileManagementStorageService.getObject(TenantContext.getTenantId(), storageId)) {

            ImageServiceResponse imageServiceResponse = objectMapper.readValue(inputStream, ImageServiceResponse.class);
            // readValue yields null for a literal JSON "null"; the data list may also be explicitly null.
            List<ImageMetadata> imageMetadataList = imageServiceResponse == null ? null : imageServiceResponse.getData();

            List<Image> imageList = toImages(imageMetadataList);
            if (imageList.isEmpty()) {
                throw new IllegalArgumentException("The image list is empty. Unable to perform clustering.");
            }

            List<Cluster<Image>> clusters = dbscanClusterer.cluster(imageList);
            for (Cluster<Image> cluster : clusters) {
                List<Image> clusterImages = cluster.getPoints();
                log.info("cluster: {}", clusterImages.get(0));
            }
        }
    }


    /**
     * Converts metadata entries into clusterable images, skipping entries that
     * are {@code null} or carry no representation.
     */
    private List<Image> toImages(List<ImageMetadata> imageMetadataList) {

        List<Image> imageList = new ArrayList<>();
        if (imageMetadataList == null) {
            return imageList;
        }
        for (ImageMetadata metadata : imageMetadataList) {
            if (metadata == null || metadata.getRepresentation() == null) {
                log.warn("Skipping image metadata without representation");
                continue;
            }
            imageList.add(new Image(metadata.getRepresentation(), FIXED_LENGTH));
        }
        return imageList;
    }

}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Format-related filter data of an image: a quotient and two flags.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class ImageFormat {

    /** Format quotient (presumably a width/height ratio - confirm with the producer). */
    private float quotient;

    /** Flag marking the image as too tall. */
    private boolean tooTall;

    /** Flag marking the image as too wide. */
    private boolean tooWide;
}

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Metadata of a single image: classification, representation string, position,
 * geometry, filter data and an alpha flag.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class ImageMetadata {

    /** Classification data of the image. */
    private Classification classification;

    /** Representation string of the image; used as the hash when clustering. */
    private String representation;

    /** Position of the image. */
    private Position position;

    /** Width and height of the image. */
    private Geometry geometry;

    /** Filter data of the image. */
    private Filters filters;

    /** Alpha flag (presumably whether the image has an alpha channel - confirm). */
    private boolean alpha;
}

View File

@ -0,0 +1,34 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ImageServiceResponse {
private String dossierId;
private String fileId;
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
private List<ImageMetadata> data = new ArrayList<>();
private List<ImageMetadata> dataCV = new ArrayList<>();
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
public void setData(List<ImageMetadata> data) {this.data = data;}
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Size-related filter data of an image: a quotient and two flags.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class ImageSize {

    /** Size quotient (reference quantity not visible here - confirm with the producer). */
    private float quotient;

    /** Flag marking the image as too large. */
    private boolean tooLarge;

    /** Flag marking the image as too small. */
    private boolean tooSmall;
}

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Position of an image: two x values, two y values and a page number.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class Position {

    /** First x coordinate. */
    private float x1;

    /** Second x coordinate. */
    private float x2;

    /** First y coordinate. */
    private float y1;

    /** Second y coordinate. */
    private float y2;

    /** Page number (whether it is zero- or one-based is not visible here). */
    private int pageNumber;
}

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Probability-related filter data of an image, consisting of a single flag.
 *
 * <p>Getters, setters, value semantics, a builder and the no-args/all-args
 * constructors are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class Probability {

    /** Flag marking the result as unconfident. */
    private boolean unconfident;
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.io.File;
import java.io.IOException;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
@ -15,7 +16,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.image.ImageClusteringService;
import com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings;
import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -33,6 +36,7 @@ public class ImageMessageReceiver {
private final ObjectMapper objectMapper;
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
private final ObservationRegistry observationRegistry;
private final ImageClusteringService imageClusteringService;
@SneakyThrows
@ -41,8 +45,13 @@ public class ImageMessageReceiver {
JsonNode imageResponse = objectMapper.readTree(message.getBody());
log.info("image response"+imageResponse);
String dossierId = imageResponse.path("dossierId").asText();
String fileId = imageResponse.path("fileId").asText();
imageClusteringService.clusterImages(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO));
addFileIdToTrace(fileId);
fileStatusService.setStatusAnalyse(dossierId, fileId, false);
@ -59,7 +68,6 @@ public class ImageMessageReceiver {
String dossierId = (String) imageResponse.get("dossierId");
String fileId = (String) imageResponse.get("fileId");
addFileIdToTrace(fileId);
log.warn("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.IMAGE_SERVICE_DLQ, dossierId, fileId);
fileStatusProcessingUpdateService.analysisFailed(dossierId,
fileId,

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.persistence.management.v1.processor.service;
/**
 * Placeholder test class; it does not declare any test cases yet.
 */
public class ImageClusteringTest {

    // Intentionally empty.
}