RED-3813: image recategorization

added saving images after layout parsing finishes

added endpoint for querying similar images
This commit is contained in:
yhampe 2024-06-26 15:03:13 +02:00
parent 9c48547c36
commit 23b437e60a
14 changed files with 416 additions and 74 deletions

View File

@ -0,0 +1,51 @@
package com.iqser.red.persistence.service.v1.external.api.impl.controller;
import static com.iqser.red.service.persistence.management.v1.processor.roles.ActionRoles.GET_SIMILIAR_IMAGES;
import java.util.ArrayList;
import java.util.List;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import com.iqser.red.service.persistence.management.v1.processor.service.image.ImageSimilarityService;
import com.iqser.red.service.persistence.service.v1.api.external.resource.ImageSimilaritySearchResource;
import com.iqser.red.service.persistence.service.v1.api.shared.model.image.ImageSimilaritySearchResponse;
import com.iqser.red.service.persistence.service.v1.api.shared.model.image.ImageSimilaritySearchRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.document.ImageDocument;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@RestController
@RequiredArgsConstructor
@Slf4j
public class ImageSimilaritySearchController implements ImageSimilaritySearchResource {
private final ImageSimilarityService imageSimiliarityService;
@SneakyThrows
public ResponseEntity<ImageSimilaritySearchResponse> getSimilarImages(@RequestBody ImageSimilaritySearchRequest imageSimilaritySearchRequest) {
log.info("received similiar image search request {}",imageSimilaritySearchRequest);
List<ImageDocument> similarImages = this.imageSimiliarityService.findSimilarImages(imageSimilaritySearchRequest.getCentroId(),
imageSimilaritySearchRequest.getDistance(),
imageSimilaritySearchRequest.getScope());
List<String> similarImagesIds = new ArrayList<>();
List<Byte> similarImagesThumbnails = new ArrayList<>();
similarImages.stream()
.forEach(doc -> {
similarImagesIds.add(doc.getImageId());
similarImagesThumbnails.add(doc.getThumbnail());
});
ImageSimilaritySearchResponse imageSimilaritySearchResponse = new ImageSimilaritySearchResponse(similarImagesIds, similarImagesThumbnails);
return new ResponseEntity<>(imageSimilaritySearchResponse, HttpStatus.OK);
}
}

View File

@ -0,0 +1,28 @@
package com.iqser.red.service.persistence.service.v1.api.external.resource;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.ResponseStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.image.ImageSimilaritySearchRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.image.ImageSimilaritySearchResponse;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
/**
 * External API contract of the image similarity search.
 */
public interface ImageSimilaritySearchResource {

    /** Path of the similarity search endpoint, relative to the external API base path. */
    String IMAGE_SIMILARITY_SEARCH_PATH = ExternalApi.BASE_PATH + "/imageSimilaritySearch";


    /**
     * Searches for images similar to the image referenced in the request.
     *
     * @param imageSimilaritySearchRequest request body describing the search
     * @return the search result wrapped in a response entity
     */
    @PostMapping(value = IMAGE_SIMILARITY_SEARCH_PATH, produces = MediaType.APPLICATION_JSON_VALUE)
    @ResponseStatus(value = HttpStatus.OK)
    @Operation(summary = "Gets similiar images to given image", description = "None")
    @ApiResponses(value = {
            @ApiResponse(responseCode = "200", description = "OK"),
            @ApiResponse(responseCode = "404", description = "Not found")})
    ResponseEntity<ImageSimilaritySearchResponse> getSimilarImages(@RequestBody ImageSimilaritySearchRequest imageSimilaritySearchRequest);

}

View File

@ -59,6 +59,10 @@ public final class ActionRoles {
public static final String READ_GENERAL_CONFIGURATION = "red-read-general-configuration";
public static final String WRITE_GENERAL_CONFIGURATION = "red-write-general-configuration";
// IMAGE SIMILARITY SEARCH
public static final String GET_SIMILIAR_IMAGES ="red-get-similiar-images";
// Preferences
public static final String MANAGE_USER_PREFERENCES = "red-manage-user-preferences";

View File

@ -13,6 +13,7 @@ import lombok.NoArgsConstructor;
public class Image implements Clusterable {
private String hash;
private int fixedLength;
private String label;
@Override
public double[] getPoint() {

View File

@ -1,61 +0,0 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.DBSCANClusterer;
import org.apache.commons.math3.ml.distance.DistanceMeasure;
import org.apache.commons.math3.ml.distance.ManhattanDistance;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.service.FileManagementStorageService;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Clusters the images of an image service response with DBSCAN.
 *
 * <p>The response is read from storage for the current tenant, every entry is wrapped into an
 * {@link Image} and the resulting points are clustered using the Manhattan distance.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ImageClusteringService {

    /** DBSCAN neighbourhood radius. */
    private static final double EPS = 26;
    /** DBSCAN minimum number of neighbours for a point to seed a cluster. */
    private static final int MIN_PTS = 3;
    /** Second constructor argument of every {@link Image}; presumably the representation length - TODO confirm. */
    private static final int FIXED_LENGTH = 25;
    /** Metric used to compare two images. */
    private static final DistanceMeasure DISTANCE_MEASURE = new ManhattanDistance();

    private final ObjectMapper objectMapper;

    @Autowired
    FileManagementStorageService fileManagementStorageService;


    /**
     * Loads the image service response stored under {@code storageId}, clusters its images and
     * logs the first image of every cluster.
     *
     * @param storageId storage id of the serialized {@code ImageServiceResponse}
     * @throws IllegalArgumentException if the response contains no images
     * @throws Exception if the object cannot be read or deserialized
     */
    public void clusterImages(String storageId) throws Exception {

        DBSCANClusterer<Image> clusterer = new DBSCANClusterer<>(EPS, MIN_PTS, DISTANCE_MEASURE);

        try (InputStream storedResponse = fileManagementStorageService.getObject(TenantContext.getTenantId(), storageId)) {
            ImageServiceResponse response = objectMapper.readValue(storedResponse, ImageServiceResponse.class);

            List<Image> points = new ArrayList<>();
            response.getData()
                    .forEach(metadata -> points.add(new Image(metadata.getRepresentation(), FIXED_LENGTH)));

            if (points.isEmpty()) {
                throw new IllegalArgumentException("The image list is empty. Unable to perform clustering.");
            }

            clusterer.cluster(points)
                    .forEach(cluster -> log.info("cluster: {}", cluster.getPoints().get(0)));
        }
    }

}

View File

@ -0,0 +1,96 @@
package com.iqser.red.service.persistence.management.v1.processor.service.image;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.service.FileManagementStorageService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.Scope;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.document.ImageDocument;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.service.ImageMongoService;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.extern.slf4j.Slf4j;
@Service
@Slf4j
public class ImageSimilarityService {
@Autowired
ImageMongoService imageMongoService;
@Autowired
FileManagementStorageService fileManagementStorageService;
@Autowired
ObjectMapper objectMapper;
//load all recat requests and find similiar images
public void saveImages(String templateId, String dossierId, String fileId, String storageId) throws IOException {
//load structure files of all files in template
List<ImageDocument> imageDocuments = new ArrayList<>();
try (InputStream inputStream = fileManagementStorageService.getObject(TenantContext.getTenantId(), storageId)) {
//load images from structures
DocumentStructure documentStructure = objectMapper.readValue(inputStream, DocumentStructure.class);
documentStructure.streamAllEntries()
.filter(entry -> entry.getType().equals(NodeType.IMAGE))
.forEach(i -> {
Map<String, String> properties = i.getProperties();
ImageDocument imageDocument = new ImageDocument();
imageDocument.setImageId(properties.get("id"));
imageDocument.setFeatureVector(parseRepresentationVector(properties.get("representationHash")));
imageDocument.setTemplateId(templateId);
imageDocument.setDossierId(dossierId);
imageDocument.setFileId(fileId);
imageDocuments.add(imageDocument);
});
}
if (imageDocuments.isEmpty()) {
return;
}
imageMongoService.saveImages(imageDocuments);
}
public List<ImageDocument> findSimilarImages(String centroId, double distance, Scope scope) throws Exception {
ImageDocument centroImage = this.imageMongoService.findById(centroId);
log.info("image received with id {}: {}",centroId, centroImage);
List<ImageDocument> similarImages = this.imageMongoService.findSimilarImages(centroImage, distance, scope);
log.info("received similar images: {}",similarImages);
return similarImages.stream()
.collect(Collectors.toList());
}
public static double[] parseRepresentationVector(String representationHash) {
double[] doubleArray = new double[representationHash.length()];
for (int i = 0; i < representationHash.length(); i++) {
char c = representationHash.charAt(i);
if (Character.isDigit(c)) {
// Convert numeric characters directly to their numeric values
doubleArray[i] = Character.getNumericValue(c);
} else if (Character.isLetter(c)) {
// Convert alphabetic characters to their position in the alphabet
// 'A' or 'a' -> 10, 'B' or 'b' -> 11, ..., 'F' or 'f' -> 15
doubleArray[i] = 10 + Character.toUpperCase(c) - 'A';
} else {
throw new IllegalArgumentException("Invalid character in input string: " + c);
}
}
return doubleArray;
}
}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.persistence.management.v1.processor.service.queue;
import java.io.File;
import java.io.IOException;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
@ -16,11 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.image.ImageClusteringService;
import com.iqser.red.service.persistence.management.v1.processor.settings.FileManagementServiceSettings;
import com.iqser.red.service.persistence.management.v1.processor.utils.StorageIdUtils;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import io.micrometer.observation.ObservationRegistry;
import lombok.RequiredArgsConstructor;
@ -36,7 +31,6 @@ public class ImageMessageReceiver {
private final ObjectMapper objectMapper;
private final FileStatusProcessingUpdateService fileStatusProcessingUpdateService;
private final ObservationRegistry observationRegistry;
private final ImageClusteringService imageClusteringService;
@SneakyThrows
@ -45,18 +39,15 @@ public class ImageMessageReceiver {
JsonNode imageResponse = objectMapper.readTree(message.getBody());
log.info("image response"+imageResponse);
String dossierId = imageResponse.path("dossierId").asText();
String fileId = imageResponse.path("fileId").asText();
imageClusteringService.clusterImages(StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO));
addFileIdToTrace(fileId);
fileStatusService.setStatusAnalyse(dossierId, fileId, false);
log.info("Received message from {} for dossierId {} and fileId {}", MessagingConfiguration.IMAGE_SERVICE_RESPONSE_QUEUE, dossierId, fileId);
}

View File

@ -4,10 +4,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.exc.ValueInstantiationException;
import com.iqser.red.service.persistence.management.v1.processor.configuration.MessagingConfiguration;
import com.iqser.red.service.persistence.management.v1.processor.migration.SaasMigrationService;
import com.iqser.red.service.persistence.management.v1.processor.model.websocket.AnalyseStatus;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusProcessingUpdateService;
import com.iqser.red.service.persistence.management.v1.processor.service.FileStatusService;
import com.iqser.red.service.persistence.management.v1.processor.service.WebsocketService;
import com.iqser.red.service.persistence.management.v1.processor.service.layoutparsing.LayoutParsingRequestIdentifierService;
import com.iqser.red.service.persistence.management.v1.processor.service.persistence.SaasMigrationStatusPersistenceService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
@ -37,7 +35,7 @@ public class LayoutParsingFinishedMessageReceiver {
private final LayoutParsingRequestIdentifierService layoutParsingRequestIdentifierService;
private final SaasMigrationStatusPersistenceService saasMigrationStatusPersistenceService;
private final SaasMigrationService saasMigrationService;
private final WebsocketService websocketService;
private final ImageSimilarityService imageSimiliarityService;
@SneakyThrows
@ -50,6 +48,11 @@ public class LayoutParsingFinishedMessageReceiver {
return;
}
String templateId = "";
String storageId = StorageIdUtils.getStorageId(dossierId, fileId, FileType.DOCUMENT_STRUCTURE);
fileStatusService.setStatusAnalyse(dossierId, fileId, layoutParsingRequestIdentifierService.parsePriority(response.identifier()));
var dossierId = layoutParsingRequestIdentifierService.parseDossierId(response.identifier());
var fileId = layoutParsingRequestIdentifierService.parseFileId(response.identifier());
@ -58,6 +61,7 @@ public class LayoutParsingFinishedMessageReceiver {
layoutParsingRequestIdentifierService.parsePriority(response.identifier()));
fileStatusService.updateLayoutProcessedTime(layoutParsingRequestIdentifierService.parseFileId(response.identifier()));
imageSimiliarityService.saveImages(templateId, dossierId, fileId, storageId);
websocketService.sendAnalysisEvent(dossierId, fileId, AnalyseStatus.LAYOUT_UPDATE, fileStatusService.getStatus(fileId).getNumberOfAnalyses() + 1);
log.info("Received message {} in {}", response, LayoutParsingQueueNames.LAYOUT_PARSING_FINISHED_EVENT_QUEUE);

View File

@ -0,0 +1,31 @@
package com.iqser.red.service.persistence.service.v1.api.shared.model.image;
import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.Scope;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.ToString;
/**
 * Request body of the image similarity search.
 */
@Data
@ToString
@Builder
@AllArgsConstructor
@NoArgsConstructor
@Schema(description = "Image Similiarity Search request containing information about centroId, distance and scope")
public class ImageSimilaritySearchRequest {

    @NonNull
    @Schema(description = "id of the central image")
    private String centroId;

    // No @NonNull here: a primitive can never be null, so the annotation only caused a Lombok warning.
    @Schema(description = "the manhattan distance used in the similiarity search")
    private double distance;

    @NonNull
    @Schema(description = "the scope for the similiarity search")
    private Scope scope;

}

View File

@ -0,0 +1,28 @@
package com.iqser.red.service.persistence.service.v1.api.shared.model.image;
import java.util.ArrayList;
import java.util.List;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.experimental.FieldDefaults;
/**
 * Result of the image similarity search, returned as two lists.
 *
 * <p>NOTE(review): the controller fills both lists in the same iteration, so entry {@code i} of
 * each list presumably describes the same image - confirm this is the intended contract.
 */
@Data
@Builder
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@Schema(description = "Object containing the result of the image similiarity search")
public class ImageSimilaritySearchResponse {

    @Builder.Default
    @Schema(description = "list of ids of similiar images")
    List<String> similarImagesIds = new ArrayList<>();

    // The description previously repeated the one of the id list.
    @Builder.Default
    @Schema(description = "list of thumbnails of similar images")
    List<Byte> similarImagesThumbnails = new ArrayList<>();

}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.persistence.service.v1.api.shared.model.utils;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Restricts an image similarity search to a template, a dossier and/or a file.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Scope {

    /** Id of the template to search in. */
    private String templateId;

    /** Id of the dossier to search in. */
    private String dossierId;

    /** Id of the file to search in. */
    private String fileId;

}

View File

@ -0,0 +1,40 @@
package com.iqser.red.service.persistence.service.v1.api.shared.mongo.document;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.index.Indexed;
import org.springframework.data.mongodb.core.mapping.Document;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
/**
 * MongoDB document of the {@code images} collection describing a single image.
 *
 * <p>Equality and hash code are based on {@link #imageId} only.
 */
@Document(collection = "images")
@NoArgsConstructor
@AllArgsConstructor
@Getter
@Setter
@ToString
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class ImageDocument {

    /** Numeric representation of the image, used for distance computations. */
    @Indexed
    private double[] featureVector;

    /** Label of the image; NOTE(review): not populated anywhere in view - confirm its meaning. */
    private String label;

    /** Unique id of the image; the MongoDB document id. */
    @Id
    @EqualsAndHashCode.Include
    private String imageId;

    /** Id of the template the image belongs to. */
    private String templateId;

    /** Id of the dossier the image belongs to. */
    private String dossierId;

    /** Id of the file the image was found in. */
    private String fileId;

    /** Thumbnail of the image; NOTE(review): a single {@code Byte} - confirm the intended type. */
    private Byte thumbnail;

}

View File

@ -0,0 +1,12 @@
package com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository;
import org.springframework.data.mongodb.repository.MongoRepository;
import org.springframework.stereotype.Repository;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.document.ImageDocument;
/**
 * Spring Data repository for {@link ImageDocument}s, keyed by the image id.
 *
 * <p>Only the inherited {@link MongoRepository} operations are used; no custom queries are declared.
 */
@Repository
public interface ImageDocumentRepository extends MongoRepository<ImageDocument, String> {

}

View File

@ -0,0 +1,103 @@
package com.iqser.red.service.persistence.service.v1.api.shared.mongo.service;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.springframework.data.mongodb.core.MongoOperations;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.utils.Scope;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.document.ImageDocument;
import com.iqser.red.service.persistence.service.v1.api.shared.mongo.repository.ImageDocumentRepository;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
/**
 * Persistence and similarity lookup for {@link ImageDocument}s.
 *
 * <p>Similarity is the Manhattan distance between feature vectors and is computed in memory
 * over all stored images.
 */
@Service
@Slf4j
public class ImageMongoService {

    private final MongoOperations mongoOperations;
    private final ImageDocumentRepository imageDocumentRepository;


    public ImageMongoService(MongoOperations mongoOperations, ImageDocumentRepository imageDocumentRepository) {

        this.mongoOperations = mongoOperations;
        this.imageDocumentRepository = imageDocumentRepository;
    }


    /**
     * Looks up an image by its id.
     *
     * @param imageId id of the image
     * @return the stored image, or {@code null} if no image with this id exists
     */
    public ImageDocument findById(String imageId) {

        Optional<ImageDocument> image = imageDocumentRepository.findById(imageId);
        return image.orElse(null);
    }


    /**
     * Returns all stored images inside {@code scope} whose Manhattan distance to
     * {@code centralImage} is at most {@code maxDistance}. The central image itself is never
     * part of the result.
     *
     * <p>Images whose feature vector is missing or has a different length than the central
     * vector cannot be compared and are left out.
     *
     * @param centralImage image to compare against
     * @param maxDistance  inclusive upper bound of the distance
     * @param scope        restriction of the candidates; {@code null} means no restriction
     * @return the similar images; empty when the central image or its feature vector is missing
     */
    public List<ImageDocument> findSimilarImages(ImageDocument centralImage, double maxDistance, Scope scope) {

        if (centralImage == null || centralImage.getFeatureVector() == null) {
            log.warn("similarity search without central image or feature vector, returning no results");
            return List.of();
        }
        double[] centralFeatureVector = centralImage.getFeatureVector();

        return imageDocumentRepository.findAll()
                .stream()
                .filter(image -> !image.getImageId().equals(centralImage.getImageId()))
                .filter(image -> filterForScope(image, scope))
                .filter(image -> isComparable(image.getFeatureVector(), centralFeatureVector))
                .map(image -> new ImageDistancePair(image, calculateManhattanDistance(image.getFeatureVector(), centralFeatureVector)))
                .filter(pair -> pair.getDistance() <= maxDistance)
                .map(ImageDistancePair::getImageDocument)
                .collect(Collectors.toList());
    }


    /**
     * Checks whether an image lies inside the scope. Every id set on the scope must match; an
     * id that is {@code null} or empty does not restrict the result.
     */
    private boolean filterForScope(ImageDocument image, Scope scope) {

        if (scope == null) {
            return true;
        }
        // All given ids are combined, so a narrower id is not ignored when a wider one is set too.
        return matchesIfSet(scope.getTemplateId(), image.getTemplateId())
               && matchesIfSet(scope.getDossierId(), image.getDossierId())
               && matchesIfSet(scope.getFileId(), image.getFileId());
    }


    /** Returns {@code true} when no expected id is given or when it equals the actual id. */
    private static boolean matchesIfSet(String expected, String actual) {

        return expected == null || expected.isEmpty() || expected.equals(actual);
    }


    /** Two vectors are comparable when both exist and have the same number of components. */
    private static boolean isComparable(double[] vector1, double[] vector2) {

        return vector1 != null && vector2 != null && vector1.length == vector2.length;
    }


    /** Sums the absolute component differences of two vectors of equal length. */
    private double calculateManhattanDistance(double[] vector1, double[] vector2) {

        double distance = 0.0;
        for (int i = 0; i < vector1.length; i++) {
            distance += Math.abs(vector1[i] - vector2[i]);
        }
        return distance;
    }


    /**
     * Persists the given images.
     *
     * @param imageDocuments images to store
     */
    public void saveImages(List<ImageDocument> imageDocuments) {

        imageDocumentRepository.saveAll(imageDocuments);
    }


    /** An image together with its distance to the central image of a search. */
    @Getter
    private static class ImageDistancePair {

        private final ImageDocument imageDocument;
        private final double distance;


        public ImageDistancePair(ImageDocument imageDocument, double distance) {

            this.imageDocument = imageDocument;
            this.distance = distance;
        }

    }

}