From 35a4d004fda0404e9b19e7112d25627cbde2a932 Mon Sep 17 00:00:00 2001 From: Philipp Schramm Date: Tue, 30 Nov 2021 11:05:06 +0100 Subject: [PATCH] RED-2756 Bugfix with redactions are not continuous and added some entries to dictionary --- .../service/RedactionLogCreatorService.java | 32 +++++++++++++++---- .../resources/dictionaries/CBI_address.txt | 6 ++++ .../resources/dictionaries/CBI_author.txt | 2 ++ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java index 3f0d9580..6075cf74 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/RedactionLogCreatorService.java @@ -22,8 +22,10 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities; import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; @Service +@Slf4j @RequiredArgsConstructor public class RedactionLogCreatorService { @@ -136,22 +138,27 @@ public class RedactionLogCreatorService { if (textPositions.size() == 1) { rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle()); } else { + float x = textPositions.get(0).getXDirAdj(); float y = textPositions.get(0).getYDirAdj(); + float width = textPositions.get(0).getWidth(); float height = textPositions.get(0).getHeightDir(); int startIndex = 0; for (int i = 1; i < textPositions.size(); i++) { + float xDirAdj = textPositions.get(i).getXDirAdj(); float yDirAdj = textPositions.get(i).getYDirAdj(); + float widthDir = textPositions.get(i).getWidth(); float heightDir = textPositions.get(i).getHeightDir(); - if (!isCharInSameLine(y, yDirAdj, height, heightDir)) { - + if (!(isCharInSameLine(y, yDirAdj, height, heightDir) && isCharClose(x, xDirAdj, width))) { rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page) .getRectangle()); y = yDirAdj; + width = widthDir; height = heightDir; startIndex = i; } + x = xDirAdj; } if (startIndex != textPositions.size()) { rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page) @@ -163,19 +170,30 @@ public class RedactionLogCreatorService { } + private boolean isCharClose(float x, float xDirAdj, float width) { + + float max = x + (5 * width); + if (xDirAdj < max) { + return true; + } + return false; + } + + private boolean isCharInSameLine(float y, float yCompare, float height, float heightCompare) { - float offsetHeight = heightCompare / 5; - float minHeight = heightCompare - offsetHeight; - float maxHeight = heightCompare + offsetHeight; + float offsetHeight = height / 2; + float minHeight = height - offsetHeight; + float maxHeight = height + offsetHeight; - float offsetY = heightCompare / 22; + float offsetY = height / 10; float minY = y - offsetY; float maxY = y + offsetY; - if (yCompare > minY && yCompare < maxY && height > minHeight && height < maxHeight) { + if (yCompare > minY && yCompare < maxY && heightCompare > minHeight && heightCompare < maxHeight) { return true; } + return false; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt index b6ad4398..415b6f41 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_address.txt @@ -1,3 +1,8 @@ +A & L Laboratories, 3505 Conestoga Drive, Fort Wayne, IN 46806-4413, Indiana, USA +Jealotts Hill Research Station, Bracknell +Jealott s Hill Research Station, Bracknell +Western Research Center, 120 +Fort Wayne, IN 46806-4413, Indiana Abandoned vineyard, Northern Italy ABC Analytical Bio-Chemistry Lab. Inc. ABC Analytical Bio-Chemistry Lab. Inc., Columbia, USA @@ -1633,6 +1638,7 @@ XenoTech, LLC, 16825 West 116th Street, Lenexa, KS, USA Zeneca Ag Products Zeneca Ag Products, Richmond, USA Zeneca Agrochemicals +ZENECA Agrochemicals Zeneca Agrochemicals, Fernhurst, United Kingdom Zeneca Agrochemicals, Jealott’s Hill Research Station, Bracknell, Berkshire, UK Zeneca Agrochemical s, Jealott’s Hill, United Kingdom diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt index 05f5dc2c..e2854b4c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/CBI_author.txt @@ -1,3 +1,5 @@ +Johnson R | +Weissler M S and Butters C A AD Hurt N Pengelly HA J Napper