DM-307: Added BodyTextFrameService logic for SCM prototype to fix some missing...

This commit is contained in:
Dominique Eifländer 2023-07-06 09:53:06 +02:00
parent 390bb7d381
commit edc5833bce
3 changed files with 34 additions and 1 deletions

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.s
import java.util.List;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
@ -17,6 +18,9 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.ut
@Service
public class BodyTextFrameService {
@Value("${application.type:RedactManager}")
private String applicationType;
private static final float APPROXIMATE_HEADER_LINE_COUNT = 2.9f;
@ -87,7 +91,8 @@ public class BodyTextFrameService {
}
float approxLineCount = PositionUtils.getApproxLineCount(textBlock);
if (approxLineCount < APPROXIMATE_HEADER_LINE_COUNT) {
if ((applicationType.equals("DocuMine") && approxLineCount < APPROXIMATE_HEADER_LINE_COUNT && textBlock.getMaxY() >= page.getPageHeight() - (page.getPageHeight() / 10))
|| (applicationType.equals("RedactManager") && approxLineCount < APPROXIMATE_HEADER_LINE_COUNT)){
continue;
}

View File

@ -89,6 +89,34 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
}
/**
 * Runs structure analysis, analysis and annotation on the
 * "SOLICITA_VICTRATO-GOLD-II_Item 15_Toxicidade Oral Aguda" problem document
 * and writes the annotated PDF to the temporary directory as "Documine.pdf".
 *
 * <p>NOTE(review): this method makes no assertions itself; presumably the
 * written PDF is inspected manually to check that the text at the top of
 * page 13 is not treated as a header - confirm.
 *
 * @throws IOException declared by the method; raised e.g. when the annotated PDF cannot be written
 */
@Test
public void testTopOfPage13InNotHeader() throws IOException {

    // The fix in BodyTextFrameService breaks header detection in
    // files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf
    // TODO: unify the logic
    final String problemDocument = "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 15_Toxicidade Oral Aguda.pdf";
    final AnalyzeRequest request = uploadFileToStorage(problemDocument);

    System.out.println("Start Full integration test");
    final StructureAnalyzeRequest structureRequest = new StructureAnalyzeRequest(request.getDossierId(), request.getFileId());
    analyzeService.analyzeDocumentStructure(structureRequest);
    System.out.println("Finished structure analysis");

    // The analysis result is not read by this test; the call is kept for its effects.
    analyzeService.analyze(request);
    System.out.println("Finished analysis");

    // The redaction log is still fetched, but its value is not read by this test.
    redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);

    final AnnotateRequest annotateRequest = AnnotateRequest.builder()
            .dossierId(TEST_DOSSIER_ID)
            .fileId(TEST_FILE_ID)
            .build();
    final AnnotateResponse annotated = annotationService.annotate(annotateRequest);

    // Write the annotated document to the temporary directory.
    final String annotatedPdfPath = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
    try (FileOutputStream pdfOut = new FileOutputStream(annotatedPdfPath)) {
        pdfOut.write(annotated.getDocument());
    }
}
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})