DM-307: Improved paragraph splitting #50
@@ -49,7 +49,7 @@ public class DocuMineBlockificationService implements BlockificationService{
|
||||
Float splitX1 = null;
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean lineSeparation = word.getMinYDirAdj() - maxY > word.getHeight() * 1.25;
|
||||
boolean lineSeparation = prev != null && word.getMinYDirAdj() - maxY > Math.min(word.getHeight(), prev.getHeight()) * 1.5;
|
||||
boolean startFromTop = prev != null && word.getMinYDirAdj() < prev.getMinYDirAdj() - prev.getTextHeight();
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj();
|
||||
boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX;
|
||||
|
||||
@@ -45,9 +45,10 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
@Disabled
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/ProblemDocs/402-16_Fantom_ToxicidadeCutaneaAguda.pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).pdf",
|
||||
"files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).json");
|
||||
// AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).pdf",
|
||||
// "files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).json");
|
||||
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user