diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingsClassifierTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingsClassifierTest.java
new file mode 100644
index 0000000..f3fd281
--- /dev/null
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/RulingsClassifierTest.java
@@ -0,0 +1,100 @@
+package com.knecon.fforesight.service.layoutparser.server.services;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
+import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
+import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
+import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
+import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
+import com.knecon.fforesight.service.layoutparser.processor.services.TextRulingsClassifier;
+import com.knecon.fforesight.service.layoutparser.processor.utils.RectangularIntersectionFinder;
+
+import lombok.SneakyThrows;
+
+/**
+ * Checks how cleaned rulings are classified: as underline or strikethrough marks on text,
+ * or as table lines. Each test runs {@link RulingCleaningService}, {@link RectangularIntersectionFinder}
+ * and {@link TextRulingsClassifier} over every page of a PDF fixture.
+ */
+public class RulingsClassifierTest {
+
+    /**
+     * Classifies the rulings of {@code RotateTextWithRulingsTestFile.pdf} and expects, on every page, four
+     * strikethrough rulings, four underline rulings and no rulings left once text rulings are removed.
+     */
+    @Test
+    @SneakyThrows
+    public void textRulingExtractionTest() {
+
+        String fileName = "files/Minimal Examples/RotateTextWithRulingsTestFile.pdf";
+        List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
+        RulingCleaningService rulingCleaningService = new RulingCleaningService();
+
+        for (PageContents pageContent : pageContents) {
+            CleanRulings cleanRulings = rulingCleaningService.deduplicateAndStraightenRulings(Collections.emptyList(), pageContent.getRulings());
+            // NOTE(review): the result is discarded; presumably called for its effect on the rulings - confirm.
+            RectangularIntersectionFinder.find(cleanRulings.getHorizontals(), cleanRulings.getVerticals());
+            TextRulingsClassifier.classifyUnderlinedAndStrikethroughText(pageContent.getSortedTextPositionSequences(), cleanRulings);
+
+            // allMatch is vacuously true when no word passes the filter, so these two checks cannot detect a missing word.
+            assertTrue(pageContent.getSortedTextPositionSequences()
+                    .stream()
+                    .filter(word -> word.toString().equals("Underlined"))
+                    .allMatch(TextPositionSequence::isUnderline));
+            assertTrue(pageContent.getSortedTextPositionSequences()
+                    .stream()
+                    .filter(word -> word.toString().equals("Striketrough"))
+                    .allMatch(TextPositionSequence::isStrikethrough));
+
+            assertEquals(4,
+                    cleanRulings.buildAll()
+                            .stream()
+                            .filter(ruling -> ruling.getClassification().equals(Ruling.Classification.STRIKETROUGH))
+                            .count());
+            assertEquals(4,
+                    cleanRulings.buildAll()
+                            .stream()
+                            .filter(ruling -> ruling.getClassification().equals(Ruling.Classification.UNDERLINE))
+                            .count());
+            assertEquals(0, cleanRulings.withoutTextRulings().buildAll().size());
+        }
+
+    }
+
+
+    /**
+     * Classifies the rulings of {@code AbsolutelyEnormousTable.pdf} and expects, on every page, all 30
+     * horizontal and 144 vertical rulings to be returned by {@code getTableLines()} as well.
+     */
+    @Test
+    @SneakyThrows
+    public void tableRulingExtractionTest() {
+
+        String fileName = "files/SinglePages/AbsolutelyEnormousTable.pdf";
+        List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
+        RulingCleaningService rulingCleaningService = new RulingCleaningService();
+
+        for (PageContents pageContent : pageContents) {
+            CleanRulings cleanRulings = rulingCleaningService.deduplicateAndStraightenRulings(Collections.emptyList(), pageContent.getRulings());
+            // NOTE(review): the result is discarded; presumably called for its effect on the rulings - confirm.
+            RectangularIntersectionFinder.find(cleanRulings.getHorizontals(), cleanRulings.getVerticals());
+            TextRulingsClassifier.classifyUnderlinedAndStrikethroughText(pageContent.getSortedTextPositionSequences(), cleanRulings);
+
+            assertEquals(30, cleanRulings.getHorizontals().size());
+            assertEquals(30, cleanRulings.getTableLines().getHorizontals().size());
+
+            assertEquals(144, cleanRulings.getVerticals().size());
+            assertEquals(144, cleanRulings.getTableLines().getVerticals().size());
+        }
+
+    }
+
+}
diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/TextRulingsClassifierTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/TextRulingsClassifierTest.java
deleted file mode 100644
index dc6e8ae..0000000
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/services/TextRulingsClassifierTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.knecon.fforesight.service.layoutparser.server.services;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.util.Collections;
-import java.util.List;
-
-import org.junit.jupiter.api.Test;
-
-import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
-import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
-import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
-import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
-import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
-import com.knecon.fforesight.service.layoutparser.processor.services.TextRulingsClassifier;
-import com.knecon.fforesight.service.layoutparser.processor.utils.RectangularIntersectionFinder;
-
-import lombok.SneakyThrows;
-
-public class TextRulingsClassifierTest {
-
-    @Test
-    @SneakyThrows
-    public void textRulingExtractionTest() {
-
-        String fileName = "files/Minimal Examples/RotateTextWithRulingsTestFile.pdf";
-        List pageContents = PageContentExtractor.getSortedPageContents(fileName);
-        RulingCleaningService rulingCleaningService = new RulingCleaningService();
-
-        for (PageContents pageContent : pageContents) {
-            CleanRulings cleanRulings = rulingCleaningService.deduplicateAndStraightenRulings(Collections.emptyList(), pageContent.getRulings());
-            RectangularIntersectionFinder.find(cleanRulings.getHorizontals(), cleanRulings.getVerticals());
-            TextRulingsClassifier.classifyUnderlinedAndStrikethroughText(pageContent.getSortedTextPositionSequences(), cleanRulings);
-
-            assertTrue(pageContent.getSortedTextPositionSequences().stream().filter(word -> word.toString().equals("Underlined")).allMatch(TextPositionSequence::isUnderline));
-            assertTrue(pageContent.getSortedTextPositionSequences().stream().filter(word -> word.toString().equals("Striketrough")).allMatch(TextPositionSequence::isStrikethrough));
-
-            assertEquals(4, cleanRulings.buildAll().stream().filter(ruling -> ruling.getClassification().equals(Ruling.Classification.STRIKETROUGH)).count());
-            assertEquals(4, cleanRulings.buildAll().stream().filter(ruling -> ruling.getClassification().equals(Ruling.Classification.UNDERLINE)).count());
-        }
-
-    }
-
-}