Merge branch 'thread-safe-hcs-fields' into 'main'

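Fixed a thread-safety issue with the mutable instance fields of the HeadlineClassificationService: each classifyDocument call now creates its own instance instead of using a shared injected one.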

See merge request fforesight/layout-parser!167
This commit is contained in:
Maverick Studer 2024-06-06 14:51:24 +02:00
commit 797602e373
4 changed files with 15 additions and 30 deletions

View File

@ -21,33 +21,31 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class ClarifyndClassificationService {
private final HeadlineClassificationService headlineClassificationService;
public void classifyDocument(ClassificationDocument document) {
List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
headlineClassificationService.resetContext();
HeadlineClassificationService headlineClassificationService = new HeadlineClassificationService();
for (ClassificationPage page : document.getPages()) {
classifyPage(page, document, headlineFontSizes);
classifyPage(headlineClassificationService, page, document, headlineFontSizes);
}
}
private void classifyPage(ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
private void classifyPage(HeadlineClassificationService headlineClassificationService, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
for (AbstractPageBlock textBlock : page.getTextBlocks()) {
if (textBlock instanceof TextPageBlock) {
classifyBlock((TextPageBlock) textBlock, page, document, headlineFontSizes);
classifyBlock(headlineClassificationService, (TextPageBlock) textBlock, page, document, headlineFontSizes);
}
}
}
private void classifyBlock(TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
private void classifyBlock(HeadlineClassificationService headlineClassificationService, TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
var bodyTextFrame = page.getBodyTextFrame();

View File

@ -24,7 +24,6 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class DocuMineClassificationService {
private final HeadlineClassificationService headlineClassificationService;
private static final Pattern pattern = Pattern.compile("^([1-9]\\d?\\.){1,3}\\d{1,2}\\.?\\s[0-9A-Za-z \\[\\]]{2,50}", Pattern.CASE_INSENSITIVE);
private static final Pattern pattern2 = Pattern.compile("\\p{L}{3,}", Pattern.CASE_INSENSITIVE);
private static final Pattern pattern3 = Pattern.compile("^(\\d{1,1}\\.){1,3}\\d{1,2}\\.?\\s[a-z]{1,2}\\/[a-z]{1,2}.*");
@ -36,25 +35,25 @@ public class DocuMineClassificationService {
log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
headlineClassificationService.resetContext();
HeadlineClassificationService headlineClassificationService = new HeadlineClassificationService();
for (ClassificationPage page : document.getPages()) {
classifyPage(page, document, headlineFontSizes);
classifyPage(headlineClassificationService, page, document, headlineFontSizes);
}
}
private void classifyPage(ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
private void classifyPage(HeadlineClassificationService headlineClassificationService, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
for (AbstractPageBlock textBlock : page.getTextBlocks()) {
if (textBlock instanceof TextPageBlock) {
classifyBlock((TextPageBlock) textBlock, page, document, headlineFontSizes);
classifyBlock(headlineClassificationService, (TextPageBlock) textBlock, page, document, headlineFontSizes);
}
}
}
private void classifyBlock(TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
private void classifyBlock(HeadlineClassificationService headlineClassificationService, TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
log.debug("headlineFontSizes: {}", headlineFontSizes);
var bodyTextFrame = page.getBodyTextFrame();

View File

@ -2,15 +2,12 @@ package com.knecon.fforesight.service.layoutparser.processor.services.classifica
import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
import lombok.Getter;
import lombok.Setter;
@Service
@Getter
@Setter
public class HeadlineClassificationService {
@ -19,13 +16,6 @@ public class HeadlineClassificationService {
PageBlockType originalClassifiedBlockType;
TextPageBlock lastHeadlineFromOutline;
public void resetContext() {
setLastHeadline(null);
setOriginalClassifiedBlockType(null);
setLastHeadlineFromOutline(null);
}
public void setLastHeadlineFromOutline(TextPageBlock lastHeadlineFromOutline) {
this.lastHeadlineFromOutline = lastHeadlineFromOutline;

View File

@ -22,8 +22,6 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class RedactManagerClassificationService {
private final HeadlineClassificationService headlineClassificationService;
public void classifyDocument(ClassificationDocument document) {
@ -31,25 +29,25 @@ public class RedactManagerClassificationService {
log.debug("Document FontSize counters are: {}", document.getFontSizeCounter().getCountPerValue());
headlineClassificationService.resetContext();
HeadlineClassificationService headlineClassificationService = new HeadlineClassificationService();
for (ClassificationPage page : document.getPages()) {
classifyPage(page, document, headlineFontSizes);
classifyPage(headlineClassificationService, page, document, headlineFontSizes);
}
}
private void classifyPage(ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
private void classifyPage(HeadlineClassificationService headlineClassificationService, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
for (AbstractPageBlock textBlock : page.getTextBlocks()) {
if (textBlock instanceof TextPageBlock) {
classifyBlock((TextPageBlock) textBlock, page, document, headlineFontSizes);
classifyBlock(headlineClassificationService, (TextPageBlock) textBlock, page, document, headlineFontSizes);
}
}
}
private void classifyBlock(TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
private void classifyBlock(HeadlineClassificationService headlineClassificationService, TextPageBlock textBlock, ClassificationPage page, ClassificationDocument document, List<Float> headlineFontSizes) {
var bodyTextFrame = page.getBodyTextFrame();