From ec50eca15bbfcbec4bc2a3ed213bc599ffa3512a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?=
Date: Wed, 11 Sep 2024 13:38:35 +0200
Subject: [PATCH] RED-9975: extend SectionIdentifier to alphanumeric

---
 .../document/nodes/SectionIdentifier.java     | 66 ++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java
index 0d3cdbaa..16ac4c48 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java
@@ -3,11 +3,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
+import lombok.Getter;
 import lombok.experimental.FieldDefaults;
 
 /**
@@ -17,15 +19,19 @@ import lombok.experimental.FieldDefaults;
 @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class SectionIdentifier {
 
-    static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
+    public static Pattern numericalIdentifierPattern = Pattern.compile("^[\\s]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?");
+    public static Pattern alphanumericIdentifierPattern = Pattern.compile("^[\\s]?[A-Za-z][\\s.,;]?(\\d+)[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?(\\d+)?[\\s.,;]?");
 
-    private enum Format {
+    protected enum Format {
         EMPTY,
         NUMERICAL,
+        ALPHANUMERIC,
         DOCUMENT
     }
 
+    @Getter
     Format format;
+    @Getter
     String identifierString;
     List identifiers;
     boolean asChild;
@@ -47,6 +53,10 @@ public class SectionIdentifier {
         if (numericalIdentifierMatcher.find()) {
             return buildNumericalSectionIdentifier(headline, numericalIdentifierMatcher);
         }
+        Matcher alphanumericIdentifierMatcher = alphanumericIdentifierPattern.matcher(headline);
+        if (alphanumericIdentifierMatcher.find()) {
+            return buildAlphanumericSectionIdentifier(headline, alphanumericIdentifierMatcher);
+        }
         // more formats here
         return SectionIdentifier.empty();
     }
@@ -105,6 +115,32 @@ public class SectionIdentifier {
     }
 
 
+    /**
+     * Builds the identifier for a headline that starts with a letter followed by numbers, e.g. "A.1.2".
+     * The letter becomes the first level via its alphabet position (A/a = 1, B/b = 2, ...); up to three numbers follow.
+     *
+     * @param headline                      the headline the matcher was created for
+     * @param alphanumericIdentifierMatcher matcher of {@link #alphanumericIdentifierPattern} on which find() already succeeded
+     * @return the alphanumeric identifier, e.g. levels [1, 1, 2] for "A.1.2"
+     */
+    private static SectionIdentifier buildAlphanumericSectionIdentifier(String headline, Matcher alphanumericIdentifierMatcher) {
+
+        String identifierString = headline.substring(alphanumericIdentifierMatcher.start(), alphanumericIdentifierMatcher.end());
+        // The pattern permits one leading whitespace character; strip it so the letter itself (not the blank) is mapped.
+        String alphanumericIdentifier = alphanumericIdentifierMatcher.group(0).strip().substring(0, 1).toUpperCase(Locale.ENGLISH);
+        List<Integer> identifiers = new LinkedList<>();
+        identifiers.add(alphanumericIdentifier.charAt(0) - 'A' + 1);
+        for (int i = 1; i <= 3; i++) {
+            String numericalIdentifier = alphanumericIdentifierMatcher.group(i);
+            if (numericalIdentifier == null || numericalIdentifier.equals("0") || numericalIdentifier.isBlank()) {
+                break;
+            }
+            identifiers.add(Integer.parseInt(numericalIdentifier.trim()));
+        }
+        return new SectionIdentifier(Format.ALPHANUMERIC, identifierString, identifiers.stream().toList(), false);
+    }
+
+
     /**
      * Determines if the current section is the parent of the given section.
      *
@@ -155,4 +191,30 @@ public class SectionIdentifier {
         return identifierString;
     }
 
+
+    /**
+     * @return true, when no identifier could be found
+     */
+    public boolean isEmpty() {
+
+        return this.format.equals(Format.EMPTY);
+    }
+
+
+    /**
+     * The level of a SectionIdentifier corresponds with the count of identifiers. E.g. 1.1 is level 2, and 1. is level 1
+     *
+     * @return the level of the Headline
+     */
+    public int level() {
+
+        return identifiers.size();
+    }
+
+
+    protected List getIdentifiers() {
+
+        return identifiers;
+    }
+
 }