From 153cef615e53185233d01425c7aa401ae221248e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 15 May 2026 11:49:54 +0200 Subject: [PATCH] Move a couple of `src/core/` string helper functions into their own file Given that the various utility-files naturally increase in size over time, it shouldn't hurt to shorten `src/core/core_utils.js` a little bit by moving a few of its string helper functions to their own file. --- src/core/annotation.js | 3 +- src/core/core_utils.js | 43 -------------------- src/core/default_appearance.js | 2 +- src/core/editor/pdf_editor.js | 2 +- src/core/string_utils.js | 62 +++++++++++++++++++++++++++++ src/core/struct_tree.js | 3 +- test/unit/clitests.json | 1 + test/unit/core_utils_spec.js | 53 ------------------------- test/unit/jasmine-boot.js | 1 + test/unit/string_utils_spec.js | 72 ++++++++++++++++++++++++++++++++++ 10 files changed, 141 insertions(+), 101 deletions(-) create mode 100644 src/core/string_utils.js create mode 100644 test/unit/string_utils_spec.js diff --git a/src/core/annotation.js b/src/core/annotation.js index b1b922af6..208d26a6b 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -53,8 +53,6 @@ import { numberToString, RESOURCES_KEYS_OPERATOR_LIST, RESOURCES_KEYS_TEXT_CONTENT, - stringToAsciiOrUTF16BE, - stringToUTF16String, } from "./core_utils.js"; import { createDefaultAppearance, @@ -66,6 +64,7 @@ import { import { DateFormats, TimeFormats } from "../shared/scripting_utils.js"; import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js"; import { Stream, StringStream } from "./stream.js"; +import { stringToAsciiOrUTF16BE, stringToUTF16String } from "./string_utils.js"; import { BaseStream } from "./base_stream.js"; import { bidi } from "./bidi.js"; import { Catalog } from "./catalog.js"; diff --git a/src/core/core_utils.js b/src/core/core_utils.js index 009cc8c79..e1f6bacfe 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -684,45 +684,6 @@ function getNewAnnotationsMap(annotationStorage) { return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null; } -// If the string is null or undefined then it is returned as is. -function stringToAsciiOrUTF16BE(str) { - if (str === null || str === undefined) { - return str; - } - return isAscii(str) ? str : stringToUTF16String(str, /* bigEndian = */ true); -} - -function isAscii(str) { - if (typeof str !== "string") { - return false; - } - return !str || /^[\x00-\x7F]*$/.test(str); -} - -function stringToUTF16HexString(str) { - const buf = []; - for (let i = 0, ii = str.length; i < ii; i++) { - const char = str.charCodeAt(i); - buf.push(Util.hexNums[(char >> 8) & 0xff], Util.hexNums[char & 0xff]); - } - return buf.join(""); -} - -function stringToUTF16String(str, bigEndian = false) { - const buf = []; - if (bigEndian) { - buf.push("\xFE\xFF"); - } - for (let i = 0, ii = str.length; i < ii; i++) { - const char = str.charCodeAt(i); - buf.push( - String.fromCharCode((char >> 8) & 0xff), - String.fromCharCode(char & 0xff) - ); - } - return buf.join(""); -} - function getModificationDate(date = new Date()) { if (!(date instanceof Date)) { date = new Date(date); @@ -782,7 +743,6 @@ export { getRotationMatrix, getSizeInBytes, IDENTITY_MATRIX, - isAscii, isBooleanArray, isNumberArray, isWhiteSpace, @@ -798,9 +758,6 @@ export { recoverJsURL, RESOURCES_KEYS_OPERATOR_LIST, RESOURCES_KEYS_TEXT_CONTENT, - stringToAsciiOrUTF16BE, - stringToUTF16HexString, - stringToUTF16String, toRomanNumerals, validateCSSFont, validateFontName, diff --git a/src/core/default_appearance.js b/src/core/default_appearance.js index 5756a388c..01114baeb 100644 --- a/src/core/default_appearance.js +++ b/src/core/default_appearance.js @@ -18,7 +18,6 @@ import { escapePDFName, getRotationMatrix, numberToString, - stringToUTF16HexString, } from "./core_utils.js"; import { Dict, Name } from "./primitives.js"; import { @@ -33,6 +32,7 @@ import { EvaluatorPreprocessor } from "./evaluator.js"; import { LocalColorSpaceCache } from "./image_utils.js"; import { PDFFunctionFactory } from "./function.js"; import { StringStream } from "./stream.js"; +import { stringToUTF16HexString } from "./string_utils.js"; class DefaultAppearanceEvaluator extends EvaluatorPreprocessor { constructor(str) { diff --git a/src/core/editor/pdf_editor.js b/src/core/editor/pdf_editor.js index 4e3b3b034..0cb9b7de7 100644 --- a/src/core/editor/pdf_editor.js +++ b/src/core/editor/pdf_editor.js @@ -25,7 +25,6 @@ import { getInheritableProperty, getModificationDate, getNewAnnotationsMap, - stringToAsciiOrUTF16BE, } from "../core_utils.js"; import { Dict, isName, Name, Ref, RefSet, RefSetCache } from "../primitives.js"; import { incrementalUpdate, writeValue } from "../writer.js"; @@ -34,6 +33,7 @@ import { stringToBytes, stringToPDFString } from "../../shared/util.js"; import { AnnotationFactory } from "../annotation.js"; import { BaseStream } from "../base_stream.js"; import { StringStream } from "../stream.js"; +import { stringToAsciiOrUTF16BE } from "../string_utils.js"; const MAX_LEAVES_PER_PAGES_NODE = 16; const MAX_IN_NAME_TREE_NODE = 64; diff --git a/src/core/string_utils.js b/src/core/string_utils.js new file mode 100644 index 000000000..1ef84ec62 --- /dev/null +++ b/src/core/string_utils.js @@ -0,0 +1,62 @@ +/* Copyright 2019 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Util } from "../shared/util.js"; + +function isAscii(str) { + if (typeof str !== "string") { + return false; + } + return !str || /^[\x00-\x7F]*$/.test(str); +} + +// If the string is null or undefined then it is returned as is. +function stringToAsciiOrUTF16BE(str) { + if (str === null || str === undefined) { + return str; + } + return isAscii(str) ? str : stringToUTF16String(str, /* bigEndian = */ true); +} + +function stringToUTF16HexString(str) { + const buf = []; + for (let i = 0, ii = str.length; i < ii; i++) { + const char = str.charCodeAt(i); + buf.push(Util.hexNums[(char >> 8) & 0xff], Util.hexNums[char & 0xff]); + } + return buf.join(""); +} + +function stringToUTF16String(str, bigEndian = false) { + const buf = []; + if (bigEndian) { + buf.push("\xFE\xFF"); + } + for (let i = 0, ii = str.length; i < ii; i++) { + const char = str.charCodeAt(i); + buf.push( + String.fromCharCode((char >> 8) & 0xff), + String.fromCharCode(char & 0xff) + ); + } + return buf.join(""); +} + +export { + isAscii, + stringToAsciiOrUTF16BE, + stringToUTF16HexString, + stringToUTF16String, +}; diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index f6e6a8422..3fab02247 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -21,9 +21,10 @@ import { warn, } from "../shared/util.js"; import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js"; -import { lookupNormalRect, stringToAsciiOrUTF16BE } from "./core_utils.js"; import { BaseStream } from "./base_stream.js"; +import { lookupNormalRect } from "./core_utils.js"; import { NumberTree } from "./name_number_tree.js"; +import { stringToAsciiOrUTF16BE } from "./string_utils.js"; const MAX_DEPTH = 40; diff --git a/test/unit/clitests.json b/test/unit/clitests.json index 3ab4dc148..ce261c249 100644 --- a/test/unit/clitests.json +++ b/test/unit/clitests.json @@ -49,6 +49,7 @@ "postscript_spec.js", "primitives_spec.js", "stream_spec.js", + "string_utils_spec.js", "struct_tree_spec.js", "svg_factory_spec.js", "text_layer_spec.js", diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js index 12f0a4d91..a9c6bd8f0 100644 --- a/test/unit/core_utils_spec.js +++ b/test/unit/core_utils_spec.js @@ -22,13 +22,10 @@ import { getInheritableProperty, getModificationDate, getSizeInBytes, - isAscii, isWhiteSpace, numberToString, parseXFAPath, recoverJsURL, - stringToUTF16HexString, - stringToUTF16String, toRomanNumerals, validateCSSFont, } from "../../src/core/core_utils.js"; @@ -416,56 +413,6 @@ describe("core_utils", function () { }); }); - describe("isAscii", function () { - it("handles ascii/non-ascii strings", function () { - expect(isAscii("hello world")).toEqual(true); - expect(isAscii("こんにちは世界の")).toEqual(false); - expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual( - false - ); - expect(isAscii("")).toEqual(true); - expect(isAscii(123)).toEqual(false); - expect(isAscii(null)).toEqual(false); - expect(isAscii(undefined)).toEqual(false); - }); - }); - - describe("stringToUTF16HexString", function () { - it("should encode a string in UTF16 hexadecimal format", function () { - expect(stringToUTF16HexString("hello world")).toEqual( - "00680065006c006c006f00200077006f0072006c0064" - ); - - expect(stringToUTF16HexString("こんにちは世界の")).toEqual( - "30533093306b3061306f4e16754c306e" - ); - }); - }); - - describe("stringToUTF16String", function () { - it("should encode a string in UTF16", function () { - expect(stringToUTF16String("hello world")).toEqual( - "\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d" - ); - - expect(stringToUTF16String("こんにちは世界の")).toEqual( - "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e" - ); - }); - - it("should encode a string in UTF16BE with a BOM", function () { - expect( - stringToUTF16String("hello world", /* bigEndian = */ true) - ).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"); - - expect( - stringToUTF16String("こんにちは世界の", /* bigEndian = */ true) - ).toEqual( - "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e" - ); - }); - }); - describe("deepCompare", function () { it("should return true for the same reference", function () { const dict = new Dict(); diff --git a/test/unit/jasmine-boot.js b/test/unit/jasmine-boot.js index 6ffb29784..fbbc7730e 100644 --- a/test/unit/jasmine-boot.js +++ b/test/unit/jasmine-boot.js @@ -96,6 +96,7 @@ async function initializePDFJS(callback) { "pdfjs-test/unit/primitives_spec.js", "pdfjs-test/unit/scripting_spec.js", "pdfjs-test/unit/stream_spec.js", + "pdfjs-test/unit/string_utils_spec.js", "pdfjs-test/unit/struct_tree_spec.js", "pdfjs-test/unit/svg_factory_spec.js", "pdfjs-test/unit/text_layer_spec.js", diff --git a/test/unit/string_utils_spec.js b/test/unit/string_utils_spec.js new file mode 100644 index 000000000..3b919acb5 --- /dev/null +++ b/test/unit/string_utils_spec.js @@ -0,0 +1,72 @@ +/* Copyright 2019 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { + isAscii, + stringToUTF16HexString, + stringToUTF16String, +} from "../../src/core/string_utils.js"; + +describe("string_utils", function () { + describe("isAscii", function () { + it("handles ascii/non-ascii strings", function () { + expect(isAscii("hello world")).toEqual(true); + expect(isAscii("こんにちは世界の")).toEqual(false); + expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual( + false + ); + expect(isAscii("")).toEqual(true); + expect(isAscii(123)).toEqual(false); + expect(isAscii(null)).toEqual(false); + expect(isAscii(undefined)).toEqual(false); + }); + }); + + describe("stringToUTF16HexString", function () { + it("should encode a string in UTF16 hexadecimal format", function () { + expect(stringToUTF16HexString("hello world")).toEqual( + "00680065006c006c006f00200077006f0072006c0064" + ); + + expect(stringToUTF16HexString("こんにちは世界の")).toEqual( + "30533093306b3061306f4e16754c306e" + ); + }); + }); + + describe("stringToUTF16String", function () { + it("should encode a string in UTF16", function () { + expect(stringToUTF16String("hello world")).toEqual( + "\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d" + ); + + expect(stringToUTF16String("こんにちは世界の")).toEqual( + "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e" + ); + }); + + it("should encode a string in UTF16BE with a BOM", function () { + expect( + stringToUTF16String("hello world", /* bigEndian = */ true) + ).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"); + + expect( + stringToUTF16String("こんにちは世界の", /* bigEndian = */ true) + ).toEqual( + "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e" + ); + }); + }); +});