Move a couple of src/core/ string helper functions into their own file

Given that the various utility-files naturally increase in size over time, it shouldn't hurt to shorten `src/core/core_utils.js` a little bit by moving a few of its string helper functions to their own file.
This commit is contained in:
Jonas Jenwald 2026-05-15 11:49:54 +02:00
parent d9491ffce3
commit 153cef615e
10 changed files with 141 additions and 101 deletions

View File

@ -53,8 +53,6 @@ import {
numberToString,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE,
stringToUTF16String,
} from "./core_utils.js";
import {
createDefaultAppearance,
@ -66,6 +64,7 @@ import {
import { DateFormats, TimeFormats } from "../shared/scripting_utils.js";
import { Dict, isName, isRefsEqual, Name, Ref, RefSet } from "./primitives.js";
import { Stream, StringStream } from "./stream.js";
import { stringToAsciiOrUTF16BE, stringToUTF16String } from "./string_utils.js";
import { BaseStream } from "./base_stream.js";
import { bidi } from "./bidi.js";
import { Catalog } from "./catalog.js";

View File

@ -684,45 +684,6 @@ function getNewAnnotationsMap(annotationStorage) {
return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
}
/**
 * Pick the encoding for a string: ASCII-only strings are returned untouched,
 * anything else is passed through `stringToUTF16String` with its `bigEndian`
 * argument set to `true`.
 *
 * @param {string|null|undefined} str - The string to encode.
 * @returns {string|null|undefined} `str` itself when it is `null`,
 *   `undefined` or ASCII-only; otherwise the result of `stringToUTF16String`.
 */
function stringToAsciiOrUTF16BE(str) {
  // `null`/`undefined` are handed back as-is, without consulting `isAscii`.
  const isMissing = str === undefined || str === null;
  if (isMissing || isAscii(str)) {
    return str;
  }
  return stringToUTF16String(str, /* bigEndian = */ true);
}
/**
 * Check whether a value is a string consisting only of characters in the
 * range 0x00-0x7F.
 *
 * @param {any} str - The value to inspect.
 * @returns {boolean} `true` for the empty string and for strings whose
 *   characters all fall in 0x00-0x7F; `false` for any non-string value.
 */
function isAscii(str) {
  // The pattern also matches the empty string, so no special case is needed.
  return typeof str === "string" && /^[\x00-\x7F]*$/.test(str);
}
/**
 * Build a string from the `Util.hexNums` entries of every UTF-16 code unit in
 * `str`: for each unit, the entry for its high byte followed by the entry for
 * its low byte.
 *
 * @param {string} str - The string to convert.
 * @returns {string} The concatenated `Util.hexNums` entries.
 */
function stringToUTF16HexString(str) {
  const hexPairs = [];
  let index = 0;
  while (index < str.length) {
    const codeUnit = str.charCodeAt(index++);
    const highByte = (codeUnit >> 8) & 0xff;
    const lowByte = codeUnit & 0xff;
    hexPairs.push(Util.hexNums[highByte], Util.hexNums[lowByte]);
  }
  return hexPairs.join("");
}
/**
 * Convert a string into a string of byte-valued characters, two per UTF-16
 * code unit, with the high byte always written before the low byte.
 *
 * @param {string} str - The string to encode.
 * @param {boolean} [bigEndian] - When truthy, the result starts with the
 *   characters `\xFE\xFF`. The order of the encoded bytes is the same
 *   regardless of this flag.
 * @returns {string} The encoded string.
 */
function stringToUTF16String(str, bigEndian = false) {
  let encoded = bigEndian ? "\xFE\xFF" : "";
  for (let pos = 0; pos < str.length; pos++) {
    const codeUnit = str.charCodeAt(pos);
    encoded += String.fromCharCode((codeUnit >> 8) & 0xff, codeUnit & 0xff);
  }
  return encoded;
}
function getModificationDate(date = new Date()) {
if (!(date instanceof Date)) {
date = new Date(date);
@ -782,7 +743,6 @@ export {
getRotationMatrix,
getSizeInBytes,
IDENTITY_MATRIX,
isAscii,
isBooleanArray,
isNumberArray,
isWhiteSpace,
@ -798,9 +758,6 @@ export {
recoverJsURL,
RESOURCES_KEYS_OPERATOR_LIST,
RESOURCES_KEYS_TEXT_CONTENT,
stringToAsciiOrUTF16BE,
stringToUTF16HexString,
stringToUTF16String,
toRomanNumerals,
validateCSSFont,
validateFontName,

View File

@ -18,7 +18,6 @@ import {
escapePDFName,
getRotationMatrix,
numberToString,
stringToUTF16HexString,
} from "./core_utils.js";
import { Dict, Name } from "./primitives.js";
import {
@ -33,6 +32,7 @@ import { EvaluatorPreprocessor } from "./evaluator.js";
import { LocalColorSpaceCache } from "./image_utils.js";
import { PDFFunctionFactory } from "./function.js";
import { StringStream } from "./stream.js";
import { stringToUTF16HexString } from "./string_utils.js";
class DefaultAppearanceEvaluator extends EvaluatorPreprocessor {
constructor(str) {

View File

@ -25,7 +25,6 @@ import {
getInheritableProperty,
getModificationDate,
getNewAnnotationsMap,
stringToAsciiOrUTF16BE,
} from "../core_utils.js";
import { Dict, isName, Name, Ref, RefSet, RefSetCache } from "../primitives.js";
import { incrementalUpdate, writeValue } from "../writer.js";
@ -34,6 +33,7 @@ import { stringToBytes, stringToPDFString } from "../../shared/util.js";
import { AnnotationFactory } from "../annotation.js";
import { BaseStream } from "../base_stream.js";
import { StringStream } from "../stream.js";
import { stringToAsciiOrUTF16BE } from "../string_utils.js";
const MAX_LEAVES_PER_PAGES_NODE = 16;
const MAX_IN_NAME_TREE_NODE = 64;

62
src/core/string_utils.js Normal file
View File

@ -0,0 +1,62 @@
/* Copyright 2019 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Util } from "../shared/util.js";
/**
 * Check whether a value is a string consisting only of characters in the
 * range 0x00-0x7F.
 *
 * @param {any} str - The value to inspect.
 * @returns {boolean} `true` for the empty string and for strings whose
 *   characters all fall in 0x00-0x7F; `false` for any non-string value.
 */
function isAscii(str) {
  // The pattern also matches the empty string, so no special case is needed.
  return typeof str === "string" && /^[\x00-\x7F]*$/.test(str);
}
/**
 * Pick the encoding for a string: ASCII-only strings are returned untouched,
 * anything else is passed through `stringToUTF16String` with its `bigEndian`
 * argument set to `true`.
 *
 * @param {string|null|undefined} str - The string to encode.
 * @returns {string|null|undefined} `str` itself when it is `null`,
 *   `undefined` or ASCII-only; otherwise the result of `stringToUTF16String`.
 */
function stringToAsciiOrUTF16BE(str) {
  // `null`/`undefined` are handed back as-is, without consulting `isAscii`.
  const isMissing = str === undefined || str === null;
  if (isMissing || isAscii(str)) {
    return str;
  }
  return stringToUTF16String(str, /* bigEndian = */ true);
}
/**
 * Build a string from the `Util.hexNums` entries of every UTF-16 code unit in
 * `str`: for each unit, the entry for its high byte followed by the entry for
 * its low byte.
 *
 * @param {string} str - The string to convert.
 * @returns {string} The concatenated `Util.hexNums` entries.
 */
function stringToUTF16HexString(str) {
  const hexPairs = [];
  let index = 0;
  while (index < str.length) {
    const codeUnit = str.charCodeAt(index++);
    const highByte = (codeUnit >> 8) & 0xff;
    const lowByte = codeUnit & 0xff;
    hexPairs.push(Util.hexNums[highByte], Util.hexNums[lowByte]);
  }
  return hexPairs.join("");
}
/**
 * Convert a string into a string of byte-valued characters, two per UTF-16
 * code unit, with the high byte always written before the low byte.
 *
 * @param {string} str - The string to encode.
 * @param {boolean} [bigEndian] - When truthy, the result starts with the
 *   characters `\xFE\xFF`. The order of the encoded bytes is the same
 *   regardless of this flag.
 * @returns {string} The encoded string.
 */
function stringToUTF16String(str, bigEndian = false) {
  let encoded = bigEndian ? "\xFE\xFF" : "";
  for (let pos = 0; pos < str.length; pos++) {
    const codeUnit = str.charCodeAt(pos);
    encoded += String.fromCharCode((codeUnit >> 8) & 0xff, codeUnit & 0xff);
  }
  return encoded;
}
export {
isAscii,
stringToAsciiOrUTF16BE,
stringToUTF16HexString,
stringToUTF16String,
};

View File

@ -21,9 +21,10 @@ import {
warn,
} from "../shared/util.js";
import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js";
import { lookupNormalRect, stringToAsciiOrUTF16BE } from "./core_utils.js";
import { BaseStream } from "./base_stream.js";
import { lookupNormalRect } from "./core_utils.js";
import { NumberTree } from "./name_number_tree.js";
import { stringToAsciiOrUTF16BE } from "./string_utils.js";
const MAX_DEPTH = 40;

View File

@ -49,6 +49,7 @@
"postscript_spec.js",
"primitives_spec.js",
"stream_spec.js",
"string_utils_spec.js",
"struct_tree_spec.js",
"svg_factory_spec.js",
"text_layer_spec.js",

View File

@ -22,13 +22,10 @@ import {
getInheritableProperty,
getModificationDate,
getSizeInBytes,
isAscii,
isWhiteSpace,
numberToString,
parseXFAPath,
recoverJsURL,
stringToUTF16HexString,
stringToUTF16String,
toRomanNumerals,
validateCSSFont,
} from "../../src/core/core_utils.js";
@ -416,56 +413,6 @@ describe("core_utils", function () {
});
});
describe("isAscii", function () {
  it("handles ascii/non-ascii strings", function () {
    // Each entry pairs an input with the verdict `isAscii` must give for it.
    const expectations = [
      ["hello world", true],
      ["こんにちは世界の", false],
      ["hello world in Japanese is こんにちは世界の", false],
      ["", true],
      [123, false],
      [null, false],
      [undefined, false],
    ];

    for (const [input, expected] of expectations) {
      expect(isAscii(input)).toEqual(expected);
    }
  });
});
describe("stringToUTF16HexString", function () {
  it("should encode a string in UTF16 hexadecimal format", function () {
    // ASCII-only input.
    const asciiHex = stringToUTF16HexString("hello world");
    expect(asciiHex).toEqual("00680065006c006c006f00200077006f0072006c0064");

    // Non-ASCII input.
    const japaneseHex = stringToUTF16HexString("こんにちは世界の");
    expect(japaneseHex).toEqual("30533093306b3061306f4e16754c306e");
  });
});
describe("stringToUTF16String", function () {
  it("should encode a string in UTF16", function () {
    // Without the `bigEndian` flag no BOM is expected in front of the bytes.
    const asciiEncoded = stringToUTF16String("hello world");
    expect(asciiEncoded).toEqual("\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japaneseEncoded = stringToUTF16String("こんにちは世界の");
    expect(japaneseEncoded).toEqual(
      "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });

  it("should encode a string in UTF16BE with a BOM", function () {
    // With the flag set, the same bytes are expected after a `\xfe\xff` BOM.
    const asciiEncoded = stringToUTF16String(
      "hello world",
      /* bigEndian = */ true
    );
    expect(asciiEncoded).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japaneseEncoded = stringToUTF16String(
      "こんにちは世界の",
      /* bigEndian = */ true
    );
    expect(japaneseEncoded).toEqual(
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });
});
describe("deepCompare", function () {
it("should return true for the same reference", function () {
const dict = new Dict();

View File

@ -96,6 +96,7 @@ async function initializePDFJS(callback) {
"pdfjs-test/unit/primitives_spec.js",
"pdfjs-test/unit/scripting_spec.js",
"pdfjs-test/unit/stream_spec.js",
"pdfjs-test/unit/string_utils_spec.js",
"pdfjs-test/unit/struct_tree_spec.js",
"pdfjs-test/unit/svg_factory_spec.js",
"pdfjs-test/unit/text_layer_spec.js",

View File

@ -0,0 +1,72 @@
/* Copyright 2019 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
isAscii,
stringToUTF16HexString,
stringToUTF16String,
} from "../../src/core/string_utils.js";
// Unit tests for the helpers exported by `src/core/string_utils.js`.
describe("string_utils", function () {
  describe("isAscii", function () {
    it("handles ascii/non-ascii strings", function () {
      // Each entry pairs an input with the verdict `isAscii` must give for it.
      const expectations = [
        ["hello world", true],
        ["こんにちは世界の", false],
        ["hello world in Japanese is こんにちは世界の", false],
        ["", true],
        [123, false],
        [null, false],
        [undefined, false],
      ];

      for (const [input, expected] of expectations) {
        expect(isAscii(input)).toEqual(expected);
      }
    });
  });

  describe("stringToUTF16HexString", function () {
    it("should encode a string in UTF16 hexadecimal format", function () {
      // ASCII-only input.
      const asciiHex = stringToUTF16HexString("hello world");
      expect(asciiHex).toEqual("00680065006c006c006f00200077006f0072006c0064");

      // Non-ASCII input.
      const japaneseHex = stringToUTF16HexString("こんにちは世界の");
      expect(japaneseHex).toEqual("30533093306b3061306f4e16754c306e");
    });
  });

  describe("stringToUTF16String", function () {
    it("should encode a string in UTF16", function () {
      // Without the `bigEndian` flag no BOM is expected in front of the bytes.
      const asciiEncoded = stringToUTF16String("hello world");
      expect(asciiEncoded).toEqual("\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

      const japaneseEncoded = stringToUTF16String("こんにちは世界の");
      expect(japaneseEncoded).toEqual(
        "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
      );
    });

    it("should encode a string in UTF16BE with a BOM", function () {
      // With the flag set, the same bytes are expected after a `\xfe\xff` BOM.
      const asciiEncoded = stringToUTF16String(
        "hello world",
        /* bigEndian = */ true
      );
      expect(asciiEncoded).toEqual(
        "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
      );

      const japaneseEncoded = stringToUTF16String(
        "こんにちは世界の",
        /* bigEndian = */ true
      );
      expect(japaneseEncoded).toEqual(
        "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
      );
    });
  });
});