mirror of
https://github.com/mozilla/pdf.js.git
synced 2026-02-08 00:21:11 +01:00
Currently *some* of the links[1] on page three of the `issue19835.pdf` test-case aren't clickable, since the destination (of the LinkAnnotation) becomes empty. The reason is that these destinations include the character `\x1b`, which is interpreted as the start of a Unicode escape sequence specifying the language of the string; please refer to section [7.9.2.2 Text String Type](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf#G6.1957385) in the PDF specification. Hence it seems that we need a way to optionally disable that behaviour, to avoid a "badly" formatted string from becoming empty (or truncated), at least for cases where we are: - Parsing named destinations[2] and URLs. - Handling "strings" that are actually /Name-instances. - Building a lookup Object/Map based on some PDF data-structure. *NOTE:* The issue that prompted this patch is obviously related to destinations, however I've gone through the `src/core/` folder and updated various other `stringToPDFString` call-sites that (directly or indirectly) fit the categories listed above. --- [1] Try clicking on anything on the line containing "Item 7A. Quantitative and Qualitative Disclosures About Market Risk 27". [2] Unfortunately just skipping `stringToPDFString` in this case would cause other issues, such as the named destination becoming "unusable" in the viewer; see e.g. issues 14847 and 14864.
131 lines
3.5 KiB
JavaScript
131 lines
3.5 KiB
JavaScript
/* Copyright 2021 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import { shadow, stringToPDFString, warn } from "../shared/util.js";
|
|
import { BaseStream } from "./base_stream.js";
|
|
import { Dict } from "./primitives.js";
|
|
|
|
/**
 * Return the value stored under the first platform key present in `dict`.
 *
 * @param {*} dict - Candidate dictionary; any value that is not a `Dict`
 *   instance is rejected.
 * @returns {*} The entry found when probing the keys in the order
 *   UF, F, Unix, Mac, DOS, or `null` when `dict` is not a `Dict` or contains
 *   none of those keys.
 */
function pickPlatformItem(dict) {
  if (!(dict instanceof Dict)) {
    return null;
  }
  // The lookup order is significant: the first key that exists wins.
  for (const key of ["UF", "F", "Unix", "Mac", "DOS"]) {
    if (dict.has(key)) {
      return dict.get(key);
    }
  }
  return null;
}
|
|
|
|
/**
 * Remove everything up to and including the last "/" of a path-like string.
 *
 * @param {string} str - The string to shorten.
 * @returns {string} The part of `str` after its last "/"; the whole string
 *   when it contains no "/", and an empty string when it ends with "/".
 */
function stripPath(str) {
  // `lastIndexOf` yields -1 when there is no "/", which makes the start 0.
  const start = str.lastIndexOf("/") + 1;
  return str.substring(start);
}
|
|
|
|
/**
 * "A PDF file can refer to the contents of another file by using a File
 * Specification (PDF 1.1)", see the spec (7.11) for more details.
 * NOTE: Only embedded files are supported (as part of the attachments support)
 * TODO: support the 'URL' file system (with caching if !/V), portable
 * collections attributes and related files (/RF)
 */
class FileSpec {
  // Becomes true only when content was requested (`skipContent` is false) and
  // the root dictionary has an /EF entry; it gates the `content` getter.
  #contentAvailable = false;

  /**
   * @param {Dict} root - The file specification dictionary. When this is not
   *   a `Dict` instance the constructor returns early and neither `root` nor
   *   `xref` is stored.
   * @param {Object} xref - Object whose `fetchIfRef` method is used to obtain
   *   the embedded file.
   * @param {boolean} [skipContent] - When true, the /EF entry is not
   *   inspected and `content` always returns `null`.
   */
  constructor(root, xref, skipContent = false) {
    if (!(root instanceof Dict)) {
      return;
    }
    this.xref = xref;
    this.root = root;
    if (root.has("FS")) {
      this.fs = root.get("FS");
    }
    if (root.has("RF")) {
      warn("Related file specifications are not supported");
    }
    if (!skipContent) {
      if (root.has("EF")) {
        this.#contentAvailable = true;
      } else {
        warn("Non-embedded file specifications are not supported");
      }
    }
  }

  /**
   * The file name taken from the first platform key found in the root
   * dictionary, with backslash separators converted to "/".
   *
   * @returns {string} The normalised name, or "unnamed" when no non-empty
   *   string entry is available.
   */
  get filename() {
    let filename = "";

    const item = pickPlatformItem(this.root);
    if (item && typeof item === "string") {
      // The replacement order is significant: escaped backslashes and escaped
      // slashes are unescaped first, and only then is every remaining
      // backslash treated as a path separator.
      filename = stringToPDFString(item, /* keepEscapeSequence = */ true)
        .replaceAll("\\\\", "\\")
        .replaceAll("\\/", "/")
        .replaceAll("\\", "/");
    }
    // NOTE(review): `shadow` is presumably caching the value on the instance;
    // confirm against its definition in ../shared/util.js.
    return shadow(this, "filename", filename || "unnamed");
  }

  /**
   * The bytes of the embedded file.
   *
   * @returns {*} The result of `getBytes()` on the embedded stream, or `null`
   *   when content is unavailable, the /EF dictionary has no usable entry, or
   *   the entry does not resolve to a `BaseStream` (a warning is emitted in
   *   the latter two cases).
   */
  get content() {
    if (!this.#contentAvailable) {
      return null;
    }
    // Looked up lazily, and kept once a truthy entry has been found.
    this._contentRef ||= pickPlatformItem(this.root?.get("EF"));

    let content = null;
    if (this._contentRef) {
      const fileObj = this.xref.fetchIfRef(this._contentRef);
      if (fileObj instanceof BaseStream) {
        content = fileObj.getBytes();
      } else {
        warn(
          "Embedded file specification points to non-existing/invalid content"
        );
      }
    } else {
      warn("Embedded file specification does not have any content");
    }
    return content;
  }

  /**
   * The /Desc entry of the root dictionary, decoded with `stringToPDFString`.
   *
   * @returns {string} The description, or an empty string when the entry is
   *   missing, empty, or not a string.
   */
  get description() {
    let description = "";

    const desc = this.root?.get("Desc");
    if (desc && typeof desc === "string") {
      description = stringToPDFString(desc);
    }
    return shadow(this, "description", description);
  }

  /**
   * A plain-object snapshot of this file specification.
   *
   * @returns {{rawFilename: string, filename: string, content: *,
   *   description: string}} `rawFilename` is the `filename` getter's value,
   *   and `filename` is that value without any leading path components.
   */
  get serializable() {
    const rawFilename = this.filename;
    return {
      rawFilename,
      // A raw name that ends in "/" strips down to an empty string; fall back
      // to the same placeholder that the `filename` getter uses.
      filename: stripPath(rawFilename) || "unnamed",
      content: this.content,
      description: this.description,
    };
  }
}
|
|
|
|
export { FileSpec };
|