pdf.js.mirror/src/display/node_stream.js
Jonas Jenwald 09a9a7bd0b [api-minor] Remove the length parameter from getDocument
This is an old API-parameter that is now unused within the PDF.js project itself, and its description says that it's (partly) being used for "range requests operations".
Note that the `length` API-parameter is used to set the *initial* `contentLength` in various `BasePDFStreamReader` implementations, however it's always overridden by the "Content-Length" header (sent by the server) when that one exists *and* is a valid number. While we currently fall back to keeping the initial `contentLength` otherwise, note however how in that case range requests will always be *disabled* and thus the only spot in the code-base [where `fullReader.contentLength` is necessary](873378b718/src/core/worker.js (L230-L236)) cannot actually be reached.

Hence the only possible reason to use the `length` API-parameter would be for improved progress reporting[1] during streaming of PDF data in rare cases where the "Content-Length" header is missing/invalid, but the user *somehow* has information from another source about the correct `length` of the PDF document.
That situation feels very much like an edge-case, but it's obviously impossible to know if someone is depending on it. However, please note that there's a work-around available for users affected by this removal:
 - Implement a `PDFDataRangeTransport` instance together with custom data-fetching[2], since in that case its `length`-parameter will always be used as-is.

Finally, this patch updates various `BasePDFStreamReader` implementations to only set the `_isRangeSupported` field once the headers are available (since previously we'd just overwrite the "initial" value anyway).

---

[1] I.e. to avoid the "indeterminate" loadingBar being displayed in the viewer.

[2] This is what e.g. the Firefox PDF Viewer uses.
2026-03-13 23:42:45 +01:00

157 lines
4.5 KiB
JavaScript

/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals process */
import { AbortException, assert } from "../shared/util.js";
import {
BasePDFStream,
BasePDFStreamRangeReader,
BasePDFStreamReader,
} from "../shared/base_pdf_stream.js";
import { createResponseError } from "./network_utils.js";
import { getArrayBuffer } from "./fetch_stream.js";
// Load-time guard: if the `PDFJSDev` global is defined and reports a
// MOZCENTRAL build, abort immediately by throwing, since this module must not
// be included in that build target.
// NOTE(review): the exact `typeof PDFJSDev !== "undefined" && PDFJSDev.test(...)`
// shape is presumably recognised by the build preprocessor - keep it verbatim
// (TODO confirm against the build tooling).
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
throw new Error(
'Module "./node_stream.js" shall not be used with MOZCENTRAL builds.'
);
}
function getReadableStream(readStream) {
const { Readable } = process.getBuiltinModule("stream");
if (typeof Readable.toWeb === "function") {
// See https://nodejs.org/api/stream.html#streamreadabletowebstreamreadable-options
return Readable.toWeb(readStream);
}
// Fallback to support Node.js versions older than `24.0.0` and `22.17.0`.
const require = process
.getBuiltinModule("module")
.createRequire(import.meta.url);
const polyfill = require("node-readable-to-web-readable-stream");
return polyfill.makeDefaultReadableStreamFromNodeReadable(readStream);
}
/**
 * PDF stream backed by the local file system (Node.js only).
 *
 * Registers `PDFNodeStreamReader` and `PDFNodeStreamRangeReader` with the
 * base class as the full-file and range reader implementations.
 */
class PDFNodeStream extends BasePDFStream {
  /**
   * @param {Object} source - Stream source description. Its `url` must expose
   *   a `protocol` property equal to "file:" (e.g. a `URL` instance).
   */
  constructor(source) {
    super(source, PDFNodeStreamReader, PDFNodeStreamRangeReader);

    // Only local files can be read through the Node.js `fs` module.
    const isFileUrl = source.url.protocol === "file:";
    assert(isFileUrl, "PDFNodeStream only supports file:// URLs.");
  }
}
/**
 * Reads a complete local PDF file as a sequence of chunks.
 *
 * The constructor starts an asynchronous `stat` of the file. Once the size is
 * known, the content length and range support are recorded and
 * `_headersCapability` is settled; `read()` waits for that before pulling
 * data from the underlying stream.
 */
class PDFNodeStreamReader extends BasePDFStreamReader {
  // Web-stream reader for the file; stays `null` until the `stat` call below
  // has succeeded.
  _reader = null;

  /**
   * @param {PDFNodeStream} stream - Owning stream; its `_source` supplies
   *   `url`, `disableRange`, `disableStream` and `rangeChunkSize`.
   */
  constructor(stream) {
    super(stream);
    const { disableRange, disableStream, rangeChunkSize, url } = stream._source;

    this._isStreamingSupported = !disableStream;

    const fs = process.getBuiltinModule("fs");
    // Use `stat` rather than `lstat`: `createReadStream` follows symbolic
    // links, so the size must be that of the link *target*. With `lstat` a
    // symlinked PDF would report the size of the link itself, yielding a wrong
    // content length and range-support decision, and a dangling link would
    // bypass the `ENOENT` handling below.
    fs.promises
      .stat(url)
      .then(stat => {
        const readStream = fs.createReadStream(url);
        const readableStream = getReadableStream(readStream);

        this._reader = readableStream.getReader();

        const { size } = stat;
        this._contentLength = size;
        // When the file size is smaller than the size of two chunks, it doesn't
        // make any sense to abort the request and retry with a range request.
        this._isRangeSupported = !disableRange && size > 2 * rangeChunkSize;

        // We need to stop reading when range is supported and streaming is
        // disabled.
        if (!this._isStreamingSupported && this._isRangeSupported) {
          this.cancel(new AbortException("Streaming is disabled."));
        }

        this._headersCapability.resolve();
      })
      .catch(error => {
        // Report a missing file as a response error with status 0.
        if (error.code === "ENOENT") {
          error = createResponseError(/* status = */ 0, url);
        }
        this._headersCapability.reject(error);
      });
  }

  /**
   * Read the next chunk of the file.
   *
   * Waits for `_headersCapability`, so it rejects with the same error when
   * the file could not be stat-ed. For every data chunk, `_loaded` is advanced
   * by the chunk's byte length and `_callOnProgress()` is invoked.
   *
   * @returns {Promise<{value: *, done: boolean}>} The chunk, passed through
   *   `getArrayBuffer`, or the underlying reader's final result once done.
   */
  async read() {
    await this._headersCapability.promise;
    const { value, done } = await this._reader.read();
    if (done) {
      return { value, done };
    }
    this._loaded += value.byteLength;
    this._callOnProgress();

    return { value: getArrayBuffer(value), done: false };
  }

  /**
   * Cancel the underlying reader, if one exists.
   *
   * NOTE: while `_reader` is still `null` (i.e. before the `stat` call has
   * resolved) this is a no-op, and the reader created afterwards is not
   * cancelled.
   *
   * @param {*} reason - Forwarded to the reader's `cancel` method.
   */
  cancel(reason) {
    this._reader?.cancel(reason);
  }
}
/**
 * Reads a single byte range of a local PDF file as a sequence of chunks.
 */
class PDFNodeStreamRangeReader extends BasePDFStreamRangeReader {
  // Settled in the constructor: resolved once `_reader` is available,
  // rejected if creating the stream throws synchronously.
  _readCapability = Promise.withResolvers();

  _reader = null;

  /**
   * @param {PDFNodeStream} stream - Owning stream; `stream._source.url`
   *   identifies the file to read.
   * @param {number} begin - Offset of the first byte to read.
   * @param {number} end - Offset just past the last byte to read; the code
   *   passes `end - 1` to `createReadStream`.
   */
  constructor(stream, begin, end) {
    super(stream, begin, end);
    const { url } = stream._source;
    const fs = process.getBuiltinModule("fs");
    const { resolve, reject } = this._readCapability;

    try {
      // The `end` option of `createReadStream` is inclusive, hence `end - 1`.
      const byteRange = { start: begin, end: end - 1 };
      const nodeStream = fs.createReadStream(url, byteRange);

      this._reader = getReadableStream(nodeStream).getReader();
      resolve();
    } catch (error) {
      reject(error);
    }
  }

  /**
   * Read the next chunk of the requested range.
   *
   * @returns {Promise<{value: *, done: boolean}>} The chunk, passed through
   *   `getArrayBuffer`, or the underlying reader's final result once done.
   */
  async read() {
    await this._readCapability.promise;

    const chunk = await this._reader.read();
    return chunk.done
      ? { value: chunk.value, done: chunk.done }
      : { value: getArrayBuffer(chunk.value), done: false };
  }

  /**
   * Cancel the underlying reader, if one was created.
   *
   * @param {*} reason - Forwarded to the reader's `cancel` method.
   */
  cancel(reason) {
    this._reader?.cancel(reason);
  }
}
export { PDFNodeStream };