pdf.js.mirror/src/core/worker.js
Jonas Jenwald eef53347fe Ensure that the correct data is sent, with the test message, from the worker if typed arrays aren't properly supported
With native typed array support now being mandatory in PDF.js, since version 2.0, this probably isn't a huge problem even though the current code seems wrong (it was changed in PR 6571).

Note how in the `!(data instanceof Uint8Array)` case we're currently attempting to send `handler.send('test', 'main', false);` to the main-thread, which doesn't really make any sense since the signature of the method reads `send(actionName, data, transfers) {`.
Hence the data that's *actually* being sent here is `'main'`, with `false` as the transferList, which just seems weird. On the main-thread, this means that we're in this case checking `data && data.supportTypedArray`, where `data` contains the string `'main'` rather than being falsy. Since a string doesn't have a `supportTypedArray` property, that check still fails as expected but it doesn't seem great nonetheless.
2018-06-06 08:52:42 +02:00

885 lines
26 KiB
JavaScript

/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
arrayByteLength, arraysToBytes, assert, createPromiseCapability, info,
InvalidPDFException, MissingPDFException, PasswordException,
setVerbosityLevel, UnexpectedResponseException, UnknownErrorException,
UNSUPPORTED_FEATURES, warn, XRefParseException
} from '../shared/util';
import { LocalPdfManager, NetworkPdfManager } from './pdf_manager';
import isNodeJS from '../shared/is_node';
import { MessageHandler } from '../shared/message_handler';
import { Ref } from './primitives';
var WorkerTask = (function WorkerTaskClosure() {
function WorkerTask(name) {
this.name = name;
this.terminated = false;
this._capability = createPromiseCapability();
}
WorkerTask.prototype = {
get finished() {
return this._capability.promise;
},
finish() {
this._capability.resolve();
},
terminate() {
this.terminated = true;
},
ensureNotTerminated() {
if (this.terminated) {
throw new Error('Worker task was terminated');
}
},
};
return WorkerTask;
})();
if (typeof PDFJSDev === 'undefined' || !PDFJSDev.test('PRODUCTION')) {
/**
* Interface that represents PDF data transport. If possible, it allows
* progressively load entire or fragment of the PDF binary data.
*
* @interface
*/
function IPDFStream() {} // eslint-disable-line no-inner-declarations
IPDFStream.prototype = {
/**
* Gets a reader for the entire PDF data.
* @returns {IPDFStreamReader}
*/
getFullReader() {
return null;
},
/**
* Gets a reader for the range of the PDF data.
* @param {number} begin - the start offset of the data.
* @param {number} end - the end offset of the data.
* @returns {IPDFStreamRangeReader}
*/
getRangeReader(begin, end) {
return null;
},
/**
* Cancels all opened reader and closes all their opened requests.
* @param {Object} reason - the reason for cancelling
*/
cancelAllRequests(reason) {},
};
/**
* Interface for a PDF binary data reader.
*
* @interface
*/
function IPDFStreamReader() {} // eslint-disable-line no-inner-declarations
IPDFStreamReader.prototype = {
/**
* Gets a promise that is resolved when the headers and other metadata of
* the PDF data stream are available.
* @returns {Promise}
*/
get headersReady() {
return null;
},
/**
* Gets the Content-Disposition filename. It is defined after the headersReady
* promise is resolved.
* @returns {string|null} The filename, or `null` if the Content-Disposition
* header is missing/invalid.
*/
get filename() {
return null;
},
/**
* Gets PDF binary data length. It is defined after the headersReady promise
* is resolved.
* @returns {number} The data length (or 0 if unknown).
*/
get contentLength() {
return 0;
},
/**
* Gets ability of the stream to handle range requests. It is defined after
* the headersReady promise is resolved. Rejected when the reader is cancelled
* or an error occurs.
* @returns {boolean}
*/
get isRangeSupported() {
return false;
},
/**
* Gets ability of the stream to progressively load binary data. It is defined
* after the headersReady promise is resolved.
* @returns {boolean}
*/
get isStreamingSupported() {
return false;
},
/**
* Requests a chunk of the binary data. The method returns the promise, which
* is resolved into object with properties "value" and "done". If the done
* is set to true, then the stream has reached its end, otherwise the value
* contains binary data. Cancelled requests will be resolved with the done is
* set to true.
* @returns {Promise}
*/
read() {},
/**
* Cancels all pending read requests and closes the stream.
* @param {Object} reason
*/
cancel(reason) {},
/**
* Sets or gets the progress callback. The callback can be useful when the
* isStreamingSupported property of the object is defined as false.
* The callback is called with one parameter: an object with the loaded and
* total properties.
*/
onProgress: null,
};
/**
* Interface for a PDF binary data fragment reader.
*
* @interface
*/
function IPDFStreamRangeReader() {} // eslint-disable-line no-inner-declarations
IPDFStreamRangeReader.prototype = {
/**
* Gets ability of the stream to progressively load binary data.
* @returns {boolean}
*/
get isStreamingSupported() {
return false;
},
/**
* Requests a chunk of the binary data. The method returns the promise, which
* is resolved into object with properties "value" and "done". If the done
* is set to true, then the stream has reached its end, otherwise the value
* contains binary data. Cancelled requests will be resolved with the done is
* set to true.
* @returns {Promise}
*/
read() {},
/**
* Cancels all pending read requests and closes the stream.
* @param {Object} reason
*/
cancel(reason) {},
/**
* Sets or gets the progress callback. The callback can be useful when the
* isStreamingSupported property of the object is defined as false.
* The callback is called with one parameter: an object with the loaded
* property.
*/
onProgress: null,
};
}
/** @implements {IPDFStream} */
var PDFWorkerStream = (function PDFWorkerStreamClosure() {
function PDFWorkerStream(msgHandler) {
this._msgHandler = msgHandler;
this._contentLength = null;
this._fullRequestReader = null;
this._rangeRequestReaders = [];
}
PDFWorkerStream.prototype = {
getFullReader() {
assert(!this._fullRequestReader);
this._fullRequestReader = new PDFWorkerStreamReader(this._msgHandler);
return this._fullRequestReader;
},
getRangeReader(begin, end) {
let reader = new PDFWorkerStreamRangeReader(begin, end, this._msgHandler);
this._rangeRequestReaders.push(reader);
return reader;
},
cancelAllRequests(reason) {
if (this._fullRequestReader) {
this._fullRequestReader.cancel(reason);
}
let readers = this._rangeRequestReaders.slice(0);
readers.forEach(function (reader) {
reader.cancel(reason);
});
},
};
/** @implements {IPDFStreamReader} */
function PDFWorkerStreamReader(msgHandler) {
this._msgHandler = msgHandler;
this._contentLength = null;
this._isRangeSupported = false;
this._isStreamingSupported = false;
let readableStream = this._msgHandler.sendWithStream('GetReader');
this._reader = readableStream.getReader();
this._headersReady = this._msgHandler.sendWithPromise('ReaderHeadersReady').
then((data) => {
this._isStreamingSupported = data.isStreamingSupported;
this._isRangeSupported = data.isRangeSupported;
this._contentLength = data.contentLength;
});
}
PDFWorkerStreamReader.prototype = {
get headersReady() {
return this._headersReady;
},
get contentLength() {
return this._contentLength;
},
get isStreamingSupported() {
return this._isStreamingSupported;
},
get isRangeSupported() {
return this._isRangeSupported;
},
read() {
return this._reader.read().then(function({ value, done, }) {
if (done) {
return { value: undefined, done: true, };
}
// `value` is wrapped into Uint8Array, we need to
// unwrap it to ArrayBuffer for further processing.
return { value: value.buffer, done: false, };
});
},
cancel(reason) {
this._reader.cancel(reason);
},
};
/** @implements {IPDFStreamRangeReader} */
function PDFWorkerStreamRangeReader(begin, end, msgHandler) {
this._msgHandler = msgHandler;
this.onProgress = null;
let readableStream = this._msgHandler.sendWithStream('GetRangeReader',
{ begin, end, });
this._reader = readableStream.getReader();
}
PDFWorkerStreamRangeReader.prototype = {
get isStreamingSupported() {
return false;
},
read() {
return this._reader.read().then(function({ value, done, }) {
if (done) {
return { value: undefined, done: true, };
}
return { value: value.buffer, done: false, };
});
},
cancel(reason) {
this._reader.cancel(reason);
},
};
return PDFWorkerStream;
})();
var WorkerMessageHandler = {
setup(handler, port) {
var testMessageProcessed = false;
handler.on('test', function wphSetupTest(data) {
if (testMessageProcessed) {
return; // we already processed 'test' message once
}
testMessageProcessed = true;
// check if Uint8Array can be sent to worker
if (!(data instanceof Uint8Array)) {
handler.send('test', false);
return;
}
// making sure postMessage transfers are working
var supportTransfers = data[0] === 255;
handler.postMessageTransfers = supportTransfers;
// check if the response property is supported by xhr
var xhr = new XMLHttpRequest();
var responseExists = 'response' in xhr;
// check if the property is actually implemented
try {
xhr.responseType; // eslint-disable-line no-unused-expressions
} catch (e) {
responseExists = false;
}
if (!responseExists) {
handler.send('test', false);
return;
}
handler.send('test', {
supportTypedArray: true,
supportTransfers,
});
});
handler.on('configure', function wphConfigure(data) {
setVerbosityLevel(data.verbosity);
});
handler.on('GetDocRequest', function wphSetupDoc(data) {
return WorkerMessageHandler.createDocumentHandler(data, port);
});
},
createDocumentHandler(docParams, port) {
// This context is actually holds references on pdfManager and handler,
// until the latter is destroyed.
var pdfManager;
var terminated = false;
var cancelXHRs = null;
var WorkerTasks = [];
let apiVersion = docParams.apiVersion;
let workerVersion =
typeof PDFJSDev !== 'undefined' ? PDFJSDev.eval('BUNDLE_VERSION') : null;
// The `apiVersion !== null` check is needed to avoid errors during testing.
if (apiVersion !== null && apiVersion !== workerVersion) {
throw new Error(`The API version "${apiVersion}" does not match ` +
`the Worker version "${workerVersion}".`);
}
var docId = docParams.docId;
var docBaseUrl = docParams.docBaseUrl;
var workerHandlerName = docParams.docId + '_worker';
var handler = new MessageHandler(workerHandlerName, docId, port);
// Ensure that postMessage transfers are correctly enabled/disabled,
// to prevent "DataCloneError" in older versions of IE (see issue 6957).
handler.postMessageTransfers = docParams.postMessageTransfers;
function ensureNotTerminated() {
if (terminated) {
throw new Error('Worker was terminated');
}
}
function startWorkerTask(task) {
WorkerTasks.push(task);
}
function finishWorkerTask(task) {
task.finish();
var i = WorkerTasks.indexOf(task);
WorkerTasks.splice(i, 1);
}
function loadDocument(recoveryMode) {
var loadDocumentCapability = createPromiseCapability();
var parseSuccess = function parseSuccess() {
Promise.all([
pdfManager.ensureDoc('numPages'),
pdfManager.ensureDoc('fingerprint'),
]).then(function([numPages, fingerprint]) {
loadDocumentCapability.resolve({
numPages,
fingerprint,
});
}, parseFailure);
};
var parseFailure = function parseFailure(e) {
loadDocumentCapability.reject(e);
};
pdfManager.ensureDoc('checkHeader', []).then(function() {
pdfManager.ensureDoc('parseStartXRef', []).then(function() {
pdfManager.ensureDoc('parse', [recoveryMode]).then(
parseSuccess, parseFailure);
}, parseFailure);
}, parseFailure);
return loadDocumentCapability.promise;
}
function getPdfManager(data, evaluatorOptions) {
var pdfManagerCapability = createPromiseCapability();
var pdfManager;
var source = data.source;
if (source.data) {
try {
pdfManager = new LocalPdfManager(docId, source.data, source.password,
evaluatorOptions, docBaseUrl);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
return pdfManagerCapability.promise;
}
var pdfStream, cachedChunks = [];
try {
pdfStream = new PDFWorkerStream(handler);
} catch (ex) {
pdfManagerCapability.reject(ex);
return pdfManagerCapability.promise;
}
var fullRequest = pdfStream.getFullReader();
fullRequest.headersReady.then(function () {
if (!fullRequest.isRangeSupported) {
return;
}
// We don't need auto-fetch when streaming is enabled.
var disableAutoFetch = source.disableAutoFetch ||
fullRequest.isStreamingSupported;
pdfManager = new NetworkPdfManager(docId, pdfStream, {
msgHandler: handler,
url: source.url,
password: source.password,
length: fullRequest.contentLength,
disableAutoFetch,
rangeChunkSize: source.rangeChunkSize,
}, evaluatorOptions, docBaseUrl);
// There may be a chance that `pdfManager` is not initialized
// for first few runs of `readchunk` block of code. Be sure
// to send all cached chunks, if any, to chunked_stream via
// pdf_manager.
for (let i = 0; i < cachedChunks.length; i++) {
pdfManager.sendProgressiveData(cachedChunks[i]);
}
cachedChunks = [];
pdfManagerCapability.resolve(pdfManager);
cancelXHRs = null;
}).catch(function (reason) {
pdfManagerCapability.reject(reason);
cancelXHRs = null;
});
var loaded = 0;
var flushChunks = function () {
var pdfFile = arraysToBytes(cachedChunks);
if (source.length && pdfFile.length !== source.length) {
warn('reported HTTP length is different from actual');
}
// the data is array, instantiating directly from it
try {
pdfManager = new LocalPdfManager(docId, pdfFile, source.password,
evaluatorOptions, docBaseUrl);
pdfManagerCapability.resolve(pdfManager);
} catch (ex) {
pdfManagerCapability.reject(ex);
}
cachedChunks = [];
};
var readPromise = new Promise(function (resolve, reject) {
var readChunk = function (chunk) {
try {
ensureNotTerminated();
if (chunk.done) {
if (!pdfManager) {
flushChunks();
}
cancelXHRs = null;
return;
}
var data = chunk.value;
loaded += arrayByteLength(data);
if (!fullRequest.isStreamingSupported) {
handler.send('DocProgress', {
loaded,
total: Math.max(loaded, fullRequest.contentLength || 0),
});
}
if (pdfManager) {
pdfManager.sendProgressiveData(data);
} else {
cachedChunks.push(data);
}
fullRequest.read().then(readChunk, reject);
} catch (e) {
reject(e);
}
};
fullRequest.read().then(readChunk, reject);
});
readPromise.catch(function (e) {
pdfManagerCapability.reject(e);
cancelXHRs = null;
});
cancelXHRs = function () {
pdfStream.cancelAllRequests('abort');
};
return pdfManagerCapability.promise;
}
function setupDoc(data) {
function onSuccess(doc) {
ensureNotTerminated();
handler.send('GetDoc', { pdfInfo: doc, });
}
function onFailure(e) {
ensureNotTerminated();
if (e instanceof PasswordException) {
var task = new WorkerTask('PasswordException: response ' + e.code);
startWorkerTask(task);
handler.sendWithPromise('PasswordRequest', e).then(function (data) {
finishWorkerTask(task);
pdfManager.updatePassword(data.password);
pdfManagerReady();
}).catch(function (boundException) {
finishWorkerTask(task);
handler.send('PasswordException', boundException);
}.bind(null, e));
} else if (e instanceof InvalidPDFException) {
handler.send('InvalidPDF', e);
} else if (e instanceof MissingPDFException) {
handler.send('MissingPDF', e);
} else if (e instanceof UnexpectedResponseException) {
handler.send('UnexpectedResponse', e);
} else {
handler.send('UnknownError',
new UnknownErrorException(e.message, e.toString()));
}
}
function pdfManagerReady() {
ensureNotTerminated();
loadDocument(false).then(onSuccess, function loadFailure(ex) {
ensureNotTerminated();
// Try again with recoveryMode == true
if (!(ex instanceof XRefParseException)) {
onFailure(ex);
return;
}
pdfManager.requestLoadedStream();
pdfManager.onLoadedStream().then(function() {
ensureNotTerminated();
loadDocument(true).then(onSuccess, onFailure);
});
}, onFailure);
}
ensureNotTerminated();
var evaluatorOptions = {
forceDataSchema: data.disableCreateObjectURL,
maxImageSize: data.maxImageSize,
disableFontFace: data.disableFontFace,
nativeImageDecoderSupport: data.nativeImageDecoderSupport,
ignoreErrors: data.ignoreErrors,
isEvalSupported: data.isEvalSupported,
};
getPdfManager(data, evaluatorOptions).then(function (newPdfManager) {
if (terminated) {
// We were in a process of setting up the manager, but it got
// terminated in the middle.
newPdfManager.terminate();
throw new Error('Worker was terminated');
}
pdfManager = newPdfManager;
handler.send('PDFManagerReady', null);
pdfManager.onLoadedStream().then(function(stream) {
handler.send('DataLoaded', { length: stream.bytes.byteLength, });
});
}).then(pdfManagerReady, onFailure);
}
handler.on('GetPage', function wphSetupGetPage(data) {
return pdfManager.getPage(data.pageIndex).then(function(page) {
var rotatePromise = pdfManager.ensure(page, 'rotate');
var refPromise = pdfManager.ensure(page, 'ref');
var userUnitPromise = pdfManager.ensure(page, 'userUnit');
var viewPromise = pdfManager.ensure(page, 'view');
return Promise.all([
rotatePromise, refPromise, userUnitPromise, viewPromise
]).then(function(results) {
return {
rotate: results[0],
ref: results[1],
userUnit: results[2],
view: results[3],
};
});
});
});
handler.on('GetPageIndex', function wphSetupGetPageIndex(data) {
var ref = new Ref(data.ref.num, data.ref.gen);
var catalog = pdfManager.pdfDocument.catalog;
return catalog.getPageIndex(ref);
});
handler.on('GetDestinations',
function wphSetupGetDestinations(data) {
return pdfManager.ensureCatalog('destinations');
}
);
handler.on('GetDestination',
function wphSetupGetDestination(data) {
return pdfManager.ensureCatalog('getDestination', [data.id]);
}
);
handler.on('GetPageLabels',
function wphSetupGetPageLabels(data) {
return pdfManager.ensureCatalog('pageLabels');
}
);
handler.on('GetPageMode', function wphSetupGetPageMode(data) {
return pdfManager.ensureCatalog('pageMode');
});
handler.on('GetAttachments',
function wphSetupGetAttachments(data) {
return pdfManager.ensureCatalog('attachments');
}
);
handler.on('GetJavaScript',
function wphSetupGetJavaScript(data) {
return pdfManager.ensureCatalog('javaScript');
}
);
handler.on('GetOutline',
function wphSetupGetOutline(data) {
return pdfManager.ensureCatalog('documentOutline');
}
);
handler.on('GetMetadata',
function wphSetupGetMetadata(data) {
return Promise.all([pdfManager.ensureDoc('documentInfo'),
pdfManager.ensureCatalog('metadata')]);
}
);
handler.on('GetData', function wphSetupGetData(data) {
pdfManager.requestLoadedStream();
return pdfManager.onLoadedStream().then(function(stream) {
return stream.bytes;
});
});
handler.on('GetStats',
function wphSetupGetStats(data) {
return pdfManager.pdfDocument.xref.stats;
}
);
handler.on('GetAnnotations', function wphSetupGetAnnotations(data) {
return pdfManager.getPage(data.pageIndex).then(function(page) {
return pdfManager.ensure(page, 'getAnnotationsData', [data.intent]);
});
});
handler.on('RenderPageRequest', function wphSetupRenderPage(data) {
var pageIndex = data.pageIndex;
pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('RenderPageRequest: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
// Pre compile the pdf page and fetch the fonts/images.
page.getOperatorList({
handler,
task,
intent: data.intent,
renderInteractiveForms: data.renderInteractiveForms,
}).then(function(operatorList) {
finishWorkerTask(task);
info('page=' + pageNum + ' - getOperatorList: time=' +
(Date.now() - start) + 'ms, len=' + operatorList.totalLength);
}, function(e) {
finishWorkerTask(task);
if (task.terminated) {
return; // ignoring errors from the terminated thread
}
// For compatibility with older behavior, generating unknown
// unsupported feature notification on errors.
handler.send('UnsupportedFeature',
{ featureId: UNSUPPORTED_FEATURES.unknown, });
var minimumStackMessage =
'worker.js: while trying to getPage() and getOperatorList()';
var wrappedException;
// Turn the error into an obj that can be serialized
if (typeof e === 'string') {
wrappedException = {
message: e,
stack: minimumStackMessage,
};
} else if (typeof e === 'object') {
wrappedException = {
message: e.message || e.toString(),
stack: e.stack || minimumStackMessage,
};
} else {
wrappedException = {
message: 'Unknown exception type: ' + (typeof e),
stack: minimumStackMessage,
};
}
handler.send('PageError', {
pageNum,
error: wrappedException,
intent: data.intent,
});
});
});
}, this);
handler.on('GetTextContent', function wphExtractText(data, sink) {
var pageIndex = data.pageIndex;
sink.onPull = function (desiredSize) { };
sink.onCancel = function (reason) { };
pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
page.extractTextContent({
handler,
task,
sink,
normalizeWhitespace: data.normalizeWhitespace,
combineTextItems: data.combineTextItems,
}).then(function() {
finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' +
(Date.now() - start) + 'ms');
sink.close();
}, function (reason) {
finishWorkerTask(task);
if (task.terminated) {
return; // ignoring errors from the terminated thread
}
sink.error(reason);
throw reason;
});
});
});
handler.on('Cleanup', function wphCleanup(data) {
return pdfManager.cleanup();
});
handler.on('Terminate', function wphTerminate(data) {
terminated = true;
if (pdfManager) {
pdfManager.terminate();
pdfManager = null;
}
if (cancelXHRs) {
cancelXHRs();
}
var waitOn = [];
WorkerTasks.forEach(function (task) {
waitOn.push(task.finished);
task.terminate();
});
return Promise.all(waitOn).then(function () {
// Notice that even if we destroying handler, resolved response promise
// must be sent back.
handler.destroy();
handler = null;
});
});
handler.on('Ready', function wphReady(data) {
setupDoc(docParams);
docParams = null; // we don't need docParams anymore -- saving memory.
});
return workerHandlerName;
},
initializeFromPort(port) {
var handler = new MessageHandler('worker', 'main', port);
WorkerMessageHandler.setup(handler, port);
handler.send('ready', null);
},
};
function isMessagePort(maybePort) {
return typeof maybePort.postMessage === 'function' &&
('onmessage' in maybePort);
}
// Worker thread (and not node.js)?
if (typeof window === 'undefined' && !isNodeJS() &&
typeof self !== 'undefined' && isMessagePort(self)) {
WorkerMessageHandler.initializeFromPort(self);
}
export {
WorkerTask,
WorkerMessageHandler,
};