From 3ccc3ec65cd2e0c6019b5871d3b33bcca152130a Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Wed, 24 Jun 2026 21:41:44 +0200 Subject: [PATCH] Add support for Sound annotations playing embedded audio Wrap uncompressed PCM sound streams (Raw/Signed, 8/16-bit, mono/stereo) in WAV and play them through the shared media overlay. --- src/core/annotation.js | 59 +++++++- src/core/catalog.js | 45 +++--- src/core/sound.js | 144 ++++++++++++++++++++ src/display/annotation_layer.js | 6 +- test/integration/annotation_spec.mjs | 36 +++++ test/unit/annotation_spec.js | 136 +++++++++++++++++++ test/unit/clitests.json | 1 + test/unit/jasmine-boot.js | 1 + test/unit/sound_spec.js | 196 +++++++++++++++++++++++++++ test/unit/test_utils.js | 36 ++++- 10 files changed, 636 insertions(+), 24 deletions(-) create mode 100644 src/core/sound.js create mode 100644 test/unit/sound_spec.js diff --git a/src/core/annotation.js b/src/core/annotation.js index 79b65eafa..ecee49c3d 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -74,6 +74,7 @@ import { Catalog } from "./catalog.js"; import { ColorSpaceUtils } from "./colorspace_utils.js"; import { createImage } from "./editor/pdf_images.js"; import { FileSpec } from "./file_spec.js"; +import { getSoundFormat } from "./sound.js"; import { JpegStream } from "./jpeg_stream.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; @@ -291,6 +292,9 @@ class AnnotationFactory { case "Screen": return new ScreenAnnotation(parameters); + case "Sound": + return new SoundAnnotation(parameters); + default: if (!collectFields) { if (!subtype) { @@ -1509,7 +1513,7 @@ class Annotation { * usually indirect; when it's inline its embedded-file stream still isn't * (streams are always indirect), so fall back to that ref. 
*/ - _getAttachmentId(fsDict, fsRef, annotationGlobals) { + _getAttachmentId(fsDict, fsRef, annotationGlobals, isSound = false) { if (!(fsDict instanceof Dict)) { return undefined; } @@ -1517,7 +1521,7 @@ class Annotation { fsRef = FileSpec.pickPlatformItem(fsDict.get("EF"), /* raw = */ true); } return fsRef instanceof Ref - ? annotationGlobals.catalog.getAttachmentIdForAnnotation(fsRef) + ? annotationGlobals.catalog.getAttachmentIdForAnnotation(fsRef, isSound) : undefined; } @@ -5488,15 +5492,23 @@ class MediaAnnotation extends Annotation { * when `assetRef` isn't itself a reference. * @param {string} asset.filename * @param {string} asset.contentType + * @param {boolean} [asset.wrapSound] + * When set, the embedded bytes are raw PDF sound samples that the catalog + * wraps in a WAV container when fetched (see `soundStreamToWav`). * @param {Object} annotationGlobals */ _setMediaData( - { assetRef, assetDict, filename, contentType }, + { assetRef, assetDict, filename, contentType, wrapSound = false }, annotationGlobals ) { this.data.noHTML = false; this.data.richMedia = { - fileId: this._getAttachmentId(assetDict, assetRef, annotationGlobals), + fileId: this._getAttachmentId( + assetDict, + assetRef, + annotationGlobals, + wrapSound + ), filename, contentType, }; @@ -5825,6 +5837,45 @@ class ScreenAnnotation extends MediaAnnotation { } } +class SoundAnnotation extends MediaAnnotation { + constructor(params) { + super(params); + + const { dict, xref, annotationGlobals } = params; + const soundRef = dict.getRaw("Sound"); + if (!(soundRef instanceof Ref)) { + return; + } + let sound; + try { + sound = xref.fetch(soundRef); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + // A corrupt sound stream: fall back to rendering the appearance. 
+ warn(`SoundAnnotation: "${ex}".`); + return; + } + if (!(sound instanceof BaseStream) || !getSoundFormat(sound.dict)) { + // No embedded samples, or an encoding we can't turn into a playable WAV + // (compressed, or an unusual bit depth); just render the appearance. + return; + } + + this._setMediaData( + { + assetRef: soundRef, + assetDict: sound.dict, + filename: "sound.wav", + contentType: "audio/wav", + wrapSound: true, + }, + annotationGlobals + ); + } +} + export { Annotation, AnnotationBorderStyle, diff --git a/src/core/catalog.js b/src/core/catalog.js index b12abee83..e637b744c 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -52,6 +52,7 @@ import { clearGlobalCaches } from "./cleanup_helper.js"; import { ColorSpaceUtils } from "./colorspace_utils.js"; import { FileSpec } from "./file_spec.js"; import { MetadataParser } from "./metadata_parser.js"; +import { soundStreamToWav } from "./sound.js"; import { stringToPDFString } from "./string_utils.js"; import { StructTreeRoot } from "./struct_tree.js"; @@ -123,6 +124,8 @@ class Catalog { #annotationAttachmentRefById = new Map(); + #soundAttachmentIds = new Set(); + #catDict = null; builtInCMapCache = new Map(); @@ -171,28 +174,32 @@ class Catalog { * * @param {Ref} ref * File-spec or embedded-file stream reference. + * @param {boolean} [isSound] + * When set, the referenced stream holds raw PDF sound samples that + * `attachmentContent` wraps in a WAV container on fetch. * @returns {string} * Attachment id. 
*/ - getAttachmentIdForAnnotation(ref) { + getAttachmentIdForAnnotation(ref, isSound = false) { let id = this.#annotationAttachmentIdByRef.get(ref); - if (id) { - return id; + if (!id) { + const baseId = `attachmentRef:${ref.toString()}`; + id = baseId; + + let i = 1; + while ( + this.#annotationAttachmentRefById.has(id) || + this.attachments?.has(id) + ) { + id = `${baseId}-${i++}`; + } + + this.#annotationAttachmentIdByRef.put(ref, id); + this.#annotationAttachmentRefById.set(id, ref); } - - const baseId = `attachmentRef:${ref.toString()}`; - id = baseId; - - let i = 1; - while ( - this.#annotationAttachmentRefById.has(id) || - this.attachments?.has(id) - ) { - id = `${baseId}-${i++}`; + if (isSound) { + this.#soundAttachmentIds.add(id); } - - this.#annotationAttachmentIdByRef.put(ref, id); - this.#annotationAttachmentRefById.set(id, ref); return id; } @@ -1198,7 +1205,11 @@ class Catalog { if (ref) { const target = this.xref.fetch(ref); if (target instanceof BaseStream) { - return FileSpec.readStreamContent(target); + const content = FileSpec.readStreamContent(target); + if (this.#soundAttachmentIds.has(id)) { + return soundStreamToWav(target, content) ?? content; + } + return content; } return target instanceof Dict ? FileSpec.readContent(target) : null; } diff --git a/src/core/sound.js b/src/core/sound.js new file mode 100644 index 000000000..abe685244 --- /dev/null +++ b/src/core/sound.js @@ -0,0 +1,144 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Name } from "./primitives.js"; +import { stringToBytes } from "../shared/util.js"; + +// Size, in bytes, of the canonical 44-byte WAV header (RIFF + fmt + data +// chunk headers) that precedes the sample data. +const WAV_HEADER_SIZE = 44; + +/** + * Helpers for PDF sound objects (ISO 32000-1, 12.5.6.16). + * + * Sound streams contain samples described by /R, /C, /B, /E, and optional /CO. + * We wrap supported uncompressed PCM (Raw/Signed, 8/16-bit, mono/stereo) in WAV + * for playback. + * + * @import { BaseStream } from "./base_stream.js"; + * @import { Dict } from "./primitives.js"; + */ + +/** + * Return a supported uncompressed sample format. + * + * @param {Dict} [dict] The sound object's stream dictionary. + * @returns {{ + * channels: number, + * sampleRate: number, + * bitsPerSample: number, + * encoding: string, + * } | null} + */ +function getSoundFormat(dict) { + // `/CO` compression is beyond PDF stream filters and isn't decoded here. + if (!dict || dict.has("CO")) { + return null; + } + const sampleRate = dict.get("R"); + if (!Number.isInteger(sampleRate) || sampleRate <= 0) { + return null; + } + const channels = dict.get("C") ?? 1; + if (!Number.isInteger(channels) || channels < 1 || channels > 2) { + return null; + } + const bitsPerSample = dict.get("B") ?? 8; + if (bitsPerSample !== 8 && bitsPerSample !== 16) { + return null; + } + // `/E` is optional and defaults to Raw; a present-but-malformed value (one + // that isn't a name) is rejected rather than silently treated as Raw. + const e = dict.get("E"); + let encoding = "Raw"; + if (e !== undefined) { + encoding = e instanceof Name ? e.name : null; + } + if (encoding !== "Raw" && encoding !== "Signed") { + return null; + } + return { channels, sampleRate, bitsPerSample, encoding }; +} + +/** + * Build a WAV file from supported PDF sound samples. 
+ * + * PDF 16-bit samples are big-endian; WAV stores 16-bit samples as signed + * little-endian and 8-bit samples as unsigned. The data chunk is trimmed to a + * whole number of frames (a multiple of the block alignment); a stream with + * no complete frame produces no WAV. + * + * @param {BaseStream} stream The sound object stream. + * @param {Uint8Array} samples Raw sample bytes from the stream. + * @returns {Uint8Array | null} + */ +function soundStreamToWav(stream, samples) { + const format = getSoundFormat(stream.dict); + if (!format) { + return null; + } + const { channels, sampleRate, bitsPerSample, encoding } = format; + const blockAlign = channels * (bitsPerSample >> 3); + // Keep only whole frames, dropping a trailing partial frame; bail out when + // there isn't a single complete frame to play. + const dataLength = samples.length - (samples.length % blockAlign); + if (dataLength === 0) { + return null; + } + + const wav = new Uint8Array(WAV_HEADER_SIZE + dataLength); + const view = new DataView(wav.buffer); + wav.set(stringToBytes("RIFF"), 0); + // File size minus the first 8 bytes (the "RIFF" tag and this field). + view.setUint32(4, WAV_HEADER_SIZE - 8 + dataLength, true); + wav.set(stringToBytes("WAVE"), 8); + wav.set(stringToBytes("fmt "), 12); + view.setUint32(16, 16, true); // PCM fmt-chunk size. + view.setUint16(20, 1 /* = WAVE_FORMAT_PCM */, true); + view.setUint16(22, channels, true); + view.setUint32(24, sampleRate, true); + view.setUint32(28, sampleRate * blockAlign, true); // Byte rate. + view.setUint16(32, blockAlign, true); + view.setUint16(34, bitsPerSample, true); + wav.set(stringToBytes("data"), 36); + view.setUint32(40, dataLength, true); + + // Transcode the samples straight into the WAV data region (right after the + // header) so we never allocate or copy a separate sample buffer.
+ if (bitsPerSample === 16) { + const signed = encoding === "Signed"; + for (let i = 0; i < dataLength; i += 2) { + let value = (samples[i] << 8) | samples[i + 1]; + if (signed) { + if (value >= 0x8000) { + value -= 0x10000; + } + } else { + value -= 0x8000; + } + view.setInt16(WAV_HEADER_SIZE + i, value, /* littleEndian = */ true); + } + } else if (encoding === "Signed") { + for (let i = 0; i < dataLength; i++) { + wav[WAV_HEADER_SIZE + i] = (samples[i] + 128) & 0xff; + } + } else { + wav.set(samples.subarray(0, dataLength), WAV_HEADER_SIZE); + } + + return wav; +} + +export { getSoundFormat, soundStreamToWav }; diff --git a/src/display/annotation_layer.js b/src/display/annotation_layer.js index a7fdadef0..a44c9cfc7 100644 --- a/src/display/annotation_layer.js +++ b/src/display/annotation_layer.js @@ -158,10 +158,12 @@ class AnnotationElementFactory { case AnnotationType.FILEATTACHMENT: return new FileAttachmentAnnotationElement(parameters); - // A Screen annotation with a rendition action plays embedded media the - // same way RichMedia does (see `MediaAnnotation` in the core layer). + // RichMedia, Screen (rendition action) and Sound annotations all expose a + // single embedded audio/video clip and play it the same way (see + // `MediaAnnotation` in the core layer). 
case AnnotationType.RICHMEDIA: case AnnotationType.SCREEN: + case AnnotationType.SOUND: return new MediaAnnotationElement(parameters); default: diff --git a/test/integration/annotation_spec.mjs b/test/integration/annotation_spec.mjs index 39ce0474d..ce041ddc2 100644 --- a/test/integration/annotation_spec.mjs +++ b/test/integration/annotation_spec.mjs @@ -1081,3 +1081,39 @@ describe("Screen annotation (rendition)", () => { }); }); }); + +describe("Sound annotation", () => { + describe("multimedia_annotations.pdf", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait( + "multimedia_annotations.pdf", + getAnnotationSelector("7R") + ); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("must play the embedded sound when clicking the play button", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + const annotationSelector = getAnnotationSelector("7R"); + const buttonSelector = `${annotationSelector} .mediaPlayButton`; + const audioSelector = `${annotationSelector} audio.mediaContent`; + + await page.waitForSelector(buttonSelector, { visible: true }); + await page.click(buttonSelector); + + await page.waitForSelector(audioSelector, { visible: true }); + const hasSource = await page.$eval(audioSelector, el => + el.src.startsWith("blob:") + ); + expect(hasSource).withContext(`In ${browserName}`).toEqual(true); + }) + ); + }); + }); +}); diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index 2cc238826..7106cedc7 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -30,12 +30,15 @@ import { bytesToString, DrawOPS, OPS, + PasswordException, + PasswordResponses, RenderingIntentFlag, stringToUTF8String, } from "../../src/shared/util.js"; import { CMAP_URL, createIdFactory, + createSoundDict, DefaultBinaryDataFactory, fetchBuiltInCMapHelper, STANDARD_FONT_DATA_URL, @@ -5238,6 +5241,139 @@ describe("annotation", function () { }); }); + 
describe("SoundAnnotation", function () { + function createSoundAnnotation(refNum, soundRef) { + const dict = new Dict(); + dict.set("Type", Name.get("Annot")); + dict.set("Subtype", Name.get("Sound")); + if (soundRef) { + dict.set("Sound", soundRef); + } + return { ref: Ref.get(refNum, 0), data: dict }; + } + + it("should expose the embedded sound as a playable WAV asset", async function () { + const soundRef = Ref.get(300, 0); + const soundStream = new StringStream( + "\x00\x00\x01\x00", + createSoundDict({ type: true }) + ); + const annotation = createSoundAnnotation(301, soundRef); + + const xref = new XRefMock([ + { ref: soundRef, data: soundStream }, + annotation, + ]); + + const { data } = await AnnotationFactory.create( + xref, + annotation.ref, + annotationGlobalsMock, + idFactoryMock + ); + expect(data.annotationType).toEqual(AnnotationType.SOUND); + expect(data.noHTML).toEqual(false); + expect(data.richMedia).toEqual({ + fileId: "attachmentRef:300R", + filename: "sound.wav", + contentType: "audio/wav", + }); + }); + + it("should not create media data for a compressed sound", async function () { + const soundRef = Ref.get(310, 0); + const soundStream = new StringStream( + "\x00\x00", + createSoundDict({ type: true, CO: "ADPCM" }) + ); + const annotation = createSoundAnnotation(311, soundRef); + + const xref = new XRefMock([ + { ref: soundRef, data: soundStream }, + annotation, + ]); + + const { data } = await AnnotationFactory.create( + xref, + annotation.ref, + annotationGlobalsMock, + idFactoryMock + ); + expect(data.annotationType).toEqual(AnnotationType.SOUND); + expect(data.noHTML).toEqual(true); + expect(data.richMedia).toBeUndefined(); + }); + + it("should not create media data for an unsupported bit depth", async function () { + const soundRef = Ref.get(320, 0); + const soundStream = new StringStream( + "\x00\x00\x00", + createSoundDict({ type: true, B: 24 }) + ); + const annotation = createSoundAnnotation(321, soundRef); + + const xref = new 
XRefMock([ + { ref: soundRef, data: soundStream }, + annotation, + ]); + + const { data } = await AnnotationFactory.create( + xref, + annotation.ref, + annotationGlobalsMock, + idFactoryMock + ); + expect(data.annotationType).toEqual(AnnotationType.SOUND); + expect(data.noHTML).toEqual(true); + expect(data.richMedia).toBeUndefined(); + }); + + it("should not create media data without a sound object", async function () { + const annotation = createSoundAnnotation(331, null); + const xref = new XRefMock([annotation]); + + const { data } = await AnnotationFactory.create( + xref, + annotation.ref, + annotationGlobalsMock, + idFactoryMock + ); + expect(data.annotationType).toEqual(AnnotationType.SOUND); + expect(data.noHTML).toEqual(true); + expect(data.richMedia).toBeUndefined(); + }); + + it("should request a password before wrapping encrypted sound content", function () { + const soundRef = Ref.get(340, 0); + const soundDict = createSoundDict({ type: true }); + const soundStream = new StringStream("\x00\x00", soundDict); + const pagesDict = new Dict(); + const catalogDict = new Dict(); + catalogDict.set("Pages", pagesDict); + + const xref = new XRefMock([{ ref: soundRef, data: soundStream }]); + xref.encrypt = { encryptionKey: null }; + xref.getCatalogObj = () => catalogDict; + for (const dict of [soundDict, pagesDict, catalogDict]) { + dict.assignXref(xref); + } + + const catalog = new Catalog(pdfManagerMock, xref); + const soundId = catalog.getAttachmentIdForAnnotation( + soundRef, + /* isSound = */ true + ); + + try { + catalog.attachmentContent(soundId); + expect(false).toEqual(true); + } catch (ex) { + expect(ex).toBeInstanceOf(PasswordException); + expect(ex.code).toEqual(PasswordResponses.NEED_PASSWORD); + } + }); + }); + describe("PopupAnnotation", function () { it("should inherit properties from its parent", async function () { const parentDict = new Dict(); diff --git a/test/unit/clitests.json b/test/unit/clitests.json index 2033ddd7a..7f392695c 100644 --- 
a/test/unit/clitests.json +++ b/test/unit/clitests.json @@ -49,6 +49,7 @@ "postscript_spec.js", "primitives_spec.js", "scripting_utils_spec.js", + "sound_spec.js", "stream_spec.js", "string_utils_spec.js", "struct_tree_spec.js", diff --git a/test/unit/jasmine-boot.js b/test/unit/jasmine-boot.js index ea6404f9a..a0d1842e3 100644 --- a/test/unit/jasmine-boot.js +++ b/test/unit/jasmine-boot.js @@ -96,6 +96,7 @@ async function initializePDFJS(callback) { "pdfjs-test/unit/primitives_spec.js", "pdfjs-test/unit/scripting_spec.js", "pdfjs-test/unit/scripting_utils_spec.js", + "pdfjs-test/unit/sound_spec.js", "pdfjs-test/unit/stream_spec.js", "pdfjs-test/unit/string_utils_spec.js", "pdfjs-test/unit/struct_tree_spec.js", diff --git a/test/unit/sound_spec.js b/test/unit/sound_spec.js new file mode 100644 index 000000000..62879d694 --- /dev/null +++ b/test/unit/sound_spec.js @@ -0,0 +1,196 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { getSoundFormat, soundStreamToWav } from "../../src/core/sound.js"; +import { createSoundDict } from "./test_utils.js"; +import { StringStream } from "../../src/core/stream.js"; + +describe("sound", function () { + function createSoundStream(bytes, opts) { + return new StringStream( + String.fromCharCode(...bytes), + createSoundDict(opts) + ); + } + + function createWav(bytes, opts) { + const stream = createSoundStream(bytes, opts); + return soundStreamToWav(stream, stream.getBytes()); + } + + function parseWav(wav) { + const view = new DataView(wav.buffer, wav.byteOffset, wav.byteLength); + const tag = offset => + String.fromCharCode( + wav[offset], + wav[offset + 1], + wav[offset + 2], + wav[offset + 3] + ); + return { + riff: tag(0), + wave: tag(8), + fmt: tag(12), + dataTag: tag(36), + fmtSize: view.getUint32(16, true), + format: view.getUint16(20, true), + channels: view.getUint16(22, true), + sampleRate: view.getUint32(24, true), + byteRate: view.getUint32(28, true), + blockAlign: view.getUint16(32, true), + bitsPerSample: view.getUint16(34, true), + dataLength: view.getUint32(40, true), + data: Array.from(wav.subarray(44)), + }; + } + + describe("getSoundFormat", function () { + it("should read an explicit format", function () { + expect(getSoundFormat(createSoundDict())).toEqual({ + channels: 1, + sampleRate: 22050, + bitsPerSample: 16, + encoding: "Signed", + }); + }); + + it("should apply the spec defaults", function () { + // Only `/R` is required; `/C`, `/B` and `/E` default to 1, 8 and Raw. 
+ expect( + getSoundFormat(createSoundDict({ C: null, B: null, E: null })) + ).toEqual({ + channels: 1, + sampleRate: 22050, + bitsPerSample: 8, + encoding: "Raw", + }); + }); + + it("should reject compressed sample data", function () { + expect(getSoundFormat(createSoundDict({ CO: "ADPCM" }))).toBeNull(); + }); + + it("should reject an unsupported bit depth", function () { + expect(getSoundFormat(createSoundDict({ B: 24 }))).toBeNull(); + }); + + it("should reject muLaw/ALaw encodings", function () { + expect(getSoundFormat(createSoundDict({ B: 8, E: "muLaw" }))).toBeNull(); + expect(getSoundFormat(createSoundDict({ B: 8, E: "ALaw" }))).toBeNull(); + }); + + it("should reject a present but non-name /E encoding", function () { + const dict = createSoundDict({ E: null }); + dict.set("E", 0); // Not a name object (a malformed `/E`). + expect(getSoundFormat(dict)).toBeNull(); + }); + + it("should reject a missing or invalid sample rate", function () { + expect(getSoundFormat(createSoundDict({ R: null }))).toBeNull(); + expect(getSoundFormat(createSoundDict({ R: 0 }))).toBeNull(); + }); + + it("should reject a non-integer or non-finite sample rate", function () { + expect(getSoundFormat(createSoundDict({ R: 22050.5 }))).toBeNull(); + expect(getSoundFormat(createSoundDict({ R: Infinity }))).toBeNull(); + }); + + it("should reject an unsupported channel count", function () { + expect(getSoundFormat(createSoundDict({ C: 3 }))).toBeNull(); + }); + }); + + describe("soundStreamToWav", function () { + it("should wrap 16-bit signed samples in a little-endian WAV", function () { + // Big-endian source samples 0x1234 and 0xFFFE (-2). 
+ const wav = parseWav(createWav([0x12, 0x34, 0xff, 0xfe])); + expect(wav.riff).toEqual("RIFF"); + expect(wav.wave).toEqual("WAVE"); + expect(wav.fmt).toEqual("fmt "); + expect(wav.dataTag).toEqual("data"); + expect(wav.fmtSize).toEqual(16); + expect(wav.format).toEqual(1); // PCM + expect(wav.channels).toEqual(1); + expect(wav.sampleRate).toEqual(22050); + expect(wav.bitsPerSample).toEqual(16); + expect(wav.blockAlign).toEqual(2); + expect(wav.byteRate).toEqual(22050 * 2); + expect(wav.dataLength).toEqual(4); + // Bytes are swapped to little-endian, values unchanged. + expect(wav.data).toEqual([0x34, 0x12, 0xfe, 0xff]); + }); + + it("should shift 16-bit raw (unsigned) samples into the signed range", function () { + // Big-endian unsigned 0x8000 (mid) and 0x0000 (min). + const wav = parseWav(createWav([0x80, 0x00, 0x00, 0x00], { E: "Raw" })); + // 0x8000 - 0x8000 = 0; 0x0000 - 0x8000 = -32768 (0x8000 little-endian). + expect(wav.data).toEqual([0x00, 0x00, 0x00, 0x80]); + }); + + it("should copy 8-bit raw (unsigned) samples unchanged", function () { + const wav = parseWav(createWav([0x00, 0x7f, 0xff], { B: 8, E: "Raw" })); + expect(wav.bitsPerSample).toEqual(8); + expect(wav.blockAlign).toEqual(1); + expect(wav.data).toEqual([0x00, 0x7f, 0xff]); + }); + + it("should convert 8-bit signed samples to unsigned", function () { + // Signed bytes 0, 127, -128, -1 -> unsigned 128, 255, 0, 127. + const wav = parseWav( + createWav([0x00, 0x7f, 0x80, 0xff], { B: 8, E: "Signed" }) + ); + expect(wav.data).toEqual([0x80, 0xff, 0x00, 0x7f]); + }); + + it("should report stereo block alignment", function () { + const wav = parseWav(createWav([0, 0, 0, 0], { C: 2 })); + expect(wav.channels).toEqual(2); + expect(wav.blockAlign).toEqual(4); // 2 channels * 16 bits. + expect(wav.byteRate).toEqual(22050 * 4); + }); + + it("should trim a trailing partial frame (16-bit stereo)", function () { + // blockAlign = 4; six bytes is one whole frame plus a partial one. 
+ const wav = parseWav(createWav([1, 2, 3, 4, 5, 6], { C: 2 })); + expect(wav.blockAlign).toEqual(4); + expect(wav.dataLength).toEqual(4); + // Only the first frame, byte-swapped to little-endian. + expect(wav.data).toEqual([2, 1, 4, 3]); + }); + + it("should trim a trailing partial frame (8-bit stereo)", function () { + // blockAlign = 2; three bytes is one whole frame plus a partial one. + const wav = parseWav(createWav([10, 20, 30], { C: 2, B: 8, E: "Raw" })); + expect(wav.blockAlign).toEqual(2); + expect(wav.dataLength).toEqual(2); + expect(wav.data).toEqual([10, 20]); + }); + + it("should return null when there is no complete frame", function () { + // An empty stream, and a stereo stream with only a partial frame. + expect(createWav([], {})).toBeNull(); + expect(createWav([1, 2], { C: 2 })).toBeNull(); + }); + + it("should return null for an unsupported format", function () { + expect( + soundStreamToWav( + createSoundStream([0, 0], { CO: "ADPCM" }), + new Uint8Array([0, 0]) + ) + ).toBeNull(); + }); + }); +}); diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index 6c7027d22..38ca55a66 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -14,6 +14,7 @@ */ import { assert, isNodeJS } from "../../src/shared/util.js"; +import { Dict, Name, Ref } from "../../src/core/primitives.js"; import { fetchData as fetchDataNode, NodeBinaryDataFactory, @@ -22,7 +23,6 @@ import { NullStream, StringStream } from "../../src/core/stream.js"; import { Page, PDFDocument } from "../../src/core/document.js"; import { DOMBinaryDataFactory } from "../../src/display/binary_data_factory.js"; import { fetchData as fetchDataDOM } from "../../src/display/display_utils.js"; -import { Ref } from "../../src/core/primitives.js"; const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/"; @@ -74,6 +74,39 @@ function buildGetDocumentParams(filename, options) { return params; } +// Builds a PDF sound object's stream dictionary (ISO 32000-1, 12.5.6.16). 
Pass +// a key as `null` to omit it (to exercise defaults/missing entries); pass +// `type: true` to add the optional `/Type /Sound` entry. +function createSoundDict({ + R = 22050, + C = 1, + B = 16, + E = "Signed", + CO, + type = false, +} = {}) { + const dict = new Dict(); + if (type) { + dict.set("Type", Name.get("Sound")); + } + if (R !== null) { + dict.set("R", R); + } + if (C !== null) { + dict.set("C", C); + } + if (B !== null) { + dict.set("B", B); + } + if (E !== null) { + dict.set("E", Name.get(E)); + } + if (CO) { + dict.set("CO", Name.get(CO)); + } + return dict; +} + function getCrossOriginHostname(hostname) { if (hostname === "localhost") { // Note: This does not work if localhost is listening on IPv6 only. @@ -254,6 +287,7 @@ export { buildGetDocumentParams, CMAP_URL, createIdFactory, + createSoundDict, DefaultBinaryDataFactory, DefaultFileReaderFactory, fetchBuiltInCMapHelper,