Merge pull request #21501 from calixteman/sound

Add support for Sound annotations playing embedded audio
2026-06-30 12:15:49 +02:00 · 2026-06-29 13:21:08 +02:00 · 2026-06-29 13:21:08 +02:00 · 649fb9c970
commit 649fb9c970
parent 1651e57e61 3ccc3ec65c
10 changed files with 636 additions and 24 deletions
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@ -74,6 +74,7 @@ import { Catalog } from "./catalog.js";
 import { ColorSpaceUtils } from "./colorspace_utils.js";
 import { createImage } from "./editor/pdf_images.js";
 import { FileSpec } from "./file_spec.js";
+import { getSoundFormat } from "./sound.js";
 import { JpegStream } from "./jpeg_stream.js";
 import { ObjectLoader } from "./object_loader.js";
 import { OperatorList } from "./operator_list.js";
@ -291,6 +292,9 @@ class AnnotationFactory {
      case "Screen":
        return new ScreenAnnotation(parameters);

+      case "Sound":
+        return new SoundAnnotation(parameters);
+
      default:
        if (!collectFields) {
          if (!subtype) {
@ -1509,7 +1513,7 @@ class Annotation {
   * usually indirect; when it's inline its embedded-file stream still isn't
   * (streams are always indirect), so fall back to that ref.
   */
-  _getAttachmentId(fsDict, fsRef, annotationGlobals) {
+  _getAttachmentId(fsDict, fsRef, annotationGlobals, isSound = false) {
    if (!(fsDict instanceof Dict)) {
      return undefined;
    }
@ -1517,7 +1521,7 @@ class Annotation {
      fsRef = FileSpec.pickPlatformItem(fsDict.get("EF"), /* raw = */ true);
    }
    return fsRef instanceof Ref
-      ? annotationGlobals.catalog.getAttachmentIdForAnnotation(fsRef)
+      ? annotationGlobals.catalog.getAttachmentIdForAnnotation(fsRef, isSound)
      : undefined;
  }

@ -5488,15 +5492,23 @@ class MediaAnnotation extends Annotation {
   *   when `assetRef` isn't itself a reference.
   * @param {string} asset.filename
   * @param {string} asset.contentType
+   * @param {boolean} [asset.wrapSound]
+   *   When set, the embedded bytes are raw PDF sound samples that the catalog
+   *   wraps in a WAV container when fetched (see `soundStreamToWav`).
   * @param {Object} annotationGlobals
   */
  _setMediaData(
-    { assetRef, assetDict, filename, contentType },
+    { assetRef, assetDict, filename, contentType, wrapSound = false },
    annotationGlobals
  ) {
    this.data.noHTML = false;
    this.data.richMedia = {
-      fileId: this._getAttachmentId(assetDict, assetRef, annotationGlobals),
+      fileId: this._getAttachmentId(
+        assetDict,
+        assetRef,
+        annotationGlobals,
+        wrapSound
+      ),
      filename,
      contentType,
    };
@ -5825,6 +5837,45 @@ class ScreenAnnotation extends MediaAnnotation {
  }
 }

+/**
+ * Sound annotation whose `/Sound` entry references a sound-object stream.
+ *
+ * When that stream holds samples in a format `getSoundFormat` accepts, the
+ * annotation is exposed as playable media (through `_setMediaData`) under the
+ * fixed name "sound.wav"; in every other case the constructor returns early
+ * and no media data is attached.
+ */
+class SoundAnnotation extends MediaAnnotation {
+  constructor(params) {
+    super(params);
+
+    const { dict, xref, annotationGlobals } = params;
+    // Read the raw entry: only an indirect reference is usable here, since
+    // the reference itself is what gets registered as the attachment.
+    const soundRef = dict.getRaw("Sound");
+    if (!(soundRef instanceof Ref)) {
+      return;
+    }
+    let sound;
+    try {
+      sound = xref.fetch(soundRef);
+    } catch (ex) {
+      // Missing data isn't corruption: propagate it to the caller (presumably
+      // so the data can be loaded and the operation retried).
+      if (ex instanceof MissingDataException) {
+        throw ex;
+      }
+      // A corrupt sound stream: fall back to rendering the appearance.
+      warn(`SoundAnnotation: "${ex}".`);
+      return;
+    }
+    if (!(sound instanceof BaseStream) || !getSoundFormat(sound.dict)) {
+      // No embedded samples, or a format `getSoundFormat` rejects and that we
+      // therefore can't turn into a playable WAV (e.g. compressed, or an
+      // unusual bit depth); just render the appearance.
+      return;
+    }
+
+    this._setMediaData(
+      {
+        assetRef: soundRef,
+        assetDict: sound.dict,
+        filename: "sound.wav",
+        contentType: "audio/wav",
+        // The stream holds raw samples, not a WAV file: ask for them to be
+        // wrapped in a WAV container when the attachment is fetched.
+        wrapSound: true,
+      },
+      annotationGlobals
+    );
+  }
+}
+
 export {
  Annotation,
  AnnotationBorderStyle,
--- a/src/core/catalog.js
+++ b/src/core/catalog.js
@ -52,6 +52,7 @@ import { clearGlobalCaches } from "./cleanup_helper.js";
 import { ColorSpaceUtils } from "./colorspace_utils.js";
 import { FileSpec } from "./file_spec.js";
 import { MetadataParser } from "./metadata_parser.js";
+import { soundStreamToWav } from "./sound.js";
 import { stringToPDFString } from "./string_utils.js";
 import { StructTreeRoot } from "./struct_tree.js";

@ -123,6 +124,8 @@ class Catalog {

  #annotationAttachmentRefById = new Map();

+  #soundAttachmentIds = new Set();
+
  #catDict = null;

  builtInCMapCache = new Map();
@ -171,28 +174,32 @@ class Catalog {
   *
   * @param {Ref} ref
   *   File-spec or embedded-file stream reference.
+   * @param {boolean} [isSound]
+   *   When set, the referenced stream holds raw PDF sound samples that
+   *   `attachmentContent` wraps in a WAV container on fetch.
   * @returns {string}
   *   Attachment id.
   */
-  getAttachmentIdForAnnotation(ref) {
+  getAttachmentIdForAnnotation(ref, isSound = false) {
    let id = this.#annotationAttachmentIdByRef.get(ref);
-    if (id) {
-      return id;
+    if (!id) {
+      const baseId = `attachmentRef:${ref.toString()}`;
+      id = baseId;
+
+      let i = 1;
+      while (
+        this.#annotationAttachmentRefById.has(id) ||
+        this.attachments?.has(id)
+      ) {
+        id = `${baseId}-${i++}`;
+      }
+
+      this.#annotationAttachmentIdByRef.put(ref, id);
+      this.#annotationAttachmentRefById.set(id, ref);
    }
-
-    const baseId = `attachmentRef:${ref.toString()}`;
-    id = baseId;
-
-    let i = 1;
-    while (
-      this.#annotationAttachmentRefById.has(id) ||
-      this.attachments?.has(id)
-    ) {
-      id = `${baseId}-${i++}`;
+    if (isSound) {
+      this.#soundAttachmentIds.add(id);
    }
-
-    this.#annotationAttachmentIdByRef.put(ref, id);
-    this.#annotationAttachmentRefById.set(id, ref);
    return id;
  }

@ -1198,7 +1205,11 @@ class Catalog {
    if (ref) {
      const target = this.xref.fetch(ref);
      if (target instanceof BaseStream) {
-        return FileSpec.readStreamContent(target);
+        const content = FileSpec.readStreamContent(target);
+        if (this.#soundAttachmentIds.has(id)) {
+          return soundStreamToWav(target, content) ?? content;
+        }
+        return content;
      }
      return target instanceof Dict ? FileSpec.readContent(target) : null;
    }
--- a/src/core/sound.js
+++ b/src/core/sound.js
@ -0,0 +1,144 @@
+/* Copyright 2026 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { Name } from "./primitives.js";
+import { stringToBytes } from "../shared/util.js";
+
+// Size, in bytes, of the canonical 44-byte WAV header (RIFF + fmt + data
+// chunk headers) that precedes the sample data.
+const WAV_HEADER_SIZE = 44;
+
+/**
+ * Helpers for PDF sound objects (ISO 32000-1, 13.3 "Sounds"), as referenced
+ * by Sound annotations (ISO 32000-1, 12.5.6.16).
+ *
+ * Sound streams contain samples described by /R, /C, /B, /E, and optional /CO.
+ * We wrap supported uncompressed PCM (Raw/Signed, 8/16-bit, mono/stereo) in WAV
+ * for playback.
+ *
+ * @import { BaseStream } from "./base_stream.js";
+ * @import { Dict } from "./primitives.js";
+ */
+
+/**
+ * Return a supported uncompressed sample format.
+ *
+ * Only plain PCM that fits in a canonical WAV header is accepted: no `/CO`
+ * compression, a positive integer `/R`, one or two channels, 8 or 16 bits per
+ * sample, a Raw or Signed `/E`, and a byte rate representable as a uint32.
+ * `/C`, `/B` and `/E` default to 1, 8 and Raw when absent.
+ *
+ * @param {Dict} [dict] The sound object's stream dictionary.
+ * @returns {{
+ *   channels: number,
+ *   sampleRate: number,
+ *   bitsPerSample: number,
+ *   encoding: string,
+ * } | null} The format, or `null` when it is missing or unsupported.
+ */
+function getSoundFormat(dict) {
+  // `/CO` compression is beyond PDF stream filters and isn't decoded here.
+  if (!dict || dict.has("CO")) {
+    return null;
+  }
+  const sampleRate = dict.get("R");
+  if (!Number.isInteger(sampleRate) || sampleRate <= 0) {
+    return null;
+  }
+  const channels = dict.get("C") ?? 1;
+  if (!Number.isInteger(channels) || channels < 1 || channels > 2) {
+    return null;
+  }
+  const bitsPerSample = dict.get("B") ?? 8;
+  if (bitsPerSample !== 8 && bitsPerSample !== 16) {
+    return null;
+  }
+  // The WAV header stores the sample rate and the byte rate (sample rate
+  // times frame size) as unsigned 32-bit integers, and `DataView.setUint32`
+  // silently wraps larger values; reject rates that would corrupt the header
+  // rather than produce a WAV that plays at the wrong speed (or not at all).
+  if (sampleRate * channels * (bitsPerSample >> 3) > 0xffffffff) {
+    return null;
+  }
+  // `/E` is optional and defaults to Raw; a present-but-malformed value (one
+  // that isn't a name) is rejected rather than silently treated as Raw.
+  const e = dict.get("E");
+  let encoding = "Raw";
+  if (e !== undefined) {
+    encoding = e instanceof Name ? e.name : null;
+  }
+  if (encoding !== "Raw" && encoding !== "Signed") {
+    return null;
+  }
+  return { channels, sampleRate, bitsPerSample, encoding };
+}
+
+/**
+ * Build a WAV file from supported PDF sound samples.
+ *
+ * PDF 16-bit samples are big-endian; WAV uses little-endian, unsigned 8-bit
+ * samples, and signed 16-bit samples. The data chunk is trimmed to a whole
+ * number of frames (a multiple of the block alignment); a stream with no
+ * complete frame produces no WAV.
+ *
+ * The result is a new buffer: a 44-byte header (RIFF, "fmt " and "data" chunk
+ * headers) followed by the transcoded samples; `samples` isn't modified.
+ *
+ * @param {BaseStream} stream The sound object stream; only its dictionary is
+ *   read, to determine the sample format.
+ * @param {Uint8Array} samples Raw sample bytes from the stream.
+ * @returns {Uint8Array | null} The WAV bytes, or `null` when the format is
+ *   rejected by `getSoundFormat` or there is no complete frame.
+ */
+function soundStreamToWav(stream, samples) {
+  const format = getSoundFormat(stream.dict);
+  if (!format) {
+    return null;
+  }
+  const { channels, sampleRate, bitsPerSample, encoding } = format;
+  // Size of one frame in bytes: one sample per channel.
+  const blockAlign = channels * (bitsPerSample >> 3);
+  // Keep only whole frames, dropping a trailing partial frame; bail out when
+  // there isn't a single complete frame to play.
+  const dataLength = samples.length - (samples.length % blockAlign);
+  if (dataLength === 0) {
+    return null;
+  }
+
+  // NOTE(review): RIFF chunks are nominally padded to an even size; an
+  // odd-length (8-bit mono) data chunk is written without a pad byte here --
+  // confirm the target decoders tolerate that.
+  const wav = new Uint8Array(WAV_HEADER_SIZE + dataLength);
+  const view = new DataView(wav.buffer);
+  // All multi-byte header fields below are written little-endian.
+  wav.set(stringToBytes("RIFF"), 0);
+  // File size minus the first 8 bytes (the "RIFF" tag and this field).
+  view.setUint32(4, WAV_HEADER_SIZE - 8 + dataLength, true);
+  wav.set(stringToBytes("WAVE"), 8);
+  wav.set(stringToBytes("fmt "), 12);
+  view.setUint32(16, 16, true); // PCM fmt-chunk size.
+  view.setUint16(20, 1 /* = WAVE_FORMAT_PCM */, true);
+  view.setUint16(22, channels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, sampleRate * blockAlign, true); // Byte rate.
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  wav.set(stringToBytes("data"), 36);
+  view.setUint32(40, dataLength, true);
+
+  // Transcode the samples straight into the WAV data region (right after the
+  // header) so we never allocate or copy a separate sample buffer.
+  if (bitsPerSample === 16) {
+    const signed = encoding === "Signed";
+    for (let i = 0; i < dataLength; i += 2) {
+      // Assemble the big-endian 16-bit source sample.
+      let value = (samples[i] << 8) | samples[i + 1];
+      if (signed) {
+        // Reinterpret the unsigned bit pattern as two's complement.
+        if (value >= 0x8000) {
+          value -= 0x10000;
+        }
+      } else {
+        // Raw samples are unsigned: re-center them around zero.
+        value -= 0x8000;
+      }
+      view.setInt16(WAV_HEADER_SIZE + i, value, /* littleEndian = */ true);
+    }
+  } else if (encoding === "Signed") {
+    // 8-bit WAV is unsigned: shift signed bytes by 128 (modulo 256).
+    for (let i = 0; i < dataLength; i++) {
+      wav[WAV_HEADER_SIZE + i] = (samples[i] + 128) & 0xff;
+    }
+  } else {
+    // 8-bit Raw samples already match the WAV layout; copy them as-is.
+    wav.set(samples.subarray(0, dataLength), WAV_HEADER_SIZE);
+  }
+
+  return wav;
+}
+
+export { getSoundFormat, soundStreamToWav };
--- a/src/display/annotation_layer.js
+++ b/src/display/annotation_layer.js
@ -158,10 +158,12 @@ class AnnotationElementFactory {
      case AnnotationType.FILEATTACHMENT:
        return new FileAttachmentAnnotationElement(parameters);

-      // A Screen annotation with a rendition action plays embedded media the
-      // same way RichMedia does (see `MediaAnnotation` in the core layer).
+      // RichMedia, Screen (rendition action) and Sound annotations all expose a
+      // single embedded audio/video clip and play it the same way (see
+      // `MediaAnnotation` in the core layer).
      case AnnotationType.RICHMEDIA:
      case AnnotationType.SCREEN:
+      case AnnotationType.SOUND:
        return new MediaAnnotationElement(parameters);

      default:
--- a/test/integration/annotation_spec.mjs
+++ b/test/integration/annotation_spec.mjs
@ -1081,3 +1081,39 @@ describe("Screen annotation (rendition)", () => {
    });
  });
 });
+
+// Integration test: clicking a Sound annotation's play button must create an
+// <audio> element backed by a blob: URL.
+describe("Sound annotation", () => {
+  describe("multimedia_annotations.pdf", () => {
+    let pages;
+
+    beforeEach(async () => {
+      // "7R" is presumably the id of the Sound annotation in this fixture --
+      // verify against the PDF if the fixture changes.
+      pages = await loadAndWait(
+        "multimedia_annotations.pdf",
+        getAnnotationSelector("7R")
+      );
+    });
+
+    afterEach(async () => {
+      await closePages(pages);
+    });
+
+    it("must play the embedded sound when clicking the play button", async () => {
+      await Promise.all(
+        pages.map(async ([browserName, page]) => {
+          const annotationSelector = getAnnotationSelector("7R");
+          const buttonSelector = `${annotationSelector} .mediaPlayButton`;
+          const audioSelector = `${annotationSelector} audio.mediaContent`;
+
+          await page.waitForSelector(buttonSelector, { visible: true });
+          await page.click(buttonSelector);
+
+          // The <audio> element is only waited for after the click.
+          await page.waitForSelector(audioSelector, { visible: true });
+          // Only the URL scheme is checked; the audio itself isn't decoded or
+          // compared here.
+          const hasSource = await page.$eval(audioSelector, el =>
+            el.src.startsWith("blob:")
+          );
+          expect(hasSource).withContext(`In ${browserName}`).toEqual(true);
+        })
+      );
+    });
+  });
+});
--- a/test/unit/annotation_spec.js
+++ b/test/unit/annotation_spec.js
@ -30,12 +30,15 @@ import {
  bytesToString,
  DrawOPS,
  OPS,
+  PasswordException,
+  PasswordResponses,
  RenderingIntentFlag,
  stringToUTF8String,
 } from "../../src/shared/util.js";
 import {
  CMAP_URL,
  createIdFactory,
+  createSoundDict,
  DefaultBinaryDataFactory,
  fetchBuiltInCMapHelper,
  STANDARD_FONT_DATA_URL,
@ -5238,6 +5241,139 @@ describe("annotation", function () {
    });
  });

+  // Unit tests for `SoundAnnotation`: media data must only be exposed for a
+  // referenced sound stream in a supported format.
+  describe("SoundAnnotation", function () {
+    // Builds a mock xref entry for a Sound annotation; `/Sound` is only set
+    // when `soundRef` is provided.
+    function createSoundAnnotation(refNum, soundRef) {
+      const dict = new Dict();
+      dict.set("Type", Name.get("Annot"));
+      dict.set("Subtype", Name.get("Sound"));
+      if (soundRef) {
+        dict.set("Sound", soundRef);
+      }
+      return { ref: Ref.get(refNum, 0), data: dict };
+    }
+
+    it("should expose the embedded sound as a playable WAV asset", async function () {
+      const soundRef = Ref.get(300, 0);
+      // Four bytes with the default test format (16-bit mono), i.e. two
+      // complete frames.
+      const soundStream = new StringStream(
+        "\x00\x00\x01\x00",
+        createSoundDict({ type: true })
+      );
+      const annotation = createSoundAnnotation(301, soundRef);
+
+      const xref = new XRefMock([
+        { ref: soundRef, data: soundStream },
+        annotation,
+      ]);
+
+      const { data } = await AnnotationFactory.create(
+        xref,
+        annotation.ref,
+        annotationGlobalsMock,
+        idFactoryMock
+      );
+      expect(data.annotationType).toEqual(AnnotationType.SOUND);
+      expect(data.noHTML).toEqual(false);
+      expect(data.richMedia).toEqual({
+        fileId: "attachmentRef:300R",
+        filename: "sound.wav",
+        contentType: "audio/wav",
+      });
+    });
+
+    it("should not create media data for a compressed sound", async function () {
+      const soundRef = Ref.get(310, 0);
+      // A `/CO` entry marks the samples as compressed, which isn't supported.
+      const soundStream = new StringStream(
+        "\x00\x00",
+        createSoundDict({ type: true, CO: "ADPCM" })
+      );
+      const annotation = createSoundAnnotation(311, soundRef);
+
+      const xref = new XRefMock([
+        { ref: soundRef, data: soundStream },
+        annotation,
+      ]);
+
+      const { data } = await AnnotationFactory.create(
+        xref,
+        annotation.ref,
+        annotationGlobalsMock,
+        idFactoryMock
+      );
+      expect(data.annotationType).toEqual(AnnotationType.SOUND);
+      expect(data.noHTML).toEqual(true);
+      expect(data.richMedia).toBeUndefined();
+    });
+
+    it("should not create media data for an unsupported bit depth", async function () {
+      const soundRef = Ref.get(320, 0);
+      // Only 8 and 16 bits per sample are supported.
+      const soundStream = new StringStream(
+        "\x00\x00\x00",
+        createSoundDict({ type: true, B: 24 })
+      );
+      const annotation = createSoundAnnotation(321, soundRef);
+
+      const xref = new XRefMock([
+        { ref: soundRef, data: soundStream },
+        annotation,
+      ]);
+
+      const { data } = await AnnotationFactory.create(
+        xref,
+        annotation.ref,
+        annotationGlobalsMock,
+        idFactoryMock
+      );
+      expect(data.annotationType).toEqual(AnnotationType.SOUND);
+      expect(data.noHTML).toEqual(true);
+      expect(data.richMedia).toBeUndefined();
+    });
+
+    it("should not create media data without a sound object", async function () {
+      // No `/Sound` entry at all.
+      const annotation = createSoundAnnotation(331, null);
+      const xref = new XRefMock([annotation]);
+
+      const { data } = await AnnotationFactory.create(
+        xref,
+        annotation.ref,
+        annotationGlobalsMock,
+        idFactoryMock
+      );
+      expect(data.annotationType).toEqual(AnnotationType.SOUND);
+      expect(data.noHTML).toEqual(true);
+      expect(data.richMedia).toBeUndefined();
+    });
+
+    it("should request a password before wrapping encrypted sound content", function () {
+      const soundRef = Ref.get(340, 0);
+      const soundDict = createSoundDict({ type: true });
+      const soundStream = new StringStream("\x00\x00", soundDict);
+      const pagesDict = new Dict();
+      const catalogDict = new Dict();
+      catalogDict.set("Pages", pagesDict);
+
+      const xref = new XRefMock([{ ref: soundRef, data: soundStream }]);
+      // An `encrypt` entry without a key: presumably how an encrypted,
+      // not-yet-unlocked document looks to `Catalog` -- TODO confirm.
+      xref.encrypt = { encryptionKey: null };
+      xref.getCatalogObj = () => catalogDict;
+      for (const dict of [soundDict, pagesDict, catalogDict]) {
+        dict.assignXref(xref);
+      }
+
+      const catalog = new Catalog(pdfManagerMock, xref);
+      const soundId = catalog.getAttachmentIdForAnnotation(
+        soundRef,
+        /* isSound = */ true
+      );
+
+      try {
+        catalog.attachmentContent(soundId);
+        // Only reached when `attachmentContent` didn't throw: force a failure.
+        expect(false).toEqual(true);
+      } catch (ex) {
+        expect(ex).toBeInstanceOf(PasswordException);
+        expect(ex.code).toEqual(PasswordResponses.NEED_PASSWORD);
+      }
+    });
+  });
+
  describe("PopupAnnotation", function () {
    it("should inherit properties from its parent", async function () {
      const parentDict = new Dict();
--- a/test/unit/clitests.json
+++ b/test/unit/clitests.json
@ -49,6 +49,7 @@
    "postscript_spec.js",
    "primitives_spec.js",
    "scripting_utils_spec.js",
+    "sound_spec.js",
    "stream_spec.js",
    "string_utils_spec.js",
    "struct_tree_spec.js",
--- a/test/unit/jasmine-boot.js
+++ b/test/unit/jasmine-boot.js
@ -96,6 +96,7 @@ async function initializePDFJS(callback) {
      "pdfjs-test/unit/primitives_spec.js",
      "pdfjs-test/unit/scripting_spec.js",
      "pdfjs-test/unit/scripting_utils_spec.js",
+      "pdfjs-test/unit/sound_spec.js",
      "pdfjs-test/unit/stream_spec.js",
      "pdfjs-test/unit/string_utils_spec.js",
      "pdfjs-test/unit/struct_tree_spec.js",
--- a/test/unit/sound_spec.js
+++ b/test/unit/sound_spec.js
@ -0,0 +1,196 @@
+/* Copyright 2026 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { getSoundFormat, soundStreamToWav } from "../../src/core/sound.js";
+import { createSoundDict } from "./test_utils.js";
+import { StringStream } from "../../src/core/stream.js";
+
+describe("sound", function () {
+  // Builds a sound stream whose content is `bytes` (an array of byte values)
+  // and whose dictionary is `createSoundDict(opts)`.
+  function createSoundStream(bytes, opts) {
+    return new StringStream(
+      String.fromCharCode(...bytes),
+      createSoundDict(opts)
+    );
+  }
+
+  // Runs `soundStreamToWav` on a freshly built sound stream, passing the
+  // stream's own bytes as the samples; returns its result unchanged (a WAV
+  // buffer or null).
+  function createWav(bytes, opts) {
+    const stream = createSoundStream(bytes, opts);
+    return soundStreamToWav(stream, stream.getBytes());
+  }
+
+  // Decodes the fixed 44-byte WAV header (little-endian fields at their
+  // canonical offsets) and returns its fields, plus the bytes after the
+  // header as a plain array in `data`.
+  function parseWav(wav) {
+    const view = new DataView(wav.buffer, wav.byteOffset, wav.byteLength);
+    // Reads a four-character chunk tag starting at `offset`.
+    const tag = offset =>
+      String.fromCharCode(
+        wav[offset],
+        wav[offset + 1],
+        wav[offset + 2],
+        wav[offset + 3]
+      );
+    return {
+      riff: tag(0),
+      wave: tag(8),
+      fmt: tag(12),
+      dataTag: tag(36),
+      fmtSize: view.getUint32(16, true),
+      format: view.getUint16(20, true),
+      channels: view.getUint16(22, true),
+      sampleRate: view.getUint32(24, true),
+      byteRate: view.getUint32(28, true),
+      blockAlign: view.getUint16(32, true),
+      bitsPerSample: view.getUint16(34, true),
+      dataLength: view.getUint32(40, true),
+      data: Array.from(wav.subarray(44)),
+    };
+  }
+
+  // `getSoundFormat` must return the parsed format for supported dictionaries
+  // and null for anything it can't wrap in a WAV container.
+  describe("getSoundFormat", function () {
+    it("should read an explicit format", function () {
+      // `createSoundDict()` defaults: 22050 Hz, mono, 16-bit, Signed.
+      expect(getSoundFormat(createSoundDict())).toEqual({
+        channels: 1,
+        sampleRate: 22050,
+        bitsPerSample: 16,
+        encoding: "Signed",
+      });
+    });
+
+    it("should apply the spec defaults", function () {
+      // Only `/R` is required; `/C`, `/B` and `/E` default to 1, 8 and Raw.
+      expect(
+        getSoundFormat(createSoundDict({ C: null, B: null, E: null }))
+      ).toEqual({
+        channels: 1,
+        sampleRate: 22050,
+        bitsPerSample: 8,
+        encoding: "Raw",
+      });
+    });
+
+    it("should reject compressed sample data", function () {
+      expect(getSoundFormat(createSoundDict({ CO: "ADPCM" }))).toBeNull();
+    });
+
+    it("should reject an unsupported bit depth", function () {
+      expect(getSoundFormat(createSoundDict({ B: 24 }))).toBeNull();
+    });
+
+    it("should reject muLaw/ALaw encodings", function () {
+      expect(getSoundFormat(createSoundDict({ B: 8, E: "muLaw" }))).toBeNull();
+      expect(getSoundFormat(createSoundDict({ B: 8, E: "ALaw" }))).toBeNull();
+    });
+
+    it("should reject a present but non-name /E encoding", function () {
+      // `createSoundDict` always wraps `E` in a Name, so omit it and set the
+      // malformed value by hand.
+      const dict = createSoundDict({ E: null });
+      dict.set("E", 0); // Not a name object (a malformed `/E`).
+      expect(getSoundFormat(dict)).toBeNull();
+    });
+
+    it("should reject a missing or invalid sample rate", function () {
+      expect(getSoundFormat(createSoundDict({ R: null }))).toBeNull();
+      expect(getSoundFormat(createSoundDict({ R: 0 }))).toBeNull();
+    });
+
+    it("should reject a non-integer or non-finite sample rate", function () {
+      expect(getSoundFormat(createSoundDict({ R: 22050.5 }))).toBeNull();
+      expect(getSoundFormat(createSoundDict({ R: Infinity }))).toBeNull();
+    });
+
+    it("should reject an unsupported channel count", function () {
+      expect(getSoundFormat(createSoundDict({ C: 3 }))).toBeNull();
+    });
+  });
+
+  // `soundStreamToWav` must emit a valid 44-byte PCM header followed by the
+  // samples transcoded to the WAV conventions. Unless overridden, the test
+  // format is the `createSoundDict` default (22050 Hz, mono, 16-bit, Signed).
+  describe("soundStreamToWav", function () {
+    it("should wrap 16-bit signed samples in a little-endian WAV", function () {
+      // Big-endian source samples 0x1234 and 0xFFFE (-2).
+      const wav = parseWav(createWav([0x12, 0x34, 0xff, 0xfe]));
+      expect(wav.riff).toEqual("RIFF");
+      expect(wav.wave).toEqual("WAVE");
+      expect(wav.fmt).toEqual("fmt ");
+      expect(wav.dataTag).toEqual("data");
+      expect(wav.fmtSize).toEqual(16);
+      expect(wav.format).toEqual(1); // PCM
+      expect(wav.channels).toEqual(1);
+      expect(wav.sampleRate).toEqual(22050);
+      expect(wav.bitsPerSample).toEqual(16);
+      expect(wav.blockAlign).toEqual(2);
+      expect(wav.byteRate).toEqual(22050 * 2);
+      expect(wav.dataLength).toEqual(4);
+      // Bytes are swapped to little-endian, values unchanged.
+      expect(wav.data).toEqual([0x34, 0x12, 0xfe, 0xff]);
+    });
+
+    it("should shift 16-bit raw (unsigned) samples into the signed range", function () {
+      // Big-endian unsigned 0x8000 (mid) and 0x0000 (min).
+      const wav = parseWav(createWav([0x80, 0x00, 0x00, 0x00], { E: "Raw" }));
+      // 0x8000 - 0x8000 = 0; 0x0000 - 0x8000 = -32768 (0x8000 little-endian).
+      expect(wav.data).toEqual([0x00, 0x00, 0x00, 0x80]);
+    });
+
+    it("should copy 8-bit raw (unsigned) samples unchanged", function () {
+      const wav = parseWav(createWav([0x00, 0x7f, 0xff], { B: 8, E: "Raw" }));
+      expect(wav.bitsPerSample).toEqual(8);
+      expect(wav.blockAlign).toEqual(1);
+      expect(wav.data).toEqual([0x00, 0x7f, 0xff]);
+    });
+
+    it("should convert 8-bit signed samples to unsigned", function () {
+      // Signed bytes 0, 127, -128, -1 -> unsigned 128, 255, 0, 127.
+      const wav = parseWav(
+        createWav([0x00, 0x7f, 0x80, 0xff], { B: 8, E: "Signed" })
+      );
+      expect(wav.data).toEqual([0x80, 0xff, 0x00, 0x7f]);
+    });
+
+    it("should report stereo block alignment", function () {
+      const wav = parseWav(createWav([0, 0, 0, 0], { C: 2 }));
+      expect(wav.channels).toEqual(2);
+      expect(wav.blockAlign).toEqual(4); // 2 channels * 2 bytes (16-bit).
+      expect(wav.byteRate).toEqual(22050 * 4);
+    });
+
+    it("should trim a trailing partial frame (16-bit stereo)", function () {
+      // blockAlign = 4; six bytes is one whole frame plus a partial one.
+      const wav = parseWav(createWav([1, 2, 3, 4, 5, 6], { C: 2 }));
+      expect(wav.blockAlign).toEqual(4);
+      expect(wav.dataLength).toEqual(4);
+      // Only the first frame, byte-swapped to little-endian.
+      expect(wav.data).toEqual([2, 1, 4, 3]);
+    });
+
+    it("should trim a trailing partial frame (8-bit stereo)", function () {
+      // blockAlign = 2; three bytes is one whole frame plus a partial one.
+      const wav = parseWav(createWav([10, 20, 30], { C: 2, B: 8, E: "Raw" }));
+      expect(wav.blockAlign).toEqual(2);
+      expect(wav.dataLength).toEqual(2);
+      expect(wav.data).toEqual([10, 20]);
+    });
+
+    it("should return null when there is no complete frame", function () {
+      // An empty stream, and a stereo stream with only a partial frame.
+      expect(createWav([], {})).toBeNull();
+      expect(createWav([1, 2], { C: 2 })).toBeNull();
+    });
+
+    it("should return null for an unsupported format", function () {
+      // The format check comes from the stream's dictionary, so the `/CO`
+      // entry is rejected regardless of the sample bytes passed in.
+      expect(
+        soundStreamToWav(
+          createSoundStream([0, 0], { CO: "ADPCM" }),
+          new Uint8Array([0, 0])
+        )
+      ).toBeNull();
+    });
+  });
+});
--- a/test/unit/test_utils.js
+++ b/test/unit/test_utils.js
@ -14,6 +14,7 @@
 */

 import { assert, isNodeJS } from "../../src/shared/util.js";
+import { Dict, Name, Ref } from "../../src/core/primitives.js";
 import {
  fetchData as fetchDataNode,
  NodeBinaryDataFactory,
@ -22,7 +23,6 @@ import { NullStream, StringStream } from "../../src/core/stream.js";
 import { Page, PDFDocument } from "../../src/core/document.js";
 import { DOMBinaryDataFactory } from "../../src/display/binary_data_factory.js";
 import { fetchData as fetchDataDOM } from "../../src/display/display_utils.js";
-import { Ref } from "../../src/core/primitives.js";

 const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";

@ -74,6 +74,39 @@ function buildGetDocumentParams(filename, options) {
  return params;
 }

+// Builds a PDF sound object's stream dictionary (ISO 32000-1, 13.3 "Sounds").
+// Defaults describe 22050 Hz, mono, 16-bit, Signed samples. Pass `R`, `C`, `B`
+// or `E` as `null` to omit that key (to exercise defaults/missing entries);
+// `CO` is omitted whenever it is falsy. `E` and `CO` are given as strings and
+// stored as names. Pass `type: true` to add the optional `/Type /Sound` entry.
+function createSoundDict({
+  R = 22050,
+  C = 1,
+  B = 16,
+  E = "Signed",
+  CO,
+  type = false,
+} = {}) {
+  const dict = new Dict();
+  if (type) {
+    dict.set("Type", Name.get("Sound"));
+  }
+  // Numeric entries are stored as given, so invalid values (e.g. `R: 0`) can
+  // be injected on purpose.
+  if (R !== null) {
+    dict.set("R", R);
+  }
+  if (C !== null) {
+    dict.set("C", C);
+  }
+  if (B !== null) {
+    dict.set("B", B);
+  }
+  if (E !== null) {
+    dict.set("E", Name.get(E));
+  }
+  if (CO) {
+    dict.set("CO", Name.get(CO));
+  }
+  return dict;
+}
+
 function getCrossOriginHostname(hostname) {
  if (hostname === "localhost") {
    // Note: This does not work if localhost is listening on IPv6 only.
@ -254,6 +287,7 @@ export {
  buildGetDocumentParams,
  CMAP_URL,
  createIdFactory,
+  createSoundDict,
  DefaultBinaryDataFactory,
  DefaultFileReaderFactory,
  fetchBuiltInCMapHelper,