diff --git a/test/integration/accessibility_spec.mjs b/test/integration/accessibility_spec.mjs index 8d78d7fb6..330b1d434 100644 --- a/test/integration/accessibility_spec.mjs +++ b/test/integration/accessibility_spec.mjs @@ -320,37 +320,22 @@ describe("accessibility", () => { it("must check that the MathML is correctly inserted", async () => { await Promise.all( pages.map(async ([browserName, page]) => { - const isSanitizerSupported = await page.evaluate(() => { - try { - // eslint-disable-next-line no-undef - return typeof Sanitizer !== "undefined"; - } catch { - return false; - } - }); - if (isSanitizerSupported) { - const mathML = await page.$eval( - "span.structTree span[aria-owns='p58R_mc13'] > math", - el => el?.innerHTML ?? "" + const mathML = await page.$eval( + "span.structTree span[aria-owns='p58R_mc13'] > math", + el => el?.innerHTML ?? "" + ); + expect(mathML) + .withContext(`In ${browserName}`) + .toEqual( + ` x2 = |x| ` ); - expect(mathML) - .withContext(`In ${browserName}`) - .toEqual( - ` x2 = |x| ` - ); - // Check that the math corresponding element is hidden in the text - // layer. - const ariaHidden = await page.$eval("span#p58R_mc13", el => - el.getAttribute("aria-hidden") - ); - expect(ariaHidden).withContext(`In ${browserName}`).toEqual("true"); - } else { - // eslint-disable-next-line no-console - console.log( - `Pending in Chrome: Sanitizer API (in ${browserName}) is not supported` - ); - } + // Check that the math corresponding element is hidden in the text + // layer. + const ariaHidden = await page.$eval("span#p58R_mc13", el => + el.getAttribute("aria-hidden") + ); + expect(ariaHidden).withContext(`In ${browserName}`).toEqual("true"); }) ); }); @@ -370,30 +355,15 @@ describe("accessibility", () => { it("must check that the MathML is correctly inserted", async () => { await Promise.all( pages.map(async ([browserName, page]) => { - const isSanitizerSupported = await page.evaluate(() => { - try { - // eslint-disable-next-line no-undef - return typeof Sanitizer !== "undefined"; - } catch { - return false; - } - }); - if (isSanitizerSupported) { - const mathML = await page.$eval( - "span.structTree span[aria-owns='p21R_mc64']", - el => el?.innerHTML ?? "" + const mathML = await page.$eval( + "span.structTree span[aria-owns='p21R_mc64']", + el => el?.innerHTML ?? "" + ); + expect(mathML) + .withContext(`In ${browserName}`) + .toEqual( + ' 𝑛 𝑝 = 𝑛 mod 𝑝 ' ); - expect(mathML) - .withContext(`In ${browserName}`) - .toEqual( - ' 𝑛 𝑝 = 𝑛 mod 𝑝 ' - ); - } else { - // eslint-disable-next-line no-console - console.log( - `Pending in Chrome: Sanitizer API (in ${browserName}) is not supported` - ); - } }) ); }); @@ -475,25 +445,11 @@ describe("accessibility", () => { it("must check that there's no alt-text on the MathML node", async () => { await Promise.all( pages.map(async ([browserName, page]) => { - const isSanitizerSupported = await page.evaluate(() => { - try { - // eslint-disable-next-line no-undef - return typeof Sanitizer !== "undefined"; - } catch { - return false; - } - }); const ariaLabel = await page.$eval( "span[aria-owns='p3R_mc2']", el => el.getAttribute("aria-label") || "" ); - if (isSanitizerSupported) { - expect(ariaLabel).withContext(`In ${browserName}`).toEqual(""); - } else { - expect(ariaLabel) - .withContext(`In ${browserName}`) - .toEqual("cube root of , x plus y end cube root "); - } + expect(ariaLabel).withContext(`In ${browserName}`).toEqual(""); }) ); }); @@ -513,14 +469,6 @@ describe("accessibility", () => { it("must check that the text in text layer is aria-hidden", async () => { await Promise.all( pages.map(async ([browserName, page]) => { - const isSanitizerSupported = await page.evaluate(() => { - try { - // eslint-disable-next-line no-undef - return typeof Sanitizer !== "undefined"; - } catch { - return false; - } - }); const ariaHidden = await page.evaluate(() => Array.from( document.querySelectorAll(".structTree :has(> math)") @@ -530,16 +478,64 @@ describe("accessibility", () => { .getAttribute("aria-hidden") ) ); - if (isSanitizerSupported) { - expect(ariaHidden) - .withContext(`In ${browserName}`) - .toEqual(["true", "true", "true"]); - } else { - // eslint-disable-next-line no-console - console.log( - `Pending in Chrome: Sanitizer API (in ${browserName}) is not supported` + expect(ariaHidden) + .withContext(`In ${browserName}`) + .toEqual(["true", "true", "true"]); + }) + ); + }); + }); + + describe("MathML in AF entry with struct tree children must not be duplicated", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait("bug2025674.pdf", ".textLayer"); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("must check that the MathML is not duplicated in the struct tree", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + // The Formula node has both AF MathML and struct tree children. + // When AF MathML is present, children must not be walked to avoid + // rendering the math content twice in the accessibility tree. + const mathCount = await page.evaluate( + () => document.querySelectorAll(".structTree math").length + ); + expect(mathCount).withContext(`In ${browserName}`).toBe(1); + + // All text layer elements referenced by the formula subtree must + // be aria-hidden so screen readers don't read both the MathML and + // the underlying text content. + const allHidden = await page.evaluate(() => { + const ids = []; + for (const el of document.querySelectorAll( + ".structTree [aria-owns]" + )) { + if (el.closest("math")) { + ids.push(el.getAttribute("aria-owns")); + } + } + // Also collect ids from the formula span itself. + for (const el of document.querySelectorAll( + ".structTree span:has(> math)" + )) { + const owned = el.getAttribute("aria-owns"); + if (owned) { + ids.push(owned); + } + } + return ids.every( + id => + document.getElementById(id)?.getAttribute("aria-hidden") === + "true" ); - } + }); + expect(allHidden).withContext(`In ${browserName}`).toBeTrue(); }) ); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index e6936d0f7..7d628caf2 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -892,3 +892,4 @@ !issue20930.pdf !text_rise_eol_bug.pdf !hello_world_rotated.pdf +!bug2025674.pdf diff --git a/test/pdfs/bug2025674.pdf b/test/pdfs/bug2025674.pdf new file mode 100644 index 000000000..24a07b32c Binary files /dev/null and b/test/pdfs/bug2025674.pdf differ diff --git a/web/struct_tree_layer_builder.js b/web/struct_tree_layer_builder.js index 6214b5fcd..abad3a5a1 100644 --- a/web/struct_tree_layer_builder.js +++ b/web/struct_tree_layer_builder.js @@ -350,12 +350,25 @@ class StructTreeLayerBuilder { } } + #collectIds(node, ids) { + if (!node) { + return; + } + if ("id" in node) { + ids.push(node.id); + } + for (const kid of node.children || []) { + this.#collectIds(kid, ids); + } + } + #walk(node, parentNodes = []) { if (!node) { return null; } let element; + let visitChildren = true; if ("role" in node) { const { role } = node; if (MathMLElements.has(role)) { @@ -389,18 +402,14 @@ class StructTreeLayerBuilder { } if (role === "Formula") { if (node.mathML && MathMLSanitizer.sanitizer) { + visitChildren = false; element.setHTML(node.mathML, { sanitizer: MathMLSanitizer.sanitizer, }); // Hide all the corresponding content elements in the text layer in // order to avoid screen readers reading both the MathML and the // text content. - for (const { id } of node.children || []) { - if (!id) { - continue; - } - (this.#elementsToHideInTextLayer ||= []).push(id); - } + this.#collectIds(node, (this.#elementsToHideInTextLayer ||= [])); // For now, we don't want to keep the alt text if there's valid // MathML (see https://github.com/w3c/mathml-aam/issues/37). // TODO: Revisit this decision in the future. @@ -426,7 +435,7 @@ class StructTreeLayerBuilder { // Often there is only one content node so just set the values on the // parent node to avoid creating an extra span. this.#setAttributes(node.children[0], element); - } else { + } else if (visitChildren) { parentNodes.push(node); for (const kid of node.children) { element.append(this.#walk(kid, parentNodes));