diff --git a/test/integration/accessibility_spec.mjs b/test/integration/accessibility_spec.mjs
index 8d78d7fb6..330b1d434 100644
--- a/test/integration/accessibility_spec.mjs
+++ b/test/integration/accessibility_spec.mjs
@@ -320,37 +320,22 @@ describe("accessibility", () => {
it("must check that the MathML is correctly inserted", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
- const isSanitizerSupported = await page.evaluate(() => {
- try {
- // eslint-disable-next-line no-undef
- return typeof Sanitizer !== "undefined";
- } catch {
- return false;
- }
- });
- if (isSanitizerSupported) {
- const mathML = await page.$eval(
- "span.structTree span[aria-owns='p58R_mc13'] > math",
- el => el?.innerHTML ?? ""
+ const mathML = await page.$eval(
+ "span.structTree span[aria-owns='p58R_mc13'] > math",
+ el => el?.innerHTML ?? ""
+ );
+ expect(mathML)
+ .withContext(`In ${browserName}`)
+ .toEqual(
+ ` x2 = |x| `
);
- expect(mathML)
- .withContext(`In ${browserName}`)
- .toEqual(
- ` x2 = |x| `
- );
- // Check that the math corresponding element is hidden in the text
- // layer.
- const ariaHidden = await page.$eval("span#p58R_mc13", el =>
- el.getAttribute("aria-hidden")
- );
- expect(ariaHidden).withContext(`In ${browserName}`).toEqual("true");
- } else {
- // eslint-disable-next-line no-console
- console.log(
- `Pending in Chrome: Sanitizer API (in ${browserName}) is not supported`
- );
- }
+ // Check that the element corresponding to the math is hidden in the
+ // text layer.
+ const ariaHidden = await page.$eval("span#p58R_mc13", el =>
+ el.getAttribute("aria-hidden")
+ );
+ expect(ariaHidden).withContext(`In ${browserName}`).toEqual("true");
})
);
});
@@ -370,30 +355,15 @@ describe("accessibility", () => {
it("must check that the MathML is correctly inserted", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
- const isSanitizerSupported = await page.evaluate(() => {
- try {
- // eslint-disable-next-line no-undef
- return typeof Sanitizer !== "undefined";
- } catch {
- return false;
- }
- });
- if (isSanitizerSupported) {
- const mathML = await page.$eval(
- "span.structTree span[aria-owns='p21R_mc64']",
- el => el?.innerHTML ?? ""
+ const mathML = await page.$eval(
+ "span.structTree span[aria-owns='p21R_mc64']",
+ el => el?.innerHTML ?? ""
+ );
+ expect(mathML)
+ .withContext(`In ${browserName}`)
+ .toEqual(
+ ''
);
- expect(mathML)
- .withContext(`In ${browserName}`)
- .toEqual(
- ''
- );
- } else {
- // eslint-disable-next-line no-console
- console.log(
- `Pending in Chrome: Sanitizer API (in ${browserName}) is not supported`
- );
- }
})
);
});
@@ -475,25 +445,11 @@ describe("accessibility", () => {
it("must check that there's no alt-text on the MathML node", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
- const isSanitizerSupported = await page.evaluate(() => {
- try {
- // eslint-disable-next-line no-undef
- return typeof Sanitizer !== "undefined";
- } catch {
- return false;
- }
- });
const ariaLabel = await page.$eval(
"span[aria-owns='p3R_mc2']",
el => el.getAttribute("aria-label") || ""
);
- if (isSanitizerSupported) {
- expect(ariaLabel).withContext(`In ${browserName}`).toEqual("");
- } else {
- expect(ariaLabel)
- .withContext(`In ${browserName}`)
- .toEqual("cube root of , x plus y end cube root ");
- }
+ expect(ariaLabel).withContext(`In ${browserName}`).toEqual("");
})
);
});
@@ -513,14 +469,6 @@ describe("accessibility", () => {
it("must check that the text in text layer is aria-hidden", async () => {
await Promise.all(
pages.map(async ([browserName, page]) => {
- const isSanitizerSupported = await page.evaluate(() => {
- try {
- // eslint-disable-next-line no-undef
- return typeof Sanitizer !== "undefined";
- } catch {
- return false;
- }
- });
const ariaHidden = await page.evaluate(() =>
Array.from(
document.querySelectorAll(".structTree :has(> math)")
@@ -530,16 +478,64 @@ describe("accessibility", () => {
.getAttribute("aria-hidden")
)
);
- if (isSanitizerSupported) {
- expect(ariaHidden)
- .withContext(`In ${browserName}`)
- .toEqual(["true", "true", "true"]);
- } else {
- // eslint-disable-next-line no-console
- console.log(
- `Pending in Chrome: Sanitizer API (in ${browserName}) is not supported`
+ expect(ariaHidden)
+ .withContext(`In ${browserName}`)
+ .toEqual(["true", "true", "true"]);
+ })
+ );
+ });
+ });
+
+ describe("MathML in AF entry with struct tree children must not be duplicated", () => {
+ let pages;
+
+ beforeEach(async () => {
+ pages = await loadAndWait("bug2025674.pdf", ".textLayer");
+ });
+
+ afterEach(async () => {
+ await closePages(pages);
+ });
+
+ it("must check that the MathML is not duplicated in the struct tree", async () => {
+ await Promise.all(
+ pages.map(async ([browserName, page]) => {
+ // The Formula node has both AF MathML and struct tree children.
+ // When AF MathML is present, children must not be walked to avoid
+ // rendering the math content twice in the accessibility tree.
+ const mathCount = await page.evaluate(
+ () => document.querySelectorAll(".structTree math").length
+ );
+ expect(mathCount).withContext(`In ${browserName}`).toBe(1);
+
+ // All text layer elements referenced by the formula subtree must
+ // be aria-hidden so screen readers don't read both the MathML and
+ // the underlying text content.
+ const allHidden = await page.evaluate(() => {
+ const ids = [];
+ for (const el of document.querySelectorAll(
+ ".structTree [aria-owns]"
+ )) {
+ if (el.closest("math")) {
+ ids.push(el.getAttribute("aria-owns"));
+ }
+ }
+ // Also collect ids from the formula span itself.
+ for (const el of document.querySelectorAll(
+ ".structTree span:has(> math)"
+ )) {
+ const owned = el.getAttribute("aria-owns");
+ if (owned) {
+ ids.push(owned);
+ }
+ }
+ return ids.every(
+ id =>
+ document.getElementById(id)?.getAttribute("aria-hidden") ===
+ "true"
);
- }
+ });
+ expect(allHidden).withContext(`In ${browserName}`).toBeTrue();
})
);
});
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index e6936d0f7..7d628caf2 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -892,3 +892,4 @@
!issue20930.pdf
!text_rise_eol_bug.pdf
!hello_world_rotated.pdf
+!bug2025674.pdf
diff --git a/test/pdfs/bug2025674.pdf b/test/pdfs/bug2025674.pdf
new file mode 100644
index 000000000..24a07b32c
Binary files /dev/null and b/test/pdfs/bug2025674.pdf differ
diff --git a/web/struct_tree_layer_builder.js b/web/struct_tree_layer_builder.js
index 6214b5fcd..abad3a5a1 100644
--- a/web/struct_tree_layer_builder.js
+++ b/web/struct_tree_layer_builder.js
@@ -350,12 +350,25 @@ class StructTreeLayerBuilder {
}
}
+ #collectIds(node, ids) {
+ if (!node) {
+ return;
+ }
+ if ("id" in node) {
+ ids.push(node.id);
+ }
+ for (const kid of node.children || []) {
+ this.#collectIds(kid, ids);
+ }
+ }
+
#walk(node, parentNodes = []) {
if (!node) {
return null;
}
let element;
+ let visitChildren = true;
if ("role" in node) {
const { role } = node;
if (MathMLElements.has(role)) {
@@ -389,18 +402,14 @@ class StructTreeLayerBuilder {
}
if (role === "Formula") {
if (node.mathML && MathMLSanitizer.sanitizer) {
+ visitChildren = false;
element.setHTML(node.mathML, {
sanitizer: MathMLSanitizer.sanitizer,
});
// Hide all the corresponding content elements in the text layer in
// order to avoid screen readers reading both the MathML and the
// text content.
- for (const { id } of node.children || []) {
- if (!id) {
- continue;
- }
- (this.#elementsToHideInTextLayer ||= []).push(id);
- }
+ this.#collectIds(node, (this.#elementsToHideInTextLayer ||= []));
// For now, we don't want to keep the alt text if there's valid
// MathML (see https://github.com/w3c/mathml-aam/issues/37).
// TODO: Revisit this decision in the future.
@@ -426,7 +435,7 @@ class StructTreeLayerBuilder {
// Often there is only one content node so just set the values on the
// parent node to avoid creating an extra span.
this.#setAttributes(node.children[0], element);
- } else {
+ } else if (visitChildren) {
parentNodes.push(node);
for (const kid of node.children) {
element.append(this.#walk(kid, parentNodes));