diff --git a/test/integration/accessibility_spec.mjs b/test/integration/accessibility_spec.mjs index 2c04b5f00..8d78d7fb6 100644 --- a/test/integration/accessibility_spec.mjs +++ b/test/integration/accessibility_spec.mjs @@ -544,4 +544,71 @@ describe("accessibility", () => { ); }); }); + + describe("A TH in a TR itself in a TBody is rowheader", () => { + let pages; + + beforeEach(async () => { + pages = await loadAndWait("bug2014080.pdf", ".textLayer"); + }); + + afterEach(async () => { + await closePages(pages); + }); + + it("must check that the table has the right structure", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + let elementRole = await page.evaluate(() => + Array.from( + document.querySelector(".structTree [role='table']").children + ).map(child => child.getAttribute("role")) + ); + + // THeader and TBody must be rowgroup. + expect(elementRole) + .withContext(`In ${browserName}`) + .toEqual(["rowgroup", "rowgroup"]); + + elementRole = await page.evaluate(() => + Array.from( + document.querySelector( + ".structTree [role='table'] > [role='rowgroup'] > [role='row']" + ).children + ).map(child => child.getAttribute("role")) + ); + + // THeader has 3 columnheader. + expect(elementRole) + .withContext(`In ${browserName}`) + .toEqual(["columnheader", "columnheader", "columnheader"]); + + elementRole = await page.evaluate(() => + Array.from( + document.querySelector( + ".structTree [role='table'] > [role='rowgroup']:nth-child(2)" + ).children + ).map(child => child.getAttribute("role")) + ); + + // TBody has 5 rows. + expect(elementRole) + .withContext(`In ${browserName}`) + .toEqual(["row", "row", "row", "row", "row"]); + + elementRole = await page.evaluate(() => + Array.from( + document.querySelector( + ".structTree [role='table'] > [role='rowgroup']:nth-child(2) > [role='row']:first-child" + ).children + ).map(child => child.getAttribute("role")) + ); + // First row has a rowheader and 2 cells. + expect(elementRole) + .withContext(`In ${browserName}`) + .toEqual(["rowheader", "cell", "cell"]); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 7d924f753..73665e3be 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -872,3 +872,4 @@ !page_with_number_and_link.pdf !Brotli-Prototype-FileA.pdf !bug2013793.pdf +!bug2014080.pdf diff --git a/test/pdfs/bug2014080.pdf b/test/pdfs/bug2014080.pdf new file mode 100755 index 000000000..1fc44d80d Binary files /dev/null and b/test/pdfs/bug2014080.pdf differ diff --git a/web/struct_tree_layer_builder.js b/web/struct_tree_layer_builder.js index 8f561eac5..b1bf2189e 100644 --- a/web/struct_tree_layer_builder.js +++ b/web/struct_tree_layer_builder.js @@ -61,8 +61,8 @@ const PDF_ROLE_TO_HTML_ROLE = { TR: "row", TH: "columnheader", TD: "cell", - THead: "columnheader", - TBody: null, + THead: "rowgroup", + TBody: "rowgroup", TFoot: null, // Standard structure type Caption Caption: null, @@ -353,7 +353,7 @@ class StructTreeLayerBuilder { } } - #walk(node) { + #walk(node, parentNodes = []) { if (!node) { return null; } @@ -378,7 +378,14 @@ class StructTreeLayerBuilder { element.setAttribute("role", "heading"); element.setAttribute("aria-level", match[1]); } else if (PDF_ROLE_TO_HTML_ROLE[role]) { - element.setAttribute("role", PDF_ROLE_TO_HTML_ROLE[role]); + element.setAttribute( + "role", + role === "TH" && + parentNodes.at(-1)?.role === "TR" && + parentNodes.at(-2)?.role === "TBody" + ? "rowheader" // TH inside TR itself in TBody is a rowheader. + : PDF_ROLE_TO_HTML_ROLE[role] + ); } if (role === "Figure" && this.#addImageInTextLayer(node, element)) { return element; @@ -423,9 +430,11 @@ class StructTreeLayerBuilder { // parent node to avoid creating an extra span. this.#setAttributes(node.children[0], element); } else { + parentNodes.push(node); for (const kid of node.children) { - element.append(this.#walk(kid)); + element.append(this.#walk(kid, parentNodes)); } + parentNodes.pop(); } } return element;