From 923a778d261b7587de74b7dedbf3ad05a4d8cbfc Mon Sep 17 00:00:00 2001 From: calixteman Date: Mon, 22 Dec 2025 17:41:50 +0100 Subject: [PATCH] Escape punctuation signs in search query It fixes #20516. The chars `*{}()[]\` are punctuation signs in regex, so they need to be escaped when searching for them literally. --- test/pdfs/.gitignore | 1 + test/pdfs/issue20516.pdf | Bin 0 -> 8572 bytes test/unit/pdf_find_controller_spec.js | 20 ++++++++++++++++++++ web/pdf_find_controller.js | 9 ++++++--- 4 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/issue20516.pdf diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index acd3a66ad..6bef34dd9 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -765,3 +765,4 @@ !two_paragraphs.pdf !paragraph_and_link.pdf !issue20225.pdf +!issue20516.pdf diff --git a/test/pdfs/issue20516.pdf b/test/pdfs/issue20516.pdf new file mode 100644 index 0000000000000000000000000000000000000000..77695291b93d77e7eff4cf018225b719b8fdd4f6 GIT binary patch literal 8572 zcmcIqcU)6Vv!+Q2Dpi!CL^@JKLKEp-KswSPgb+dr2%$@pB2}bGQ9z_99i@mMAYDM3 zbd)B&gVLnvJ%QKz>i2%P{c&?nlCyJmXXnh!<~Pr?Vb@Yn;)C!*LF~jY}X-n)cn(!MH zMdpDYw|U2MM;l~ckJimZSwrc0wz8S85!NeBUGv#%)?GpvM~tw)FfkzZ(k*T)XKUTh zn|~j;ne(zT3-r_OaP&$2kAg7Wy{0zGGcLDAHb41FE5Aida7CLKVm^11OV~@rgOC{O zza9#>!g&Jnn@9A#oB{KA{guw#$UBNhy z0v$Av)@Zn#lPB0j5Dm7x}yR;1(EdCSQb%vHex)G;-JV4KaPuY1PQujE%ZV^ zi8{6uxrIU@LUfsA&k9*)noAsZpU?Buv?nKV%w{qeb<@9vrAO6hO{B6_eO6U5a zfkf^_=|EQ13s9uuT<-H+HH8l!&Yn-|u|D3T{5kVZ9NpF4#QP-U-NmhtMSbnX1hU|8 z+nSIYfrbzHtwF*5gVX+(L^w(4{nG4dZQU&Jd_%;ws#HndJ4xcnvl6mlq`e7zS4=1& zp!aoeZHr<{Tw{d1e=Hm@lbc3hrqyLxbmw=^##pqLzIlhq+ar}T4J<>E%MCg_Ks8^Nm#7%i1xB2{keoKo<- z@Py>zyHC?MtLUW8zizxZ&A5rj8)DHIPb%}Cl(0|9kd$#tLGGa;8$Q)l`&gE2f;`34 zhv%~y-$bj#-kBvNzMlzCzC_nbKXczGYTC*GK4f)Ma4;E>Y@mK|Uc`nH8ujiM%?6e+mL{c1F9}EBs}fA-+uqTCVRH7Xx@0PZ5iTZrO)(r*6R^^B zZkl7u^X;_>+Kqb@4X&G3cdyog{F(f(OvW&`)^k22GbXaXM{)HLn9YGrkCK6Of%G_- zsu89g_(BuQ5=!aSY-+{*^1+DgTU92GTk*82p==j**tk>51NBtxSj;#iIcl%Qw_mqb zcDZCEW3E}umI1b4r-+l*O5oz4)x621C^vdb{N@`OFBvcS9G!D7Faw79$<=AbnzahP zH+=<^yuKM5nk$Y4{pxq$Tpc|M~YLKZk zUEU}eqN z6TGY}ntj+GztpnLyjZ(UhGa*Y%yG>z%`wcCb?6W`3R><%b?9@V;-hM!V!pxV-}KD% zd=e=^oBw>vo%sD4=GxsvmPDIG{KS}ib$zEn+QGDZ$9&BJK|2mRxZSAT;UIrObkFl1 zkMy223)s)uqx;LH)#2V%2X;G6%aY6S45H@5-+}4=9 zg2;Hzc%i~vFfnEQgPsk0S)_PoQSsVyzCNk!@dC*r#EUXRRMxGsZW^;VeiKpI*QT#^ zJ>)di1Plc%O0+jJrL;4OGP3KH>rMPS-t~gVEF)zfH@wJ<$gGE_!?UIltogA8eb=&# zvm{!@5S4c#wxniEI7-9QRO3{m+ck7Y1}5|@t9#0O&f3r0a=;3NYucWbel{{HR2q6J z-gM*r-7K%i-+9O-BMl?d=*Hupw`zKJ&+oDvavVZPB=39PuRXsvf_3Mr3HBYNT3MW~ z>fVZ)A>s7leA%!1d@{|bo4fm5cNY2Ci<=h*_%6Bh-I=apa*5@)=8v-Qbj}>m|CBgm zymIhCV>o)jz1gvOd_;2exayFHLX9F;IkGL`dHe!H54m|*QpxSu+a<5#_N+GSHl9me zmkO7PmZGV1uPg8s*oPkz9J?Qke;eLhI2bYJ)Eb!5S&2)f{K<-H1J>&+N@L9792d0_RH7F*9E9VRLuM+xs}Kc7JB| z3utDYA8PujY<0|~VlJ<xDKvTA2%I%fK*TC7lgog=P+|azfye@Qb z`F?r796LOsl*bGz$yqDV1j`bXukJt(uItl0lj)09=W;c&sY!nDLw!djitRCbkP&&A z-Kz`vcYd_H61brt@<9xAsFnVH;r&H&mVbkIV8a`ti{Lrm&??Xu77wY0Ie zPs8<-j{g1l(S$7|Qdel1-ul1a)3Ps;b!>ruo-S|!IBm<>&-rq*j4eZFKzqG8|i0njCw`* z<}Yy`XY5w?8b=ss4t*X9ugn%f`>6X&E(P?bUGBcjC;j;6-R-E|l+>~glv+=0_k|~q z#WOtozRIRR;s#u_G4OcY4Jp7U{ZV#l_}h}D^AoFL6>>G{${$AkOBHhHz%t51h5d&B;NiQ7TtjRxVQRiTWNj?)`GRWwSFXsedM6U?(>L=f;#Elz~jtA#CBWk(yHdZW@1LPw8vet zqpzMEjyCg?d%@oh|5;$;Dp*|M3lzJm^73+USEMx6P_B0%Y@ULvbdKVj|$Sk9f?IF;J1)~Q73pu0qKfxK|5oeTtJXhBqX4S z0e~tR13a7t9HHan2*>;_Q9`@8V&!e&E?_7CZq(s_HH3gf5x9-E#@gaMC?qNd!sXlF zhmf#182A2L@;_Y(3*#OXIDXS4DunaI|LqWl;{1B@2|B$7T7Yh$zjU2`<0QhOxG)QX zp-^0MLIH1_+8`>7TT8-%zs>ubO}|M`M*kn%|F4>0LLy+?`;T1y^<#m9zo%(}OOe0R z;5036!5x7F`7hGO0geJtIS$+cBjo@{4l>~r7lwfOU}6w3L|jZ1SnHyq0NPds76}?5 zi$OUc0p|$Fx*~AD1>(ZMP~3#L2A>eH?gixG&MHVW$`%U}1r{5yRFIAaKtCLohM%OO zkQlHWPP>4GA)=>URKQ|5t?sCk%mc$cE#A(eJwRWWlW`~ z$p#rLj91-9JxIHrHK?)J<9<0!GF$rf5yMXN%Xa9&fdlzSuj-!sbLKO+S1e3+l7bUb{kXK1HHMMFwj|CY%lL z9Xca#y;94b!<0(V6>}w>!tq>}`W16Qr2&a6>Rn0knP=QBmPm_hZhn=^WO_mSykk!4 z10vf?kZ63NGkN-|FY)KW+*x6)5PPc}dFW#IN6DO!H`DLr;8jcy=d|ujf9(F)m@^$< z%Ab|=rja%ZeaL_!7kfP8i5awcL*Z%}@%89NS9oez2YJ%`0^zrj@Uds_BcCjt8{yu8 zC`3fQEiF@s0Yxx9B~~=B-sGY{U`(u5>vX8pt*xx;RkIyD(^LwsDfA8Fd+X<`6iBni zUUyQ?F8)NFSJQ|MQek&?k5{qB@afFy5*f!jPt#GS_p{ybASWBfJS<)4XJ0MaBvxe} z@zy6hlx8Ix9)A$L;-uDi?m{jj8x!6Fah!80%e0(jpneoEu%^L!)W3ZCr~Yj8ibD4! z3cW7BC{)TOTIk0{&CBE0<{suvR}5|dt!fysO?k=A8YMfSg5G9~4jkN9Z@gjfgQhYh zz_5Ae{eq>9AHfc#&vdRK7v&M=C+clp!BXy{pF@^PB;=O*5@X3`wYR62EeGvJ!wVwc z+(`^zs95{Cq@zYc;K#auDB=^(Jl7ME!Ppq;bZG)+9L?8yU)^1|8|LKQv)Vtv9=~{pPP@JGgR?EITG&oIp77W= zWNm}OIS!j_VpifQ+ZH^Un__zt8|rtA;_TUtzAW4}Us&4iDEIt2HuL4E!h8!<$o6Ke zr=TU{>3y>6-}964qoUR`@t|4DgwJH?MnIn_Zje49?#9=|>%qGi&?&Ri{4o3&Kg0tM zg5OQ&=9u_Wxk)x{{Bs3&z6Jjj3 zysp0E*-xLo-W`ce>e+B*;CqoZ6ffF~Nu+!3iEy9Wn8D;oJQ;VOtyA&g1-F{*bYVPP z(+b32OODnSlz6$jKk_%CSAos?u#B(0eDK3lXs9V^`U}Zf)q(3QWEuc_Ae#wl?ajw9R`EPk7!Lj zRu@pa8y4TIt6caxUl*Cu-hTgxJN#^kA9z*HDlIJ3r#-{3#U(jAbug1wxjly3I`o1@ zxr>VmK@JnI1c}$&(|+7{XLrLZ*V0v{vbYP<5sfne0WGsr>R0!t6|Eo4M*9s&?0jTV zgHOS1;=rW}&NhH4HJCP}TONM=dr|UaRAWb|1 z$WvX^t7e@Gw}Ng<=u(w^r0x=X^YP$0W-|lcsUk!ts29s2BCRi1>e93d^s;Tigyv~p#GeW~0HWaq3 za^uRK*W1n1;yJAjFZG6p&zRVKaI8%;8M9mMdZWR^EOJJpL@b|IM3ZgSvGCy%6Y?5 z_uEwZ4|&cs3d7fqm^E^>b(K0+lc__AbrsJ~ZMg>r9Ius7=g&HjAw!#9_L-93>rEYJ zELE1+eRZFOh?gee#?#rs+8#AMJio+S)NJil5^+--R`7~4@-)V&bFfl;-?hciT-jeIm+1eUCBt!F9JUim9XdsK#CYLfO_``*RBx7bemrThzU{ zcvHXme9h5)Tf^PIyw~=sC#U9Kd&8qy^}N?^vzGopr(=hx4fw$iYwz#(y<5MkV|)bG zxn<^mi_#8{%dZz;d8)Io$aDFLQA*oQbCd@epDk=FJjNXO{KZLiPFw)^mHs2 zebf=Hwl!TYUBT^e(Jp|D=QzCDbfJS{zu5C7kL_NEo9R6$t?Y`bb9NbHkw^>;Cx=ULMQQHi-q8bvdfq+G<>B=j`4Hcr@8NP>ddcr8)_H=fR35ks z!?4jD@mYDHp#!$B_NHu2J}ov(Ovq<#?Jr(+h$xdat&B8I_Kw=lyS$iPZ>Lg{>hMH9 zTx7h`acootB9<1;a$rN;CK(=!z2i}qpH=Vf*hB0Q8Q8V2-VBM@Qrhwz0X=I5sI5_s zB1A(PZ?X`%aeU#3yn*^k^}~|*3-Q>fYHZZvyWZIIE&}IC3}4^g%4H{gk%1bJ;mmpgDcJu@*H zI!fO#`ObYLRhmaLq;a(A&N6{7eX(=#iuFBJdwRAw2Fho9>@4L(mx~e?*FwkMWg0*h z{VUK%;f)k(&zt+g>9;oPP`lk>tcEpFDxh~Q>BcFIsY8Pa*Jzj}NX*Lns}qjZ5};uE z7i?7KcALfay%r=N67)x&@{`f>%jJ8EOa*K*ZR(P|!vFnjGX~_0Rz$DxW2EF!9kb0| zbkWFEF}A7{Mq5_2`JA2JSWqzsN{91@%NbETc~K8EUrD`Exp_9P7=78#TQWWF319Vtil(j*d|0mr z;g0OK+}v=Xt{WXwTeN96*Dz1d-s2}y+?OcyzAf&2DJYAhPN-fshH8|pMD#ASN~-HN z56Zoz@v#jK?7hP9k~i(Oy^r$}_AYkU7%Na|i8o4j?rC>M$K+!Unq8JGhS{Z^M80cF zt(7u6DmiS$g)?u9!{**u92fb;xS?|xi|v*J2!|VtHyd0pQF#+hN^Dqpl=+O*Ixto0 zG!FOF?$>RSN6mVoIMW1YDLrYfJez#98=IH){Q0fR;;=UekVVC4J-%$MS>ZnkApju# zMiQX00Q}<8$6#?6*4zMR;o@ZNh5$CExg6mLONJfR&s%3>;b*@Ii zRVE?=a0h?(6NHHa(fCsaPz?WO4j8~c{MqlM^}mooVS@jhLs$f0vi{Tu6B7MLKbSD= zA2y4ML4UIa>jDQ5w9Cmq8g$X#04oT9@e|JQgc#ESa6XRnI!QWQLjjcpi(gq { + const { eventBus, pdfFindController } = + await initPdfFindController("issue20516.pdf"); + + await testSearch({ + eventBus, + pdfFindController, + state: { + query: `("client")`, + }, + matchesPerPage: [1], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[6]], + pageMatchesLength: [[10]], + }); + }); + describe("custom matcher", () => { it("calls to the matcher with the right arguments", async () => { const QUERY = "Foo bar"; diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index eaa4456f0..0f3f04c55 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -77,8 +77,8 @@ const DIACRITICS_EXCEPTION = new Set([ let DIACRITICS_EXCEPTION_STR; // Lazily initialized, see below. const DIACRITICS_REG_EXP = /\p{M}+/gu; -const SPECIAL_CHARS_REG_EXP = - /([*+^${}()|[\]\\])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu; +const SPECIAL_PUNCTUATION_CHARACTERS = /[.?*{}()[\]\\]/g; +const SPECIAL_CHARS_REG_EXP = /([+^$|])|(\p{P}+)|(\s+)|(\p{M})|(\p{L})/gu; const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u; const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u; @@ -739,7 +739,10 @@ class PDFFindController { } if (p2) { // Allow whitespaces around group of punctuation signs. - return addExtraWhitespaces(p2, p2.replaceAll(/[.?]/g, "\\$&")); + return addExtraWhitespaces( + p2, + p2.replaceAll(SPECIAL_PUNCTUATION_CHARACTERS, "\\$&") + ); } if (p3) { // Replace spaces by \s+ to be sure to match any spaces.