From a2b7687c3b88aadc55ec38a2249c299eaefd394d Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 29 Nov 2022 13:19:16 -0800 Subject: [PATCH] In the case of an RTL language being extracted via pdfminer.six, fall back to forced OCR, which handles RTL text better --- src/paperless_tesseract/parsers.py | 25 ++++++++++++- .../tests/samples/rtl-test.pdf | Bin 0 -> 11911 bytes src/paperless_tesseract/tests/test_parser.py | 33 ++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100755 src/paperless_tesseract/tests/samples/rtl-test.pdf diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index bde2ad25e..4cc9b8e5f 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -13,6 +13,10 @@ class NoTextFoundException(Exception): pass +class RtlLanguageException(Exception): + pass + + class RasterisedDocumentParser(DocumentParser): """ This parser uses Tesseract to try and get some text out of a rasterised @@ -125,7 +129,26 @@ class RasterisedDocumentParser(DocumentParser): stripped = post_process_text(pdfminer_extract_text(pdf_file)) self.log("debug", f"Extracted text from PDF file {pdf_file}") + + # pdfminer.six does not handle RTL text + # as a hack, for some languages, return no text, to force + # OCRMyPdf/Tesseract do handle this correctly + from langdetect import detect + + lang = detect(stripped) + + self.log("debug", f"Detected language {lang}") + + if lang in { + "ar", # Arabic + "he", # Hebrew, + "fa", # Persian + }: + raise RtlLanguageException() return stripped + except RtlLanguageException: + self.log("warning", f"Detected RTL language {lang}") + return None except Exception: # TODO catch all for various issues with PDFminer.six. # If PDFminer fails, fall back to OCR. @@ -305,7 +328,7 @@ class RasterisedDocumentParser(DocumentParser): ) if original_has_text: self.text = text_original - except (NoTextFoundException, InputFileError) as e: + except (NoTextFoundException, RtlLanguageException, InputFileError) as e: self.log( "warning", f"Encountered an error while running OCR: {str(e)}. " diff --git a/src/paperless_tesseract/tests/samples/rtl-test.pdf b/src/paperless_tesseract/tests/samples/rtl-test.pdf new file mode 100755 index 0000000000000000000000000000000000000000..daa666f8b104d0e4e1b94ed5d08047082fbe7d01 GIT binary patch literal 11911 zcmaKS1yodB7q) zn*x{|5Z3GnQ%3|d4#yh@I};~kQwIQ(iWMBjftm@h0devHr0wj?QLYkp#*P4}sjZ2r zgQ)|vurLnl@-7zlKQWDA01ys`m;!)9(aymJX8n&7 zio@~R)Ycqf0pNmg2@3-p5e}v>8ywe+$XIz>f6k{tr;cIGuMDs58DrC+)zl55Rq-S%|O0NPU~ks7m=+kWh$D z@Wh~uobih=8RaC+-mULF>};xJvZCsy&z)1^9ps^fjEPUOKB!%lv2Y5CUb(_Y+^CZ0 zx2+B-=1y7Hm)w*}rxN5;43!NVD&CoTLTGJ;%nGli4ROtP$Zs(0RSLi^0Bt3F=~KX( zH47wj@WuqVx8ZdUlH&d34&1+B_zPyPe-Ko4dus~dcmp#>0S7g8v~zMWHgyD`YKp3$ zv5F}Ipz}xZI8;qt5daQZ8<>3E=rF{9pOp-TqC*f92zN7tRT_I*LZ}a1%#>&RyD2y?=VVF;MD zo%!7|<|q{YYZbnK-2bE)2mnTHoe!1d|A&VBJRrV*=izTJY|nVLsnh0Dw_32x+>6Bt z9-E0GS@;!z=+O^aVbCOW%c{j1y@Ht4H`>L^-G$%Q`Ue*9!*d-?+3+&oT!A5B${ zf%$$V`BCa)N4u*U zr`BzC+7o)YZE2-6zQol5-z%A)`BF-m>iGM}jr5Q+jL*wL_Lkn=kx?|PvDCqyKaR@w zkK-%#u85R-k0sW(H`w9YS+vn5jB#6UI$f5w5~JpAs=UMsL{l4WyAJ~8vtEAqZk8u= zRIJxhUFyWI#f<(VQq*JeW8H|}7}e>8i$vu#=qjH5Fu4NT8s3pyVtX%W@&lOni^%iS z7Z=8@o!d%Fk-jjJhj5+YM=qoXWLFU0(=re}6WQs*;NR_jaSzrrv z{S-%(B_>7ZFeFA83})Jmjma;9OHS$zX1MHx)qSpcD|wju(*mB=>PKM?d9NGR=x3tV zfwNAZ_2UsSyG>pbkvY4z%=nhOmJC70LV=_e4Ykk=L6~U`1I?2fBg5*fe0^3udOf^m z!G~_I9(=Y7=y5*~EsbmWWG|}liY>mH^g|0S`yP)?5|2)L75i8e|J zDPI|B?%rY38Cv$D#y4rA7v6y858g9rNf}KCjlCj!?f*tZpIu9e6{)KorH(5QnUv#6GY7;JV8XldvcpaDQ-1^} znowZ+oqpM|*lwZai~oR*kJF#HVfUGIxr8m|1bX@#s*LZioQCVfez1Rh$X;UgByy>& z(hTxe_{moUlz>mm5s!Yr))fL3zVGv5bw_egI#0jnDmz9F&?BLX8R?9xmb*O8eeJx^MnB*6SmD=`{L ztIgORRy-9O%f)#qt2rRWqo^fWx9RJNO*F|@Va;!)vXxk#V3jB`p;s^OF2^qwJJas= zaFjJ0uCKA=*JD0zhhz}ws*Id@N12V*K0|+vX4RpU#?Eb;d%$iMqqt#+%VlyQI6ow` z*`6wFqVD`oHB^5`)pIa=d{C;6rZv4Z{s}DKt;H;fOW!SvfU}S6$o<(loKeh|s*qPJ zVLjwPdhH>-GO&Jy_VW+SMvX0DXM2~_J)amTPhyT4sgRUUVRB0RbtuQA1RMvf|HW-k zcxK5kEogShpYgW4W8DfL7?+h`m4LJPQ=mmkCjCZix7H+Sl%WY4{Yo$ zABs;LAG9B^wlGv&Jhs@EYRT>}Bj$-vFe|Pjc1F_~t%UK`Y|P64qSGJQK1;o-{z{w4 zF2te%c%7)TCJ#=ASB`mq)my&a*hy~fRL4mevfw)UZZv%J0l})?7_MKy`xJMKrU(|x zM%LXuF$;DU{;1s;yV%`W?J?^daX|30N7&4ux@txlSA1PN>*J)`1HbsHm5sgabM`CA zfaJ5{D{^M#!5G1br|^QW0QOH6Q}P;gzhagWSoDg4xrB5t@4?d;?}$cM`SM}_{p0UG7nz~2WaetJ~ZDm^XBPHGZ^}C zczvZK%P1)ag0>~lMJAcaf0>X&>LMQuFbXlYvs1?1uzLmPe10$sH2-WnOBbEA@wxn` z&Klkt9;6ODYAHNMRxe+l49A!obrN?fY>{mlXqjzMXvu8xx{$KicvkOEwc}_)od8TK zE2KmaaU{f_J-Wi&JfukDh};-c?Fh74A*uZ>s8T2O(l6TZ*=ESEpo;9D>F1s258;C4 zrYdj{HN`Ila>nIgi-N2BP6MqqjU;1-WHy94LwHvCpGQNI4Tn2r{7s^d*xH#X4F%4f zFIue)Lo33U=@k5k1zD0;D-(f>VtBIVb&ud!RekvGsy_5buIR0J-nmgk)weQDHG9}& zc}7T~mVl9;RVpOETrsYrz9f@OsHuJi=HMZ1H6zhQH9kwWr~5(A z7B8E|*OKrh$##lr-LC7hgNb~T5{Wi^BY6`8))&r8{itn-MlxZFXmkK&mX{_Qz#hpO zO%Bjg2e`DpVu_i=H@IKL^+?!;v7+^vcaAPuGyyBvoZ`rcOt9z6L*9={pT}|}tM^Iw zj5x-eZ7A`^#1z2()7?ZXvpdXSmF9F?DDebzO?%K^M6*iQm+He9R()t=wbSCpr_thW zhb7n#Fyu;7Au_Ii{+!&Cr89T;Mr;mfR_H@381R~ThyM406*zG)fi&L=&Q_7%@PJ!t692+8>4~Ad|YBq!37EaT5xC>?e&YO;?&b} zT_qtA>Uc`~8TPDTC9qz-`&M`7eTKTm4m&F)#Gz*|4%RCkhV$qF-XL{K^7g<0Rm_^R zsQ~u`2gbjLTYz zN!~V2Jj`(P`dP8p>-v>JwG{Wqr1}M7i!(~n2!aSS*4bG)VZ~rbo8Jsg+s#-8moG6w zAR(Y(stDAfBe?6eeX519Ev+!{@w5s1s&AwB^Xcr#$uaJ-P5gFrl>*7kn6L02o69$F zD^1T_DQ-mHH00~$jKE{$Q&#+PoXV_{JrNnjqBNdSZn(n*OQDb_&&C8J<~E*)bHBm(q|$Ld6D!#fGT7%l z`BoX2bqd#ID9*oT^?ShAD*Rh#f{|4f6H0z>6xHgHgI9q>{iRj6bRD zoW`xW9Ag2gYwG~;JF_4#ng7ZuNWH40L_+Q<*(L87yWgNtiMLJ(=IvEl@>KGV<_$}0 z7lQ0hN+{?kDeV4;RJ1s7KE|Qioow~E?M)#tD%T0R)cojEYKuEHt>`qRn5DH_Vr%z! zps}%138*Pj@d21{(5pTr&n%d*${eTts4(N(yr#)kzGv;D+xjx-DBkALm|Q~uzolnU zA4t@iv!M9)b1?cK{(b_P*R7^STF3XSj&ppk1Fs^IZ$Q{bJ{GHOv&IkP7hgrc`C()c zhvn?6YGm(}9F2oDiyqyg4=%y{$f2J4Ar(-t?+s?S*sSmq6*1(4n6Z(z1v0*tlM}My z{G#@ncIKS=FHQolM^5yv>~CBz54QTKda;=@H44rwE3JJiv}fKv$1D!BQEY@A@Nmp? zI+N#OUh3j`&h|Y$r?@bD!oaBtKGjrfpU_$pZO$$EJ=xx8}1EWgFp`DbZyph zOiOQl-&pOUU1no2N0+~8Q5IJe7r*AZ+R%M$Y85RFpSF_{hS`|YPucjN2#YuanP0;$ zZj%Q0k$i-jZ1dN~=t@!+*nwW2*{VKapPb>Rc$m#Uo8Iay=gj70ehA+xK%=xRzb;&a z@=)G)WS-xZZP<;gEl|CE*vbpwf62Cw30iZD!#IufB%4PSG|{mtKE;&_U)&K7$pMgr;DK<@4Z*!d2u6x<_|>m-|f|^{4x1;odHR z?Sk(*yKdz|W6ln*m?7R#BBgz!2c0GRlOdB&MLuO870;Fm5*!DZzYb9dIaIFt?z8o| zekRd@dl_SP_TBjk*Pqp@PTKuWdUNjlNWd*1Yy6q^1=1MOk-jq)?^3h4T^WEbaK9-y zS`4Rpwr2KA`wqFSn7Z#}nN=*oO2?60!TqMu*5}MuTz-GS#QicL^hZ{?Vjn)1iCOg*RU>WyfQLF8(?;*_fFZ(3-cM;t6b6 z?4C>0n%ACk23e60%#9e@P)(T`+rTTn49!u&J^{jb))$u;bygc{8_hN6{VU!lHZ*Ic zhc}qk@?f!^JaBEftrvrospm&Izz`wQi61X}_7E~)N`Y+L>}sqtT1{m~eXxVceR9Ua zP`-S+Vzv}&zBWUx^XE7BsXWo=_wG~P8=A>s%#l*ciDw)WeO&!m5UY*E^Ld4DgGJ`M zP&+8-LCPQWFt{)Lc&coY|M7Y~8@TuVR?mTWd=`_#754r81EI`f4;2NT_-esBk3Q9? zw=|rqJ}NOLz4<9#zB)uCz}g+3BsOw6x~a=9J*wa6YDd?DF`QWGcfF6X$MQ`Ldspn$ z=aZ0Yi1aHn@5|*k4!4Sz!t-k<#fKQ~3BR1Np64kB&mod+Qm(trzZQ|?ubOpwy$^Ao z+^mD8Hr_IrF{e>(Xv(ZXrh;w}iIjwxH`(0`<(N3%8&Md-w3Ag6Jt}O-_&1KJhT2zJ3h7O+_Ax8py1yQUvz;3iYU{sj@cVXgGLk>njJw=f zEPyf0=~krlqry;Fp8eaD+F6jBC5C`$Sk;Wm_N0P9yEnP91|;WeKkv#jcRep(<~$3% zFjvN=@aNoB^C`5eXxltI;1~$ZDYao3Y6&bY6~G1ujU5VUT^6W#@qSf&f4Z1Fx5D}q zo4z4;$|chk-W2ZD9i^)A+bBD{B-cAxb+Dr(i*~SZqAVzA?p}IjBOTTy#qS(YcKPok zz7)xq4x5vHzin|F9lyKP4-w+eNlL`X+iZk|yqO<(T|YmSrlr@w&aGk-znP=npkS5v zJzI{%`6oZB+poM29Z+@N%>$4opPoIV#oAFp6zWISLQk6h**z=JfmeHd8V zk|DV4h-9DXbFD6p+y~6Ldb2?^Tm>tPB|f!jtp-}<*R_x?Y_=LXg@2ponf^Y*Q$=&c zFty0%q8}cTp62yrWSwPM&^*z2K17SLG>g7Wnyc* z>-_AoqQS2W$W-KpMAbhV&f1gT1X&VoJtITVusB9`x zqM;+Bw4tMRfP4D`62W9cD1Y!|mpMG50+-3dKM+HiPnS>*YA3GD15J>W2q4R9x-A#8 zFRb?Cxsky-EUHQTItMtPu-xX;71i53Hf7v_uDV*-bfN2{1~;4>;ionBi+2l#LQeMv z)pAv3h*`z$_hbbp=IG+|2|gYi$yw725RPmMm>H^ov!N737m(V0&;pGD^u+-5F)96| zV!Uistmzzms!aBBqH3Ih1#sYlv$EFg<$7GVzh;JF`4E`^LI!`-fexrx{eq#IxfFy+ zEE}Ko+$M_kHTX$X?n6%N z5e6$9pOt;cLP%XJ``-P~kd{r5)VQ?Az?1QOX04%W5JYpz(2n>Baq(o1*-vfvV?Xr3lZRwwshG@b^kkG)U*VGSTUlfIt9@=ZM)1MD`{e+N5_?U&%-k%sg zrC|Ip+aCAKXlO#r{pr$}k;|I^Z0yib(L~WiAR6sYsEnWIbrGm{Ru*1|qhlw~6iEKe z_;=9RW_qj$_7fr!_{T_ojHlg^CJVB1S0xO-akz$($HLbKH@Ycq^DQV zEK@xB8O_@yT_N7?;vV|`D}oR|qvu*N%k&7OhZhVfKC!4-H=mkIt}R$!{i*_76O&Ct z>@qO^sPGCC!rk{eOItG`&EQ+@O4ydp$ZWtyw%cEYR+kR>;vlsmh3smW~koYdt9c3sHK|2fdmpkufzbE_&AF=A=s7 zQp#r}KV)P{4PU21FS{#3xh?x{U<*yi@LnaHvTf)wy?^Dg6hl}deMllwoOL45$YylV z+Q_s$EYXaoFbq-zi!AzXZxs1J7H@HN?0s-3baDc!65nHKTER1Ez12DwW1hDJIW5z$ zSfhzPT2>cvy87Kv@iSQRDYWU90gK^Rkj*%8T;Rx(B=K6vw4prv$8OVF8MEAX`0hWb zGN=iXwusyxx;TC%M~-bn$YxD=$ku#K{L(UTkd^(m zktBhu6dBH{3FG#1kx+Ku$OKRVYO*m01>uQ=(zt`UYA;_eea^9U&!=UVB(`6d>?5@{ z5(R3s%zWVEL-(g~=RHwKIx^`M1(|f<(B2$lWrkA*AjF5ZL!TvPY4iA9FHvJ48P&6e}}KWX^wbJ)gg%xPIFUysMOprb!uDsfQk?pETsx&Qj1+9Dn`B`B{3euwK`w|z9(e-KN@ zIjeoyWrU?pEW)6yQDHANoyNh&zEkjeJ-_Vt8*AA3ovJAIg0~FN%q`{C4 z%4m_t=|IC?A`&&0D;_XaVjk|^5&$mK2qpNYLSz$E;_UC~r}_fdfbvKoiVEy!-o(&s zOkldWI;z2`@XM>*l43-l6(Tg1VixxL^XSOX>nZ6%Go_L&b&ol}wyVK{fViGB3I1tq zkDiP)=aIc{n3G`0yoyq%waw*48k*@^C2cW;eu}U1>c|SnPURNMI7^y8`h|s*Z%}26 zf-Bcd@~})KO32vjGHv82jN7px>*wb1t|0cgcpb6HR-USeyuoJL=So{{|9GW%Y<4$4F%ePOP0Cj#f`wfqPc)bb-#0x%xGviA{Z{3(apkW0Qhf%j370i`DHOOx{D=cf{3-pLhfet( znP=8GltyzgYgnhytNW=le8g=wEx~WUuTKV>-~_+(OllIO4-$jrf@P-gblmk7UTf

C?+j)_q8Myp*7=md>;f6Thn!sOc73ZLHYN)4jh$~kAJ=P*rI6+tTeu4KJiID#M#fTO{}_AilmE%3Vic^ zX)&_DHqj40IXjz{vksqNZt5JBEiYLqxb`|!@!xo&_zKclu&q;?f}2Xsnq)}0b`gE@9onyG%SoxKUFOBN6gfk z68;Gs@Q^TZIA?VV&bv<~GUp_TC43n{_oPes;)2knl|j`BL1;rI66bzxUb>dcQK!TfuKk!EnE@XK?Uaf=D#Q zqRE6&i?A>iI9qj;Wg;+5>c_iDwG7c`zQsM);@^jq1(5;VSSGxT&MVg)^|(DU^pHS65jE=fet0X}Ksmymw5J!-{N2@G$d`G@1 zxUz=mH~=8(%!qS=&Y6E#6xY>%>?pU-d%J?RmFgpWVBrI?D;e8)xr&e}t72 zZHsA((IM+S_xM-Nyze{`ckRiXs0&Ngq#_Y@kq9~lg5)j9;*(#h`}lL{3ks(!Rpa>x z>OBzA>DYBw`_a8Ur6a(ez5Bzn`!;GPli34USC4E|ZlsjT&NL1%vh#3Fsf85g-;Ivp zuGG~(JEIVK?3sn$MPfm+CrMI1Rb@Y7Ct{$r?`-FvK(u$=+ zF&bkzhtbz=BKw_>#y+G?YIF|47gy@R0JIPsQSpTNb8hoqh^c{}Zy~meF zD!Y>+w~3cWO5XKKw`W+M7yXQ9N73G0#XhcnO1J&dd&u3>iz6a0&kGNcmC0oluL}>9 zgLs*e2qHsCq~PN3OP|yG+1_uCkyvWk-g_FGlgnqRo);a}N_dw?2AN8?4a<{0?ZVf= zJ4jeI)d`aE6sdHLiYI%8`!bdG*nR7KF%gD3ioDfV%b4X3LLu76r`ctl7j-^D2R|IU zi#>knmFJg)gj8P#Gr#V9j5B*H8zE?MtG(~Dd^!EYr^Y8UD&m3oCNC>$72(VWi1K>WKS65VZPj{aq+sJnwR(Sgu00Hnv3ad zz~IpTqzdoGEAi%NrvJu&DZhV-?lI~ITIupG;oW(ei4*5Dc0>2IuJ!hWs&Dqzt6mP!*r|^0lq_`~UgUfHa%{$PkIp(= zdal51Z~t|7l<*EZMNGKb9Z1sTAt0$CsI6ch3VAQGst@Iz@*8-uSmT41c23 zE z{&^2sxw|vt8!?}|+@n=$Hja(rB5@XM|MBxXq4xS%)j}`F@SU(?^@HGE!0#LE%Pk>_%7Y+$As_odyV52D+W;YLHrT#lUG@o z(1zFgy)sIkh>L4Q&}$gZWGNv2Vx8mWMextwyu>MRK}m$rF*k>p9V)2+@cX$ zhvWXQI*$Jzo!6i0xH}D-gsG#k1N<$*&f!l@n%!Vdjni zFb;>PqcKXd2H@v}0J(U%!6?gJfIAaA7YGF82Z1?x?&NKzHtGN{1jxtF2j)fz$;4rA zWlZ7b76E=Lp>bw6C&fU5GIpLp# z0e66)#CT#bN7K9U|4m!P$q4a>?oM1*;?Cm^HZD{)vJ$Fx(y|f?u(v3wpb08_IKu4@ zvx=J|Ds@>~GrK!&Ak@?xjuI%k0hmNlDo|6Dd{oH+r3{7Jngf{ro`y22IK6#qZEAA| zBIlimP~%QLsKX1nOM(-?#mfui<3=H-heDknCHCXw1)|{Nf&jR<_<>-4E)YLz1O!2i z@NogTxOgBusC#bIJp?t*%Lf77$^E#wd4XU~E^aVt3=Bg3a|6J9d_Z0veh4>!pP%dB z;_CeYGC&8+%>_iM0Qo_81`r740R{u_>WkV5%7EGm1k4TO2Sd0)dZ=PZX#BDJzbo)> zD%$^_ija$+``<-`da-&|{+xtCyN9eJrY_3$0#n2i)$h?bPP%n{d=;XAoW>#hAALIG z3lP=5LBXd&*|H;F3daXU4`7VrdH(uvxP2Be>r-1krZ3K7TZITHDn+6Pfa2r0AK&OCbO&RDhl^Lvi5EkdJ@@!AINnvmNW|4HKnF zHulhiPrZ}!jnS%?st~=bD7UXs@5;Q!$~G&alz+q;C3%vBN)Muc%WZFoI^Np!6u!UO zfiAMI8J+DH4;1;_ZY&aKaBXa0RFkAmw8Z%9vz;{DQ!qC*BKlyNqdX(p_J8~OPJjA; zA^X3`d&gQ-fBnB{E@_Jr7~iq-Uu0&N`Ma@AV5lBLQ6D8U{iCa(s8XxisoBEsG|2!I zE&ikNTpdi!a8O;!g@bC=zdisS2n0n(Gr+%WD6;-B2C)5?4GclG@ZIO%Y@Gb42K^5k z2n2?p*!&+hPEHW_|JrzX{@2FKh1%_Z&E@6!?{z_(U|y7<_CNeUr~~M~5FC1XNhPWO E1I7{V$^ZZW literal 0 HcmV?d00001 diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py index a0550bde9..4d6890653 100644 --- a/src/paperless_tesseract/tests/test_parser.py +++ b/src/paperless_tesseract/tests/test_parser.py @@ -588,6 +588,39 @@ class TestParser(DirectoriesMixin, TestCase): params = parser.construct_ocrmypdf_parameters("", "", "", "") self.assertNotIn("deskew", params) + def test_rtl_language_detection(self): + """ + GIVEN: + - File with text in an RTL language + WHEN: + - Document is parsed + THEN: + - Text from the document is extracted + """ + parser = RasterisedDocumentParser(None) + with mock.patch.object( + parser, + "construct_ocrmypdf_parameters", + wraps=parser.construct_ocrmypdf_parameters, + ) as wrapped: + + parser.parse( + os.path.join(self.SAMPLE_FILES, "rtl-test.pdf"), + "application/pdf", + ) + + # There isn't a good way to actually check this working, with RTL correctly return + # as it would require tesseract-ocr-ara installed for everyone running the + # test suite. This test does provide the coverage though and attempts to ensure + # the force OCR happens + self.assertIsNotNone(parser.get_text()) + + self.assertEqual(parser.construct_ocrmypdf_parameters.call_count, 2) + # Check the last call kwargs + self.assertTrue( + parser.construct_ocrmypdf_parameters.call_args.kwargs["safe_fallback"], + ) + class TestParserFileTypes(DirectoriesMixin, TestCase):