From 8c2fa52315328a0b7251ee4c549c3f5fbec5d158 Mon Sep 17 00:00:00 2001 From: phail Date: Thu, 3 Nov 2022 00:58:36 +0100 Subject: [PATCH] put parser into setup make test using convert optional Gotenberg live testing --- .github/workflows/ci.yml | 4 + src/paperless_mail/tests/samples/first.pdf | Bin 0 -> 6714 bytes src/paperless_mail/tests/samples/second.pdf | Bin 0 -> 6723 bytes .../tests/samples/simple_text.eml.pdf | Bin 0 -> 22301 bytes .../tests/samples/simple_text.eml.pdf.webp | Bin 0 -> 5340 bytes src/paperless_mail/tests/test_parsers.py | 195 +++++++++------- src/paperless_mail/tests/test_parsers_live.py | 220 ++++++++++++++++++ 7 files changed, 329 insertions(+), 90 deletions(-) create mode 100644 src/paperless_mail/tests/samples/first.pdf create mode 100644 src/paperless_mail/tests/samples/second.pdf create mode 100644 src/paperless_mail/tests/samples/simple_text.eml.pdf create mode 100644 src/paperless_mail/tests/samples/simple_text.eml.pdf.webp create mode 100644 src/paperless_mail/tests/test_parsers_live.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4965a348..8e506c8aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,6 +97,10 @@ jobs: PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }} PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }} PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }} + # Skip Tests which require convert + PAPERLESS_TEST_SKIP_CONVERT: 1 + # Enable Gotenberg end to end testing + GOTENBERG_LIVE: 1 steps: - name: Checkout diff --git a/src/paperless_mail/tests/samples/first.pdf b/src/paperless_mail/tests/samples/first.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4f74613f97a8cf297ce4c453e346b34c54a5364c GIT binary patch literal 6714 zcmb7}2{@Ep`@k&~O`*t69$C`DEM|tW4`YzXE<_E+G?p2Sv4&KNl9VOc${Jr7E2Zi3!Q}wzT8=eEYABNP#qePK&R3GIq=?uMs@afB7zT~ zs3|A}u%K8JSm4Ze^bcV&m4YMCL3=nA1Pp~UVt()1gII&i|S2t0%L;1)@QI33Kd+3GfH6w 
zff^B=$pmexA7IB=M`2J3Fy;j)BM}?7_P=FnCNgY!(ZMG4 zfcPb+L=Qvifn8y9PtB7BTk7P+ah*LqOPzw@y5~<^f4?fJRKfXrAZG-#@^FqX_V^%I z*jE3Up_heaKETa=I1%NigLByBnJ(9+=fw|$Lqk|q-o0Kq&hCCSR(T0J(#jpjF(eWz zvdT{6z4Kb{=2y;HzbgX%+H0E{@{Z*@N?n#tls+B3OB!>n@mgba=Hm~YzN(~5OX<6c z&lH5oi(iq#WO?1pqxx^+p?wcI`2Cit%-06;C6M}U!G z9MQ{(M)m|x-g?H32_E3#Fg3!0*8zQklbgI5k>u@8U@W^6NM2yQ)&;eh9VCxLqZE`8 zFh$V3JOXSbBpe1up#d}+u7F0sVHiaKql{5dLZTVB_7JE6cyWz!@j%_fyVX|FcEl;IUCcGF;ne8TfkhLI1Cwd17R$>dzfi}Fppz#)BUtOdOcYF)L|HB(-0-%nAs z5`Wb3zN5TAwWN@;b~3wcx--99QzfNiimsy4LC;o9DYpwhJzM$twDgvu{2BcR$7dJR zf+q&No~~-kNjh)o(=+2F&)?jCNb|t*mVn)1&t51MmH58BDp-{koU&u^QgB^w@4~f` zlxw9KouuiotHKc|DE6zz$>DB^E3-$}ZeOWLIPhMIn&%&gIU@kt1Ir zT|pXQ$!j-E&uXjXYtiK;?~DHP_^51lO`+bjx&zR^@eCxiWNonoUHl1=s0!QO5UOgVq zORq=TTUlYP&en$4N>t$t$*21zGX7D^RI*x6`%k<`aG!n*`kjNJPBFR7Bq90DEJfER12y_1kBVnrjk;162vZ2FJ7R ze2kR1b7Q2%x(y|2&+M^1Ze12Y?cUbW{BYz#3ADy@JR&d8 z(y|Mms9O;=*0oaSkyYwgnO#tg^zGPV-B3I1?eo04{8Fu>opVcD>#drPx#pjJ&u-!w z7V7O_nP#cK8*;&t2R40RY5TE}ttYZ~t-0x_vM;LzVSMUZAFQUA=P%hu&T-mTD}2h8 z;(MfLZK1YRD5fN&$t4w;L{KXXX-!JWGVJE(XWhjghb|Bv%K)nDQ~W5@M;47Yq<#r{PO9}?pm=JotW>_NeW_D;TXi{N@VTz& z=REk>8P$4sPkuRHwzmma6mlYiy-_mA`!;WGf0?Pe8{g4*kR*EyXkaa}Vm~@fDGtv- z9btF1^fbMEVX_m$s`BK9TgGX*LB)Wj67`IX3~QbT$;Q2BA0SsQ9$oWAuJRzjm63;G_C}p+qW6q49^8n=>K)S^Sev0z^q+E$5_)j==XEsJ<9P zuh!IiG(mP)O?8`XU1(mKUX{AlbNMaCrVrD8z*hZ{s>hA$&BBwD!rML`EblOm@E|w@ zY{?Ry^xsApdn{=`-gsQ~Mx04WS$+nzvm%Sye$gjlX*o;glSq7S&(PRCn{64T&V8?3 ztz!m9U2WZ4X1q8Dv#Lryq(C{CLpSFEBx>t7g#cFL~ zUFv@pv!mZHCu$B%(Bk6EY;9+0RJQgl4~r$QZ5q4DQN{O(_ds7j(DmIy8CjkgjuD2e z`6Kr+pH{=pxSaVIlK-)kg1=uvPSfz%qMm|wN+^L_7bankkgNW|^N3GzHEP@v>Qei% zEiA*!`+AK}Z==JL^QzO+Xfs<&3p-83eZfv;f!tmZ>=voJF4s9q%h+~|yO$=!c|>Vb zlyfhLMjPBTC-?=fS)mD3`!YiHbOK`95{Gir>P&wbFDiKv`V=KRsKA$-c#M*9sc1o{Epd8cC}3pv#qr{X<#Ej- zH_i{KxV^p)6H8AVeYL~PY;Cg4DN~|tpE8kzg=??!#)hTskRzpuh0-ho z>sX0Ou|TYo37+sINo%iYx;C`$$k4gG5tS!dT#Fhj(yN8d_k_CwV;v31x*Y*#>^JzL z2N!(>{e?SD+`I5V@1?=h>0Plkz?4Y%TZ!-h$Xy^_*WPfv`>^Se3;I^FQ!0q#-N2M$ 
zi?&gG&Qob(;$ZqMtSd5GH%Q==TfwCjd;jKLNjXnttF~*jm}*NO_KB9{1%fhf<0Q|> zNveBViD=z;>%7@!D~ji_$VH5LBTv4HI~T9S;`>k^RB3tXxgxBVp3W3h#N%8UQG>axf&;soR)@;S@DeV7SD^^Z=TPv9yl4|8w2u5pdb z@=E7a@gqZr%@&y`%l$!F8aixQK4Su``?R$hSVV*_$mX`l`W1vkZB}#HY*Jj$T^kjf zfoIR*kMZWuyIh|w_-cE}j+1;WwqhwWMbGs2@64b*e|fP?($DrRPi}Afm-{{3eg!+0 zw&WFt*4vhSa5mF3eMTD*8s@GqG3hB)!* zA8R9O{|Y}3q`@!T{p`?g+`*9ABzNDy;lc4HxXU0*fe`M9lX_sB#HwC&-quf+qJdR! zT(TQTDk-j`m75=wu@5FKiC}q@IK|F~dswtQ{B|sj58qf>x!rmYmSzHN z1CX7$t@-|6->78#!(GYI6%nW067-M6qi=ied`^o!U(az=ZLZ0*d2eV}4u@3{XBS5h zu{gd6rP^iam-kjoTMMsC24yHDB4mn($+u~YkO~T*Z_20$z&t+MS(qWbsfx0MO>wUqz+SOby+tq=P zs)%+|Ss!PfPO&Xhyxf;^6ZS5wX7^xU7kptak&CAb)!aFJVQ(%+TE~OMHjlbC+cpK4 z`<2_5xLVA7GY$s2;L3(&c;sVd2-jJ~0pE^&T6Vx5U zNQGN@9!lD@`%QrUC%1N3*~D)>u)Ui_(S--cBvjm zbJLca6|8Thvb!KpT~1O%cjNm*JbGvkzqzC~^{4>ZHeq7w`eooBbJ)`|9JBdl)` zw&Y^dp%NQA*$8ut6*f}UA8Qtv9%LDwK5>g<**c@+*ejRkY23|~3f!?Fr_d1lWwnb( zs)Pt=$B&hxQsg-987{W3R>GP-_zISZNd3Cq_&!(kqe!bUMK_cEcci*|BIR`tQ1+?1 zHkc66ITr%>VgrP$tO98^HwUF%UEd3RbU&6TGifb<%LiAm*=|6@$Gt2q|K-VqBI1Fm z>n&yM>Ro3`?wLA8y_fWUS7US%+p3C*5{Q#dmMF=$6===nGJn6*SXX2iIG7@r#%fxK zKYa7p?t~KOCS02trCIs|z{}k@6f)F#*bfCY9;$#J!G2A9Je63)Bl^CHSB7^bH3~9X0M=^ zvcrxyR(??u|0aKp+E&4vmU-ayu$i1g^XJ2_Rih~bbCPj+4UQ7}0x!8U&Ps(W1mK3f~dd{zIvwE|)v%uc*v&vQ(?KKtIzDnI$z*srCMqZ%pSxF7d4y*V%=MIOp; zyxy>7%u`k=npOSNsB6gyS4MJZcNXkU>>%aL z`R^QSM~Mm?;3e;y8^=`W^CGiV(s-#C32Jkg_?>xrx-RvL=D|8!ui0;{7)=Z9e@;hV zqbG2m`Myp2=s*@*IIMkdL-E5P#PXr;BmsXi0U%i;kwMPl56EFb zs7TxgK#$Ozi8L~W1jrdUgBXKM_Xj7`4G;)} zrtH8Gjb=WgG0Z^;u|6oS4@l+!TVF#lHiyBG>yyY0Jv?&~y{?Bwtv}(`2c`94T>+u1 zv5p{rI0%-JndqPCaq^}y04}o{%%m}rL8cIY7R;0C$tXVa#d!M@?lE}OIR<&Rzh-4zdpEz>RgN-ryO6Gk*_Yb~Q-Lad`*WMU)ekay`G_bc|09+ZuXomF*K%-EKD8L2yImZZwaRHQ{b8sXGPXC-k zg9!ECb1*mtM9Y85!4M$d{P&!qB1qW(ltW>Z|K1mk`b!Rm0X_SdE)0nVYx1W(82ax$ zU>Nvcalql2zv4h4kblL2LZZO_`g0F79Pw9dCUx2M;>!>0N5$YAV6h) kaBlqQU`D0V0S1C*;#V&^fktPR35`$!iL#80?h!oXznpN;>Hq)$ literal 0 HcmV?d00001 diff --git a/src/paperless_mail/tests/samples/second.pdf b/src/paperless_mail/tests/samples/second.pdf new 
file mode 100644 index 0000000000000000000000000000000000000000..2955c8d5d133267d97e5279dc7a30886fdd95fb9 GIT binary patch literal 6723 zcmb7}2{@GB+s74!K`47fcWsN+zW7!)dY6r!XA z!4n;5tHXbf!q)>2NUPzfI5!djTntCRQvd{LVEo$znZG7z_)xV>sNjMSX%iKIO7^1N z=mStGC>$bfj{{&*P%sOX%~l0#0yyH_D0CYf#TAe?AQADu;z4e?w+5G|Qt@OWAgzw~ zcDBbGYpL4)2paWQ476Bj5tH#m>Q>0oY9t~R>{0+(dJ59UcnZmjY!BuHgKmzf5Q!x4 zIgGXy#t>{(p*aaEFyinmI)4^6`Ef7pOpX|(@ZtNyy#Y%XaN1w z$kOnv3*Mgo05tklP*wM42w=A{aC%x)>%3L#8F5JWOY2np6u)a9=^?@8{pi*^KnvN1-y=H!KEWu@S7I0~E!0d=%FC<#Y^&Jh?{83Z`0hV#(II}@D#!`Ot1cQ*%Q z5Kg$9y zo|k5_FzRNWxLG9^DG`Q{E0Ab?FnRx5Y4()dRHLkX?EAaXlOxdg(XFwQCu-CEXVwK- zohKO&rVdIk`L=#mn5p~}Bz%<%9y2A(f1zyUZlbQTW-xTA8>l@5eCR)5#XT5R&=N)onW~9lqKOLjpo=iA3)A)@G~+E9)fQ+w64nWbGI} zbWq=BrE?FDY+Y<{nd?;J#h3$jpFdCc`rHX_-!sQ%Q3RRqPvuQ?;1diAZ&i5sITYzW zKb-ykU;fkMLgL3tzGumN79H>GADEfFeO0Q^!ygqQ70a+Y=kkk?+Pg0~GjzBFsP}uC zm&qC(WbHTO7XN;>@trJG9;$r5bmDG#W!{Rab(QbCXBS=j1jlCm_6r`RRxJ*0%r^Od zrc^l*gL=o!*mye%zmWPK4-8rVd!D!T^t!p&*cl;WW0PW_X~qQX3N)Ud_?G+L<_Dy@!S|--SDI7wUl3wjS+$Q3`qRH&kM*W zSi%NH(iA?p-K5q|*Bl6Du(>+LR85J-9FFU0BPE;hcI&)#S;;wO;(4j~*6zgmM_5EE zY$!7Qs#s!7`Pwy$kE14Xabr(*j7yJ*nuc1CGbe(R(KWOtJuq9kfAaj zJ$`XV#f0i9lY!34BV`u{J(i)8l9TaMr%%gyeESk_yykMRAGh@2fO%4)@jk18;l-W< z1B0E5Z{Xp}OxlmQ7hmcbGm!a&w|_K#@gnE!sl4F}U978p7E8#N==K5sqJTwIV2gXH zq?`MK;5Ugmg_xqTG4{ZH19CxnIv<)PcQ#c#s2kcbu@>qlZ}duZCHi6LfudX|ZliSh zR4nJ!C6nXR0o%UsuHW-%>`lQ_1`gAKimy^V59b#WE-7B}w^R0==R0>2?cQ6iW9Q*} z)oN12Xl7?)JO6-9PrXr;w4-_*TIcE4(=`dc-+gYbE2;rG7rKoXQ`z@~A&A+?t_4imFpJb_13Cp9k?<4B3St!*}z4qVj z=jck)n)t`u9A;_S%c4iZyp1SGLMSDOy1JOf&z6Jzr08eol6Qaz{;N@Z7`Ji4?-qMh@b+?12KS z%E9S!`L;&gu8m~yFd#Vd3FX4%5$ zqiLkZ6wNri{&7klty3l?mbb;5HlgCmZf19oOXjYDEa#Kp>Lk+4b>zr$W-kHQ`g_iv~Wn-tR zy1FN3<_m>vD$fq;^BTHtEH1{5c5*e%hfow0e?`MpIL9;C!Rfkn2$OLeuK1MXt~EH@n5$K2=ZL4ZRp-Q_<=qQb zyLHEt&^C`LU5y(hwQGIKZ6-|BS9fRk?Qku5c1iXAliSDjEtd9+Xk#}B@EeC1jatvKWO*!nUiHJINJNA3xWvO7N-P=Zv6^u_ z_L?`AgcZW~AillqRp0;Q=JsA>Fgl)ZI67x%PyYq{_G$x_p3<9AD%F059SH3sE0!G` 
zA4F3QOt+N3d{O^i>`2;6^NdLeR^YAiKyWCEcbM%C*e0fOU*lQqR{@whz;*B$N% zgb0YL-gIE*7ZrYExKmqT_OkICw#xJDme;O~e&EerIn%|`JbpyQ35j!!WXdEkmrZ8v zjVR}Ks?T)42;Vz^zXfqg^$28azEs;N+q`SH^3)gdnAf%lyB^`_??8aVo1!;SxZ^BC zDwd@j>D!Wf*9p^}^Z&4W6Y(SMSOdcb%Zl9t$KxZX0~yW#r+`6b@tJ6>fl( zY)fx?XQID4Y=w0h?x;>P%~TH)-&^}|yZMWE?ab~ur7YTewx6<*?>}?%U*~w{=+axOfepHXr7C_L}>PFq=_ih`RZ{PI374 z1)dt^9R#$@AL=Vg?7ge>y**8BFwt-zp zuk%~Z2Ug?GH?ytfGd@#CSJh@gK zXKdjdv|A;xsi#R1xKMTZy*QtI37=CegO>(pUcq&-x%E3qD({L`7)!;qGPr7z05 zkPo(li=aJRpc_>il)3MzPgbu+O{&$@WwyeJ^2{j){XMIXQmT)I`~O3*NWJhjCBoCU za%olUe(U{Xf$WMzmb|qqXL;(8D>)@O8ZU?R1Z4Yma_2FY#P~&K?27JF@+k^6Kk!_I z?}qlSW=b=(R;;eYDNZW-atoAnuujV$`(S!un;oZObVam#7rFMEdFG2QIpB^}m_VR* zImbjVbmpQRXMp^3g^Fjp6elxVCJYpWWNXYVU%W5TeeM)n#<+?%d*)y&^&uCSQ!O|+ zhQ9pTFLPSek3ARzxIFVKh$>~h0>7RdyAQ9~zAI9p=j@}&VSkS1IpGbztJkyLHTYA` z+-Mp(!4zGp@v`v~Y@Hg#?RY5s6{Ct%v&&c>d)y(jnQm^p@N=K5#Jnhlh{Pq4>%o8*pr)yur9OSFm#SQv5EYAMHE zwyupW+*P*sfFxgqn-<2n(mP8`&cj?$XUatJ7VD$#%eE!tcH%yDwPs+`A^$ zd+0_K{IP+h+}^C{Q}?vepf*a0rDtE5HtG=TxR#Cf?CN+FJ=hj&_emw#{_#7t{U!uq z<>!(f`4W#|`_W0~vZf22Dn?cM6N{Us} z-H-XOW{;pqJ)Nd-u~&JPoHf2oA!@S?Nm3ha^H5mCWQN62C$YwY$JT<0N~hz+T9QPQ zR{b0d_E3zxYYj0GZ)7~&54RC?V6*mk%kk+3@i`xuo{yu_-OEBl&v`xnx$$ngR{V*R zb-WC+_f$^8!DB(wc3j4X_SXvXos3)ZxTSYzTUyJ4UH$D?5xyen(X=a~CmF85{m19( zgOclS@wbg;>aZ_lFEPESy7D14iMKu{9xL>IJ65m`(NBs6t{dhny}=#lTNHfmm4&^z zaz9c*<@sKNn+5aFP2)@#btlHTc|4_7^qGU%c#`J})elccefXCrCF!eT!fLF;o|&b{ zU6YYL%uzcko;r!g)SX`@!o{n*IJ+XF+MD9Ik-0@}{b}~;0e9HXTuAyVFAClHIze=C zkCg4h9>v#Hx2UHQy;9oyzf{e1?$gvCz1!uTT9RP-5t{o^u=enPe?qQL(s(_lvz0Tuil$7^f!q8tK`~Iax!qx zs_1h0s$Pf|POO#VPIJ58LJgDCxr2c-s9w@Bg{1BhyCZAE#s2E+ff@~djL8{u0?-`C zBj3J7=3Z|Qk98r))$9oO4F8_ar}o&Ugu7TI@!BFWopSm1>yHj%rV}mDs85QY;86sr8|S(9p3CEMNghq$RXLX|z6947(_;*j+3y7WkDxUU@YJa*k&*^4o9KEMKX>Ef=&e*mQ(^1G{K8r zg#UOB-<);U@lW%uS@eeP0|xp4$uQ{38f?)BdKZP~%qMuy%%G4uvWivbFi z*&LKX(R=949tuwHq0~1Aw|Z!L4-Pt`v&bI-g4;+h^w09xdy#1*m%bbHqS1=sOvL}( zFb|RkZS(0L+Si|mk4B|7$<_a&Z(4;xDE+?)TZ0G|lqTArqLbAAf!-W&L@MntPylNf 
z9npf|&XiWYh0p93Z*hKLVXOCJpEd|KZ$v`(R8WQ2`o=rbAc?6-TDhw+RK81rhI&BN~R9 GkpBUk(v literal 0 HcmV?d00001 diff --git a/src/paperless_mail/tests/samples/simple_text.eml.pdf b/src/paperless_mail/tests/samples/simple_text.eml.pdf new file mode 100644 index 0000000000000000000000000000000000000000..678e6df42eb3d69a13c6b209bef01494c5847240 GIT binary patch literal 22301 zcmc({1z40@7eA^}(o&M*&>%I#1Pm$N-6bs|-5^pTt#nC;fOJZSv`8y0Dk;*S2ndMx z9rS$S(f>K$_rLdfF8aXCdUvd~_FBKa_u6~COezvmoDeP;4%4f(l{p*;2n@0}y^SL% z$Sv+!_EZ|-Vl0T9)#txULqx*%H!7y&#vEp26QCt>1Z z0b-Tl=K=HZKp?;`Fqj7dWe0;_}t`7$4fDLiD z)hx_hK>82_7{mwW<$@rPNInn`ADjyb{P2SKcmTCP02)wI6a*;bNBd?jfS5oBAZ|@H zSsc_a5UZ_;m5qx%KkCnQXM0-M_m5pzqCZbZ zyZdoGXjwDh&|I0BB8PN7qj*q80m_|)nRl3oGgmvUYxFV`Sb7K8hp}GcWnG;6q{(U- z#L1VQb)({KQ5Si$0oNuXqxSBtr+W6hw<{PPif(NoK7ICV3eOltx>$))x*^7R84vw^?EXIe- z9=2SaG;kX4tnn`ncR7t&z4SK`6*;r2Rzk95RDr^Xjb=9J}vfa5Av!52T%*+3Zb;ic(rk2#O_3 zf1nrZ99{01_tFS#osE5#K)+X!@7KXd@TdlEPHfl|wCCWkvTT)cKuX29@m7EYBh zaxC}ll=MC)k7;x5gE?c_Yf_sVknkl>NC@A#l-~E|fx52RHd~qz^{cio(__iV8t1$3 zV+_{6l1I3&9FXhzTxYetVoRH3)!cXe`DZgQ=_)m!m!|bqZ*@Dz<>afLiRpT+V#3!4 zFRM2crp*UN3}v_tzIG6u8f3y53R7APq8@JWb?R zpD0zS8J?+!lH!+o-+Pp_v9LYGeI|JN(fq&>`eh-@i2rMxJfv^|T(2mL$pOZwY% zo-=F9f0vEtr^o%HZ16wHcGee98~Wd5JLU8b((s*?=HGnnmyzQ60n-0worph3!*kXb zPaFClr8$L{-^PaLEDrLZEa{(`{Z*PDR{d|%@cv1fUrgfPaPoKfJ>~TGvEez3gV6s8 zIe7j{PWc+4p(6iVE z{kNI-S8;wi(LYWQkUuRnpl4kN`fslDm)}Bv&bOz${?TuLu=Njq3;o5x{tbEmpv%)P ze%Iw$WQF}t3ketsF#ij`h5Tvt1U>6vr!D<&mVV0W?~V&S3$2iUgCPlfGgn(+!vcg6 zOzbQ{tQK~hn(C9r&KIF%P^Szhv^Bm~M{O}#z zK&SYn20Hb7;Lj<3gZ|Rrk1Y-Cw`JD%t(u0D1+W)Ya6VW4NJfNVdk z0$6lqwjcg>mI}!B-7L@Q7r^wtT;gn5K(_C$`b$|rwjX};GZ+AbK>xGW z`5vV>HTAQ$e+njlfH#zLsoA4q;PRmFRt-SUX@mszLShxis z=Z`4PU(Ei1#xn{yGZ%f2#zKHHP#6%#|EGv8F9HFqfPRK-m+p8PXpWtH&^u}jhAO9a zc=&bT@uX2mkXkIqImelsT4jcaQ?gpIQ9aQft{hfVkYwtZ!W*#8yrsA3F!rvb$nDe3 zA-NX~S&myn%=f#FHodP`czKl_O?Yj6+B3~)9-p=@*%I6!yiY9F|M1*zdfNZ}1wKM|1wd zeaG-_>Yfe<8e#8o=ulefmZ4n8<)P)<8P`%{YS|Y`bnkE1*D-r(y>*tnSIb9zL}jpb 
zhu?mDG%ED#H>D7VOUqoB$u5h$`oi|e@9V84?~&93$*+%nS)X*yC9h1*g#4Ao(q0(BA-elhO`)aq7v@trihE3=ma%=A zFjDa&N5juir7w6G;0lS}B*W`v;c<*ElN6|Wh5wS{4rk)Ik(T+68VlJ zLsfUKUxdlt9f zd1SwMA)eMOujdvgcSSfST9~$ei5{CV^l?-RV*uXuLagX4bC_q8dF%zASkh?EMdU}< ze6jlQP9ok?g)Kc~x%HAxOz=zJu>PIm^`Qr?lXH3AVEdY`y`QACwJP#a;CjmTjAtua4|e*~!UXqyuT;mO#B)TL@dK2D_+Fbxwk0fW#I0H-|jAHF1(p4nwO<16tI=uOG=!RKHHt} zpt-lMJ&u44fH9+!WB0fvP{973kvwasY*xM=SA;338l01j&=f8Oymw07)abk zPcdl_@6h~V-vw#A5gVsVLo)ep$y}j=b3NNwUxL5fIpOzBf4tLm>?xeco!48gNx@>% zBXC9d8J|-SdhDG$WIMum@5Fdu9|Jr|>T#A$c)#Fev5qWp6fi!0jP6+RN=)rVyaVAv zKiiz(lwZlEcNzI9D;S-dU_&{s=n22&py+{J;+L)o$1(|Gh~OQXnEDHy-$>J4Y4`0x zU4nN63K<@RcOg4zO%%0!(dcf}J+9GG3yZ3fEz#kg&IRAkcCZof&Zr3~i=lElPa8b7 z&1Tpgyd&_% z;sKGn)+Zy-K>RM#rnD0+W{%Y5ZQkgKuqn-~t0HAD^qcF*eS_bvJI);r%)nzJhm_+h zX2q{-^P(OLp)IsV%ZPe6^G^47}D=7l!l|}T#w*@bYc5|y1 z%Xx-;b4x{v`#Ubu^kmz#%TTqv?%|C&S)F?L_?>Uq(6_C`T{gCY1~ zQ_)2<+aj3{{M$G&zg;>14b~=7m;gO%y;#dHkr&7s6+Ru{?;0y-bmce@}#|jS`b3 zQ8zGF>EIc4GDXwStPmy(1&!r4SAZE_1kWvVhx=_N?65fLi}-k}?~wGPP>U4anb9O$&g)2Im-pN z*T&@Z%|-ZbJd(Jm!2Y3%=*-{WCh!?m392+??=@%g-`n&s~`W6PPTZou2vZOiYB5t+aPG~-DE zHKmO;DUH53-B&>pU9&h@y66$w_JW+`emvd{1wZnre)7#=vcAvlI076K;uuOAi{eP9 zLct3Ox^skh30xI8@{}AE?WUL(cyg4_(}uY97zKxnN9c8u2$`k`vKTL11}6>8B1r_T z{XF@T`eq*?SnUFkw%|tmFbYy0kK{IWx^2dQN8|qO{T1MVwp~duJ{AUE9?x@2aA2D` zrl&VuEj}})F?~P}g@SPu^ajgfb(_Z(!PN~G!DRFW3c-6(S1B(}5m3l;w(A&qOcD@2 z+$N?qjmiziqI^JUOVkafu=Q8PlbTP7#*@OUy~xmH?2)NTVdrlm<>A*he2qd4BY+Zb zem+pt7QB6NMugZL^AR5AZY?(2`mjg`$8N2tlt@k#oqKXt2GJppjO8%P0N1WnK=s?p zbOQv<>Yv5zaUZYJFp*nePHQ?YY*R1Eixppn z*}Iu{!~7_yC8Q-Uj_iwTpi4-72$X?Je4tAer3g@g-Up@8Asm*(9jMXiutmRIx`oc- zG~5fy#6QNmca`9n;^2J%q0@_Ce_p$fduUi_0%+Wr2m@m?V@bUdw6@BN{a=htjbR?R zQka+qw%eFOXlWSLc(6K!j_Z{HNrZwei>hceepN{!&V29Z{q_97nV(_4Q}VFAZ-L>* z8B(*po*1UIt@W>@-m&08T|}nNrSz&1)o)4Pm6Z4D>vG;Fx?A@u=E|k_V|TLLb~QD{Zk9n4iz&j`MJH~GA~OmKfb#-xr5Bw&7|xX;6MZTTecGbg3*TL|=ddu@oL${8Z(SX?vB@X_G z)l}ijm(Vyj7}jW}7}lDqu2Sn-4*B3g{0A4joszj~XhiIu=DM3W%XdjtUgTw*!z^*u 
zD;8b}NlD$kD56Bc!Z}NR@zaHC$Bg~cUp;((%X5Fk^U*KEcJDDb@~o5NjBGWic0fW+*FCN6L46jrRk_+KS^z|`p&mI<8noO z3z`lVeBWcY@wWL$x1nLH*tH|0go^LOjJNE&ru~DO?CX11UQjzrwp{icAY#zvz*1wN z!1i+lop07)P|+#Q#k}GeM)lUX(tuJoU%K$&wKmMa*>FllPFbzMx9`I#Ib~~H2-}V+ z84n+b-R2frkqnBa;kcX5#C@qZr8`E33b%QM;xKbgbb(Lr`tnPR@&{DW5}f${=In(z z@P4Wo9Qp-Td5c7@w3Q@AiE5*OPaC}gy7&X$S9c5_BIN3E`?r!L~5n1!3%q7GfBvHr?#1uGy!|EYoA#qPKF-0feKzCq1lt( z#9qcZ^%=UD5hw0aDv!)DolSPa2%PNeg_0(j&z$2fmIDt}Bkq4PA+BV?X1Y~BHori0 z)mOaoR)gJ%l+u#~=V6U&ckdio-gwoGPM|k+dvBHQaK!D9s2*!Gz^PbAaoF2m<)K3b z*U}Rm&JY@Dtgs6X60`0vuf#kL{3LNcb;OBqmMbJ(KOm}Et>6L`?i)mR`0$+&COgZ} z;13*K>J_)GYRbuk##k%0ls>}!+!8Yw&bdA0$!AR2Z*b2=?geehQN#F=OwThUUP~B> zSNqzi>^kV%lzo_U{3@+qT3ubMql4AQSFq}T??o50#cFp~YyzPjvu?7T-m6z{%+1p- zKYg1y*&z94XSL=q$J}Mv(frvh8>g$*dKomj*0dyo%#I50Dbkf5yKODn!L!0JCG+xj zm6Sg2$tuaI0%7+w`r3Sg(Zd z_hGC@vecBf4y6jOjRfO-%HuSw4j~Y^6B41Lp;u9M^Wy~5I{9AKle;NhI8pYZLs@iM z^Yj6WV3A6#d_!w}g^AU-cr~l=)I_85>k3NtZ9|VTnUtunWRnqk2!K z;m8jeN8egZN9*=xMdCiAC;JJ$uJLk**oPvQZR!+6a~qY_dh%G&1X!rY$evxl&W+l- z`OOZ#d)`7-7`Zmr0?wpgJF+Ug=IXmv)1o)7drx2aiqny~-yq2zHaduYTiLqf`bzkX z9=rDy+^(m>MJ4QCnM4>IUvr+|gc)*)wQcmIYzDmdn>|M`;>m+WE(vo*O15G+UT;_kMl&?>F zy_l3Mh>rXzqf||(wWl4YZhvb5(Gzr~_BB!)E`;X;+MJE^RkU9oce*iU>vjx2yB|T% zRg|--+@AZ!W^ywEHjh-t>xSYl8X_@@W2EMb<6f~6&Q|ktK3#R0_U8{p!;h+k-4QKn z+f^D4Rr2v8$f6>;-AK=Hzvyt^?i1-9KFsq2pRPw^k3{$Qn~x_wO&x5TV&XPrBxkFC zER!amNT0iU(*d`h*s7)Ufj&LwNXdMYXCW3h)j1OwK{jk;nl$kZRJe<2}p|zs2_On_qmoMXq9dt{eXG%w=};T9Wi?Z+~IY z=$$3nsXjew-L>8u$Njyt%jE}!k3T=ZL5j^>kM-%P2wswE$j5>fohBIn!aC8p@HOZdzrh2M7Rr;SasE_3Nr-m@MAkwg? 
z4GKBlzrs7@HX~tS-^kc`ji?I!3(ZiXaVdo;6~q^J{OFdVd1Bm>rm z;G0st{!URe9F2>YkT7;IJ1aY1Up!e1Su&X>=~Iq!`X}^t?V3cD94?v~iW=4$D*NM` zif^c$0-OSz4eRM7MJ3xLOS7XYGb?9#v~@$cZ&GQ*6UpU@C{_jKzQrb|6N(kG-<%w4 z4#%C~9i6a5aJ}=YSwJkg7553n34NZt|IKbG23L-m+(?)LT4=Vjd9R5?Ka#dKz9lKD zT;N)v;oO+XqGri0={1KK;mWHpivDR#qaHbD?`QlXrd2|-)|cQX;%H>~u`cq81u6D$ zwg-+^U*KQEEyTXiStYP0s!?!{WEwZPPKpg1`|i_)cx^XU_ntoQ{lrv;w6tZpy?xtT zv&2I;T753tZ_XVHjz9P59#2YaC8@|VcUy+nMeyu0JaRh6mfz>8Uhi06+V+C?V3fUd z>Lz2yy7(}h*S+y3Zlmj~j~$A&#k^QL7hYV@!Wc!rSRGu+;$Ht@n2Q{LqQ~aNoukFx z+A73D%4SrX!|mvwb-5)ew?Qy7&x3ZkpM#O`xp-g3vGuhladCtU!$%+rInLw=qSnas|V`+z3OB zTi{`a7W|q!9ded_0k8OA=8Tq5LdQZ@M1gB)d(%za+YL(zEc*|8@i=gbv5&#!aFyqb zk6^KVF6NjQ9o4d#RjKpY83U`M*j-35Z?Oz&QP5;JL^RG3l3iDO#I=*VfrYpH?vdy1 zk8y+<=CzY-RIg5$;#cc!`*>kVb+RN8X>zht6YLRtS^TS#SQSIb<5y;CbwhPt_);D} zXxgJkR;rh)1l&xNVSmmaJNaaxiILt4Lkexo72OD`;Q5P0z1LSAnM?|L!yFw&CKM?ZNv5RnU0shN!uCGv zzRK6AVQH6+H;Y&oZ343`f6{DH8@^GEuAos2)?LG2bD>b_rRd7Rd&<6-oh&bfd(Cew z$mh%TQgOvw?ZvdtZ4oh_*nAMJK6C9)y{M@$TZHyIWlL)o9PK_O4D{RtsJhzGs>vI(N*yvJdVT`v|rd@Y1_m&kSGL>_m?!3ak}k&uaGQD!9XGn@pX;zV)n zf}zUQ;SZM4A!EfOA-Y-*k$o%SrR)ld;MWjORqO2s58mtFzK|0lvF8+Q8Au*jhEvwI zXLm5RJ?OysLMA>eC;#zgSpOW6ZhzE*{9FEUu%+G7UJ!DqrMfwbuFCG@o=~~zT+`JS z4fKOMPwB7QeHwqb)iR>mw!E2w@T6NAzxwzYuJfSRZvM`@HD%}86PkY453sBPr9Sgz1YtVW+8mz%6+TTrTD zWXcLpNR}JOO;EEAE$J>B8=7X2VtXn-6e-kYy|<}*wEDKWtVWHxaq5t8Pv~j;*qY^_ z!MFFj1@=W#R^#nq%c2!IYClGS{M6vN%2zjsA9?X%l8-n?KF9URM=r@sudeRRYI(~T;GCSuu zCU7aTqpyiL)nWLikXP}-Ih-QnFc7F(?169oB|XymxAE)|8**elZxzWPT&1%cOA%x( zat05cw5G5N(_}(qsk}Z!-NGme-(L))VD(kGkh}5Lph7^CQC03lxUax@H7y9;W#93T zPh7-S#8AXrB;c*20W39qTyo9JlWCgdN^?_AKz4wc0fcT&b*aJ0yFP2?7Q=Jy2ol$9 zT0CQPlBBi^KC^`9XBW|)s)j-yL6ke=y5qWv!wN#J9`1Bi`|Op)szKI`V%2ihg2;Q; zQ_D@e7U7Q+U;7-Iw;w5!rS|JrI<3-r4q0b>({2+Lb~bWxajOv&>>RcJ8n!BKV$!3g z?o3Pdh|Gk*sL}k>TaaXe4zc4mQbZ2EDJd^D6?C0mYx+sS@Je*g6SK=HvK69mKSs|N zYLOGyYu#r#Bd=;_2fb;`AQq=rkN;qjJM@yc#>RYSBqy;(OXFh7lvn%}ID5CvhRjLp6(oV3KXuwK{OGc^iq`ig2O*y^Cx0s2nl-ORbgQ(2D$vi{~E0XXts+a6c 
zL%9k=xf5Iu)_VO?hTD0PlV3ya*eV!h6$6FhGmaA~OG=?nj)>*0^O71Z-|KM3b|`R?CP zc+w?imQ^sEN}WiZkrI`nL#j<`6sHoWb6NYcQG!YW#S`Y~^i-452}f+%$9a=5BkK-=xeG8!J~?kGh@@qFo9n+K5=na^yEM|e%W=i1Ww$xMcyXMS z66va()G2J8vc0)7s?k5OEOIpCEbaETgvt4ly>{9ck@E&$qV#@Gfq=S! zuy8hWvT^`2+rDQmpss@eS!1U^>=G8YO|)FqP3(Xy0U#mB(ixRDChBa4O58&7@Nx0* z@E{O4+^9BCBu*Y6X^9Vt&@WfdmE`P&G=GU=W-a$V)@Qp%6oq*7W&+q2WdH0QrGnAiWOA z8U!OxuYa620flk#BH+NVAs{~(Gz8iOq=EcZ3GtsIa3I5t4-7*f0hWJOg9FS1)&rOf z4;L6^4Jgi_P%d5+=Y}9&1c(<9lNXQ#$^(W1d5a)@Brl8$!h=%He+mNqL%8@L2;f@D z&&Gj^RsatrMDU?H|IcSY!6;FXd_d9;6beTg z0(K~&eL7}8ZhOH|I{4vl|8z$K3Inc||Gc7M7&8zSaQ)K#E#WSjs^CoM3FCPpVv@Fw zlk!brzYzp`agIWNTkX4wKE`r193%Hct&xSiFOZ21))~TA+87VBfy~qo)9(}{=M@sQ z1k^$sO1c9N3|*II>$D6MCZZb*c$-Oj#Og9?ysiu|Y8^F*yb51vNl8{H;x_%9b~z#Z zHTYOf`VMJ&EMsYU{}fG9p#_Wo z9x1CC4xvH{=PRE+m$1q8`P=UM2mR;_!~~?rGJH{VHk{|xNF!M{X9*at-6O)#2VD&n z&j=UwBkBxg3=8(6;cEAcdgE$vKlhRd4&A#aOk_^|uaoQt%*MtBC>kwV^qSs!R+QxR zY)~JXXblWqWgw`z@drTsIX>T$9=>D4fBYOp1VHH@K|#_E$dE+&@PAyNQ|2!~G&ccY z2>=S1L73$iwhS_hH7gO9#_L$=yAta54AZZ^9X z-O%(q?}v8ne3tR#c7#&pi2&~ zpjz*eqJioGT)^jAeHU67NXDVHmqDobg~5v!j&tKOv_|ZQcny~6c^5sNt9x(EKleo^ zc}R1CA@dURGe*`xrzC=XYBSf9ZRV4>wc~DXU;NfEF?G)l8)6zV%#C}cx0&wBtGX_Z zH_dwGh;6+7O3w3v2djv#c3K!(_p}1S8_5!;I!0eW)sb+UR!R7CzFNmtZ9Zo_V3^~cLLhe$h0Y-;=6H+!)P+#cw}zF9^1=q|A<_cPygYEC~WsD_r# zJ=hzaWHu0Sl|szPcokTa4EhG)hG6-U*wN#G!rfk*PdhW11Xb+kiD6U+`>03>l!w%j z7*jbi6A8udJ{HB23ASscAZvnfd6><7-m5+O%wmP?XB>z%rcc9rB|A>lElzC~q;b!s zy<3&gaDg#Kd)S_8<=W#<*;gFx>4X+~6F|GUo_Q)>ukj7ZFya1sZ}z+jxU$oyea*&8 zd2fy>=X+U|_6EHp!56@>>KEZ%;BLOE?2U_WOY(pyFi2YTWwvxp>fr;1=GZ4jQQe6n z>w!TJDx=A&T(lIBMZR#aHoXv2zj1wk)A`j6X2iXgAOT~VCjRYao+A6+w}j?Yh* z#JO>#?YICYZ_IjzU9q{#O4ZXUT=D?!UKv-{CJ}&XlMaGYKF=E z{!?c1D&7*&YltK!@p!ea$`Ad$Y6R`H9Vf!YVV_B{I?wZA z_ULeZoSy% zgeq;a>>Q*$aW(T4>w)SHsR!p`ADw*OG4XPIFe_-2N%kaIU4f`afz~;MoINosUFb=% zHdmE--lfj+bCS*+w2K(Wh9?xVZ2SssE_lWwg*_jHIw4w;TB$r_RgXrMgLPrb&6V0y za>6Yb8L>vNMjok0xf*TDHqh5nm(ltyQ#}1WX_W6@Qzsh=aK2+RTkaG!A8p;a;OH?k 
zip}LjlG;O~^^EreQ9$`*hhJcSfFh(k4j&;RlYY)EKsWQ9+8*SRSLylBl}UHYGapMn@-Br=ldqT*1q==LgL1XcqRXq55ryq&t zS7^)ÄwZ#Fz!c`DqlQB@r9fx1s2$f*5E{Rl38Bxyj$11kE5&gB)na-sMlH6-}N zzBa+Cq1s&UI7<85`gj*|S#QK0@P?8zo-Df^i_RM{r6(RVUmcPa)GrIIB3^?RM{c*? z%@1vTJm*g3L)~Sc9zpU>=9oXsNOA{{$dWNV;UKqDL|%|-2RD?3>4YsT!}l_`Fn9aV z{7&cXRzB7H89v09Go*H6!9I8SID{kScOnJnJ!pJf`k913&O*p(g-pq$13WszuBt^6 z=6EFSUL{z_vb3F7;TiXyC2}p<4hVqEqY=_RXxo=PZXqP}aQN1;VcOd1y|onX{o2up z$(Q@(ft^lMV&s?*2|o0FTQ0yA~qDT+4k^9bq|+1O?_$D&T&rh}`HgDv7(nY)IjiqN~9=ZhA$ zHD7+ZP0FiYg$yfG`KC?mNCHoJKYNwY&L)sseg?F|6uQQAk`lDW*itu6cO$ND+~1nv zEMN;ioQDZ&^k^_aTrHsqk%yYQJ+Jdcy#Zd=$HCsG<1cZ%xi!KLAY6@#f9yk*x zs=IL{))k|1<@(Eijg~SH6Hi3n(WT~M9-p_l83 z%hA8Hhf(-YMn~}4PBd7U!?C3^_~mw6?txe6Cq^yiSv0D_cs}WQo6xH4gGScDIFEPD zYhB*NTZWO)JeDv@)ztJ5GSyTB)>gF6jd7*8ryT{V91rK?Sl@CwR(8q~%Xhz)kflNY)lH$>BBMtuwhnWh;DhC_uYKSoLpfy<~=(;u(j*( zcFVYsY$SZRCYIm9lv?p&X5b2!i-wbc?7C`oZdSZORd?3Y7;CptdV}~|(YF$A5hsIX zNvXmvE~|FZ-?>yDNWZdMiThrvoZ8M+GRtNF(K=HXZz=f{ekY96u%#F@xNAf+3{ zDJ^Jo<5?}<^Vo(~KChJV)-6D2TMr-?E=KSr_;K|pay$^yA*bo4_o%XXY!R2d#%e<1 zc+a9zZ#KzZH=RQHpf`j#{6&!m;qDP;~FKb=V#Tw6b z?o**!#=7aPT{)Yj3LQ86qLLmZogU+Rqb>@#d*to0neH-cKfB#LbkQ<}WCqkELmtEqDccb=@BXY_`wy zCmWVLd|vAIfFf<5S?D6e8|g9LQT5vfPn6{?;6(8wm38>T68AiLz!zRW6ajUOsAY*u=sw+A; zqwrv1gY2C`eosi6;UHxyd~xL^BGMqOwuSH8vuXDl<~PE{;#lvMK6?9d(u*w|HVYCR zN{h{YO)A*Zj|kI5xC#1b_WQj4_*kOMbZ44QzCyr@Srx=Z@yLS{U%7EH(7Du^vOu5z z@bG}N@-_8GC{JTbiUPsC*BDu#5NiZ(iAke(7s-BV+NX0ReRN;0_2mgP1b&VQ-(+X~ z(kw5_oi0%8n^3I0(b-UbbJuBw?%fRoLCbCp@D0n%&ZOOMFCJy#FLf<%^ZTl13Ba)u zyT-0tqH*F;CQ_+7k6n0bP@?fgd8#~=tnb>_9$X&-+mFwOSciOGe(*)Rq4D9y<~$ce z3LaDT^|)}kk;k1X#vcs|CiZuv_+QnijJAW>UNVnR=Qu&p72kU-hk^`c_m7N}!{7ld z-8U;zFg@2H0T)8uGQwk68>f-?K91)v+lt>r%e4z4dJ*yHQ9ieMdTIVKeI`kSWU7DM zXxvQP$G8`9l5AypNuMZ7JoHi%Q&$E`Swx@Pg7~3}@Ws3O<9i>4j+UIdF$p-i)E7TK zwr<0Vwk>%$~v6xpVKa}=U@qd29)!mz7Ymy8T*S=!vt^|*vO zk^5bgHul@+J$Bq>dY?ZAjTP~#ERw8TsaGn#ufzHf=6*4pWTI|rEdidXTU?kOqgqPQ zb8+aYilug=)PuoNw>8ZNj;4IO&n?Fh=Huht6aw`%Ct`WDZbFMY-kHk9C(7t94XK+3 
zOythZ7;@(eT_&Ln4+R~Cu(Q%WD7ob~#=E#h8ccfiYk6*eu!h4;bMH11nS0!-mGanN z6%e(5{np&kPYL>v+%JCM0p93ct@1+QaGX0i1s%;uwN=`P2>2;hAe3SXJS_5 zBu6~D?c1+v-vL?WOU>4r-5T}O2~{q{9kVFENIu8&3dc3Rfyzj;NHy57kf70#cf7!L zl}_+J%UJ9x`2mJcnNQh`d!IadN1xD_3^jKJO);%V}?IRp6?1=VN-p zpkASneDC|I1dfva6R^fL+%IN%feCW9lpLys2hMrd`qVa<)Sp&{Z&hkoH^%edu$(Hn z5-k2g_M~Fv@lwoqmr-GypYh`F@#F82a%>(OMvG48+SJz$YG5 zp!Sz|`DxJUY!K?V4At*3v0vo};-x_7?3Z}?|49x138wx388io&1q0cRd|VJ13=aJs zxJDv@I5vbAf;0rdVIX~8AOg$-fuZ8Qd^|w>m=6gAxKCr%Ksfn(TpHB~5PjzTaqRI| z^ce~Rg4w)4Cf;fE83IPV#R>UWw44_h3O*n*jf(A|{=tCSU>>0KkM9WJfCP*Rr~e^* zj4JbAgpYanet9XIVRYZ^4)7(({lhTbyzp7~n0t_Fg7e&mnRdd@qPq(+69PH7kN4v( zUy@!cp94FWmEO#5Kq92PLf*b%)!!Yq&Y@9SSZ8dJaMe0-urzb;Z~a4@i7r0|x(T4r_I5n|((_SoUqbJ9|=qkr)ApIZL!g24YL zOaCt;!SK^JSNs|YMve3tC-iW#xQPQCHNbJczZMJi8-#$vfy~^Sp#RWN`PHXS?Ea!b zV8C0%{zd~s;D0X%fkRPQx4+coIR%MdX<)!G0YCqx92gAc`+Iv}Fq{_<>Tl(E;lOdp zuQULHeyfWB^8(WUS`G#rQT?3;L87vte=P^$L*@MbN&{Z5`I`=Sz^MH4zm?+!|Hcb& zn)#a?JTS!XeE=^1TYGT6-}^bOqzTd|K`iF6Yfq`SQzt=_nt}7S-Pk%25L!F%ctuAoH z``h@!V94L}2LqV-tu7xb$NpD2V7%bpXmG&D|6UFb7|d@pC@O#Z*Y*&=g!&r|Fu>nv zJiqxMaNhcx?je9){>}^FzQ5TY1P8H2uxrP_LK$yBs`V#97;LaRT1PYT<-> zm94tf9pL;4SY)8`;z6g`?7(DxdQc4rbP7ezE+$Scr$7X}ww4EY9WIlkvef?rL+s;_ literal 0 HcmV?d00001 diff --git a/src/paperless_mail/tests/samples/simple_text.eml.pdf.webp b/src/paperless_mail/tests/samples/simple_text.eml.pdf.webp new file mode 100644 index 0000000000000000000000000000000000000000..614aeee9c2edaef9ac705e15de6c9234050b008a GIT binary patch literal 5340 zcmeHIRbLd2qGah#MH-ft_|py2y$I4FE8UHBcS%Wyq_C7U(!HSMuB3E#yGSnVIp^Ll zaNh5or}@pi%*@N^s;j8Xy+A`VQdZD0)DdU;`EL%-LHh#Wx&}be-=wRRf-1h2GXwQ; z)f&n19Nmz?d3z_ssAeNDmpF=hneV<~JXH)y8Tes^#F)qa8dh~N5xV*ed)&w?K0|f+ zC*7frWowHDKBS{qQ09jrXY^SQad*Y{Z%V)o$erh$yANdUS5G8J$OGg7{|AIY7|rCfwQ? 
z-D&w}gawXA5nHK;6tg0|2N1_7NSBWB6Vn=iRm_Z+CiZO{<@XL*O0X81AI5eshDAh6 zb{(fvk64g}c$gcmGTYG=?S(fVIa1XLvH~Sy3)s#Gm8bkJgMnk^b5TC*FRbh8MZ(Z* zp^@dnJm5z?*v3Rt$dAiFfF?S}5tW3ci>?wJv#t0->!zCQN4J#mXGoV0mK9rl>X*77 zKi=5QZ%I##AelqGPlMHHfmyxsQo?>rS#xnlrH}e5TfHyN*2e6>e4xKRc>x}f>>}+K zVz9Jf^1v-!qc*F ze8}S&T>O(V6$~mYn-UpQJeh76Bw)Ok@#$@GVdSp!u6W`Toiyv27?H7O-jePdbu?rV z9LP4oX2ElFvYA@gE!`6h`!#~!E}_ATCPdU%cUx7QIN1ZvJ3bKtTtytI5P1;^0AK#h zFn3tph;`HQDfbSg?i5)_p1QN!T{IOF+bZ}UD;mrQYwvHo#O{qGIdc21*3#t>nJ;uI zviH`ApWl-=D2iXCY-b^NuK!IUcok@0TlfF+|D{cI(4HG&2)vBY-dN%1COixd@u*SUX_2g{o>t^F z*W(Ab)^&tFSB{$$aBV^+G$!IMj$QssBX7MV@%f(_kRO&z=u%(&4LI?Q^_wk`b-7`A z4so*8otrB^EmO&^QOvBW@2_iTrUYx92bBLzH4hWzRzxtwW|cBo1b#k*xZpNx)=wZ{ zx(tllf{T@vhW2}-6zbdu@peD?6>Gvddju)p+Su{)A~tS59a`oDWtD7<1mO882e*L#TVMghwV`dV@OMO-{LT8bp##y&}VYH!lwYw5Sv$}}#t z0VkQ!Kc;dqF0ITAEPHE$M@Zsv2qzTgy7{=S04r!$bb|nl4vcRRrSLcn9F;1DcS_<` zI)r0|2n9vK3o@EXs-)rbg;dgzGreNa=fpgFG z`^51&=HP~y0jfF8)sy{+f;e!Cu7&7+YqLRy0H=isFIQWBmMZeTe*gCdQ}Mu=^X=dpHT{78nm@yxKA*0_|B#2GCv(ChQjbSD z-@aOYr$c8@mS$~GV7OxD-7TK=dRQNpn@2G}=*<$D>Zzbu4`E>o-5|OazLVz)WWXdo zp)n2syYC!?nn zy_sV(7-$SW@uBh|Et5oHa+6%6xt3aL8G^iA_M)aS9TVX5-KyeHEK)*Ai@B5<0=Ij! 
z*TU4x1jVsr&V_GDX7-;@Z~=02$>k6GOkil-2TiGUfN}1c(b;iIo+Z3 z;0Ur5SOh6H);CCgUxrM6p}&<2F6$eoDd=m=C1U!P9kgAFgL`lZ|0KHa9ZQVd2y#7=T$?|i&VE(0Ej zoHS7o>l!&GL#5MABC|`?Q3Po5cmI@8yiB3wW@P(e=B-dK8ofe6s4klDNEX>6CWCAf z_rAW{NcAL8suBJdZ}!f-1h%o zm9g2<+A>wq3z&c4sapX0F}j#{{6arAiuBGxhGlCDZhLI_-6L{3C-o!iEUkddsEPcK zU9WQ~%pJPg^pDF{ACC;YZP4amg#0EDxyMVFBV2v_l();NKTgBDjzuH)I^QD-5WDGK zKL$JW?8bnqacclmyjCp3)W51O+l;12fqw|H^Jz9C=MR*zw}KK4|JWW@C~c&jG@lHP zK#3u-U+hY45;O=cB`M23@MTJLvqSaa{Ob(OMI#Te$Yv;Bc< z38>rV`(}-gf9Mv9e>Xv1K}&e5qe#uZ*h^=1XN{{=-qAR4NU}9Bc)TlnB^T_G?!SH3Y4%Brch#K35A9+`b5B2ubV6lw`jjhEnW-|L%LsjCb!)C$O;jywt# zZs@H2CQ_Q6n*br46Zw9RN5|Uf*OQ%7!7J21V76lb!tbnrTDU4vxQ)0PZN3*wSCh^; zL%f9foiFouHrq%PG=7Zx*%?h>=&wFIQ24SJUVr(3dX+rYt@{~WX|c<_ln(#5S^%6V zneQUosVj?geVp)5(707}5`2J>XvNl;nf&{@*w?I`zUa$1H*&x<_~jnQId^4x-*jqb z!j0%@euD=V@06G{JNrWA`cak55;0!EaM>{=K6mE{?1Wh?b&3I0r3pUVIFh}wMQ2&Q zphF&h2$yl7(iY-rZvO|_%0X5@*XKP*3-Sxmw)>6Hy!~E8PO7d*U)2)u_%&dI1`kX} zD0sHCM$p(ITKsmDKG^C#XuG6?P~LgNIN6p(uUU#vlJzXVnx&jl35owLP{!d}{E?xj zonG-WF0FLmGDH6voz~f(g`agPUg(gQu>Y0-D&5R6(6?fbp41giq&m%p;1k%x5yi|k z)ozDwhWUKH!I;baq{6`#4or}+#5W^lN1S8kyK3)Nl=iLsOeekzlByjy8*!*K79L4w z_)E63LeT>^I5`5iCACfn0h`ZuN2?sa@^k8sxnMR}c^61t{($%L++ObyB-S5}MLHqw z%4i$b;w9{`eTgi#u0K$y8XKJIJYoIrUuBgOKC-Wx%&gsKs@7B%9IQ;ZK1bEI^mCq| zHJlT?Gt4;hNw&_$z~1mVtM{pZU(SVn8u0R0NjZaK3`#2z>Fn_ZI>#V3eJZ1)nmQz> zz!1rL0!w<6p6G)-rDL5j{Cl^v$nC^CoQC8Wn+;!}XWVr)-8{urod zw8ykcw8Yo#XZJMvCq4!^->R;iT&ZhGQ=l>jY*vy(j7YQF0s4 zaM1ID^ZD{SCl0wvcelavo!=jR{ zU_JQ!NZ zM_XnZ13N&Duax&|UQR16Ng7$b-9+Nmt1bIyGrT4{{9|>{+954|l?pB5l2@;S=%*Nk zyq^_p!}7Lvr*(fB<%zne#xc-;n2XJ(nAdw4z5V_or zg%F<{e5WR(7P3&my!Y>R3$cU4avk$ytuM(|-jx~r$rdkrh1XJo!oRUFk%P5{@TSy5 z?Mcq%D7NM*xIzBI>c`;Q9Z2WlCnn)hCY6hvI`wb&!nK$-4)BU3-qWe9Z_wN9n*!ED z=FI~=&fm(0HJg=CJ%3L|9+KjOA^#hP7-A2dnJi7v{Ixs$j+0>VjPGW#*V=C;%pxL{M$&;X)V z^rTs89aY8R9NC4v^U@lH_^3y_wv-rx!-j&YU1pOOSh||}bo+Y7L?>cp&w}+|NCsK8 zA_Ufd(ik&mDRxh_aOxylQ2IBdEffO<2&LyN8h6hU-8Y}~9NCLR|E2?@tynIDcC|86 zwUs)x1*k^U14AX?5!QpLH=x>ybA}vuX4My(JBo9Y=h)4TI5kc=F(NGiwK=XPg1}53 
zx}%g5OnAGJll8_%lLR(Cx66WaN7PbD)z8Dfx44;^l1P&*oMpjaCj(!e?_ z_wuj}S(R02^M??okB?vY*;9TML>g=&PHjV$;o3)?V-j<=^}51VR88LDjYySIwuJYu zo~HNiE(5{kOko;rQE9uruh9X)(vp02g-pHPTe#v*IFV0oUYKgrxRx?8_})S+GM8WU zNa1{2Vk$ALYBGwKw8KAtL#Zd3UD&eSVZL@!!X_9ZCB7L%V3)|iSWS@q`OtCRj`1mq z^L5`~>ShyJ+iux4;77ENU{h1IDr{oPH*QIF`W(j4_f+Lhytv`=oMdW4WA}NR1Loa) zIKD0MsT_=L`DaA!x02+&(fIob&TcoYhGm`?widL^!(9NqnLkk$C(us}hTO;)sEoOs zOiq#($aJmd)w5gIC~5q?Y$NG6+b1}I?jz{HuwpodkPxkNJuh@%fqiJY;KS5VWmMe{1H2Mmx2>tBP1B{XRo`_pMcc-P+afIj^D{ zQElUcccg|0tqr#@J_7H#t<;Gs$zire`}58!e^UP0r|zb^%og(v`tO2<~$`^wo!9v<^WEezwk6n rXztKK)yt`iErF=`BuT~I6p}`zcg(#n6zgz_b9 None: + self.parser = MailDocumentParser(logging_group=None) + def tearDown(self) -> None: + self.parser.cleanup() + + def test_get_parsed(self): # Check if exception is raised when parsing fails. with pytest.raises(ParseError): - parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na")) + self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "na")) # Check if exception is raised when the mail is faulty. with pytest.raises(ParseError): - parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml")) + self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "broken.eml")) # Parse Test file and check relevant content - parsed1 = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "simple_text.eml")) + parsed1 = self.parser.get_parsed( + os.path.join(self.SAMPLE_FILES, "simple_text.eml"), + ) self.assertEqual(parsed1.date.year, 2022) self.assertEqual(parsed1.date.month, 10) @@ -42,48 +45,45 @@ class TestParser(TestCase): self.assertEqual(parsed1.to, ("some@one.de",)) # Check if same parsed object as before is returned, even if another file is given. 
- parsed2 = parser.get_parsed(os.path.join(os.path.join(self.SAMPLE_FILES, "na"))) + parsed2 = self.parser.get_parsed( + os.path.join(os.path.join(self.SAMPLE_FILES, "na")), + ) self.assertEqual(parsed1, parsed2) - @staticmethod - def hashfile(file): - buf_size = 65536 # An arbitrary (but fixed) buffer - sha256 = hashlib.sha256() - with open(file, "rb") as f: - while True: - data = f.read(buf_size) - if not data: - break - sha256.update(data) - return sha256.hexdigest() - + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") @mock.patch("paperless_mail.parsers.make_thumbnail_from_pdf") - @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output - def test_get_thumbnail(self, m, mock_make_thumbnail_from_pdf: mock.MagicMock): - parser = MailDocumentParser(None) - thumb = parser.get_thumbnail( + def test_get_thumbnail( + self, + mock_make_thumbnail_from_pdf: mock.MagicMock, + mock_generate_pdf: mock.MagicMock, + ): + mocked_return = "Passing the return value through.." + mock_make_thumbnail_from_pdf.return_value = mocked_return + + mock_generate_pdf.return_value = "Mocked return value.." 
+ + thumb = self.parser.get_thumbnail( os.path.join(self.SAMPLE_FILES, "simple_text.eml"), "message/rfc822", ) self.assertEqual( - parser.archive_path, + self.parser.archive_path, mock_make_thumbnail_from_pdf.call_args_list[0].args[0], ) self.assertEqual( - parser.tempdir, + self.parser.tempdir, mock_make_thumbnail_from_pdf.call_args_list[0].args[1], ) + self.assertEqual(mocked_return, thumb) @mock.patch("documents.loggers.LoggingMixin.log") def test_extract_metadata(self, m: mock.MagicMock): - parser = MailDocumentParser(None) - # Validate if warning is logged when parsing fails - self.assertEqual([], parser.extract_metadata("na", "message/rfc822")) + self.assertEqual([], self.parser.extract_metadata("na", "message/rfc822")) self.assertEqual("warning", m.call_args[0][0]) # Validate Metadata parsing returns the expected results - metadata = parser.extract_metadata( + metadata = self.parser.extract_metadata( os.path.join(self.SAMPLE_FILES, "simple_text.eml"), "message/rfc822", ) @@ -209,22 +209,22 @@ class TestParser(TestCase): in metadata, ) - @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output - def test_parse(self, m): - parser = MailDocumentParser(None) - + def test_parse_na(self): # Check if exception is raised when parsing fails. 
with pytest.raises(ParseError): - parser.parse( + self.parser.parse( os.path.join(os.path.join(self.SAMPLE_FILES, "na")), "message/rfc822", ) + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_parse_html_eml(self, m, n): # Validate parsing returns the expected results - parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822") + self.parser.parse(os.path.join(self.SAMPLE_FILES, "html.eml"), "message/rfc822") text_expected = "Some Text\nand an embedded image.\n\nSubject: HTML Message\n\nFrom: Name \n\nTo: someone@example.de\n\nAttachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nHTML content: Some Text\nand an embedded image.\nParagraph unchanged." - self.assertEqual(text_expected, parser.text) + self.assertEqual(text_expected, self.parser.text) self.assertEqual( datetime.datetime( 2022, @@ -235,17 +235,20 @@ class TestParser(TestCase): 19, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)), ), - parser.date, + self.parser.date, ) + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_parse_simple_eml(self, m, n): # Validate parsing returns the expected results - parser = MailDocumentParser(None) - parser.parse( + + self.parser.parse( os.path.join(self.SAMPLE_FILES, "simple_text.eml"), "message/rfc822", ) text_expected = "This is just a simple Text Mail.\n\nSubject: Simple Text Mail\n\nFrom: Some One \n\nTo: some@one.de\n\nCC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\nBCC: fdf@fvf.de\n\n" - self.assertEqual(text_expected, parser.text) + self.assertEqual(text_expected, self.parser.text) self.assertEqual( datetime.datetime( 2022, @@ -256,33 +259,32 @@ class TestParser(TestCase): 43, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)), ), - parser.date, + self.parser.date, ) # Just check if file 
exists, the unittest for generate_pdf() goes deeper. - self.assertTrue(os.path.isfile(parser.archive_path)) + self.assertTrue(os.path.isfile(self.parser.archive_path)) @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_tika_parse(self, m): html = '

Some Text

' expected_text = "\n\n\n\n\n\n\n\n\nSome Text\n" - parser = MailDocumentParser(None) - tika_server_original = parser.tika_server + tika_server_original = self.parser.tika_server # Check if exception is raised when Tika cannot be reached. with pytest.raises(ParseError): - parser.tika_server = "" - parser.tika_parse(html) + self.parser.tika_server = "" + self.parser.tika_parse(html) # Check unsuccessful parsing - parser.tika_server = tika_server_original + self.parser.tika_server = tika_server_original - parsed = parser.tika_parse(None) + parsed = self.parser.tika_parse(None) self.assertEqual("", parsed) # Check successful parsing - parsed = parser.tika_parse(html) + parsed = self.parser.tika_parse(html) self.assertEqual(expected_text, parsed) @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") @@ -290,32 +292,63 @@ class TestParser(TestCase): def test_generate_pdf_parse_error(self, m: mock.MagicMock, n: mock.MagicMock): m.return_value = b"" n.return_value = b"" - parser = MailDocumentParser(None) # Check if exception is raised when the pdf can not be created. - parser.gotenberg_server = "" + self.parser.gotenberg_server = "" with pytest.raises(ParseError): - parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) - - @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output - def test_generate_pdf(self, m): - parser = MailDocumentParser(None) + self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) + @mock.patch("paperless_mail.parsers.requests.post") + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html") + def test_generate_pdf( + self, + mock_generate_pdf_from_html: mock.MagicMock, + mock_generate_pdf_from_mail: mock.MagicMock, + mock_post: mock.MagicMock, + ): # Check if exception is raised when the mail can not be parsed. 
with pytest.raises(ParseError): - parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml")) + self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "broken.eml")) - pdf_path = parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) + mock_generate_pdf_from_mail.return_value = b"Mail Return" + mock_generate_pdf_from_html.return_value = b"HTML Return" + + mock_response = mock.MagicMock() + mock_response.content = b"Content" + mock_post.return_value = mock_response + pdf_path = self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) self.assertTrue(os.path.isfile(pdf_path)) - extracted = extract_text(pdf_path) - expected = "From Name \n\n2022-10-15 09:23\n\nSubject HTML Message\n\nTo someone@example.de\n\nAttachments IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (0.59 MiB)\n\nSome Text \n\nand an embedded image.\n\n\x0cSome Text\n\n This image should not be shown.\n\nand an embedded image.\n\nParagraph unchanged.\n\n\x0c" - self.assertEqual(expected, extracted) + mock_generate_pdf_from_mail.assert_called_once_with( + self.parser.get_parsed(None), + ) + mock_generate_pdf_from_html.assert_called_once_with( + self.parser.get_parsed(None).html, + self.parser.get_parsed(None).attachments, + ) + self.assertEqual( + self.parser.gotenberg_server + "/forms/pdfengines/merge", + mock_post.call_args.args[0], + ) + self.assertEqual({}, mock_post.call_args.kwargs["headers"]) + self.assertEqual( + b"Mail Return", + mock_post.call_args.kwargs["files"]["1_mail.pdf"][1].read(), + ) + self.assertEqual( + b"HTML Return", + mock_post.call_args.kwargs["files"]["2_html.pdf"][1].read(), + ) + + mock_response.raise_for_status.assert_called_once() + + with open(pdf_path, "rb") as file: + self.assertEqual(b"Content", file.read()) def test_mail_to_html(self): - parser = MailDocumentParser(None) - mail = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) - html_handle = parser.mail_to_html(mail) + mail = 
self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) + html_handle = self.parser.mail_to_html(mail) with open( os.path.join(self.SAMPLE_FILES, "html.eml.html"), @@ -324,13 +357,12 @@ class TestParser(TestCase): @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output def test_generate_pdf_from_mail(self, m): - parser = MailDocumentParser(None) - mail = parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) + mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) - pdf_path = os.path.join(parser.tempdir, "test_generate_pdf_from_mail.pdf") + pdf_path = os.path.join(self.parser.tempdir, "test_generate_pdf_from_mail.pdf") with open(pdf_path, "wb") as file: - file.write(parser.generate_pdf_from_mail(mail)) + file.write(self.parser.generate_pdf_from_mail(mail)) file.close() extracted = extract_text(pdf_path) @@ -343,8 +375,6 @@ class TestParser(TestCase): self.payload = payload self.content_id = content_id - parser = MailDocumentParser(None) - result = None with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file: @@ -354,7 +384,7 @@ class TestParser(TestCase): attachments = [ MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"), ] - result = parser.transform_inline_html(html, attachments) + result = self.parser.transform_inline_html(html, attachments) resulting_html = result[-1][1].read() self.assertTrue(result[-1][0] == "index.html") @@ -368,8 +398,6 @@ class TestParser(TestCase): self.payload = payload self.content_id = content_id - parser = MailDocumentParser(None) - result = None with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file: @@ -379,9 +407,9 @@ class TestParser(TestCase): attachments = [ MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"), ] - result = parser.generate_pdf_from_html(html, attachments) + result = self.parser.generate_pdf_from_html(html, attachments) - pdf_path = os.path.join(parser.tempdir, "test_generate_pdf_from_html.pdf") + pdf_path = 
os.path.join(self.parser.tempdir, "test_generate_pdf_from_html.pdf") with open(pdf_path, "wb") as file: file.write(result) @@ -390,16 +418,3 @@ class TestParser(TestCase): extracted = extract_text(pdf_path) expected = "Some Text\n\n This image should not be shown.\n\nand an embedded image.\n\nParagraph unchanged.\n\n\x0c" self.assertEqual(expected, extracted) - - def test_is_online_image_still_available(self): - """ - A public image is used in the html sample file. We have no control - whether this image stays online forever, so here we check if it is still there - """ - - # Start by Testing if nonexistent URL really throws an Exception - with pytest.raises(HTTPError): - urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png") - - # Now check the URL used in samples/sample.html - urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png") diff --git a/src/paperless_mail/tests/test_parsers_live.py b/src/paperless_mail/tests/test_parsers_live.py new file mode 100644 index 000000000..6676ebc1e --- /dev/null +++ b/src/paperless_mail/tests/test_parsers_live.py @@ -0,0 +1,220 @@ +import hashlib +import os +from unittest import mock +from urllib.error import HTTPError +from urllib.request import urlopen + +import pytest +from django.test import TestCase +from documents.parsers import ParseError +from documents.parsers import run_convert +from paperless_mail.parsers import MailDocumentParser +from pdfminer.high_level import extract_text + + +class TestParserLive(TestCase): + SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples") + + def setUp(self) -> None: + self.parser = MailDocumentParser(logging_group=None) + + def tearDown(self) -> None: + self.parser.cleanup() + + @staticmethod + def hashfile(file): + buf_size = 65536 # An arbitrary (but fixed) buffer + sha256 = hashlib.sha256() + with open(file, "rb") as f: + while True: + data = f.read(buf_size) + if not data: + break + sha256.update(data) + return sha256.hexdigest() + + # 
Only run if convert is available + @pytest.mark.skipif( + "PAPERLESS_TEST_SKIP_CONVERT" in os.environ, + reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", + ) + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf") + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_get_thumbnail(self, m, mock_generate_pdf: mock.MagicMock): + mock_generate_pdf.return_value = os.path.join( + self.SAMPLE_FILES, + "simple_text.eml.pdf", + ) + thumb = self.parser.get_thumbnail( + os.path.join(self.SAMPLE_FILES, "simple_text.eml"), + "message/rfc822", + ) + self.assertTrue(os.path.isfile(thumb)) + + expected = os.path.join(self.SAMPLE_FILES, "simple_text.eml.pdf.webp") + + self.assertEqual( + self.hashfile(thumb), + self.hashfile(expected), + f"Created Thumbnail {thumb} differs from expected file {expected}", + ) + + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_tika_parse(self, m): + html = '

Some Text

' + expected_text = "\n\n\n\n\n\n\n\n\nSome Text\n" + + tika_server_original = self.parser.tika_server + + # Check if exception is raised when Tika cannot be reached. + with pytest.raises(ParseError): + self.parser.tika_server = "" + self.parser.tika_parse(html) + + # Check unsuccessful parsing + self.parser.tika_server = tika_server_original + + parsed = self.parser.tika_parse(None) + self.assertEqual("", parsed) + + # Check successful parsing + parsed = self.parser.tika_parse(html) + self.assertEqual(expected_text, parsed) + + @pytest.mark.skipif( + "GOTENBERG_LIVE" not in os.environ, + reason="No gotenberg server", + ) + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail") + @mock.patch("paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html") + def test_generate_pdf_gotenberg_merging( + self, + mock_generate_pdf_from_html: mock.MagicMock, + mock_generate_pdf_from_mail: mock.MagicMock, + ): + + with open(os.path.join(self.SAMPLE_FILES, "first.pdf"), "rb") as first: + mock_generate_pdf_from_mail.return_value = first.read() + + with open(os.path.join(self.SAMPLE_FILES, "second.pdf"), "rb") as second: + mock_generate_pdf_from_html.return_value = second.read() + + pdf_path = self.parser.generate_pdf(os.path.join(self.SAMPLE_FILES, "html.eml")) + self.assertTrue(os.path.isfile(pdf_path)) + + extracted = extract_text(pdf_path) + expected = ( + "first\tPDF\tto\tbe\tmerged.\n\n\x0csecond\tPDF\tto\tbe\tmerged.\n\n\x0c" + ) + self.assertEqual(expected, extracted) + + # Only run if convert is available + @pytest.mark.skipif( + "PAPERLESS_TEST_SKIP_CONVERT" in os.environ, + reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", + ) + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_generate_pdf_from_mail(self, m): + # TODO + mail = self.parser.get_parsed(os.path.join(self.SAMPLE_FILES, "html.eml")) + + pdf_path = os.path.join(self.parser.tempdir, "test_generate_pdf_from_mail.pdf") + + with 
open(pdf_path, "wb") as file: + file.write(self.parser.generate_pdf_from_mail(mail)) + file.close() + + converted = os.path.join(self.parser.tempdir, "test_generate_pdf_from_mail.webp") + run_convert( + density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=False, + auto_orient=True, + input_file=f"{pdf_path}", # Do not define an index to convert all pages. + output_file=converted, + logging_group=None, + ) + self.assertTrue(os.path.isfile(converted)) + thumb_hash = self.hashfile(converted) + + # The created pdf is not reproducible. But the converted image should always look the same. + expected_hash = ( + "8734a3f0a567979343824e468cd737bf29c02086bbfd8773e94feb986968ad32" + ) + self.assertEqual( + thumb_hash, + expected_hash, + f"PDF looks different. Check if {converted} looks weird.", + ) + + # Only run if convert is available + @pytest.mark.skipif( + "PAPERLESS_TEST_SKIP_CONVERT" in os.environ, + reason="PAPERLESS_TEST_SKIP_CONVERT set, skipping Test", + ) + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_generate_pdf_from_html(self, m): + # TODO + class MailAttachmentMock: + def __init__(self, payload, content_id): + self.payload = payload + self.content_id = content_id + + result = None + + with open(os.path.join(self.SAMPLE_FILES, "sample.html")) as html_file: + with open(os.path.join(self.SAMPLE_FILES, "sample.png"), "rb") as png_file: + html = html_file.read() + png = png_file.read() + attachments = [ + MailAttachmentMock(png, "part1.pNdUSz0s.D3NqVtPg@example.de"), + ] + result = self.parser.generate_pdf_from_html(html, attachments) + + pdf_path = os.path.join(self.parser.tempdir, "test_generate_pdf_from_html.pdf") + + with open(pdf_path, "wb") as file: + file.write(result) + file.close() + + converted = os.path.join(self.parser.tempdir, "test_generate_pdf_from_html.webp") + run_convert( + density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=False, + auto_orient=True, + input_file=f"{pdf_path}", # 
Do not define an index to convert all pages. + output_file=converted, + logging_group=None, + ) + self.assertTrue(os.path.isfile(converted)) + thumb_hash = self.hashfile(converted) + + # The created pdf is not reproducible. But the converted image should always look the same. + expected_hash = ( + "267d61f0ab8f128a037002a424b2cb4bfe18a81e17f0b70f15d241688ed47d1a" + ) + self.assertEqual( + thumb_hash, + expected_hash, + f"PDF looks different. Check if {converted} looks weird. " + f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.", + ) + + @staticmethod + def test_is_online_image_still_available(): + """ + A public image is used in the html sample file. We have no control + whether this image stays online forever, so here we check if it is still there + """ + + # Start by Testing if nonexistent URL really throws an Exception + with pytest.raises(HTTPError): + urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png") + + # Now check the URL used in samples/sample.html + urlopen("https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png")