From e96d65f9451a1b92aaec0085654c21a50d581adc Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Mon, 14 Nov 2022 15:38:35 -0800 Subject: [PATCH] Allows parsing of WebP format images --- src/paperless_tesseract/parsers.py | 1 + src/paperless_tesseract/signals.py | 1 + .../tests/samples/document.webp | Bin 0 -> 5794 bytes src/paperless_tesseract/tests/test_parser.py | 19 ++++++++++++++---- 4 files changed, 17 insertions(+), 4 deletions(-) create mode 100755 src/paperless_tesseract/tests/samples/document.webp diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index aa3ad64fa..bde2ad25e 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -66,6 +66,7 @@ class RasterisedDocumentParser(DocumentParser): "image/tiff", "image/bmp", "image/gif", + "image/webp", ] def has_alpha(self, image): diff --git a/src/paperless_tesseract/signals.py b/src/paperless_tesseract/signals.py index 85f2cab9f..c4fd1e039 100644 --- a/src/paperless_tesseract/signals.py +++ b/src/paperless_tesseract/signals.py @@ -15,5 +15,6 @@ def tesseract_consumer_declaration(sender, **kwargs): "image/tiff": ".tif", "image/gif": ".gif", "image/bmp": ".bmp", + "image/webp": ".webp", }, } diff --git a/src/paperless_tesseract/tests/samples/document.webp b/src/paperless_tesseract/tests/samples/document.webp new file mode 100755 index 0000000000000000000000000000000000000000..c19ba298004c5e585470a8b012f0bee8829a151f GIT binary patch literal 5794 zcmeH}XH*nR+Q+*mlH-umkaGq}g20e-kenHUWEe>Tfqd*1I`=Tz0N|NXD(>Zj{;Jto>38rie}V4<#RWNsvDNdy1@ z`n(n)fQJNtHU?uvaUlUj6c;Vzy!G)7#F=QSqAjhg(HGiZ#R+@eU&Y8s@2~6s?)Rnu z@Irkj{NMNfPld?E^*Z+4b#h)Ku>QC}002YJnJ*;J|AO<+8RhKtC)Zyv?tFpg`oIOd z|C8tc^!$^z|MXln!JNMp3;+m7-2aW8|Be6k+;cm?p@IwaaB~esoBFv0hB)C|(bq2` zaULbK5zf!e!`t=3^{?{(GXFK`Kl(t|(DQc#0F0mi4V;JjwLtWLtw2IdTpalOTl_Ts zGsYn8@ZZ|slm4yw6#{_lE&xE2|JIzd0H7%j0O;TUt#Rc80C_9`G`__K;e!7;(|Nc6 z01glVR2KWFtAOwg2@IX9}2qXjP zKsJyE6a(czHBbjM0c}7R&<6|wqrfCE2P^`szy`1l>;cCh2qHmJkQ!tF*+6bk5R?FA zK_yTf)B{aGE6@>i2YtaHFanGL6TuWP8!Q0J!DrwLunQan$G}-|30wzvz#|A0LIOcS zSRgzQQHU%=1)>YN3bBK@LjoXSkXXolNG7BZQUz&*bU}t7laNKo24oisK#8HWPdz zqN_xnM0lc1qH3aEqIsg9#KgoL#B#*O#2&=Ah%5 zNjgbpNq&-&lJbx$lUkDolHMmRC+#8qKzcw%O(sI7P3A%tMV3R>NH$KkMNUZ0O|C+2 zPaZ~|LH?ZlE%_z|Aq5WwhQf&=k|KxV1;sSQE+sXkIHe(_59K||3d%vsFH}e>9x8Pz z7phpQBB~y$WojrjCpCr|OC3X9Ox;Vpib9}xQQ9a^R3hprY6P`SLq#J+V@?xFlS9)^ zvq%f2<)PK4^`=dtt)rcyJ)&cwQ=xOEyGvJ1H%_-t&qS|G?@FIQUqe4hf5d=hP-pOB zc)-xau)qjo6ks%F3}JlC*vq)dgkn-)!ZIZ=)iJ$ihA;~-n=pqn7cmbr@3OG6XtMaT zWU=(HY_ig_DzkdBrm?AT4lBAV=V>AVlzzpoici z!7(AYkeraOP@&MYFp02=aIkQt@RA6vh@MEaNR!BxD3_?U=mXJy(Ni&LF>kSAu{m)n zaUJn!@fPtN2>}VLM2^ISB&no^WRzsHQ6^VrS{5a1BzsSGKn^aaDi@l`54jRWVhZYJ=)NMjmq$ z)2Rkk(^N}P8&RiHH&f46|DeI4;jU4sv8^es8Lru>1=rHmdZ0C}&7zIfF4x}Hk)uBggps|g=53JBiJMG5lc5! zZWcs>kg(PGgl(Z6CWVp?KZVsFJR$7#k@#*@Y4;-_yb z+|IiLyW??Z_X4 z%7d~L%9OB_U!56P(UpEb1 z8fqMt9&UIe{ib2$(n#Z5nYYcOa-%I{m&aa?Um5Rur}nOILVIFp(qM9I%4}+8+ID(z z#%1QqtoQ7X_rdRv=Wflz=Mxqv7BW6Ce<=LO|FLHA(qj9P+S2eR(@%5D*yW9t>nkU# zv7bplXRNWUm4A`=(zdR#KKj+>>*|L8#_?wSH_C6hTYOvf-<7`)Z(DA!{=oe>+qw6X z;b-YDsbAf@M!O&OeD;p_6Al;-$_`}@`;RP+){a9?kSCeH`G2>Z>YmP@`JA1dXNr0* zBmf{<)W;2pLQ`m&%&>*V-OapnXy~nJ^1cyFJw4m&cBK>tEqhl7Z`B&lH8wxUErp2; z)!8%@bnWpCd)$_*9^`x8h8wXuNPpJNJl!eBy166a7v3Y3CuY+Hy0*?U_3cR1gk?36 z)D6`wo*Fx(ulU!GQvS{Z@|911Yf?V2{n>GLdK~rUZ2IK;IYrfN4}F_3J`m;qwnX1G zH?oLRpsc5M3QwbL&@3r?#C-2ZMbgRsuZpDHt`0pOdcAFXhsS-}0aL4VlJv7PXQVi@&T}V(y0_jcRdK~lTOMDAUZ+>p9Y^>- z=X#_kZtL4Lcl0y3Rq@F1SqVMg7m7@~tl_+-xo;c4w$2zMcF&e)o<1i?m+))xCy3P{ zbUk8J9lH6ezn-x2DC?m$>wHz-?;eI-0^$8Jws)UV7!$i&BxetDzN&J4gGe%^u0skM z4zJuD3F0TWDmkS7V*9Ysn@GlTQamk}a`g#iBBP17vgvncvwkC-hN%F{x30!beRPwg zoQ~jG%(ie=b*kPT@X;jOhv9@}#0)RJ%eSR!8c+;%bW6Q%=BVG!f7ik0due-Md)~Jv z8^1}S>Q1)@)O?Wrt*)6MI~qUab>>xdOj4XeC_WtDVQ-20&B@Rm&L$qX6)uah9Yo9w3$_kTb|)TAy!m7^ zeysB29KlGt~rrd$5jg^RTH*lliIdxS&XN*0Ii{R!zePdmH&xOaa9LG<}c5qNKH-J<#z zX+KkPor&G9riaxE^pCen-o)+_udFV#wT!0WJh|NFql5bdz6Mm>A8oPaD;HqtDRdds z=f8j(+6LlqF_m6_kJ~OfKNd=6LGm`j8S~9b@|mZOHwlJsT;Aq!4A)Wp!&vH_n9$YZ z`NezVv=ujf{WapH+wtfQth`0sxItNF3&JQkY(C!}-F}xTD2-}W(KNFBBY#C)&BsL= zPkg)77Zg)gV0RT`QRn-(x_9RpXqknD#+V+|;ikmV5z|Y}{xbiy4vH`7Qcv|Wtr&Km zGCDS)M+H5sbCMmth9qRq@2HfNAWGi$w^iWF>+e2mjtMY}S-->CyvVpIygh07ys6XT zqkjR>uuo=&baANiR&oM#rAGPMH{?z}_FG9rVlb(GJJNj8d5l4I@JZ3WvR#o7Q*D*V zjPQU1e)N55wTO^;zm+AU%923EeaBgb&NpN$WR5nx@viQGjDplItEFO0+_K)9BFQ5S zok3T9&iqY=?6286BJU_MWu=>{?utVqfm8asYaq?rmCD;e!^38;1t|m&o96k@{1wW9 zdC95@a+U_Z8_IS)y7F-)M^<}X)BI^Ru?b+Sx*fF4D|c2ne?nE*@dGqsxBL;1K>Cw) zv9hB=DR3k|Fq=Q>nx}7;$XBAE2kdo^@Jof&dYV@U#pcl1UTkAm^J7zJIMum9L-X$2W*;@g7gKbm4edy+p7q^6G`J>T%{uepehJ6D?uZgzfvLwdPl zP~mh0D2ohWC-vC7?qFoJzQu25L@q}BrJjYB?^^j^ zKde$r1|00{qgyb(Y+of>@JGsau&*f%xo$xq%EEl4SfohdS&<%`p;gVWwm2>N-og%p zbW@$$3tydeDW?q?z@Z1_Fln077#Xup9NCV(!Emq}y^;mdoeY2N5xkL$eEFezSxxxH z+gbiM3PsjCkMH)*&Ra`-=2=%@Gq~YAz_-AfkTj`{;A=U-((!!nW(G zI|{$NOT-uPZJ$WCmF=G`wXw$QSkLCnMH_XL*+Z}il2fYIOB4h&7W0zbm&<({74NYO zZD8KB_Qgq7FuO{Ae+E%IOgAnpXLbC(%@W2o$k!o}J&vv=lc2OjBEnPkXhPh`Yq!QI zPDsT46BW#R^-(scYRH_tRW;AlalB36&Rf1H;D9`56Xl>NF~d(IW1Kr$#pU^ze6mw;8%4&z0xRLxWo>jLI(UOoj82`TBQo ztyt+G*h_1ntD0gC+@q9L9!3H9PnHjMeed9omu-D>UJG^(-}iVXA6#{HF}9})8Nw%=jOdi} zfz0S)9+$`_Uw_@S5n9DJP486MoBaXf=%Z2~?mV>`9d5V%?tj5*i-H&JX`dmI6+$yxu-Lnitq*Yc`WSm$f_zKS!Z5o&fy?$yn zBIaZ4`vw;uufFhGh$dGiD;Zi_weYGg!_%5$RIj1r$(ZHv#?)Jra`iL|;To)LjZKHL zMRW0!-T8%OVXyKMZV9QN5K&A&&Qs3ak)vjgNS>=MV=A$Fi#a1B65MHdnvcnxewz1W zy!d2ca0Zi)#_E}VJQlJm5Gm4B=?i_&F|2)LGv1wB+|f&Kozhfcu2b+}J1F^%w>BGk zG`o0T!ag(glAv%iCIuM_FP8^waH99=W>b`spJ2;#@TFQ|()@JXH#nQEOl+^7wb?fF z4Kk@*j7V+qm(8jHjTmLLCc$(meRaLS;~$1+$U5IEr|W0RUj==my2Qdxm`wY=oeaGD zsJGMgzDKX_@L-KxTj{_~lpX?0>R3Y&utgc>5e7%Dto!l|>3gW+PbZjT_@?>v~ z9REc#a`BDr3EpY##}#DTg#>0NMV%&VJ8YJ^B(WN}{1Lf=RC*