From bcce0838dde7c42c326972040f13fb9f72ae8aa5 Mon Sep 17 00:00:00 2001 From: "florian on nixos (Florian Brandes)" Date: Wed, 23 Mar 2022 22:49:29 +0100 Subject: [PATCH] working split pages Signed-off-by: florian on nixos (Florian Brandes) --- src/documents/tasks.py | 85 +++++++++++------- .../tests/samples/patch-code-t-middle.pdf | Bin 0 -> 35388 bytes src/documents/tests/test_tasks.py | 36 +++++++- 3 files changed, 87 insertions(+), 34 deletions(-) create mode 100644 src/documents/tests/samples/patch-code-t-middle.pdf diff --git a/src/documents/tasks.py b/src/documents/tasks.py index dc646ddfc..1dd41b740 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -69,69 +69,92 @@ def train_classifier(): logger.warning("Classifier error: " + str(e)) - -def barcode_reader(page) -> list: +def barcode_reader(image) -> list: """ - Read any barcodes contained in page + Read any barcodes contained in image Returns a list containing all found barcodes """ - barcodes = [ ] + barcodes = [] # Decode the barcode image - detected_barcodes = pyzbar.decode(page) + detected_barcodes = pyzbar.decode(image) if not detected_barcodes: logger.debug(f"No barcode detected") else: # Traverse through all the detected barcodes in image for barcode in detected_barcodes: - if barcode.data!="": + if barcode.data != "": barcodes = barcodes + [str(barcode.data)] - logger.debug(f"Barcode of type {str(barcode.type)} found: {str(barcode.data)}") + logger.debug( + f"Barcode of type {str(barcode.type)} found: {str(barcode.data)}" + ) return barcodes -def scan_file_for_seperating_barcodes(filepath) -> list: + +def scan_file_for_seperating_barcodes(filepath: str) -> list: """ Scan the provided file for page seperating barcodes Returns a list of pagenumbers, which seperate the file """ - seperator_page_numbers = [ ] + seperator_page_numbers = [] # use a temporary directory in case the file os too big to handle in memory with tempfile.TemporaryDirectory() as path: pages_from_path = convert_from_path(filepath, output_folder=path) for current_page_number, page in enumerate(pages_from_path): current_barcodes = barcode_reader(page) - if current_barcodes.isin("PATCHT"): - seperator_page_numbers = seperator_page_numbers + current_page_number + if "b'PATCHT'" in current_barcodes: + seperator_page_numbers = seperator_page_numbers + [current_page_number] return seperator_page_numbers -def seperate_pages(filepath, pages_to_split_on: list): + +def seperate_pages(filepath: str, pages_to_split_on: list) -> list: """ Seperate the provided file on the pages_to_split_on. The pages which are defined by page_numbers will be removed. + Returns a list of (temporary) filepaths to consume. + These will need to be deleted later. """ - pages_to_split_on = scan_file_for_seperating_barcodes(filepath) + os.makedirs(settings.SCRATCH_DIR, exist_ok=True) + tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) fname = os.path.splitext(os.path.basename(filepath))[0] pdf = Pdf.open(filepath) + document_paths = [] + logger.debug(f"Temp dir is {str(tempdir)}") # TODO: Get the directory of the file and save the other files there # TODO: Return list of new paths of the new files + if len(pages_to_split_on) <= 0: + logger.warning(f"No pages to split on!") + else: + # go from the first page to the first separator page + dst = Pdf.new() + for n, page in enumerate(pdf.pages): + if n < pages_to_split_on[0]: + dst.pages.append(page) + output_filename = "{}_document_0.pdf".format(fname) + savepath = os.path.join(tempdir, output_filename) + with open(savepath, "wb") as out: + dst.save(out) + document_paths = [savepath] + for count, page_number in enumerate(pages_to_split_on): - # First element, so iterate from zero to the first seperator page - if count == 0: - dst = Pdf.new() - for page in range(0, page_number): - dst.pages.append(page) - output_filename = '{}_page_{}.pdf'.format( - fname, str(count)) - with open(output_filename, 'wb') as out: - dst.save(out) - else: - dst = Pdf.new() - for page in range(pages_to_split_on[count-1], page_number): - dst.pages.append(page) - output_filename = '{}_page_{}.pdf'.format( - fname, page+1) - with open(output_filename, 'wb') as out: - dst.save(out) + logger.debug(f"Count: {str(count)} page_number: {str(page_number)}") + dst = Pdf.new() + try: + next_page = pages_to_split_on[count + 1] + except IndexError: + next_page = len(pdf.pages) + # skip the first page_number. This contains the barcode page + for page in range(page_number + 1, next_page): + logger.debug(f"page_number: {str(page_number)} next_page: {str(next_page)}") + dst.pages.append(pdf.pages[page]) + output_filename = "{}_document_{}.pdf".format(fname, str(count + 1)) + logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages") + savepath = os.path.join(tempdir, output_filename) + with open(savepath, "wb") as out: + dst.save(out) + document_paths = document_paths + [savepath] + logger.debug(f"Temp files are {str(document_paths)}") + return document_paths def consume_file( @@ -146,7 +169,7 @@ def consume_file( # check for seperators in current document seperator_page_numbers = scan_file_for_seperating_barcodes(path) - if seperator_page_numbers != [ ]: + if seperator_page_numbers != []: logger.debug(f"Pages with seperators found: {str(seperator_page_numbers)}") document = Consumer().try_consume_file( diff --git a/src/documents/tests/samples/patch-code-t-middle.pdf b/src/documents/tests/samples/patch-code-t-middle.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2ccb76947e4e47dfc2e482326391cdac74f177e2 GIT binary patch literal 35388 zcmeFYcU)83wkW(e8#I80s5A?SASg9dDIy(0RTLDdk=_MqHXssF=@A8KDhOCXBG^%h zAOZ>~MT($;Qba-NNJ+BZSV6aV?m6e(_rCl6-altz)|!2;Ir&PgWR1%yaVD|;gWR>W05JPP*yCU} z6qW#3be?|#S@a`*exwnM!f*aDASl4q%00xM02?P*yB`es4UTB1zeg$HRoP<}?j5c9 z*@2B=bZ&xGs7nZnZfp2TcCDUsusgd&PZv$n=H4#uL2Rs8xd(ZB5RANoftJ&m6h0K^o_Qmzn^(@47Y`@8G?1du6^FF0dDSi1(Z~Or$w~Sf7#lvIoMNfbM|*9{Hs0r zIeP}+pPz5;zRxqn3l9Kf6A26f zQYRSs0Y@0Pqx&b!_fUW?YKrn&TEQW}H2zC@4xZ~%KNPa2?|fH)QHi_#-3_>swc<^i zDjxXR-mT`|FSp&#EX-x&SsGqAM%=zc+{H}q^hyqQg}xWT#@xh!z5VDszU`csF`=isb;(E&R!n2*{wRuI86!^t^iru7)m&vRs`MB%_b)(+s zwtlk0ctYP4jSd!4qQ7CdE2SS~E8 zjaMS=oze|jJL9dc{ouIA*n_l_Zw-@@$=cQNC(Tb6$e+cXvAGz<;2??{Y5lSE!CB2? zyR~&Xl3po2OYL?|h;mhb-S+(PTiufLN7f`ilsp+6ba+fzo#SC+5YM<~O8voR&xH2| zMOB{K+-DwRZkauvX>Kzv|Cp^%Q1Co#EVLxmyw^)}+4s@o?esr`aIF(2M!_EGGv^uZ zZ|OmAGdi&kQWJXC{ZV&*OJx9)!o#EyB2TS)>Qk1vDqJMZSG4pv^+DCMxCW8 zy=oA6D1g*07RbHs+_H1hqlH#hM|^+dy2)#ac=De6KI^Z(zgC8OIQhrEwd(|z&pbNz z7c=7Vg$w7mls>#0FgO(Wu%pxNo~f+;=75g-7Y{Ly54LW8y=8Fior2v(SIeH1z4cH@ zT{BU7`eSYdhehNCyJx{VK3u{KxeS`Xl5a=fxi1gV@Zn06eEaQc=$m(uwit|JnYHxL z`SS^G%WEj4Q#Rewr}U?4SC}T9-za%W{8HhDf#Vi#D?^@V}kuu)V8}6|)rlH}v zuJhfnef|AChZR548W(*ne;Ma;e66Uh2HuKwql%smj3tExmmX`e;ZUf^T1MXq2H#|MwhLvya z$>v^5Iww-hdw8y{aQBv#2b>?oR>w;u5wyY@WF7`gJwHqq+B}`|`dx%$knQj@r;LIy z-VcB593jQOyZhcx$<1ApR4K7XKxWzQc8@%FwSx|#Vlg$%kKZfHox+YkH3({*c6+&9 zJy>|#Gkl-9r9Edvz{|lz%Qw*j!bVW~QasIiz%H(Zx5P zT}wEO?#K#Z-6nZtR`~Qub-%St72c|Es%1*wmep9!qkpZzkErxIsrK8LG%|I^J_VDJ}1;{@olkqoqq_eLPPb?_8y*td{M6KEc4(J6E4@ z@ay`AuS5jgp7&gO&^UHeO8VJe6`R8TX$;hlMUccp7gUgF=tAqFL?^|}lv+?D4 zd#9qGvytoYm+XsGU*mKs$pL4$*ExGXF0Y&7D9!nL|2xlNhbBMWS9PW<9P2(TeH{~F zA6WH%_+4fGe(eVx;o_6KOiOcob6R%9+uIKou3I+o(ljuKTr+o7C)jXAk?SEiVI{x0L@eDCIjsv=uGGYS6d;)&LH?*}d~HF;Ae1i4YJ=AZysqNeNK=ubq5$|gXe2tYC7B%*+eIIenYt!);9>>}(1MH7n zEH86d9CoaJFUfYX%HFGmM-vJ0o^mSpt|i^hlBGY&{m^&Eqs6h0@TBUvr&A4K){S~d zcB|!|lGSHAaU%yP?!5}$+L8pCl$@mGRjHeM(qu=c&n5>MNEK*m`MVnmS6^#6p_Oe% ze$8{GP~Ma4F@FqyCRW|=_dqud!_wmMgj7nOVZ0)ZY*sf5!Nn z)gR&q>PijskJh}f*S3Q9 zNKqO2duW>s=l6F@5*fq~r7c-y`+V}lf?aP!M#`PI%?+5yfgxg*JCSf?; z*`#giYEV$GBqWs>u1lwmr<15+;Qi4RCeSyyQ0<#-kKK2Xv#ldrpe%3s^or3qP-^@?XNUuy68P_0d& zzuif$?pG2Ksl8WI^X*PrPoF=2Jx^69@k_n3TtrD!|8w45AxoFZl+`}2(bnIM&3^4x zOCaq`5Gy`xQJhFCynQgd-*hXfceHQ3>az+T=`WT_x9wjQs}FiDT|$expmHiUZSN7x z;10?1_xWq0|MU=Zc(A)rIWEyR>zrkEaiFyHkhD#E+NWJ9Youj0zkeK}zU`tNP!4e% zef_SiK|HZbTQiatT;E|Z+?{m(-ts%`y20Zg+%F9~v_&GR7PWp~^S;sExkQ9DefAD` z{~*7($X}4Dax6rytd{gr+obOPLvvHvMHZoVOdEU(tTjdE(gfv(Co`m$oVe7kaIEEt z_n~7^UOb62Qgy`?>x1D2+K*S9u%X;u!FPXE*3oAhCBot_)xQiGHBriXKEb2B$yRFo z-ik9S%KlLTdpd7ZZq(ix#M3#YUOcM`6|CU>{#N78P0O?C5#-LsfOTKmMTSUatn8x> z2YNOHl;UhH2jjuCE2FK;aoF@8V=1ToNx;7JvrWR%1Q)gdS$K1P*`CoKHJ~aUuC4B z@3R(UgFkN!zsvD=mPrw%H*-1nULj9kFVL5z&%LXfUCNvqtX7GHGQr@g9+0t{#G}w!1{<5yZ zc)5uE8vZ_k9QSIsK+dIVp_GdpfxUhf(mHo$GDkxXylx{0v<=c{>$ziO-ts4v`=>oA zxTJL8h-rMQ{os49Ng_@rVX{1VaoLffeVKz{b(8Df5^4jh&z(K|VwYk5byHfCeWCkd zDs9R4zv|L^dye)E7Fb3V3{CDgdb?;#&;g^0&E?)kCVxd5+3cpD?tOEfpcLh)s&&>W z^n#~h;~oYO$~=Tw{b(_ek5jHiih z7GEDu&3YeSv9;Wo(Iinw+2LO)H8A3PwQ)`D z%$O4{pt9y(uZQBRAmfckCr+Bnn+uXQGv2+acc@Fev-*>j{7s8S9aY2ij~C6HU%Gsm ztiyB{?S05ZMJDTY#Yu$mv{NFJuxIHoGMW(jp=a(q?}xNVzr5m1h1N5SV?u+fmz;On z74GCu5wOmD?Ovt0)-Z3|@#-cmQt|t%vQMU^lS+JExyN;dOI^W5tH^h%s-M}GDVmPI zH}oLeJ2yNx)#fEJGxDziYbmAD(?wNM8pRFa6TU0E{@CADk*&OrdZ&)`r$B?2SKs?< zLE(qDr{*1TS4s(XoxYT-{!vw(#xvLW$8xjyMgH%bE^RLHNqy?vJm%u6f4;gc$KKuE zr$NJKtw{DR8>=0uX*QOY?>|X5&?;S*k=}jknLJ|HE?%L5mE$Ylq;~;>iygU(A;gXZ zW6Pu~&Wk3*{2_wvFudv+)F*n?0Q1U|6NgO;Y(2K5X|^;wI1F7i(XojNA4{@6OnIvG~uu8*iJ= z7E*>3Zba8=AMvr@|GxOf;GfS`?)X&)e;uC_Toj;HTG{7wO@6JN*H$&Pfydt(DvBcJ zauU9``FZ&ic{@($1)Un#x+*EN`$lJ`?p#FeOKbP#ntn;=)Rxez)$L6C^9xLDGp-pI z)T{OpZpl15^KGZ2u&B|QY8$V@O_yBruDFgn@;>s9x*~fNwP)R{+&8n;8e7&F+{%jbz;-KX(k5rkPqG z97W~B7PESf!#gSQ4k`SUaTcTCm2Xv-P?uvuXtR)e3ZWXkoEThi$hssks05MKJMDSI`zI< z^!eTuoRL+?xse3lD6h?T9p9+^DSkjwo;dbAon!onJd<1 z)g2xre#jt7W%}X%^BjVwR(S2|){CJKqvC6Jev2JKf?D>|Bm)&}YkgPUkCyqM+B~pp zf;PT+Q8T=Ui@RR0iHkBaFD0-rJ(Ps*#-ko{4Qy~ z_@vfQqLH4r?M3aBn1spWRaYc9VsAWev{V){#Vq>}9y>njc+udrXkzHn(50RkpK6Ux z>**MabCfr@;fskeiRagxJcUUlp1x+u5u1Efeoq@|<3WyVmdfIsxw_8yf_A0N0>Vob zK6-6R=E(_^;jc0#WSJVAF0a`%dA6`t;*?b)%i1fzxE*VJN$>fd+bX9FNJ4LPHmo*l z<(aEUZoGMuaB3ChqMMs$TwOA=;(-Q7&|m3h%Bwr$CBC2VQTSGDut_+{p!)a*3)@D4 zogZ}KB(@5hii_q&#l|w*OP;IwB<`h78SZor(G)i2*`hxeUDob~pFLI@vT0A2Lg6*O zjD(FYM{E<4dc8zf#pT}8k(XbQ5tH9wr*;aH;PULIUvlGy0*ruO%;MZ-ZPua!%boKD zLqoFpEB|P2F|U%C%Jr05xAZvWwb+Trs>)HO0|!L~I`zkecVSX}fm z$8a7?SKBSUSwqESF4}#YJB2qv!lfpDJLgU>cST#lZI?c-IrlPRZmNYVr8U9D^Gid? zcWztCsZFgG#jK_cv-d)Ix9%^bt~s`HRg~R&r3|&SI9?go4HryX z#l=>J7FXWCRsGJQ!z4PP=X}MS=?YO@(e8CRnnxQp#U`u0*}lSDD#1FcQ16RRxa#S{ zO-JwPXjn>}U~s<{G+tA|{W|y6AGg+s?bvyeZg4x|(jBAd*_2Hh8DX+|otETPvLSiz zF+ss!dGfP&h~BT?S-)TL_UF$;!(Rao~bD}nFugN@}F4_*M3=_;}8={h&{*mlu{>kXKTy~rumDf%HHBT>+DlZ zmWC%)i%h7Un9VCZBm8`aN7gojj-VwUZtiTZz4@M7K=;&V_aL*}Pp`td$36414ezDR zye%JCwO!Fk`lE?BEu?dC@O?E?1jjwjeAN#BDg6?GtPnqjpN!_oxNnePTA%B^Y za_g#{#7(bT6}&!@u=Pw`z^Vw0Oj$_Ut4Z0L8eDr?n$KS_*cbOaNi>3kJSAP=x1!+m zluB-4UWDJS6R8OsGaRFaD!*%O=2vLFchz5gaP-af{7)X{8CFrBtFTGMGK!%gH{Umy z>&fio;c9<@(@o^=-?Gj=-GX>@a&!KK#xup5*V0$rO!JbI;*Q;KcW9Ut+39l_`)%dB zY+nV(^0#mDw`S}el?e7D+TJerOZx0%b&dO(l|~QuUEJ5_Q=SR+_NBSH9`&PKDmd3W zCX!CBGP!tG{+0_DZU}wZRvek%W_38zD*Kwl(H7Oq1JB+RrR;Rca2ph>laSxGxx2h6 zwy`l=WAn)e1s>M6N};B8FX{HwDt=G?I!9crpUfE{*Lx+id2e>9rrwHFIzLdo^=j5v z!Ts)$Z%zB6$7LFJ1+y+>yeGV>7u}kOt+bN4u%WnJ{-KA--zx5z zmET*R6!7t6xMuXdo1eHXO{%+J3m^Qp1moGAx&6sFw>dd*`mb-ynU>a*{oCt;w+=6* ze{VPv&ODNHWVv$hZPx8iw~-LWL}T-z3Zs1W!<*c z?1^*dv?Q~wvpFM2mcI9DdBq((89e&n^Mg;Hw$<0a^1hs{#^C(kTz!7`yPno+^}*r` zu5D|3n9GlT`*ZNGbDx}$$8SG~!CRK|bBzQ&!R0OXCpC84*H$*^sz?;->8=atv!52xssekhdt4<#B>s-z zu+@b_%Qqh4xxLSkdZ+r$QyfWC{CKIKm;aIQxrwmItRr{s+;V-qtIhdqV3=mxs7LjY zb%&m=3&gR4wR^Qb^oxJ@xndt=@AvJ}`F`EJJqlibyvuQVy)@`(qQLVUt9w^_LQ0+A zsc1M6?J%nToGNQiKE3r9Q$@wGdR%a>)GhIoi}13V7UqQH8wPjI#Hn9C7by?tJn|p4tM~J= z1BE|h6F*~y?5GAi(t+Qss0IPZzZ({YAAf{E6x9_W*z}L#kA(pb$K=|5Pw{+)-^00T zL$EW+YRjLOq(~I^8se^ZE)U#$PP9eHxn!g(EyKkA8gD~J#*xKBr(J`63b~&b;CAuq zPF49VZ$3EA)5}#<6(^I{aW#i%Q(?_7Viv!O=^~;O>BIbed<-6XK&`Kt<8Dg?p1&Kz#vyi zwA9b3Db8-lDWKV!otw)&v+9WhJ~QyzX!N%QZ3Ihc>wrK zoUNewKEJ$0dQYa03OE@5<(MLu-`bSefS>rj#@a*Sr205V+JtNFl3LHz7kD2OxsW)b z#AG*}IX&pGv*_Mhr=hT%43%OthcsuAlk%&@O20U)EDW90S||1>W8B>RQO;V83!`GO zyLUPaUU?X05SM3>U(PMHvsBmPj4{v5^|(Eq#=3dY`o|Be*6*{M8xd?h*h(W$PAooK zLg@MDTmC!Bj91vac>&%1^MJ5^P(a|{;a?SX6**;PyqdD2oV@Zs27y%;#C3lJk9P)y zK(G!Xguh3T|6!WHW6lsi-m~4s$K5pqawh)r@PR)9glJIvSHRcA9|G}u5c2)kfn$Y* zk>ekTRr~i$WE|VH+*n3x$Iar#@1av6JLD~I-U<%d-`^+It9VDbMM*(HUS3}DPo6cK z_M{7a78bI%vs=C9*v#_>4zm{?EaH)yafotww>Ud4~jBx(De8_y-2; zLxZsjfDONZAge%USN9(=cw&(Aq50S}09HjAYSADn#NPdb0p4bKS@xJv|9!#p?K=Yw zz-~tsQ zzc4>$o(q+7XCxKH8l06g^eSyd&xk+eW?bHtzhgV{GqJQ2_Kz(V@o3?Y&x!Li9^YJ+ zoGEr2Q$fL%j~EH&d&CU)`y$7>69~vmCL^7+nU1jfxN|7J$Y%Ng_4Q@bZi+8M@ew{| z;PRQ1o|S@<>bqCCY4{dOVXo{6Ol#rFHtIPUU2vV#`7cAIfwzGZtCSLQVg|xLij@o` z!IDwemd6R3^nA8m$H|}E7(x-dI3AN!o4Z9LpFdl}G2-CtZlW%iTZp6cCU;$ZNpWP> zF(RVfDGix5Av5&{gzzNfGnu6-dAc%+!i6v{$LfbCiWx-k#hr?ml$YDZGe%iuA%;OG zl#3Zis?EbB#e7Xk=hu_D6p`GZ8>q|mW*w*Cn{|SAM%Z;}24Rla#&v0s#db;(NeF2Y zaSP#d;L{C*Z&p9Nn8r?N0;*% z9Q`?dD(FM%$pGKgMNci|eBeR&ImN$wPyf5d`L@)(A>9A_UjJWu{7-(w|Kwc%*L(VJ ze8j(*n~{b?7()LY^qt)S6pEa+`tz(C&*Bqggk}GFwWs&kWgfTN$}#xk=D>zE{(i%p z%VD*}AO3>p)9n~-1Yu~SPv(_qs2O1{hUe{(7{Nw(4>vC5LHpIjiX7m;>vTa@Z13j* ze89^mQ3%3XGeTJkRa4Ckx^yVw8Y~u+5VSD2-2`o6QP4FfCbHl;{8=8Vn7P<43o?Ry zXG%fcti>EJypVj9>OW7Mgy&!c83tyPAbVpH9lEISUUa86MGR;K>q%+_Z@* zJbTx0Lj`im*UW+u16>)h#W*O>qB~=!&uA~!RywuO8Rd4QPmyU5lN2ZoA$1EyQTBoGp z+5F)+JI`(rBB2W`K^K^oNAnURd!gZ@D83v6TPcaPF;)skL=+~D>57q3gK zTx({Yiy$8s4`#N_BFLl&#+nSPwlX6)#A<m||=CN)|}zhar%Nu1!Q^YV|)80C>; z80W=;13e#C?_YxN0ii15X+D!HWM#zTu=hb=yTkopV&7AvdrFO?<62(z!hucxat=F2 zYZG2k(7vZF@@FktR2{XO^O5aw7BP+zO(9uVkjs^_o6m#-e|;vh!83A@G74z9G)<@m zdJ8aPS57TAq$4b4Iy@2NjQh&Z7TeK!*3tn&W;PNNNn!br5oXcZeP}H*68?_051Bxa z`Yr@M0wd$j<^4^RC0_?+9U$3T--bN6A=UGX=6?>G*38=*6@}PFp-P@*R$VfN+Q$8ITPn%I=X=$^z5*Xbq zc_(cYrf>4l=TI|p!YUOdT$Rgek*^{t*yPUNfYeV?^i9 zh|c57^eH+g@7N&XO+z}oDws4>h%=HnxO@<24pxPCyUs%?Z`#0y3NGe~kmd$6k-mdKRjs|bm#0vEVj}hsIFlR`W*;E96?6^&xBmY z5>dw$V%N7ct~*9F{}Ok5I~Fshl4qwGi)mb$tf3j7yPhwm@tjfJL;1QoO0rxmwuP@z z%K(@*^|)nTgBY(=fhd+skFuVoQ?mXrNt)-17+*foxIjwrCQ9!`(mb5fJQs<8<(!x% z902dkCiqeojE}=4(Lvg$Kqb<@q0SapkJt4OQv=!^-ycmh?5zV+Q=~*|pQ;2|EDw9X zLac8*$h~nLwlVsU&>68jOer^;&>Qf#R)7hN9S_9cswoxw+#CKbKM1J0jvyDsQNwZ+ zN_?JXB!;1~G)Ob~`{ZNh*=F6LSS11CxG|q?yj;wzdk>ht1Jtzo`L6ImIuyw!l+IBS zP6YxM<3{-)AHyg8qXo#@HY7^On9ooH$k-W3evhsoTB0|Iannk^u=_#_cH2Wa7|r=O z%>wOjXN--eSY!BP33Hsh(31mm#SUG^M6s0kK((l_9aEtb#c@-ouoPbOfB1@rm@B|{ zHqM-mz(Ir1p=1z2o|CKS-x-m`1LVQg2ow22|4cEfo*kU+V$r*3v`J=l(`hQ{qc)T5 zAt+Vh%;k)2P^u&r(XSDv+gL~&bC)Y>lk{03NLBq`BMyShOwIiW4#BbN5rjI8fD}%u zrH$mo>(me z?|w0iycm8l3(IX*!3cCrWroo^fIr^@n8qIL+#(d~BQyBN8w7bd>12O=kq$qY8abyH zuwClAwM3v?1LZ@jlQkrauZgbF0@;j!v`2cA-LPH-(mXfyK>+OlZODKDEaI?t&^|;BYK{UxWh;9J-zqz_u0|WrozKj1&qT8BRqm))&!tKkm_jy zjxyARcu=d?=+Kc%XdP<@AP7@BVIAmx0L=KlwKRhez}ROpf^_&YDi8-g0yCHwsiiS0 z>IiL|yd4vC81>Nd2!{Vg>Jt!Qq=z<(noZ=*@w;gq^2r-#K&n44J}vVx2Ve#|9c&3I znt{#JFds#0##BT@ba%9f7;uXi@bhob_k450nG6z?(YVa8RaYRcge^z-0GDF4`Ef`; z99rSP8vm>@+Ib=1sz7d(bDcBd({&5y+fYYLj&_od;!3B2?vzguTX6`dDqMw)c2V7! zW+cc~eqf7o0V6x+=yEYBT&Qls2&A%g8_Lg*zVg zn$b{g$cihupC9EB-(ZJ{&vjh4f}2AH2>$OhPt{dnV6N~1H$S3gJr)CM?ho>4jOMe& zadLwsuCD^h2V5nHYM29VAEEZmIgCsd(MU{PDTbPr*c;+vUg;^}B2V$`Z5GP~>_4Bk zb3log0?mU@1{H{4uJBH*cmtA_v~i8xDSTWgAog~Y5TKH6pX8*19PhXdavU{n_a+eDnO_0DKP(z#aF*eVe5bK?Q3jb5W4;Xq2(9`Ct8|w3 zy$dOfKp6W6kwKASl(TSal*tH78wc4Qgo~vODDFYZBemb;@*=Ey9ExTzD|AQ*^5P2Q zSvvsRj3NX?<_m4GLQ=@bxN`GQ(1MHvM&TpKQvsrkit%`4Dif(AFv;Z{2-p?CiF1=} z@^no)2^r@_U_MrBvs3f%qGp{al{pgPuL~G|N9$mMc)Q)ED*@Z)FYqpK=9p@mvsqNZ z1%ioXE3GFM6VsPs4kkq3DV1IoFg)+lk?9y%`M;m)&cqqq!nSQaoj~a`(kt1n^6A#) zFcRT>!E-0y=bBSSZuT2%du{MzGrZ5?BbNwJIIYGw=`RE+lHGA+_G3nZj&On=VrhJ& z$9*iFF3Ovx#qZdrC;F zE%^#^*7KHd*xsF5&gqAyT*bPh4&gS#dJc6;!Z`DXP@^xSoAlJwYdUp~toX#S-l%R$Lcqu`v4I*g_Y!^t_5h9sI?}H_Z?fF?< zL%TLqb@ES~S>qNFH?>*nv`acIpWf+`B{ne^*;Q_oj5so${Jwec=u=abLJe-t~P z`OTJB_ZEUUg^>DWy~)T(S=(0tkn9e5Kll_#EH}YZ$t!9^jPi=_e>e>l4LWg=6!Dle zqkIbSP$R%Sl({S125e}3;c#k}#~a{)`rZh|Epl~k2Z05~{pu-PK>jB@n$@B3o>-?` zd+V#>(32~f-pOM_NkGAOolz2AiBfA6M?~^O>Q!TGWVxaiN^@mVf=$+`K$SXKyNQjK zP1cyQCLm*qTLk~IgT{Ok%?=wd5XKUpllfi`8Nra zLQk7)BOv6#G(a)H1v5O%s0Qoqx=CUjvqXEy=Ih+5XPb$gVnbH^P3Hc|Q<8Tj*}Xad+g=mT!!!*7f_zoEgCLa#MCbvD<;I<6bkTT5y8e(B zN%XUR zE5|ZIj>8vYadff%H$zp9>8q&e9wHb45+V7c7AmSvkmT#!$b>WiK3S|Xyow};;=!ZN zeSAshcCmgf40R@)iX9gg5gTN=5|Y8i*29($e@lQSCqPh?AaJ?P1@y&k-J5}%U}FPi zeL4soK65JT6av*Z&8b5>O~TMkG9x=kEK$L(>#T>k!C6+poCtktlU~o)e0oc$Q- zy&VA`#Q9@fpjQ~zWU$*c`h|nUyplXK5>9}28wTa_vGhw6ho=ffCmw_li$Tr$Z^->u zy$7l3wS66E*Eb1e&RO2Q;Z=E;Ob*gPz@ai1ggvN&Wq^~F-u%l=j&P!~w?r$7<1oCm zr$A;iDQ@Hjd`mMdxlTD#+sH#k5kxf(d-zlx3b+Os>u@JuRI=|F;ZGG$`AeLahR*?CAudz z#K(BUf%5{^t-A^fKt%DjSc=YE0SS13VKcdF)G@TJNhBpC8;suG@J>!%LYjA)ha#|> zck-JsSKzY$h9EpwGm2A4tGx|&!v*ht;k(XPMm$k<59M@UP9md<7&DVV#R5NmAfWhb za^wKDYrO@K9>GDl1L}KTZJUDH2ayVjS8BCslwgMrhzN5KlCQvz(BTWdwHjbpYqj0n z$>3^mW19ws`o7SM&9e+F8Ksyo)BxZlRhvjdcOV*Alur9Tw4c{W8`;I0LndP!k-4=X zT-zsOoKQaIql5AuogJkOMI0_FKw*qyE8}W}K^{ebKBf_8witG1bGI|H8uYg^h98(} z^d^gr4nWjdYnqUApk?VV$+M)F3RNYjfi#0b_qpSLduL*m=YoFxd&JegmZ}4Kk5jja&0|6i2 zDPMcL_STXfknR4LgXLa9jd?#2Y)g{Z=d`_1aMr~LIH!9EhY^9mC|p0rf!V2mnzgFw zMvB^>O0@hh?@v1v{>{k*R9_MS-=lk6IEyolb}1XMT5>(oK&nQObmB=0#sO2c)*KxEA_?RmR# z3RjK02^BpPK&PWOk!fVQ{V*pH>6z9gBVc@jTB=4^DdcK4VfR2!Mz8wZrO)IKU`@VN zg|c?Y&8}Cf$g8fIn!_XpGf5kyAQ*liVLxeq(l$eoPy5Kwt+qRfL5C?Z!yx%kldub@ zI~?0p1Vjs_Zv|5;`b4oJK<(kCS z#JkcikKg5d85y!V!ao#}He`hwFvu~NtSPC} zuKsZ_j~}&|1joYuu(Y$?H{q1#E782@6?m)pp_U;H5A3{E1X}%dmf`s25GB7iSz>ihG3p;uWQ6d?z-v}c&U>m8KVmpDfLeG@dE1`y!uUA7Ge-`x zR&eryjrfTAK~x(S`Jokuo^tZazv}BkSbi=)k2jTM1VO2TO=zetn*~t^h6nQe@Ms)g znF`EE8{{j~rcY@zKHODuy|5RM1&P`KGmj2wBjwcqebj`wgfKpVWI;xB=IH;?Tg1%TJ#Niv6l*3OjZ>NS?5bMO_xZ%w_2HzfI%oqa*kl`C`G2 z7`u5rZikAf7tofd z?*(BY5VHB|^wp{DwliRE!)-ggC$KL?TM~l!F7TLfhH=J49}-@j>lPbrEp}Zq)=qr9 zrc=JYA2h6Tm`_}$1eS}0uy#LrVQIasT);qdbBSEZ`NP~h&}h^)Y)D$lol}jwOD?*> zQ3FjA*md(6qz)V~NupZL*v5kuY>y4q2#bS|Q;3s!se+I}geEoJ2)y&ZJm?~5uxY+S zC5#$`K|!9$f4eTh>jt&!1PvmT`0%%4D&(U$z_8taZl9`$n7RVO6ooKUUQC4?0QK#@ zIvvU@m`w|XFnjScH8T&CYEX}SIvN7lm^{9Jf(V5uF3>w_;4yE<@1FP3Kun44iU&t1 zIOZDg;5zpy=RjTSKfLzkY_AXxTJtYm_%|uwjR$8D_-YSV700Up#9$Q`tdI*w3p zs~0;YMq}Q9X;^GeD>zyCWKky)`X%5&`VsrG2Y(SzMg^wyr~e*&OHo2>|3AA4bvyy# z_mQu|xqFs%5~0zm*Md*J@P`+FmNq&#J4j=q{zJ1t+*+{O89NEgh=U)s=a`{XQrjNp zZtX-eWRPJt7O4mo;4OFDRF}oe z;zib|vb525NH2yNMc{D)C!?AyKW@fxkyYwD{&oDgk-cR${9ULY7zZ9Ja7yMb5s0I1 zVxPmyDUCb&4uIo6(uQnE1h>@;QJTI9nQ?^qm`E3eiwVwrI{3Fe!I@8EVFV%$;%x>8 zSrKFk7dS2Zz+p*3?Mn;tmqQ2fcI@K-)Iwiq{XPitltqVl04d!NdXmutJ2F+u?Qdyv z<2tv>&8Pt1;i*jTXkPGmE%59kVMR-?bCV%hHit<Zn99J-)_bTdUwon%SU-_ zXp5}iqO+p{EptvSw*k5a`yq-JES9&?FxWs;-BAWyVL#K1PwX+z`NG(PMum(#JMm5R zSj<3dwC;`#1{YU1zRTr|$z7j!9iq(?&0@3Xt%i+j^!r3NTn)k;Pw^xCoQFZ%AMH?Q z6a?pmw-JH{;br?&lN)2jP_O7v1#m{s05#np1j4g8bCjJ1D!@WO<5p-4Ac?9&sTrss z2pNRG-Nz|#2s@5Cz#-U$`b*W&kd|Q&+7k~_N>zmI|3<@Tzh*22HhaV;vE$}FgayQU z3?)cTtIH5&N{~b86o@^?NMN3!*pUU51)P97SHW>OAE-h{qA`bP3TfWoigFp}T@3#e zb{lxai^|!Z!Sf1y$Yw%1>XYTEE#Tyt!(4GpGZqJ7rwzx*T~zvX+w|v9W@sBTf;`zX zp*6HZzl}QV7>ev_Ynw#kIZxqDY$n?r$xZ-5kGLHOgWOHWF~Z@j!yqHbEpj>hQBm0l zL0S(4!LcFD5R#Q7B4bpx7j^6*rWAau;8q2xTLfNpaHm2Z4M!C*@>084HZT#S)DPrz z5|U1u9ZYw^2zE#91*v=-HEWqbz2w2KzGxJRgjRF`O_Tbup|{&mZxeccV&3@6yNF4f zLS3tQyuKa2?!i-6#o?ZVrRmh;%Xj&thwvotD>-S*I}K(&6pI75+vZ5RggIt3kpz0H zZoR;cd`jNUd{e%dPi~@e;Na;iJ&v#pKwnH{7?GI}zFtUy4-=ANqxB6%MX|kyRt)5` zgG{T52D>0&n`nG~Mfz0(Z~i4uRkMr=>sL2wndDwvC1&6Vu_(vY;OSi=VAz24u(p+~ z)}l{fN>ya??WA6;Es(5`JFP3SC+87y^wrI$Bkkw$Eh7rfH59L9C%Y#cl0^4{@m)blbye)bs0ragAKZWau_WddJXSx06$#*^; z>zKA1Bft*c*nv29LMgg=Ugj1+7?x*VF>vyl&WCan&}eS!d=&Sm#(;h(?Sh!cTw#lB z$7rj9`2Yh%fY7TC;!O0mf(}F+CRTu^01_ZOg4XXLA&<%QHfCr~&n0RPX%C#(sY$7> ztttpO|EYT>FMUh~2jk$tVP-WgjGG`rM-BswQBp=tglUAO4FMq!I403YS-jQV4^?}n zk;Ke;glmGXD@Xz_3{=uVrq5&Mg*I4Ad+7+0LPjZSRXWO%gs9` zUGfluuj&pq5&SsRd746=mC`H%7J#?{17tmXAy|TvYaF72!sWJxLFlqmmgnwzx0Z42WvS*l}a9O zA%T>AIF$p8VJ{tZGb-%vPAVYRuM@ z%3L-^eQ~`O?y0Da;tdfd)DA_{7*K!Ggn_bwSk2gm#$Kbw5U&_1QLJU0L5RUF?B%_6 z9*qS`Mz^=0_PT0_X%r+TFBLS38Hl@|C{|Ml(*S(>D zdY$;Zoi7LI?4?kI{77ukoRuKTrs9wC2}a$zoWq4V4m%|qWWrz^D(>gwU5y9%Y^N*4x+yOqx|yI8 z1%XZX#soeY>g7X1)Lae1#`Hwg{J8)lewsHU~|r-8^Ba)pjs}FzR2Od<)BDC*SYwMKdeY2TMU!|D4~yNo1F&dWiPkMhV~?w_>jV?4 zhS_!nm<2!y*y8|t8V-lBc&m8on#ibhvf<}*S3U`j61WKkW+?g@5Rr5y|C1>f%>JM3 z|1${SYk}nNUoIlU?V8Mc2m&Yfv1BsJBA~OPwc!?z&%6o&QJwjieL@w*uBEeGyP`mx zfa?$NVhN*-J#1b)Jqm;Y2Q)fFec%WG+&%wc;k4$dD_VlzR$6_F4o z2y*S8tt>qpf*7ix?1`{U3cBF`I9%68rmW!HJ-eZ07-5i+84S?hsum*JWk!Mq1ebXg zF5g&Kfj94r4LG0R`yY^z37b{We?CpYl0ULFQE(MS@;VXjWTM-}AuO{h_%;xAuwVuJ zK>4R;BP=m+AB8I(Q(M)oUlsU@Q*G6U6yYaVmW-ZBesVJ5!v##Br21OS!Sxjzm8t?I zpQ;&r*d|<|=E-r6XPaljxg<_az7OOdEB=#Ljd>D%LG>&~S~7^XpyJz? zCFX{h!CxhCr;-|HOuxz%l@LL{k%q~bDwHFi^$9_YF$wInky>KX4 z(Dk?M%p&rF^6vx;65&^3uDZW)g9&Ho2HvvAnI%rl~Eek1!FO3ClY z{+A<>Dcr)gg$1^R{MBgqJH7mB_=nt>St@x!RnLMvQWH&F2(f=)4-lBY(r;Y;tNP#M zP;vb${X69R>IJ#ya=R9msS6A1aSKa^w!d#}VcYNdI}3}|3k%7ZB+`QNKggY!(gkh* zLtGH8yOcQq;$0w<#7VAJ>g2CQeI(aCx*Xskuc((!qg5_nH0&ztU1p@^gip;8L z0xB>-@v#9~G&6|9`c{x?1lcAcn^iMne6UpkMG?ecd;4FZNUtn2i!N@te{z@J?|%Qg z|K0un*Sq`u|KE>x{*U0ZV7&*U`ZKmo5?y_M$wTSll^7>16s=61l30WWA9T4paza*xZFtDSI*1 zgmFaYVD66@Wv0C*tgScdJ;*MM>h4WivJ$+xt|PDMelTbT{_|DC$HrmRpDbMl6=!5zY6mq0aYqpeZVTyv@=mVswThPtcl^aZ3o$Hcm!v|`*9s?kXdZey*akIxe8RF$ zkhG%W6{dBgD`==z93DTqfOSInS5jB3y~+FC*IPW^ymcYo<+lFu>3xTV(9-GN&&K+? zK!NSF0J@V*k-W3oe37~2Q%ZP&lOGr`Xed*`axJ6-Z41zPfW!p|l`>9-hqb7o(~zuZ zhs?BRM?oEE5UynC9B47apj`}rxFpcE0JCw2tZf7f-C)ve#@=@a%`i0!3(^9s(#Q&7 zFrCqwTpMWl)m|az+3%~q*bj*;%__mvPOHPzL2hsY*5t=*Hn(3-BPC*!-KOLBP5ZfM zMP1h%XJyFNHqd2@B6Cvhjn}TqyQho;t)rJ@5kKH7;i{RM$;)kvhN?=x75mSiwC!U~ zJM4rYJ#!015Awxc+nc9XVHzQFpF$`(ZA=@)*Zlgc%k#lN$;-LejiBDcb*$OTRVvWF zH4cp^=)VWPs>s2YENmPJHq(j&sv4OlUDBGM0$l_j94eMt4oGTzhslVTF>7v6sZ>eL zEzOdI=9XP8;||8SIIC`lRYW(mW~Dy)nS9cK z{i)_$j{r@>1M2IG2X-Yl6su?PTjVX7ou5>v```Ik^HH-6xvDs9wsP;)W?p%xaIGX{ z_Kf<@wEeYD9WOW_WRv?jm-}qV9$+_l|Z(hJz_2MR*q6Mk_>%9fh ziq$c2^x~scjw`1>tL&mJTcTYlFRWKpPi`!qqF~xZM?r&e1C6#z;xo&;oGdxlIT3z# z9XI`Lo<4q(RvG@NW-#UHEmyzZLs7K2bFITRWiQRZw~p+Kn#uNGZ&XcO^^EW{7z$||Rfs);e8CAq zy<@BptfvJ16;?=B3O!~9gX}=?C}hS!XUYgHr1Kif$A(~I{*KP1cli&oh2T*X(;K)D zj7ex2!i98BMhEwy(uLp=A-D11C!I-u0D5t~5hB+FH$j3gMDT?Oz7W9|BKSfCUx?rf z5qu$nFGTQ#2)+=(7b5sV1Yd~Y3lV%Ff-gkyg$TY7!51R;pUkD!SePGBJTALwBqEc88ECBPd1o60B z4j01Kj3z9AiX|xRjg1eYvp5?Y0Rc;#4dxo>MKFdC&JUSjdt4TX7UKN)SiBQwb7vd* z@nLtP^6@zsfygi~pC>T(bH_rGI6rp;oI^vKfXy}X6L45~KbXhC+P6VonCH&J?-%B? zvFl{$$7c)h`+)f{8-(<6eh3Fd@o_e=W1NkHurSD(fgiAetBtoI;A-P-JY-y1q#21R zDrThs%;{A;P literal 0 HcmV?d00001 diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 94df0fc73..02c747e3e 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -93,13 +93,43 @@ class TestTasks(DirectoriesMixin, TestCase): def test_barcode_reader(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "patch-code-t.pbm" + os.path.dirname(__file__), "samples", "patch-code-t.pbm" ) img = Image.open(test_file) self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"]) + def test_barcode_reader2(self): + test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png") + img = Image.open(test_file) + self.assertEqual(tasks.barcode_reader(img), []) + + def test_scan_file_for_seperating_barcodes(self): + test_file = os.path.join( + os.path.dirname(__file__), "samples", "patch-code-t.pdf" + ) + pages = tasks.scan_file_for_seperating_barcodes(test_file) + self.assertEqual(pages, [0]) + + def test_scan_file_for_seperating_barcodes2(self): + test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") + pages = tasks.scan_file_for_seperating_barcodes(test_file) + self.assertEqual(pages, []) + + def test_scan_file_for_seperating_barcodes3(self): + test_file = os.path.join( + os.path.dirname(__file__), "samples", "patch-code-t-middle.pdf" + ) + pages = tasks.scan_file_for_seperating_barcodes(test_file) + self.assertEqual(pages, [1]) + + def test_seperate_pages(self): + test_file = os.path.join( + os.path.dirname(__file__), "samples", "patch-code-t-middle.pdf" + ) + pages = tasks.seperate_pages(test_file, [1]) + + self.assertEqual(len(pages), 2) + @mock.patch("documents.tasks.sanity_checker.check_sanity") def test_sanity_check_success(self, m): m.return_value = SanityCheckMessages()