From 9ae847039b26b5c3b6f52be03d1614a63a6f21a8 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Wed, 14 Sep 2022 14:00:37 -0700 Subject: [PATCH] Fixes the separation of files by barcode, during the case where 2 barcodes appear back to back --- src/documents/barcodes.py | 74 ++++++++++-------- .../samples/barcodes/patch-code-t-double.pdf | Bin 0 -> 36146 bytes src/documents/tests/test_barcodes.py | 20 +++++ 3 files changed, 62 insertions(+), 32 deletions(-) create mode 100644 src/documents/tests/samples/barcodes/patch-code-t-double.pdf diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index ccfae37cb..d8a73e277 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -8,6 +8,7 @@ from typing import List # for type hinting. Can be removed, if only Python >3.8 import magic from django.conf import settings from pdf2image import convert_from_path +from pikepdf import Page from pikepdf import Pdf from PIL import Image from PIL import ImageSequence @@ -122,47 +123,56 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]: Returns a list of (temporary) filepaths to consume. These will need to be deleted later. 
""" + + document_paths = [] + + if not pages_to_split_on: + logger.warning("No pages to split on!") + return document_paths + os.makedirs(settings.SCRATCH_DIR, exist_ok=True) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) fname = os.path.splitext(os.path.basename(filepath))[0] pdf = Pdf.open(filepath) - document_paths = [] - logger.debug(f"Temp dir is {str(tempdir)}") - if not pages_to_split_on: - logger.warning("No pages to split on!") - else: - # go from the first page to the first separator page + + # A list of documents, ie a list of lists of pages + documents: List[List[Page]] = [] + # A single document, ie a list of pages + document: List[Page] = [] + + for idx, page in enumerate(pdf.pages): + # Keep building the new PDF as long as it is not a + # separator index + if idx not in pages_to_split_on: + document.append(page) + # Make sure to append the very last document to the documents + if idx == (len(pdf.pages) - 1): + documents.append(document) + document = [] + else: + # This is a split index, save the current PDF pages, and restart + # a new destination page listing + logger.debug(f"Starting new document at idx {idx}") + documents.append(document) + document = [] + + documents = [x for x in documents if len(x)] + + logger.debug(f"Split into {len(documents)} new documents") + + # Write the new documents out + for doc_idx, document in enumerate(documents): dst = Pdf.new() - for n, page in enumerate(pdf.pages): - if n < pages_to_split_on[0]: - dst.pages.append(page) - output_filename = f"{fname}_document_0.pdf" + dst.pages.extend(document) + + output_filename = f"{fname}_document_{doc_idx}.pdf" + + logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages") savepath = os.path.join(tempdir, output_filename) with open(savepath, "wb") as out: dst.save(out) - document_paths = [savepath] + document_paths.append(savepath) - # iterate through the rest of the document - for count, page_number in enumerate(pages_to_split_on): - 
logger.debug(f"Count: {str(count)} page_number: {str(page_number)}") - dst = Pdf.new() - try: - next_page = pages_to_split_on[count + 1] - except IndexError: - next_page = len(pdf.pages) - # skip the first page_number. This contains the barcode page - for page in range(page_number + 1, next_page): - logger.debug( - f"page_number: {str(page_number)} next_page: {str(next_page)}", - ) - dst.pages.append(pdf.pages[page]) - output_filename = f"{fname}_document_{str(count + 1)}.pdf" - logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages") - savepath = os.path.join(tempdir, output_filename) - with open(savepath, "wb") as out: - dst.save(out) - document_paths.append(savepath) - logger.debug(f"Temp files are {str(document_paths)}") return document_paths diff --git a/src/documents/tests/samples/barcodes/patch-code-t-double.pdf b/src/documents/tests/samples/barcodes/patch-code-t-double.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b68d3cc7f70f5a6fee319c419589eee00a9ed294 GIT binary patch literal 36146 zcmeEu2Ut@}*X}tS&;S;q0%A)ff>J}3BGMsLML`h-Vt~*Qq=^dV5K)m9QIKLozzPz< zj!Fa(P*5o`|th!d!Oe%-)Q!pK6_@?de_=(HcQUI+)_nd zRZ~ok^EH<*rcP8N27CC685j`B4m2Nn7*PXqRujoiTSMqXvKcL$78vY9G&B^WZ}P$_ zV#VJZm<2~{3MXn2$+mu8VZ;qYO+iyF!BtyuT`IV`eeZ63grA*Z4SsgEDV)A3JdCI# zXhWui21kStvF9>#B$CbPk$#@^)f7`A*_s$08UfD+xlW<9O<^IlQ2Hj%t?-OQvOV3) zk7gPi1rMVJ|CXw25Owr4i0}XoSgr8yvs+CM3yui&gz4&zkLCz{dWMfz3WKa~^T!@| z!sM~3{mt|0jrU*{8XWR-WNkfdRV^)|u9k+Xnie+xU!Lr5%Ky>x2{tA)*wc|74wE9o z?!gKK!DE?Xy{=p7;ZOGr7c7%?kQz}PoAqjJDX`U89UM*zr(?7Eb$Z(WV0ze|{eL_? 
zj2QoBdg>Sl0KNZTA~zsN!EnEB$UmYtj1|5=v87*NI6ag|whV+lHK%(9djSvF(Kq>o z`x4ccF2$%cEIgD>3lfVutddeIwo^^Q;_FC)i?rRQz`RMOw}$_ea<%%yEloB5Nn>(7 zap3D*CvWvVdi_Ef1D?54YJKJ$6~9*GK^2N6sVq6Lzsq%X(Up1b-H|z2+Qqi68MFm^ z)aH;%Kf2B=j2JYWPr8xSZ%4n8Gf)3$FKO@E)vjHquE&}uyUE6f-Z(2XrBmE@m37auEF!^8!xeWu9Zx8$pS}<1pG238ZR`qc}Av^qq z{+;Mi;XwUAaRg(*A1(jMI_YXmSSG8zjnl1^6<3`vZulB86uwH$;rxZL(9Q2U6x%f} zX+73dS65S0)A(I%?vizxvp>kp-mrfCoVhzk?p|{pJ$h}DnCggYtm_TeqB3K*-+rI; zTc)9e_}@-Rz>nXb%+xPD%z++i79123ya@v%b-=dZz~E5F5Sk}!<~YVtLTOvEIJUt6 z3PEAx7#JKF5ws}`c3K<5TcQfIT^+my;965tPr&To>BG#-+R4e17WJJ(tmu9|z5sk! zJwTywnipU?QGFbx$t!36H9?c028IAyf5$f(EIcrj=z{=^}eT z?zSlRH!;CZW8?Qaax|{>!@nwDWZm z8I5FfGpS2Ch~FWa!wf)6Ii0LWCv9Fc1GpMDi1D2#5A5-&lvdDNJHtyqpimKaYF$Xi zW07N)ZF}Mh&I;51u+Z#$5i&4KGbty&Gx{y5q%#GEj6E|wLB_i6gUftj$<&5$Ch2&8 zd`fNZGW~qXWBP6}QKM@qCL&(pZnPzI6Eg)lH0l-;)8d|iMy(kD*C|S>nQB6 zjAe=-?#aDo(a9w97>R^^i3)0}YsC7Pv+PMYtV7lK&Xn3bTuS_>v`k4;qis8V$?BVTWN&4iJYNTt2gfTh%ZS)2gr~!3li&OS*V`*`MhJ0@uq*FW9OU%y zCd2h&PrP&k+`lwT6~pS)>|L-$h-lmjQPRc_oy6b=w_*|E&+TDOg{+~bdJ_&L3HA09 zQV2_ot+d8U_)Kiffq_GC{qjK#vUs`pR(l3QUwMjO3J#g)cVL-!U zREVA%gv%%-(derTxKiPVtaVPnRlp@5bg&y0evr%l7Ul1^$C_1OxdZ!y;W})KWpogI zPJJ{&=#B~`Mo+<|=OA2o47kJ{fGb}bV#C%taBZI}3K?i$fUP|@9;$Np784*nip@+A zK47)@%Q=DsLMy-Bc}7@-0lhBAA`p0naN))&eSuqJFbA$!Xbi@l3kw5>o{vWmPks!G zUEUG^*QQU{13nS#$%mmPhG+-bD@;?^Y8e&SO{W zo4W;x-7_HZLFfENL29QkROA`T2#sQh@?=N}u1#-t3F3m1*;H(RDcAtRYFJ#pXCoB6 zl_i$rU@9ehg`flvzkgehubR?so*TAs^W^!-mGf-vauIqnxhuPQ6rsTdI41^-`h*uF zM5+agmJ+tE_Y zb>p>=7T2p`l#bh$S2P=X`we~X%Y-xrsk*M}txbB!#Om(1&mXmatm9_Xl#f;>*vGrc zH-;ZNg-%wgEIkkb{Plq%Se~9JW-n~ZiD7az&|9#rAaiKC1qbo9IB-Si0Da~M`;}Ne ze`+T=`xuJf!Q}fh5U=RqCM*~AM8D*3LIVihZ$-$nF-X2SNVO56prGiRmk(a|TYBz9 ze>Bf8z;7vRZ&xaDb8j!Y&0lKdk>}Z#V#Hsn3sW1WMbRd$Eg(9)|EcHNID}Ye~V9(8ANXg}Egc)4DFr!bjv&*0l$%&pm%i zNo=-b2nU$zru^Fb48*@TlhrA!4>;u%4!Hk&*h`WXK@5N57!qo7+ZK&NdHs*OB)SmfAeqQi5pRz9asFFvX%F zzJiq7d-cUGlwr|85SH>#O(NlLq~jZL(ggL{kOjM1^8z0iE9_W+ll>qhELMAN5w5~S zQ5Yw6Fjh!}CXAEJu#zCI#Yv-1x%0-H15CeBVH(kL1{B0Wz8ldvmWpzWW7k8j$*d1T 
zYWr;Yk29#FP@V&|u*?p9)LjZ)N>FnwrTO=d2nmayPXGe!z*nEGW>a6pkUy4IpJQ5+ zd0l+xwCQ}KHO&JVC^-s!E?woTCpqK`_^AdiWf zpE_{H28Z^eF~n1r!2Xeuu(&NM7wR#-)MI?}Bxi^tEZ*0J{MhK3Zv~HyfjBosNW>q} zg!mPHEymXs{n(w0DnxiQ!kg-C(Sgb)YWbzy6_>N0(S)9__3hvFAWsIfQ!udmxuJMb>Qw>FlzG`8z3g9}_OF#N+z3^VS>e#Wl=K z)i+4YT__RXaM-f$x>{WwGgWmj{;@=Zp*b*Z`YwmOdXl(efh=Ccl(~>?T(WQ*RY~j= zNg|(ORiLPG9;5dnB{5+ovEvlj<(&9N0xaHvCGeztXrF*f;Q-pFK_+^)sLq9OpJ?Ju z(goV>-yBD=Xs-iRb3uvWCLMVODGz_Og4EF;N^h8tZ;0DE`v55qS1KwX^m^ij8K45= z`$LEqYD!5T+M{3QhXPgCkyT@bYWQxmlka9&l5i}(K6NC2lUn>Z+sxm(S5u18ZzbW9 zs2V>?Zv)l0la+CQyed49F;k%tqjQXe(}93Vq8K0K<0O>6l>m9WgvZXdlCaPRGNu8^ zuQQRxQk;5{D7)mNpe~f)S6o+x*1|V4?6Lamt?+S7C!B-|c}!RwY6{^_t;fc()+nW1 z(nNPQj4fdA;7R5=5t|2j4R zeb8XPlcHElTkpV0ai9j9DbjtKIs%dbL@`1w#+REfHeGCTnn4rlCT<7_c;???d`^4J zd2GYiPn(*@R`SL+h@OJ7q8o&Z6(Tp;LzlDtPG%Z&K5fTIJsZl7|8^@W&~)D+Ecyo zz6DBR=gk0sR)I9+!kqbTLgMOP4E4De#7Xaf5EH8#2;@vl{9Xgrp`lOC$K#JD%c=g>3~xReG91Z9rM`c z;jm*L7zjNJ;8q}42{JEC9M!V96?NohVew}J9B6gjVLMLpT>345FzR*ZNln&jcEq*p zXKJa7M*!9D7VlSnn*(bGG97dY78ZfYvvF@_Yx=Zh!%bE_USKY|z+6&tk(tla-82Ru zD7Rsn#S;^$gc1RcNWfl_*yHWcJ9ue@7uLi(4RN%I3s(iWQ7&@WQo_V5T4GThB{j}n zEmkCx1+r5ui8Ny?K}V(vALpU7IKxs}ApAfV6#+(e%P|q*F-0)hgp*1a$Tp0#F-evI zZ0aYZb`#{2VC7AOL^^@VOE95EG|pUv0R!$76WDi2Qc@?TRCFjYwVvNy2 z0D1ET6x(}x?(rWa34(fKUHm3$7s#q2+BS5tnNQP38C1R-^*3nG@&u- ze6g$CQazlj(VcKHnvH%#9ZZ*8~;Z=7Sh%S@SPH`;C4m^9=klw8e^^-io`k4wyTn?FNTNE-rirgxQAV(kGzK()2F?Olnc9mcU;c_AzlhJ4>lPL5J6oLADHnJke0f5uIfHwLIiB= z#n{=fl>+@FCmnG7*+sx{Otn$lX9Hkd0WkYGV;q<(V6&kW##i)40PpNiu)W_j8q_t) z4M1Pn{58xj9@9!MLq+}K2>RncX>xD+GO5WpR(p(!f=pn?U$UR^ z0NyS(o5;hoB@6rtXhK8HW6qN*L;#rhE=s1P`1p=AJ5V7y_G!0kgW`FagNEZ_EJD!fi$2reV?93%{Kh_C;alkm25xF~O!9p45#-=8$(samID7@OHtGm>@Bk=qqaTz73FT90pXN=w{_#hH|d*d?9j+e52xm{X?Qy6|Wc} zVeH1}hJFWdWeIz{V&wd>pn8G&#)rT>U zfw+tcaiDzE+r3-}L>1RpLTUOiVznXTcz!IVliETI1B{MPO@i>jVW4jh>46_YTuRB^vYCOe<=+~W{wU~ETgvD3jj ztL@g_QMY4!N<~8o$q9k1ZpR&W0@uE}qu#yaDPc(IbqhO)705!x?so#6QL&vP;WFhp zX0=HFjK|MQ^dKEcJ(BpAbR_$;i@3=Jgxtfa9V&hd)Kk{{2^L6Ym0BP?3Ix8F^uE*? 
zH47~Bim$%e4;l5w2|Ji_@fnu+Ov=^4-&5^by~F-K52B{I+g90+Si=|6nvS6k?>56T4RM`QU}scTj6(< zYZziQR~9SX=#&mrX?$ocrNN=m30Kw#WX$x6k(?G~B_ZGBx(Fu^B%OP_0&qh+-gVno zbaoVBh-mr+wY-%^{OUqe(2@3Z;tAM%QjCUSFcz0_9wQ)hUO*J+_Kpm!Q14;udm|qC zz0vJ$R5TonsTRy-UjY9Gfl@dbgUw{b=*oaChIPSx`NFLR?e45~assc!sN4C|*sMEC zDbGpWj*^XbL8(IumlYC%SV0NM<4}Gkbe>{yp*S=CE$QG!V_4gEYoBdweKJCyv@ao4 zX--VL<}9Dlrzg`A*5(4>6OIEBab@cuz&sUP(8RA$Y(JXww%mPvu-;Z6S_wE z1!D_1LrYBCKIL;BN4UpIH+NynK&Te*Y}*h4;{4lu-Deo@Qo*u?mGZ+z#zv{A|Las> z&}00${IFxtC+y&;H3KUr7rQV7n^p_E1fmX&GMA0@@d5o1sOGDwz- z7%SwX%`{E~5aM%LhyDfJf7l+y%B<~phE;u4mMG&n54Ip`^B}cl5EYwB6M_~t+LUV;!M46mL{AB@rHpETGK|6aM!?Ix zsN1M5IUmF4K{sa>j{%Ry4}*2GMf@FaIIw*PKs9X4cKrPaf#RQmcx33N57-UkG=5GB zANUQP?se>T$$SBCqLIb?JKH6D0-Y~cU#0-&dR$h5>vIu|y-q5%w48+gxh!}$!$34y z>dlr#z@6t3ATipo!F&w^jfw}rrU5Dsvmo>PuGJWnnZ8|6L`~<9nEE;w!suZ@9nJqAf?HoSv*HEC7fSf9xPTataV|3)gvYO|M%-bK`=v z@MEC#wnsk~7AI%;Wq4}Egp0In^A@$MKOOK?DV_m1|myJNN$6<}*j5D4RH#AWm%ppRih z6TmP{z}+-72jsUFP7;`^_dK72H9&g&xsB*BNLkJU#whgxV>f_sPb-`w&gE}qzwg)$ zoU)Z;O@^8ysZ93_~_?qwHIqIOlbpb4>}p9`VeyL-cdleq>?^lY>b6>U7Qpx(_1Ez z0tChs`PL59PIXMJ)ycHfFnV8N7<6)T#@6WDHGE2)k4$7xrgtB-f2Z+|hSld&?q96E z!2&b2gz{~8p|y7*02yJ__>^E_7Kdq~)VS}oBzj2zP!jTwQ-|G{K}3szbjhipiJr#l z=NKPW#lXsX{Wvl~)WWR92sBD^MH9jZGRV|8kq;UrW7ta2z+&KF4IiNH8P)*P0WN+Y zh-&R13yaU^6pvP_E@X5Eps_iK-Gw%bdvaRZ2KhXOXZ)@r&hti~x?XYjXVkW_<`X=| zx?R0&LMt>xbEg@N>y1N%YQlNQXd3`J?LCi%(QwN)VG3#+Hen!8K0z#1BYqmAT0q!s zP?OuPH+JF!;|*w&FLWSn6n?(-p$>Z3I#RQZ%H^dP0SbcR2MGJ#=)I9GLhm;*pjz$g z6fOt4#C-we!&JgnpzdgVYY`AFsFouPYTqyk?6;_o53;8NjSVT)V#cNqbkxVgh#M!} zl~D^FJs(m?@HivCJo&Pc$IS$fLx7Zr#&yT;Ku3(qL6yt|g5Lr9qa$HVvI%uNZkOy1 z&**l<6d1JAOoi9Ncaa$fU!RA!v;g_0Ne{tVEeW|?CAi?nwJI2P zJaDgUu*B_rC>F#({fc);9flVDC&M)ZpB^IB-Nxy?NmHIp+zD2*az@^5)TO8Uam>$?3#h)Q84m#ou)*D$P804E&?7l55uJ*8_74ZW-zQ5j7G7!S3LqKSl zEE@%=1H}W~-PW5RQKk(&vb!Y8j5tF^+&7msJ&$gL&4R$Hhn{=8jZk?ttUjhfoIu?7 zfGnuTm;=|2G3?bg6m}v|LcX!_z5(kA=;BboOUTb0Uz82A?x?R??W(ZbWiMv3_^B>K zt^YFgFN}+kXpq0QEmQ=A#B48!kKoAWllv$4=8H7gxkVS(o8E$XX}C}k3YcIq6OI&4h}$YN 
zC)aCloD=D+!QS<9{WZ_k?!N;Gs}<>=kS&iFp(6g;TlXBCR+LMb%PuWZEjhAHbQN|Q zwE`cWk#=cc!`hPLUht}cg#rBh@d&B|15ApnAr0RURUxq0uq^QeIC2Viw<}ejZ60I5 z%CrRQ{7(z|0_?D9yh0_k>Vih0KB+%Umtb{+-1*YxF`5#@3-J|du|lBOZa%z8$D5?5 zj(D+1CcJ7Xf8d;7Y&S z@4>bdJ6mA?7ZhQJCm{R|##wmp9_8;rYiTWXR30LAO@$pd~v=wnybS%!pb4s`@$^(ixU_bbyXw@BW{x%(_c!?mn8ITEOVA@ z#q7WYuvmdnGIxqp0&9ujJ-nRRu&QGV818pCGb|`zwi=-*ah9MFH|URtTA3m|Fy?c> zzU>3Xd^R5^6%$2t?&{*lFqk4>wCn(bB^A>z?HM1pMiJKwUI!o->O$$)F2GYh2hIbi znHEr!fxWP!p;FNx2kUcZMYYez1_yi@%Jz#B2aDGP%RY)MntE200f%K{xD>4TV)MmD zdu`6eCY-e2vr$pCSl>m>J1Rstf>VLVWA2YH!s?oDhEudKQr=>VFmqWwH)Sw|eGeo4 zdw2SbC;F~6sAuJ^ms@flkL%nUXR>OM`SCdoFLQkPFALQI2_Ls>@arf ztN;xGJGa8l08&`S%$m*$@@(_y7n_8ow&MFS12`OZVfIp8>`2R^4Xa56l+sxsuzzF6 zXg_-_1va}OA;0SU6~qT(-H8#Tu;WQMWlB=T=oE-O$5LvXp#&!j+7mbdGp>T+aQvVO zYl)pX#4)Mk_EwC`gfHVHhwv-FB3@K3s0@}@;6niuaxj~$SZ#r@*ck4VTZWY!fSnP% zMy_FThMR{!MDQY-c`=N^wgJQL8D`C_FK!WNO>^@gN)+Blw00hBc4N515_%`BLR`jc z4uKmD?>bxtLKhh2@Q;OIBOKCtBSa9aG$p;|t%6wR?WV zICdOW#LY`@>USz9gLkRk z)}Lv=tq`GP~HITZ6XaA2Ekb!0~7X>&ixDYtbFSr*=((_mUie~|02U}4}( zfRU@Cq<8Sq-%qiaH}unzFEH5~ZZb^FHA>}HZmJ{o#?fY%7#;ln8$bh|CeS*e@Sug;|o!lPch2HT)9Q^#**!itTZQg963-(O{ z$XiQErpPzx`yKR0MFYplcl9oW?n>F+X+@1bsfC1vr^JMpJ*d6{mDje(XR zp5y|2IDD1>B;LmwaEh{M&duAYRB{~-U-ev_Vq9xf7#oyg&&NhyzIl@Ecaq&=T((!=`sDQ? z&X9y^)A`sKUeWY`7GwBe15;*eN|jGqVYaw-dwM#iqxiuI6Q+k^p%17(Wx!m^oK(}d zsG;4mA>21!Q5NrzU>y$67DH1PWo^Z!)d!a$C967-`WUh`a4$~J5XoY7P`$n)jS&- z6EL_RKksUYl5iQWAiZWjfYZ$+<5)P@q&Flv(oz2SCCoj%S#?+sq(BiwA^b_Af zV!-mjL0FQ~IQas>a=I=XIx!+cc_=kfSSF6`Y3Hj0_)zFoeK9p1I2(S z!H(wynvXQd4ngLqqP`-04iv9Oh4)D8#BPCexDx|S631$e^Bq(LzC`J*FgY6a957V| zh?b*NP+#Mkux}H(c%vCqrU<+M@UeWjgDQxVI?+4=8ae1YnC8PCbBO@H?BctDGK7hx zC`<`@CkZd9@MOrJX-8Zys(syl8Fu8?jDobsaMGV4bM3ivCof+3$g`Y2-T=hkuSAbdolz`s_OV1_{5MR7XtgexPp_8+u z1YK(n7)szvC{RPO$AA+_8sqPDIidIeME{?y0I?QG{_4ph27FzUeFY(SbMH%KU@QVM zE6xbM;_)9BApq6cHwAB~viNx%foWG3h!ghu4cz!LSYexh7x(u9VZaL-2Tpz92mk3l zf9W`*<|};SLWIvVzJ+JP-@3$S1HKhe5f6ae=zA&Olz@ZI3{24|jQGX^7 z_rSyyFYTK1gu-8;wjG2%p<#CUs#;>2W! 
zs`$iYNrv#mWP{uidCZP*``mZj6N))P$#Bs zCnkQ0!xys?!ds&W3GU8cw!(TM-Sh9c7EGx6OLSxsV?z2@0tQ6*8F;gt_GMmS~7Dv20>OfRH~6Eq3wr=y{tUlx`OoZs62sv7XZUBE^RpJj?y9btm}X5(EF?@!S-AW< zwTWfF#xG4wR!>Z%;!>y+(tp97_|gew|0XVoGn&vRKjBx#n^5#?^e zVT&g$nf62SA&?h<679g}1CDD5SZ@O1E)afn?UkSs*n>3&Iua}aHt|#;cn_uP14?UP zk{&|94|6pn_F=0rjs&)XQjFJu!@2S1q2m#l+IL+7BrO9znveuR4wTckkb$+RIR_(l zXq#biz7cl)Px#LY{AUIJvjYEFf&Z+)|HBpF;|S=7V#z&}CNqB?#hhXxt1YW9d-`nN z<}hMIWT+~O4S&Yy8Q*qR7{7Ku)0l~Kv3$)zd`c~t1xxfvxy#h0y=6OZxV48Sr;LSk1qwIc}il9tUf_CzPprkaetpa>GGf2tt5os?rS4JW%t7^l?h0+^(H`Tm`(jZpUruY8#RO zncqBy+cx|%&S=$#h{2ITY*BmeJw!^n8#!pi5$zV|ua%T2XDk``rl=1S?F!g@33uE~ zaRgtZ4+@L!LUB?#v1*t4;m_URk3=Bwn{ebY_b1Kh(il$3dCZP@PL27=mw?7hgqsoK zUR02^XP~Dx(&$(932XXy0^c&#U!47|fPnap4%BZ6>?4H#E1_fEe4+o@vHk`1=3jn@ ze`jC++spfJdk^e=zu~n+i46Bw6rn z0o(&FF5sw?blG7=O(*dxk=dQX6z@(+WCKH};)vIQW5$3l2H{*1{93?e!j$rW_lkiOHpb1^O zQ;sL%BeQS#c-m1%vl(I!56ewCt)QjVt(;`pw%gG#xq8}h%ty!x<0PW*rXRjROz`z1 zl@$0x$;ucE2uk!-f3W?D=4Rk;-Xacy@Atr}asn(_*x!+07rvwUagLl&NBBwWM8OF@ zOce_?dGwPeV>m5G(p+WhwD5|lF=-KW`DoiiN{Y=-S7fa%yB87g`O$C2!hgr* z%tV)7-<`tz|LZhJ;85wihaF3S*C5P|6?k;|FTDl{+zb6^st&$&1Py<>6%shf`Cj3N zgPe8tcHiCO{BWo9!zYfW+C&#a21Zff+9v>9cX`l#{5CC@{c@cnOZ4+vF6*pquVx=& zM)&oz+7?Q8+_u8Wb6bF?o|mkVp_oCGepFCM5V-0gMg;|K3e%5TE=vRFYjk~xW7%X` zqM%B6z;am=tbn-A-hpTq97-o@sj8`XYQjvkv{W^;HT5*qm5GiKn}{pHNtl{CQA15_ zDN$WVUtLRIQ%}}Vj7TIJgnD`FuLieb<6}W;xvXz^c!)lkyk*N4)h(K;!J$56bv->j zvYG~2Lqi2>sDy3Z6i$m$*%U?^@8o+w=JYVnP`{9Hzu-+otY4Z(a76fWSy^nLA7Ol; za6j*#BW(&(70gi86C74xC1iC~HS&+Xp{W6ut1oa)B=ALKhgQ<wJ%3nP)Hyh^AhFY7ex;=RR7k)U-SRkZXM>(E-*qcD(o=?GUmPNTkDCXe{JdV zPg_nb{cFpg4cBb)3pdpGHrQXX|Jur)7C<+&poN78(KZongS}`0K)696v`t%;iQuw| z9^?T|&NS52zdyU6{$D%!=1FUORRmQ0gSZ6b>`34IYc57W7(E91VN3cg_On;4SdjZ` z!*4VCyQ7WQpFr>52NaAmA^*SP124}X{1y@s8Ytj4FHbT(kRF7&Lj*=p|CJHEJoP=n z&s%tK==UMVC;wm0+&nlq&@VhZkpACUfxjK?@6UWZ7jOjdi~Iw?An^kN!OPon*-#AN z{>Jm0ItJO_fz}d`c-#l%1SFp5yU&ysMi(GG+0+CJN_KugGy;}$q=))>gO5DF(6Deb zUs@=1OU9~x-;dx>qWU+Fk9I(Vf+{fhPw!_=;97Fr^`%oVkhQ?ep87WrkK_5{BMUGH 
z^LeSGqeEN@Zr-%j)QB1o*VNS`YG|tg{%Q)+*c~gwe&6F*8$o@jH_?rqVA|i8;s0!w z8sm;2$^ZH+HNH7Q{GXrYL_dEfVV;;LJY7A^7alF-o3lDSa4ZKd@W#tjG(enTZV!FJ zFkhqC`kRhB+Emeo#Z}Q%(?lS2?Yl*r)+%jSvOdz2NbosA(9@GFYq|oe#heo&5+D*5K;Gr>Eq&6HslBzrX zZb_Paak~ZK?DOd%8xPAqo=q$1Y0b#8-f%{|J}YbcKAJlDXL0P&Ukh@gyrjO_TGHP%})j*v*$c#(G~VzS$BwetmAD?wYWii`<;NED))}J zZ7o$-Z|Z#&`J;vCxN6O6Gs2ib#OG^si3^)c8j1reZX_KvDj(}MemBb}T2k9T)AiJ? zEkk0tc1<=1BiuyKk5W~XT0%cO_IaPDmDs4I@?=f>;iwB{?&@Dsn=7A7hMF8cRWtXA zSIvee4#(!d7jIv#*)hFNyN{$fXi$^3SyN{}9f< zfp8`|E&eBq4T48a7pUyJHosIuSLYY5?ax7&2f zc886z2bF9`b68i{`Lk{O2ol>Ft*26Rb{h5f(@6QsdS3>y{q+Rua>?{ z{IY_xBBnKL(b&d@n+mkBm3J)TR^lso@7F~~_Z>av+P84wSqknh!z zYG>7}^u~cW{CxL@`Ekmr^;M;6Ga^;GA5yqtm1Nw?lSLu@lbcC9gg5)iE}f%Wdhtoz z@Z5Ovu_>;2YF_B(xJ?!I!r^gbK%bQxJAq9%3Y zkV)vg5kEbnYrDMru4U|bVUd!`Fse@6W4FIR?I7WR^YK`&5Jk??@VBYg4jSxSYh?T^ z<)P-C^w*wAv7UO5n(y9xVN!Br``py)3VXsrxAkf130-dp73()hyC2o$ll02GsLIDk z^uSHr1>2h=P0d#2Z;z=L6x@yMizrFAYxgyn_O*9c3+MMx!jl1O%P{ZEkt1CC3r^^Z ztmpV^=}B$#f2%vPtTLF#6yveUJNC`G?O&EXD|$gjfNbe5*0riT(?`Uu7RtCDepoMc zT?*ArmdZU#d%Swk`!Tz$j`HfxT`zl`m^Wjuh^(;SQ`B7>hZ0-U0qL>K3di_?^40qqSIx!%3gSDr_UWI z-TyYXLdbr{(e-!2jQvGqxT;xfsVSfTd`X`kuJ13Bq447K>4>K#LrmT9$f6;40don<`G7r{yl}vcO3#T*NM^_MttN z+W6a*dzWYjN!cDBr8FZ%m?fv0yZm*Y2O3g7ExlIx>cP7F=f^yKLqb+O@saiSj9A!^nE&>%l{Jw=rff9Zq}nGR7^`<;xxStb$1K zH@~g!p(eh({3=k>i*7)zlwT*MJZ)`@cOG3g%2k#WU(WtN-S7}{`F9BL>RGdoisNYQ+hQv1122@UPG_cgv{ zy5&j!WZEqy+fO*F-%ofJJ>9n0Uzpo+-fCIE$?l8Q9iiI;E??JDcFu3Db(*Dp?S{QP zf0c!8WNAz33|CG5P&n?;AnzuN1Vtdc3UNjV-+2+ac_%fAd!FZuhWf zQ>Nt(+-(-&{0`=X7R#hJL^5}eV!AxX>-T4-98Nu z`dglB1kx-$zkED)yy{be2{SeLfarXh-_7#6A)(ToPglQ+ZF6l5G_=eO4{ZC@jP!EA%BZ6P(=?GmXzn%i^p)^2-qHM626kXv{) zjyOM!JyM~$gWwie8Go*vLEIT5e)(>h$RBr`?(G_KC~)-(J~1F=6KvhozhpUqGArrN zk4-JxSyRG=REM60Sc}>2pdD>EcJfHyo0A%OXy+eFYm+%T277(#Lt7RdQSM04ol2!W znlW`&&m8;MSn_LLnNV38>BV_g+U40MK5>qgy0o|rNg>Jj-N}EN$2NSeoBKwOG>?Ac zDt=qiMtYK#+tP}dBuP*I{rHbrzo}OoN?LZmb1g;UIwzkIZh^_BzmNb9v?>zR#VS(Qnb%}<`qmvri=e>$K@)GcKNv1 
zkVm~(TUC}j{H{=a;5ngZ3zOci?q{SxW-F>HD4bS2zb-?iclcmxsJUW+fngBcLZ8lDoENok@x5O-W&;bux2Znq*WVvif^$))GYnMMN=0y2Uox+Yi zs=aS-#>VZqu2l-q80Ae`{-p^yZjUW7iUtP;DE(((7Y3~eIEvuzIFtV<@dfm=OWs-fwC7XKx0w;q7V;Rz_Uk0-j zr|dq_qQ3L-Ex)ZhV|~SvM-=OdnNCsB=0-PX>~>~eoFQ>_)}cS|ES8T)J6psf`YE4A+VMdrEMOI<{cu;RTtRT0t^;$L6rUpnt_Ff)enydil0 z$Cd@%)H43DKV7%9^{9NxOXeOxJ%gG$PI{4xZ(STad+x@^p<8kf&iH&1mrCEZ#p!Z( zOZJ_kdQQzVNFVmCl{%Q#p3xh9!uo_tvt@{23UVC#4%0>nC2R^7_i3)k2j*8+znia$X5BMFylYeL|O{J)K{& ztnGwNm@mcdL0yH_^aUH{N_I%)(5t;dgs198Fpmp`v;sF@Mm+{GEa zFB-4>LNcX1DC1Va3C%6rZ4#es=z1kGNFgXE4VI@)F5BL{DZ4AOZgBn!a&1WU;e*@m zt+B{IYr}5bP)Og#Vo&+{M_p!n+n*g>1rD(V-GiGgUrbsSy2Y|$X}O=J^&dMdo!4^q zw?92X){OPhF+AuVanwysEB&p_`eK=ohNj*>o*3K~yMF6t)QFpS^NpcKbLB73>yNG< ze$I>-9j)CHy?58jmu5X<93iJ?n}+M#hld*!CmW1aa?Z8CQa``rbLBij)efn*am(f1 z0vx3A+Gn)_b_H+x8_8S^?<=+48uz7ehuBx>LwC6H^kd~NuLU~z9JH`{-@J~ZkV6;pn`hkwkF*L94#^<%MPFmK?)z?IY|w8c-#t+Qe5@r}tg!k}u79@2Ga( zzQI_>V&Tn6BS)rApQhqE+{%6xK2VX(e^jvtac@7D&nB;%`UUkSMZ9SnJ0kujV@F_K zakl!C1KgdnyL3*_RLZar7_PUZ^f?aMnh`xtJ5mChLuuE{2$U2TB8+D z5#qGfp6loxSdlH8Nxag1?U-L~bZ)xy14{OeKRTThHB0vwRVnHh*GCTo%xwK_b8E#h zt@*4=b=2Rb>J5E6UY!Yz-nKG5Z#!KxEzEQHM6TXj9X+<#Si^7AZ4)O2y=pwMw8TIC zHm#}8!_(|Yb#u-J`Ud}cegAn2j;(QaT$P^T?BMX~y;429(sLU1<@>h5?G`O^75aEp ziSi|;M{$I`J(qFhy**+0GNpa7 zcX=1LHf#Cu;@>wezGyRA$m~`>7guYv-G9U8SH1Q~#Lsq+r9+$`1cC zYV+3nF4xuVy!pAlq9|r8C+TZ*ps#x_jNkR7nJWD_`KRpM%MB`wX<*Gk`tbJr#$=J#BT(}o>KXf zFe_ao8-MM#kb=0dirU85NO3Kf*Fxf%GIKeBuAjF|<#<2bKpeYU+Zd*#&)sq)`r-1g zN1e-u=A)8@F`qxppI5Q-3q6B6ZT&0i^TW%BwvFvc4`YWexZ&5U(z4RMDvWKdD7$s* zGmo;@uM$Tz8gmcpZD!xo3#r)Q?0BG7CE>H}9Q|c;&95BdF~prCw(L5y$$#CI59G>h z{szRjXoL=3VBB?!R$|-OBcY=5FZQFRH>0036J66Jnf-ij!NY*6*4eGHj}=YtUGrXj zn@sw{>4=fJKAv2CW%AbNy|4E6tcX6e>eQsx6)8&?)(o-83da|7xOx>g?@#aLtljGL zwZMMsp}rkiUyI(ZSve>Ds&3qo_8G!Es?gz{q<~o8rI+2F>i#abML~_ycQ;e0KT7I| z(?-dA^Bgv6E6BI5er=FTYIS@a9V)lgJXU-7`sO1-()(uku6b=5&!ogA)~x=#w;P2% z-pp1AQFo~gm~}Ny`HfCf=b8a_|MWg-1vLqiJ4$L48>dK-cX##I>=Y_+@OF)T?6vo! zyvPHR)Z;C)bdN=!)<1r0PKiy7U95P`>cK8C*P5Hh`2|AKse{?llIv_$6_%`i?Gfr? 
zQspe_rx&<LQ=UXnEKU7fjvJ-LCk^hZIwdc(_uSnE2d&vQxM^>q z_xAU?9XH=En;bDUVyaKp`&!HWrp8usLgn>d#9~T(@{zfF_TiE#`_DKC?M*$cwyv4F zI7;Y@gO;3dt_h7;(4x6iN@lA1Ti+$AVmTqol2umZLpJ97%WIYl9xSYt-{+Xjck&Ik zYQbBbFul9(qV_&>>g=b+i{{uq5gV&WZ8(3Pyl)osxR;kfLR~7a;+not=pUK3T63N! z%75MMul~8%e2GkodG)SE_AU)ltKS$W$S;?%k(14f-Mg39QgT<}3adc;C_>y&p)C%@*XvbD}`74(luP9 zSDbh|_wa+5v7yHzX-|?od_LBf^w&n6;q~roWC>r=&}&Hk#A1_qW*Jr2Ih0akgniR=6`p)@ja*kZ=8vQ%ET8-9HyaoAp9KO_kT$tk;A3@%G zSmHLbPVv(6_!&IAd$+axhNVA?Aol?DEKzH|OUf}_myQ{nptuTKUI_1sz)lIeM zUx`YY?E63uwatD1F!FW3Pu?+$D;Xm%$~$MR)NogNYc0nPzdvc+EcN`x_1(9vPPeM< z{G;%w$w_-3<+HviuOEq0KT;gR|FEIv)>XU9*_?GOc(grf`GLCNSur@}vha+DgDU6s zMbJNV2R~EMj z-}hrJSBUGDI{#4khVM#UkEQS5c9cKb?OeHg;=4oJy$^0z?8|@DUSrd;eZzCk0MFwO zu1HqAk5JInd+8ScqHC|&>Du~7S---~J2J|Di{vgr1i?N{%VUCm|B z4?dnhXWz#Uy??&&w&y>PyH%Z}KmE&^ZO0awUduQxl|3~>>Wui$gEl42_m<`=UtN9Y zU*but9w%}BtI_RNVbQ0LTyiv3TjS&?oBuTWhn(FpV0-PsXVY4_?ychQgXdN9t5^L`FI=E)?GOnxz#nlzm(O@uHL$JbMoHM>xpj*%51KGOgmmJTX|TvK>lB` z{S&)~&s4u>EsrjW&Uv?V?lZ^55r!EIcNfH6;Vo+D6+5{gc>9XSCA$*unp(xVM{$_v zvY5)WAKuLT*VHuj_-FCoyHa|eCrR?1xcc{t=2yOxmFy`-79|_Y#U4oSljf6ET6=oe zlVf%3-^4$AJ*k<)Lu7`V!~UFW9#OR?Q&zRb1_U>M5V7+-8^SA8u6{XW($mSaR8$JP zy^K^>EM&@*`4H(`>9L@y;nwcSEp{54)z-Rfo!n))Y)+j4u%B1Ya9XMm%WY#u$oDCZ z&U1{;bBxY&jLvh6&U1{;bBxY&jLvh6&U1{;bBxY&jLvh6&U1{;bBxY&jLvh6&U1{; zbBxY&jLvh6&U1{;bBxY&jLvfmp?QvqqSQ1l;C4F`F7Q@YFri>(YHDn%kfs0^18ph< z0fjt-n3)NNn4t-9;~SbfW6(}KG%-^X;FdZxF>^EEj0>8WC8l|XhUUP1VyNm2jf^qu zFf_JAx5LoV#0cFVhL&a+{xGyO2hIMV*Z~wX1@4Hx^e<4%96el&3{4C$#mtN` z#Vm}0`}t7qFfsye$wL=2F+lgP5pdHUaAyvxUQ0t0B=bs&5`jDUiWF>Y6!e4g^Gg(< zn-qgHt5SjSub}Vgq7ZFkVs4U{nwn^lW@KcTVriCamX?%gmSSdMm~3Wfm|~G+M_2`T Z8)k7yVo?b=aT%MM0#lc&s;j>n7XZ<#f7<{6 literal 0 HcmV?d00001 diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index e4e7566ad..3ffd5d753 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -287,6 +287,26 @@ class TestBarcode(DirectoriesMixin, TestCase): "patch-code-t-middle.pdf", ) pages = 
barcodes.separate_pages(test_file, [1]) + + self.assertEqual(len(pages), 2) + + def test_separate_pages_double_code(self): + """ + GIVEN: + - Input PDF with two patch code pages in a row + WHEN: + - The input file is split + THEN: + - Only two files are output + """ + test_file = os.path.join( + os.path.dirname(__file__), + "samples", + "barcodes", + "patch-code-t-double.pdf", + ) + pages = barcodes.separate_pages(test_file, [1, 2]) + self.assertEqual(len(pages), 2) def test_separate_pages_no_list(self):