diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index e83a9b54d..ed084618a 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -21,6 +21,7 @@ class MailDocumentParser(DocumentParser): """ gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT + tika_server = settings.PAPERLESS_TIKA_ENDPOINT logging_name = "paperless.parsing.mail" _parsed = None @@ -133,13 +134,13 @@ class MailDocumentParser(DocumentParser): def tika_parse(self, html: str): self.log("info", "Sending content to Tika server") - tika_server = settings.PAPERLESS_TIKA_ENDPOINT try: - parsed = parser.from_buffer(html, tika_server) + parsed = parser.from_buffer(html, self.tika_server) except Exception as err: raise ParseError( - f"Could not parse content with tika server at " f"{tika_server}: {err}", + f"Could not parse content with tika server at " + f"{self.tika_server}: {err}", ) if parsed["content"]: return parsed["content"] @@ -246,7 +247,7 @@ class MailDocumentParser(DocumentParser): html = StringIO() - with open(html_file, "r") as html_template_handle: + with open(html_file) as html_template_handle: for line in html_template_handle.readlines(): for placeholder in placeholder_pattern.findall(line): line = re.sub( diff --git a/src/paperless_mail/tests/samples/html.eml b/src/paperless_mail/tests/samples/html.eml new file mode 100644 index 000000000..d6ee7c350 --- /dev/null +++ b/src/paperless_mail/tests/samples/html.eml @@ -0,0 +1,197 @@ +Return-Path: +Delivered-To: someoneelse@example.de +Received: from mail.example.de + by mail.example.de with LMTP id KDcHIQh8fmPHVQAAFx6lBw + for ; Sat, 15 Oct 2022 09:23:20 +0000 +Content-Type: multipart/alternative; + boundary="------------0UhSOOwwiiuLCrPveGIa7UzZ" +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=example.de; + s=2018; t=1665825800; + h=from:from:reply-to:subject:subject:date:date:message-id:message-id: + to:to:cc:mime-version:mime-version:content-type:content-type; + bh=/6OzHOWcwCHrfo1mlk+KcsiTCkt9lN5CEU2AETZBM/M=; + b=AM/Q8Xlmh5jmccjofuedENG9dk1K9ItOL7CBtRhQlTEkjJqb1e1WgrT86SZmU5K9WTVerX + b0GgndG9xavsCSsaKrZX9rIbozFVY1+pr80sl+sZB/UbUFlr2C4/CALwUBveC6H+HcAJUR + uRQycv5zuGm8XAXdo28oFWxCKcAsE0Vs+b8UNs5Qd0VJY9inquLKXHlvLYx+ivnkg/yPCZ + ZiOfv4+Ljfxh3oq6vjN0G7pHmANn1U3MmTLivgGLocl+PPxOCCzHeRp38gJQi3NC75JA/B + 4bSJxwjV0ghnq5z7RG/Yo8d9zlB8l7z31PwCNzbPy/bJVC2EFBvHdhVqow== +Message-ID: +Date: Sat, 15 Oct 2022 11:23:19 +0200 +MIME-Version: 1.0 +Content-Language: en-US +To: someone@example.de +From: Name +Subject: HTML Message +Authentication-Results: ORIGINATING; + auth=pass smtp.auth=someoneelse@example.de smtp.mailfrom=someone@example.de + +This is a multi-part message in MIME format. +--------------0UhSOOwwiiuLCrPveGIa7UzZ +Content-Type: text/plain; charset=UTF-8; format=flowed +Content-Transfer-Encoding: 7bit + +Some Text + +and an embedded image. + +--------------0UhSOOwwiiuLCrPveGIa7UzZ +Content-Type: multipart/related; + boundary="------------fyEsKoz3fdzPxAaSslESHcHz" + +--------------fyEsKoz3fdzPxAaSslESHcHz +Content-Type: text/html; charset=UTF-8 +Content-Transfer-Encoding: 7bit + + + + + + + +

Some Text

+

+

and an embedded image.
+

+ + +--------------fyEsKoz3fdzPxAaSslESHcHz +Content-Type: image/png; name="IntM6gnXFm00FEV5.png" +Content-Disposition: inline; filename="IntM6gnXFm00FEV5.png" +Content-Id: +Content-Transfer-Encoding: base64 + +iVBORw0KGgoAAAANSUhEUgAAAF0AAABdCAIAAABIE/2UAAAACXBIWXMAAA7EAAAOxAGVKw4b +AAAbQ0lEQVR4nO18eZxcVZX/+Z773qvq6iUbWSCsgsiu4BIEcQGRURAEHXWcGcdlXGB+I86o +g/7QQUHA+aGiAdFxHFTAARRlU0RBRFRAcRRkE4iShLBFkpBOd1fVe/ee7++Pe191JzRJE0Bi +Zk76U13dqXr93vede873fM+5BZLyv/YY02f6BDZR+19cJrfsmT6Bp9MohIDC+AMgQqEg/UAR +xB8E674V/4PiS7zQx0AwqW3u/iICEYLjTpHcAEIKgPR93bdu7vElXrBBSBJCCg0iYiKW/hOT +LZjN0F9IAiB7cSVaDCiMjiIgkByFmGRtbW649EABIHXkjaE1RWGICE1UIPr4oXVzw6UHSswn +8QnFAMRwIwRVjKQIImb/Q/wlPjEzVfXeC6RkmalzVBHVuJ5EhICIQDBZnt4c4m7yg9oAmJmI +mBkAoy1dvmRNNUqIRCdixCFFoEkX0+aAS1wHqK0XXMwMgm7VvfDSb3VDVxSAigkoTsQJYrB5 +bJKWzQMXxn8kY4YhIQBQ+VKdXvvza7/z/UtNQQqNgJgwQpNS9GQOs1ngQgqEwoohEThKVXa8 +lo/45V+46AuLV9/74xuvycRZMIJQo5CW3j4pf/mzx2U8MQscNF6kWaAz18hO+cKpdy65UxwX +L1kM1IQXlhhwSlCTHPbPHhcAwphwoSIgjBwNY4289b2ff/+Cyy7QlsuK/IHlD3bNuywTQUrQ +EE6ei0T+fHGJ+TglICFjciGM7LLKGsWNS276zFdOtz5YIaJYtepRA4MZhca6un78KvLPlb9M +5G8pPlBCYDt0WcjN99xy7Mf/cenK+/qm9XVDaUIoKKCQgNU5qKbAfzb8JS59Su0IFFIkPZKk +TCS10VSlstI13aiMffKMk+/9433N6QMdX4m6omgsWbpkrNPO8ywlr1QyCmVyn9k0cUlENAaC ++osQIHIT9IATqVdTtyyzhi5bvvSYD//DbUvubM7s74SuukKoAhkZGTNv0cmICdXk45zBpohL +nSqiWyQXMYLkhCuLcUJIGq1TdfIiW75y+Uf/7aM//eV1zFlZgLooKZhRVIIE9nLQ49Dcnm2K +uPRuJ0jCjDRIEAZIIE3EIoMjQmU+hOFypGjkP7vr568//q+uvPVHfXP7jSEXOC/iTUSoEJFc +FYAjMxG3Id1uU4y7URQwiUUdYmh0IqQoCVULBohAg1hp5WDf4CU/veS4U457NBsbmjWtqioR +MlABAQyJByMqLoyEuLbH0Xc3RVykV/sTESMhRKgiIqBRI6MNpSvyYN1P/sep37zywrZWzb5m +VfrobQB6eWqC7CAWZZkJQExaH22KuFDiNcGEEmOt0EhAKEYRQEsri6J564N3fPJzJ193889c +v+NAFoLH2oEjiv5rKXgkNMb19dmmiEtMPSY0Ee3xN+89SDWK0AHOXfzTS04589SlK5b1zerv +sPIMOUAjXK2wQMREVMwCoEZuINhOsGcSF46H2FjsMarzIkIQJqI9lQ2auXbVrsz6mv0r/YoT +Tzvxe9de0c18MbPZDl0qANBEVU2oUWegAUrSOReCJ8m0tjjxr0/qOM8kLhh/TN2KqBOYiFJs +Ys8LUlaVZciy/p/ddtPC80+/9sZr+6f3qZNSSqrAnBMVMQpRF4WoYSY5ffqMPMshEICgmrDu +kEzaJ3lm/aVuB1JiAEhn2cvTJhAxM4JaZA8+8vDCr5518VWXjbjVfbNapVRmXjLV4JRQgoCp +qYlo7S+iCvHeb731/L6+hnkCAJEoENJffuy5PaO4MN5ReFBIF6MJSaEJQVWvnh5NF4Q/+PUP +P/rpExY/fG9r5mAfmiSVTuEkEKIAidpZ6guNoKsoTDJozkyMVApJBZKbYJPQvdeqa2rdGRRI +yr6kCDQETwmCqtlo3vPwH/79/C9f9MPvdLNyYM600kpnDkxLIh02ZvXHXGBK955bzd1SBcFX +WUPZKzYpCtpkHvOnxiWeQjoRMlZwWcyfFKScKt3KN1t9XrrfvemK077wmdsX3dGc0UKuPhjU +xUtJEnY8LCE14xl3AApBB7XKD7UGVRDqmAZhlG02lXW0lr+QUKUZIDRCIm9hJ1SDrdZvl961 +8ILP/uAnV1bm++cOdqwUIJgJJVNEajbuIOPfJ/wOoDAEm94/bZ899yGo0DTFkNzNLAL8jPvL +Wv3ACBAQfCVGwjyD5FmujQt/fNkpnzt1SXtR/7RWJkXHOnQgxDlHMzGJBGf93Cz6Qqj87P7Z +L9j9BUKIquiEOlyAmvut894/BS6saWvU6evkk5xFRES1sqq0smg2lq5c9sWvf/nbV14aMhuY +PhBYCQOgqeUTTEmI1m2x+vpjdhaRqDbVvROBBB+222G7vqxJi0FdKFAhhCaqYozZ60+PS13r +pP5eTeKSmmjGThUCOK057bKfX/6ps/7tdw/c09iiZWrqTUWZQrOIGOtQndRL1MevBVtJ4ZdC +CEWhZVnt+uxdBlwrlMFBCRKkxHmG5L2PJTB/Yn8Z76PHhjHIEEo0tKr8Z88940vnfXEU7YG5 +Q6M2QkUfGhGKyE9Rt4rWaSr3/CX+MvpLzFAKsRDmDM0GaWaZcxG5GHQt6izPWN0YuazQ6CkO +VIj40ouYIDSK1k33/Phf/9+Jt//+rmyoxTzrBHNosQpBpBco41FiJotQTUg7AtIgJpZR1NQg +ldIUIfhpzf49tt2NInA0NQicZLGTnwDZOFweq6ROtIl9z8d7FwGN6ppmkcKqgGKVVK2ieeGP +zj/+08ePWac1c6BDH2KeDXCiomZMIalmOzUcE0piCAWa2iUWfwQFCkhgf6O1x3P2AACo0ZSq +qmKaBIaN1l/WYhxrX/bEF6zvXawnuiiAIIi3MkjVZvuz55xx5jlfzFvImwMd86aRvMchHsbU +AY4XmD35fnxwUOqLi4sVUfg0QB3Ud8Jzdn3OjJkz4h1yzkn8X01N/HUgfgK4TOov6/xm/f6S +bmfUUARmFVyAyr+c9OHzr754aKuZlVnHShcvuZ6gjDM8E7xxrUtYt/lDYcpRZNTjyBx5Z3T4 +uTvv1cpawXt1Lg5/RKgx6f2cIi4TL368UVOP3PTmBx7vvQkdYdSog7Ebyizn8pUPfui0j1x3 +6w1DW03vqhcycwhmGhupvXZ6T3HbgIpUZyzEcsIAKoVl6HetvXbeM5bRE3GYdO1PGRcmR0hA +KIQSR0seC5asXR/LBCfqDaRUvps387seuPt9H3vfbUvvKGa2Ar0GQ4wHoJrFBWNKAmpQwrAB +/lbXE9pjALECYhm2mT3/gH0OoJiq9q5FdcNq//pekY7em1UzETIuUQji1M1agUZSfToRzQiS +cyoimWL12PD//fQJNy29PZ830GaZCxolc0JpWRBHRsYFIUFB7IlsQGVjLR1EIhBzdZZrp9M9 +7FWHzhuaU1Zl4lATurcbj0sSeCgkzSyIXzmy6re3/1ZSdFxrkq0HQapWo7xcI9gtSx+qDjvH +nfqhG377i/7ZgyO+SxUEayCHUQkVUUIsBs9YYyMV2+u11FLqddwgFAllGGi0XnPga0Tg1EU/ +wgTbeFzGPaVk5Suv4b8X33z6BQuHuSbQKycmCpH6OymhLo4liFUWLFTircBJ55580U++M21W +SztlXylNy03QVYooRAM0KKggoKKZAWuVho+PSyLTdCwF3sQy5DJmu87feadtdgpGVbehY0wN +l3F9JGVK8eYrC5dfdfkNv7mhHTqaaWrycvz1iMw+gRWZOI3mEQaK/qtv/NG5F/3XjLkzu1VX +gUxUzAQwxFlaEOkLQNTfxgn+eq2nXRJmMIo6c9VoddjBr5meDQTvVcfHMyelHVPFZfxtEFC8 +WLPRt3TF0ht/84vhsZHLr/4u4EytDrB12oZEMh4viUJRGCyDu+ehe888+wsuzwKJzJnFSkfq +GEJJi4dITfn0THphfD1WMz8PJTIn6rth+y23ffmCl3WqTu5y1sVHL7JsZHwZfxtFVAga+K3L +vr34gfvQ1G9c9I37Rx6opOpVxnWgETC12euBAQYNlfqTzjjp1ntuzZq5kSEEOGf1yUbBIabS +3lfv2ZT8JVVNoIOZOMn9muo9b/n7PbfezXyFEGdkesn6SfjL+J80hhAIGbH2Tbf8d8gsG8zv +XPy7q268Ond5t+z44M3MzJjIVFxIkaRKYMiy/Cc3/+y6X/20mN5XiQVlarAiMfA0l4LJ59ym +YjWdZzDLXVaNls/acvvXv/qoQN8oGqIT+P6U7XH9hazHW0nNdNGSRYuWLEJDK3jX78779nlL +Hr0vi52HtdedUMRIEwAm7IbOOd88Zw1Hq8IHkACdBJJJeAbrcnnDp91bVfVXGoyJSxJw1IzK +sfC6Q47YophFI8TBaWwtTd5wfUK4pEulxJyTqfvd3b9bPTactbK2tRuDzVvuuHnh2QuLrKjK +SiZuYorMkwQQLBRa3HLXrb+65abGYNHVMoiZiAmpNQFMNGVq/pIWQx2TUpgeJ0q5Oj9Wzuwf +evUrXiUUB0doSK+UJ+SQjxt3U7YnBOLpb7njlsoqgi5z7W53aIvp3/nuxd/78RWNZsPoRWK9 +HEdUxgckDXbeReeOVKOSa7AQG8P1zg6wDiw9dW0D1pt1SoBEeKhQEZFABFoV3v32d++67XNC +8IDGSoipdf9U+EsPIFWMofPw8P0h76og90VmRQXrDvlPfOXk+9sPBvH0XgkxUlCJmFogTeX2 +h+685pZrbcgCrQjNzCOjOBMXM9ETOM8aGJG4sklHc0IHceZNzRqqYbi73x77vuP175SQZ5JJ +ECWyuFrjwn2SuNR5lzHjlFX3/gfvLxqF95WESLkNuS5btezE009aMbJSnVpitwIRHzyFCtx9 +z91ruiOSqxmVqgRItQmhBfJE/CVdGcREaDAqPUOWxx6s9KN1zNuO7pOm+B7qBDxoeIJ3Yf3+ +AhFRaFVWyx9erqrJY+MfpBX9xQWXXbDwnDOQZZ5lZIBqdHAWghL3LP79aLsNOMT6IMZd1KOl +mBBBZQr5IomTAjEgCCyIRwZPE3GrVw6/6dVvfOmeL/VVlasm/QrBGJhU/yftLz0+Ek/6oRUP +Rbmorsok3uIuyplbz/qvSy74zk8vdnnR9aUQsOi3Uor9fsm9LssMluqX1JdAVJ6Sj0zdXxI6 +EUSjBCIYg4p2R8v9n/+S97/9WOn4gjmgBAWpA1Dnu6covkAkhADghuuvX7FyRV4UKUhGwFQM +VqJyQ/kHTzzuwh9+M28UY2U3mEFYFHlJv/yRR5g7E6lHfZ4iS+UCBZKLSsfm9M0+9YOnzBma +o3SZZiKisSqX3hinbLAun2jr85dEnIWddjelJ60HMkQgWnkTl83bfv5qP/z5s89YumqpNlwQ +T9I5HWmPLXlgmeaZ1Z2JqdraPIVpLpP1hSbnomgmmQtORnns3/6f5265R6iYFQ1RiKZ5BYqK +OHL9w91TxqVX70TCkDdyVUUcuYg3QMSMuWusWTM6fdbM/V6+/+/uu+vDJx73yIqHVZOr+8pX +wTRTSx2aKUPTawelod1UTI7LD8xIBzgIQjt88L3//I7XvbXqVA4qEqOYpQ2vcEAGKKcQvjaM +i9QUVqEkd9zhWQP9A+1OR1Wjqh7jgxKNZvPW2297z9+9d8G+Cy770fe+dOGXszzrdjshmMud +OoQQIHW3cYpW+wssSWP1o8S2WIAZmMGtfvjRV+530NFvfI/zmklGBZUCM6FJTVpSmJyKYjEF +XOr/h4B77bFbf2tQJDd1Qb2pN1FlQ0NXmvLomjUXn3fBsW89eto2c8++6vxLb7rE9bluGAMr +xyqn5Ua1JzIpnJiwqDkx7Q0Mgsh85kx9s501QvePo39z0FtOOvoTmc8oJoVBCSjonKgDHKmo +uWQ87lOFCygki7wYHBgSi3WhReJhVDqIDzP6hy75/uVrxtpvOOzIPz704Jlnf2l1dw0yV/mS +PmQSIwKmvrxZl1yExhBrMdbBTA1gXrnVD6x+7UGHL/zY57eeNd9o6mDiI8mOyp3GtScTqdJT +6C8AaQPNwW223NoC03iEQEXEsatommsSHGhccu2V73rzW1+4216/vvWW0876rFOHQmfNmukr +n0JDGimcAk1JFRcJMUhQmpppsKR3qq7WN73qrz7+/hN85b33RZalida6/qD01k8iRk+wnN6g +vwjN2GBzzozZoRNg0NQwp5l5p85TqsoNFj/59fVzZsw46pWvNeC711xx6723DbWmz5u7Jb04 +giKmScbbYMJOW5ujBCc9MOE0U8qqP658yyF//cUPn7lFcws1NFwuAojWLSXUmWECV6rhfrK4 +TJCyxcwy5HvsvKeDc3BOtCauNWeFUOXRNSuvvuaHh73iNbNnzV4+uuIHN1zVb83B1oCE6Nsw +WEpLGzrB+mMmaDB1AOGCNiRHx9DmP77lmOOPOR5BG5L15UWETjVOtvSUnSdr69MxKVTVTDML +fsELXjRzYLpVIW3hp0CYBa1UO8qcbGX51y44d9ZWc56/+15jvn3Vz68p1XbcYcdQeo20B/Wk +JWpqsrakWMuME0cVzFdVDteUolrVHeLAmScsPOk9H29qAxRYzZsRY5JLLaSngkCuDxfU9VHw +Yaetdtxh22eVnTKr291qKIJ0M2nnLIxDLr9z2R9uvueOQ/Y/MGtldy9ZdN1tP9tj9z1bfX3m +TUCrpXBhr1SqOy2S4KhbGD01x5qNLHS77dWjBy448KunfeXwBYeFTmi6LFMwdrRq+p3G5aag +e248LmvJwgDJftc68rVHWmWMXUASwszgAa8QY6Z4tL3mljtv323H5wwMtoZHVv/qV7/cZaed +m1lfhARJGlICSULpQUOoKkmrHUYgqtrUorumM60xdNzRHzrrkwv33WVBOVIVzOOJOxenR3qV +ci+uPwUOs/5+QNyXLLm6KnSOfOlhe+/+/DVj3ZA7L95QebU+r31V1nYcbpRFQx9aumyrrbYb +zPty86MjK+b2z9lxm12rtmbIxLqMG2YIo0aaF7OUOvXew2mc3BV14tzo6Fjn4fCK3V75vbOu ++MBR7x+yweAtbzWsIalvWwvjIoDGoflxN39acOnhk5BR573NbEx/+5F/4ypRE8lQihdHNVET +UTWFiN1//zLAOZcVWfHwQw81WOz7wn19p1JKrnASlEFJpWn8fBqaU/G+cqpq0pCswRxj1nlk +9MW7L/jsJ0474+TTt91iK+9LFXXOCUit9ySlRYfxSbCNSDwbgUtqdkBEtVE0u1X70P0P3nv7 +3apHOy7LvaZQIXGcv/JZo1i8bOnw2KjmDR9k7uwtFe6Nr3vDlrPnhU4FEzGv4pVeEQReJACB +DArNoCjNr6nCqnLr1tx/OOpdZ5/472942RF9LFh5BydU1opwIm+1OE/W22KnSJCmYOubZ4hD +9BYnQokGGwNF/yeOPf5t//Lu0XqcxIkBDqLOudLK0sogQZAFL/NmbQVi1tDM5+6259U3XNls +9XVCGSTEcl2gACiaaSZd6451C8mfv/PeRxx8+MEHHLj1jPndbkfKsuUKJRgIx+QdQgp04vx6 +XErj9/NpxyVReAMdJXNZqPz+e7zsUx855b0nvb9vehHQIQFk0XPUKR3vuW/R6tUjc2bOPfCA +V4xVo4PF4KsP+otrfnl16avgYIJcMwhoqLqV0R4dXrNla9bhBx121CFH7rPH3rPcDJMQur4P +/ZKJAjRIHBekSJxuiel4YxTipwKXWJEaqRIAiKgia3faB7/4kHce8Xdfu/jrnB4ELjaMBAy0 +aVtMX/LA0lXLVx74isN23mbnUJZmnDNvbt5slOzm2hS6ql3SW+iGmYPTd91pl7133/ule+/3 +0r1ekonrWln5KkOWuUJSey4l8tTDnDBs+/SBsgFcYmsGYk5KwpE5FCEAyE445qNDfc1PfuuU +6TNmS4dZlokTs87MLWb94NqrChSHH3xYgXys7LSazSu//8PVo6OtGY01y9c00JzRP23reVu+ +4TVHHfySA+fNmjfkhoJYtyxNrHBF7LFDIGneS6zu9kqq/0CKYryj9qfGRYSxQqyjW1zZ6qBZ +sA+8458W29Lzzr9wxsBsOPUhZI1i0eJFK+9b+YLdn3vAC/frdjqtwWnfv/4Hl115WaPRGOqb +9qbD//LZW+304hct2GX+swezQRHSe9/pClxDCxGhxQ+fSDNhJmJIH0WhiaykD8t6OjERWatV +OBkuHJ9HjqeSij4LIVjouuqsr33p65edt9I/qgMu0HdH2/Onz/vGyf+557Z7hUrvXX3/mz/w +xgdX3T+AgYUf+/yhL/qLeFyTYD6NACK1NnsH71W/WOcH1Lv8pH7+9C2l9c4FpcoDSPpOTRFI +AKpa+OLD7/zgCf/0r9PywfaKsT7p0wqvP/T1e2//vM5YRxvZ+Zeff9+KZYHhr498y6tfdEhZ +llVVmZkYXJZDHTSNGgqk7qv2dEsBRCc8Sl0N9Z4/jcb1mVl6TBMLNsFCCN12OTy2Zphjv1z8 +m6M/c+z8g7Y/9INHLKmWDY+u9t5fd/v1Ox6+y6zD5u79thf8fsWScixUpbfoaSFMPBTX+Z6e +0IzGdR9p6bTqquFpsfXHlwk3pp5P6607CDJRoBgb7ey13e5n/fPnXFvuXHRnA40iawyXaz7/ +tc8NV6s63fabD33js2ZuW414V7g0V8U0F1kP1PaCRc8HJjgD1n3EY17ylNsTUF3XauZHdSzT +LM+n9Q1pJVWojn3X+0YeGVn0h0WNonH6uQuv/sVVLtf99ljwple+ueyWTtMgZ6wSpzif80zZ +E9s3MZ4voyymQooZM2Sefv4W8+fN2+ruuxbNnDXnPy/+6rTZQ6jklPd/arvp21RjFfJ4F9b6 +nMbNBBcR6cklIhJgAGCg0QIbRb7D9jv86Pprr/jtVZ28Ld1w+L5H7LPD3n7Eq4PXbiaF1Buo +yLUA2tRsY/bZkFSoWQgSnDqIKmAM0GLbHbb/6ufPcXO0GCzQ5nv/9hilCJUwbz7Topfi4qE2 +TVBk4z7/JfINAK6ONRSqywI5Z/vZ/XP6p08bGFk2/Jcvf/M+2z4vBEMTcK7p+uUJdR2fUdu4 +z8VJLWJAUReX3gcADy9/2MrQWdU+4Hn7Hf/ej3S7nbg1MT70ujlP7TU8HbZxuPSEw3pUnswz +N9wdvvSiS9kO2ai++03vmtOYqaZS97nTfvjx6Z5N2p7UPj4gfuiZCMWpsgoHLNh/oNX3rHnb +vWrBq7plp3A5VEUosPSZI3BPL4F/imz99dHklmqW3vtoQqUEcxTBsB9poVloJiagxrJYEMQo +6kA3YUP8pmsbh0vaSJbKOaaNpSahpBenBXOkycm6/6kWBU+JnGez9JeEQvrG1F1VE5HY1gcE +kjZWACJpJ1eadd9kOctE+/+PsA04/7ZXkgAAAABJRU5ErkJggg== + +--------------fyEsKoz3fdzPxAaSslESHcHz-- + +--------------0UhSOOwwiiuLCrPveGIa7UzZ-- diff --git a/src/paperless_mail/tests/test_eml.py b/src/paperless_mail/tests/test_eml.py index 3fe05e8a3..e365e4d54 100644 --- a/src/paperless_mail/tests/test_eml.py +++ b/src/paperless_mail/tests/test_eml.py @@ -6,6 +6,7 @@ import pytest from django.test import TestCase from documents.parsers import ParseError from paperless_mail.parsers import MailDocumentParser +from paperless_mail.parsers import settings class TestParser(TestCase): @@ -201,3 +202,26 @@ class TestParser(TestCase): } in metadata, ) + + @mock.patch("documents.loggers.LoggingMixin.log") # Disable log output + def test_tika_parse(self, m): + html = '

Some Text

' + expected_text = "\n\n\n\n\n\n\n\n\nSome Text\n" + + parser = MailDocumentParser(None) + tika_server_original = parser.tika_server + + # Check if exception is raised when Tika cannot be reached. + with pytest.raises(ParseError): + parser.tika_server = "" + parser.tika_parse(html) + + # Check unsuccessful parsing + parser.tika_server = tika_server_original + + parsed = parser.tika_parse(None) + self.assertEqual("", parsed) + + # Check successful parsing + parsed = parser.tika_parse(html) + self.assertEqual(expected_text, parsed)