Add unitest for tika_parse()

This commit is contained in:
phail 2022-10-15 13:13:29 +02:00
parent 3d37e49c1a
commit daf90399bd
3 changed files with 226 additions and 4 deletions

View File

@ -21,6 +21,7 @@ class MailDocumentParser(DocumentParser):
"""
gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
logging_name = "paperless.parsing.mail"
_parsed = None
@ -133,13 +134,13 @@ class MailDocumentParser(DocumentParser):
def tika_parse(self, html: str):
self.log("info", "Sending content to Tika server")
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
try:
parsed = parser.from_buffer(html, tika_server)
parsed = parser.from_buffer(html, self.tika_server)
except Exception as err:
raise ParseError(
f"Could not parse content with tika server at " f"{tika_server}: {err}",
f"Could not parse content with tika server at "
f"{self.tika_server}: {err}",
)
if parsed["content"]:
return parsed["content"]
@ -246,7 +247,7 @@ class MailDocumentParser(DocumentParser):
html = StringIO()
with open(html_file, "r") as html_template_handle:
with open(html_file) as html_template_handle:
for line in html_template_handle.readlines():
for placeholder in placeholder_pattern.findall(line):
line = re.sub(

View File

@ -0,0 +1,197 @@
Return-Path: <someone@example.de>
Delivered-To: someoneelse@example.de
Received: from mail.example.de
by mail.example.de with LMTP id KDcHIQh8fmPHVQAAFx6lBw
for <someoneelse@example.de>; Sat, 15 Oct 2022 09:23:20 +0000
Content-Type: multipart/alternative;
boundary="------------0UhSOOwwiiuLCrPveGIa7UzZ"
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=example.de;
s=2018; t=1665825800;
h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
to:to:cc:mime-version:mime-version:content-type:content-type;
bh=/6OzHOWcwCHrfo1mlk+KcsiTCkt9lN5CEU2AETZBM/M=;
b=AM/Q8Xlmh5jmccjofuedENG9dk1K9ItOL7CBtRhQlTEkjJqb1e1WgrT86SZmU5K9WTVerX
b0GgndG9xavsCSsaKrZX9rIbozFVY1+pr80sl+sZB/UbUFlr2C4/CALwUBveC6H+HcAJUR
uRQycv5zuGm8XAXdo28oFWxCKcAsE0Vs+b8UNs5Qd0VJY9inquLKXHlvLYx+ivnkg/yPCZ
ZiOfv4+Ljfxh3oq6vjN0G7pHmANn1U3MmTLivgGLocl+PPxOCCzHeRp38gJQi3NC75JA/B
4bSJxwjV0ghnq5z7RG/Yo8d9zlB8l7z31PwCNzbPy/bJVC2EFBvHdhVqow==
Message-ID: <a9215c39-5464-8dbf-bb8a-c9fa95eee30f@example.de>
Date: Sat, 15 Oct 2022 11:23:19 +0200
MIME-Version: 1.0
Content-Language: en-US
To: someone@example.de
From: Name <someone@example.de>
Subject: HTML Message
Authentication-Results: ORIGINATING;
auth=pass smtp.auth=someoneelse@example.de smtp.mailfrom=someone@example.de
This is a multi-part message in MIME format.
--------------0UhSOOwwiiuLCrPveGIa7UzZ
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit
Some Text
and an embedded image.
--------------0UhSOOwwiiuLCrPveGIa7UzZ
Content-Type: multipart/related;
boundary="------------fyEsKoz3fdzPxAaSslESHcHz"
--------------fyEsKoz3fdzPxAaSslESHcHz
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 7bit
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<p>Some Text</p>
<p><img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt=""></p>
<p>and an embedded image.<br>
</p>
</body>
</html>
--------------fyEsKoz3fdzPxAaSslESHcHz
Content-Type: image/png; name="IntM6gnXFm00FEV5.png"
Content-Disposition: inline; filename="IntM6gnXFm00FEV5.png"
Content-Id: <part1.pNdUSz0s.D3NqVtPg@example.de>
Content-Transfer-Encoding: base64
iVBORw0KGgoAAAANSUhEUgAAAF0AAABdCAIAAABIE/2UAAAACXBIWXMAAA7EAAAOxAGVKw4b
AAAbQ0lEQVR4nO18eZxcVZX/+Z773qvq6iUbWSCsgsiu4BIEcQGRURAEHXWcGcdlXGB+I86o
g/7QQUHA+aGiAdFxHFTAARRlU0RBRFRAcRRkE4iShLBFkpBOd1fVe/ee7++Pe191JzRJE0Bi
Zk76U13dqXr93vede873fM+5BZLyv/YY02f6BDZR+19cJrfsmT6Bp9MohIDC+AMgQqEg/UAR
xB8E674V/4PiS7zQx0AwqW3u/iICEYLjTpHcAEIKgPR93bdu7vElXrBBSBJCCg0iYiKW/hOT
LZjN0F9IAiB7cSVaDCiMjiIgkByFmGRtbW649EABIHXkjaE1RWGICE1UIPr4oXVzw6UHSswn
8QnFAMRwIwRVjKQIImb/Q/wlPjEzVfXeC6RkmalzVBHVuJ5EhICIQDBZnt4c4m7yg9oAmJmI
mBkAoy1dvmRNNUqIRCdixCFFoEkX0+aAS1wHqK0XXMwMgm7VvfDSb3VDVxSAigkoTsQJYrB5
bJKWzQMXxn8kY4YhIQBQ+VKdXvvza7/z/UtNQQqNgJgwQpNS9GQOs1ngQgqEwoohEThKVXa8
lo/45V+46AuLV9/74xuvycRZMIJQo5CW3j4pf/mzx2U8MQscNF6kWaAz18hO+cKpdy65UxwX
L1kM1IQXlhhwSlCTHPbPHhcAwphwoSIgjBwNY4289b2ff/+Cyy7QlsuK/IHlD3bNuywTQUrQ
EE6ei0T+fHGJ+TglICFjciGM7LLKGsWNS276zFdOtz5YIaJYtepRA4MZhca6un78KvLPlb9M
5G8pPlBCYDt0WcjN99xy7Mf/cenK+/qm9XVDaUIoKKCQgNU5qKbAfzb8JS59Su0IFFIkPZKk
TCS10VSlstI13aiMffKMk+/9433N6QMdX4m6omgsWbpkrNPO8ywlr1QyCmVyn9k0cUlENAaC
+osQIHIT9IATqVdTtyyzhi5bvvSYD//DbUvubM7s74SuukKoAhkZGTNv0cmICdXk45zBpohL
nSqiWyQXMYLkhCuLcUJIGq1TdfIiW75y+Uf/7aM//eV1zFlZgLooKZhRVIIE9nLQ49Dcnm2K
uPRuJ0jCjDRIEAZIIE3EIoMjQmU+hOFypGjkP7vr568//q+uvPVHfXP7jSEXOC/iTUSoEJFc
FYAjMxG3Id1uU4y7URQwiUUdYmh0IqQoCVULBohAg1hp5WDf4CU/veS4U457NBsbmjWtqioR
MlABAQyJByMqLoyEuLbH0Xc3RVykV/sTESMhRKgiIqBRI6MNpSvyYN1P/sep37zywrZWzb5m
VfrobQB6eWqC7CAWZZkJQExaH22KuFDiNcGEEmOt0EhAKEYRQEsri6J564N3fPJzJ193889c
v+NAFoLH2oEjiv5rKXgkNMb19dmmiEtMPSY0Ee3xN+89SDWK0AHOXfzTS04589SlK5b1zerv
sPIMOUAjXK2wQMREVMwCoEZuINhOsGcSF46H2FjsMarzIkIQJqI9lQ2auXbVrsz6mv0r/YoT
Tzvxe9de0c18MbPZDl0qANBEVU2oUWegAUrSOReCJ8m0tjjxr0/qOM8kLhh/TN2KqBOYiFJs
Ys8LUlaVZciy/p/ddtPC80+/9sZr+6f3qZNSSqrAnBMVMQpRF4WoYSY5ffqMPMshEICgmrDu
kEzaJ3lm/aVuB1JiAEhn2cvTJhAxM4JaZA8+8vDCr5518VWXjbjVfbNapVRmXjLV4JRQgoCp
qYlo7S+iCvHeb731/L6+hnkCAJEoENJffuy5PaO4MN5ReFBIF6MJSaEJQVWvnh5NF4Q/+PUP
P/rpExY/fG9r5mAfmiSVTuEkEKIAidpZ6guNoKsoTDJozkyMVApJBZKbYJPQvdeqa2rdGRRI
yr6kCDQETwmCqtlo3vPwH/79/C9f9MPvdLNyYM600kpnDkxLIh02ZvXHXGBK955bzd1SBcFX
WUPZKzYpCtpkHvOnxiWeQjoRMlZwWcyfFKScKt3KN1t9XrrfvemK077wmdsX3dGc0UKuPhjU
xUtJEnY8LCE14xl3AApBB7XKD7UGVRDqmAZhlG02lXW0lr+QUKUZIDRCIm9hJ1SDrdZvl961
8ILP/uAnV1bm++cOdqwUIJgJJVNEajbuIOPfJ/wOoDAEm94/bZ899yGo0DTFkNzNLAL8jPvL
Wv3ACBAQfCVGwjyD5FmujQt/fNkpnzt1SXtR/7RWJkXHOnQgxDlHMzGJBGf93Cz6Qqj87P7Z
L9j9BUKIquiEOlyAmvut894/BS6saWvU6evkk5xFRES1sqq0smg2lq5c9sWvf/nbV14aMhuY
PhBYCQOgqeUTTEmI1m2x+vpjdhaRqDbVvROBBB+222G7vqxJi0FdKFAhhCaqYozZ60+PS13r
pP5eTeKSmmjGThUCOK057bKfX/6ps/7tdw/c09iiZWrqTUWZQrOIGOtQndRL1MevBVtJ4ZdC
CEWhZVnt+uxdBlwrlMFBCRKkxHmG5L2PJTB/Yn8Z76PHhjHIEEo0tKr8Z88940vnfXEU7YG5
Q6M2QkUfGhGKyE9Rt4rWaSr3/CX+MvpLzFAKsRDmDM0GaWaZcxG5GHQt6izPWN0YuazQ6CkO
VIj40ouYIDSK1k33/Phf/9+Jt//+rmyoxTzrBHNosQpBpBco41FiJotQTUg7AtIgJpZR1NQg
ldIUIfhpzf49tt2NInA0NQicZLGTnwDZOFweq6ROtIl9z8d7FwGN6ppmkcKqgGKVVK2ieeGP
zj/+08ePWac1c6BDH2KeDXCiomZMIalmOzUcE0piCAWa2iUWfwQFCkhgf6O1x3P2AACo0ZSq
qmKaBIaN1l/WYhxrX/bEF6zvXawnuiiAIIi3MkjVZvuz55xx5jlfzFvImwMd86aRvMchHsbU
AY4XmD35fnxwUOqLi4sVUfg0QB3Ud8Jzdn3OjJkz4h1yzkn8X01N/HUgfgK4TOov6/xm/f6S
bmfUUARmFVyAyr+c9OHzr754aKuZlVnHShcvuZ6gjDM8E7xxrUtYt/lDYcpRZNTjyBx5Z3T4
uTvv1cpawXt1Lg5/RKgx6f2cIi4TL368UVOP3PTmBx7vvQkdYdSog7Ebyizn8pUPfui0j1x3
6w1DW03vqhcycwhmGhupvXZ6T3HbgIpUZyzEcsIAKoVl6HetvXbeM5bRE3GYdO1PGRcmR0hA
KIQSR0seC5asXR/LBCfqDaRUvps387seuPt9H3vfbUvvKGa2Ar0GQ4wHoJrFBWNKAmpQwrAB
/lbXE9pjALECYhm2mT3/gH0OoJiq9q5FdcNq//pekY7em1UzETIuUQji1M1agUZSfToRzQiS
cyoimWL12PD//fQJNy29PZ830GaZCxolc0JpWRBHRsYFIUFB7IlsQGVjLR1EIhBzdZZrp9M9
7FWHzhuaU1Zl4lATurcbj0sSeCgkzSyIXzmy6re3/1ZSdFxrkq0HQapWo7xcI9gtSx+qDjvH
nfqhG377i/7ZgyO+SxUEayCHUQkVUUIsBs9YYyMV2+u11FLqddwgFAllGGi0XnPga0Tg1EU/
wgTbeFzGPaVk5Suv4b8X33z6BQuHuSbQKycmCpH6OymhLo4liFUWLFTircBJ55580U++M21W
SztlXylNy03QVYooRAM0KKggoKKZAWuVho+PSyLTdCwF3sQy5DJmu87feadtdgpGVbehY0wN
l3F9JGVK8eYrC5dfdfkNv7mhHTqaaWrycvz1iMw+gRWZOI3mEQaK/qtv/NG5F/3XjLkzu1VX
gUxUzAQwxFlaEOkLQNTfxgn+eq2nXRJmMIo6c9VoddjBr5meDQTvVcfHMyelHVPFZfxtEFC8
WLPRt3TF0ht/84vhsZHLr/4u4EytDrB12oZEMh4viUJRGCyDu+ehe888+wsuzwKJzJnFSkfq
GEJJi4dITfn0THphfD1WMz8PJTIn6rth+y23ffmCl3WqTu5y1sVHL7JsZHwZfxtFVAga+K3L
vr34gfvQ1G9c9I37Rx6opOpVxnWgETC12euBAQYNlfqTzjjp1ntuzZq5kSEEOGf1yUbBIabS
3lfv2ZT8JVVNoIOZOMn9muo9b/n7PbfezXyFEGdkesn6SfjL+J80hhAIGbH2Tbf8d8gsG8zv
XPy7q268Ond5t+z44M3MzJjIVFxIkaRKYMiy/Cc3/+y6X/20mN5XiQVlarAiMfA0l4LJ59ym
YjWdZzDLXVaNls/acvvXv/qoQN8oGqIT+P6U7XH9hazHW0nNdNGSRYuWLEJDK3jX78779nlL
Hr0vi52HtdedUMRIEwAm7IbOOd88Zw1Hq8IHkACdBJJJeAbrcnnDp91bVfVXGoyJSxJw1IzK
sfC6Q47YophFI8TBaWwtTd5wfUK4pEulxJyTqfvd3b9bPTactbK2tRuDzVvuuHnh2QuLrKjK
SiZuYorMkwQQLBRa3HLXrb+65abGYNHVMoiZiAmpNQFMNGVq/pIWQx2TUpgeJ0q5Oj9Wzuwf
evUrXiUUB0doSK+UJ+SQjxt3U7YnBOLpb7njlsoqgi5z7W53aIvp3/nuxd/78RWNZsPoRWK9
HEdUxgckDXbeReeOVKOSa7AQG8P1zg6wDiw9dW0D1pt1SoBEeKhQEZFABFoV3v32d++67XNC
8IDGSoipdf9U+EsPIFWMofPw8P0h76og90VmRQXrDvlPfOXk+9sPBvH0XgkxUlCJmFogTeX2
h+685pZrbcgCrQjNzCOjOBMXM9ETOM8aGJG4sklHc0IHceZNzRqqYbi73x77vuP175SQZ5JJ
ECWyuFrjwn2SuNR5lzHjlFX3/gfvLxqF95WESLkNuS5btezE009aMbJSnVpitwIRHzyFCtx9
z91ruiOSqxmVqgRItQmhBfJE/CVdGcREaDAqPUOWxx6s9KN1zNuO7pOm+B7qBDxoeIJ3Yf3+
AhFRaFVWyx9erqrJY+MfpBX9xQWXXbDwnDOQZZ5lZIBqdHAWghL3LP79aLsNOMT6IMZd1KOl
mBBBZQr5IomTAjEgCCyIRwZPE3GrVw6/6dVvfOmeL/VVlasm/QrBGJhU/yftLz0+Ek/6oRUP
Rbmorsok3uIuyplbz/qvSy74zk8vdnnR9aUQsOi3Uor9fsm9LssMluqX1JdAVJ6Sj0zdXxI6
EUSjBCIYg4p2R8v9n/+S97/9WOn4gjmgBAWpA1Dnu6covkAkhADghuuvX7FyRV4UKUhGwFQM
VqJyQ/kHTzzuwh9+M28UY2U3mEFYFHlJv/yRR5g7E6lHfZ4iS+UCBZKLSsfm9M0+9YOnzBma
o3SZZiKisSqX3hinbLAun2jr85dEnIWddjelJ60HMkQgWnkTl83bfv5qP/z5s89YumqpNlwQ
T9I5HWmPLXlgmeaZ1Z2JqdraPIVpLpP1hSbnomgmmQtORnns3/6f5265R6iYFQ1RiKZ5BYqK
OHL9w91TxqVX70TCkDdyVUUcuYg3QMSMuWusWTM6fdbM/V6+/+/uu+vDJx73yIqHVZOr+8pX
wTRTSx2aKUPTawelod1UTI7LD8xIBzgIQjt88L3//I7XvbXqVA4qEqOYpQ2vcEAGKKcQvjaM
i9QUVqEkd9zhWQP9A+1OR1Wjqh7jgxKNZvPW2297z9+9d8G+Cy770fe+dOGXszzrdjshmMud
OoQQIHW3cYpW+wssSWP1o8S2WIAZmMGtfvjRV+530NFvfI/zmklGBZUCM6FJTVpSmJyKYjEF
XOr/h4B77bFbf2tQJDd1Qb2pN1FlQ0NXmvLomjUXn3fBsW89eto2c8++6vxLb7rE9bluGAMr
xyqn5Ua1JzIpnJiwqDkx7Q0Mgsh85kx9s501QvePo39z0FtOOvoTmc8oJoVBCSjonKgDHKmo
uWQ87lOFCygki7wYHBgSi3WhReJhVDqIDzP6hy75/uVrxtpvOOzIPz704Jlnf2l1dw0yV/mS
PmQSIwKmvrxZl1yExhBrMdbBTA1gXrnVD6x+7UGHL/zY57eeNd9o6mDiI8mOyp3GtScTqdJT
6C8AaQPNwW223NoC03iEQEXEsatommsSHGhccu2V73rzW1+4216/vvWW0876rFOHQmfNmukr
n0JDGimcAk1JFRcJMUhQmpppsKR3qq7WN73qrz7+/hN85b33RZalida6/qD01k8iRk+wnN6g
vwjN2GBzzozZoRNg0NQwp5l5p85TqsoNFj/59fVzZsw46pWvNeC711xx6723DbWmz5u7Jb04
giKmScbbYMJOW5ujBCc9MOE0U8qqP658yyF//cUPn7lFcws1NFwuAojWLSXUmWECV6rhfrK4
TJCyxcwy5HvsvKeDc3BOtCauNWeFUOXRNSuvvuaHh73iNbNnzV4+uuIHN1zVb83B1oCE6Nsw
WEpLGzrB+mMmaDB1AOGCNiRHx9DmP77lmOOPOR5BG5L15UWETjVOtvSUnSdr69MxKVTVTDML
fsELXjRzYLpVIW3hp0CYBa1UO8qcbGX51y44d9ZWc56/+15jvn3Vz68p1XbcYcdQeo20B/Wk
JWpqsrakWMuME0cVzFdVDteUolrVHeLAmScsPOk9H29qAxRYzZsRY5JLLaSngkCuDxfU9VHw
Yaetdtxh22eVnTKr291qKIJ0M2nnLIxDLr9z2R9uvueOQ/Y/MGtldy9ZdN1tP9tj9z1bfX3m
TUCrpXBhr1SqOy2S4KhbGD01x5qNLHS77dWjBy448KunfeXwBYeFTmi6LFMwdrRq+p3G5aag
e248LmvJwgDJftc68rVHWmWMXUASwszgAa8QY6Z4tL3mljtv323H5wwMtoZHVv/qV7/cZaed
m1lfhARJGlICSULpQUOoKkmrHUYgqtrUorumM60xdNzRHzrrkwv33WVBOVIVzOOJOxenR3qV
ci+uPwUOs/5+QNyXLLm6KnSOfOlhe+/+/DVj3ZA7L95QebU+r31V1nYcbpRFQx9aumyrrbYb
zPty86MjK+b2z9lxm12rtmbIxLqMG2YIo0aaF7OUOvXew2mc3BV14tzo6Fjn4fCK3V75vbOu
+MBR7x+yweAtbzWsIalvWwvjIoDGoflxN39acOnhk5BR573NbEx/+5F/4ypRE8lQihdHNVET
UTWFiN1//zLAOZcVWfHwQw81WOz7wn19p1JKrnASlEFJpWn8fBqaU/G+cqpq0pCswRxj1nlk
9MW7L/jsJ0474+TTt91iK+9LFXXOCUit9ySlRYfxSbCNSDwbgUtqdkBEtVE0u1X70P0P3nv7
3apHOy7LvaZQIXGcv/JZo1i8bOnw2KjmDR9k7uwtFe6Nr3vDlrPnhU4FEzGv4pVeEQReJACB
DArNoCjNr6nCqnLr1tx/OOpdZ5/472942RF9LFh5BydU1opwIm+1OE/W22KnSJCmYOubZ4hD
9BYnQokGGwNF/yeOPf5t//Lu0XqcxIkBDqLOudLK0sogQZAFL/NmbQVi1tDM5+6259U3XNls
9XVCGSTEcl2gACiaaSZd6451C8mfv/PeRxx8+MEHHLj1jPndbkfKsuUKJRgIx+QdQgp04vx6
XErj9/NpxyVReAMdJXNZqPz+e7zsUx855b0nvb9vehHQIQFk0XPUKR3vuW/R6tUjc2bOPfCA
V4xVo4PF4KsP+otrfnl16avgYIJcMwhoqLqV0R4dXrNla9bhBx121CFH7rPH3rPcDJMQur4P
/ZKJAjRIHBekSJxuiel4YxTipwKXWJEaqRIAiKgia3faB7/4kHce8Xdfu/jrnB4ELjaMBAy0
aVtMX/LA0lXLVx74isN23mbnUJZmnDNvbt5slOzm2hS6ql3SW+iGmYPTd91pl7133/ule+/3
0r1ekonrWln5KkOWuUJSey4l8tTDnDBs+/SBsgFcYmsGYk5KwpE5FCEAyE445qNDfc1PfuuU
6TNmS4dZlokTs87MLWb94NqrChSHH3xYgXys7LSazSu//8PVo6OtGY01y9c00JzRP23reVu+
4TVHHfySA+fNmjfkhoJYtyxNrHBF7LFDIGneS6zu9kqq/0CKYryj9qfGRYSxQqyjW1zZ6qBZ
sA+8458W29Lzzr9wxsBsOPUhZI1i0eJFK+9b+YLdn3vAC/frdjqtwWnfv/4Hl115WaPRGOqb
9qbD//LZW+304hct2GX+swezQRHSe9/pClxDCxGhxQ+fSDNhJmJIH0WhiaykD8t6OjERWatV
OBkuHJ9HjqeSij4LIVjouuqsr33p65edt9I/qgMu0HdH2/Onz/vGyf+557Z7hUrvXX3/mz/w
xgdX3T+AgYUf+/yhL/qLeFyTYD6NACK1NnsH71W/WOcH1Lv8pH7+9C2l9c4FpcoDSPpOTRFI
AKpa+OLD7/zgCf/0r9PywfaKsT7p0wqvP/T1e2//vM5YRxvZ+Zeff9+KZYHhr498y6tfdEhZ
llVVmZkYXJZDHTSNGgqk7qv2dEsBRCc8Sl0N9Z4/jcb1mVl6TBMLNsFCCN12OTy2Zphjv1z8
m6M/c+z8g7Y/9INHLKmWDY+u9t5fd/v1Ox6+y6zD5u79thf8fsWScixUpbfoaSFMPBTX+Z6e
0IzGdR9p6bTqquFpsfXHlwk3pp5P6607CDJRoBgb7ey13e5n/fPnXFvuXHRnA40iawyXaz7/
tc8NV6s63fabD33js2ZuW414V7g0V8U0F1kP1PaCRc8HJjgD1n3EY17ylNsTUF3XauZHdSzT
LM+n9Q1pJVWojn3X+0YeGVn0h0WNonH6uQuv/sVVLtf99ljwple+ueyWTtMgZ6wSpzif80zZ
E9s3MZ4voyymQooZM2Sefv4W8+fN2+ruuxbNnDXnPy/+6rTZQ6jklPd/arvp21RjFfJ4F9b6
nMbNBBcR6cklIhJgAGCg0QIbRb7D9jv86Pprr/jtVZ28Ld1w+L5H7LPD3n7Eq4PXbiaF1Buo
yLUA2tRsY/bZkFSoWQgSnDqIKmAM0GLbHbb/6ufPcXO0GCzQ5nv/9hilCJUwbz7Topfi4qE2
TVBk4z7/JfINAK6ONRSqywI5Z/vZ/XP6p08bGFk2/Jcvf/M+2z4vBEMTcK7p+uUJdR2fUdu4
z8VJLWJAUReX3gcADy9/2MrQWdU+4Hn7Hf/ej3S7nbg1MT70ujlP7TU8HbZxuPSEw3pUnswz
N9wdvvSiS9kO2ai++03vmtOYqaZS97nTfvjx6Z5N2p7UPj4gfuiZCMWpsgoHLNh/oNX3rHnb
vWrBq7plp3A5VEUosPSZI3BPL4F/imz99dHklmqW3vtoQqUEcxTBsB9poVloJiagxrJYEMQo
6kA3YUP8pmsbh0vaSJbKOaaNpSahpBenBXOkycm6/6kWBU+JnGez9JeEQvrG1F1VE5HY1gcE
kjZWACJpJ1eadd9kOctE+/+PsA04/7ZXkgAAAABJRU5ErkJggg==
--------------fyEsKoz3fdzPxAaSslESHcHz--
--------------0UhSOOwwiiuLCrPveGIa7UzZ--

View File

@ -6,6 +6,7 @@ import pytest
from django.test import TestCase
from documents.parsers import ParseError
from paperless_mail.parsers import MailDocumentParser
from paperless_mail.parsers import settings
class TestParser(TestCase):
@ -201,3 +202,26 @@ class TestParser(TestCase):
}
in metadata,
)
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
def test_tika_parse(self, m):
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
expected_text = "\n\n\n\n\n\n\n\n\nSome Text\n"
parser = MailDocumentParser(None)
tika_server_original = parser.tika_server
# Check if exception is raised when Tika cannot be reached.
with pytest.raises(ParseError):
parser.tika_server = ""
parser.tika_parse(html)
# Check unsuccessful parsing
parser.tika_server = tika_server_original
parsed = parser.tika_parse(None)
self.assertEqual("", parsed)
# Check successful parsing
parsed = parser.tika_parse(html)
self.assertEqual(expected_text, parsed)