mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Add unit test for tika_parse()
This commit is contained in:
parent
3d37e49c1a
commit
daf90399bd
@ -21,6 +21,7 @@ class MailDocumentParser(DocumentParser):
|
||||
"""
|
||||
|
||||
gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
|
||||
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
|
||||
|
||||
logging_name = "paperless.parsing.mail"
|
||||
_parsed = None
|
||||
@ -133,13 +134,13 @@ class MailDocumentParser(DocumentParser):
|
||||
|
||||
def tika_parse(self, html: str):
|
||||
self.log("info", "Sending content to Tika server")
|
||||
tika_server = settings.PAPERLESS_TIKA_ENDPOINT
|
||||
|
||||
try:
|
||||
parsed = parser.from_buffer(html, tika_server)
|
||||
parsed = parser.from_buffer(html, self.tika_server)
|
||||
except Exception as err:
|
||||
raise ParseError(
|
||||
f"Could not parse content with tika server at " f"{tika_server}: {err}",
|
||||
f"Could not parse content with tika server at "
|
||||
f"{self.tika_server}: {err}",
|
||||
)
|
||||
if parsed["content"]:
|
||||
return parsed["content"]
|
||||
@ -246,7 +247,7 @@ class MailDocumentParser(DocumentParser):
|
||||
|
||||
html = StringIO()
|
||||
|
||||
with open(html_file, "r") as html_template_handle:
|
||||
with open(html_file) as html_template_handle:
|
||||
for line in html_template_handle.readlines():
|
||||
for placeholder in placeholder_pattern.findall(line):
|
||||
line = re.sub(
|
||||
|
197
src/paperless_mail/tests/samples/html.eml
Normal file
197
src/paperless_mail/tests/samples/html.eml
Normal file
@ -0,0 +1,197 @@
|
||||
Return-Path: <someone@example.de>
|
||||
Delivered-To: someoneelse@example.de
|
||||
Received: from mail.example.de
|
||||
by mail.example.de with LMTP id KDcHIQh8fmPHVQAAFx6lBw
|
||||
for <someoneelse@example.de>; Sat, 15 Oct 2022 09:23:20 +0000
|
||||
Content-Type: multipart/alternative;
|
||||
boundary="------------0UhSOOwwiiuLCrPveGIa7UzZ"
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=example.de;
|
||||
s=2018; t=1665825800;
|
||||
h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
|
||||
to:to:cc:mime-version:mime-version:content-type:content-type;
|
||||
bh=/6OzHOWcwCHrfo1mlk+KcsiTCkt9lN5CEU2AETZBM/M=;
|
||||
b=AM/Q8Xlmh5jmccjofuedENG9dk1K9ItOL7CBtRhQlTEkjJqb1e1WgrT86SZmU5K9WTVerX
|
||||
b0GgndG9xavsCSsaKrZX9rIbozFVY1+pr80sl+sZB/UbUFlr2C4/CALwUBveC6H+HcAJUR
|
||||
uRQycv5zuGm8XAXdo28oFWxCKcAsE0Vs+b8UNs5Qd0VJY9inquLKXHlvLYx+ivnkg/yPCZ
|
||||
ZiOfv4+Ljfxh3oq6vjN0G7pHmANn1U3MmTLivgGLocl+PPxOCCzHeRp38gJQi3NC75JA/B
|
||||
4bSJxwjV0ghnq5z7RG/Yo8d9zlB8l7z31PwCNzbPy/bJVC2EFBvHdhVqow==
|
||||
Message-ID: <a9215c39-5464-8dbf-bb8a-c9fa95eee30f@example.de>
|
||||
Date: Sat, 15 Oct 2022 11:23:19 +0200
|
||||
MIME-Version: 1.0
|
||||
Content-Language: en-US
|
||||
To: someone@example.de
|
||||
From: Name <someone@example.de>
|
||||
Subject: HTML Message
|
||||
Authentication-Results: ORIGINATING;
|
||||
auth=pass smtp.auth=someoneelse@example.de smtp.mailfrom=someone@example.de
|
||||
|
||||
This is a multi-part message in MIME format.
|
||||
--------------0UhSOOwwiiuLCrPveGIa7UzZ
|
||||
Content-Type: text/plain; charset=UTF-8; format=flowed
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
Some Text
|
||||
|
||||
and an embedded image.
|
||||
|
||||
--------------0UhSOOwwiiuLCrPveGIa7UzZ
|
||||
Content-Type: multipart/related;
|
||||
boundary="------------fyEsKoz3fdzPxAaSslESHcHz"
|
||||
|
||||
--------------fyEsKoz3fdzPxAaSslESHcHz
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
Content-Transfer-Encoding: 7bit
|
||||
|
||||
<html>
|
||||
<head>
|
||||
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<p>Some Text</p>
|
||||
<p><img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt=""></p>
|
||||
<p>and an embedded image.<br>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
--------------fyEsKoz3fdzPxAaSslESHcHz
|
||||
Content-Type: image/png; name="IntM6gnXFm00FEV5.png"
|
||||
Content-Disposition: inline; filename="IntM6gnXFm00FEV5.png"
|
||||
Content-Id: <part1.pNdUSz0s.D3NqVtPg@example.de>
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
iVBORw0KGgoAAAANSUhEUgAAAF0AAABdCAIAAABIE/2UAAAACXBIWXMAAA7EAAAOxAGVKw4b
|
||||
AAAbQ0lEQVR4nO18eZxcVZX/+Z773qvq6iUbWSCsgsiu4BIEcQGRURAEHXWcGcdlXGB+I86o
|
||||
g/7QQUHA+aGiAdFxHFTAARRlU0RBRFRAcRRkE4iShLBFkpBOd1fVe/ee7++Pe191JzRJE0Bi
|
||||
Zk76U13dqXr93vede873fM+5BZLyv/YY02f6BDZR+19cJrfsmT6Bp9MohIDC+AMgQqEg/UAR
|
||||
xB8E674V/4PiS7zQx0AwqW3u/iICEYLjTpHcAEIKgPR93bdu7vElXrBBSBJCCg0iYiKW/hOT
|
||||
LZjN0F9IAiB7cSVaDCiMjiIgkByFmGRtbW649EABIHXkjaE1RWGICE1UIPr4oXVzw6UHSswn
|
||||
8QnFAMRwIwRVjKQIImb/Q/wlPjEzVfXeC6RkmalzVBHVuJ5EhICIQDBZnt4c4m7yg9oAmJmI
|
||||
mBkAoy1dvmRNNUqIRCdixCFFoEkX0+aAS1wHqK0XXMwMgm7VvfDSb3VDVxSAigkoTsQJYrB5
|
||||
bJKWzQMXxn8kY4YhIQBQ+VKdXvvza7/z/UtNQQqNgJgwQpNS9GQOs1ngQgqEwoohEThKVXa8
|
||||
lo/45V+46AuLV9/74xuvycRZMIJQo5CW3j4pf/mzx2U8MQscNF6kWaAz18hO+cKpdy65UxwX
|
||||
L1kM1IQXlhhwSlCTHPbPHhcAwphwoSIgjBwNY4289b2ff/+Cyy7QlsuK/IHlD3bNuywTQUrQ
|
||||
EE6ei0T+fHGJ+TglICFjciGM7LLKGsWNS276zFdOtz5YIaJYtepRA4MZhca6un78KvLPlb9M
|
||||
5G8pPlBCYDt0WcjN99xy7Mf/cenK+/qm9XVDaUIoKKCQgNU5qKbAfzb8JS59Su0IFFIkPZKk
|
||||
TCS10VSlstI13aiMffKMk+/9433N6QMdX4m6omgsWbpkrNPO8ywlr1QyCmVyn9k0cUlENAaC
|
||||
+osQIHIT9IATqVdTtyyzhi5bvvSYD//DbUvubM7s74SuukKoAhkZGTNv0cmICdXk45zBpohL
|
||||
nSqiWyQXMYLkhCuLcUJIGq1TdfIiW75y+Uf/7aM//eV1zFlZgLooKZhRVIIE9nLQ49Dcnm2K
|
||||
uPRuJ0jCjDRIEAZIIE3EIoMjQmU+hOFypGjkP7vr568//q+uvPVHfXP7jSEXOC/iTUSoEJFc
|
||||
FYAjMxG3Id1uU4y7URQwiUUdYmh0IqQoCVULBohAg1hp5WDf4CU/veS4U457NBsbmjWtqioR
|
||||
MlABAQyJByMqLoyEuLbH0Xc3RVykV/sTESMhRKgiIqBRI6MNpSvyYN1P/sep37zywrZWzb5m
|
||||
VfrobQB6eWqC7CAWZZkJQExaH22KuFDiNcGEEmOt0EhAKEYRQEsri6J564N3fPJzJ193889c
|
||||
v+NAFoLH2oEjiv5rKXgkNMb19dmmiEtMPSY0Ee3xN+89SDWK0AHOXfzTS04589SlK5b1zerv
|
||||
sPIMOUAjXK2wQMREVMwCoEZuINhOsGcSF46H2FjsMarzIkIQJqI9lQ2auXbVrsz6mv0r/YoT
|
||||
Tzvxe9de0c18MbPZDl0qANBEVU2oUWegAUrSOReCJ8m0tjjxr0/qOM8kLhh/TN2KqBOYiFJs
|
||||
Ys8LUlaVZciy/p/ddtPC80+/9sZr+6f3qZNSSqrAnBMVMQpRF4WoYSY5ffqMPMshEICgmrDu
|
||||
kEzaJ3lm/aVuB1JiAEhn2cvTJhAxM4JaZA8+8vDCr5518VWXjbjVfbNapVRmXjLV4JRQgoCp
|
||||
qYlo7S+iCvHeb731/L6+hnkCAJEoENJffuy5PaO4MN5ReFBIF6MJSaEJQVWvnh5NF4Q/+PUP
|
||||
P/rpExY/fG9r5mAfmiSVTuEkEKIAidpZ6guNoKsoTDJozkyMVApJBZKbYJPQvdeqa2rdGRRI
|
||||
yr6kCDQETwmCqtlo3vPwH/79/C9f9MPvdLNyYM600kpnDkxLIh02ZvXHXGBK955bzd1SBcFX
|
||||
WUPZKzYpCtpkHvOnxiWeQjoRMlZwWcyfFKScKt3KN1t9XrrfvemK077wmdsX3dGc0UKuPhjU
|
||||
xUtJEnY8LCE14xl3AApBB7XKD7UGVRDqmAZhlG02lXW0lr+QUKUZIDRCIm9hJ1SDrdZvl961
|
||||
8ILP/uAnV1bm++cOdqwUIJgJJVNEajbuIOPfJ/wOoDAEm94/bZ899yGo0DTFkNzNLAL8jPvL
|
||||
Wv3ACBAQfCVGwjyD5FmujQt/fNkpnzt1SXtR/7RWJkXHOnQgxDlHMzGJBGf93Cz6Qqj87P7Z
|
||||
L9j9BUKIquiEOlyAmvut894/BS6saWvU6evkk5xFRES1sqq0smg2lq5c9sWvf/nbV14aMhuY
|
||||
PhBYCQOgqeUTTEmI1m2x+vpjdhaRqDbVvROBBB+222G7vqxJi0FdKFAhhCaqYozZ60+PS13r
|
||||
pP5eTeKSmmjGThUCOK057bKfX/6ps/7tdw/c09iiZWrqTUWZQrOIGOtQndRL1MevBVtJ4ZdC
|
||||
CEWhZVnt+uxdBlwrlMFBCRKkxHmG5L2PJTB/Yn8Z76PHhjHIEEo0tKr8Z88940vnfXEU7YG5
|
||||
Q6M2QkUfGhGKyE9Rt4rWaSr3/CX+MvpLzFAKsRDmDM0GaWaZcxG5GHQt6izPWN0YuazQ6CkO
|
||||
VIj40ouYIDSK1k33/Phf/9+Jt//+rmyoxTzrBHNosQpBpBco41FiJotQTUg7AtIgJpZR1NQg
|
||||
ldIUIfhpzf49tt2NInA0NQicZLGTnwDZOFweq6ROtIl9z8d7FwGN6ppmkcKqgGKVVK2ieeGP
|
||||
zj/+08ePWac1c6BDH2KeDXCiomZMIalmOzUcE0piCAWa2iUWfwQFCkhgf6O1x3P2AACo0ZSq
|
||||
qmKaBIaN1l/WYhxrX/bEF6zvXawnuiiAIIi3MkjVZvuz55xx5jlfzFvImwMd86aRvMchHsbU
|
||||
AY4XmD35fnxwUOqLi4sVUfg0QB3Ud8Jzdn3OjJkz4h1yzkn8X01N/HUgfgK4TOov6/xm/f6S
|
||||
bmfUUARmFVyAyr+c9OHzr754aKuZlVnHShcvuZ6gjDM8E7xxrUtYt/lDYcpRZNTjyBx5Z3T4
|
||||
uTvv1cpawXt1Lg5/RKgx6f2cIi4TL368UVOP3PTmBx7vvQkdYdSog7Ebyizn8pUPfui0j1x3
|
||||
6w1DW03vqhcycwhmGhupvXZ6T3HbgIpUZyzEcsIAKoVl6HetvXbeM5bRE3GYdO1PGRcmR0hA
|
||||
KIQSR0seC5asXR/LBCfqDaRUvps387seuPt9H3vfbUvvKGa2Ar0GQ4wHoJrFBWNKAmpQwrAB
|
||||
/lbXE9pjALECYhm2mT3/gH0OoJiq9q5FdcNq//pekY7em1UzETIuUQji1M1agUZSfToRzQiS
|
||||
cyoimWL12PD//fQJNy29PZ830GaZCxolc0JpWRBHRsYFIUFB7IlsQGVjLR1EIhBzdZZrp9M9
|
||||
7FWHzhuaU1Zl4lATurcbj0sSeCgkzSyIXzmy6re3/1ZSdFxrkq0HQapWo7xcI9gtSx+qDjvH
|
||||
nfqhG377i/7ZgyO+SxUEayCHUQkVUUIsBs9YYyMV2+u11FLqddwgFAllGGi0XnPga0Tg1EU/
|
||||
wgTbeFzGPaVk5Suv4b8X33z6BQuHuSbQKycmCpH6OymhLo4liFUWLFTircBJ55580U++M21W
|
||||
SztlXylNy03QVYooRAM0KKggoKKZAWuVho+PSyLTdCwF3sQy5DJmu87feadtdgpGVbehY0wN
|
||||
l3F9JGVK8eYrC5dfdfkNv7mhHTqaaWrycvz1iMw+gRWZOI3mEQaK/qtv/NG5F/3XjLkzu1VX
|
||||
gUxUzAQwxFlaEOkLQNTfxgn+eq2nXRJmMIo6c9VoddjBr5meDQTvVcfHMyelHVPFZfxtEFC8
|
||||
WLPRt3TF0ht/84vhsZHLr/4u4EytDrB12oZEMh4viUJRGCyDu+ehe888+wsuzwKJzJnFSkfq
|
||||
GEJJi4dITfn0THphfD1WMz8PJTIn6rth+y23ffmCl3WqTu5y1sVHL7JsZHwZfxtFVAga+K3L
|
||||
vr34gfvQ1G9c9I37Rx6opOpVxnWgETC12euBAQYNlfqTzjjp1ntuzZq5kSEEOGf1yUbBIabS
|
||||
3lfv2ZT8JVVNoIOZOMn9muo9b/n7PbfezXyFEGdkesn6SfjL+J80hhAIGbH2Tbf8d8gsG8zv
|
||||
XPy7q268Ond5t+z44M3MzJjIVFxIkaRKYMiy/Cc3/+y6X/20mN5XiQVlarAiMfA0l4LJ59ym
|
||||
YjWdZzDLXVaNls/acvvXv/qoQN8oGqIT+P6U7XH9hazHW0nNdNGSRYuWLEJDK3jX78779nlL
|
||||
Hr0vi52HtdedUMRIEwAm7IbOOd88Zw1Hq8IHkACdBJJJeAbrcnnDp91bVfVXGoyJSxJw1IzK
|
||||
sfC6Q47YophFI8TBaWwtTd5wfUK4pEulxJyTqfvd3b9bPTactbK2tRuDzVvuuHnh2QuLrKjK
|
||||
SiZuYorMkwQQLBRa3HLXrb+65abGYNHVMoiZiAmpNQFMNGVq/pIWQx2TUpgeJ0q5Oj9Wzuwf
|
||||
evUrXiUUB0doSK+UJ+SQjxt3U7YnBOLpb7njlsoqgi5z7W53aIvp3/nuxd/78RWNZsPoRWK9
|
||||
HEdUxgckDXbeReeOVKOSa7AQG8P1zg6wDiw9dW0D1pt1SoBEeKhQEZFABFoV3v32d++67XNC
|
||||
8IDGSoipdf9U+EsPIFWMofPw8P0h76og90VmRQXrDvlPfOXk+9sPBvH0XgkxUlCJmFogTeX2
|
||||
h+685pZrbcgCrQjNzCOjOBMXM9ETOM8aGJG4sklHc0IHceZNzRqqYbi73x77vuP175SQZ5JJ
|
||||
ECWyuFrjwn2SuNR5lzHjlFX3/gfvLxqF95WESLkNuS5btezE009aMbJSnVpitwIRHzyFCtx9
|
||||
z91ruiOSqxmVqgRItQmhBfJE/CVdGcREaDAqPUOWxx6s9KN1zNuO7pOm+B7qBDxoeIJ3Yf3+
|
||||
AhFRaFVWyx9erqrJY+MfpBX9xQWXXbDwnDOQZZ5lZIBqdHAWghL3LP79aLsNOMT6IMZd1KOl
|
||||
mBBBZQr5IomTAjEgCCyIRwZPE3GrVw6/6dVvfOmeL/VVlasm/QrBGJhU/yftLz0+Ek/6oRUP
|
||||
Rbmorsok3uIuyplbz/qvSy74zk8vdnnR9aUQsOi3Uor9fsm9LssMluqX1JdAVJ6Sj0zdXxI6
|
||||
EUSjBCIYg4p2R8v9n/+S97/9WOn4gjmgBAWpA1Dnu6covkAkhADghuuvX7FyRV4UKUhGwFQM
|
||||
VqJyQ/kHTzzuwh9+M28UY2U3mEFYFHlJv/yRR5g7E6lHfZ4iS+UCBZKLSsfm9M0+9YOnzBma
|
||||
o3SZZiKisSqX3hinbLAun2jr85dEnIWddjelJ60HMkQgWnkTl83bfv5qP/z5s89YumqpNlwQ
|
||||
T9I5HWmPLXlgmeaZ1Z2JqdraPIVpLpP1hSbnomgmmQtORnns3/6f5265R6iYFQ1RiKZ5BYqK
|
||||
OHL9w91TxqVX70TCkDdyVUUcuYg3QMSMuWusWTM6fdbM/V6+/+/uu+vDJx73yIqHVZOr+8pX
|
||||
wTRTSx2aKUPTawelod1UTI7LD8xIBzgIQjt88L3//I7XvbXqVA4qEqOYpQ2vcEAGKKcQvjaM
|
||||
i9QUVqEkd9zhWQP9A+1OR1Wjqh7jgxKNZvPW2297z9+9d8G+Cy770fe+dOGXszzrdjshmMud
|
||||
OoQQIHW3cYpW+wssSWP1o8S2WIAZmMGtfvjRV+530NFvfI/zmklGBZUCM6FJTVpSmJyKYjEF
|
||||
XOr/h4B77bFbf2tQJDd1Qb2pN1FlQ0NXmvLomjUXn3fBsW89eto2c8++6vxLb7rE9bluGAMr
|
||||
xyqn5Ua1JzIpnJiwqDkx7Q0Mgsh85kx9s501QvePo39z0FtOOvoTmc8oJoVBCSjonKgDHKmo
|
||||
uWQ87lOFCygki7wYHBgSi3WhReJhVDqIDzP6hy75/uVrxtpvOOzIPz704Jlnf2l1dw0yV/mS
|
||||
PmQSIwKmvrxZl1yExhBrMdbBTA1gXrnVD6x+7UGHL/zY57eeNd9o6mDiI8mOyp3GtScTqdJT
|
||||
6C8AaQPNwW223NoC03iEQEXEsatommsSHGhccu2V73rzW1+4216/vvWW0876rFOHQmfNmukr
|
||||
n0JDGimcAk1JFRcJMUhQmpppsKR3qq7WN73qrz7+/hN85b33RZalida6/qD01k8iRk+wnN6g
|
||||
vwjN2GBzzozZoRNg0NQwp5l5p85TqsoNFj/59fVzZsw46pWvNeC711xx6723DbWmz5u7Jb04
|
||||
giKmScbbYMJOW5ujBCc9MOE0U8qqP658yyF//cUPn7lFcws1NFwuAojWLSXUmWECV6rhfrK4
|
||||
TJCyxcwy5HvsvKeDc3BOtCauNWeFUOXRNSuvvuaHh73iNbNnzV4+uuIHN1zVb83B1oCE6Nsw
|
||||
WEpLGzrB+mMmaDB1AOGCNiRHx9DmP77lmOOPOR5BG5L15UWETjVOtvSUnSdr69MxKVTVTDML
|
||||
fsELXjRzYLpVIW3hp0CYBa1UO8qcbGX51y44d9ZWc56/+15jvn3Vz68p1XbcYcdQeo20B/Wk
|
||||
JWpqsrakWMuME0cVzFdVDteUolrVHeLAmScsPOk9H29qAxRYzZsRY5JLLaSngkCuDxfU9VHw
|
||||
Yaetdtxh22eVnTKr291qKIJ0M2nnLIxDLr9z2R9uvueOQ/Y/MGtldy9ZdN1tP9tj9z1bfX3m
|
||||
TUCrpXBhr1SqOy2S4KhbGD01x5qNLHS77dWjBy448KunfeXwBYeFTmi6LFMwdrRq+p3G5aag
|
||||
e248LmvJwgDJftc68rVHWmWMXUASwszgAa8QY6Z4tL3mljtv323H5wwMtoZHVv/qV7/cZaed
|
||||
m1lfhARJGlICSULpQUOoKkmrHUYgqtrUorumM60xdNzRHzrrkwv33WVBOVIVzOOJOxenR3qV
|
||||
ci+uPwUOs/5+QNyXLLm6KnSOfOlhe+/+/DVj3ZA7L95QebU+r31V1nYcbpRFQx9aumyrrbYb
|
||||
zPty86MjK+b2z9lxm12rtmbIxLqMG2YIo0aaF7OUOvXew2mc3BV14tzo6Fjn4fCK3V75vbOu
|
||||
+MBR7x+yweAtbzWsIalvWwvjIoDGoflxN39acOnhk5BR573NbEx/+5F/4ypRE8lQihdHNVET
|
||||
UTWFiN1//zLAOZcVWfHwQw81WOz7wn19p1JKrnASlEFJpWn8fBqaU/G+cqpq0pCswRxj1nlk
|
||||
9MW7L/jsJ0474+TTt91iK+9LFXXOCUit9ySlRYfxSbCNSDwbgUtqdkBEtVE0u1X70P0P3nv7
|
||||
3apHOy7LvaZQIXGcv/JZo1i8bOnw2KjmDR9k7uwtFe6Nr3vDlrPnhU4FEzGv4pVeEQReJACB
|
||||
DArNoCjNr6nCqnLr1tx/OOpdZ5/472942RF9LFh5BydU1opwIm+1OE/W22KnSJCmYOubZ4hD
|
||||
9BYnQokGGwNF/yeOPf5t//Lu0XqcxIkBDqLOudLK0sogQZAFL/NmbQVi1tDM5+6259U3XNls
|
||||
9XVCGSTEcl2gACiaaSZd6451C8mfv/PeRxx8+MEHHLj1jPndbkfKsuUKJRgIx+QdQgp04vx6
|
||||
XErj9/NpxyVReAMdJXNZqPz+e7zsUx855b0nvb9vehHQIQFk0XPUKR3vuW/R6tUjc2bOPfCA
|
||||
V4xVo4PF4KsP+otrfnl16avgYIJcMwhoqLqV0R4dXrNla9bhBx121CFH7rPH3rPcDJMQur4P
|
||||
/ZKJAjRIHBekSJxuiel4YxTipwKXWJEaqRIAiKgia3faB7/4kHce8Xdfu/jrnB4ELjaMBAy0
|
||||
aVtMX/LA0lXLVx74isN23mbnUJZmnDNvbt5slOzm2hS6ql3SW+iGmYPTd91pl7133/ule+/3
|
||||
0r1ekonrWln5KkOWuUJSey4l8tTDnDBs+/SBsgFcYmsGYk5KwpE5FCEAyE445qNDfc1PfuuU
|
||||
6TNmS4dZlokTs87MLWb94NqrChSHH3xYgXys7LSazSu//8PVo6OtGY01y9c00JzRP23reVu+
|
||||
4TVHHfySA+fNmjfkhoJYtyxNrHBF7LFDIGneS6zu9kqq/0CKYryj9qfGRYSxQqyjW1zZ6qBZ
|
||||
sA+8458W29Lzzr9wxsBsOPUhZI1i0eJFK+9b+YLdn3vAC/frdjqtwWnfv/4Hl115WaPRGOqb
|
||||
9qbD//LZW+304hct2GX+swezQRHSe9/pClxDCxGhxQ+fSDNhJmJIH0WhiaykD8t6OjERWatV
|
||||
OBkuHJ9HjqeSij4LIVjouuqsr33p65edt9I/qgMu0HdH2/Onz/vGyf+557Z7hUrvXX3/mz/w
|
||||
xgdX3T+AgYUf+/yhL/qLeFyTYD6NACK1NnsH71W/WOcH1Lv8pH7+9C2l9c4FpcoDSPpOTRFI
|
||||
AKpa+OLD7/zgCf/0r9PywfaKsT7p0wqvP/T1e2//vM5YRxvZ+Zeff9+KZYHhr498y6tfdEhZ
|
||||
llVVmZkYXJZDHTSNGgqk7qv2dEsBRCc8Sl0N9Z4/jcb1mVl6TBMLNsFCCN12OTy2Zphjv1z8
|
||||
m6M/c+z8g7Y/9INHLKmWDY+u9t5fd/v1Ox6+y6zD5u79thf8fsWScixUpbfoaSFMPBTX+Z6e
|
||||
0IzGdR9p6bTqquFpsfXHlwk3pp5P6607CDJRoBgb7ey13e5n/fPnXFvuXHRnA40iawyXaz7/
|
||||
tc8NV6s63fabD33js2ZuW414V7g0V8U0F1kP1PaCRc8HJjgD1n3EY17ylNsTUF3XauZHdSzT
|
||||
LM+n9Q1pJVWojn3X+0YeGVn0h0WNonH6uQuv/sVVLtf99ljwple+ueyWTtMgZ6wSpzif80zZ
|
||||
E9s3MZ4voyymQooZM2Sefv4W8+fN2+ruuxbNnDXnPy/+6rTZQ6jklPd/arvp21RjFfJ4F9b6
|
||||
nMbNBBcR6cklIhJgAGCg0QIbRb7D9jv86Pprr/jtVZ28Ld1w+L5H7LPD3n7Eq4PXbiaF1Buo
|
||||
yLUA2tRsY/bZkFSoWQgSnDqIKmAM0GLbHbb/6ufPcXO0GCzQ5nv/9hilCJUwbz7Topfi4qE2
|
||||
TVBk4z7/JfINAK6ONRSqywI5Z/vZ/XP6p08bGFk2/Jcvf/M+2z4vBEMTcK7p+uUJdR2fUdu4
|
||||
z8VJLWJAUReX3gcADy9/2MrQWdU+4Hn7Hf/ej3S7nbg1MT70ujlP7TU8HbZxuPSEw3pUnswz
|
||||
N9wdvvSiS9kO2ai++03vmtOYqaZS97nTfvjx6Z5N2p7UPj4gfuiZCMWpsgoHLNh/oNX3rHnb
|
||||
vWrBq7plp3A5VEUosPSZI3BPL4F/imz99dHklmqW3vtoQqUEcxTBsB9poVloJiagxrJYEMQo
|
||||
6kA3YUP8pmsbh0vaSJbKOaaNpSahpBenBXOkycm6/6kWBU+JnGez9JeEQvrG1F1VE5HY1gcE
|
||||
kjZWACJpJ1eadd9kOctE+/+PsA04/7ZXkgAAAABJRU5ErkJggg==
|
||||
|
||||
--------------fyEsKoz3fdzPxAaSslESHcHz--
|
||||
|
||||
--------------0UhSOOwwiiuLCrPveGIa7UzZ--
|
@ -6,6 +6,7 @@ import pytest
|
||||
from django.test import TestCase
|
||||
from documents.parsers import ParseError
|
||||
from paperless_mail.parsers import MailDocumentParser
|
||||
from paperless_mail.parsers import settings
|
||||
|
||||
|
||||
class TestParser(TestCase):
|
||||
@ -201,3 +202,26 @@ class TestParser(TestCase):
|
||||
}
|
||||
in metadata,
|
||||
)
|
||||
|
||||
@mock.patch("documents.loggers.LoggingMixin.log") # Disable log output
|
||||
def test_tika_parse(self, m):
|
||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
||||
expected_text = "\n\n\n\n\n\n\n\n\nSome Text\n"
|
||||
|
||||
parser = MailDocumentParser(None)
|
||||
tika_server_original = parser.tika_server
|
||||
|
||||
# Check if exception is raised when Tika cannot be reached.
|
||||
with pytest.raises(ParseError):
|
||||
parser.tika_server = ""
|
||||
parser.tika_parse(html)
|
||||
|
||||
# Check unsuccessful parsing
|
||||
parser.tika_server = tika_server_original
|
||||
|
||||
parsed = parser.tika_parse(None)
|
||||
self.assertEqual("", parsed)
|
||||
|
||||
# Check successful parsing
|
||||
parsed = parser.tika_parse(html)
|
||||
self.assertEqual(expected_text, parsed)
|
||||
|
Loading…
x
Reference in New Issue
Block a user