optimize regex

This commit is contained in:
phail
2022-11-20 12:48:03 +01:00
parent 073c3c8fed
commit d132eba143
2 changed files with 3 additions and 4 deletions

View File

@@ -105,9 +105,8 @@ class MailDocumentParser(DocumentParser):
def parse(self, document_path, mime_type, file_name=None):
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers
# and indentation were stripped -- the three plain-string substitutions are
# presumably the removed lines and the two raw-string ones their
# replacements. Confirm against the repository before relying on it.
def strip_text(text: str):
# Normalise the whitespace in `text` and return the result with leading and
# trailing whitespace removed.
# Replace each tab character with a single space.
text = re.sub("\t", " ", text)
# Collapse every run of spaces into one space.
text = re.sub(" +", " ", text)
# Collapse every run of newlines (each optionally followed by spaces) into
# a single newline.
text = re.sub("(\n *)+", "\n", text)
# Replace every run of whitespace -- newlines included -- with one space.
text = re.sub(r"\s+", " ", text)
# Read literally, this pattern can no longer match here: the preceding
# substitution has already turned every newline into a space.
text = re.sub(r"(\n *)+", "\n", text)
return text.strip()
mail = self.get_parsed(document_path)