From 5342db6adab8cdb92bca76eac38960a7aaaa0613 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 9 Sep 2018 20:00:12 +0100 Subject: [PATCH] Fix pycodestyle complaints Apparently, pycodestyle updated itself to now check for invalid escape sequences, and it only complains if the regex in use isn't a raw string (r""). --- src/documents/models.py | 2 +- src/documents/tests/test_matchables.py | 2 +- src/paperless_tesseract/parsers.py | 5 +++-- src/paperless_tesseract/signals.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 36466bbac..9a8e6003d 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -135,7 +135,7 @@ class MatchingModel(models.Model): Example: ' some random words "with quotes " and spaces' ==> - ["some", "random", "words", "with\s+quotes", "and", "spaces"] + ["some", "random", "words", "with\\s+quotes", "and", "spaces"] """ findterms = re.compile(r'"([^"]+)"|(\S+)').findall normspace = re.compile(r"\s+").sub diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index 55d25598a..e592237b6 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -166,7 +166,7 @@ class TestMatching(TestCase): def test_match_regex(self): self._test_matching( - "alpha\w+gamma", + r"alpha\w+gamma", "MATCH_REGEX", ( "I have alpha_and_gamma in me", diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index add65985a..bd1ce8ffb 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -272,8 +272,9 @@ def run_unpaper(args): def strip_excess_whitespace(text): collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text) no_leading_whitespace = re.sub( - "([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces) - no_trailing_whitespace = re.sub("([^\S\n\r]+)$", '', no_leading_whitespace) + r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces) + 
no_trailing_whitespace = re.sub( + r"([^\S\n\r]+)$", '', no_leading_whitespace) return no_trailing_whitespace diff --git a/src/paperless_tesseract/signals.py b/src/paperless_tesseract/signals.py index 2fa54f5d5..237f15c52 100644 --- a/src/paperless_tesseract/signals.py +++ b/src/paperless_tesseract/signals.py @@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser class ConsumerDeclaration: - MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$") + MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$") @classmethod def handle(cls, sender, **kwargs):