Chore: Backend bulk updates (#4509)

This commit is contained in:
Trenton H
2023-11-13 09:09:56 -08:00
committed by GitHub
parent b671f54cb7
commit facb7226fe
15 changed files with 1764 additions and 1596 deletions

View File

@@ -248,7 +248,7 @@ class DocumentClassifier:
data_vectorized = self.data_vectorizer.fit_transform(content_generator())
# See the notes here:
# https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html # noqa: E501
# https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html
# This attribute isn't needed to function and can be large
self.data_vectorizer.stop_words_ = None

View File

@@ -36,13 +36,13 @@ from documents.utils import copy_file_with_basic_stats
# TODO: isn't there a date parsing library for this?
DATE_REGEX = re.compile(
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[a-zA-Z]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))([0-9]{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-][0-9]{4})(\b|(?=([_-])))|" # noqa: E501
r"(\b|(?!=([_-])))(\b[0-9]{1,2}[ \.\/-][a-zA-Z]{3}[ \.\/-][0-9]{4})(\b|(?=([_-])))", # noqa: E501
r"(\b|(?!=([_-])))([0-9]{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-][0-9]{4})(\b|(?=([_-])))|"
r"(\b|(?!=([_-])))(\b[0-9]{1,2}[ \.\/-][a-zA-Z]{3}[ \.\/-][0-9]{4})(\b|(?=([_-])))",
)

View File

@@ -1023,7 +1023,6 @@ class AcknowledgeTasksViewSerializer(serializers.Serializer):
)
def _validate_task_id_list(self, tasks, name="tasks"):
pass
if not isinstance(tasks, list):
raise serializers.ValidationError(f"{name} must be a list")
if not all(isinstance(i, int) for i in tasks):

View File

@@ -158,10 +158,10 @@ class IndexView(TemplateView):
context["main_js"] = f"frontend/{self.get_frontend_language()}/main.js"
context[
"webmanifest"
] = f"frontend/{self.get_frontend_language()}/manifest.webmanifest" # noqa: E501
] = f"frontend/{self.get_frontend_language()}/manifest.webmanifest"
context[
"apple_touch_icon"
] = f"frontend/{self.get_frontend_language()}/apple-touch-icon.png" # noqa: E501
] = f"frontend/{self.get_frontend_language()}/apple-touch-icon.png"
return context

View File

@@ -469,7 +469,7 @@ SECRET_KEY = os.getenv(
AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa: E501
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
},
{
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
@@ -776,7 +776,7 @@ CONSUMER_IGNORE_PATTERNS = list(
json.loads(
os.getenv(
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_Store", ".DS_STORE", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*"]', # noqa: E501
'[".DS_Store", ".DS_STORE", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*"]',
),
),
)

View File

@@ -338,7 +338,7 @@ class RasterisedDocumentParser(DocumentParser):
if "Ghostscript PDF/A rendering" in str(e):
self.log.warning(
"Ghostscript PDF/A rendering failed, consider setting "
"PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'", # noqa: E501
"PAPERLESS_OCR_USER_ARGS: '{\"continue_on_soft_render_error\": true}'",
)
raise ParseError(

View File

@@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs):
"weight": 10,
"mime_types": {
"application/msword": ".doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx", # noqa: E501
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.ms-excel": ".xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx", # noqa: E501
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.ms-powerpoint": ".ppt",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx", # noqa: E501
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx", # noqa: E501
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",
"application/vnd.oasis.opendocument.presentation": ".odp",
"application/vnd.oasis.opendocument.spreadsheet": ".ods",
"application/vnd.oasis.opendocument.text": ".odt",

View File

@@ -1,30 +1,23 @@
[flake8]
extend-exclude = */migrations/*, */tests/*
# E203 - https://www.flake8rules.com/rules/E203.html
# W503 - https://www.flake8rules.com/rules/W503.html
ignore = E203,W503
max-line-length = 88
[tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings
DJANGO_SETTINGS_MODULE = paperless.settings
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
env =
PAPERLESS_DISABLE_DBHANDLER=true
PAPERLESS_DISABLE_DBHANDLER=true
[coverage:run]
source =
./
./
omit =
*/tests/*
manage.py
paperless/workers.py
paperless/wsgi.py
paperless/auth.py
*/tests/*
manage.py
paperless/workers.py
paperless/wsgi.py
paperless/auth.py
[coverage:report]
exclude_also =
if settings.AUDIT_LOG_ENABLED:
if AUDIT_LOG_ENABLED:
if AUDIT_LOG_ENABLED:
[mypy]
plugins = mypy_django_plugin.main, mypy_drf_plugin.main, numpy.typing.mypy_plugin