Merge branch 'dev' into chore/codecov-testing-frontend

This commit is contained in:
shamoon 2025-03-04 14:49:18 -08:00 committed by GitHub
commit 4c4c8bb704
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 48 additions and 265 deletions

View File

@ -549,7 +549,7 @@ jobs:
-
name: Generate requirements file
run: |
uv export --quiet --no-dev --format requirements-txt --output-file requirements.txt
uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt
-
name: Compile messages
run: |

View File

@ -205,9 +205,6 @@ COPY --chown=1000:1000 ["pyproject.toml", "uv.lock", "/usr/src/paperless/src/"]
# dependencies
ARG BUILD_PACKAGES="\
build-essential \
git \
# https://www.psycopg.org/docs/install.html#prerequisites
libpq-dev \
# https://github.com/PyMySQL/mysqlclient#linux
default-libmysqlclient-dev \
pkg-config"
@ -219,7 +216,7 @@ RUN --mount=type=cache,target=${UV_CACHE_DIR},id=python-cache \
&& apt-get update \
&& apt-get install --yes --quiet --no-install-recommends ${BUILD_PACKAGES} \
&& echo "Installing Python requirements" \
&& uv export --quiet --no-dev --format requirements-txt --output-file requirements.txt \
&& uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt \
&& uv pip install --system --no-python-downloads --python-preference system --requirements requirements.txt \
&& echo "Installing NLTK data" \
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \

View File

@ -380,6 +380,12 @@ are released, dependency support is confirmed, etc.
dependencies. This is an alternative to the above and may require adjusting
the example scripts to utilize the virtual environment paths
!!! tip
If you use modern Python tooling, such as `uv`, installation will not include
dependencies for Postgres or Mariadb. You can select those extras with `--extra <EXTRA>`
or all with `--all-extras`
9. Go to `/opt/paperless/src`, and execute the following commands:
```bash

View File

@ -42,20 +42,15 @@ dependencies = [
"filelock~=3.17.0",
"flower~=2.0.1",
"gotenberg-client~=0.9.0",
"granian~=1.7.6",
"httpx-oauth~=0.16",
"imap-tools~=1.10.0",
"inotifyrecursive~=0.3",
"jinja2~=3.1.5",
"langdetect~=1.0.9",
"mysqlclient~=2.2.7",
"nltk~=3.9.1",
"ocrmypdf~=16.9.0",
"pathvalidate~=3.2.3",
"pdf2image~=1.17.0",
"psycopg[c]==3.2.4",
# Direct dependency for proper resolution of the pre-build wheels
"psycopg-c==3.2.4",
"python-dateutil~=2.9.0",
"python-dotenv~=1.0.1",
"python-gnupg~=0.5.4",
@ -74,6 +69,18 @@ dependencies = [
"zxing-cpp~=2.3.0",
]
optional-dependencies.mariadb = [
"mysqlclient~=2.2.7",
]
optional-dependencies.postgres = [
"psycopg[c]==3.2.4",
# Direct dependency for proper resolution of the pre-built wheels
"psycopg-c==3.2.4",
]
optional-dependencies.webserver = [
"granian~=1.7.6",
]
[dependency-groups]
dev = [

View File

@ -26,7 +26,6 @@ from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import FileInfo
from documents.models import StoragePath
from documents.models import Tag
from documents.models import WorkflowTrigger
@ -705,8 +704,6 @@ class ConsumerPlugin(
) -> Document:
# If someone gave us the original filename, use it instead of doc.
file_info = FileInfo.from_filename(self.filename)
self.log.debug("Saving record to database")
if self.metadata.created is not None:
@ -714,9 +711,6 @@ class ConsumerPlugin(
self.log.debug(
f"Creation date from post_documents parameter: {create_date}",
)
elif file_info.created is not None:
create_date = file_info.created
self.log.debug(f"Creation date from FileInfo: {create_date}")
elif date is not None:
create_date = date
self.log.debug(f"Creation date from parse_date: {create_date}")
@ -729,7 +723,11 @@ class ConsumerPlugin(
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
title = file_info.title
if self.metadata.filename:
title = Path(self.metadata.filename).stem
else:
title = self.input_doc.original_file.stem
if self.metadata.title is not None:
try:
title = self._parse_title_placeholders(self.metadata.title)

View File

@ -1,11 +1,7 @@
import datetime
import os
import re
from collections import OrderedDict
from pathlib import Path
from typing import Final
import dateutil.parser
import pathvalidate
from celery import states
from django.conf import settings
@ -517,91 +513,6 @@ class SavedViewFilterRule(models.Model):
return f"SavedViewFilterRule: {self.rule_type} : {self.value}"
# TODO: why is this in the models file?
# TODO: how about, what is this and where is it documented?
# It appears to parsing JSON from an environment variable to get a title and date from
# the filename, if possible, as a higher priority than either document filename or
# content parsing
class FileInfo:
REGEXES = OrderedDict(
[
(
"created-title",
re.compile(
r"^(?P<created>\d{8}(\d{6})?Z) - (?P<title>.*)$",
flags=re.IGNORECASE,
),
),
("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
],
)
def __init__(
self,
created=None,
correspondent=None,
title=None,
tags=(),
extension=None,
):
self.created = created
self.title = title
self.extension = extension
self.correspondent = correspondent
self.tags = tags
@classmethod
def _get_created(cls, created):
try:
return dateutil.parser.parse(f"{created[:-1]:0<14}Z")
except ValueError:
return None
@classmethod
def _get_title(cls, title):
return title
@classmethod
def _mangle_property(cls, properties, name):
if name in properties:
properties[name] = getattr(cls, f"_get_{name}")(properties[name])
@classmethod
def from_filename(cls, filename) -> "FileInfo":
# Mutate filename in-place before parsing its components
# by applying at most one of the configured transformations.
for pattern, repl in settings.FILENAME_PARSE_TRANSFORMS:
(filename, count) = pattern.subn(repl, filename)
if count:
break
# do this after the transforms so that the transforms can do whatever
# with the file extension.
filename_no_ext = os.path.splitext(filename)[0]
if filename_no_ext == filename and filename.startswith("."):
# This is a very special case where there is no text before the
# file type.
# TODO: this should be handled better. The ext is not removed
# because usually, files like '.pdf' are just hidden files
# with the name pdf, but in our case, its more likely that
# there's just no name to begin with.
filename = ""
# This isn't too bad either, since we'll just not match anything
# and return an empty title. TODO: actually, this is kinda bad.
else:
filename = filename_no_ext
# Parse filename components.
for regex in cls.REGEXES.values():
m = regex.match(filename)
if m:
properties = m.groupdict()
cls._mangle_property(properties, "created")
cls._mangle_property(properties, "title")
return cls(**properties)
# Extending User Model Using a One-To-One Link
class UiSettings(models.Model):
user = models.OneToOneField(

View File

@ -1,12 +1,10 @@
import datetime
import os
import re
import shutil
import stat
import tempfile
import zoneinfo
from pathlib import Path
from unittest import TestCase as UnittestTestCase
from unittest import mock
from unittest.mock import MagicMock
@ -26,7 +24,6 @@ from documents.models import Correspondent
from documents.models import CustomField
from documents.models import Document
from documents.models import DocumentType
from documents.models import FileInfo
from documents.models import StoragePath
from documents.models import Tag
from documents.parsers import DocumentParser
@ -40,143 +37,6 @@ from paperless_mail.models import MailRule
from paperless_mail.parsers import MailDocumentParser
class TestAttributes(UnittestTestCase):
TAGS = ("tag1", "tag2", "tag3")
def _test_guess_attributes_from_name(self, filename, sender, title, tags):
file_info = FileInfo.from_filename(filename)
if sender:
self.assertEqual(file_info.correspondent.name, sender, filename)
else:
self.assertIsNone(file_info.correspondent, filename)
self.assertEqual(file_info.title, title, filename)
self.assertEqual(tuple(t.name for t in file_info.tags), tags, filename)
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
self._test_guess_attributes_from_name(
"- weird but should not break.pdf",
None,
"- weird but should not break",
(),
)
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
self._test_guess_attributes_from_name(
"weird but should not break -.pdf",
None,
"weird but should not break -",
(),
)
class TestFieldPermutations(TestCase):
valid_dates = (
"20150102030405Z",
"20150102Z",
)
valid_correspondents = ["timmy", "Dr. McWheelie", "Dash Gor-don", "o Θεpμaoτής", ""]
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Tίτλoς", ""]
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
def _test_guessed_attributes(
self,
filename,
created=None,
correspondent=None,
title=None,
tags=None,
):
info = FileInfo.from_filename(filename)
# Created
if created is None:
self.assertIsNone(info.created, filename)
else:
self.assertEqual(info.created.year, int(created[:4]), filename)
self.assertEqual(info.created.month, int(created[4:6]), filename)
self.assertEqual(info.created.day, int(created[6:8]), filename)
# Correspondent
if correspondent:
self.assertEqual(info.correspondent.name, correspondent, filename)
else:
self.assertEqual(info.correspondent, None, filename)
# Title
self.assertEqual(info.title, title, filename)
# Tags
if tags is None:
self.assertEqual(info.tags, (), filename)
else:
self.assertEqual([t.name for t in info.tags], tags.split(","), filename)
def test_just_title(self):
template = "{title}.pdf"
for title in self.valid_titles:
spec = dict(title=title)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_created_and_title(self):
template = "{created} - {title}.pdf"
for created in self.valid_dates:
for title in self.valid_titles:
spec = {"created": created, "title": title}
self._test_guessed_attributes(template.format(**spec), **spec)
def test_invalid_date_format(self):
info = FileInfo.from_filename("06112017Z - title.pdf")
self.assertEqual(info.title, "title")
self.assertIsNone(info.created)
def test_filename_parse_transforms(self):
filename = "tag1,tag2_20190908_180610_0001.pdf"
all_patt = re.compile("^.*$")
none_patt = re.compile("$a")
re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
# No transformations configured (= default)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
self.assertEqual(info.tags, ())
self.assertIsNone(info.created)
# Pattern doesn't match (filename unaltered)
with self.settings(FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
# Simple transformation (match all)
with self.settings(FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "all")
# Multiple transformations configured (first pattern matches)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(all_patt, "all.gif"),
(all_patt, "anotherall.gif"),
],
):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "all")
# Multiple transformations configured (second pattern matches)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(all_patt, "anotherall.gif"),
],
):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "anotherall")
class _BaseTestParser(DocumentParser):
def get_settings(self):
"""

View File

@ -3,7 +3,6 @@ import json
import math
import multiprocessing
import os
import re
import tempfile
from os import PathLike
from pathlib import Path
@ -1089,11 +1088,6 @@ FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
# fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
# Transformations applied before filename parsing
FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
# Specify the filename format for out files
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")

36
uv.lock generated
View File

@ -1,4 +1,5 @@
version = 1
revision = 1
requires-python = ">=3.10"
resolution-markers = [
"sys_platform == 'darwin'",
@ -1840,7 +1841,7 @@ wheels = [
[[package]]
name = "paperless-ngx"
version = "0.1.0"
version = "2.14.7"
source = { virtual = "." }
dependencies = [
{ name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@ -1868,21 +1869,15 @@ dependencies = [
{ name = "filelock", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "granian", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "inotifyrecursive", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "mysqlclient", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "nltk", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg", extra = ["c"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "psycopg-c", version = "3.2.4", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.4", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-gnupg", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@ -1903,6 +1898,20 @@ dependencies = [
{ name = "zxing-cpp", version = "2.3.0", source = { url = "https://github.com/paperless-ngx/builder/releases/download/zxing-2.3.0/zxing_cpp-2.3.0-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
[package.optional-dependencies]
mariadb = [
{ name = "mysqlclient", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
postgres = [
{ name = "psycopg", extra = ["c"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "psycopg-c", version = "3.2.4", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_aarch64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
{ name = "psycopg-c", version = "3.2.4", source = { url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_x86_64.whl" }, marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
webserver = [
{ name = "granian", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
[package.dev-dependencies]
dev = [
{ name = "daphne", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@ -1990,21 +1999,21 @@ requires-dist = [
{ name = "filelock", specifier = "~=3.17.0" },
{ name = "flower", specifier = "~=2.0.1" },
{ name = "gotenberg-client", specifier = "~=0.9.0" },
{ name = "granian", specifier = "~=1.7.6" },
{ name = "granian", marker = "extra == 'webserver'", specifier = "~=1.7.6" },
{ name = "httpx-oauth", specifier = "~=0.16" },
{ name = "imap-tools", specifier = "~=1.10.0" },
{ name = "inotifyrecursive", specifier = "~=0.3" },
{ name = "jinja2", specifier = "~=3.1.5" },
{ name = "langdetect", specifier = "~=1.0.9" },
{ name = "mysqlclient", specifier = "~=2.2.7" },
{ name = "mysqlclient", marker = "extra == 'mariadb'", specifier = "~=2.2.7" },
{ name = "nltk", specifier = "~=3.9.1" },
{ name = "ocrmypdf", specifier = "~=16.9.0" },
{ name = "pathvalidate", specifier = "~=3.2.3" },
{ name = "pdf2image", specifier = "~=1.17.0" },
{ name = "psycopg", extras = ["c"], specifier = "==3.2.4" },
{ name = "psycopg-c", marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64') or (python_full_version != '3.12.*' and platform_machine == 'x86_64') or (platform_machine != 'aarch64' and platform_machine != 'x86_64') or sys_platform != 'linux'", specifier = "==3.2.4" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_aarch64.whl" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_x86_64.whl" },
{ name = "psycopg", extras = ["c"], marker = "extra == 'postgres'", specifier = "==3.2.4" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_aarch64.whl" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.4/psycopg_c-3.2.4-cp312-cp312-linux_x86_64.whl" },
{ name = "psycopg-c", marker = "(python_full_version != '3.12.*' and platform_machine == 'aarch64' and extra == 'postgres') or (python_full_version != '3.12.*' and platform_machine == 'x86_64' and extra == 'postgres') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'postgres') or (sys_platform != 'linux' and extra == 'postgres')", specifier = "==3.2.4" },
{ name = "python-dateutil", specifier = "~=2.9.0" },
{ name = "python-dotenv", specifier = "~=1.0.1" },
{ name = "python-gnupg", specifier = "~=0.5.4" },
@ -2024,6 +2033,7 @@ requires-dist = [
{ name = "zxing-cpp", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", url = "https://github.com/paperless-ngx/builder/releases/download/zxing-2.3.0/zxing_cpp-2.3.0-cp312-cp312-linux_aarch64.whl" },
{ name = "zxing-cpp", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux'", url = "https://github.com/paperless-ngx/builder/releases/download/zxing-2.3.0/zxing_cpp-2.3.0-cp312-cp312-linux_x86_64.whl" },
]
provides-extras = ["mariadb", "postgres", "webserver"]
[package.metadata.requires-dev]
dev = [