Merge branch 'dev' into feature-document-versions-1218

This commit is contained in:
shamoon
2026-02-09 23:41:44 -08:00
724 changed files with 146173 additions and 72126 deletions

View File

@@ -1,5 +1,4 @@
# this is here so that django finds the checks.
from documents.checks import changed_password_check
from documents.checks import parser_check
__all__ = ["changed_password_check", "parser_check"]
__all__ = ["parser_check"]

View File

@@ -13,6 +13,7 @@ from documents.models import PaperlessTask
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.tasks import update_document_parent_tags
@@ -60,7 +61,6 @@ class DocumentAdmin(GuardedModelAdmin):
"added",
"modified",
"mime_type",
"storage_type",
"filename",
"checksum",
"archive_filename",
@@ -185,6 +185,22 @@ class ShareLinksAdmin(GuardedModelAdmin):
return super().get_queryset(request).select_related("document__correspondent")
class ShareLinkBundleAdmin(GuardedModelAdmin):
list_display = ("created", "status", "expiration", "owner", "slug")
list_filter = ("status", "created", "expiration", "owner")
search_fields = ("slug",)
def get_queryset(self, request): # pragma: no cover
return (
super()
.get_queryset(request)
.select_related("owner")
.prefetch_related(
"documents",
)
)
class CustomFieldsAdmin(GuardedModelAdmin):
fields = ("name", "created", "data_type")
readonly_fields = ("created", "data_type")
@@ -216,6 +232,7 @@ admin.site.register(StoragePath, StoragePathAdmin)
admin.site.register(PaperlessTask, TaskAdmin)
admin.site.register(Note, NotesAdmin)
admin.site.register(ShareLink, ShareLinksAdmin)
admin.site.register(ShareLinkBundle, ShareLinkBundleAdmin)
admin.site.register(CustomField, CustomFieldsAdmin)
admin.site.register(CustomFieldInstance, CustomFieldInstancesAdmin)

View File

@@ -7,10 +7,11 @@ class DocumentsConfig(AppConfig):
verbose_name = _("Documents")
def ready(self):
def ready(self) -> None:
from documents.signals import document_consumption_finished
from documents.signals import document_updated
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_or_update_document_in_llm_index
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
@@ -26,6 +27,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_workflows_added)
document_consumption_finished.connect(add_or_update_document_in_llm_index)
document_updated.connect(run_workflows_updated)
import documents.schema # noqa: F401

View File

@@ -16,6 +16,7 @@ from pikepdf import Pdf
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.models import Document
from documents.models import Tag
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import StopConsumeTaskError
@@ -60,6 +61,20 @@ class Barcode:
"""
return self.value.startswith(self.settings.barcode_asn_prefix)
@property
def is_tag(self) -> bool:
"""
Returns True if the barcode value matches any configured tag mapping pattern,
False otherwise.
Note: This does NOT exclude ASN or separator barcodes - they can also be used
as tags if they match a tag mapping pattern (e.g., {"ASN12.*": "JOHN"}).
"""
for regex in self.settings.barcode_tag_mapping:
if re.match(regex, self.value, flags=re.IGNORECASE):
return True
return False
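A minimal sketch of the matching rule above, using a hypothetical tag mapping (the settings shape is assumed, not taken from a real configuration):

import re

barcode_tag_mapping = {"ASN12.*": "JOHN", "INVOICE.*": "invoice"}

def is_tag(value: str) -> bool:
    # Same start-anchored, case-insensitive check as the property above.
    return any(
        re.match(regex, value, flags=re.IGNORECASE)
        for regex in barcode_tag_mapping
    )

print(is_tag("asn12345"))   # True: an ASN barcode can also act as a tag
print(is_tag("SEPARATOR"))  # False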
class BarcodePlugin(ConsumeTaskPlugin):
NAME: str = "BarcodePlugin"
@@ -115,6 +130,24 @@ class BarcodePlugin(ConsumeTaskPlugin):
self._tiff_conversion_done = False
self.barcodes: list[Barcode] = []
def _apply_detected_asn(self, detected_asn: int) -> None:
"""
Apply a detected ASN to metadata if allowed.
"""
if (
self.metadata.skip_asn_if_exists
and Document.global_objects.filter(
archive_serial_number=detected_asn,
).exists()
):
logger.info(
f"Found ASN in barcode {detected_asn} but skipping because it already exists.",
)
return
logger.info(f"Found ASN in barcode: {detected_asn}")
self.metadata.asn = detected_asn
def run(self) -> None:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
@@ -126,8 +159,14 @@ class BarcodePlugin(ConsumeTaskPlugin):
self.detect()
# try reading tags from barcodes
# If tag splitting is enabled, skip this on the original document - let each split document extract its own tags
# However, if we're processing a split document (original_path is set), extract tags
if (
self.settings.barcode_enable_tag
and (
not self.settings.barcode_tag_split
or self.input_doc.original_path is not None
)
and (tags := self.tags) is not None
and len(tags) > 0
):
@@ -164,6 +203,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
mailrule_id=self.input_doc.mailrule_id,
# Can't use same folder or the consume might grab it again
original_file=(tmp_dir / new_document.name).resolve(),
# Adding optional original_path for later use in
# workflow matching
original_path=self.input_doc.original_file,
),
# All the same metadata
self.metadata,
@@ -184,8 +226,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
# Update/overwrite an ASN if possible
# After splitting, as otherwise each split document gets the same ASN
if self.settings.barcode_enable_asn and (located_asn := self.asn) is not None:
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn
self._apply_detected_asn(located_asn)
def cleanup(self) -> None:
self.temp_dir.cleanup()
@@ -425,15 +466,24 @@ class BarcodePlugin(ConsumeTaskPlugin):
for bc in self.barcodes
if bc.is_separator and (not retain or (retain and bc.page > 0))
} # as below, don't include the first page if retain is enabled
if not self.settings.barcode_enable_asn:
return separator_pages
# add the page numbers of the ASN barcodes
# (except for first page, that might lead to infinite loops).
return {
**separator_pages,
**{bc.page: True for bc in self.barcodes if bc.is_asn and bc.page != 0},
}
if self.settings.barcode_enable_asn:
separator_pages = {
**separator_pages,
**{bc.page: True for bc in self.barcodes if bc.is_asn and bc.page != 0},
}
# add the page numbers of the TAG barcodes if splitting is enabled
# (except for first page, that might lead to infinite loops).
if self.settings.barcode_tag_split and self.settings.barcode_enable_tag:
separator_pages = {
**separator_pages,
**{bc.page: True for bc in self.barcodes if bc.is_tag and bc.page != 0},
}
return separator_pages
def separate_pages(self, pages_to_split_on: dict[int, bool]) -> list[Path]:
"""

View File

@@ -1,12 +1,12 @@
from __future__ import annotations
import hashlib
import logging
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Literal
from celery import chain
from celery import chord
from celery import group
from celery import shared_task
@@ -37,6 +37,42 @@ if TYPE_CHECKING:
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
@shared_task(bind=True)
def restore_archive_serial_numbers_task(
self,
backup: dict[int, int | None],
*args,
**kwargs,
) -> None:
restore_archive_serial_numbers(backup)
def release_archive_serial_numbers(doc_ids: list[int]) -> dict[int, int | None]:
"""
Clears ASNs on documents that are about to be replaced so new documents
can be assigned ASNs without uniqueness collisions. Returns a backup map
of doc_id -> previous ASN for potential restoration.
"""
qs = Document.objects.filter(
id__in=doc_ids,
archive_serial_number__isnull=False,
).only("pk", "archive_serial_number")
backup = dict(qs.values_list("pk", "archive_serial_number"))
qs.update(archive_serial_number=None)
logger.info(f"Released archive serial numbers for documents {list(backup.keys())}")
return backup
def restore_archive_serial_numbers(backup: dict[int, int | None]) -> None:
"""
Restores ASNs using the provided backup map, intended for
rollback when replacement consumption fails.
"""
for doc_id, asn in backup.items():
Document.objects.filter(pk=doc_id).update(archive_serial_number=asn)
logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")
def set_correspondent(
doc_ids: list[int],
correspondent: Correspondent,
@@ -304,10 +340,10 @@ def reprocess(doc_ids: list[int]) -> Literal["OK"]:
def set_permissions(
doc_ids: list[int],
set_permissions,
set_permissions: dict,
*,
owner=None,
merge=False,
owner: User | None = None,
merge: bool = False,
) -> Literal["OK"]:
qs = Document.objects.filter(id__in=doc_ids).select_related("owner")
@@ -389,6 +425,7 @@ def merge(
merged_pdf = pikepdf.new()
version: str = merged_pdf.pdf_version
handoff_asn: int | None = None
# use doc_ids to preserve order
for doc_id in doc_ids:
doc = qs.get(id=doc_id)
@@ -404,6 +441,8 @@ def merge(
version = max(version, pdf.pdf_version)
merged_pdf.pages.extend(pdf.pages)
affected_docs.append(doc.id)
if handoff_asn is None and doc.archive_serial_number is not None:
handoff_asn = doc.archive_serial_number
except Exception as e:
logger.exception(
f"Error merging document {doc.id}, it will not be included in the merge: {e}",
@@ -429,6 +468,8 @@ def merge(
DocumentMetadataOverrides.from_document(metadata_document)
)
overrides.title = metadata_document.title + " (merged)"
if metadata_document.archive_serial_number is not None:
handoff_asn = metadata_document.archive_serial_number
else:
overrides = DocumentMetadataOverrides()
else:
@@ -436,6 +477,11 @@ def merge(
if user is not None:
overrides.owner_id = user.id
if not delete_originals:
overrides.skip_asn_if_exists = True
if delete_originals and handoff_asn is not None:
overrides.asn = handoff_asn
logger.info("Adding merged document to the task queue.")
@@ -448,12 +494,20 @@ def merge(
)
if delete_originals:
backup = release_archive_serial_numbers(affected_docs)
logger.info(
"Queueing removal of original documents after consumption of merged document",
)
chain(consume_task, delete.si(affected_docs)).delay()
else:
consume_task.delay()
try:
consume_task.apply_async(
link=[delete.si(affected_docs)],
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
consume_task.delay()
return "OK"
@@ -495,6 +549,8 @@ def split(
overrides.title = f"{doc.title} (split {idx + 1})"
if user is not None:
overrides.owner_id = user.id
if not delete_originals:
overrides.skip_asn_if_exists = True
logger.info(
f"Adding split document with pages {split_doc} to the task queue.",
)
@@ -509,10 +565,20 @@ def split(
)
if delete_originals:
backup = release_archive_serial_numbers([doc.id])
logger.info(
"Queueing removal of original document after consumption of the split documents",
)
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
try:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).apply_async(
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
group(consume_tasks).delay()
@@ -564,7 +630,7 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
def edit_pdf(
doc_ids: list[int],
operations: list[dict],
operations: list[dict[str, int]],
*,
delete_original: bool = False,
update_document: bool = False,
@@ -639,7 +705,10 @@ def edit_pdf(
)
if user is not None:
overrides.owner_id = user.id
if not delete_original:
overrides.skip_asn_if_exists = True
if delete_original and len(pdf_docs) == 1:
overrides.asn = doc.archive_serial_number
for idx, pdf in enumerate(pdf_docs, start=1):
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
@@ -658,7 +727,17 @@ def edit_pdf(
)
if delete_original:
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
backup = release_archive_serial_numbers([doc.id])
try:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).apply_async(
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
group(consume_tasks).delay()
@@ -671,11 +750,82 @@ def edit_pdf(
return "OK"
def remove_password(
doc_ids: list[int],
password: str,
*,
update_document: bool = False,
delete_original: bool = False,
include_metadata: bool = True,
user: User | None = None,
) -> Literal["OK"]:
"""
Remove password protection from PDF documents.
"""
import pikepdf
for doc_id in doc_ids:
doc = Document.objects.get(id=doc_id)
try:
logger.info(
f"Attempting password removal from document {doc_ids[0]}",
)
with pikepdf.open(doc.source_path, password=password) as pdf:
temp_path = doc.source_path.with_suffix(".tmp.pdf")
pdf.remove_unreferenced_resources()
pdf.save(temp_path)
if update_document:
# replace the original document with the unprotected one
temp_path.replace(doc.source_path)
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
doc.page_count = len(pdf.pages)
doc.save()
update_document_content_maybe_archive_file.delay(document_id=doc.id)
else:
consume_tasks = []
overrides = (
DocumentMetadataOverrides().from_document(doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_unprotected.pdf"
)
temp_path.replace(filepath)
consume_tasks.append(
consume_file.s(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
),
overrides,
),
)
if delete_original:
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
else:
group(consume_tasks).delay()
except Exception as e:
logger.exception(f"Error removing password from document {doc.id}: {e}")
raise ValueError(
f"An error occurred while removing the password: {e}",
) from e
return "OK"
def reflect_doclinks(
document: Document,
field: CustomField,
target_doc_ids: list[int],
):
) -> None:
"""
Add or remove 'symmetrical' links to `document` on all `target_doc_ids`
"""
@@ -738,7 +888,7 @@ def remove_doclink(
document: Document,
field: CustomField,
target_doc_id: int,
):
) -> None:
"""
Removes a 'symmetrical' link to `document` from the target document's existing custom field instance
"""

View File

@@ -41,6 +41,7 @@ class SuggestionCacheData:
CLASSIFIER_VERSION_KEY: Final[str] = "classifier_version"
CLASSIFIER_HASH_KEY: Final[str] = "classifier_hash"
CLASSIFIER_MODIFIED_KEY: Final[str] = "classifier_modified"
LLM_CACHE_CLASSIFIER_VERSION: Final[int] = 1000 # Marker distinguishing LLM suggestions
CACHE_1_MINUTE: Final[int] = 60
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
@@ -196,6 +197,54 @@ def refresh_suggestions_cache(
cache.touch(doc_key, timeout)
def get_llm_suggestion_cache(
document_id: int,
backend: str,
) -> SuggestionCacheData | None:
doc_key = get_suggestion_cache_key(document_id)
data: SuggestionCacheData = cache.get(doc_key)
if data and data.classifier_hash == backend:
return data
return None
def set_llm_suggestions_cache(
document_id: int,
suggestions: dict,
*,
backend: str,
timeout: int = CACHE_50_MINUTES,
) -> None:
"""
Cache LLM-generated suggestions using a backend-specific identifier (e.g. 'openai:gpt-4').
"""
doc_key = get_suggestion_cache_key(document_id)
cache.set(
doc_key,
SuggestionCacheData(
classifier_version=LLM_CACHE_CLASSIFIER_VERSION,
classifier_hash=backend,
suggestions=suggestions,
),
timeout,
)
def invalidate_llm_suggestions_cache(
document_id: int,
) -> None:
"""
Invalidate the LLM suggestions cache for a specific document.
"""
doc_key = get_suggestion_cache_key(document_id)
data: SuggestionCacheData = cache.get(doc_key)
if data:
cache.delete(doc_key)
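Hypothetical usage of the three helpers above (the backend string and payload are illustrative, and a configured Django cache is assumed):

suggestions = {"tags": ["invoice"], "correspondent": "ACME"}

set_llm_suggestions_cache(42, suggestions, backend="openai:gpt-4")

# A hit requires the same backend identifier, since it is stored as the
# classifier_hash of the cached entry.
assert get_llm_suggestion_cache(42, backend="openai:gpt-4") is not None
assert get_llm_suggestion_cache(42, backend="ollama:llama3") is None

invalidate_llm_suggestions_cache(42)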
def get_metadata_cache_key(document_id: int) -> str:
"""
Returns the basic key for a document's metadata

View File

@@ -1,60 +1,12 @@
import textwrap
from django.conf import settings
from django.core.checks import Error
from django.core.checks import Warning
from django.core.checks import register
from django.core.exceptions import FieldError
from django.db.utils import OperationalError
from django.db.utils import ProgrammingError
from documents.signals import document_consumer_declaration
from documents.templating.utils import convert_format_str_to_template_format
@register()
def changed_password_check(app_configs, **kwargs):
from documents.models import Document
from paperless.db import GnuPG
try:
encrypted_doc = (
Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
.only("pk", "storage_type")
.first()
)
except (OperationalError, ProgrammingError, FieldError):
return [] # No documents table yet
if encrypted_doc:
if not settings.PASSPHRASE:
return [
Error(
"The database contains encrypted documents but no password is set.",
),
]
if not GnuPG.decrypted(encrypted_doc.source_file):
return [
Error(
textwrap.dedent(
"""
The current password doesn't match the password of the
existing documents.
If you intend to change your password, you must first export
all of the old documents, start fresh with the new password
and then re-import them.
""",
),
),
]
return []
@register()
def parser_check(app_configs, **kwargs):
parsers = []

View File

@@ -122,7 +122,7 @@ class DocumentClassifier:
)
self._stop_words = None
def _update_data_vectorizer_hash(self):
def _update_data_vectorizer_hash(self) -> None:
self.data_vectorizer_hash = sha256(
pickle.dumps(self.data_vectorizer),
).hexdigest()

View File

@@ -5,6 +5,7 @@ import tempfile
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Final
import magic
from django.conf import settings
@@ -32,12 +33,12 @@ from documents.models import WorkflowTrigger
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date
from documents.permissions import set_permissions_for_object
from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import NoCleanupPluginMixin
from documents.plugins.base import NoSetupPluginMixin
from documents.plugins.date_parsing import get_date_parser
from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions
from documents.signals import document_consumption_finished
@@ -49,6 +50,8 @@ from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess
from paperless_mail.parsers import MailDocumentParser
LOGGING_NAME: Final[str] = "paperless.consumer"
class WorkflowTriggerPlugin(
NoCleanupPluginMixin,
@@ -126,7 +129,7 @@ class ConsumerPluginMixin:
status: ProgressStatusOptions,
message: ConsumerStatusShortMessage | str | None = None,
document_id=None,
): # pragma: no cover
) -> None: # pragma: no cover
self.status_mgr.send_progress(
status,
message,
@@ -162,9 +165,9 @@ class ConsumerPlugin(
ConsumerPluginMixin,
ConsumeTaskPlugin,
):
logging_name = "paperless.consumer"
logging_name = LOGGING_NAME
def run_pre_consume_script(self):
def run_pre_consume_script(self) -> None:
"""
If one is configured and exists, run the pre-consume script and
handle its output and/or errors
@@ -207,7 +210,7 @@ class ConsumerPlugin(
exception=e,
)
def run_post_consume_script(self, document: Document):
def run_post_consume_script(self, document: Document) -> None:
"""
If one is configured and exists, run the post-consume script and
handle its output and/or errors
@@ -367,7 +370,10 @@ class ConsumerPlugin(
tempdir.cleanup()
raise
def progress_callback(current_progress, max_progress): # pragma: no cover
def progress_callback(
current_progress,
max_progress,
) -> None: # pragma: no cover
# recalculate progress to be within 20 and 80
p = int((current_progress / max_progress) * 50 + 20)
self._send_progress(p, 100, ProgressStatusOptions.WORKING)
@@ -432,7 +438,8 @@ class ConsumerPlugin(
ProgressStatusOptions.WORKING,
ConsumerStatusShortMessage.PARSE_DATE,
)
date = parse_date(self.filename, text)
with get_date_parser() as date_parser:
date = next(date_parser.parse(self.filename, text), None)
archive_path = document_parser.get_archive_path()
page_count = document_parser.get_page_count(self.working_copy, mime_type)
@@ -535,7 +542,6 @@ class ConsumerPlugin(
create_source_path_directory(document.source_path)
self._write(
document.storage_type,
self.unmodified_original
if self.unmodified_original is not None
else self.working_copy,
@@ -543,7 +549,6 @@ class ConsumerPlugin(
)
self._write(
document.storage_type,
thumbnail,
document.thumbnail_path,
)
@@ -555,7 +560,6 @@ class ConsumerPlugin(
)
create_source_path_directory(document.archive_path)
self._write(
document.storage_type,
archive_path,
document.archive_path,
)
@@ -675,8 +679,6 @@ class ConsumerPlugin(
)
self.log.debug(f"Creation date from st_mtime: {create_date}")
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
if self.metadata.filename:
title = Path(self.metadata.filename).stem
else:
@@ -703,7 +705,6 @@ class ConsumerPlugin(
checksum=hashlib.md5(file_for_checksum.read_bytes()).hexdigest(),
created=create_date,
modified=create_date,
storage_type=storage_type,
page_count=page_count,
original_filename=self.filename,
)
@@ -714,7 +715,7 @@ class ConsumerPlugin(
return document
def apply_overrides(self, document):
def apply_overrides(self, document) -> None:
if self.metadata.correspondent_id:
document.correspondent = Correspondent.objects.get(
pk=self.metadata.correspondent_id,
@@ -774,7 +775,7 @@ class ConsumerPlugin(
}
CustomFieldInstance.objects.create(**args) # adds to document
def _write(self, storage_type, source, target):
def _write(self, source, target) -> None:
with (
Path(source).open("rb") as read_file,
Path(target).open("wb") as write_file,
@@ -797,9 +798,9 @@ class ConsumerPreflightPlugin(
ConsumeTaskPlugin,
):
NAME: str = "ConsumerPreflightPlugin"
logging_name = "paperless.consumer"
logging_name = LOGGING_NAME
def pre_check_file_exists(self):
def pre_check_file_exists(self) -> None:
"""
Confirm the input file still exists where it should
"""
@@ -813,7 +814,7 @@ class ConsumerPreflightPlugin(
f"Cannot consume {self.input_doc.original_file}: File not found.",
)
def pre_check_duplicate(self):
def pre_check_duplicate(self) -> None:
"""
Using the MD5 of the file, check this exact file doesn't already exist
"""
@@ -823,21 +824,47 @@ class ConsumerPreflightPlugin(
Q(checksum=checksum) | Q(archive_checksum=checksum),
)
if existing_doc.exists():
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
if existing_doc.first().deleted_at is not None:
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
log_msg += " Note: existing document is in the trash."
if settings.CONSUMER_DELETE_DUPLICATES:
Path(self.input_doc.original_file).unlink()
self._fail(
msg,
log_msg,
existing_doc = existing_doc.order_by("-created")
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
log_msg = (
f"Consuming duplicate {self.filename}: "
f"{existing_doc.count()} existing document(s) share the same content."
)
def pre_check_directories(self):
if duplicates_in_trash.exists():
log_msg += " Note: at least one existing document is in the trash."
self.log.warning(log_msg)
if settings.CONSUMER_DELETE_DUPLICATES:
duplicate = existing_doc.first()
duplicate_label = (
duplicate.title
or duplicate.original_filename
or (Path(duplicate.filename).name if duplicate.filename else None)
or str(duplicate.pk)
)
Path(self.input_doc.original_file).unlink()
failure_msg = (
f"Not consuming {self.filename}: "
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
)
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
if duplicates_in_trash.exists():
status_msg = (
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
)
failure_msg += " Note: existing document is in the trash."
self._fail(
status_msg,
failure_msg,
)
def pre_check_directories(self) -> None:
"""
Ensure all required directories exist before attempting to use them
"""
@@ -846,12 +873,38 @@ class ConsumerPreflightPlugin(
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
def pre_check_asn_value(self):
def run(self) -> None:
self._send_progress(
0,
100,
ProgressStatusOptions.STARTED,
ConsumerStatusShortMessage.NEW_FILE,
)
# Make sure that preconditions for consuming the file are met.
self.pre_check_file_exists()
self.pre_check_duplicate()
self.pre_check_directories()
class AsnCheckPlugin(
NoCleanupPluginMixin,
NoSetupPluginMixin,
AlwaysRunPluginMixin,
LoggingMixin,
ConsumerPluginMixin,
ConsumeTaskPlugin,
):
NAME: str = "AsnCheckPlugin"
logging_name = LOGGING_NAME
def pre_check_asn_value(self) -> None:
"""
Check that if override_asn is given, it is unique and within a valid range
"""
if self.metadata.asn is None:
# check not necessary in case no ASN gets set
# if ASN is None
return
# Validate the range is above zero and less than uint32_t max
# otherwise, Whoosh can't handle it in the index
@@ -883,16 +936,4 @@ class ConsumerPreflightPlugin(
)
def run(self) -> None:
self._send_progress(
0,
100,
ProgressStatusOptions.STARTED,
ConsumerStatusShortMessage.NEW_FILE,
)
# Make sure that preconditions for consuming the file are met.
self.pre_check_file_exists()
self.pre_check_duplicate()
self.pre_check_directories()
self.pre_check_asn_value()

View File

@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
document_type_id: int | None = None
tag_ids: list[int] | None = None
storage_path_id: int | None = None
created: datetime.datetime | None = None
created: datetime.date | None = None
asn: int | None = None
owner_id: int | None = None
view_users: list[int] | None = None
@@ -30,6 +30,7 @@ class DocumentMetadataOverrides:
change_users: list[int] | None = None
change_groups: list[int] | None = None
custom_fields: dict | None = None
skip_asn_if_exists: bool = False
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
"""
@@ -49,6 +50,8 @@ class DocumentMetadataOverrides:
self.storage_path_id = other.storage_path_id
if other.owner_id is not None:
self.owner_id = other.owner_id
if other.skip_asn_if_exists:
self.skip_asn_if_exists = True
# merge
if self.tag_ids is None:
@@ -100,6 +103,7 @@ class DocumentMetadataOverrides:
overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
overrides.owner_id = doc.owner.id if doc.owner else None
overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
overrides.created = doc.created
overrides.view_users = list(
get_users_with_perms(
@@ -114,7 +118,7 @@ class DocumentMetadataOverrides:
).values_list("id", flat=True),
)
overrides.custom_fields = {
custom_field.id: custom_field.value
custom_field.field.id: custom_field.value
for custom_field in doc.custom_fields.all()
}
@@ -157,10 +161,11 @@ class ConsumableDocument:
source: DocumentSource
original_file: Path
head_version_id: int | None = None
original_path: Path | None = None
mailrule_id: int | None = None
mime_type: str = dataclasses.field(init=False, default=None)
def __post_init__(self):
def __post_init__(self) -> None:
"""
After a dataclass is initialized, this is called to finalize some data
1. Make sure the original path is an absolute, fully qualified path

View File

@@ -99,37 +99,37 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
return new_filename
def format_filename(document: Document, template_str: str) -> str | None:
rendered_filename = validate_filepath_template_and_render(
template_str,
document,
)
if rendered_filename is None:
return None
# Apply this setting. It could become a filter in the future (or users could use |default)
if settings.FILENAME_FORMAT_REMOVE_NONE:
rendered_filename = rendered_filename.replace("/-none-/", "/")
rendered_filename = rendered_filename.replace(" -none-", "")
rendered_filename = rendered_filename.replace("-none-", "")
rendered_filename = rendered_filename.strip(os.sep)
rendered_filename = rendered_filename.replace(
"-none-",
"none",
) # backward compatibility
return rendered_filename
def generate_filename(
doc: Document,
*,
counter=0,
append_gpg=True,
archive_filename=False,
) -> Path:
base_path: Path | None = None
def format_filename(document: Document, template_str: str) -> str | None:
rendered_filename = validate_filepath_template_and_render(
template_str,
document,
)
if rendered_filename is None:
return None
# Apply this setting. It could become a filter in the future (or users could use |default)
if settings.FILENAME_FORMAT_REMOVE_NONE:
rendered_filename = rendered_filename.replace("/-none-/", "/")
rendered_filename = rendered_filename.replace(" -none-", "")
rendered_filename = rendered_filename.replace("-none-", "")
rendered_filename = rendered_filename.strip(os.sep)
rendered_filename = rendered_filename.replace(
"-none-",
"none",
) # backward compatibility
return rendered_filename
# Determine the source of the format string
if doc.storage_path is not None:
filename_format = doc.storage_path.path
@@ -169,8 +169,4 @@ def generate_filename(
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
full_path = Path(final_filename)
# Add GPG extension if needed
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
return full_path

View File

@@ -39,6 +39,7 @@ from documents.models import Document
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
@@ -92,6 +93,12 @@ class TagFilterSet(FilterSet):
"name": CHAR_KWARGS,
}
is_root = BooleanFilter(
label="Is root tag",
field_name="tn_parent",
lookup_expr="isnull",
)
class DocumentTypeFilterSet(FilterSet):
class Meta:
@@ -113,7 +120,7 @@ class StoragePathFilterSet(FilterSet):
class ObjectFilter(Filter):
def __init__(self, *, exclude=False, in_list=False, field_name=""):
def __init__(self, *, exclude=False, in_list=False, field_name="") -> None:
super().__init__()
self.exclude = exclude
self.in_list = in_list
@@ -154,6 +161,7 @@ class InboxFilter(Filter):
@extend_schema_field(serializers.CharField)
class TitleContentFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
else:
@@ -208,6 +216,7 @@ class CustomFieldFilterSet(FilterSet):
@extend_schema_field(serializers.CharField)
class CustomFieldsFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
fields_with_matching_selects = CustomField.objects.filter(
extra_data__icontains=value,
@@ -238,6 +247,7 @@ class CustomFieldsFilter(Filter):
class MimeTypeFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
return qs.filter(mime_type__icontains=value)
else:
@@ -245,7 +255,7 @@ class MimeTypeFilter(Filter):
class SelectField(serializers.CharField):
def __init__(self, custom_field: CustomField):
def __init__(self, custom_field: CustomField) -> None:
self._options = custom_field.extra_data["select_options"]
super().__init__(max_length=16)
@@ -666,7 +676,7 @@ class CustomFieldQueryParser:
@extend_schema_field(serializers.CharField)
class CustomFieldQueryFilter(Filter):
def __init__(self, validation_prefix):
def __init__(self, validation_prefix) -> None:
"""
A filter that filters documents based on custom field name and value.
@@ -787,6 +797,29 @@ class ShareLinkFilterSet(FilterSet):
}
class ShareLinkBundleFilterSet(FilterSet):
documents = Filter(method="filter_documents")
class Meta:
model = ShareLinkBundle
fields = {
"created": DATETIME_KWARGS,
"expiration": DATETIME_KWARGS,
"status": ["exact"],
}
def filter_documents(self, queryset, name, value):
ids = []
if value:
try:
ids = [int(item) for item in value.split(",") if item]
except ValueError:
return queryset.none()
if not ids:
return queryset
return queryset.filter(documents__in=ids).distinct()
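The documents filter accepts a comma-separated id list; a small standalone sketch of its tolerant parsing, mirroring the try/except above:

def parse_ids(value: str) -> list[int] | None:
    # Empty items are skipped; any non-numeric item invalidates the whole
    # filter, which then returns queryset.none().
    try:
        return [int(item) for item in value.split(",") if item]
    except ValueError:
        return None

print(parse_ids("3,5,8"))  # [3, 5, 8]
print(parse_ids("3,,8"))   # [3, 8]
print(parse_ids("3,x"))    # None -> no results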
class PaperlessTaskFilterSet(FilterSet):
acknowledged = BooleanFilter(
label="Acknowledged",

View File

@@ -10,9 +10,11 @@ from datetime import time
from datetime import timedelta
from datetime import timezone
from shutil import rmtree
from time import sleep
from typing import TYPE_CHECKING
from typing import Literal
from dateutil.relativedelta import relativedelta
from django.conf import settings
from django.utils import timezone as django_timezone
from django.utils.timezone import get_current_timezone
@@ -31,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
from whoosh.idsets import BitSet
from whoosh.idsets import DocIdSet
from whoosh.index import FileIndex
from whoosh.index import LockError
from whoosh.index import create_in
from whoosh.index import exists_in
from whoosh.index import open_dir
@@ -96,11 +99,33 @@ def get_schema() -> Schema:
def open_index(*, recreate=False) -> FileIndex:
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception:
logger.exception("Error while opening the index, recreating.")
transient_exceptions = (FileNotFoundError, LockError)
max_retries = 3
retry_delay = 0.1
for attempt in range(max_retries + 1):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
break
except transient_exceptions as exc:
is_last_attempt = attempt == max_retries or recreate
if is_last_attempt:
logger.exception(
"Error while opening the index after retries, recreating.",
)
break
logger.warning(
"Transient error while opening the index (attempt %s/%s): %s. Retrying.",
attempt + 1,
max_retries + 1,
exc,
)
sleep(retry_delay)
except Exception:
logger.exception("Error while opening the index, recreating.")
break
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first
if settings.INDEX_DIR.is_dir():
@@ -287,15 +312,75 @@ class DelayedQuery:
self.first_score = None
self.filter_queryset = filter_queryset
self.suggested_correction = None
self._manual_hits_cache: list | None = None
def __len__(self) -> int:
if self._manual_sort_requested():
manual_hits = self._manual_hits()
return len(manual_hits)
page = self[0:1]
return len(page)
def _manual_sort_requested(self):
ordering = self.query_params.get("ordering", "")
return ordering.lstrip("-").startswith("custom_field_")
def _manual_hits(self):
if self._manual_hits_cache is None:
q, mask, suggested_correction = self._get_query()
self.suggested_correction = suggested_correction
results = self.searcher.search(
q,
mask=mask,
filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
limit=None,
)
results.fragmenter = highlight.ContextFragmenter(surround=50)
results.formatter = HtmlFormatter(tagname="span", between=" ... ")
if not self.first_score and len(results) > 0:
self.first_score = results[0].score
if self.first_score:
results.top_n = [
(
(hit[0] / self.first_score) if self.first_score else None,
hit[1],
)
for hit in results.top_n
]
hits_by_id = {hit["id"]: hit for hit in results}
matching_ids = list(hits_by_id.keys())
ordered_ids = list(
self.filter_queryset.filter(id__in=matching_ids).values_list(
"id",
flat=True,
),
)
ordered_ids = list(dict.fromkeys(ordered_ids))
self._manual_hits_cache = [
hits_by_id[_id] for _id in ordered_ids if _id in hits_by_id
]
return self._manual_hits_cache
def __getitem__(self, item):
if item.start in self.saved_results:
return self.saved_results[item.start]
if self._manual_sort_requested():
manual_hits = self._manual_hits()
start = 0 if item.start is None else item.start
stop = item.stop
hits = manual_hits[start:stop] if stop is not None else manual_hits[start:]
page = ManualResultsPage(hits)
self.saved_results[start] = page
return page
q, mask, suggested_correction = self._get_query()
self.suggested_correction = suggested_correction
sortedby, reverse = self._get_query_sortedby()
@@ -315,21 +400,33 @@ class DelayedQuery:
if not self.first_score and len(page.results) > 0 and sortedby is None:
self.first_score = page.results[0].score
page.results.top_n = list(
map(
lambda hit: (
(hit[0] / self.first_score) if self.first_score else None,
hit[1],
),
page.results.top_n,
),
)
page.results.top_n = [
(
(hit[0] / self.first_score) if self.first_score else None,
hit[1],
)
for hit in page.results.top_n
]
self.saved_results[item.start] = page
return page
class ManualResultsPage(list):
def __init__(self, hits) -> None:
super().__init__(hits)
self.results = ManualResults(hits)
class ManualResults:
def __init__(self, hits) -> None:
self._docnums = [hit.docnum for hit in hits]
def docs(self):
return self._docnums
class LocalDateParser(English):
def reverse_timezone_offset(self, d):
return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
@@ -461,32 +558,84 @@ def get_permissions_criterias(user: User | None = None) -> list:
def rewrite_natural_date_keywords(query_string: str) -> str:
"""
Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh.
This resolves timezone issues with date parsing in Whoosh and adds support
for more natural date keywords.
"""
tz = get_current_timezone()
local_now = now().astimezone(tz)
today = local_now.date()
yesterday = today - timedelta(days=1)
ranges = {
"today": (
datetime.combine(today, time.min, tzinfo=tz),
datetime.combine(today, time.max, tzinfo=tz),
),
"yesterday": (
datetime.combine(yesterday, time.min, tzinfo=tz),
datetime.combine(yesterday, time.max, tzinfo=tz),
),
}
pattern = r"(\b(?:added|created))\s*:\s*[\"']?(today|yesterday)[\"']?"
# All supported keywords
pattern = r"(\b(?:added|created|modified))\s*:\s*[\"']?(today|yesterday|this month|previous month|previous week|previous quarter|this year|previous year)[\"']?"
def repl(m):
field, keyword = m.group(1), m.group(2)
start, end = ranges[keyword]
field = m.group(1)
keyword = m.group(2).lower()
match keyword:
case "today":
start = datetime.combine(today, time.min, tzinfo=tz)
end = datetime.combine(today, time.max, tzinfo=tz)
case "yesterday":
yesterday = today - timedelta(days=1)
start = datetime.combine(yesterday, time.min, tzinfo=tz)
end = datetime.combine(yesterday, time.max, tzinfo=tz)
case "this month":
start = datetime(local_now.year, local_now.month, 1, 0, 0, 0, tzinfo=tz)
end = start + relativedelta(months=1) - timedelta(seconds=1)
case "previous month":
this_month_start = datetime(
local_now.year,
local_now.month,
1,
0,
0,
0,
tzinfo=tz,
)
start = this_month_start - relativedelta(months=1)
end = this_month_start - timedelta(seconds=1)
case "this year":
start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz)
case "previous week":
days_since_monday = local_now.weekday()
this_week_start = datetime.combine(
today - timedelta(days=days_since_monday),
time.min,
tzinfo=tz,
)
start = this_week_start - timedelta(days=7)
end = this_week_start - timedelta(seconds=1)
case "previous quarter":
current_quarter = (local_now.month - 1) // 3 + 1
this_quarter_start_month = (current_quarter - 1) * 3 + 1
this_quarter_start = datetime(
local_now.year,
this_quarter_start_month,
1,
0,
0,
0,
tzinfo=tz,
)
start = this_quarter_start - relativedelta(months=3)
end = this_quarter_start - timedelta(seconds=1)
case "previous year":
start = datetime(local_now.year - 1, 1, 1, 0, 0, 0, tzinfo=tz)
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
# Convert to UTC and format
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
return f"{field}:[{start_str} TO {end_str}]"
return re.sub(pattern, repl, query_string)
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
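Illustrative behaviour of the rewrite, assuming a local timezone of UTC on 2026-02-09 (actual bounds depend on the current date and timezone):

rewrite_natural_date_keywords('added:"previous month" invoice')
# -> 'added:[20260101000000 TO 20260131235959] invoice'

# Keywords match case-insensitively thanks to flags=re.IGNORECASE:
rewrite_natural_date_keywords("modified:TODAY")
# -> 'modified:[20260209000000 TO 20260209235959]'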

View File

@@ -3,7 +3,7 @@ import uuid
class LoggingMixin:
def renew_logging_group(self):
def renew_logging_group(self) -> None:
"""
Creates a new UUID to group subsequent log calls together with
the extra data named group

View File

@@ -1,3 +1,6 @@
from __future__ import annotations
from dataclasses import dataclass
from email import message_from_bytes
from pathlib import Path
@@ -6,15 +9,31 @@ from django.core.mail import EmailMessage
from filelock import FileLock
@dataclass(frozen=True)
class EmailAttachment:
path: Path
mime_type: str
friendly_name: str
def send_email(
subject: str,
body: str,
to: list[str],
attachment: Path | None = None,
attachment_mime_type: str | None = None,
attachments: list[EmailAttachment],
) -> int:
"""
Send an email with an optional attachment.
Send an email with attachments.
Args:
subject: Email subject
body: Email body text
to: List of recipient email addresses
attachments: List of attachments
Returns:
Number of emails sent
TODO: re-evaluate this pending https://code.djangoproject.com/ticket/35581 / https://github.com/django/django/pull/18966
"""
email = EmailMessage(
@@ -22,17 +41,46 @@ def send_email(
body=body,
to=to,
)
if attachment:
# Something could be renaming the file concurrently so it can't be attached
with FileLock(settings.MEDIA_LOCK), attachment.open("rb") as f:
content = f.read()
if attachment_mime_type == "message/rfc822":
# See https://forum.djangoproject.com/t/using-emailmessage-with-an-attached-email-file-crashes-due-to-non-ascii/37981
content = message_from_bytes(f.read())
email.attach(
filename=attachment.name,
content=content,
mimetype=attachment_mime_type,
used_filenames: set[str] = set()
# Something could be renaming the file concurrently so it can't be attached
with FileLock(settings.MEDIA_LOCK):
for attachment in attachments:
filename = _get_unique_filename(
attachment.friendly_name,
used_filenames,
)
used_filenames.add(filename)
with attachment.path.open("rb") as f:
content = f.read()
if attachment.mime_type == "message/rfc822":
# See https://forum.djangoproject.com/t/using-emailmessage-with-an-attached-email-file-crashes-due-to-non-ascii/37981
content = message_from_bytes(content)
email.attach(
filename=filename,
content=content,
mimetype=attachment.mime_type,
)
return email.send()
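A hedged usage sketch of the new signature (path and address are placeholders; configured Django email settings are assumed):

from pathlib import Path

send_email(
    subject="Your documents",
    body="See attached.",
    to=["user@example.com"],
    attachments=[
        EmailAttachment(
            path=Path("/tmp/invoice.pdf"),  # placeholder path
            mime_type="application/pdf",
            friendly_name="invoice.pdf",
        ),
    ],
)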
def _get_unique_filename(friendly_name: str, used_names: set[str]) -> str:
"""
Constructs a unique friendly filename for the given document, appending a counter if needed.
"""
if friendly_name not in used_names:
return friendly_name
suffix = "".join(Path(friendly_name).suffixes)
# removesuffix avoids duplicating multi-part suffixes such as .tar.gz
stem = friendly_name.removesuffix(suffix)
counter = 1
while True:
filename = f"{stem}_{counter:02}{suffix}"
if filename not in used_names:
return filename
counter += 1
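Usage sketch for the helper, showing how repeated friendly names are deduplicated:

used: set[str] = set()
for _ in range(3):
    name = _get_unique_filename("scan.pdf", used)
    used.add(name)
    print(name)
# scan.pdf
# scan_01.pdf
# scan_02.pdf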

View File

@@ -9,7 +9,7 @@ class Command(BaseCommand):
# This code is taken almost entirely from https://github.com/wagtail/wagtail/pull/11912 with all credit to the original author.
help = "Converts UUID columns from char type to the native UUID type used in MariaDB 10.7+ and Django 5.0+."
def convert_field(self, model, field_name, *, null=False):
def convert_field(self, model, field_name, *, null=False) -> None:
if model._meta.get_field(field_name).model != model: # pragma: no cover
# Field is inherited from a parent model
return

View File

@@ -1,93 +0,0 @@
from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from documents.models import Document
from paperless.db import GnuPG
class Command(BaseCommand):
help = (
"This is how you migrate your stored documents from an encrypted "
"state to an unencrypted one (or vice-versa)"
)
def add_arguments(self, parser) -> None:
parser.add_argument(
"--passphrase",
help=(
"If PAPERLESS_PASSPHRASE isn't set already, you need to specify it here"
),
)
def handle(self, *args, **options) -> None:
try:
self.stdout.write(
self.style.WARNING(
"\n\n"
"WARNING: This script is going to work directly on your "
"document originals, so\n"
"WARNING: you probably shouldn't run "
"this unless you've got a recent backup\n"
"WARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\n"
"Hit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
),
)
_ = input()
except KeyboardInterrupt:
return
passphrase = options["passphrase"] or settings.PASSPHRASE
if not passphrase:
raise CommandError(
"Passphrase not defined. Please set it with --passphrase or "
"by declaring it in your environment or your config.",
)
self.__gpg_to_unencrypted(passphrase)
def __gpg_to_unencrypted(self, passphrase: str) -> None:
encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
for document in encrypted_files:
self.stdout.write(f"Decrypting {document}")
old_paths = [document.source_path, document.thumbnail_path]
with document.source_file as file_handle:
raw_document = GnuPG.decrypted(file_handle, passphrase)
with document.thumbnail_file as file_handle:
raw_thumb = GnuPG.decrypted(file_handle, passphrase)
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
ext: str = Path(document.filename).suffix
if not ext == ".gpg":
raise CommandError(
f"Abort: encrypted file {document.source_path} does not "
f"end with .gpg",
)
document.filename = Path(document.filename).stem
with document.source_path.open("wb") as f:
f.write(raw_document)
with document.thumbnail_path.open("wb") as f:
f.write(raw_thumb)
Document.objects.filter(id=document.id).update(
storage_type=document.storage_type,
filename=document.filename,
)
for path in old_paths:
path.unlink()

View File

@@ -1,128 +1,343 @@
"""
Document consumer management command.
Watches a consumption directory for new documents and queues them for processing.
Uses watchfiles for efficient file system monitoring with support for both
native OS notifications and polling fallback.
"""
from __future__ import annotations
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from fnmatch import filter
from dataclasses import dataclass
from pathlib import Path
from pathlib import PurePath
from threading import Event
from time import monotonic
from time import sleep
from typing import TYPE_CHECKING
from typing import Final
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
from watchfiles import Change
from watchfiles import DefaultFilter
from watchfiles import watch
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import Tag
from documents.parsers import is_file_ext_supported
from documents.parsers import get_supported_file_extensions
from documents.tasks import consume_file
try:
from inotifyrecursive import INotify
from inotifyrecursive import flags
except ImportError: # pragma: no cover
INotify = flags = None
if TYPE_CHECKING:
from collections.abc import Iterator
logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath: Path) -> list[int]:
@dataclass
class TrackedFile:
"""Represents a file being tracked for stability."""
path: Path
last_event_time: float
last_mtime: float | None = None
last_size: int | None = None
def update_stats(self) -> bool:
"""
Update file stats. Returns True if file exists and stats were updated.
"""
try:
stat = self.path.stat()
self.last_mtime = stat.st_mtime
self.last_size = stat.st_size
return True
except OSError:
return False
def is_unchanged(self) -> bool:
"""
Check if file stats match the previously recorded values.
Returns False if file doesn't exist or stats changed.
"""
try:
stat = self.path.stat()
return stat.st_mtime == self.last_mtime and stat.st_size == self.last_size
except OSError:
return False
class FileStabilityTracker:
"""
Walk up the directory tree from filepath to CONSUMPTION_DIR
Tracks file events and determines when files are stable for consumption.
A file is considered stable when:
1. No new events have been received for it within the stability delay
2. Its size and modification time haven't changed
3. It still exists as a regular file
This handles various edge cases:
- Network copies that write in chunks
- Scanners that open/close files multiple times
- Temporary files that get renamed
- Files that are deleted before becoming stable
"""
def __init__(self, stability_delay: float = 1.0) -> None:
"""
Initialize the tracker.
Args:
stability_delay: Time in seconds a file must remain unchanged
before being considered stable.
"""
self.stability_delay = stability_delay
self._tracked: dict[Path, TrackedFile] = {}
def track(self, path: Path, change: Change) -> None:
"""
Register a file event.
Args:
path: The file path that changed.
change: The type of change (added, modified, deleted).
"""
path = path.resolve()
match change:
case Change.deleted:
self._tracked.pop(path, None)
logger.debug(f"Stopped tracking deleted file: {path}")
case Change.added | Change.modified:
current_time = monotonic()
if path in self._tracked:
tracked = self._tracked[path]
tracked.last_event_time = current_time
tracked.update_stats()
logger.debug(f"Updated tracking for: {path}")
else:
tracked = TrackedFile(path=path, last_event_time=current_time)
if tracked.update_stats():
self._tracked[path] = tracked
logger.debug(f"Started tracking: {path}")
else:
logger.debug(f"Could not stat file, not tracking: {path}")
def get_stable_files(self) -> Iterator[Path]:
"""
Yield files that have been stable for the configured delay.
Files are removed from tracking once yielded or determined to be invalid.
"""
current_time = monotonic()
to_remove: list[Path] = []
to_yield: list[Path] = []
for path, tracked in self._tracked.items():
time_since_event = current_time - tracked.last_event_time
if time_since_event < self.stability_delay:
continue
# File has waited long enough, verify it's unchanged
if not tracked.is_unchanged():
# Stats changed or file gone - update and wait again
if tracked.update_stats():
tracked.last_event_time = current_time
logger.debug(f"File changed during stability check: {path}")
else:
# File no longer exists, remove from tracking
to_remove.append(path)
logger.debug(f"File disappeared during stability check: {path}")
continue
# File is stable, we can return it
to_yield.append(path)
logger.info(f"File is stable: {path}")
# Remove files that are no longer valid
for path in to_remove:
self._tracked.pop(path, None)
# Remove and yield stable files
for path in to_yield:
self._tracked.pop(path, None)
yield path
def has_pending_files(self) -> bool:
"""Check if there are files waiting for stability check."""
return len(self._tracked) > 0
@property
def pending_count(self) -> int:
"""Number of files being tracked."""
return len(self._tracked)
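A sketch of the intended event loop around the tracker (the path is a placeholder and the file is assumed to exist when tracked):

from pathlib import Path
from time import sleep

from watchfiles import Change

tracker = FileStabilityTracker(stability_delay=1.0)

# Feed raw watch events as they arrive...
tracker.track(Path("/data/consume/scan.pdf"), Change.added)

# ...then poll until tracked files have gone quiet for the delay.
while tracker.has_pending_files():
    for stable_path in tracker.get_stable_files():
        print(f"ready to consume: {stable_path}")
    sleep(0.25)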
class ConsumerFilter(DefaultFilter):
"""
Filter for watchfiles that accepts only supported document types
and ignores system files/directories.
Extends DefaultFilter leveraging its built-in filtering:
- `ignore_dirs`: Directory names to ignore (and all their contents)
- `ignore_entity_patterns`: Regex patterns matched against filename/dirname only
We add custom logic for file extension filtering (only accept supported
document types), which the library doesn't provide.
"""
# Regex patterns for files to always ignore (matched against filename only)
# These are passed to DefaultFilter.ignore_entity_patterns
DEFAULT_IGNORE_PATTERNS: Final[tuple[str, ...]] = (
r"^\.DS_Store$",
r"^\.DS_STORE$",
r"^\._.*",
r"^desktop\.ini$",
r"^Thumbs\.db$",
)
# Directories to always ignore (passed to DefaultFilter.ignore_dirs)
# These are matched by directory name, not full path
DEFAULT_IGNORE_DIRS: Final[tuple[str, ...]] = (
".stfolder", # Syncthing
".stversions", # Syncthing
".localized", # macOS
"@eaDir", # Synology NAS
".Spotlight-V100", # macOS
".Trashes", # macOS
"__MACOSX", # macOS archive artifacts
)
def __init__(
self,
*,
supported_extensions: frozenset[str] | None = None,
ignore_patterns: list[str] | None = None,
ignore_dirs: list[str] | None = None,
) -> None:
"""
Initialize the consumer filter.
Args:
supported_extensions: Set of file extensions to accept (e.g., {".pdf", ".png"}).
If None, uses get_supported_file_extensions().
ignore_patterns: Additional regex patterns to ignore (matched against filename).
ignore_dirs: Additional directory names to ignore (merged with defaults).
"""
# Get supported extensions
if supported_extensions is None:
supported_extensions = frozenset(get_supported_file_extensions())
self._supported_extensions = supported_extensions
# Combine default and user patterns
all_patterns: list[str] = list(self.DEFAULT_IGNORE_PATTERNS)
if ignore_patterns:
all_patterns.extend(ignore_patterns)
# Combine default and user ignore_dirs
all_ignore_dirs: list[str] = list(self.DEFAULT_IGNORE_DIRS)
if ignore_dirs:
all_ignore_dirs.extend(ignore_dirs)
# Let DefaultFilter handle all the pattern and directory filtering
super().__init__(
ignore_dirs=tuple(all_ignore_dirs),
ignore_entity_patterns=tuple(all_patterns),
ignore_paths=(),
)
def __call__(self, change: Change, path: str) -> bool:
"""
Filter function for watchfiles.
Returns True if the path should be watched, False to ignore.
The parent DefaultFilter handles:
- Hidden files/directories (starting with .)
- Directories in ignore_dirs
- Files/directories matching ignore_entity_patterns
We additionally filter files by extension.
"""
# Let parent filter handle directory ignoring and pattern matching
if not super().__call__(change, path):
return False
path_obj = Path(path)
# For directories, parent filter already handled everything
if path_obj.is_dir():
return True
# For files, check extension
return self._has_supported_extension(path_obj)
def _has_supported_extension(self, path: Path) -> bool:
"""Check if the file has a supported extension."""
suffix = path.suffix.lower()
return suffix in self._supported_extensions
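How the filter plugs into watchfiles (the directory is a placeholder; watch() yields sets of (Change, path) tuples, which the command feeds into the stability tracker):

from watchfiles import watch

for changes in watch("/data/consume", watch_filter=ConsumerFilter()):
    for change, path in changes:
        print(change, path)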
def _tags_from_path(filepath: Path, consumption_dir: Path) -> list[int]:
"""
Walk up the directory tree from filepath to consumption_dir
and get or create Tag IDs for every directory.
Returns set of Tag models
Returns list of Tag primary keys.
"""
db.close_old_connections()
tag_ids = set()
path_parts = filepath.relative_to(settings.CONSUMPTION_DIR).parent.parts
tag_ids: set[int] = set()
path_parts = filepath.relative_to(consumption_dir).parent.parts
for part in path_parts:
tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
tag, _ = Tag.objects.get_or_create(
name__iexact=part,
defaults={"name": part},
)
tag_ids.add(tag.pk)
return list(tag_ids)
def _is_ignored(filepath: Path) -> bool:
def _consume_file(
filepath: Path,
consumption_dir: Path,
*,
subdirs_as_tags: bool,
) -> None:
"""
Checks if the given file should be ignored, based on configured
patterns.
Queue a file for consumption.
Returns True if the file is ignored, False otherwise
Args:
filepath: Path to the file to consume.
consumption_dir: Base consumption directory.
subdirs_as_tags: Whether to create tags from subdirectory names.
"""
# Trim out the consume directory, leaving only filename and its
# path relative to the consume directory
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
# March through the components of the path, including directories and the filename
# looking for anything matching
# foo/bar/baz/file.pdf -> (foo, bar, baz, file.pdf)
parts = []
for part in filepath_relative.parts:
# If the part is not the name (ie, it's a dir)
# Need to append the trailing slash or fnmatch doesn't match
# fnmatch("dir", "dir/*") == False
# fnmatch("dir/", "dir/*") == True
if part != filepath_relative.name:
part = part + "/"
parts.append(part)
for pattern in settings.CONSUMER_IGNORE_PATTERNS:
if len(filter(parts, pattern)):
return True
return False
def _consume(filepath: Path) -> None:
if filepath.is_dir() or _is_ignored(filepath):
# Verify file still exists and is accessible
try:
if not filepath.is_file():
logger.debug(f"Not consuming {filepath}: not a file or doesn't exist")
return
except OSError as e:
logger.warning(f"Not consuming {filepath}: {e}")
return
if not filepath.is_file():
logger.debug(f"Not consuming file {filepath}: File has moved.")
return
if not is_file_ext_supported(filepath.suffix):
logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
return
# Total wait time: up to 500ms
os_error_retry_count: Final[int] = 50
os_error_retry_wait: Final[float] = 0.01
read_try_count = 0
file_open_ok = False
os_error_str = None
while (read_try_count < os_error_retry_count) and not file_open_ok:
# Get tags from path if configured
tag_ids: list[int] | None = None
if subdirs_as_tags:
try:
with filepath.open("rb"):
file_open_ok = True
except OSError as e:
read_try_count += 1
os_error_str = str(e)
sleep(os_error_retry_wait)
tag_ids = _tags_from_path(filepath, consumption_dir)
except Exception:
logger.exception(f"Error creating tags from path for {filepath}")
if read_try_count >= os_error_retry_count:
logger.warning(f"Not consuming file {filepath}: OS reports {os_error_str}")
return
tag_ids = None
# Queue for consumption
try:
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception:
logger.exception("Error creating tags from path")
try:
logger.info(f"Adding {filepath} to the task queue.")
logger.info(f"Adding {filepath} to the task queue")
consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
@@ -131,223 +346,228 @@ def _consume(filepath: Path) -> None:
DocumentMetadataOverrides(tag_ids=tag_ids),
)
except Exception:
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
# errors.
logger.exception("Error while consuming document")
def _consume_wait_unmodified(file: Path) -> None:
"""
Waits for the given file to appear unmodified based on file size
and modification time. Will wait a configured number of seconds
and retry a configured number of times before either consuming or
giving up
"""
if _is_ignored(file):
return
logger.debug(f"Waiting for file {file} to remain unmodified")
mtime = -1
size = -1
current_try = 0
while current_try < settings.CONSUMER_POLLING_RETRY_COUNT:
try:
stat_data = file.stat()
new_mtime = stat_data.st_mtime
new_size = stat_data.st_size
except FileNotFoundError:
logger.debug(
f"File {file} moved while waiting for it to remain unmodified.",
)
return
if new_mtime == mtime and new_size == size:
_consume(file)
return
mtime = new_mtime
size = new_size
sleep(settings.CONSUMER_POLLING_DELAY)
current_try += 1
logger.error(f"Timeout while waiting on file {file} to remain unmodified.")
class Handler(FileSystemEventHandler):
def __init__(self, pool: ThreadPoolExecutor) -> None:
super().__init__()
self._pool = pool
def on_created(self, event):
self._pool.submit(_consume_wait_unmodified, Path(event.src_path))
def on_moved(self, event):
self._pool.submit(_consume_wait_unmodified, Path(event.dest_path))
logger.exception(f"Error while queuing document {filepath}")
class Command(BaseCommand):
"""
    Watch a consumption directory and queue new documents for processing.
    Uses watchfiles for efficient file system monitoring. Supports both
    native OS notifications (inotify on Linux, FSEvents on macOS) and
    polling for network filesystems.
    """
    help = "Watch the consumption directory for new documents"
    # For testing - allows tests to stop the consumer
    stop_flag: Event = Event()
    # Testing timeout in seconds
    testing_timeout_s: Final[float] = 0.5
    def add_arguments(self, parser) -> None:
        parser.add_argument(
            "directory",
            default=None,
            nargs="?",
            help="The consumption directory (defaults to CONSUMPTION_DIR setting)",
        )
        parser.add_argument(
            "--oneshot",
            action="store_true",
            help="Process existing files and exit without watching",
        )
        parser.add_argument(
            "--testing",
            action="store_true",
            help="Enable testing mode with shorter timeouts",
            default=False,
        )
    def handle(self, *args, **options) -> None:
        # Resolve consumption directory
        directory = options.get("directory")
        if not directory:
            directory = getattr(settings, "CONSUMPTION_DIR", None)
        if not directory:
            raise CommandError("CONSUMPTION_DIR is not configured")
        directory = Path(directory).resolve()
        if not directory.exists():
            raise CommandError(f"Consumption directory does not exist: {directory}")
        if not directory.is_dir():
            raise CommandError(f"Consumption path is not a directory: {directory}")
        # Ensure scratch directory exists
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        # Get settings
        recursive: bool = settings.CONSUMER_RECURSIVE
        subdirs_as_tags: bool = settings.CONSUMER_SUBDIRS_AS_TAGS
        polling_interval: float = settings.CONSUMER_POLLING_INTERVAL
        stability_delay: float = settings.CONSUMER_STABILITY_DELAY
        ignore_patterns: list[str] = settings.CONSUMER_IGNORE_PATTERNS
        ignore_dirs: list[str] = settings.CONSUMER_IGNORE_DIRS
        is_testing: bool = options.get("testing", False)
        is_oneshot: bool = options.get("oneshot", False)
        # Create filter
        consumer_filter = ConsumerFilter(
            ignore_patterns=ignore_patterns,
            ignore_dirs=ignore_dirs,
        )
        # Process existing files
        self._process_existing_files(
            directory=directory,
            recursive=recursive,
            subdirs_as_tags=subdirs_as_tags,
            consumer_filter=consumer_filter,
        )
        if is_oneshot:
            logger.info("Oneshot mode: processed existing files, exiting")
            return
        # Start watching
        self._watch_directory(
            directory=directory,
            recursive=recursive,
            subdirs_as_tags=subdirs_as_tags,
            consumer_filter=consumer_filter,
            polling_interval=polling_interval,
            stability_delay=stability_delay,
            is_testing=is_testing,
        )
logger.debug("Consumer exiting")
def _process_existing_files(
self,
*,
directory: Path,
recursive: bool,
subdirs_as_tags: bool,
consumer_filter: ConsumerFilter,
) -> None:
"""Process any existing files in the consumption directory."""
logger.info(f"Processing existing files in {directory}")
glob_pattern = "**/*" if recursive else "*"
for filepath in directory.glob(glob_pattern):
# Use filter to check if file should be processed
if not filepath.is_file():
continue
if not consumer_filter(Change.added, str(filepath)):
continue
_consume_file(
filepath=filepath,
consumption_dir=directory,
subdirs_as_tags=subdirs_as_tags,
)
def _watch_directory(
self,
*,
directory: Path,
recursive: bool,
subdirs_as_tags: bool,
consumer_filter: ConsumerFilter,
polling_interval: float,
stability_delay: float,
is_testing: bool,
) -> None:
"""Watch directory for changes and process stable files."""
use_polling = polling_interval > 0
poll_delay_ms = int(polling_interval * 1000) if use_polling else 0
if use_polling:
logger.info(
f"Watching {directory} using polling (interval: {polling_interval}s)",
)
        else:
            logger.info(f"Watching {directory} using native file system events")
        # Create stability tracker
        tracker = FileStabilityTracker(stability_delay=stability_delay)
        # Calculate timeouts
        stability_timeout_ms = int(stability_delay * 1000)
        testing_timeout_ms = int(self.testing_timeout_s * 1000)
        # Calculate appropriate timeout for watch loop
        # In polling mode, rust_timeout must be significantly longer than poll_delay_ms
        # to ensure poll cycles can complete before timing out
        if is_testing:
            if use_polling:
                # For polling: timeout must be at least 3x the poll interval to allow
                # multiple poll cycles. This prevents timeouts from interfering with
                # the polling mechanism.
                min_polling_timeout_ms = poll_delay_ms * 3
                timeout_ms = max(min_polling_timeout_ms, testing_timeout_ms)
            else:
                # For native watching, use short timeout to check stop flag
                timeout_ms = testing_timeout_ms
        else:
            # Not testing, wait indefinitely for first event
            timeout_ms = 0
        self.stop_flag.clear()
        while not self.stop_flag.is_set():
            try:
                for changes in watch(
                    directory,
                    watch_filter=consumer_filter,
                    rust_timeout=timeout_ms,
                    yield_on_timeout=True,
                    force_polling=use_polling,
                    poll_delay_ms=poll_delay_ms,
                    recursive=recursive,
                    stop_event=self.stop_flag,
                ):
                    # Process each change
                    for change_type, path in changes:
                        path = Path(path).resolve()
                        if not path.is_file():
                            continue
                        logger.debug(f"Event: {change_type.name} for {path}")
                        tracker.track(path, change_type)
                    # Check for stable files
                    for stable_path in tracker.get_stable_files():
                        _consume_file(
                            filepath=stable_path,
                            consumption_dir=directory,
                            subdirs_as_tags=subdirs_as_tags,
                        )
                    # Exit watch loop to reconfigure timeout
                    break
                # Determine next timeout
                if tracker.has_pending_files():
                    # Check pending files at stability interval
                    timeout_ms = stability_timeout_ms
                elif is_testing:
                    # In testing, use appropriate timeout based on watch mode
                    if use_polling:
                        # For polling: ensure timeout allows polls to complete
                        min_polling_timeout_ms = poll_delay_ms * 3
                        timeout_ms = max(min_polling_timeout_ms, testing_timeout_ms)
                    else:
                        # For native watching, use short timeout to check stop flag
                        timeout_ms = testing_timeout_ms
                else:  # pragma: nocover
                    # No pending files, wait indefinitely
                    timeout_ms = 0
            except KeyboardInterrupt:  # pragma: nocover
                logger.info("Received interrupt, stopping consumer")
                self.stop_flag.set()
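Note: a minimal sketch of the watch() contract the loop above relies on (the directory is hypothetical; watch, rust_timeout, yield_on_timeout and stop_event are the real watchfiles API). rust_timeout is fixed per call, which is why the loop breaks out and re-enters watch() whenever the timeout needs to change:

    from threading import Event
    from watchfiles import watch

    stop = Event()
    for changes in watch(
        "/tmp/consume",          # hypothetical directory
        rust_timeout=1000,       # wake up after 1s even with no events
        yield_on_timeout=True,   # a timeout yields an empty set instead of raising
        stop_event=stop,         # another thread can end the generator
    ):
        if not changes:
            break                # timed out with nothing to do
        for change, path in changes:
            print(change.name, path)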

View File

@@ -3,7 +3,6 @@ import json
import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import TYPE_CHECKING
@@ -56,7 +55,6 @@ from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import copy_file_with_basic_stats
from paperless import version
from paperless.db import GnuPG
from paperless.models import ApplicationConfiguration
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -69,7 +67,7 @@ class Command(CryptMixin, BaseCommand):
"easy import."
)
    def add_arguments(self, parser) -> None:
parser.add_argument("target")
parser.add_argument(
@@ -188,7 +186,7 @@ class Command(CryptMixin, BaseCommand):
help="If provided, is used to encrypt sensitive data in the export",
)
    def handle(self, *args, **options) -> None:
self.target = Path(options["target"]).resolve()
self.split_manifest: bool = options["split_manifest"]
self.compare_checksums: bool = options["compare_checksums"]
@@ -246,7 +244,7 @@ class Command(CryptMixin, BaseCommand):
if self.zip_export and temp_dir is not None:
temp_dir.cleanup()
    def dump(self) -> None:
# 1. Take a snapshot of what files exist in the current export folder
for x in self.target.glob("**/*"):
if x.is_file():
@@ -316,20 +314,17 @@ class Command(CryptMixin, BaseCommand):
total=len(document_manifest),
disable=self.no_progress_bar,
):
            document = document_map[document_dict["pk"]]
            # 3.1. generate a unique filename
            base_name = self.generate_base_name(document)
            # 3.2. write filenames into manifest
            original_target, thumbnail_target, archive_target = (
                self.generate_document_targets(document, base_name, document_dict)
            )
            # 3.3. write files to target folder
if not self.data_only:
self.copy_document_files(
document,
@@ -423,7 +418,6 @@ class Command(CryptMixin, BaseCommand):
base_name = generate_filename(
document,
counter=filename_counter,
append_gpg=False,
)
else:
base_name = document.get_public_filename(counter=filename_counter)
@@ -482,51 +476,29 @@ class Command(CryptMixin, BaseCommand):
        """
        self.check_and_copy(
            document.source_path,
            document.checksum,
            original_target,
        )
        if thumbnail_target:
            self.check_and_copy(document.thumbnail_path, None, thumbnail_target)
        if archive_target:
            if TYPE_CHECKING:
                assert isinstance(document.archive_path, Path)
            self.check_and_copy(
                document.archive_path,
                document.archive_checksum,
                archive_target,
            )
def check_and_write_json(
self,
content: list[dict] | dict,
target: Path,
    ) -> None:
"""
Writes the source content to the target json file.
If --compare-json arg was used, don't write to target file if
@@ -556,7 +528,7 @@ class Command(CryptMixin, BaseCommand):
source: Path,
source_checksum: str | None,
target: Path,
    ) -> None:
"""
Copies the source to the target, if target doesn't exist or the target doesn't seem to match
the source attributes

View File

@@ -92,6 +92,9 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
# doc to doc is obviously not useful
if first_doc.pk == second_doc.pk:
continue
# Skip empty documents (e.g. password-protected)
if first_doc.content.strip() == "" or second_doc.content.strip() == "":
continue
# Skip matching which have already been matched together
# doc 1 to doc 2 is the same as doc 2 to doc 1
doc_1_to_doc_2 = (first_doc.pk, second_doc.pk)

View File

@@ -48,12 +48,13 @@ if settings.AUDIT_LOG_ENABLED:
@contextmanager
def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
try:
sig.disconnect(receiver=receiver, sender=sender)
yield
finally:
    finally:
        kwargs = {"weak": weak} if weak is not None else {}
        sig.connect(receiver=receiver, sender=sender, **kwargs)
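Note: a minimal usage sketch, mirroring the import-time call sites below (Document and update_filename_and_move_files stand in for any receiver/sender pair):

    from django.db.models.signals import post_save

    with disable_signal(
        post_save,
        receiver=update_filename_and_move_files,
        sender=Document,
        weak=False,  # reconnect with the same weak flag the receiver originally used
    ):
        ...  # bulk work that must not trigger file renames

The optional weak parameter lets callers restore the exact registration the receiver had before the context manager disconnected it.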
class Command(CryptMixin, BaseCommand):
@@ -245,7 +246,7 @@ class Command(CryptMixin, BaseCommand):
self.source = Path(tmp_dir)
self._run_import()
    def _run_import(self) -> None:
self.pre_check()
self.load_metadata()
self.load_manifest_files()
@@ -258,16 +259,19 @@ class Command(CryptMixin, BaseCommand):
post_save,
receiver=update_filename_and_move_files,
sender=Document,
weak=False,
),
disable_signal(
m2m_changed,
receiver=update_filename_and_move_files,
sender=Document.tags.through,
weak=False,
),
disable_signal(
post_save,
receiver=update_filename_and_move_files,
sender=CustomFieldInstance,
weak=False,
),
disable_signal(
post_save,
@@ -379,8 +383,6 @@ class Command(CryptMixin, BaseCommand):
else:
archive_path = None
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
with FileLock(settings.MEDIA_LOCK):
if Path(document.source_path).is_file():
raise FileExistsError(document.source_path)

View File

@@ -0,0 +1,22 @@
from django.core.management import BaseCommand
from django.db import transaction
from documents.management.commands.mixins import ProgressBarMixin
from documents.tasks import llmindex_index
class Command(ProgressBarMixin, BaseCommand):
help = "Manages the LLM-based vector index for Paperless."
def add_arguments(self, parser):
parser.add_argument("command", choices=["rebuild", "update"])
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
with transaction.atomic():
llmindex_index(
progress_bar_disable=self.no_progress_bar,
rebuild=options["command"] == "rebuild",
scheduled=False,
)
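Usage note: assuming a registration name like document_llmindex (the filename is suppressed in this view), the command would run as:

    python manage.py document_llmindex rebuild
    python manage.py document_llmindex update --no-progress-bar

The transaction.atomic() wrapper presumably keeps any index bookkeeping rows consistent if the rebuild fails partway through.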

View File

@@ -12,7 +12,7 @@ from documents.models import Document
from documents.parsers import get_parser_class_for_mime_type
def _process_document(doc_id) -> None:
document: Document = Document.objects.get(id=doc_id)
parser_class = get_parser_class_for_mime_type(document.mime_type)
@@ -37,7 +37,7 @@ def _process_document(doc_id):
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
help = "This will regenerate the thumbnails for all documents."
    def add_arguments(self, parser) -> None:
parser.add_argument(
"-d",
"--document",

View File

@@ -25,7 +25,7 @@ class Command(BaseCommand):
parser.formatter_class = RawTextHelpFormatter
return parser
    def handle(self, *args, **options) -> None:
username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
password = os.getenv("PAPERLESS_ADMIN_PASSWORD")

View File

@@ -27,7 +27,7 @@ class MultiProcessMixin:
for the use of multiple processes
"""
    def add_argument_processes_mixin(self, parser: ArgumentParser) -> None:
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
@@ -35,7 +35,7 @@ class MultiProcessMixin:
help="Number of processes to distribute work amongst",
)
    def handle_processes_mixin(self, *args, **options) -> None:
self.process_count = options["processes"]
if self.process_count < 1:
raise CommandError("There must be at least 1 process")
@@ -47,7 +47,7 @@ class ProgressBarMixin:
via this class
"""
    def add_argument_progress_bar_mixin(self, parser: ArgumentParser) -> None:
parser.add_argument(
"--no-progress-bar",
default=False,
@@ -55,7 +55,7 @@ class ProgressBarMixin:
help="If set, the progress bar will not be shown",
)
    def handle_progress_bar_mixin(self, *args, **options) -> None:
self.no_progress_bar = options["no_progress_bar"]
self.use_progress_bar = not self.no_progress_bar
@@ -120,7 +120,7 @@ class CryptMixin:
},
}
    def load_crypt_params(self, metadata: dict) -> None:
# Load up the values for setting up decryption
self.kdf_algorithm: str = metadata[EXPORTER_CRYPTO_SETTINGS_NAME][
EXPORTER_CRYPTO_ALGO_NAME
@@ -135,7 +135,7 @@ class CryptMixin:
EXPORTER_CRYPTO_SALT_NAME
]
    def setup_crypto(self, *, passphrase: str, salt: str | None = None) -> None:
"""
Constructs a class for encryption or decryption using the specified passphrase and salt

View File

@@ -6,8 +6,11 @@ from fnmatch import fnmatch
from fnmatch import translate as fnmatch_translate
from typing import TYPE_CHECKING
from rest_framework import serializers
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.filters import CustomFieldQueryParser
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
@@ -17,6 +20,7 @@ from documents.models import Tag
from documents.models import Workflow
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.regex import safe_regex_search
if TYPE_CHECKING:
from django.db.models import QuerySet
@@ -30,7 +34,7 @@ def log_reason(
matching_model: MatchingModel | WorkflowTrigger,
document: Document,
reason: str,
) -> None:
class_name = type(matching_model).__name__
name = (
matching_model.name if hasattr(matching_model, "name") else str(matching_model)
@@ -61,8 +65,12 @@ def match_correspondents(document: Document, classifier: DocumentClassifier, use
    return list(
        filter(
            lambda o: (
                matches(o, document)
                or (
                    o.pk == pred_id and o.matching_algorithm == MatchingModel.MATCH_AUTO
                )
            ),
            correspondents,
        ),
    )
@@ -88,8 +96,12 @@ def match_document_types(document: Document, classifier: DocumentClassifier, use
    return list(
        filter(
            lambda o: (
                matches(o, document)
                or (
                    o.pk == pred_id and o.matching_algorithm == MatchingModel.MATCH_AUTO
                )
            ),
            document_types,
        ),
    )
@@ -110,10 +122,12 @@ def match_tags(document: Document, classifier: DocumentClassifier, user=None):
    return list(
        filter(
            lambda o: (
                matches(o, document)
                or (
                    o.matching_algorithm == MatchingModel.MATCH_AUTO
                    and o.pk in predicted_tag_ids
                )
            ),
            tags,
        ),
    )
@@ -141,15 +155,19 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
    return list(
        filter(
            lambda o: (
                matches(o, document)
                or (
                    o.pk == pred_id and o.matching_algorithm == MatchingModel.MATCH_AUTO
                )
            ),
            storage_paths,
        ),
    )
def matches(matching_model: MatchingModel, document: Document):
    search_flags = 0
document_content = document.content
@@ -158,14 +176,18 @@ def matches(matching_model: MatchingModel, document: Document):
return False
if matching_model.is_insensitive:
search_kwargs = {"flags": re.IGNORECASE}
search_flags = re.IGNORECASE
if matching_model.matching_algorithm == MatchingModel.MATCH_NONE:
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
for word in _split_match(matching_model):
            search_result = re.search(
                rf"\b{word}\b",
                document_content,
                flags=search_flags,
            )
if not search_result:
return False
log_reason(
@@ -177,7 +199,7 @@ def matches(matching_model: MatchingModel, document: Document):
elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
for word in _split_match(matching_model):
            if re.search(rf"\b{word}\b", document_content, flags=search_flags):
log_reason(matching_model, document, f"it contains this word: {word}")
return True
return False
@@ -187,7 +209,7 @@ def matches(matching_model: MatchingModel, document: Document):
re.search(
rf"\b{re.escape(matching_model.match)}\b",
document_content,
                flags=search_flags,
),
)
if result:
@@ -199,16 +221,11 @@ def matches(matching_model: MatchingModel, document: Document):
return result
elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
        match = safe_regex_search(
            matching_model.match,
            document_content,
            flags=search_flags,
        )
if match:
log_reason(
matching_model,
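Note: documents.regex.safe_regex_search is introduced by this change and its body is not shown in this view; judging from the inline try/except it replaces, a plausible minimal sketch is:

    import logging
    import re

    logger = logging.getLogger(__name__)

    def safe_regex_search(pattern: str, text: str, flags: int = 0):
        # Mirrors the inline error handling this call replaced; the real
        # helper may additionally guard against catastrophic backtracking.
        try:
            return re.search(pattern, text, flags=flags)
        except re.error:
            logger.error(f"Error while processing regular expression {pattern}")
            return None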
@@ -314,11 +331,19 @@ def consumable_document_matches_workflow(
trigger_matched = False
# Document path vs trigger path
    # Use the original_path if set, else use the original_file
    match_against = (
        document.original_path
        if document.original_path is not None
        else document.original_file
    )
    if (
        trigger.filter_path is not None
        and len(trigger.filter_path) > 0
        and not fnmatch(
            match_against,
trigger.filter_path,
)
):
@@ -334,67 +359,181 @@ def consumable_document_matches_workflow(
def existing_document_matches_workflow(
document: Document,
trigger: WorkflowTrigger,
) -> tuple[bool, str | None]:
    """
    Returns True if the Document matches all filters from the workflow trigger,
    False otherwise. Includes a reason if it doesn't match.
    """
    # Check content matching algorithm
    if trigger.matching_algorithm > MatchingModel.MATCH_NONE and not matches(
        trigger,
        document,
    ):
        return (
            False,
            f"Document content matching settings for algorithm '{trigger.matching_algorithm}' did not match",
        )
    # Check if any tag filters exist to determine if we need to load document tags
    trigger_has_tags_qs = trigger.filter_has_tags.all()
    trigger_has_all_tags_qs = trigger.filter_has_all_tags.all()
    trigger_has_not_tags_qs = trigger.filter_has_not_tags.all()
    has_tags_filter = trigger_has_tags_qs.exists()
    has_all_tags_filter = trigger_has_all_tags_qs.exists()
    has_not_tags_filter = trigger_has_not_tags_qs.exists()
    # Load document tags once if any tag filters exist
    document_tag_ids = None
    if has_tags_filter or has_all_tags_filter or has_not_tags_filter:
        document_tag_ids = set(document.tags.values_list("id", flat=True))
    # Document tags vs trigger has_tags (any of)
    if has_tags_filter:
        trigger_has_tag_ids = set(trigger_has_tags_qs.values_list("id", flat=True))
        if not (document_tag_ids & trigger_has_tag_ids):
            # For error message, load the actual tag objects
            return (
                False,
                f"Document tags {list(document.tags.all())} do not include {list(trigger_has_tags_qs)}",
            )
    # Document tags vs trigger has_all_tags (all of)
    if has_all_tags_filter:
        required_tag_ids = set(trigger_has_all_tags_qs.values_list("id", flat=True))
        if not required_tag_ids.issubset(document_tag_ids):
            return (
                False,
                f"Document tags {list(document.tags.all())} do not contain all of {list(trigger_has_all_tags_qs)}",
            )
    # Document tags vs trigger has_not_tags (none of)
    if has_not_tags_filter:
        excluded_tag_ids = set(trigger_has_not_tags_qs.values_list("id", flat=True))
        if document_tag_ids & excluded_tag_ids:
            return (
                False,
                f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
            )
    allowed_correspondent_ids = set(
        trigger.filter_has_any_correspondents.values_list("id", flat=True),
    )
    if (
        allowed_correspondent_ids
        and document.correspondent_id not in allowed_correspondent_ids
    ):
        return (
            False,
            f"Document correspondent {document.correspondent} is not one of {list(trigger.filter_has_any_correspondents.all())}",
        )
    # Document correspondent vs trigger has_correspondent
    if (
        trigger.filter_has_correspondent_id is not None
        and document.correspondent_id != trigger.filter_has_correspondent_id
    ):
        return (
            False,
            f"Document correspondent {document.correspondent} does not match {trigger.filter_has_correspondent}",
        )
    if (
        document.correspondent_id
        and trigger.filter_has_not_correspondents.filter(
            id=document.correspondent_id,
        ).exists()
    ):
        return (
            False,
            f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
        )
    allowed_document_type_ids = set(
        trigger.filter_has_any_document_types.values_list("id", flat=True),
    )
    if allowed_document_type_ids and (
        document.document_type_id not in allowed_document_type_ids
    ):
        return (
            False,
            f"Document doc type {document.document_type} is not one of {list(trigger.filter_has_any_document_types.all())}",
        )
    # Document document_type vs trigger has_document_type
    if (
        trigger.filter_has_document_type_id is not None
        and document.document_type_id != trigger.filter_has_document_type_id
    ):
        return (
            False,
            f"Document doc type {document.document_type} does not match {trigger.filter_has_document_type}",
        )
    if (
        document.document_type_id
        and trigger.filter_has_not_document_types.filter(
            id=document.document_type_id,
        ).exists()
    ):
        return (
            False,
            f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
        )
    allowed_storage_path_ids = set(
        trigger.filter_has_any_storage_paths.values_list("id", flat=True),
    )
    if allowed_storage_path_ids and (
        document.storage_path_id not in allowed_storage_path_ids
    ):
        return (
            False,
            f"Document storage path {document.storage_path} is not one of {list(trigger.filter_has_any_storage_paths.all())}",
        )
    # Document storage_path vs trigger has_storage_path
    if (
        trigger.filter_has_storage_path_id is not None
        and document.storage_path_id != trigger.filter_has_storage_path_id
    ):
        return (
            False,
            f"Document storage path {document.storage_path} does not match {trigger.filter_has_storage_path}",
        )
    if (
        document.storage_path_id
        and trigger.filter_has_not_storage_paths.filter(
            id=document.storage_path_id,
        ).exists()
    ):
        return (
            False,
            f"Document storage path {document.storage_path} is excluded by {list(trigger.filter_has_not_storage_paths.all())}",
        )
    # Custom field query check
    if trigger.filter_custom_field_query:
        parser = CustomFieldQueryParser("filter_custom_field_query")
        try:
            custom_field_q, annotations = parser.parse(
                trigger.filter_custom_field_query,
            )
        except serializers.ValidationError:
            return (False, "Invalid custom field query configuration")
        qs = (
            Document.objects.filter(id=document.id)
            .annotate(**annotations)
            .filter(custom_field_q)
        )
        if not qs.exists():
            return (
                False,
                "Document custom fields do not match the configured custom field query",
            )
# Document original_filename vs trigger filename
if (
@@ -406,13 +545,12 @@ def existing_document_matches_workflow(
trigger.filter_filename.lower(),
)
):
        return (
            False,
            f"Document filename {document.original_filename} does not match {trigger.filter_filename.lower()}",
        )
    return (True, None)
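Note: the tag checks above reduce to plain set arithmetic on primary keys; with illustrative values:

    document_tag_ids = {2, 5}
    bool(document_tag_ids & {1, 2})        # any-of: True, filter passes
    {1, 2}.issubset(document_tag_ids)      # all-of: False, early return fires
    bool(document_tag_ids & {9})           # none-of: False, filter passes

Each failing check returns (False, reason) immediately, so a reason string is only built for the first filter that rejects the document.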
def prefilter_documents_by_workflowtrigger(
@@ -425,31 +563,78 @@ def prefilter_documents_by_workflowtrigger(
document_matches_workflow in run_workflows
"""
    # Filter for documents that have AT LEAST ONE of the specified tags.
    if trigger.filter_has_tags.exists():
        documents = documents.filter(tags__in=trigger.filter_has_tags.all()).distinct()
    # Filter for documents that have ALL of the specified tags.
    if trigger.filter_has_all_tags.exists():
        for tag in trigger.filter_has_all_tags.all():
            documents = documents.filter(tags=tag)
        # Multiple JOINs can create duplicate results.
        documents = documents.distinct()
    # Exclude documents that have ANY of the specified tags.
    if trigger.filter_has_not_tags.exists():
        documents = documents.exclude(tags__in=trigger.filter_has_not_tags.all())
    # Correspondent, DocumentType, etc. filtering
    if trigger.filter_has_any_correspondents.exists():
        documents = documents.filter(
            correspondent__in=trigger.filter_has_any_correspondents.all(),
        )
    if trigger.filter_has_correspondent is not None:
        documents = documents.filter(
            correspondent=trigger.filter_has_correspondent,
        )
    if trigger.filter_has_not_correspondents.exists():
        documents = documents.exclude(
            correspondent__in=trigger.filter_has_not_correspondents.all(),
        )
    if trigger.filter_has_any_document_types.exists():
        documents = documents.filter(
            document_type__in=trigger.filter_has_any_document_types.all(),
        )
    if trigger.filter_has_document_type is not None:
        documents = documents.filter(
            document_type=trigger.filter_has_document_type,
        )
    if trigger.filter_has_not_document_types.exists():
        documents = documents.exclude(
            document_type__in=trigger.filter_has_not_document_types.all(),
        )
    if trigger.filter_has_any_storage_paths.exists():
        documents = documents.filter(
            storage_path__in=trigger.filter_has_any_storage_paths.all(),
        )
    if trigger.filter_has_storage_path is not None:
        documents = documents.filter(
            storage_path=trigger.filter_has_storage_path,
        )
    if trigger.filter_has_not_storage_paths.exists():
        documents = documents.exclude(
            storage_path__in=trigger.filter_has_not_storage_paths.all(),
        )
    # Custom Field & Filename Filtering
    if trigger.filter_custom_field_query:
        parser = CustomFieldQueryParser("filter_custom_field_query")
        try:
            custom_field_q, annotations = parser.parse(
                trigger.filter_custom_field_query,
            )
        except serializers.ValidationError:
            return documents.none()
        documents = documents.annotate(**annotations).filter(custom_field_q)
    if trigger.filter_filename:
        # the true fnmatch will actually run later so we just want a loose filter here
        regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
        regex = f"(?i){regex}"
        documents = documents.filter(original_filename__iregex=regex)
return documents
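Note: fnmatch_translate converts the user's glob into a regex for this loose database prefilter; the strict fnmatch still runs later per document. For example:

    >>> from fnmatch import translate as fnmatch_translate
    >>> fnmatch_translate("*.pdf")
    '(?s:.*\\.pdf)\\Z'

On current Python the translated pattern is anchored with \Z rather than ^/$, so the lstrip("^")/rstrip("$") calls appear to be defensive leftovers from older translate output.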
@@ -464,13 +649,37 @@ def document_matches_workflow(
settings from the workflow trigger, False otherwise
"""
    triggers_queryset = (
        workflow.triggers.filter(
            type=trigger_type,
        )
        .select_related(
            "filter_mailrule",
            "filter_has_document_type",
            "filter_has_correspondent",
            "filter_has_storage_path",
            "schedule_date_custom_field",
        )
        .prefetch_related(
            "filter_has_tags",
            "filter_has_all_tags",
            "filter_has_not_tags",
            "filter_has_any_document_types",
            "filter_has_not_document_types",
            "filter_has_any_correspondents",
            "filter_has_not_correspondents",
            "filter_has_any_storage_paths",
            "filter_has_not_storage_paths",
        )
    )
    trigger_matched = True
    if not triggers_queryset.exists():
        trigger_matched = False
        logger.debug(f"No matching triggers with type {trigger_type} found")
    else:
        for trigger in triggers_queryset:
if trigger_type == WorkflowTrigger.WorkflowTriggerType.CONSUMPTION:
trigger_matched, reason = consumable_document_matches_workflow(
document,

File diff suppressed because it is too large

View File

@@ -1,26 +0,0 @@
# Generated by Django 1.9 on 2015-12-26 13:16
import django.utils.timezone
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0001_initial"),
]
operations = [
migrations.AlterModelOptions(
name="document",
options={"ordering": ("sender", "title")},
),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(
default=django.utils.timezone.now,
editable=False,
),
),
]

View File

@@ -1,50 +1,49 @@
# Generated by Django 5.2.9 on 2026-01-20 18:46
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    initial = True
    dependencies = [
        ("documents", "0001_initial"),
        ("paperless_mail", "0001_initial"),
    ]
    operations = [
        migrations.AddField(
            model_name="workflowtrigger",
            name="filter_mailrule",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to="paperless_mail.mailrule",
                verbose_name="filter documents from this mail rule",
            ),
        ),
        migrations.AddField(
            model_name="workflowtrigger",
            name="schedule_date_custom_field",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to="documents.customfield",
                verbose_name="schedule date custom field",
            ),
        ),
        migrations.AddField(
            model_name="workflow",
            name="triggers",
            field=models.ManyToManyField(
                related_name="workflows",
                to="documents.workflowtrigger",
                verbose_name="triggers",
            ),
        ),
migrations.AddConstraint(
model_name="correspondent",
@@ -61,6 +60,13 @@ class Migration(migrations.Migration):
name="documents_correspondent_name_uniq",
),
),
migrations.AddConstraint(
model_name="customfieldinstance",
constraint=models.UniqueConstraint(
fields=("document", "field"),
name="documents_customfieldinstance_unique_document_field",
),
),
migrations.AddConstraint(
model_name="documenttype",
constraint=models.UniqueConstraint(

View File

@@ -1,70 +0,0 @@
# Generated by Django 1.9 on 2016-01-11 12:21
import django.db.models.deletion
from django.db import migrations
from django.db import models
from django.template.defaultfilters import slugify
DOCUMENT_SENDER_MAP = {}
def move_sender_strings_to_sender_model(apps, schema_editor):
sender_model = apps.get_model("documents", "Sender")
document_model = apps.get_model("documents", "Document")
# Create the sender and log the relationship with the document
for document in document_model.objects.all():
if document.sender:
(
DOCUMENT_SENDER_MAP[document.pk],
_,
) = sender_model.objects.get_or_create(
name=document.sender,
defaults={"slug": slugify(document.sender)},
)
def realign_senders(apps, schema_editor):
document_model = apps.get_model("documents", "Document")
for pk, sender in DOCUMENT_SENDER_MAP.items():
document_model.objects.filter(pk=pk).update(sender=sender)
class Migration(migrations.Migration):
dependencies = [
("documents", "0002_auto_20151226_1316"),
]
operations = [
migrations.CreateModel(
name="Sender",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField()),
],
),
migrations.RunPython(move_sender_strings_to_sender_model),
migrations.RemoveField(
model_name="document",
name="sender",
),
migrations.AddField(
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
on_delete=django.db.models.deletion.CASCADE,
to="documents.Sender",
),
),
migrations.RunPython(realign_senders),
]

View File

@@ -0,0 +1,18 @@
# Generated by Django 5.2.9 on 2026-01-20 20:06
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0002_initial"),
]
operations = [
migrations.AddField(
model_name="workflowaction",
name="order",
field=models.PositiveIntegerField(default=0, verbose_name="order"),
),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 1.9 on 2016-01-14 18:44
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0003_sender"),
]
operations = [
migrations.AlterField(
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.Sender",
),
),
]

View File

@@ -1,178 +0,0 @@
# Generated by Django 4.2.13 on 2024-06-28 17:52
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "0004_auto_20160114_1844"),
("documents", "0005_auto_20160123_0313"),
("documents", "0006_auto_20160123_0430"),
("documents", "0007_auto_20160126_2114"),
("documents", "0008_document_file_type"),
("documents", "0009_auto_20160214_0040"),
("documents", "0010_log"),
("documents", "0011_auto_20160303_1929"),
]
dependencies = [
("documents", "0003_sender"),
]
operations = [
migrations.AlterField(
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.sender",
),
),
migrations.AlterModelOptions(
name="sender",
options={"ordering": ("name",)},
),
migrations.CreateModel(
name="Tag",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField(blank=True)),
(
"colour",
models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc"),
],
default=1,
),
),
("match", models.CharField(blank=True, max_length=256)),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
),
),
],
options={
"abstract": False,
},
),
migrations.AlterField(
model_name="sender",
name="slug",
field=models.SlugField(blank=True),
),
migrations.AddField(
model_name="document",
name="file_type",
field=models.CharField(
choices=[
("pdf", "PDF"),
("png", "PNG"),
("jpg", "JPG"),
("gif", "GIF"),
("tiff", "TIFF"),
],
default="pdf",
editable=False,
max_length=4,
),
preserve_default=False,
),
migrations.AddField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.tag",
),
),
migrations.CreateModel(
name="Log",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("group", models.UUIDField(blank=True)),
("message", models.TextField()),
(
"level",
models.PositiveIntegerField(
choices=[
(10, "Debugging"),
(20, "Informational"),
(30, "Warning"),
(40, "Error"),
(50, "Critical"),
],
default=20,
),
),
(
"component",
models.PositiveIntegerField(
choices=[(1, "Consumer"), (2, "Mail Fetcher")],
),
),
("created", models.DateTimeField(auto_now_add=True)),
("modified", models.DateTimeField(auto_now=True)),
],
options={
"ordering": ("-modified",),
},
),
migrations.RenameModel(
old_name="Sender",
new_name="Correspondent",
),
migrations.AlterModelOptions(
name="document",
options={"ordering": ("correspondent", "title")},
),
migrations.RenameField(
model_name="document",
old_name="sender",
new_name="correspondent",
),
]

View File

@@ -0,0 +1,16 @@
# Generated by Django 5.2.9 on 2026-01-24 23:05
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "0003_workflowaction_order"),
]
operations = [
migrations.RemoveField(
model_name="document",
name="storage_type",
),
]

View File

@@ -1,16 +0,0 @@
# Generated by Django 1.9 on 2016-01-23 03:13
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "0004_auto_20160114_1844"),
]
operations = [
migrations.AlterModelOptions(
name="sender",
options={"ordering": ("name",)},
),
]

View File

@@ -0,0 +1,43 @@
# Generated by Django 5.2.7 on 2025-12-17 22:25
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0004_remove_document_storage_type"),
]
operations = [
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_correspondents",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_correspondent",
to="documents.correspondent",
verbose_name="has one of these correspondents",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_document_types",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_document_type",
to="documents.documenttype",
verbose_name="has one of these document types",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_storage_paths",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_storage_path",
to="documents.storagepath",
verbose_name="has one of these storage paths",
),
),
]

View File

@@ -0,0 +1,23 @@
# Generated by Django 5.2.7 on 2026-01-14 17:45
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0005_workflowtrigger_filter_has_any_correspondents_and_more"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
max_length=32,
verbose_name="checksum",
help_text="The checksum of the original document.",
),
),
]

View File

@@ -1,64 +0,0 @@
# Generated by Django 1.9 on 2016-01-23 04:30
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0005_auto_20160123_0313"),
]
operations = [
migrations.CreateModel(
name="Tag",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField(blank=True)),
(
"colour",
models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#ffff99"),
(12, "#b15928"),
(13, "#000000"),
(14, "#cccccc"),
],
default=1,
),
),
],
options={
"abstract": False,
},
),
migrations.AlterField(
model_name="sender",
name="slug",
field=models.SlugField(blank=True),
),
migrations.AddField(
model_name="document",
name="tags",
field=models.ManyToManyField(related_name="documents", to="documents.Tag"),
),
]

View File

@@ -1,55 +0,0 @@
# Generated by Django 1.9 on 2016-01-26 21:14
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0006_auto_20160123_0430"),
]
operations = [
migrations.AddField(
model_name="tag",
name="match",
field=models.CharField(blank=True, max_length=256),
),
migrations.AddField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
blank=True,
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
null=True,
),
),
migrations.AlterField(
model_name="tag",
name="colour",
field=models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc"),
],
default=1,
),
),
]

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.2.6 on 2026-01-24 07:33
import django.db.models.functions.text
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0006_alter_document_checksum_unique"),
]
operations = [
migrations.AddField(
model_name="document",
name="content_length",
field=models.GeneratedField(
db_persist=True,
expression=django.db.models.functions.text.Length("content"),
null=False,
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
output_field=models.PositiveIntegerField(default=0),
),
),
]
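Note: models.GeneratedField (Django 5.0+) with db_persist=True makes the database itself compute and store the column, so the stored length can never drift from content. A hedged usage sketch (the lookup name follows directly from the field name):

    # cheap statistics without loading content blobs into Python
    Document.objects.filter(content_length__gt=10_000).count()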

View File

@@ -1,39 +0,0 @@
# Generated by Django 1.9 on 2016-01-29 22:58
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0007_auto_20160126_2114"),
]
operations = [
migrations.AddField(
model_name="document",
name="file_type",
field=models.CharField(
choices=[
("pdf", "PDF"),
("png", "PNG"),
("jpg", "JPG"),
("gif", "GIF"),
("tiff", "TIFF"),
],
default="pdf",
editable=False,
max_length=4,
),
preserve_default=False,
),
migrations.AlterField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.Tag",
),
),
]

View File

@@ -0,0 +1,177 @@
# Generated by Django 5.2.9 on 2026-01-27 01:09
import django.db.models.deletion
import django.db.models.functions.text
import django.utils.timezone
from django.conf import settings
from django.contrib.auth.management import create_permissions
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.db import migrations
from django.db import models
def grant_share_link_bundle_permissions(apps, schema_editor):
# Ensure newly introduced permissions are created for all apps
for app_config in apps.get_app_configs():
app_config.models_module = True
create_permissions(app_config, apps=apps, verbosity=0)
app_config.models_module = None
add_document_perm = Permission.objects.filter(codename="add_document").first()
share_bundle_permissions = Permission.objects.filter(
codename__contains="sharelinkbundle",
)
users = User.objects.filter(user_permissions=add_document_perm).distinct()
for user in users:
user.user_permissions.add(*share_bundle_permissions)
groups = Group.objects.filter(permissions=add_document_perm).distinct()
for group in groups:
group.permissions.add(*share_bundle_permissions)
def revoke_share_link_bundle_permissions(apps, schema_editor):
share_bundle_permissions = Permission.objects.filter(
codename__contains="sharelinkbundle",
)
for user in User.objects.all():
user.user_permissions.remove(*share_bundle_permissions)
for group in Group.objects.all():
group.permissions.remove(*share_bundle_permissions)
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "0007_document_content_length"),
]
operations = [
migrations.CreateModel(
name="ShareLinkBundle",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"created",
models.DateTimeField(
blank=True,
db_index=True,
default=django.utils.timezone.now,
editable=False,
verbose_name="created",
),
),
(
"expiration",
models.DateTimeField(
blank=True,
db_index=True,
null=True,
verbose_name="expiration",
),
),
(
"slug",
models.SlugField(
blank=True,
editable=False,
unique=True,
verbose_name="slug",
),
),
(
"file_version",
models.CharField(
choices=[("archive", "Archive"), ("original", "Original")],
default="archive",
max_length=50,
),
),
(
"status",
models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("ready", "Ready"),
("failed", "Failed"),
],
default="pending",
max_length=50,
),
),
(
"size_bytes",
models.PositiveIntegerField(
blank=True,
null=True,
verbose_name="size (bytes)",
),
),
(
"last_error",
models.JSONField(
blank=True,
null=True,
default=None,
verbose_name="last error",
),
),
(
"file_path",
models.CharField(
blank=True,
max_length=512,
verbose_name="file path",
),
),
(
"built_at",
models.DateTimeField(
blank=True,
null=True,
verbose_name="built at",
),
),
(
"documents",
models.ManyToManyField(
related_name="share_link_bundles",
to="documents.document",
verbose_name="documents",
),
),
(
"owner",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="share_link_bundles",
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
],
options={
"ordering": ("-created",),
"verbose_name": "share link bundle",
"verbose_name_plural": "share link bundles",
},
),
migrations.RunPython(
grant_share_link_bundle_permissions,
reverse_code=revoke_share_link_bundle_permissions,
),
]

View File

@@ -1,27 +0,0 @@
# Generated by Django 1.9 on 2016-02-14 00:40
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0008_document_file_type"),
]
operations = [
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
),
),
]

View File

@@ -0,0 +1,38 @@
# Generated by Django 5.2.7 on 2025-12-29 03:56
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0008_sharelinkbundle"),
]
operations = [
migrations.AddField(
model_name="workflowaction",
name="passwords",
field=models.JSONField(
blank=True,
help_text="Passwords to try when removing PDF protection. Separate with commas or new lines.",
null=True,
verbose_name="passwords",
),
),
migrations.AlterField(
model_name="workflowaction",
name="type",
field=models.PositiveIntegerField(
choices=[
(1, "Assignment"),
(2, "Removal"),
(3, "Email"),
(4, "Webhook"),
(5, "Password removal"),
],
default=1,
verbose_name="Workflow Action Type",
),
),
]

View File

@@ -0,0 +1,25 @@
# Generated by Django 5.2.11 on 2026-02-07 19:06
import django.db.models.functions.text
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0009_workflowaction_passwords_alter_workflowaction_type"),
]
operations = [
migrations.AlterField(
model_name="document",
name="content_length",
field=models.GeneratedField(
db_persist=True,
expression=django.db.models.functions.text.Length("content"),
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
output_field=models.PositiveIntegerField(default=0),
serialize=False,
),
),
]
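For context: a GeneratedField such as content_length above is computed and stored by the database itself (Django 5.0+), so queries can filter and sort on it without recomputing Length() per row. A hedged sketch with an illustrative model:

from django.db import models
from django.db.models.functions import Length


class Note(models.Model):
    body = models.TextField(blank=True)
    # db_persist=True stores the computed value in the table; the database
    # keeps it in sync on every INSERT/UPDATE, and it is read-only in Python.
    body_length = models.GeneratedField(
        expression=Length("body"),
        output_field=models.PositiveIntegerField(),
        db_persist=True,
    )

# e.g. Note.objects.order_by("-body_length").first()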

View File

@@ -1,53 +0,0 @@
# Generated by Django 1.9 on 2016-02-27 17:54
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0009_auto_20160214_0040"),
]
operations = [
migrations.CreateModel(
name="Log",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("group", models.UUIDField(blank=True)),
("message", models.TextField()),
(
"level",
models.PositiveIntegerField(
choices=[
(10, "Debugging"),
(20, "Informational"),
(30, "Warning"),
(40, "Error"),
(50, "Critical"),
],
default=20,
),
),
(
"component",
models.PositiveIntegerField(
choices=[(1, "Consumer"), (2, "Mail Fetcher")],
),
),
("created", models.DateTimeField(auto_now_add=True)),
("modified", models.DateTimeField(auto_now=True)),
],
options={
"ordering": ("-modified",),
},
),
]

View File

@@ -1,26 +0,0 @@
# Generated by Django 1.9.2 on 2016-03-03 19:29
from django.db import migrations
class Migration(migrations.Migration):
atomic = False
dependencies = [
("documents", "0010_log"),
]
operations = [
migrations.RenameModel(
old_name="Sender",
new_name="Correspondent",
),
migrations.AlterModelOptions(
name="document",
options={"ordering": ("correspondent", "title")},
),
migrations.RenameField(
model_name="document",
old_name="sender",
new_name="correspondent",
),
]

View File

@@ -1,128 +0,0 @@
# Generated by Django 1.9.2 on 2016-03-05 00:40
import os
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
import gnupg
from django.conf import settings
from django.db import migrations
from django.utils.termcolors import colorize as colourise # Spelling hurts me
class GnuPG:
"""
A handy singleton to use when handling encrypted files.
"""
gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)
@classmethod
def decrypted(cls, file_handle):
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
@classmethod
def encrypted(cls, file_handle):
return cls.gpg.encrypt_file(
file_handle,
recipients=None,
passphrase=settings.PASSPHRASE,
symmetric=True,
).data
def move_documents_and_create_thumbnails(apps, schema_editor):
(Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir(
parents=True,
exist_ok=True,
)
(Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir(
parents=True,
exist_ok=True,
)
documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents") # noqa: PTH208
if set(documents) == {"originals", "thumbnails"}:
return
print(
colourise(
"\n\n"
" This is a one-time only migration to generate thumbnails for all of your\n"
" documents so that future UIs will have something to work with. If you have\n"
" a lot of documents though, this may take a while, so a coffee break may be\n"
" in order."
"\n",
opts=("bold",),
),
)
Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True)
for f in sorted(documents):
if not f.endswith("gpg"):
continue
print(
" {} {} {}".format(
colourise("*", fg="green"),
colourise("Generating a thumbnail for", fg="white"),
colourise(f, fg="cyan"),
),
)
thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f
orig_target: Path = Path(orig_temp) / f.replace(".gpg", "")
with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted:
unencrypted.write(GnuPG.decrypted(encrypted))
subprocess.Popen(
(
settings.CONVERT_BINARY,
"-scale",
"500x5000",
"-alpha",
"remove",
orig_target,
Path(thumb_temp) / "convert-%04d.png",
),
).wait()
thumb_source: Path = Path(thumb_temp) / "convert-0000.png"
thumb_target: Path = (
Path(settings.MEDIA_ROOT)
/ "documents"
/ "thumbnails"
/ re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
)
with (
thumb_source.open("rb") as unencrypted,
thumb_target.open("wb") as encrypted,
):
encrypted.write(GnuPG.encrypted(unencrypted))
shutil.rmtree(thumb_temp)
shutil.rmtree(orig_temp)
shutil.move(
Path(settings.MEDIA_ROOT) / "documents" / f,
Path(settings.MEDIA_ROOT) / "documents" / "originals" / f,
)
class Migration(migrations.Migration):
dependencies = [
("documents", "0011_auto_20160303_1929"),
]
operations = [
migrations.RunPython(move_documents_and_create_thumbnails),
]
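The GnuPG helper above wraps python-gnupg's symmetric mode (passphrase only, no recipients). A minimal round-trip sketch, assuming python-gnupg is installed; the GnuPG home, file name, and passphrase are illustrative:

import io

import gnupg

gpg = gnupg.GPG(gnupghome="/tmp/gnupg-home")

with open("invoice.pdf", "rb") as fh:
    # Symmetric encryption: recipients=None plus a shared passphrase.
    encrypted = gpg.encrypt_file(
        fh,
        recipients=None,
        passphrase="secret",
        symmetric=True,
    ).data

decrypted = gpg.decrypt_file(io.BytesIO(encrypted), passphrase="secret").data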

View File

@@ -1,42 +0,0 @@
# Generated by Django 1.9.4 on 2016-03-25 21:11
import django.utils.timezone
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0012_auto_20160305_0040"),
]
operations = [
migrations.AddField(
model_name="correspondent",
name="match",
field=models.CharField(blank=True, max_length=256),
),
migrations.AddField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
),
),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(default=django.utils.timezone.now),
),
migrations.RemoveField(
model_name="log",
name="component",
),
]

View File

@@ -1,182 +0,0 @@
# Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib
from pathlib import Path
import django.utils.timezone
import gnupg
from django.conf import settings
from django.db import migrations
from django.db import models
from django.template.defaultfilters import slugify
from django.utils.termcolors import colorize as colourise # Spelling hurts me
class GnuPG:
"""
A handy singleton to use when handling encrypted files.
"""
gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)
@classmethod
def decrypted(cls, file_handle):
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
@classmethod
def encrypted(cls, file_handle):
return cls.gpg.encrypt_file(
file_handle,
recipients=None,
passphrase=settings.PASSPHRASE,
symmetric=True,
).data
class Document:
"""
Django's migrations restrict access to model methods, so this is a snapshot
of the methods that existed at the time this migration was written, since
we need to make use of a lot of these shortcuts here.
"""
def __init__(self, doc):
self.pk = doc.pk
self.correspondent = doc.correspondent
self.title = doc.title
self.file_type = doc.file_type
self.tags = doc.tags
self.created = doc.created
def __str__(self):
created = self.created.strftime("%Y%m%d%H%M%S")
if self.correspondent and self.title:
return f"{created}: {self.correspondent} - {self.title}"
if self.correspondent or self.title:
return f"{created}: {self.correspondent or self.title}"
return str(created)
@property
def source_path(self):
return (
Path(settings.MEDIA_ROOT)
/ "documents"
/ "originals"
/ f"{self.pk:07}.{self.file_type}.gpg"
)
@property
def source_file(self):
return self.source_path.open("rb")
@property
def file_name(self):
return slugify(str(self)) + "." + self.file_type
def set_checksums(apps, schema_editor):
document_model = apps.get_model("documents", "Document")
if not document_model.objects.all().exists():
return
print(
colourise(
"\n\n"
" This is a one-time only migration to generate checksums for all\n"
" of your existing documents. If you have a lot of documents\n"
" though, this may take a while, so a coffee break may be in\n"
" order."
"\n",
opts=("bold",),
),
)
sums = {}
for d in document_model.objects.all():
document = Document(d)
print(
" {} {} {}".format(
colourise("*", fg="green"),
colourise("Generating a checksum for", fg="white"),
colourise(document.file_name, fg="cyan"),
),
)
with document.source_file as encrypted:
checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()
if checksum in sums:
error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
p1=colourise(
"It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:",
fg="yellow",
),
p2=colourise(
"To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
fg="yellow",
),
p3=colourise(
"When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
fg="yellow",
),
doc1=colourise(
f" * {sums[checksum][1]} (id: {sums[checksum][0]})",
fg="red",
),
doc2=colourise(
f" * {document.file_name} (id: {document.pk})",
fg="red",
),
code=colourise(
f" $ echo 'DELETE FROM documents_document WHERE id = {document.pk};' | ./manage.py dbshell",
fg="green",
),
line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
)
raise RuntimeError(error)
sums[checksum] = (document.pk, document.file_name)
document_model.objects.filter(pk=document.pk).update(checksum=checksum)
def do_nothing(apps, schema_editor):
pass
class Migration(migrations.Migration):
dependencies = [
("documents", "0013_auto_20160325_2111"),
]
operations = [
migrations.AddField(
model_name="document",
name="checksum",
field=models.CharField(
default="-",
db_index=True,
editable=False,
max_length=32,
help_text="The checksum of the original document (before it "
"was encrypted). We use this to prevent duplicate "
"document imports.",
),
preserve_default=False,
),
migrations.RunPython(set_checksums, do_nothing),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
),
),
migrations.AlterField(
model_name="document",
name="modified",
field=models.DateTimeField(auto_now=True, db_index=True),
),
]
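The checksum logic above reduces to: hash each document's decrypted bytes and stop when two documents share a digest. The same idea in isolation, as a sketch (the directory name is illustrative):

import hashlib
from pathlib import Path

seen: dict[str, Path] = {}
for pdf in sorted(Path("originals").glob("*.pdf")):
    digest = hashlib.md5(pdf.read_bytes()).hexdigest()
    if digest in seen:
        raise RuntimeError(f"{pdf} duplicates {seen[digest]}")
    seen[digest] = pdf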

View File

@@ -1,33 +0,0 @@
# Generated by Django 1.10.2 on 2016-10-05 21:38
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0014_document_checksum"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.",
max_length=32,
unique=True,
),
),
migrations.AddField(
model_name="correspondent",
name="is_insensitive",
field=models.BooleanField(default=True),
),
migrations.AddField(
model_name="tag",
name="is_insensitive",
field=models.BooleanField(default=True),
),
]

View File

@@ -1,92 +0,0 @@
# Generated by Django 4.2.13 on 2024-06-28 17:57
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "0015_add_insensitive_to_match"),
("documents", "0016_auto_20170325_1558"),
("documents", "0017_auto_20170512_0507"),
("documents", "0018_auto_20170715_1712"),
]
dependencies = [
("documents", "0014_document_checksum"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.",
max_length=32,
unique=True,
),
),
migrations.AddField(
model_name="correspondent",
name="is_insensitive",
field=models.BooleanField(default=True),
),
migrations.AddField(
model_name="tag",
name="is_insensitive",
field=models.BooleanField(default=True),
),
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]),
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
),
),
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="document",
name="correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.correspondent",
),
),
]

View File

@@ -1,23 +0,0 @@
# Generated by Django 1.10.5 on 2017-03-25 15:58
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0015_add_insensitive_to_match"),
]
operations = [
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]),
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
),
),
]

View File

@@ -1,43 +0,0 @@
# Generated by Django 1.10.5 on 2017-05-12 05:07
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0016_auto_20170325_1558"),
]
operations = [
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 1.10.5 on 2017-07-15 17:12
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0017_auto_20170512_0507"),
]
operations = [
migrations.AlterField(
model_name="document",
name="correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.Correspondent",
),
),
]

View File

@@ -1,22 +0,0 @@
# Generated by Django 1.10.5 on 2017-07-15 17:12
from django.contrib.auth.models import User
from django.db import migrations
def forwards_func(apps, schema_editor):
User.objects.create(username="consumer")
def reverse_func(apps, schema_editor):
User.objects.get(username="consumer").delete()
class Migration(migrations.Migration):
dependencies = [
("documents", "0018_auto_20170715_1712"),
]
operations = [
migrations.RunPython(forwards_func, reverse_func),
]
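Note that this migration imports the live User model directly; the apps-registry form, which Django's documentation recommends for data migrations, would swap the two functions for roughly the following (a sketch, not the committed code):

def forwards_func(apps, schema_editor):
    # Resolve auth.User from the migration state instead of importing it.
    User = apps.get_model("auth", "User")
    User.objects.create(username="consumer")


def reverse_func(apps, schema_editor):
    User = apps.get_model("auth", "User")
    User.objects.get(username="consumer").delete()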

View File

@@ -1,29 +0,0 @@
import django.utils.timezone
from django.db import migrations
from django.db import models
def set_added_time_to_created_time(apps, schema_editor):
Document = apps.get_model("documents", "Document")
for doc in Document.objects.all():
doc.added = doc.created
doc.save()
class Migration(migrations.Migration):
dependencies = [
("documents", "0019_add_consumer_user"),
]
operations = [
migrations.AddField(
model_name="document",
name="added",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
editable=False,
),
),
migrations.RunPython(set_added_time_to_created_time),
]

View File

@@ -1,41 +0,0 @@
# Generated by Django 1.11.10 on 2018-02-04 13:07
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0020_document_added"),
]
operations = [
# Add the field with the default GPG-encrypted value
migrations.AddField(
model_name="document",
name="storage_type",
field=models.CharField(
choices=[
("unencrypted", "Unencrypted"),
("gpg", "Encrypted with GNU Privacy Guard"),
],
default="gpg",
editable=False,
max_length=11,
),
),
# Now that the field is added, change the default to unencrypted
migrations.AlterField(
model_name="document",
name="storage_type",
field=models.CharField(
choices=[
("unencrypted", "Unencrypted"),
("gpg", "Encrypted with GNU Privacy Guard"),
],
default="unencrypted",
editable=False,
max_length=11,
),
),
]

View File

@@ -1,61 +0,0 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20
from django.db import migrations
from django.db import models
from django.utils.text import slugify
def re_slug_all_the_things(apps, schema_editor):
"""
Rewrite all slug values to make sure they're actually slugs before we brand
them as uneditable.
"""
Tag = apps.get_model("documents", "Tag")
Correspondent = apps.get_model("documents", "Correspondent")
for klass in (Tag, Correspondent):
for instance in klass.objects.all():
klass.objects.filter(pk=instance.pk).update(slug=slugify(instance.slug))
class Migration(migrations.Migration):
dependencies = [
("documents", "0021_document_storage_type"),
]
operations = [
migrations.AlterModelOptions(
name="tag",
options={"ordering": ("name",)},
),
migrations.AlterField(
model_name="correspondent",
name="slug",
field=models.SlugField(blank=True, editable=False),
),
migrations.AlterField(
model_name="document",
name="file_type",
field=models.CharField(
choices=[
("pdf", "PDF"),
("png", "PNG"),
("jpg", "JPG"),
("gif", "GIF"),
("tiff", "TIFF"),
("txt", "TXT"),
("csv", "CSV"),
("md", "MD"),
],
editable=False,
max_length=4,
),
),
migrations.AlterField(
model_name="tag",
name="slug",
field=models.SlugField(blank=True, editable=False),
),
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop),
]
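slugify() is idempotent on values that are already valid slugs, which is what makes re_slug_all_the_things safe to run over a mix of clean and dirty data. A quick illustration:

from django.utils.text import slugify

assert slugify("Tax Returns 2018!") == "tax-returns-2018"
assert slugify("tax-returns-2018") == "tax-returns-2018"  # already a slug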

View File

@@ -1,39 +0,0 @@
# Generated by Django 2.0.10 on 2019-04-26 18:57
from django.db import migrations
from django.db import models
def set_filename(apps, schema_editor):
Document = apps.get_model("documents", "Document")
for doc in Document.objects.all():
file_name = f"{doc.pk:07}.{doc.file_type}"
if doc.storage_type == "gpg":
file_name += ".gpg"
# Set filename
doc.filename = file_name
# Save document
doc.save()
class Migration(migrations.Migration):
dependencies = [
("documents", "0022_auto_20181007_1420"),
]
operations = [
migrations.AddField(
model_name="document",
name="filename",
field=models.FilePathField(
default=None,
null=True,
editable=False,
help_text="Current filename in storage",
max_length=256,
),
),
migrations.RunPython(set_filename),
]

View File

@@ -1,147 +0,0 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35
import uuid
import django.db.models.deletion
from django.db import migrations
from django.db import models
def logs_set_default_group(apps, schema_editor):
Log = apps.get_model("documents", "Log")
for log in Log.objects.all():
if log.group is None:
log.group = uuid.uuid4()
log.save()
class Migration(migrations.Migration):
dependencies = [
("documents", "0023_document_current_filename"),
]
operations = [
migrations.AddField(
model_name="document",
name="archive_serial_number",
field=models.IntegerField(
blank=True,
db_index=True,
help_text="The position of this document in your physical document archive.",
null=True,
unique=True,
),
),
migrations.AddField(
model_name="tag",
name="is_inbox_tag",
field=models.BooleanField(
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
),
),
migrations.CreateModel(
name="DocumentType",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField(blank=True, editable=False)),
("match", models.CharField(blank=True, max_length=256)),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
(6, "Automatic Classification"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
("is_insensitive", models.BooleanField(default=True)),
],
options={
"abstract": False,
"ordering": ("name",),
},
),
migrations.AddField(
model_name="document",
name="document_type",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.documenttype",
),
),
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
(6, "Automatic Classification"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any"),
(2, "All"),
(3, "Literal"),
(4, "Regular Expression"),
(5, "Fuzzy Match"),
(6, "Automatic Classification"),
],
default=1,
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
),
),
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
),
),
migrations.AlterModelOptions(
name="log",
options={"ordering": ("-created",)},
),
migrations.RemoveField(
model_name="log",
name="modified",
),
migrations.AlterField(
model_name="log",
name="group",
field=models.UUIDField(blank=True, null=True),
),
migrations.RunPython(
code=django.db.migrations.operations.special.RunPython.noop,
reverse_code=logs_set_default_group,
),
]

View File

@@ -1,13 +0,0 @@
# Generated by Django 3.1.3 on 2020-11-09 16:36
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1000_update_paperless_all"),
]
operations = [
migrations.RunPython(migrations.RunPython.noop, migrations.RunPython.noop),
]

View File

@@ -1,24 +0,0 @@
# Generated by Django 3.1.3 on 2020-11-11 11:05
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1001_auto_20201109_1636"),
]
operations = [
migrations.AlterField(
model_name="document",
name="filename",
field=models.FilePathField(
default=None,
editable=False,
help_text="Current filename in storage",
max_length=1024,
null=True,
),
),
]

View File

@@ -1,92 +0,0 @@
# Generated by Django 3.1.3 on 2020-11-20 11:21
from pathlib import Path
import magic
from django.conf import settings
from django.db import migrations
from django.db import models
from paperless.db import GnuPG
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
def source_path(self) -> Path:
if self.filename:
fname: str = str(self.filename)
else:
fname = f"{self.pk:07}.{self.file_type}"
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"
return Path(settings.ORIGINALS_DIR) / fname
def add_mime_types(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
for d in documents:
with Path(source_path(d)).open("rb") as f:
if d.storage_type == STORAGE_TYPE_GPG:
data = GnuPG.decrypted(f)
else:
data = f.read(1024)
d.mime_type = magic.from_buffer(data, mime=True)
d.save()
def add_file_extensions(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
for d in documents:
d.file_type = Path(d.filename).suffix.lstrip(".")
d.save()
class Migration(migrations.Migration):
dependencies = [
("documents", "1002_auto_20201111_1105"),
]
operations = [
migrations.AddField(
model_name="document",
name="mime_type",
field=models.CharField(default="-", editable=False, max_length=256),
preserve_default=False,
),
migrations.RunPython(add_mime_types, migrations.RunPython.noop),
# This operation is here so that we can revert the entire migration:
# By allowing this field to be blank and null, we can revert the
# remove operation further down and the database won't complain about
# NOT NULL violations.
migrations.AlterField(
model_name="document",
name="file_type",
field=models.CharField(
choices=[
("pdf", "PDF"),
("png", "PNG"),
("jpg", "JPG"),
("gif", "GIF"),
("tiff", "TIFF"),
("txt", "TXT"),
("csv", "CSV"),
("md", "MD"),
],
editable=False,
max_length=4,
null=True,
blank=True,
),
),
migrations.RunPython(migrations.RunPython.noop, add_file_extensions),
migrations.RemoveField(
model_name="document",
name="file_type",
),
]
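add_mime_types only reads the first kilobyte of each unencrypted file because libmagic identifies a content type from leading bytes. Standalone, the sniffing looks like this (a sketch assuming the python-magic package; the file name is illustrative):

import magic

with open("scan.pdf", "rb") as f:
    mime = magic.from_buffer(f.read(1024), mime=True)

print(mime)  # e.g. "application/pdf"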

View File

@@ -1,12 +0,0 @@
# Generated by Django 3.1.3 on 2020-11-25 14:53
from django.db import migrations
from django.db.migrations import RunPython
class Migration(migrations.Migration):
dependencies = [
("documents", "1003_mime_types"),
]
operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]

View File

@@ -1,34 +0,0 @@
# Generated by Django 3.1.3 on 2020-11-29 00:48
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1004_sanity_check_schedule"),
]
operations = [
migrations.AddField(
model_name="document",
name="archive_checksum",
field=models.CharField(
blank=True,
editable=False,
help_text="The checksum of the archived document.",
max_length=32,
null=True,
),
),
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document.",
max_length=32,
unique=True,
),
),
]

View File

@@ -1,24 +0,0 @@
# Generated by Django 3.1.4 on 2020-12-08 22:09
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1005_checksums"),
]
operations = [
migrations.RemoveField(
model_name="correspondent",
name="slug",
),
migrations.RemoveField(
model_name="documenttype",
name="slug",
),
migrations.RemoveField(
model_name="tag",
name="slug",
),
]

View File

@@ -1,485 +0,0 @@
# Generated by Django 4.2.13 on 2024-06-28 18:01
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "1006_auto_20201208_2209"),
("documents", "1007_savedview_savedviewfilterrule"),
("documents", "1008_auto_20201216_1736"),
("documents", "1009_auto_20201216_2005"),
("documents", "1010_auto_20210101_2159"),
("documents", "1011_auto_20210101_2340"),
]
dependencies = [
("documents", "1005_checksums"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.RemoveField(
model_name="correspondent",
name="slug",
),
migrations.RemoveField(
model_name="documenttype",
name="slug",
),
migrations.RemoveField(
model_name="tag",
name="slug",
),
migrations.CreateModel(
name="SavedView",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, verbose_name="name")),
(
"show_on_dashboard",
models.BooleanField(verbose_name="show on dashboard"),
),
(
"show_in_sidebar",
models.BooleanField(verbose_name="show in sidebar"),
),
(
"sort_field",
models.CharField(max_length=128, verbose_name="sort field"),
),
(
"sort_reverse",
models.BooleanField(default=False, verbose_name="sort reverse"),
),
(
"user",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
],
options={
"ordering": ("name",),
"verbose_name": "saved view",
"verbose_name_plural": "saved views",
},
),
migrations.AlterModelOptions(
name="correspondent",
options={
"ordering": ("name",),
"verbose_name": "correspondent",
"verbose_name_plural": "correspondents",
},
),
migrations.AlterModelOptions(
name="document",
options={
"ordering": ("-created",),
"verbose_name": "document",
"verbose_name_plural": "documents",
},
),
migrations.AlterModelOptions(
name="documenttype",
options={
"verbose_name": "document type",
"verbose_name_plural": "document types",
},
),
migrations.AlterModelOptions(
name="log",
options={
"ordering": ("-created",),
"verbose_name": "log",
"verbose_name_plural": "logs",
},
),
migrations.AlterModelOptions(
name="tag",
options={"verbose_name": "tag", "verbose_name_plural": "tags"},
),
migrations.AlterField(
model_name="correspondent",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
migrations.AlterField(
model_name="correspondent",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="correspondent",
name="name",
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="document",
name="added",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
editable=False,
verbose_name="added",
),
),
migrations.AlterField(
model_name="document",
name="archive_checksum",
field=models.CharField(
blank=True,
editable=False,
help_text="The checksum of the archived document.",
max_length=32,
null=True,
verbose_name="archive checksum",
),
),
migrations.AlterField(
model_name="document",
name="archive_serial_number",
field=models.IntegerField(
blank=True,
db_index=True,
help_text="The position of this document in your physical document archive.",
null=True,
unique=True,
verbose_name="archive serial number",
),
),
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document.",
max_length=32,
unique=True,
verbose_name="checksum",
),
),
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
verbose_name="content",
),
),
migrations.AlterField(
model_name="document",
name="correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.correspondent",
verbose_name="correspondent",
),
),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
migrations.AlterField(
model_name="document",
name="document_type",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.documenttype",
verbose_name="document type",
),
),
migrations.AlterField(
model_name="document",
name="filename",
field=models.FilePathField(
default=None,
editable=False,
help_text="Current filename in storage",
max_length=1024,
null=True,
verbose_name="filename",
),
),
migrations.AlterField(
model_name="document",
name="mime_type",
field=models.CharField(
editable=False,
max_length=256,
verbose_name="mime type",
),
),
migrations.AlterField(
model_name="document",
name="modified",
field=models.DateTimeField(
auto_now=True,
db_index=True,
verbose_name="modified",
),
),
migrations.AlterField(
model_name="document",
name="storage_type",
field=models.CharField(
choices=[
("unencrypted", "Unencrypted"),
("gpg", "Encrypted with GNU Privacy Guard"),
],
default="unencrypted",
editable=False,
max_length=11,
verbose_name="storage type",
),
),
migrations.AlterField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.tag",
verbose_name="tags",
),
),
migrations.AlterField(
model_name="document",
name="title",
field=models.CharField(
blank=True,
db_index=True,
max_length=128,
verbose_name="title",
),
),
migrations.AlterField(
model_name="documenttype",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
migrations.AlterField(
model_name="documenttype",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
migrations.AlterField(
model_name="documenttype",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="documenttype",
name="name",
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="log",
name="created",
field=models.DateTimeField(auto_now_add=True, verbose_name="created"),
),
migrations.AlterField(
model_name="log",
name="group",
field=models.UUIDField(blank=True, null=True, verbose_name="group"),
),
migrations.AlterField(
model_name="log",
name="level",
field=models.PositiveIntegerField(
choices=[
(10, "debug"),
(20, "information"),
(30, "warning"),
(40, "error"),
(50, "critical"),
],
default=20,
verbose_name="level",
),
),
migrations.AlterField(
model_name="log",
name="message",
field=models.TextField(verbose_name="message"),
),
migrations.CreateModel(
name="SavedViewFilterRule",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"rule_type",
models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
],
verbose_name="rule type",
),
),
(
"value",
models.CharField(
blank=True,
max_length=128,
null=True,
verbose_name="value",
),
),
(
"saved_view",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="filter_rules",
to="documents.savedview",
verbose_name="saved view",
),
),
],
options={
"verbose_name": "filter rule",
"verbose_name_plural": "filter rules",
},
),
migrations.AlterField(
model_name="tag",
name="colour",
field=models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc"),
],
default=1,
verbose_name="color",
),
),
migrations.AlterField(
model_name="tag",
name="is_inbox_tag",
field=models.BooleanField(
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
verbose_name="is inbox tag",
),
),
migrations.AlterField(
model_name="tag",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
migrations.AlterField(
model_name="tag",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="tag",
name="name",
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
),
]

View File

@@ -1,90 +0,0 @@
# Generated by Django 3.1.4 on 2020-12-12 14:41
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1006_auto_20201208_2209"),
]
operations = [
migrations.CreateModel(
name="SavedView",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128)),
("show_on_dashboard", models.BooleanField()),
("show_in_sidebar", models.BooleanField()),
("sort_field", models.CharField(max_length=128)),
("sort_reverse", models.BooleanField(default=False)),
(
"user",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
),
),
],
),
migrations.CreateModel(
name="SavedViewFilterRule",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"rule_type",
models.PositiveIntegerField(
choices=[
(0, "Title contains"),
(1, "Content contains"),
(2, "ASN is"),
(3, "Correspondent is"),
(4, "Document type is"),
(5, "Is in inbox"),
(6, "Has tag"),
(7, "Has any tag"),
(8, "Created before"),
(9, "Created after"),
(10, "Created year is"),
(11, "Created month is"),
(12, "Created day is"),
(13, "Added before"),
(14, "Added after"),
(15, "Modified before"),
(16, "Modified after"),
(17, "Does not have tag"),
],
),
),
("value", models.CharField(max_length=128)),
(
"saved_view",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="filter_rules",
to="documents.savedview",
),
),
],
),
]

View File

@@ -1,33 +0,0 @@
# Generated by Django 3.1.4 on 2020-12-16 17:36
import django.db.models.functions.text
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1007_savedview_savedviewfilterrule"),
]
operations = [
migrations.AlterModelOptions(
name="correspondent",
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
),
migrations.AlterModelOptions(
name="document",
options={"ordering": ("-created",)},
),
migrations.AlterModelOptions(
name="documenttype",
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
),
migrations.AlterModelOptions(
name="savedview",
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
),
migrations.AlterModelOptions(
name="tag",
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
),
]

View File

@@ -1,28 +0,0 @@
# Generated by Django 3.1.4 on 2020-12-16 20:05
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1008_auto_20201216_1736"),
]
operations = [
migrations.AlterModelOptions(
name="correspondent",
options={"ordering": ("name",)},
),
migrations.AlterModelOptions(
name="documenttype",
options={"ordering": ("name",)},
),
migrations.AlterModelOptions(
name="savedview",
options={"ordering": ("name",)},
),
migrations.AlterModelOptions(
name="tag",
options={"ordering": ("name",)},
),
]

View File

@@ -1,18 +0,0 @@
# Generated by Django 3.1.4 on 2021-01-01 21:59
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1009_auto_20201216_2005"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="value",
field=models.CharField(blank=True, max_length=128, null=True),
),
]

View File

@@ -1,454 +0,0 @@
# Generated by Django 3.1.4 on 2021-01-01 23:40
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1010_auto_20210101_2159"),
]
operations = [
migrations.AlterModelOptions(
name="correspondent",
options={
"ordering": ("name",),
"verbose_name": "correspondent",
"verbose_name_plural": "correspondents",
},
),
migrations.AlterModelOptions(
name="document",
options={
"ordering": ("-created",),
"verbose_name": "document",
"verbose_name_plural": "documents",
},
),
migrations.AlterModelOptions(
name="documenttype",
options={
"verbose_name": "document type",
"verbose_name_plural": "document types",
},
),
migrations.AlterModelOptions(
name="log",
options={
"ordering": ("-created",),
"verbose_name": "log",
"verbose_name_plural": "logs",
},
),
migrations.AlterModelOptions(
name="savedview",
options={
"ordering": ("name",),
"verbose_name": "saved view",
"verbose_name_plural": "saved views",
},
),
migrations.AlterModelOptions(
name="savedviewfilterrule",
options={
"verbose_name": "filter rule",
"verbose_name_plural": "filter rules",
},
),
migrations.AlterModelOptions(
name="tag",
options={"verbose_name": "tag", "verbose_name_plural": "tags"},
),
migrations.AlterField(
model_name="correspondent",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
migrations.AlterField(
model_name="correspondent",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="correspondent",
name="name",
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="document",
name="added",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
editable=False,
verbose_name="added",
),
),
migrations.AlterField(
model_name="document",
name="archive_checksum",
field=models.CharField(
blank=True,
editable=False,
help_text="The checksum of the archived document.",
max_length=32,
null=True,
verbose_name="archive checksum",
),
),
migrations.AlterField(
model_name="document",
name="archive_serial_number",
field=models.IntegerField(
blank=True,
db_index=True,
help_text="The position of this document in your physical document archive.",
null=True,
unique=True,
verbose_name="archive serial number",
),
),
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
help_text="The checksum of the original document.",
max_length=32,
unique=True,
verbose_name="checksum",
),
),
migrations.AlterField(
model_name="document",
name="content",
field=models.TextField(
blank=True,
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
verbose_name="content",
),
),
migrations.AlterField(
model_name="document",
name="correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.correspondent",
verbose_name="correspondent",
),
),
migrations.AlterField(
model_name="document",
name="created",
field=models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
migrations.AlterField(
model_name="document",
name="document_type",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.documenttype",
verbose_name="document type",
),
),
migrations.AlterField(
model_name="document",
name="filename",
field=models.FilePathField(
default=None,
editable=False,
help_text="Current filename in storage",
max_length=1024,
null=True,
verbose_name="filename",
),
),
migrations.AlterField(
model_name="document",
name="mime_type",
field=models.CharField(
editable=False,
max_length=256,
verbose_name="mime type",
),
),
migrations.AlterField(
model_name="document",
name="modified",
field=models.DateTimeField(
auto_now=True,
db_index=True,
verbose_name="modified",
),
),
migrations.AlterField(
model_name="document",
name="storage_type",
field=models.CharField(
choices=[
("unencrypted", "Unencrypted"),
("gpg", "Encrypted with GNU Privacy Guard"),
],
default="unencrypted",
editable=False,
max_length=11,
verbose_name="storage type",
),
),
migrations.AlterField(
model_name="document",
name="tags",
field=models.ManyToManyField(
blank=True,
related_name="documents",
to="documents.Tag",
verbose_name="tags",
),
),
migrations.AlterField(
model_name="document",
name="title",
field=models.CharField(
blank=True,
db_index=True,
max_length=128,
verbose_name="title",
),
),
migrations.AlterField(
model_name="documenttype",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
migrations.AlterField(
model_name="documenttype",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
migrations.AlterField(
model_name="documenttype",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="documenttype",
name="name",
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="log",
name="created",
field=models.DateTimeField(auto_now_add=True, verbose_name="created"),
),
migrations.AlterField(
model_name="log",
name="group",
field=models.UUIDField(blank=True, null=True, verbose_name="group"),
),
migrations.AlterField(
model_name="log",
name="level",
field=models.PositiveIntegerField(
choices=[
(10, "debug"),
(20, "information"),
(30, "warning"),
(40, "error"),
(50, "critical"),
],
default=20,
verbose_name="level",
),
),
migrations.AlterField(
model_name="log",
name="message",
field=models.TextField(verbose_name="message"),
),
migrations.AlterField(
model_name="savedview",
name="name",
field=models.CharField(max_length=128, verbose_name="name"),
),
migrations.AlterField(
model_name="savedview",
name="show_in_sidebar",
field=models.BooleanField(verbose_name="show in sidebar"),
),
migrations.AlterField(
model_name="savedview",
name="show_on_dashboard",
field=models.BooleanField(verbose_name="show on dashboard"),
),
migrations.AlterField(
model_name="savedview",
name="sort_field",
field=models.CharField(max_length=128, verbose_name="sort field"),
),
migrations.AlterField(
model_name="savedview",
name="sort_reverse",
field=models.BooleanField(default=False, verbose_name="sort reverse"),
),
migrations.AlterField(
model_name="savedview",
name="user",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
],
verbose_name="rule type",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="saved_view",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="filter_rules",
to="documents.savedview",
verbose_name="saved view",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="value",
field=models.CharField(
blank=True,
max_length=128,
null=True,
verbose_name="value",
),
),
migrations.AlterField(
model_name="tag",
name="colour",
field=models.PositiveIntegerField(
choices=[
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc"),
],
default=1,
verbose_name="color",
),
),
migrations.AlterField(
model_name="tag",
name="is_inbox_tag",
field=models.BooleanField(
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
verbose_name="is inbox tag",
),
),
migrations.AlterField(
model_name="tag",
name="is_insensitive",
field=models.BooleanField(default=True, verbose_name="is insensitive"),
),
migrations.AlterField(
model_name="tag",
name="match",
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="tag",
name="name",
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
),
]

View File

@@ -1,367 +0,0 @@
# Generated by Django 3.1.6 on 2021-02-07 22:26
import datetime
import hashlib
import logging
import os
import shutil
from collections import defaultdict
from pathlib import Path
from time import sleep
import pathvalidate
from django.conf import settings
from django.db import migrations
from django.db import models
from django.template.defaultfilters import slugify
logger = logging.getLogger("paperless.migrations")
###############################################################################
# This is code copied straight from paperless before the change.
###############################################################################
class defaultdictNoStr(defaultdict):
def __str__(self): # pragma: no cover
raise ValueError("Don't use {tags} directly.")
def many_to_dictionary(field): # pragma: no cover
# Converts ManyToManyField to dictionary by assuming, that field
# entries contain an _ or - which will be used as a delimiter
mydictionary = dict()
for index, t in enumerate(field.all()):
# Populate tag names by index
mydictionary[index] = slugify(t.name)
# Find delimiter
delimiter = t.name.find("_")
if delimiter == -1:
delimiter = t.name.find("-")
if delimiter == -1:
continue
key = t.name[:delimiter]
value = t.name[delimiter + 1 :]
mydictionary[slugify(key)] = slugify(value)
return mydictionary
def archive_name_from_filename(filename: Path) -> Path:
return Path(filename.stem + ".pdf")
def archive_path_old(doc) -> Path:
if doc.filename:
fname = archive_name_from_filename(Path(doc.filename))
else:
fname = Path(f"{doc.pk:07}.pdf")
return settings.ARCHIVE_DIR / fname
STORAGE_TYPE_GPG = "gpg"
def archive_path_new(doc) -> Path | None:
if doc.archive_filename is not None:
return settings.ARCHIVE_DIR / doc.archive_filename
else:
return None
def source_path(doc) -> Path:
if doc.filename:
fname = doc.filename
else:
fname = f"{doc.pk:07}{doc.file_type}"
if doc.storage_type == STORAGE_TYPE_GPG:
fname = Path(str(fname) + ".gpg") # pragma: no cover
return settings.ORIGINALS_DIR / fname
def generate_unique_filename(doc, *, archive_filename=False):
if archive_filename:
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = doc.filename
root = settings.ORIGINALS_DIR
counter = 0
while True:
new_filename = generate_filename(
doc,
counter=counter,
archive_filename=archive_filename,
)
if new_filename == old_filename:
# still the same as before.
return new_filename
if (root / new_filename).exists():
counter += 1
else:
return new_filename
def generate_filename(doc, *, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
if settings.FILENAME_FORMAT is not None:
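            # Build the template context, sanitizing every value that could
            # contain filesystem-hostile characters.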
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
",".join(sorted([tag.name for tag in doc.tags.all()])),
replacement_text="-",
)
if doc.correspondent:
correspondent = pathvalidate.sanitize_filename(
doc.correspondent.name,
replacement_text="-",
)
else:
correspondent = "none"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
doc.document_type.name,
replacement_text="-",
)
else:
document_type = "none"
path = settings.FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
created=datetime.date.isoformat(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=f"{doc.created.month:02}" if doc.created else "none",
created_day=f"{doc.created.day:02}" if doc.created else "none",
added=datetime.date.isoformat(doc.added),
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
tags=tags,
tag_list=tag_list,
).strip()
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.FILENAME_FORMAT}, falling back to default",
)
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
###############################################################################
# This code performs bidirectional archive file transformation.
###############################################################################
def parse_wrapper(parser, path, mime_type, file_name):
# this is here so that I can mock this out for testing.
parser.parse(path, mime_type, file_name)
def create_archive_version(doc, retry_count=3):
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
logger.info(f"Regenerating archive document for document ID:{doc.id}")
parser_class = get_parser_class_for_mime_type(doc.mime_type)
for try_num in range(retry_count):
parser: DocumentParser = parser_class(None, None)
try:
parse_wrapper(
parser,
source_path(doc),
doc.mime_type,
Path(doc.filename).name,
)
doc.content = parser.get_text()
if parser.get_archive_path() and Path(parser.get_archive_path()).is_file():
doc.archive_filename = generate_unique_filename(
doc,
archive_filename=True,
)
with Path(parser.get_archive_path()).open("rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
archive_path_new(doc).parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
else:
doc.archive_checksum = None
logger.error(
f"Parser did not return an archive document for document "
f"ID:{doc.id}. Removing archive document.",
)
doc.save()
return
except ParseError:
if try_num + 1 == retry_count:
logger.exception(
f"Unable to regenerate archive document for ID:{doc.id}. You "
f"need to invoke the document_archiver management command "
f"manually for that document.",
)
doc.archive_checksum = None
doc.save()
return
else:
# This is mostly here for the tika parser in docker
# environments. The servers for parsing need to come up first,
# and the docker setup doesn't ensure that tika is running
# before attempting migrations.
logger.error("Parse error, will try again in 5 seconds...")
sleep(5)
finally:
parser.cleanup()
def move_old_to_new_locations(apps, schema_editor):
Document = apps.get_model("documents", "Document")
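    # Phases: detect archive filename collisions, sanity-check the unaffected
    # files, verify a parser exists for every affected document, then move the
    # unaffected files and regenerate the colliding archive versions.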
affected_document_ids = set()
old_archive_path_to_id = {}
# check for documents that have incorrect archive versions
for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc)
if old_path in old_archive_path_to_id:
affected_document_ids.add(doc.id)
affected_document_ids.add(old_archive_path_to_id[old_path])
else:
old_archive_path_to_id[old_path] = doc.id
# check that archive files of all unaffected documents are in place
for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc)
if doc.id not in affected_document_ids and not old_path.is_file():
raise ValueError(
f"Archived document ID:{doc.id} does not exist at: {old_path}",
)
# check that we can regenerate affected archive versions
for doc_id in affected_document_ids:
from documents.parsers import get_parser_class_for_mime_type
doc = Document.objects.get(id=doc_id)
parser_class = get_parser_class_for_mime_type(doc.mime_type)
if not parser_class:
raise ValueError(
f"Document ID:{doc.id} has an invalid archived document, "
f"but no parsers are available. Cannot migrate.",
)
for doc in Document.objects.filter(archive_checksum__isnull=False):
if doc.id in affected_document_ids:
old_path = archive_path_old(doc)
# remove affected archive versions
if old_path.is_file():
logger.debug(f"Removing {old_path}")
old_path.unlink()
else:
# Set archive path for unaffected files
doc.archive_filename = archive_name_from_filename(Path(doc.filename))
Document.objects.filter(id=doc.id).update(
archive_filename=doc.archive_filename,
)
# regenerate archive documents
for doc_id in affected_document_ids:
doc = Document.objects.get(id=doc_id)
create_archive_version(doc)
def move_new_to_old_locations(apps, schema_editor):
Document = apps.get_model("documents", "Document")
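    # Reverse migration: refuse to run if the derived legacy archive names
    # would clash or overwrite existing files, then move everything back.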
old_archive_paths = set()
for doc in Document.objects.filter(archive_checksum__isnull=False):
new_archive_path = archive_path_new(doc)
old_archive_path = archive_path_old(doc)
if old_archive_path in old_archive_paths:
raise ValueError(
f"Cannot migrate: Archive file name {old_archive_path} of "
f"document {doc.filename} would clash with another archive "
f"filename.",
)
old_archive_paths.add(old_archive_path)
if new_archive_path != old_archive_path and old_archive_path.is_file():
raise ValueError(
f"Cannot migrate: Cannot move {new_archive_path} to "
f"{old_archive_path}: file already exists.",
)
for doc in Document.objects.filter(archive_checksum__isnull=False):
new_archive_path = archive_path_new(doc)
old_archive_path = archive_path_old(doc)
if new_archive_path != old_archive_path:
logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
shutil.move(new_archive_path, old_archive_path)
class Migration(migrations.Migration):
dependencies = [
("documents", "1011_auto_20210101_2340"),
]
operations = [
migrations.AddField(
model_name="document",
name="archive_filename",
field=models.FilePathField(
default=None,
editable=False,
help_text="Current archive filename in storage",
max_length=1024,
null=True,
unique=True,
verbose_name="archive filename",
),
),
migrations.AlterField(
model_name="document",
name="filename",
field=models.FilePathField(
default=None,
editable=False,
help_text="Current filename in storage",
max_length=1024,
null=True,
unique=True,
verbose_name="filename",
),
),
migrations.RunPython(move_old_to_new_locations, move_new_to_old_locations),
]

View File

@@ -1,74 +0,0 @@
# Generated by Django 3.1.4 on 2020-12-02 21:43
from django.db import migrations
from django.db import models
COLOURS_OLD = {
1: "#a6cee3",
2: "#1f78b4",
3: "#b2df8a",
4: "#33a02c",
5: "#fb9a99",
6: "#e31a1c",
7: "#fdbf6f",
8: "#ff7f00",
9: "#cab2d6",
10: "#6a3d9a",
11: "#b15928",
12: "#000000",
13: "#cccccc",
}
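# Forward copies each tag's legacy colour id into the new hex "color" field;
# reverse maps the hex value back to a legacy id, defaulting to 1.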
def forward(apps, schema_editor):
Tag = apps.get_model("documents", "Tag")
for tag in Tag.objects.all():
colour_old_id = tag.colour_old
rgb = COLOURS_OLD[colour_old_id]
tag.color = rgb
tag.save()
def reverse(apps, schema_editor):
Tag = apps.get_model("documents", "Tag")
def _get_colour_id(rdb):
for idx, rdbx in COLOURS_OLD.items():
if rdbx == rdb:
return idx
# Return colour 1 if we can't match anything
return 1
for tag in Tag.objects.all():
colour_id = _get_colour_id(tag.color)
tag.colour_old = colour_id
tag.save()
class Migration(migrations.Migration):
dependencies = [
("documents", "1012_fix_archive_files"),
]
operations = [
migrations.RenameField(
model_name="tag",
old_name="colour",
new_name="colour_old",
),
migrations.AddField(
model_name="tag",
name="color",
field=models.CharField(
default="#a6cee3",
max_length=7,
verbose_name="color",
),
),
migrations.RunPython(forward, reverse),
migrations.RemoveField(
model_name="tag",
name="colour_old",
),
]

View File

@@ -1,42 +0,0 @@
# Generated by Django 3.1.7 on 2021-02-28 15:14
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1013_migrate_tag_colour"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
],
verbose_name="rule type",
),
),
]

View File

@@ -1,27 +0,0 @@
# Generated by Django 3.1.7 on 2021-04-04 18:28
import logging
from django.db import migrations
logger = logging.getLogger("paperless.migrations")
def remove_null_characters(apps, schema_editor):
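    # Null bytes are not valid in text columns on some databases (notably
    # PostgreSQL), so strip them from existing document content.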
Document = apps.get_model("documents", "Document")
for doc in Document.objects.all():
content: str = doc.content
if "\0" in content:
logger.info(f"Removing null characters from document {doc}...")
doc.content = content.replace("\0", " ")
doc.save()
class Migration(migrations.Migration):
dependencies = [
("documents", "1014_auto_20210228_1614"),
]
operations = [
migrations.RunPython(remove_null_characters, migrations.RunPython.noop),
]

View File

@@ -1,54 +0,0 @@
# Generated by Django 3.1.7 on 2021-03-17 12:51
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1015_remove_null_characters"),
]
operations = [
migrations.AlterField(
model_name="savedview",
name="sort_field",
field=models.CharField(
blank=True,
max_length=128,
null=True,
verbose_name="sort field",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
],
verbose_name="rule type",
),
),
]

View File

@@ -1,190 +0,0 @@
# Generated by Django 4.2.13 on 2024-06-28 18:09
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "1016_auto_20210317_1351"),
("documents", "1017_alter_savedviewfilterrule_rule_type"),
("documents", "1018_alter_savedviewfilterrule_value"),
("documents", "1019_uisettings"),
("documents", "1019_storagepath_document_storage_path"),
("documents", "1020_merge_20220518_1839"),
]
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1015_remove_null_characters"),
]
operations = [
migrations.AlterField(
model_name="savedview",
name="sort_field",
field=models.CharField(
blank=True,
max_length=128,
null=True,
verbose_name="sort field",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
],
verbose_name="rule type",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
],
verbose_name="rule type",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="value",
field=models.CharField(
blank=True,
max_length=255,
null=True,
verbose_name="value",
),
),
migrations.CreateModel(
name="UiSettings",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("settings", models.JSONField(null=True)),
(
"user",
models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
related_name="ui_settings",
to=settings.AUTH_USER_MODEL,
),
),
],
),
migrations.CreateModel(
name="StoragePath",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"name",
models.CharField(max_length=128, unique=True, verbose_name="name"),
),
(
"match",
models.CharField(blank=True, max_length=256, verbose_name="match"),
),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
(
"is_insensitive",
models.BooleanField(default=True, verbose_name="is insensitive"),
),
("path", models.CharField(max_length=512, verbose_name="path")),
],
options={
"verbose_name": "storage path",
"verbose_name_plural": "storage paths",
"ordering": ("name",),
},
),
migrations.AddField(
model_name="document",
name="storage_path",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.storagepath",
verbose_name="storage path",
),
),
]

View File

@@ -1,45 +0,0 @@
# Generated by Django 3.2.12 on 2022-03-17 11:59
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1016_auto_20210317_1351"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
],
verbose_name="rule type",
),
),
]

View File

@@ -1,23 +0,0 @@
# Generated by Django 4.0.3 on 2022-04-01 22:50
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1017_alter_savedviewfilterrule_rule_type"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="value",
field=models.CharField(
blank=True,
max_length=255,
null=True,
verbose_name="value",
),
),
]

View File

@@ -1,73 +0,0 @@
# Generated by Django 4.0.4 on 2022-05-02 15:56
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1018_alter_savedviewfilterrule_value"),
]
operations = [
migrations.CreateModel(
name="StoragePath",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"name",
models.CharField(max_length=128, unique=True, verbose_name="name"),
),
(
"match",
models.CharField(blank=True, max_length=256, verbose_name="match"),
),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
(
"is_insensitive",
models.BooleanField(default=True, verbose_name="is insensitive"),
),
("path", models.CharField(max_length=512, verbose_name="path")),
],
options={
"verbose_name": "storage path",
"verbose_name_plural": "storage paths",
"ordering": ("name",),
},
),
migrations.AddField(
model_name="document",
name="storage_path",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.storagepath",
verbose_name="storage path",
),
),
]

View File

@@ -1,39 +0,0 @@
# Generated by Django 4.0.4 on 2022-05-07 05:10
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1018_alter_savedviewfilterrule_value"),
]
operations = [
migrations.CreateModel(
name="UiSettings",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("settings", models.JSONField(null=True)),
(
"user",
models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
related_name="ui_settings",
to=settings.AUTH_USER_MODEL,
),
),
],
),
]

View File

@@ -1,12 +0,0 @@
# Generated by Django 4.0.4 on 2022-05-18 18:39
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1019_storagepath_document_storage_path"),
("documents", "1019_uisettings"),
]
operations = []

View File

@@ -1,104 +0,0 @@
# Generated by Django 4.0.5 on 2022-06-11 15:40
import logging
import multiprocessing.pool
import shutil
import tempfile
import time
from pathlib import Path
from django.conf import settings
from django.db import migrations
from documents.parsers import run_convert
logger = logging.getLogger("paperless.migrations")
def _do_convert(work_package):
existing_thumbnail, converted_thumbnail = work_package
try:
logger.info(f"Converting thumbnail: {existing_thumbnail}")
# Run actual conversion
run_convert(
density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file=f"{existing_thumbnail}[0]",
output_file=str(converted_thumbnail),
)
# Copy newly created thumbnail to thumbnail directory
shutil.copy(converted_thumbnail, existing_thumbnail.parent)
# Remove the PNG version
existing_thumbnail.unlink()
logger.info(
"Conversion to WebP completed, "
f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
)
except Exception as e:
logger.error(f"Error converting thumbnail (existing file unchanged): {e}")
def _convert_thumbnails_to_webp(apps, schema_editor):
start = time.time()
with tempfile.TemporaryDirectory() as tempdir:
work_packages = []
for file in Path(settings.THUMBNAIL_DIR).glob("*.png"):
existing_thumbnail = file.resolve()
# Change the existing filename suffix from png to webp
converted_thumbnail_name = existing_thumbnail.with_suffix(
".webp",
).name
# Create the expected output filename in the tempdir
converted_thumbnail = (
Path(tempdir) / Path(converted_thumbnail_name)
).resolve()
# Package up the necessary info
work_packages.append(
(existing_thumbnail, converted_thumbnail),
)
if work_packages:
logger.info(
"\n\n"
" This is a one-time only migration to convert thumbnails for all of your\n"
" documents into WebP format. If you have a lot of documents though, \n"
" this may take a while, so a coffee break may be in order."
"\n",
)
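            # Convert in parallel, capped at four workers, recycling each
            # worker after a few tasks (maxtasksperchild) to bound memory use.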
with multiprocessing.pool.Pool(
processes=min(multiprocessing.cpu_count(), 4),
maxtasksperchild=4,
) as pool:
pool.map(_do_convert, work_packages)
end = time.time()
duration = end - start
logger.info(f"Conversion completed in {duration:.3f}s")
class Migration(migrations.Migration):
dependencies = [
("documents", "1020_merge_20220518_1839"),
]
operations = [
migrations.RunPython(
code=_convert_thumbnails_to_webp,
reverse_code=migrations.RunPython.noop,
),
]

View File

@@ -1,52 +0,0 @@
# Generated by Django 4.0.4 on 2022-05-23 07:14
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1021_webp_thumbnail_conversion"),
]
operations = [
migrations.CreateModel(
name="PaperlessTask",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("task_id", models.CharField(max_length=128)),
("name", models.CharField(max_length=256, null=True)),
(
"created",
models.DateTimeField(auto_now=True, verbose_name="created"),
),
(
"started",
models.DateTimeField(null=True, verbose_name="started"),
),
("acknowledged", models.BooleanField(default=False)),
(
"attempted_task",
models.OneToOneField(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="attempted_task",
# This is a dummy field, 1026 will fix up the column
# This manual change is required, as django doesn't really support
# removing an app which has migration deps like this
to="documents.document",
),
),
],
),
]

View File

@@ -1,668 +0,0 @@
# Generated by Django 4.2.13 on 2024-06-28 18:10
import django.core.validators
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
replaces = [
("documents", "1022_paperlesstask"),
("documents", "1023_add_comments"),
("documents", "1024_document_original_filename"),
("documents", "1025_alter_savedviewfilterrule_rule_type"),
("documents", "1026_transition_to_celery"),
("documents", "1027_remove_paperlesstask_attempted_task_and_more"),
("documents", "1028_remove_paperlesstask_task_args_and_more"),
("documents", "1029_alter_document_archive_serial_number"),
("documents", "1030_alter_paperlesstask_task_file_name"),
("documents", "1031_remove_savedview_user_correspondent_owner_and_more"),
("documents", "1032_alter_correspondent_matching_algorithm_and_more"),
("documents", "1033_alter_documenttype_options_alter_tag_options_and_more"),
("documents", "1034_alter_savedviewfilterrule_rule_type"),
("documents", "1035_rename_comment_note"),
("documents", "1036_alter_savedviewfilterrule_rule_type"),
]
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("django_celery_results", "0011_taskresult_periodic_task_name"),
("documents", "1021_webp_thumbnail_conversion"),
]
operations = [
migrations.CreateModel(
name="Comment",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"comment",
models.TextField(
blank=True,
help_text="Comment for the document",
verbose_name="content",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
(
"document",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.document",
verbose_name="document",
),
),
(
"user",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="users",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
],
options={
"verbose_name": "comment",
"verbose_name_plural": "comments",
"ordering": ("created",),
},
),
migrations.AddField(
model_name="document",
name="original_filename",
field=models.CharField(
default=None,
editable=False,
help_text="The original name of the file when it was uploaded",
max_length=1024,
null=True,
verbose_name="original filename",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
],
verbose_name="rule type",
),
),
migrations.CreateModel(
name="PaperlessTask",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("task_id", models.CharField(max_length=128)),
("acknowledged", models.BooleanField(default=False)),
(
"attempted_task",
models.OneToOneField(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="attempted_task",
to="django_celery_results.taskresult",
),
),
],
),
migrations.RunSQL(
sql="DROP TABLE IF EXISTS django_q_ormq",
reverse_sql="",
),
migrations.RunSQL(
sql="DROP TABLE IF EXISTS django_q_schedule",
reverse_sql="",
),
migrations.RunSQL(
sql="DROP TABLE IF EXISTS django_q_task",
reverse_sql="",
),
migrations.RemoveField(
model_name="paperlesstask",
name="attempted_task",
),
migrations.AddField(
model_name="paperlesstask",
name="date_created",
field=models.DateTimeField(
default=django.utils.timezone.now,
help_text="Datetime field when the task result was created in UTC",
null=True,
verbose_name="Created DateTime",
),
),
migrations.AddField(
model_name="paperlesstask",
name="date_done",
field=models.DateTimeField(
default=None,
help_text="Datetime field when the task was completed in UTC",
null=True,
verbose_name="Completed DateTime",
),
),
migrations.AddField(
model_name="paperlesstask",
name="date_started",
field=models.DateTimeField(
default=None,
help_text="Datetime field when the task was started in UTC",
null=True,
verbose_name="Started DateTime",
),
),
migrations.AddField(
model_name="paperlesstask",
name="result",
field=models.TextField(
default=None,
help_text="The data returned by the task",
null=True,
verbose_name="Result Data",
),
),
migrations.AddField(
model_name="paperlesstask",
name="status",
field=models.CharField(
choices=[
("FAILURE", "FAILURE"),
("PENDING", "PENDING"),
("RECEIVED", "RECEIVED"),
("RETRY", "RETRY"),
("REVOKED", "REVOKED"),
("STARTED", "STARTED"),
("SUCCESS", "SUCCESS"),
],
default="PENDING",
help_text="Current state of the task being run",
max_length=30,
verbose_name="Task State",
),
),
migrations.AddField(
model_name="paperlesstask",
name="task_name",
field=models.CharField(
help_text="Name of the Task which was run",
max_length=255,
null=True,
verbose_name="Task Name",
),
),
migrations.AlterField(
model_name="paperlesstask",
name="acknowledged",
field=models.BooleanField(
default=False,
help_text="If the task is acknowledged via the frontend or API",
verbose_name="Acknowledged",
),
),
migrations.AlterField(
model_name="paperlesstask",
name="task_id",
field=models.CharField(
help_text="Celery ID for the Task that was run",
max_length=255,
unique=True,
verbose_name="Task ID",
),
),
migrations.AlterField(
model_name="document",
name="archive_serial_number",
field=models.PositiveIntegerField(
blank=True,
db_index=True,
help_text="The position of this document in your physical document archive.",
null=True,
unique=True,
validators=[
django.core.validators.MaxValueValidator(4294967295),
django.core.validators.MinValueValidator(0),
],
verbose_name="archive serial number",
),
),
migrations.AddField(
model_name="paperlesstask",
name="task_file_name",
field=models.CharField(
help_text="Name of the file which the Task was run for",
max_length=255,
null=True,
verbose_name="Task Filename",
),
),
migrations.RenameField(
model_name="savedview",
old_name="user",
new_name="owner",
),
migrations.AlterField(
model_name="savedview",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="correspondent",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="document",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="documenttype",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="storagepath",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="tag",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="documenttype",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="storagepath",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterModelOptions(
name="documenttype",
options={
"ordering": ("name",),
"verbose_name": "document type",
"verbose_name_plural": "document types",
},
),
migrations.AlterModelOptions(
name="tag",
options={
"ordering": ("name",),
"verbose_name": "tag",
"verbose_name_plural": "tags",
},
),
migrations.AlterField(
model_name="correspondent",
name="name",
field=models.CharField(max_length=128, verbose_name="name"),
),
migrations.AlterField(
model_name="documenttype",
name="name",
field=models.CharField(max_length=128, verbose_name="name"),
),
migrations.AlterField(
model_name="storagepath",
name="name",
field=models.CharField(max_length=128, verbose_name="name"),
),
migrations.AlterField(
model_name="tag",
name="name",
field=models.CharField(max_length=128, verbose_name="name"),
),
migrations.AddConstraint(
model_name="correspondent",
constraint=models.UniqueConstraint(
fields=("name", "owner"),
name="documents_correspondent_unique_name_owner",
),
),
migrations.AddConstraint(
model_name="correspondent",
constraint=models.UniqueConstraint(
condition=models.Q(("owner__isnull", True)),
fields=("name",),
name="documents_correspondent_name_uniq",
),
),
migrations.AddConstraint(
model_name="documenttype",
constraint=models.UniqueConstraint(
fields=("name", "owner"),
name="documents_documenttype_unique_name_owner",
),
),
migrations.AddConstraint(
model_name="documenttype",
constraint=models.UniqueConstraint(
condition=models.Q(("owner__isnull", True)),
fields=("name",),
name="documents_documenttype_name_uniq",
),
),
migrations.AddConstraint(
model_name="storagepath",
constraint=models.UniqueConstraint(
fields=("name", "owner"),
name="documents_storagepath_unique_name_owner",
),
),
migrations.AddConstraint(
model_name="storagepath",
constraint=models.UniqueConstraint(
condition=models.Q(("owner__isnull", True)),
fields=("name",),
name="documents_storagepath_name_uniq",
),
),
migrations.AddConstraint(
model_name="tag",
constraint=models.UniqueConstraint(
fields=("name", "owner"),
name="documents_tag_unique_name_owner",
),
),
migrations.AddConstraint(
model_name="tag",
constraint=models.UniqueConstraint(
condition=models.Q(("owner__isnull", True)),
fields=("name",),
name="documents_tag_name_uniq",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
(26, "has correspondent in"),
(27, "does not have correspondent in"),
(28, "has document type in"),
(29, "does not have document type in"),
(30, "has storage path in"),
(31, "does not have storage path in"),
],
verbose_name="rule type",
),
),
migrations.RenameModel(
old_name="Comment",
new_name="Note",
),
migrations.RenameField(
model_name="note",
old_name="comment",
new_name="note",
),
migrations.AlterModelOptions(
name="note",
options={
"ordering": ("created",),
"verbose_name": "note",
"verbose_name_plural": "notes",
},
),
migrations.AlterField(
model_name="note",
name="document",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="notes",
to="documents.document",
verbose_name="document",
),
),
migrations.AlterField(
model_name="note",
name="note",
field=models.TextField(
blank=True,
help_text="Note for the document",
verbose_name="content",
),
),
migrations.AlterField(
model_name="note",
name="user",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="notes",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
(26, "has correspondent in"),
(27, "does not have correspondent in"),
(28, "has document type in"),
(29, "does not have document type in"),
(30, "has storage path in"),
(31, "does not have storage path in"),
(32, "owner is"),
(33, "has owner in"),
(34, "does not have owner"),
(35, "does not have owner in"),
],
verbose_name="rule type",
),
),
]

View File

@@ -1,70 +0,0 @@
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1022_paperlesstask"),
]
operations = [
migrations.CreateModel(
name="Comment",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"comment",
models.TextField(
blank=True,
help_text="Comment for the document",
verbose_name="content",
),
),
(
"created",
models.DateTimeField(
db_index=True,
default=django.utils.timezone.now,
verbose_name="created",
),
),
(
"document",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="documents",
to="documents.document",
verbose_name="document",
),
),
(
"user",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="users",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
],
options={
"verbose_name": "comment",
"verbose_name_plural": "comments",
"ordering": ("created",),
},
),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 4.0.6 on 2022-07-25 06:34
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1023_add_comments"),
]
operations = [
migrations.AddField(
model_name="document",
name="original_filename",
field=models.CharField(
default=None,
editable=False,
help_text="The original name of the file when it was uploaded",
max_length=1024,
null=True,
verbose_name="original filename",
),
),
]

View File

@@ -1,48 +0,0 @@
# Generated by Django 4.0.5 on 2022-08-26 16:49
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1024_document_original_filename"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
],
verbose_name="rule type",
),
),
]

View File

@@ -1,60 +0,0 @@
# Generated by Django 4.1.1 on 2022-09-27 19:31
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("django_celery_results", "0011_taskresult_periodic_task_name"),
("documents", "1025_alter_savedviewfilterrule_rule_type"),
]
operations = [
migrations.RemoveField(
model_name="paperlesstask",
name="created",
),
migrations.RemoveField(
model_name="paperlesstask",
name="name",
),
migrations.RemoveField(
model_name="paperlesstask",
name="started",
),
# Remove the field from the model
migrations.RemoveField(
model_name="paperlesstask",
name="attempted_task",
),
# Add the field back, pointing to the correct model
# This resolves a problem where the temporary change in 1022
# results in a type mismatch
migrations.AddField(
model_name="paperlesstask",
name="attempted_task",
field=models.OneToOneField(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="attempted_task",
to="django_celery_results.taskresult",
),
),
# Drop the django-q tables entirely
# Must be done last or there could be references here
migrations.RunSQL(
"DROP TABLE IF EXISTS django_q_ormq",
reverse_sql=migrations.RunSQL.noop,
),
migrations.RunSQL(
"DROP TABLE IF EXISTS django_q_schedule",
reverse_sql=migrations.RunSQL.noop,
),
migrations.RunSQL(
"DROP TABLE IF EXISTS django_q_task",
reverse_sql=migrations.RunSQL.noop,
),
]

View File

@@ -1,134 +0,0 @@
# Generated by Django 4.1.2 on 2022-10-17 16:31
import django.utils.timezone
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1026_transition_to_celery"),
]
operations = [
migrations.RemoveField(
model_name="paperlesstask",
name="attempted_task",
),
migrations.AddField(
model_name="paperlesstask",
name="date_created",
field=models.DateTimeField(
default=django.utils.timezone.now,
help_text="Datetime field when the task result was created in UTC",
null=True,
verbose_name="Created DateTime",
),
),
migrations.AddField(
model_name="paperlesstask",
name="date_done",
field=models.DateTimeField(
default=None,
help_text="Datetime field when the task was completed in UTC",
null=True,
verbose_name="Completed DateTime",
),
),
migrations.AddField(
model_name="paperlesstask",
name="date_started",
field=models.DateTimeField(
default=None,
help_text="Datetime field when the task was started in UTC",
null=True,
verbose_name="Started DateTime",
),
),
migrations.AddField(
model_name="paperlesstask",
name="result",
field=models.TextField(
default=None,
help_text="The data returned by the task",
null=True,
verbose_name="Result Data",
),
),
migrations.AddField(
model_name="paperlesstask",
name="status",
field=models.CharField(
choices=[
("FAILURE", "FAILURE"),
("PENDING", "PENDING"),
("RECEIVED", "RECEIVED"),
("RETRY", "RETRY"),
("REVOKED", "REVOKED"),
("STARTED", "STARTED"),
("SUCCESS", "SUCCESS"),
],
default="PENDING",
help_text="Current state of the task being run",
max_length=30,
verbose_name="Task State",
),
),
migrations.AddField(
model_name="paperlesstask",
name="task_args",
field=models.JSONField(
help_text="JSON representation of the positional arguments used with the task",
null=True,
verbose_name="Task Positional Arguments",
),
),
migrations.AddField(
model_name="paperlesstask",
name="task_file_name",
field=models.CharField(
help_text="Name of the file which the Task was run for",
max_length=255,
null=True,
verbose_name="Task Name",
),
),
migrations.AddField(
model_name="paperlesstask",
name="task_kwargs",
field=models.JSONField(
help_text="JSON representation of the named arguments used with the task",
null=True,
verbose_name="Task Named Arguments",
),
),
migrations.AddField(
model_name="paperlesstask",
name="task_name",
field=models.CharField(
help_text="Name of the Task which was run",
max_length=255,
null=True,
verbose_name="Task Name",
),
),
migrations.AlterField(
model_name="paperlesstask",
name="acknowledged",
field=models.BooleanField(
default=False,
help_text="If the task is acknowledged via the frontend or API",
verbose_name="Acknowledged",
),
),
migrations.AlterField(
model_name="paperlesstask",
name="task_id",
field=models.CharField(
help_text="Celery ID for the Task that was run",
max_length=255,
unique=True,
verbose_name="Task ID",
),
),
]

View File

@@ -1,20 +0,0 @@
# Generated by Django 4.1.3 on 2022-11-22 17:50
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1027_remove_paperlesstask_attempted_task_and_more"),
]
operations = [
migrations.RemoveField(
model_name="paperlesstask",
name="task_args",
),
migrations.RemoveField(
model_name="paperlesstask",
name="task_kwargs",
),
]

View File

@@ -1,30 +0,0 @@
# Generated by Django 4.1.4 on 2023-01-24 17:56
import django.core.validators
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1028_remove_paperlesstask_task_args_and_more"),
]
operations = [
migrations.AlterField(
model_name="document",
name="archive_serial_number",
field=models.PositiveIntegerField(
blank=True,
db_index=True,
help_text="The position of this document in your physical document archive.",
null=True,
unique=True,
validators=[
django.core.validators.MaxValueValidator(4294967295),
django.core.validators.MinValueValidator(0),
],
verbose_name="archive serial number",
),
),
]

View File

@@ -1,23 +0,0 @@
# Generated by Django 4.1.5 on 2023-02-03 21:53
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1029_alter_document_archive_serial_number"),
]
operations = [
migrations.AlterField(
model_name="paperlesstask",
name="task_file_name",
field=models.CharField(
help_text="Name of the file which the Task was run for",
max_length=255,
null=True,
verbose_name="Task Filename",
),
),
]

View File

@@ -1,87 +0,0 @@
# Generated by Django 4.1.4 on 2022-02-03 04:24
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1030_alter_paperlesstask_task_file_name"),
]
operations = [
migrations.RenameField(
model_name="savedview",
old_name="user",
new_name="owner",
),
migrations.AlterField(
model_name="savedview",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="correspondent",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="document",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="documenttype",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="storagepath",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="tag",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
]

View File

@@ -1,81 +0,0 @@
# Generated by Django 4.1.7 on 2023-02-22 00:45
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1031_remove_savedview_user_correspondent_owner_and_more"),
]
operations = [
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="documenttype",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="storagepath",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
]

View File

@@ -1,54 +0,0 @@
# Generated by Django 4.1.5 on 2023-03-15 07:10
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1033_alter_documenttype_options_alter_tag_options_and_more"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
(26, "has correspondent in"),
(27, "does not have correspondent in"),
(28, "has document type in"),
(29, "does not have document type in"),
(30, "has storage path in"),
(31, "does not have storage path in"),
],
verbose_name="rule type",
),
),
]

View File

@@ -1,62 +0,0 @@
# Generated by Django 4.1.5 on 2023-03-17 22:15
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1034_alter_savedviewfilterrule_rule_type"),
]
operations = [
migrations.RenameModel(
old_name="Comment",
new_name="Note",
),
migrations.RenameField(model_name="note", old_name="comment", new_name="note"),
migrations.AlterModelOptions(
name="note",
options={
"ordering": ("created",),
"verbose_name": "note",
"verbose_name_plural": "notes",
},
),
migrations.AlterField(
model_name="note",
name="document",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="notes",
to="documents.document",
verbose_name="document",
),
),
migrations.AlterField(
model_name="note",
name="note",
field=models.TextField(
blank=True,
help_text="Note for the document",
verbose_name="content",
),
),
migrations.AlterField(
model_name="note",
name="user",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="notes",
to=settings.AUTH_USER_MODEL,
verbose_name="user",
),
),
]

View File

@@ -1,58 +0,0 @@
# Generated by Django 4.1.7 on 2023-05-04 04:11
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1035_rename_comment_note"),
]
operations = [
migrations.AlterField(
model_name="savedviewfilterrule",
name="rule_type",
field=models.PositiveIntegerField(
choices=[
(0, "title contains"),
(1, "content contains"),
(2, "ASN is"),
(3, "correspondent is"),
(4, "document type is"),
(5, "is in inbox"),
(6, "has tag"),
(7, "has any tag"),
(8, "created before"),
(9, "created after"),
(10, "created year is"),
(11, "created month is"),
(12, "created day is"),
(13, "added before"),
(14, "added after"),
(15, "modified before"),
(16, "modified after"),
(17, "does not have tag"),
(18, "does not have ASN"),
(19, "title or content contains"),
(20, "fulltext query"),
(21, "more like this"),
(22, "has tags in"),
(23, "ASN greater than"),
(24, "ASN less than"),
(25, "storage path is"),
(26, "has correspondent in"),
(27, "does not have correspondent in"),
(28, "has document type in"),
(29, "does not have document type in"),
(30, "has storage path in"),
(31, "does not have storage path in"),
(32, "owner is"),
(33, "has owner in"),
(34, "does not have owner"),
(35, "does not have owner in"),
],
verbose_name="rule type",
),
),
]

Some files were not shown because too many files have changed in this diff.