mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-24 22:39:02 -06:00
Compare commits
15 Commits
chore/pyte
...
feature-47
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7795740f2d | ||
|
|
e036ea972a | ||
|
|
2523100a52 | ||
|
|
3ea21f3d20 | ||
|
|
e19ef49ed2 | ||
|
|
b710fc2907 | ||
|
|
21985e5d84 | ||
|
|
107f58c4ae | ||
|
|
1b8fd1fffa | ||
|
|
13e45fd45c | ||
|
|
bbffaf22d2 | ||
|
|
fdb45a8134 | ||
|
|
a8e1344339 | ||
|
|
cf89d81b9e | ||
|
|
d0032c18be |
@@ -4,8 +4,7 @@
|
||||
|
||||
set -eu
|
||||
|
||||
for command in decrypt_documents \
|
||||
document_archiver \
|
||||
for command in document_archiver \
|
||||
document_exporter \
|
||||
document_importer \
|
||||
mail_fetcher \
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/command/with-contenv /usr/bin/bash
|
||||
# shellcheck shell=bash
|
||||
|
||||
set -e
|
||||
|
||||
cd "${PAPERLESS_SRC_DIR}"
|
||||
|
||||
if [[ $(id -u) == 0 ]]; then
|
||||
s6-setuidgid paperless python3 manage.py decrypt_documents "$@"
|
||||
elif [[ $(id -un) == "paperless" ]]; then
|
||||
python3 manage.py decrypt_documents "$@"
|
||||
else
|
||||
echo "Unknown user."
|
||||
fi
|
||||
@@ -580,36 +580,6 @@ document.
|
||||
documents, such as encrypted PDF documents. The archiver will skip over
|
||||
these documents each time it sees them.
|
||||
|
||||
### Managing encryption {#encryption}
|
||||
|
||||
!!! warning
|
||||
|
||||
Encryption was removed in [paperless-ng 0.9](changelog.md#paperless-ng-090)
|
||||
because it did not really provide any additional security: the passphrase
|
||||
was stored in a configuration file on the same system as the documents.
|
||||
Furthermore, the entire text content of the documents is stored plain in
|
||||
the database, even if your documents are encrypted. Filenames are not
|
||||
encrypted either. Finally, the web server provides transparent access to
|
||||
your encrypted documents.
|
||||
|
||||
Consider running paperless on an encrypted filesystem instead, which
|
||||
will then at least provide security against physical hardware theft.
|
||||
|
||||
#### Enabling encryption
|
||||
|
||||
Enabling encryption is no longer supported.
|
||||
|
||||
#### Disabling encryption
|
||||
|
||||
Basic usage to disable encryption of your document store:
|
||||
|
||||
(Note: If `PAPERLESS_PASSPHRASE` isn't set already, you need to specify
|
||||
it here)
|
||||
|
||||
```
|
||||
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
|
||||
```
|
||||
|
||||
### Detecting duplicates {#fuzzy_duplicate}
|
||||
|
||||
Paperless already catches and prevents upload of exactly matching documents,
|
||||
|
||||
@@ -1146,8 +1146,9 @@ via the consumption directory, you can disable the consumer to save resources.
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
|
||||
|
||||
: When the consumer detects a duplicate document, it will not touch
|
||||
the original document. This default behavior can be changed here.
|
||||
: As of version 3.0, Paperless-ngx allows duplicate documents to be consumed by default, _except_ when
|
||||
this setting is enabled. When enabled, Paperless will check if a document with the same hash already
|
||||
exists in the system and delete the duplicate file from the consumption directory without consuming it.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
|
||||
@@ -17,3 +17,9 @@ separating the directory ignore from the file ignore.
|
||||
| `CONSUMER_POLLING_RETRY_COUNT` | _Removed_ | Automatic with stability tracking |
|
||||
| `CONSUMER_IGNORE_PATTERNS` | [`CONSUMER_IGNORE_PATTERNS`](configuration.md#PAPERLESS_CONSUMER_IGNORE_PATTERNS) | **Now regex, not fnmatch**; user patterns are added to (not replacing) default ones |
|
||||
| _New_ | [`CONSUMER_IGNORE_DIRS`](configuration.md#PAPERLESS_CONSUMER_IGNORE_DIRS) | Additional directories to ignore; user entries are added to (not replacing) defaults |
|
||||
|
||||
## Encryption Support
|
||||
|
||||
Document and thumbnail encryption is no longer supported. This was previously deprecated in [paperless-ng 0.9.3](https://github.com/paperless-ngx/paperless-ngx/blob/dev/docs/changelog.md#paperless-ng-093).
|
||||
|
||||
Users must decrypt their documents using the `decrypt_documents` command before upgrading.
|
||||
|
||||
@@ -97,6 +97,12 @@
|
||||
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
|
||||
}
|
||||
</ng-template>
|
||||
@if (task.duplicate_documents?.length > 0) {
|
||||
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
|
||||
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
|
||||
<span i18n>Duplicate(s) detected</span>
|
||||
</div>
|
||||
}
|
||||
</td>
|
||||
}
|
||||
<td class="d-lg-none">
|
||||
|
||||
@@ -370,6 +370,37 @@
|
||||
</ng-template>
|
||||
</li>
|
||||
}
|
||||
|
||||
@if (document?.duplicate_documents?.length) {
|
||||
<li [ngbNavItem]="DocumentDetailNavIDs.Duplicates">
|
||||
<a class="text-nowrap" ngbNavLink i18n>
|
||||
Duplicates
|
||||
<span class="badge text-bg-secondary ms-1">{{ document.duplicate_documents.length }}</span>
|
||||
</a>
|
||||
<ng-template ngbNavContent>
|
||||
<div class="d-flex flex-column gap-2">
|
||||
<div class="fst-italic" i18n>Duplicate documents detected:</div>
|
||||
<div class="list-group">
|
||||
@for (duplicate of document.duplicate_documents; track duplicate.id) {
|
||||
<a
|
||||
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
|
||||
[routerLink]="['/documents', duplicate.id, 'details']"
|
||||
[class.disabled]="duplicate.deleted_at"
|
||||
>
|
||||
<span class="d-flex align-items-center gap-2">
|
||||
<span>{{ duplicate.title || ('#' + duplicate.id) }}</span>
|
||||
@if (duplicate.deleted_at) {
|
||||
<span class="badge text-bg-secondary" i18n>In trash</span>
|
||||
}
|
||||
</span>
|
||||
<span class="text-secondary">#{{ duplicate.id }}</span>
|
||||
</a>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
</ng-template>
|
||||
</li>
|
||||
}
|
||||
</ul>
|
||||
|
||||
<div [ngbNavOutlet]="nav" class="mt-3"></div>
|
||||
|
||||
@@ -301,16 +301,16 @@ describe('DocumentDetailComponent', () => {
|
||||
.spyOn(openDocumentsService, 'openDocument')
|
||||
.mockReturnValueOnce(of(true))
|
||||
fixture.detectChanges()
|
||||
expect(component.activeNavID).toEqual(5) // DocumentDetailNavIDs.Notes
|
||||
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
|
||||
})
|
||||
|
||||
it('should change url on tab switch', () => {
|
||||
initNormally()
|
||||
const navigateSpy = jest.spyOn(router, 'navigate')
|
||||
component.nav.select(5)
|
||||
component.nav.select(component.DocumentDetailNavIDs.Notes)
|
||||
component.nav.navChange.next({
|
||||
activeId: 1,
|
||||
nextId: 5,
|
||||
nextId: component.DocumentDetailNavIDs.Notes,
|
||||
preventDefault: () => {},
|
||||
})
|
||||
fixture.detectChanges()
|
||||
@@ -352,6 +352,18 @@ describe('DocumentDetailComponent', () => {
|
||||
expect(component.document).toEqual(doc)
|
||||
})
|
||||
|
||||
it('should fall back to details tab when duplicates tab is active but no duplicates', () => {
|
||||
initNormally()
|
||||
component.activeNavID = component.DocumentDetailNavIDs.Duplicates
|
||||
const noDupDoc = { ...doc, duplicate_documents: [] }
|
||||
|
||||
component.updateComponent(noDupDoc)
|
||||
|
||||
expect(component.activeNavID).toEqual(
|
||||
component.DocumentDetailNavIDs.Details
|
||||
)
|
||||
})
|
||||
|
||||
it('should load already-opened document via param', () => {
|
||||
initNormally()
|
||||
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
|
||||
@@ -367,6 +379,38 @@ describe('DocumentDetailComponent', () => {
|
||||
expect(component.document).toEqual(doc)
|
||||
})
|
||||
|
||||
it('should update cached open document duplicates when reloading an open doc', () => {
|
||||
const openDoc = { ...doc, duplicate_documents: [{ id: 1, title: 'Old' }] }
|
||||
const updatedDuplicates = [
|
||||
{ id: 2, title: 'Newer duplicate', deleted_at: null },
|
||||
]
|
||||
jest
|
||||
.spyOn(activatedRoute, 'paramMap', 'get')
|
||||
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
|
||||
jest.spyOn(documentService, 'get').mockReturnValue(
|
||||
of({
|
||||
...doc,
|
||||
modified: new Date('2024-01-02T00:00:00Z'),
|
||||
duplicate_documents: updatedDuplicates,
|
||||
})
|
||||
)
|
||||
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
|
||||
const saveSpy = jest.spyOn(openDocumentsService, 'save')
|
||||
jest.spyOn(openDocumentsService, 'openDocument').mockReturnValue(of(true))
|
||||
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
|
||||
of({
|
||||
count: customFields.length,
|
||||
all: customFields.map((f) => f.id),
|
||||
results: customFields,
|
||||
})
|
||||
)
|
||||
|
||||
fixture.detectChanges()
|
||||
|
||||
expect(openDoc.duplicate_documents).toEqual(updatedDuplicates)
|
||||
expect(saveSpy).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should disable form if user cannot edit', () => {
|
||||
currentUserHasObjectPermissions = false
|
||||
initNormally()
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
FormsModule,
|
||||
ReactiveFormsModule,
|
||||
} from '@angular/forms'
|
||||
import { ActivatedRoute, Router } from '@angular/router'
|
||||
import { ActivatedRoute, Router, RouterModule } from '@angular/router'
|
||||
import {
|
||||
NgbDateStruct,
|
||||
NgbDropdownModule,
|
||||
@@ -124,6 +124,7 @@ enum DocumentDetailNavIDs {
|
||||
Notes = 5,
|
||||
Permissions = 6,
|
||||
History = 7,
|
||||
Duplicates = 8,
|
||||
}
|
||||
|
||||
enum ContentRenderType {
|
||||
@@ -181,6 +182,7 @@ export enum ZoomSetting {
|
||||
NgxBootstrapIconsModule,
|
||||
PdfViewerModule,
|
||||
TextAreaComponent,
|
||||
RouterModule,
|
||||
],
|
||||
})
|
||||
export class DocumentDetailComponent
|
||||
@@ -454,6 +456,11 @@ export class DocumentDetailComponent
|
||||
const openDocument = this.openDocumentService.getOpenDocument(
|
||||
this.documentId
|
||||
)
|
||||
// update duplicate documents if present
|
||||
if (openDocument && doc?.duplicate_documents) {
|
||||
openDocument.duplicate_documents = doc.duplicate_documents
|
||||
this.openDocumentService.save()
|
||||
}
|
||||
const useDoc = openDocument || doc
|
||||
if (openDocument) {
|
||||
if (
|
||||
@@ -704,6 +711,13 @@ export class DocumentDetailComponent
|
||||
}
|
||||
this.title = this.documentTitlePipe.transform(doc.title)
|
||||
this.prepareForm(doc)
|
||||
|
||||
if (
|
||||
this.activeNavID === DocumentDetailNavIDs.Duplicates &&
|
||||
!doc?.duplicate_documents?.length
|
||||
) {
|
||||
this.activeNavID = DocumentDetailNavIDs.Details
|
||||
}
|
||||
}
|
||||
|
||||
get customFieldFormFields(): FormArray {
|
||||
|
||||
@@ -159,6 +159,8 @@ export interface Document extends ObjectWithPermissions {
|
||||
|
||||
page_count?: number
|
||||
|
||||
duplicate_documents?: Document[]
|
||||
|
||||
// Frontend only
|
||||
__changedFields?: string[]
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { Document } from './document'
|
||||
import { ObjectWithId } from './object-with-id'
|
||||
|
||||
export enum PaperlessTaskType {
|
||||
@@ -42,5 +43,7 @@ export interface PaperlessTask extends ObjectWithId {
|
||||
|
||||
related_document?: number
|
||||
|
||||
duplicate_documents?: Document[]
|
||||
|
||||
owner?: number
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
# this is here so that django finds the checks.
|
||||
from documents.checks import changed_password_check
|
||||
from documents.checks import parser_check
|
||||
|
||||
__all__ = ["changed_password_check", "parser_check"]
|
||||
__all__ = ["parser_check"]
|
||||
|
||||
@@ -60,7 +60,6 @@ class DocumentAdmin(GuardedModelAdmin):
|
||||
"added",
|
||||
"modified",
|
||||
"mime_type",
|
||||
"storage_type",
|
||||
"filename",
|
||||
"checksum",
|
||||
"archive_filename",
|
||||
|
||||
@@ -1,60 +1,12 @@
|
||||
import textwrap
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.checks import Error
|
||||
from django.core.checks import Warning
|
||||
from django.core.checks import register
|
||||
from django.core.exceptions import FieldError
|
||||
from django.db.utils import OperationalError
|
||||
from django.db.utils import ProgrammingError
|
||||
|
||||
from documents.signals import document_consumer_declaration
|
||||
from documents.templating.utils import convert_format_str_to_template_format
|
||||
|
||||
|
||||
@register()
|
||||
def changed_password_check(app_configs, **kwargs):
|
||||
from documents.models import Document
|
||||
from paperless.db import GnuPG
|
||||
|
||||
try:
|
||||
encrypted_doc = (
|
||||
Document.objects.filter(
|
||||
storage_type=Document.STORAGE_TYPE_GPG,
|
||||
)
|
||||
.only("pk", "storage_type")
|
||||
.first()
|
||||
)
|
||||
except (OperationalError, ProgrammingError, FieldError):
|
||||
return [] # No documents table yet
|
||||
|
||||
if encrypted_doc:
|
||||
if not settings.PASSPHRASE:
|
||||
return [
|
||||
Error(
|
||||
"The database contains encrypted documents but no password is set.",
|
||||
),
|
||||
]
|
||||
|
||||
if not GnuPG.decrypted(encrypted_doc.source_file):
|
||||
return [
|
||||
Error(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
The current password doesn't match the password of the
|
||||
existing documents.
|
||||
|
||||
If you intend to change your password, you must first export
|
||||
all of the old documents, start fresh with the new password
|
||||
and then re-import them."
|
||||
""",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
return []
|
||||
|
||||
|
||||
@register()
|
||||
def parser_check(app_configs, **kwargs):
|
||||
parsers = []
|
||||
|
||||
@@ -128,7 +128,7 @@ def thumbnail_last_modified(request, pk: int) -> datetime | None:
|
||||
Cache should be (slightly?) faster than filesystem
|
||||
"""
|
||||
try:
|
||||
doc = Document.objects.only("storage_type").get(pk=pk)
|
||||
doc = Document.objects.only("pk").get(pk=pk)
|
||||
if not doc.thumbnail_path.exists():
|
||||
return None
|
||||
doc_key = get_thumbnail_modified_key(pk)
|
||||
|
||||
@@ -497,7 +497,6 @@ class ConsumerPlugin(
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
self._write(
|
||||
document.storage_type,
|
||||
self.unmodified_original
|
||||
if self.unmodified_original is not None
|
||||
else self.working_copy,
|
||||
@@ -505,7 +504,6 @@ class ConsumerPlugin(
|
||||
)
|
||||
|
||||
self._write(
|
||||
document.storage_type,
|
||||
thumbnail,
|
||||
document.thumbnail_path,
|
||||
)
|
||||
@@ -517,7 +515,6 @@ class ConsumerPlugin(
|
||||
)
|
||||
create_source_path_directory(document.archive_path)
|
||||
self._write(
|
||||
document.storage_type,
|
||||
archive_path,
|
||||
document.archive_path,
|
||||
)
|
||||
@@ -637,8 +634,6 @@ class ConsumerPlugin(
|
||||
)
|
||||
self.log.debug(f"Creation date from st_mtime: {create_date}")
|
||||
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
if self.metadata.filename:
|
||||
title = Path(self.metadata.filename).stem
|
||||
else:
|
||||
@@ -665,7 +660,6 @@ class ConsumerPlugin(
|
||||
checksum=hashlib.md5(file_for_checksum.read_bytes()).hexdigest(),
|
||||
created=create_date,
|
||||
modified=create_date,
|
||||
storage_type=storage_type,
|
||||
page_count=page_count,
|
||||
original_filename=self.filename,
|
||||
)
|
||||
@@ -736,7 +730,7 @@ class ConsumerPlugin(
|
||||
}
|
||||
CustomFieldInstance.objects.create(**args) # adds to document
|
||||
|
||||
def _write(self, storage_type, source, target):
|
||||
def _write(self, source, target):
|
||||
with (
|
||||
Path(source).open("rb") as read_file,
|
||||
Path(target).open("wb") as write_file,
|
||||
@@ -785,18 +779,44 @@ class ConsumerPreflightPlugin(
|
||||
Q(checksum=checksum) | Q(archive_checksum=checksum),
|
||||
)
|
||||
if existing_doc.exists():
|
||||
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
|
||||
log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
|
||||
existing_doc = existing_doc.order_by("-created")
|
||||
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
|
||||
log_msg = (
|
||||
f"Consuming duplicate {self.filename}: "
|
||||
f"{existing_doc.count()} existing document(s) share the same content."
|
||||
)
|
||||
|
||||
if existing_doc.first().deleted_at is not None:
|
||||
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
|
||||
log_msg += " Note: existing document is in the trash."
|
||||
if duplicates_in_trash.exists():
|
||||
log_msg += " Note: at least one existing document is in the trash."
|
||||
|
||||
self.log.warning(log_msg)
|
||||
|
||||
if settings.CONSUMER_DELETE_DUPLICATES:
|
||||
duplicate = existing_doc.first()
|
||||
duplicate_label = (
|
||||
duplicate.title
|
||||
or duplicate.original_filename
|
||||
or (Path(duplicate.filename).name if duplicate.filename else None)
|
||||
or str(duplicate.pk)
|
||||
)
|
||||
|
||||
Path(self.input_doc.original_file).unlink()
|
||||
|
||||
failure_msg = (
|
||||
f"Not consuming {self.filename}: "
|
||||
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
|
||||
)
|
||||
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
|
||||
|
||||
if duplicates_in_trash.exists():
|
||||
status_msg = (
|
||||
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
|
||||
)
|
||||
failure_msg += " Note: existing document is in the trash."
|
||||
|
||||
self._fail(
|
||||
msg,
|
||||
log_msg,
|
||||
status_msg,
|
||||
failure_msg,
|
||||
)
|
||||
|
||||
def pre_check_directories(self):
|
||||
|
||||
@@ -126,7 +126,6 @@ def generate_filename(
|
||||
doc: Document,
|
||||
*,
|
||||
counter=0,
|
||||
append_gpg=True,
|
||||
archive_filename=False,
|
||||
) -> Path:
|
||||
base_path: Path | None = None
|
||||
@@ -170,8 +169,4 @@ def generate_filename(
|
||||
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
full_path = Path(final_filename)
|
||||
|
||||
# Add GPG extension if needed
|
||||
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
||||
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
|
||||
|
||||
return full_path
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandError
|
||||
|
||||
from documents.models import Document
|
||||
from paperless.db import GnuPG
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = (
|
||||
"This is how you migrate your stored documents from an encrypted "
|
||||
"state to an unencrypted one (or vice-versa)"
|
||||
)
|
||||
|
||||
def add_arguments(self, parser) -> None:
|
||||
parser.add_argument(
|
||||
"--passphrase",
|
||||
help=(
|
||||
"If PAPERLESS_PASSPHRASE isn't set already, you need to specify it here"
|
||||
),
|
||||
)
|
||||
|
||||
def handle(self, *args, **options) -> None:
|
||||
try:
|
||||
self.stdout.write(
|
||||
self.style.WARNING(
|
||||
"\n\n"
|
||||
"WARNING: This script is going to work directly on your "
|
||||
"document originals, so\n"
|
||||
"WARNING: you probably shouldn't run "
|
||||
"this unless you've got a recent backup\n"
|
||||
"WARNING: handy. It "
|
||||
"*should* work without a hitch, but be safe and backup your\n"
|
||||
"WARNING: stuff first.\n\n"
|
||||
"Hit Ctrl+C to exit now, or Enter to "
|
||||
"continue.\n\n",
|
||||
),
|
||||
)
|
||||
_ = input()
|
||||
except KeyboardInterrupt:
|
||||
return
|
||||
|
||||
passphrase = options["passphrase"] or settings.PASSPHRASE
|
||||
if not passphrase:
|
||||
raise CommandError(
|
||||
"Passphrase not defined. Please set it with --passphrase or "
|
||||
"by declaring it in your environment or your config.",
|
||||
)
|
||||
|
||||
self.__gpg_to_unencrypted(passphrase)
|
||||
|
||||
def __gpg_to_unencrypted(self, passphrase: str) -> None:
|
||||
encrypted_files = Document.objects.filter(
|
||||
storage_type=Document.STORAGE_TYPE_GPG,
|
||||
)
|
||||
|
||||
for document in encrypted_files:
|
||||
self.stdout.write(f"Decrypting {document}")
|
||||
|
||||
old_paths = [document.source_path, document.thumbnail_path]
|
||||
|
||||
with document.source_file as file_handle:
|
||||
raw_document = GnuPG.decrypted(file_handle, passphrase)
|
||||
with document.thumbnail_file as file_handle:
|
||||
raw_thumb = GnuPG.decrypted(file_handle, passphrase)
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
ext: str = Path(document.filename).suffix
|
||||
|
||||
if not ext == ".gpg":
|
||||
raise CommandError(
|
||||
f"Abort: encrypted file {document.source_path} does not "
|
||||
f"end with .gpg",
|
||||
)
|
||||
|
||||
document.filename = Path(document.filename).stem
|
||||
|
||||
with document.source_path.open("wb") as f:
|
||||
f.write(raw_document)
|
||||
|
||||
with document.thumbnail_path.open("wb") as f:
|
||||
f.write(raw_thumb)
|
||||
|
||||
Document.objects.filter(id=document.id).update(
|
||||
storage_type=document.storage_type,
|
||||
filename=document.filename,
|
||||
)
|
||||
|
||||
for path in old_paths:
|
||||
path.unlink()
|
||||
@@ -3,7 +3,6 @@ import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -56,7 +55,6 @@ from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from paperless import version
|
||||
from paperless.db import GnuPG
|
||||
from paperless.models import ApplicationConfiguration
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
@@ -316,20 +314,17 @@ class Command(CryptMixin, BaseCommand):
|
||||
total=len(document_manifest),
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
# 3.1. store files unencrypted
|
||||
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document = document_map[document_dict["pk"]]
|
||||
|
||||
# 3.2. generate a unique filename
|
||||
# 3.1. generate a unique filename
|
||||
base_name = self.generate_base_name(document)
|
||||
|
||||
# 3.3. write filenames into manifest
|
||||
# 3.2. write filenames into manifest
|
||||
original_target, thumbnail_target, archive_target = (
|
||||
self.generate_document_targets(document, base_name, document_dict)
|
||||
)
|
||||
|
||||
# 3.4. write files to target folder
|
||||
# 3.3. write files to target folder
|
||||
if not self.data_only:
|
||||
self.copy_document_files(
|
||||
document,
|
||||
@@ -423,7 +418,6 @@ class Command(CryptMixin, BaseCommand):
|
||||
base_name = generate_filename(
|
||||
document,
|
||||
counter=filename_counter,
|
||||
append_gpg=False,
|
||||
)
|
||||
else:
|
||||
base_name = document.get_public_filename(counter=filename_counter)
|
||||
@@ -482,28 +476,6 @@ class Command(CryptMixin, BaseCommand):
|
||||
|
||||
If the document is encrypted, the files are decrypted before copying them to the target location.
|
||||
"""
|
||||
if document.storage_type == Document.STORAGE_TYPE_GPG:
|
||||
t = int(time.mktime(document.created.timetuple()))
|
||||
|
||||
original_target.parent.mkdir(parents=True, exist_ok=True)
|
||||
with document.source_file as out_file:
|
||||
original_target.write_bytes(GnuPG.decrypted(out_file))
|
||||
os.utime(original_target, times=(t, t))
|
||||
|
||||
if thumbnail_target:
|
||||
thumbnail_target.parent.mkdir(parents=True, exist_ok=True)
|
||||
with document.thumbnail_file as out_file:
|
||||
thumbnail_target.write_bytes(GnuPG.decrypted(out_file))
|
||||
os.utime(thumbnail_target, times=(t, t))
|
||||
|
||||
if archive_target:
|
||||
archive_target.parent.mkdir(parents=True, exist_ok=True)
|
||||
if TYPE_CHECKING:
|
||||
assert isinstance(document.archive_path, Path)
|
||||
with document.archive_path as out_file:
|
||||
archive_target.write_bytes(GnuPG.decrypted(out_file))
|
||||
os.utime(archive_target, times=(t, t))
|
||||
else:
|
||||
self.check_and_copy(
|
||||
document.source_path,
|
||||
document.checksum,
|
||||
|
||||
@@ -383,8 +383,6 @@ class Command(CryptMixin, BaseCommand):
|
||||
else:
|
||||
archive_path = None
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
if Path(document.source_path).is_file():
|
||||
raise FileExistsError(document.source_path)
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
# Generated by Django 5.2.9 on 2026-01-24 23:05
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0003_workflowaction_order"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RemoveField(
|
||||
model_name="document",
|
||||
name="storage_type",
|
||||
),
|
||||
]
|
||||
@@ -0,0 +1,23 @@
|
||||
# Generated by Django 5.2.7 on 2026-01-14 17:45
|
||||
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "0004_remove_document_storage_type"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name="document",
|
||||
name="checksum",
|
||||
field=models.CharField(
|
||||
editable=False,
|
||||
max_length=32,
|
||||
verbose_name="checksum",
|
||||
help_text="The checksum of the original document.",
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -154,13 +154,6 @@ class StoragePath(MatchingModel):
|
||||
|
||||
|
||||
class Document(SoftDeleteModel, ModelWithOwner):
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
STORAGE_TYPES = (
|
||||
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
|
||||
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard")),
|
||||
)
|
||||
|
||||
correspondent = models.ForeignKey(
|
||||
Correspondent,
|
||||
blank=True,
|
||||
@@ -212,7 +205,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
||||
_("checksum"),
|
||||
max_length=32,
|
||||
editable=False,
|
||||
unique=True,
|
||||
help_text=_("The checksum of the original document."),
|
||||
)
|
||||
|
||||
@@ -250,14 +242,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
||||
db_index=True,
|
||||
)
|
||||
|
||||
storage_type = models.CharField(
|
||||
_("storage type"),
|
||||
max_length=11,
|
||||
choices=STORAGE_TYPES,
|
||||
default=STORAGE_TYPE_UNENCRYPTED,
|
||||
editable=False,
|
||||
)
|
||||
|
||||
added = models.DateTimeField(
|
||||
_("added"),
|
||||
default=timezone.now,
|
||||
@@ -353,12 +337,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
||||
|
||||
@property
|
||||
def source_path(self) -> Path:
|
||||
if self.filename:
|
||||
fname = str(self.filename)
|
||||
else:
|
||||
fname = f"{self.pk:07}{self.file_type}"
|
||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
fname = str(self.filename) if self.filename else f"{self.pk:07}{self.file_type}"
|
||||
|
||||
return (settings.ORIGINALS_DIR / Path(fname)).resolve()
|
||||
|
||||
@@ -407,8 +386,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
||||
@property
|
||||
def thumbnail_path(self) -> Path:
|
||||
webp_file_name = f"{self.pk:07}.webp"
|
||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||
webp_file_name += ".gpg"
|
||||
|
||||
webp_file_path = settings.THUMBNAIL_DIR / Path(webp_file_name)
|
||||
|
||||
|
||||
@@ -148,13 +148,29 @@ def get_document_count_filter_for_user(user):
|
||||
)
|
||||
|
||||
|
||||
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
|
||||
objects_owned = Model.objects.filter(owner=user)
|
||||
objects_unowned = Model.objects.filter(owner__isnull=True)
|
||||
def get_objects_for_user_owner_aware(
|
||||
user,
|
||||
perms,
|
||||
Model,
|
||||
*,
|
||||
include_deleted=False,
|
||||
) -> QuerySet:
|
||||
"""
|
||||
Returns objects the user owns, are unowned, or has explicit perms.
|
||||
When include_deleted is True, soft-deleted items are also included.
|
||||
"""
|
||||
manager = (
|
||||
Model.global_objects
|
||||
if include_deleted and hasattr(Model, "global_objects")
|
||||
else Model.objects
|
||||
)
|
||||
|
||||
objects_owned = manager.filter(owner=user)
|
||||
objects_unowned = manager.filter(owner__isnull=True)
|
||||
objects_with_perms = get_objects_for_user(
|
||||
user=user,
|
||||
perms=perms,
|
||||
klass=Model,
|
||||
klass=manager.all(),
|
||||
accept_global_perms=False,
|
||||
)
|
||||
return objects_owned | objects_unowned | objects_with_perms
|
||||
|
||||
@@ -23,6 +23,7 @@ from django.core.validators import MinValueValidator
|
||||
from django.core.validators import RegexValidator
|
||||
from django.core.validators import integer_validator
|
||||
from django.db.models import Count
|
||||
from django.db.models import Q
|
||||
from django.db.models.functions import Lower
|
||||
from django.utils.crypto import get_random_string
|
||||
from django.utils.dateparse import parse_datetime
|
||||
@@ -72,6 +73,7 @@ from documents.models import WorkflowTrigger
|
||||
from documents.parsers import is_mime_type_supported
|
||||
from documents.permissions import get_document_count_filter_for_user
|
||||
from documents.permissions import get_groups_with_only_permission
|
||||
from documents.permissions import get_objects_for_user_owner_aware
|
||||
from documents.permissions import set_permissions_for_object
|
||||
from documents.regex import validate_regex_pattern
|
||||
from documents.templating.filepath import validate_filepath_template_and_render
|
||||
@@ -1014,6 +1016,29 @@ class NotesSerializer(serializers.ModelSerializer):
|
||||
return ret
|
||||
|
||||
|
||||
def _get_viewable_duplicates(document: Document, user: User | None):
|
||||
checksums = {document.checksum}
|
||||
if document.archive_checksum:
|
||||
checksums.add(document.archive_checksum)
|
||||
duplicates = Document.global_objects.filter(
|
||||
Q(checksum__in=checksums) | Q(archive_checksum__in=checksums),
|
||||
).exclude(pk=document.pk)
|
||||
duplicates = duplicates.order_by("-created")
|
||||
allowed = get_objects_for_user_owner_aware(
|
||||
user,
|
||||
"documents.view_document",
|
||||
Document,
|
||||
include_deleted=True,
|
||||
)
|
||||
return duplicates.filter(id__in=allowed.values_list("id", flat=True))
|
||||
|
||||
|
||||
class DuplicateDocumentSummarySerializer(serializers.Serializer):
|
||||
id = serializers.IntegerField()
|
||||
title = serializers.CharField()
|
||||
deleted_at = serializers.DateTimeField(allow_null=True)
|
||||
|
||||
|
||||
@extend_schema_serializer(
|
||||
deprecate_fields=["created_date"],
|
||||
)
|
||||
@@ -1031,6 +1056,7 @@ class DocumentSerializer(
|
||||
archived_file_name = SerializerMethodField()
|
||||
created_date = serializers.DateField(required=False)
|
||||
page_count = SerializerMethodField()
|
||||
duplicate_documents = SerializerMethodField()
|
||||
|
||||
notes = NotesSerializer(many=True, required=False, read_only=True)
|
||||
|
||||
@@ -1056,6 +1082,16 @@ class DocumentSerializer(
|
||||
def get_page_count(self, obj) -> int | None:
|
||||
return obj.page_count
|
||||
|
||||
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
|
||||
def get_duplicate_documents(self, obj):
|
||||
view = self.context.get("view")
|
||||
if view and getattr(view, "action", None) != "retrieve":
|
||||
return []
|
||||
request = self.context.get("request")
|
||||
user = request.user if request else None
|
||||
duplicates = _get_viewable_duplicates(obj, user)
|
||||
return list(duplicates.values("id", "title", "deleted_at"))
|
||||
|
||||
def get_original_file_name(self, obj) -> str | None:
|
||||
return obj.original_filename
|
||||
|
||||
@@ -1233,6 +1269,7 @@ class DocumentSerializer(
|
||||
"archive_serial_number",
|
||||
"original_file_name",
|
||||
"archived_file_name",
|
||||
"duplicate_documents",
|
||||
"owner",
|
||||
"permissions",
|
||||
"user_can_change",
|
||||
@@ -2094,10 +2131,12 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
||||
"result",
|
||||
"acknowledged",
|
||||
"related_document",
|
||||
"duplicate_documents",
|
||||
"owner",
|
||||
)
|
||||
|
||||
related_document = serializers.SerializerMethodField()
|
||||
duplicate_documents = serializers.SerializerMethodField()
|
||||
created_doc_re = re.compile(r"New document id (\d+) created")
|
||||
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
|
||||
|
||||
@@ -2122,6 +2161,17 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
||||
|
||||
return result
|
||||
|
||||
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
    """
    Return ``id``/``title``/``deleted_at`` dictionaries for the documents
    that duplicate the document related to task ``obj``.

    The related document id comes from ``get_related_document``. An empty
    list is returned when there is no related document, when no user is
    available from the request in the serializer context, or when no
    document with that id exists (the lookup uses
    ``Document.global_objects``). Only duplicates the user may view are
    included.
    """
    related_document = self.get_related_document(obj)
    request = self.context.get("request")
    user = request.user if request else None

    # Evaluate the cheap preconditions before touching the database, so
    # tasks without a related document (or requests without a user) do not
    # cost a document lookup whose result would be discarded anyway.
    if not related_document or not user:
        return []

    document = Document.global_objects.filter(pk=related_document).first()
    if document is None:
        return []

    duplicates = _get_viewable_duplicates(document, user)
    return list(duplicates.values("id", "title", "deleted_at"))
|
||||
|
||||
|
||||
class RunTaskViewSerializer(serializers.Serializer):
|
||||
task_name = serializers.ChoiceField(
|
||||
|
||||
@@ -108,7 +108,6 @@ def create_dummy_document():
|
||||
page_count=5,
|
||||
created=timezone.now(),
|
||||
modified=timezone.now(),
|
||||
storage_type=Document.STORAGE_TYPE_UNENCRYPTED,
|
||||
added=timezone.now(),
|
||||
filename="/dummy/filename.pdf",
|
||||
archive_filename="/dummy/archive_filename.pdf",
|
||||
|
||||
BIN
src/documents/tests/samples/documents/originals/0000004.pdf
Normal file
BIN
src/documents/tests/samples/documents/originals/0000004.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
src/documents/tests/samples/documents/thumbnails/0000004.webp
Normal file
BIN
src/documents/tests/samples/documents/thumbnails/0000004.webp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.6 KiB |
Binary file not shown.
@@ -7,6 +7,7 @@ from django.contrib.auth.models import User
|
||||
from rest_framework import status
|
||||
from rest_framework.test import APITestCase
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import PaperlessTask
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.views import TasksViewSet
|
||||
@@ -258,7 +259,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
status=celery.states.FAILURE,
|
||||
result="test.pdf: Not consuming test.pdf: It is a duplicate.",
|
||||
result="test.pdf: Unexpected error during ingestion.",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -270,7 +271,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
||||
|
||||
self.assertEqual(
|
||||
returned_data["result"],
|
||||
"test.pdf: Not consuming test.pdf: It is a duplicate.",
|
||||
"test.pdf: Unexpected error during ingestion.",
|
||||
)
|
||||
|
||||
def test_task_name_webui(self):
|
||||
@@ -325,20 +326,34 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
||||
|
||||
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
|
||||
|
||||
def test_task_result_failed_duplicate_includes_related_doc(self):
|
||||
def test_task_result_duplicate_warning_includes_count(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- A celery task failed with a duplicate error
|
||||
- A celery task succeeds, but a duplicate exists
|
||||
WHEN:
|
||||
- API call is made to get tasks
|
||||
THEN:
|
||||
- The returned data includes a related document link
|
||||
- The returned data includes duplicate warning metadata
|
||||
"""
|
||||
checksum = "duplicate-checksum"
|
||||
Document.objects.create(
|
||||
title="Existing",
|
||||
content="",
|
||||
mime_type="application/pdf",
|
||||
checksum=checksum,
|
||||
)
|
||||
created_doc = Document.objects.create(
|
||||
title="Created",
|
||||
content="",
|
||||
mime_type="application/pdf",
|
||||
checksum=checksum,
|
||||
archive_checksum="another-checksum",
|
||||
)
|
||||
PaperlessTask.objects.create(
|
||||
task_id=str(uuid.uuid4()),
|
||||
task_file_name="task_one.pdf",
|
||||
status=celery.states.FAILURE,
|
||||
result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
|
||||
status=celery.states.SUCCESS,
|
||||
result=f"Success. New document id {created_doc.pk} created",
|
||||
)
|
||||
|
||||
response = self.client.get(self.ENDPOINT)
|
||||
@@ -348,7 +363,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
||||
|
||||
returned_data = response.data[0]
|
||||
|
||||
self.assertEqual(returned_data["related_document"], "1234")
|
||||
self.assertEqual(returned_data["related_document"], str(created_doc.pk))
|
||||
|
||||
def test_run_train_classifier_task(self):
|
||||
"""
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import textwrap
|
||||
from unittest import mock
|
||||
|
||||
from django.core.checks import Error
|
||||
@@ -6,60 +5,11 @@ from django.core.checks import Warning
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
from documents.checks import changed_password_check
|
||||
from documents.checks import filename_format_check
|
||||
from documents.checks import parser_check
|
||||
from documents.models import Document
|
||||
from documents.tests.factories import DocumentFactory
|
||||
|
||||
|
||||
class TestDocumentChecks(TestCase):
|
||||
def test_changed_password_check_empty_db(self):
|
||||
self.assertListEqual(changed_password_check(None), [])
|
||||
|
||||
def test_changed_password_check_no_encryption(self):
|
||||
DocumentFactory.create(storage_type=Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
self.assertListEqual(changed_password_check(None), [])
|
||||
|
||||
def test_encrypted_missing_passphrase(self):
|
||||
DocumentFactory.create(storage_type=Document.STORAGE_TYPE_GPG)
|
||||
msgs = changed_password_check(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
msg_text = msgs[0].msg
|
||||
self.assertEqual(
|
||||
msg_text,
|
||||
"The database contains encrypted documents but no password is set.",
|
||||
)
|
||||
|
||||
@override_settings(
|
||||
PASSPHRASE="test",
|
||||
)
|
||||
@mock.patch("paperless.db.GnuPG.decrypted")
|
||||
@mock.patch("documents.models.Document.source_file")
|
||||
def test_encrypted_decrypt_fails(self, mock_decrypted, mock_source_file):
|
||||
mock_decrypted.return_value = None
|
||||
mock_source_file.return_value = b""
|
||||
|
||||
DocumentFactory.create(storage_type=Document.STORAGE_TYPE_GPG)
|
||||
|
||||
msgs = changed_password_check(None)
|
||||
|
||||
self.assertEqual(len(msgs), 1)
|
||||
msg_text = msgs[0].msg
|
||||
self.assertEqual(
|
||||
msg_text,
|
||||
textwrap.dedent(
|
||||
"""
|
||||
The current password doesn't match the password of the
|
||||
existing documents.
|
||||
|
||||
If you intend to change your password, you must first export
|
||||
all of the old documents, start fresh with the new password
|
||||
and then re-import them."
|
||||
""",
|
||||
),
|
||||
)
|
||||
|
||||
def test_parser_check(self):
|
||||
self.assertEqual(parser_check(None), [])
|
||||
|
||||
|
||||
@@ -485,21 +485,21 @@ class TestConsumer(
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
self._assert_first_last_send_progress(last_status="FAILED")
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
def testDuplicates2(self):
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
|
||||
with self.get_consumer(self.get_test_archive_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
self._assert_first_last_send_progress(last_status="FAILED")
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
def testDuplicates3(self):
|
||||
with self.get_consumer(self.get_test_archive_file()) as consumer:
|
||||
@@ -513,10 +513,11 @@ class TestConsumer(
|
||||
|
||||
Document.objects.all().delete()
|
||||
|
||||
with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
|
||||
with self.get_consumer(self.get_test_file()) as consumer:
|
||||
consumer.run()
|
||||
|
||||
self.assertEqual(Document.objects.count(), 1)
|
||||
|
||||
def testAsnExists(self):
|
||||
with self.get_consumer(
|
||||
self.get_test_file(),
|
||||
@@ -718,12 +719,45 @@ class TestConsumer(
|
||||
dst = self.get_test_file()
|
||||
self.assertIsFile(dst)
|
||||
|
||||
with self.assertRaises(ConsumerError):
|
||||
expected_message = (
|
||||
f"{dst.name}: Not consuming {dst.name}: "
|
||||
f"It is a duplicate of {document.title} (#{document.pk})"
|
||||
)
|
||||
|
||||
with self.assertRaisesMessage(ConsumerError, expected_message):
|
||||
with self.get_consumer(dst) as consumer:
|
||||
consumer.run()
|
||||
|
||||
self.assertIsNotFile(dst)
|
||||
self._assert_first_last_send_progress(last_status="FAILED")
|
||||
self.assertEqual(Document.objects.count(), 1)
|
||||
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
|
||||
|
||||
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate_in_trash(self):
    """
    GIVEN:
        - A consumed document which has then been moved to the trash
        - CONSUMER_DELETE_DUPLICATES is enabled
    WHEN:
        - The same file is consumed again
    THEN:
        - Consumption fails with a duplicate message noting the trash
        - The incoming file is removed
        - No new document is created
    """
    first_copy = self.get_test_file()
    with self.get_consumer(first_copy) as consumer:
        consumer.run()

    # Move the existing document to trash
    trashed = Document.objects.first()
    trashed.delete()

    second_copy = self.get_test_file()
    self.assertIsFile(second_copy)

    expected_message = (
        f"{second_copy.name}: Not consuming {second_copy.name}: "
        f"It is a duplicate of {trashed.title} (#{trashed.pk})"
        " Note: existing document is in the trash."
    )

    with self.assertRaisesMessage(ConsumerError, expected_message):
        with self.get_consumer(second_copy) as consumer:
            consumer.run()

    self.assertIsNotFile(second_copy)
    # The trashed document is still the only one known to the global
    # manager, and nothing is visible through the default manager.
    self.assertEqual(Document.global_objects.count(), 1)
    self.assertEqual(Document.objects.count(), 0)
|
||||
|
||||
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
|
||||
def test_no_delete_duplicate(self):
|
||||
@@ -743,15 +777,12 @@ class TestConsumer(
|
||||
dst = self.get_test_file()
|
||||
self.assertIsFile(dst)
|
||||
|
||||
with self.assertRaisesRegex(
|
||||
ConsumerError,
|
||||
r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
|
||||
):
|
||||
with self.get_consumer(dst) as consumer:
|
||||
consumer.run()
|
||||
|
||||
self.assertIsFile(dst)
|
||||
self._assert_first_last_send_progress(last_status="FAILED")
|
||||
self.assertIsNotFile(dst)
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{title}")
|
||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||
|
||||
@@ -34,22 +34,14 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_generate_source_filename(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
document.save()
|
||||
|
||||
self.assertEqual(generate_filename(document), Path(f"{document.pk:07d}.pdf"))
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
self.assertEqual(
|
||||
generate_filename(document),
|
||||
Path(f"{document.pk:07d}.pdf.gpg"),
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
document.save()
|
||||
|
||||
# Test default source_path
|
||||
@@ -63,11 +55,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(document.filename, Path("none/none.pdf"))
|
||||
|
||||
# Enable encryption and check again
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename, Path("none/none.pdf.gpg"))
|
||||
|
||||
document.save()
|
||||
|
||||
# test that creating dirs for the source_path creates the correct directory
|
||||
@@ -87,14 +74,14 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
settings.ORIGINALS_DIR / "none",
|
||||
)
|
||||
self.assertIsFile(
|
||||
settings.ORIGINALS_DIR / "test" / "test.pdf.gpg",
|
||||
settings.ORIGINALS_DIR / "test" / "test.pdf",
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_missing_permissions(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -128,14 +115,13 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_file_renaming_database_error(self):
|
||||
Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
storage_type=Document.STORAGE_TYPE_UNENCRYPTED,
|
||||
checksum="AAAAA",
|
||||
)
|
||||
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.checksum = "BBBBB"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -170,7 +156,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_document_delete(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -196,7 +182,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_document_delete_trash_dir(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -221,7 +207,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
# Create an identical document and ensure it is trashed under a new name
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
document.filename = generate_filename(document)
|
||||
document.save()
|
||||
@@ -235,7 +221,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_document_delete_nofile(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
document.delete()
|
||||
@@ -245,7 +231,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_directory_not_empty(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -362,7 +348,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
def test_nested_directory_cleanup(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -390,7 +376,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document = Document()
|
||||
document.pk = 1
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
|
||||
@@ -403,7 +388,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document = Document()
|
||||
document.pk = 1
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
|
||||
@@ -429,7 +413,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document = Document()
|
||||
document.pk = 1
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
|
||||
@@ -438,7 +421,6 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document = Document()
|
||||
document.pk = 1
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
|
||||
@@ -1258,7 +1240,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
title="doc1",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
@@ -1732,7 +1714,6 @@ class TestPathDateLocalization:
|
||||
document = DocumentFactory.create(
|
||||
title="My Document",
|
||||
mime_type="application/pdf",
|
||||
storage_type=Document.STORAGE_TYPE_UNENCRYPTED,
|
||||
created=self.TEST_DATE, # 2023-10-26 (which is a Thursday)
|
||||
)
|
||||
with override_settings(FILENAME_FORMAT=filename_format):
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import filecmp
|
||||
import hashlib
|
||||
import shutil
|
||||
import tempfile
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
@@ -96,66 +94,6 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertEqual(doc2.archive_filename, "document_01.pdf")
|
||||
|
||||
|
||||
class TestDecryptDocuments(FileSystemAssertsMixin, TestCase):
    """
    Exercises the ``decrypt_documents`` management command on a GPG sample.
    """

    @mock.patch("documents.management.commands.decrypt_documents.input")
    def test_decrypt(self, m):
        """
        GIVEN:
            - A document stored with the GPG storage type
            - Its encrypted original and thumbnail in temporary directories
        WHEN:
            - The decrypt_documents command is called
        THEN:
            - The document is marked unencrypted and loses the .gpg suffix
            - The decrypted original and thumbnail exist
            - The decrypted original matches the stored checksum
        """
        documents_root = Path(tempfile.mkdtemp()) / "documents"
        originals_dir = documents_root / "originals"
        thumb_dir = documents_root / "thumbnails"
        for directory in (originals_dir, thumb_dir):
            directory.mkdir(parents=True, exist_ok=True)

        samples = Path(__file__).parent / "samples" / "documents"

        with override_settings(
            ORIGINALS_DIR=originals_dir,
            THUMBNAIL_DIR=thumb_dir,
            PASSPHRASE="test",
            FILENAME_FORMAT=None,
        ):
            doc = Document.objects.create(
                checksum="82186aaa94f0b98697d704b90fd1c072",
                title="wow",
                filename="0000004.pdf.gpg",
                mime_type="application/pdf",
                storage_type=Document.STORAGE_TYPE_GPG,
            )

            # Put the encrypted sample files where the document expects them.
            shutil.copy(
                samples / "originals" / "0000004.pdf.gpg",
                originals_dir / "0000004.pdf.gpg",
            )
            shutil.copy(
                samples / "thumbnails" / "0000004.webp.gpg",
                thumb_dir / f"{doc.id:07}.webp.gpg",
            )

            call_command("decrypt_documents")

            doc.refresh_from_db()

            self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
            self.assertEqual(doc.filename, "0000004.pdf")
            self.assertIsFile(Path(originals_dir) / "0000004.pdf")
            self.assertIsFile(doc.source_path)
            self.assertIsFile(Path(thumb_dir) / f"{doc.id:07}.webp")
            self.assertIsFile(doc.thumbnail_path)

            with doc.source_file as f:
                checksum: str = hashlib.md5(f.read()).hexdigest()
                self.assertEqual(checksum, doc.checksum)
|
||||
|
||||
|
||||
class TestMakeIndex(TestCase):
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m):
|
||||
|
||||
@@ -86,9 +86,8 @@ class TestExportImport(
|
||||
content="Content",
|
||||
checksum="82186aaa94f0b98697d704b90fd1c072",
|
||||
title="wow_dec",
|
||||
filename="0000004.pdf.gpg",
|
||||
filename="0000004.pdf",
|
||||
mime_type="application/pdf",
|
||||
storage_type=Document.STORAGE_TYPE_GPG,
|
||||
)
|
||||
|
||||
self.note = Note.objects.create(
|
||||
@@ -242,11 +241,6 @@ class TestExportImport(
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, element["fields"]["checksum"])
|
||||
|
||||
self.assertEqual(
|
||||
element["fields"]["storage_type"],
|
||||
Document.STORAGE_TYPE_UNENCRYPTED,
|
||||
)
|
||||
|
||||
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
|
||||
fname = (
|
||||
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]
|
||||
@@ -436,7 +430,7 @@ class TestExportImport(
|
||||
Document.objects.create(
|
||||
checksum="AAAAAAAAAAAAAAAAA",
|
||||
title="wow",
|
||||
filename="0000004.pdf",
|
||||
filename="0000010.pdf",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
|
||||
|
||||
@@ -195,7 +195,6 @@ from paperless import version
|
||||
from paperless.celery import app as celery_app
|
||||
from paperless.config import AIConfig
|
||||
from paperless.config import GeneralConfig
|
||||
from paperless.db import GnuPG
|
||||
from paperless.models import ApplicationConfiguration
|
||||
from paperless.serialisers import GroupSerializer
|
||||
from paperless.serialisers import UserSerializer
|
||||
@@ -1071,9 +1070,7 @@ class DocumentViewSet(
|
||||
doc,
|
||||
):
|
||||
return HttpResponseForbidden("Insufficient permissions")
|
||||
if doc.storage_type == Document.STORAGE_TYPE_GPG:
|
||||
handle = GnuPG.decrypted(doc.thumbnail_file)
|
||||
else:
|
||||
|
||||
handle = doc.thumbnail_file
|
||||
|
||||
return HttpResponse(handle, content_type="image/webp")
|
||||
@@ -2824,9 +2821,6 @@ def serve_file(*, doc: Document, use_archive: bool, disposition: str):
|
||||
if mime_type in {"application/csv", "text/csv"} and disposition == "inline":
|
||||
mime_type = "text/plain"
|
||||
|
||||
if doc.storage_type == Document.STORAGE_TYPE_GPG:
|
||||
file_handle = GnuPG.decrypted(file_handle)
|
||||
|
||||
response = HttpResponse(file_handle, content_type=mime_type)
|
||||
# Firefox is not able to handle unicode characters in filename field
|
||||
# RFC 5987 addresses this issue
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,17 +0,0 @@
|
||||
import gnupg
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class GnuPG:
    """
    Shared GPG handle for working with encrypted files.

    A single ``gnupg.GPG`` instance is created at class definition time,
    using ``settings.GNUPG_HOME`` as its home directory.
    """

    gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)

    @classmethod
    def decrypted(cls, file_handle, passphrase=None):
        """
        Decrypt ``file_handle`` and return the ``data`` attribute of the
        result.

        When ``passphrase`` is empty or not given, ``settings.PASSPHRASE``
        is used instead.
        """
        effective_passphrase = passphrase or settings.PASSPHRASE
        outcome = cls.gpg.decrypt_file(
            file_handle,
            passphrase=effective_passphrase,
        )
        return outcome.data
|
||||
@@ -1203,19 +1203,6 @@ EMAIL_PARSE_DEFAULT_LAYOUT = __get_int(
|
||||
1, # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
|
||||
)
|
||||
|
||||
# Pre-2.x versions of Paperless stored your documents locally with GPG
|
||||
# encryption, but that is no longer the default. This behaviour is still
|
||||
# available, but it must be explicitly enabled by setting
|
||||
# `PAPERLESS_PASSPHRASE` in your environment or config file. The default is to
|
||||
# store these files unencrypted.
|
||||
#
|
||||
# Translation:
|
||||
# * If you're a new user, you can safely ignore this setting.
|
||||
# * If you're upgrading from 1.x, this must be set, OR you can run
|
||||
# `./manage.py change_storage_type gpg unencrypted` to decrypt your files,
|
||||
# after which you can unset this value.
|
||||
PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")
|
||||
|
||||
# Trigger a script after every successful document consumption?
|
||||
PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
|
||||
POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
|
||||
|
||||
Reference in New Issue
Block a user