mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-10 00:18:57 +00:00
Merge branch 'dev' into feature-autocolor
This commit is contained in:
@@ -6,13 +6,18 @@ import magic
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
from paperless.db import GnuPG
|
||||
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def source_path(self):
|
||||
if self.filename:
|
||||
fname = str(self.filename)
|
||||
else:
|
||||
fname = "{:07}.{}".format(self.pk, self.file_type)
|
||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||
if self.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
@@ -26,9 +31,18 @@ def add_mime_types(apps, schema_editor):
|
||||
documents = Document.objects.all()
|
||||
|
||||
for d in documents:
|
||||
d.mime_type = magic.from_file(source_path(d), mime=True)
|
||||
f = open(source_path(d), "rb")
|
||||
if d.storage_type == STORAGE_TYPE_GPG:
|
||||
|
||||
data = GnuPG.decrypted(f)
|
||||
else:
|
||||
data = f.read(1024)
|
||||
|
||||
d.mime_type = magic.from_buffer(data, mime=True)
|
||||
d.save()
|
||||
|
||||
f.close()
|
||||
|
||||
|
||||
def add_file_extensions(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
25
src/documents/migrations/1006_auto_20201208_2209.py
Normal file
25
src/documents/migrations/1006_auto_20201208_2209.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# Generated by Django 3.1.4 on 2020-12-08 22:09
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1005_checksums'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RemoveField(
|
||||
model_name='correspondent',
|
||||
name='slug',
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='documenttype',
|
||||
name='slug',
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
),
|
||||
]
|
@@ -0,0 +1,37 @@
|
||||
# Generated by Django 3.1.4 on 2020-12-12 14:41
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
('documents', '1006_auto_20201208_2209'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='SavedView',
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=128)),
|
||||
('show_on_dashboard', models.BooleanField()),
|
||||
('show_in_sidebar', models.BooleanField()),
|
||||
('sort_field', models.CharField(max_length=128)),
|
||||
('sort_reverse', models.BooleanField(default=False)),
|
||||
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='SavedViewFilterRule',
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('rule_type', models.PositiveIntegerField(choices=[(0, 'Title contains'), (1, 'Content contains'), (2, 'ASN is'), (3, 'Correspondent is'), (4, 'Document type is'), (5, 'Is in inbox'), (6, 'Has tag'), (7, 'Has any tag'), (8, 'Created before'), (9, 'Created after'), (10, 'Created year is'), (11, 'Created month is'), (12, 'Created day is'), (13, 'Added before'), (14, 'Added after'), (15, 'Modified before'), (16, 'Modified after'), (17, 'Does not have tag')])),
|
||||
('value', models.CharField(max_length=128)),
|
||||
('saved_view', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview')),
|
||||
],
|
||||
),
|
||||
]
|
34
src/documents/migrations/1008_auto_20201216_1736.py
Normal file
34
src/documents/migrations/1008_auto_20201216_1736.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# Generated by Django 3.1.4 on 2020-12-16 17:36
|
||||
|
||||
from django.db import migrations
|
||||
import django.db.models.functions.text
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1007_savedview_savedviewfilterrule'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='correspondent',
|
||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='document',
|
||||
options={'ordering': ('-created',)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='documenttype',
|
||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='savedview',
|
||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
||||
),
|
||||
]
|
29
src/documents/migrations/1009_auto_20201216_2005.py
Normal file
29
src/documents/migrations/1009_auto_20201216_2005.py
Normal file
@@ -0,0 +1,29 @@
|
||||
# Generated by Django 3.1.4 on 2020-12-16 20:05
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1008_auto_20201216_1736'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='correspondent',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='documenttype',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='savedview',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
]
|
18
src/documents/migrations/1010_auto_20210101_2159.py
Normal file
18
src/documents/migrations/1010_auto_20210101_2159.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 3.1.4 on 2021-01-01 21:59
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1009_auto_20201216_2005'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='value',
|
||||
field=models.CharField(blank=True, max_length=128, null=True),
|
||||
),
|
||||
]
|
250
src/documents/migrations/1011_auto_20210101_2340.py
Normal file
250
src/documents/migrations/1011_auto_20210101_2340.py
Normal file
@@ -0,0 +1,250 @@
|
||||
# Generated by Django 3.1.4 on 2021-01-01 23:40
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
('documents', '1010_auto_20210101_2159'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='correspondent',
|
||||
options={'ordering': ('name',), 'verbose_name': 'correspondent', 'verbose_name_plural': 'correspondents'},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='document',
|
||||
options={'ordering': ('-created',), 'verbose_name': 'document', 'verbose_name_plural': 'documents'},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='documenttype',
|
||||
options={'verbose_name': 'document type', 'verbose_name_plural': 'document types'},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='log',
|
||||
options={'ordering': ('-created',), 'verbose_name': 'log', 'verbose_name_plural': 'logs'},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='savedview',
|
||||
options={'ordering': ('name',), 'verbose_name': 'saved view', 'verbose_name_plural': 'saved views'},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='savedviewfilterrule',
|
||||
options={'verbose_name': 'filter rule', 'verbose_name_plural': 'filter rules'},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'verbose_name': 'tag', 'verbose_name_plural': 'tags'},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='is_insensitive',
|
||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='match',
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='added',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False, verbose_name='added'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='archive_checksum',
|
||||
field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True, verbose_name='archive checksum'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='archive_serial_number',
|
||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True, verbose_name='archive serial number'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True, verbose_name='checksum'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='content',
|
||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.', verbose_name='content'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='correspondent',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.correspondent', verbose_name='correspondent'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='created',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, verbose_name='created'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='document_type',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype', verbose_name='document type'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, verbose_name='filename'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='mime_type',
|
||||
field=models.CharField(editable=False, max_length=256, verbose_name='mime type'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True, db_index=True, verbose_name='modified'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='storage_type',
|
||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11, verbose_name='storage type'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag', verbose_name='tags'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='title',
|
||||
field=models.CharField(blank=True, db_index=True, max_length=128, verbose_name='title'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='is_insensitive',
|
||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='match',
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, verbose_name='created'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='group',
|
||||
field=models.UUIDField(blank=True, null=True, verbose_name='group'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='level',
|
||||
field=models.PositiveIntegerField(choices=[(10, 'debug'), (20, 'information'), (30, 'warning'), (40, 'error'), (50, 'critical')], default=20, verbose_name='level'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='message',
|
||||
field=models.TextField(verbose_name='message'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, verbose_name='name'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='show_in_sidebar',
|
||||
field=models.BooleanField(verbose_name='show in sidebar'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='show_on_dashboard',
|
||||
field=models.BooleanField(verbose_name='show on dashboard'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='sort_field',
|
||||
field=models.CharField(max_length=128, verbose_name='sort field'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='sort_reverse',
|
||||
field=models.BooleanField(default=False, verbose_name='sort reverse'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='user',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, verbose_name='user'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='rule_type',
|
||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag')], verbose_name='rule type'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='saved_view',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview', verbose_name='saved view'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='value',
|
||||
field=models.CharField(blank=True, max_length=128, null=True, verbose_name='value'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='colour',
|
||||
field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1, verbose_name='color'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='is_inbox_tag',
|
||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.', verbose_name='is inbox tag'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='is_insensitive',
|
||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='match',
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
||||
),
|
||||
]
|
330
src/documents/migrations/1012_fix_archive_files.py
Normal file
330
src/documents/migrations/1012_fix_archive_files.py
Normal file
@@ -0,0 +1,330 @@
|
||||
# Generated by Django 3.1.6 on 2021-02-07 22:26
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from time import sleep
|
||||
|
||||
import pathvalidate
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
from django.template.defaultfilters import slugify
|
||||
|
||||
from documents.file_handling import defaultdictNoStr, many_to_dictionary
|
||||
|
||||
|
||||
logger = logging.getLogger("paperless.migrations")
|
||||
|
||||
###############################################################################
|
||||
# This is code copied straight paperless before the change.
|
||||
###############################################################################
|
||||
|
||||
def archive_name_from_filename(filename):
|
||||
return os.path.splitext(filename)[0] + ".pdf"
|
||||
|
||||
|
||||
def archive_path_old(doc):
|
||||
if doc.filename:
|
||||
fname = archive_name_from_filename(doc.filename)
|
||||
else:
|
||||
fname = "{:07}.pdf".format(doc.pk)
|
||||
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def archive_path_new(doc):
|
||||
if doc.archive_filename is not None:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(doc.archive_filename)
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def source_path(doc):
|
||||
if doc.filename:
|
||||
fname = str(doc.filename)
|
||||
else:
|
||||
fname = "{:07}{}".format(doc.pk, doc.file_type)
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
def generate_unique_filename(doc, archive_filename=False):
|
||||
if archive_filename:
|
||||
old_filename = doc.archive_filename
|
||||
root = settings.ARCHIVE_DIR
|
||||
else:
|
||||
old_filename = doc.filename
|
||||
root = settings.ORIGINALS_DIR
|
||||
|
||||
counter = 0
|
||||
|
||||
while True:
|
||||
new_filename = generate_filename(
|
||||
doc, counter, archive_filename=archive_filename)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
|
||||
if os.path.exists(os.path.join(root, new_filename)):
|
||||
counter += 1
|
||||
else:
|
||||
return new_filename
|
||||
|
||||
|
||||
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
path = ""
|
||||
|
||||
try:
|
||||
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
||||
tags = defaultdictNoStr(lambda: slugify(None),
|
||||
many_to_dictionary(doc.tags))
|
||||
|
||||
tag_list = pathvalidate.sanitize_filename(
|
||||
",".join(sorted(
|
||||
[tag.name for tag in doc.tags.all()]
|
||||
)),
|
||||
replacement_text="-"
|
||||
)
|
||||
|
||||
if doc.correspondent:
|
||||
correspondent = pathvalidate.sanitize_filename(
|
||||
doc.correspondent.name, replacement_text="-"
|
||||
)
|
||||
else:
|
||||
correspondent = "none"
|
||||
|
||||
if doc.document_type:
|
||||
document_type = pathvalidate.sanitize_filename(
|
||||
doc.document_type.name, replacement_text="-"
|
||||
)
|
||||
else:
|
||||
document_type = "none"
|
||||
|
||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||
title=pathvalidate.sanitize_filename(
|
||||
doc.title, replacement_text="-"),
|
||||
correspondent=correspondent,
|
||||
document_type=document_type,
|
||||
created=datetime.date.isoformat(doc.created),
|
||||
created_year=doc.created.year if doc.created else "none",
|
||||
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
|
||||
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
||||
added=datetime.date.isoformat(doc.added),
|
||||
added_year=doc.added.year if doc.added else "none",
|
||||
added_month=f"{doc.added.month:02}" if doc.added else "none",
|
||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||
tags=tags,
|
||||
tag_list=tag_list
|
||||
).strip()
|
||||
|
||||
path = path.strip(os.sep)
|
||||
|
||||
except (ValueError, KeyError, IndexError):
|
||||
logger.warning(
|
||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
|
||||
filetype_str = ".pdf" if archive_filename else doc.file_type
|
||||
|
||||
if len(path) > 0:
|
||||
filename = f"{path}{counter_str}{filetype_str}"
|
||||
else:
|
||||
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
|
||||
# Append .gpg for encrypted files
|
||||
if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
|
||||
filename += ".gpg"
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
###############################################################################
|
||||
# This code performs bidirection archive file transformation.
|
||||
###############################################################################
|
||||
|
||||
|
||||
def parse_wrapper(parser, path, mime_type, file_name):
|
||||
# this is here so that I can mock this out for testing.
|
||||
parser.parse(path, mime_type, file_name)
|
||||
|
||||
|
||||
def create_archive_version(doc, retry_count=3):
|
||||
from documents.parsers import get_parser_class_for_mime_type, \
|
||||
DocumentParser, \
|
||||
ParseError
|
||||
|
||||
logger.info(
|
||||
f"Regenerating archive document for document ID:{doc.id}"
|
||||
)
|
||||
parser_class = get_parser_class_for_mime_type(doc.mime_type)
|
||||
for try_num in range(retry_count):
|
||||
parser: DocumentParser = parser_class(None, None)
|
||||
try:
|
||||
parse_wrapper(parser, source_path(doc), doc.mime_type,
|
||||
os.path.basename(doc.filename))
|
||||
doc.content = parser.get_text()
|
||||
|
||||
if parser.get_archive_path() and os.path.isfile(
|
||||
parser.get_archive_path()):
|
||||
doc.archive_filename = generate_unique_filename(
|
||||
doc, archive_filename=True)
|
||||
with open(parser.get_archive_path(), "rb") as f:
|
||||
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
os.makedirs(os.path.dirname(archive_path_new(doc)),
|
||||
exist_ok=True)
|
||||
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
|
||||
else:
|
||||
doc.archive_checksum = None
|
||||
logger.error(
|
||||
f"Parser did not return an archive document for document "
|
||||
f"ID:{doc.id}. Removing archive document."
|
||||
)
|
||||
doc.save()
|
||||
return
|
||||
except ParseError:
|
||||
if try_num + 1 == retry_count:
|
||||
logger.exception(
|
||||
f"Unable to regenerate archive document for ID:{doc.id}. You "
|
||||
f"need to invoke the document_archiver management command "
|
||||
f"manually for that document."
|
||||
)
|
||||
doc.archive_checksum = None
|
||||
doc.save()
|
||||
return
|
||||
else:
|
||||
# This is mostly here for the tika parser in docker
|
||||
# environemnts. The servers for parsing need to come up first,
|
||||
# and the docker setup doesn't ensure that tika is running
|
||||
# before attempting migrations.
|
||||
logger.error("Parse error, will try again in 5 seconds...")
|
||||
sleep(5)
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
def move_old_to_new_locations(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
affected_document_ids = set()
|
||||
|
||||
old_archive_path_to_id = {}
|
||||
|
||||
# check for documents that have incorrect archive versions
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
old_path = archive_path_old(doc)
|
||||
|
||||
if old_path in old_archive_path_to_id:
|
||||
affected_document_ids.add(doc.id)
|
||||
affected_document_ids.add(old_archive_path_to_id[old_path])
|
||||
else:
|
||||
old_archive_path_to_id[old_path] = doc.id
|
||||
|
||||
# check that archive files of all unaffected documents are in place
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
old_path = archive_path_old(doc)
|
||||
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
|
||||
raise ValueError(
|
||||
f"Archived document ID:{doc.id} does not exist at: "
|
||||
f"{old_path}")
|
||||
|
||||
# check that we can regenerate affected archive versions
|
||||
for doc_id in affected_document_ids:
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
|
||||
doc = Document.objects.get(id=doc_id)
|
||||
parser_class = get_parser_class_for_mime_type(doc.mime_type)
|
||||
if not parser_class:
|
||||
raise ValueError(
|
||||
f"Document ID:{doc.id} has an invalid archived document, "
|
||||
f"but no parsers are available. Cannot migrate.")
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
|
||||
if doc.id in affected_document_ids:
|
||||
old_path = archive_path_old(doc)
|
||||
# remove affected archive versions
|
||||
if os.path.isfile(old_path):
|
||||
logger.debug(
|
||||
f"Removing {old_path}"
|
||||
)
|
||||
os.unlink(old_path)
|
||||
else:
|
||||
# Set archive path for unaffected files
|
||||
doc.archive_filename = archive_name_from_filename(doc.filename)
|
||||
Document.objects.filter(id=doc.id).update(
|
||||
archive_filename=doc.archive_filename
|
||||
)
|
||||
|
||||
# regenerate archive documents
|
||||
for doc_id in affected_document_ids:
|
||||
doc = Document.objects.get(id=doc_id)
|
||||
create_archive_version(doc)
|
||||
|
||||
|
||||
def move_new_to_old_locations(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
old_archive_paths = set()
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
new_archive_path = archive_path_new(doc)
|
||||
old_archive_path = archive_path_old(doc)
|
||||
if old_archive_path in old_archive_paths:
|
||||
raise ValueError(
|
||||
f"Cannot migrate: Archive file name {old_archive_path} of "
|
||||
f"document {doc.filename} would clash with another archive "
|
||||
f"filename.")
|
||||
old_archive_paths.add(old_archive_path)
|
||||
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
|
||||
raise ValueError(
|
||||
f"Cannot migrate: Cannot move {new_archive_path} to "
|
||||
f"{old_archive_path}: file already exists."
|
||||
)
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
new_archive_path = archive_path_new(doc)
|
||||
old_archive_path = archive_path_old(doc)
|
||||
if new_archive_path != old_archive_path:
|
||||
logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
|
||||
shutil.move(new_archive_path, old_archive_path)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1011_auto_20210101_2340'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
|
||||
),
|
||||
migrations.RunPython(
|
||||
move_old_to_new_locations,
|
||||
move_new_to_old_locations
|
||||
),
|
||||
]
|
Reference in New Issue
Block a user