Merge pull request #168 from kpj/feature-black
GitHub Actions workflow for black code formatting
commit 5c9c10a6db

.github/workflow-scripts/check-trailing-newline (vendored, 37 lines)
@@ -1,37 +0,0 @@
#!/bin/bash

# Verify that all text files end in a trailing newline.

# Exit on first failing command.
set -e

# Exit on unset variable.
set -u

success=0

function is_plaintext_file() {
    local file="$1"
    if [[ $file == *.svg ]]; then
        echo ""
        return
    fi
    file --brief "${file}" | grep text
}

# Split strings on newlines.
IFS='
'
for file in $(git ls-files)
do
    if [[ -z $(is_plaintext_file "${file}") ]]; then
        continue
    fi

    if ! [[ -z "$(tail -c 1 "${file}")" ]]; then
        printf "File must end in a trailing newline: %s\n" "${file}" >&2
        success=255
    fi
done

exit "${success}"
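The check above relies on a small shell detail: command substitution strips a trailing newline, so for a file that ends in a newline, $(tail -c 1 file) expands to the empty string. A throwaway illustration of both cases (file paths are arbitrary):

printf 'ends with newline\n' > /tmp/with-nl
printf 'no newline here' > /tmp/without-nl
[[ -z "$(tail -c 1 /tmp/with-nl)" ]] && echo "passes the check"      # newline present
[[ -z "$(tail -c 1 /tmp/without-nl)" ]] || echo "would be flagged"   # newline missing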
.github/workflow-scripts/check-trailing-whitespace (vendored, 26 lines)

@@ -1,26 +0,0 @@
#!/bin/bash

# Check for trailing whitespace at end of lines.

# Exit on first failing command.
set -e
# Exit on unset variable.
set -u

FOUND_TRAILING_WHITESPACE=0

while read -r line; do
    if grep \
        "\s$" \
        --line-number \
        --with-filename \
        --binary-files=without-match \
        --exclude="*.svg" \
        --exclude="*.eps" \
        "${line}"; then
        echo "ERROR: Found trailing whitespace" >&2;
        FOUND_TRAILING_WHITESPACE=1
    fi
done < <(git ls-files)

exit "${FOUND_TRAILING_WHITESPACE}"
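The loop feeds every tracked file to grep one at a time; the flags print any offending line with its file name and line number while skipping binaries and the excluded image formats. Checking a single file by hand looks roughly like this (the path is only an example):

grep --line-number --with-filename --binary-files=without-match "\s$" some/file.py && echo "trailing whitespace found"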
.github/workflows/ci.yml (vendored, 20 lines)

@@ -80,21 +80,19 @@ jobs:
        name: Codestyle
        run: |
          cd src/
          pycodestyle
  whitespace:
          pycodestyle --max-line-length=88 --ignore=E121,E123,E126,E226,E24,E704,W503,W504,E203
  codeformatting:
    runs-on: ubuntu-20.04
    steps:
      -
        name: Checkout
        uses: actions/checkout@v2
      -
        name: Ensure there are no trailing spaces
        run: |
          .github/workflow-scripts/check-trailing-whitespace
      -
        name: Ensure all text files end with a trailing newline
        run: |
          .github/workflow-scripts/check-trailing-whitespace
        name: Run black
        uses: psf/black@stable
        with:
          options: "--check --diff"
          version: "22.1.0"

  tests:
    runs-on: ubuntu-20.04
@@ -145,7 +143,7 @@ jobs:
          coveralls --service=github

  build-release:
    needs: [build-docker-image, documentation, tests, whitespace, codestyle]
    needs: [build-docker-image, documentation, tests, codeformatting, codestyle]
    runs-on: ubuntu-20.04
    steps:
      -
@@ -256,7 +254,7 @@ jobs:
  build-docker-image:
    if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/ng-'))
    runs-on: ubuntu-latest
    needs: [tests, whitespace, codestyle]
    needs: [tests, codeformatting, codestyle]
    steps:
      -
        name: Prepare
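Both replacement jobs can be reproduced locally before pushing. A rough equivalent of what the codestyle and codeformatting jobs run, assuming black and pycodestyle are installed and using the version and flags pinned in the workflow above:

pip install "black==22.1.0" pycodestyle
(cd src/ && pycodestyle --max-line-length=88 --ignore=E121,E123,E126,E226,E24,E704,W503,W504,E203)
black --check --diff .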
@@ -11,7 +11,7 @@ If you want to implement something big:

## Python

Paperless supports python 3.8 and 3.9.
Paperless supports python 3.8 and 3.9. We format Python code with [Black](https://github.com/psf/black).

## Branches

@@ -23,7 +23,7 @@ Paperless supports python 3.8 and 3.9.

## Testing:

Please test your code! I know it's a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.
Please format and test your code! I know it's a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.

To test your code, execute `pytest` in the src/ directory. This also generates a html coverage report, which you can use to see if you missed anything important during testing.
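Since the guide now asks for formatted as well as tested code, the usual local loop is to format first and then run the tests; a short sketch of that routine:

cd src/
black .      # reformat in place; the CI job only checks and diffs
pytest       # run the test suite and generate the coverage report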
Pipfile (1 line)

@@ -65,3 +65,4 @@ pytest-xdist = "*"
sphinx = "~=3.4.2"
sphinx_rtd_theme = "*"
tox = "*"
black = "*"
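With black declared in the Pipfile it can be installed and invoked through pipenv; a minimal sketch, assuming it ends up in the dev packages alongside the other tooling shown in this hunk:

pipenv install --dev
pipenv run black --check --diff .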
docs/conf.py (188 lines)
@ -6,29 +6,29 @@ exec(open("../src/paperless/version.py").read())
|
||||
|
||||
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx.ext.todo',
|
||||
'sphinx.ext.imgmath',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx_rtd_theme',
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.intersphinx",
|
||||
"sphinx.ext.todo",
|
||||
"sphinx.ext.imgmath",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx_rtd_theme",
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
# templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
source_suffix = ".rst"
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
# source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
master_doc = "index"
|
||||
|
||||
# General information about the project.
|
||||
project = u'Paperless-ngx'
|
||||
copyright = u'2015-2022, Daniel Quinn, Jonas Winkler, and the paperless-ngx team'
|
||||
project = "Paperless-ngx"
|
||||
copyright = "2015-2022, Daniel Quinn, Jonas Winkler, and the paperless-ngx team"
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
@ -47,180 +47,174 @@ release = ".".join([str(_) for _ in __version__[:3]])
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#language = None
|
||||
# language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
#today_fmt = '%B %d, %Y'
|
||||
# today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['_build']
|
||||
exclude_patterns = ["_build"]
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all
|
||||
# documents.
|
||||
#default_role = None
|
||||
# default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
#add_function_parentheses = True
|
||||
# add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
# add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
#show_authors = False
|
||||
# show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
pygments_style = "sphinx"
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
# modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
#keep_warnings = False
|
||||
# keep_warnings = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#html_theme_options = {}
|
||||
# html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
html_theme_path = []
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
#html_title = None
|
||||
# html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
#html_short_title = None
|
||||
# html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
#html_logo = None
|
||||
# html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
#html_favicon = None
|
||||
# html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
html_static_path = ["_static"]
|
||||
|
||||
# Add any extra paths that contain custom files (such as robots.txt or
|
||||
# .htaccess) here, relative to this directory. These files are copied
|
||||
# directly to the root of the documentation.
|
||||
#html_extra_path = []
|
||||
# html_extra_path = []
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
#html_last_updated_fmt = '%b %d, %Y'
|
||||
# html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
#html_use_smartypants = True
|
||||
# html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
#html_sidebars = {}
|
||||
# html_sidebars = {}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
#html_additional_pages = {}
|
||||
# html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
#html_domain_indices = True
|
||||
# html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
#html_use_index = True
|
||||
# html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
#html_split_index = False
|
||||
# html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
#html_show_sourcelink = True
|
||||
# html_show_sourcelink = True
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
#html_show_sphinx = True
|
||||
# html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
#html_show_copyright = True
|
||||
# html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
#html_use_opensearch = ''
|
||||
# html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
#html_file_suffix = None
|
||||
# html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'paperless'
|
||||
htmlhelp_basename = "paperless"
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
('index', 'paperless.tex', u'Paperless Documentation',
|
||||
u'Daniel Quinn', 'manual'),
|
||||
("index", "paperless.tex", "Paperless Documentation", "Daniel Quinn", "manual"),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
# latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
# latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
#latex_show_pagerefs = False
|
||||
# latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#latex_show_urls = False
|
||||
# latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
# latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
# latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('index', 'paperless', u'Paperless Documentation',
|
||||
[u'Daniel Quinn'], 1)
|
||||
]
|
||||
man_pages = [("index", "paperless", "Paperless Documentation", ["Daniel Quinn"], 1)]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#man_show_urls = False
|
||||
# man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
@ -229,93 +223,99 @@ man_pages = [
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('index', 'Paperless', u'Paperless Documentation',
|
||||
u'Daniel Quinn', 'paperless', 'Scan, index, and archive all of your paper documents.',
|
||||
'Miscellaneous'),
|
||||
(
|
||||
"index",
|
||||
"Paperless",
|
||||
"Paperless Documentation",
|
||||
"Daniel Quinn",
|
||||
"paperless",
|
||||
"Scan, index, and archive all of your paper documents.",
|
||||
"Miscellaneous",
|
||||
),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#texinfo_appendices = []
|
||||
# texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#texinfo_domain_indices = True
|
||||
# texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
#texinfo_show_urls = 'footnote'
|
||||
# texinfo_show_urls = 'footnote'
|
||||
|
||||
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||
#texinfo_no_detailmenu = False
|
||||
# texinfo_no_detailmenu = False
|
||||
|
||||
|
||||
# -- Options for Epub output ----------------------------------------------
|
||||
|
||||
# Bibliographic Dublin Core info.
|
||||
epub_title = u'Paperless'
|
||||
epub_author = u'Daniel Quinn'
|
||||
epub_publisher = u'Daniel Quinn'
|
||||
epub_copyright = u'2015, Daniel Quinn'
|
||||
epub_title = "Paperless"
|
||||
epub_author = "Daniel Quinn"
|
||||
epub_publisher = "Daniel Quinn"
|
||||
epub_copyright = "2015, Daniel Quinn"
|
||||
|
||||
# The basename for the epub file. It defaults to the project name.
|
||||
#epub_basename = u'Paperless'
|
||||
# epub_basename = u'Paperless'
|
||||
|
||||
# The HTML theme for the epub output. Since the default themes are not optimized
|
||||
# for small screen space, using the same theme for HTML and epub output is
|
||||
# usually not wise. This defaults to 'epub', a theme designed to save visual
|
||||
# space.
|
||||
#epub_theme = 'epub'
|
||||
# epub_theme = 'epub'
|
||||
|
||||
# The language of the text. It defaults to the language option
|
||||
# or en if the language is not set.
|
||||
#epub_language = ''
|
||||
# epub_language = ''
|
||||
|
||||
# The scheme of the identifier. Typical schemes are ISBN or URL.
|
||||
#epub_scheme = ''
|
||||
# epub_scheme = ''
|
||||
|
||||
# The unique identifier of the text. This can be a ISBN number
|
||||
# or the project homepage.
|
||||
#epub_identifier = ''
|
||||
# epub_identifier = ''
|
||||
|
||||
# A unique identification for the text.
|
||||
#epub_uid = ''
|
||||
# epub_uid = ''
|
||||
|
||||
# A tuple containing the cover image and cover page html template filenames.
|
||||
#epub_cover = ()
|
||||
# epub_cover = ()
|
||||
|
||||
# A sequence of (type, uri, title) tuples for the guide element of content.opf.
|
||||
#epub_guide = ()
|
||||
# epub_guide = ()
|
||||
|
||||
# HTML files that should be inserted before the pages created by sphinx.
|
||||
# The format is a list of tuples containing the path and title.
|
||||
#epub_pre_files = []
|
||||
# epub_pre_files = []
|
||||
|
||||
# HTML files shat should be inserted after the pages created by sphinx.
|
||||
# The format is a list of tuples containing the path and title.
|
||||
#epub_post_files = []
|
||||
# epub_post_files = []
|
||||
|
||||
# A list of files that should not be packed into the epub file.
|
||||
epub_exclude_files = ['search.html']
|
||||
epub_exclude_files = ["search.html"]
|
||||
|
||||
# The depth of the table of contents in toc.ncx.
|
||||
#epub_tocdepth = 3
|
||||
# epub_tocdepth = 3
|
||||
|
||||
# Allow duplicate toc entries.
|
||||
#epub_tocdup = True
|
||||
# epub_tocdup = True
|
||||
|
||||
# Choose between 'default' and 'includehidden'.
|
||||
#epub_tocscope = 'default'
|
||||
# epub_tocscope = 'default'
|
||||
|
||||
# Fix unsupported image types using the PIL.
|
||||
#epub_fix_images = False
|
||||
# epub_fix_images = False
|
||||
|
||||
# Scale large images.
|
||||
#epub_max_image_width = 0
|
||||
# epub_max_image_width = 0
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
#epub_show_urls = 'inline'
|
||||
# epub_show_urls = 'inline'
|
||||
|
||||
# If false, no index is generated.
|
||||
#epub_use_index = True
|
||||
# epub_use_index = True
|
||||
|
||||
|
||||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
intersphinx_mapping = {'http://docs.python.org/': None}
|
||||
intersphinx_mapping = {"http://docs.python.org/": None}
|
||||
|
@@ -108,6 +108,7 @@ Testing and code style:
* Run ``pytest`` in the src/ directory to execute all tests. This also generates a HTML coverage
  report. When runnings test, paperless.conf is loaded as well. However: the tests rely on the default
  configuration. This is not ideal. But for now, make sure no settings except for DEBUG are overridden when testing.
* Run ``black`` to format your code.
* Run ``pycodestyle`` to test your code for issues with the configured code style settings.

.. note::
@@ -2,35 +2,38 @@ import os

bind = f'0.0.0.0:{os.getenv("PAPERLESS_PORT", 8000)}'
workers = int(os.getenv("PAPERLESS_WEBSERVER_WORKERS", 2))
worker_class = 'paperless.workers.ConfigurableWorker'
worker_class = "paperless.workers.ConfigurableWorker"
timeout = 120


def pre_fork(server, worker):
    pass


def pre_exec(server):
    server.log.info("Forked child, re-executing.")


def when_ready(server):
    server.log.info("Server is ready. Spawning workers")


def worker_int(worker):
    worker.log.info("worker received INT or QUIT signal")

    ## get traceback info
    import threading, sys, traceback

    id2name = dict([(th.ident, th.name) for th in threading.enumerate()])
    code = []
    for threadId, stack in sys._current_frames().items():
        code.append("\n# Thread: %s(%d)" % (id2name.get(threadId,""),
                    threadId))
        code.append("\n# Thread: %s(%d)" % (id2name.get(threadId, ""), threadId))
        for filename, lineno, name, line in traceback.extract_stack(stack):
            code.append('File: "%s", line %d, in %s' % (filename,
                        lineno, name))
            code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
            if line:
                code.append(" %s" % (line.strip()))
    worker.log.debug("\n".join(code))


def worker_abort(worker):
    worker.log.info("worker received SIGABRT signal")

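The gunicorn settings above take their bind port and worker count from the environment, so both can be overridden at launch; a minimal sketch, with the application module left as a placeholder because it is not named in this excerpt:

PAPERLESS_PORT=8010 PAPERLESS_WEBSERVER_WORKERS=4 \
    gunicorn -c gunicorn.conf.py <application.module>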
@ -1,39 +1,32 @@
|
||||
from django.contrib import admin
|
||||
|
||||
from .models import Correspondent, Document, DocumentType, Tag, \
|
||||
SavedView, SavedViewFilterRule
|
||||
from .models import (
|
||||
Correspondent,
|
||||
Document,
|
||||
DocumentType,
|
||||
Tag,
|
||||
SavedView,
|
||||
SavedViewFilterRule,
|
||||
)
|
||||
|
||||
|
||||
class CorrespondentAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = (
|
||||
"name",
|
||||
"match",
|
||||
"matching_algorithm"
|
||||
)
|
||||
list_display = ("name", "match", "matching_algorithm")
|
||||
list_filter = ("matching_algorithm",)
|
||||
list_editable = ("match", "matching_algorithm")
|
||||
|
||||
|
||||
class TagAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = (
|
||||
"name",
|
||||
"color",
|
||||
"match",
|
||||
"matching_algorithm"
|
||||
)
|
||||
list_display = ("name", "color", "match", "matching_algorithm")
|
||||
list_filter = ("color", "matching_algorithm")
|
||||
list_editable = ("color", "match", "matching_algorithm")
|
||||
|
||||
|
||||
class DocumentTypeAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = (
|
||||
"name",
|
||||
"match",
|
||||
"matching_algorithm"
|
||||
)
|
||||
list_display = ("name", "match", "matching_algorithm")
|
||||
list_filter = ("matching_algorithm",)
|
||||
list_editable = ("match", "matching_algorithm")
|
||||
|
||||
@ -49,18 +42,12 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
"filename",
|
||||
"checksum",
|
||||
"archive_filename",
|
||||
"archive_checksum"
|
||||
"archive_checksum",
|
||||
)
|
||||
|
||||
list_display_links = ("title",)
|
||||
|
||||
list_display = (
|
||||
"id",
|
||||
"title",
|
||||
"mime_type",
|
||||
"filename",
|
||||
"archive_filename"
|
||||
)
|
||||
list_display = ("id", "title", "mime_type", "filename", "archive_filename")
|
||||
|
||||
list_filter = (
|
||||
("mime_type"),
|
||||
@ -79,6 +66,7 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
|
||||
def created_(self, obj):
|
||||
return obj.created.date().strftime("%Y-%m-%d")
|
||||
|
||||
created_.short_description = "Created"
|
||||
|
||||
def delete_queryset(self, request, queryset):
|
||||
@ -92,11 +80,13 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
|
||||
def delete_model(self, request, obj):
|
||||
from documents import index
|
||||
|
||||
index.remove_document_from_index(obj)
|
||||
super(DocumentAdmin, self).delete_model(request, obj)
|
||||
|
||||
def save_model(self, request, obj, form, change):
|
||||
from documents import index
|
||||
|
||||
index.add_or_update_document(obj)
|
||||
super(DocumentAdmin, self).save_model(request, obj, form, change)
|
||||
|
||||
@ -109,9 +99,7 @@ class SavedViewAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = ("name", "user")
|
||||
|
||||
inlines = [
|
||||
RuleInline
|
||||
]
|
||||
inlines = [RuleInline]
|
||||
|
||||
|
||||
admin.site.register(Correspondent, CorrespondentAdmin)
|
||||
|
@@ -17,7 +17,7 @@ class DocumentsConfig(AppConfig):
            set_correspondent,
            set_document_type,
            set_tags,
            add_to_index
            add_to_index,
        )

        document_consumption_finished.connect(add_inbox_tags)
@ -4,14 +4,12 @@ from documents.models import Document
|
||||
|
||||
|
||||
class BulkArchiveStrategy:
|
||||
|
||||
def __init__(self, zipf: ZipFile):
|
||||
self.zipf = zipf
|
||||
|
||||
def make_unique_filename(self,
|
||||
doc: Document,
|
||||
archive: bool = False,
|
||||
folder: str = ""):
|
||||
def make_unique_filename(
|
||||
self, doc: Document, archive: bool = False, folder: str = ""
|
||||
):
|
||||
counter = 0
|
||||
while True:
|
||||
filename = folder + doc.get_public_filename(archive, counter)
|
||||
@ -25,36 +23,31 @@ class BulkArchiveStrategy:
|
||||
|
||||
|
||||
class OriginalsOnlyStrategy(BulkArchiveStrategy):
|
||||
|
||||
def add_document(self, doc: Document):
|
||||
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
|
||||
|
||||
|
||||
class ArchiveOnlyStrategy(BulkArchiveStrategy):
|
||||
|
||||
def __init__(self, zipf):
|
||||
super(ArchiveOnlyStrategy, self).__init__(zipf)
|
||||
|
||||
def add_document(self, doc: Document):
|
||||
if doc.has_archive_version:
|
||||
self.zipf.write(doc.archive_path,
|
||||
self.make_unique_filename(doc, archive=True))
|
||||
self.zipf.write(
|
||||
doc.archive_path, self.make_unique_filename(doc, archive=True)
|
||||
)
|
||||
else:
|
||||
self.zipf.write(doc.source_path,
|
||||
self.make_unique_filename(doc))
|
||||
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
|
||||
|
||||
|
||||
class OriginalAndArchiveStrategy(BulkArchiveStrategy):
|
||||
|
||||
def add_document(self, doc: Document):
|
||||
if doc.has_archive_version:
|
||||
self.zipf.write(
|
||||
doc.archive_path, self.make_unique_filename(
|
||||
doc, archive=True, folder="archive/"
|
||||
)
|
||||
doc.archive_path,
|
||||
self.make_unique_filename(doc, archive=True, folder="archive/"),
|
||||
)
|
||||
|
||||
self.zipf.write(
|
||||
doc.source_path,
|
||||
self.make_unique_filename(doc, folder="originals/")
|
||||
doc.source_path, self.make_unique_filename(doc, folder="originals/")
|
||||
)
|
||||
|
@ -10,13 +10,11 @@ def set_correspondent(doc_ids, correspondent):
|
||||
if correspondent:
|
||||
correspondent = Correspondent.objects.get(id=correspondent)
|
||||
|
||||
qs = Document.objects.filter(
|
||||
Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
|
||||
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
|
||||
affected_docs = [doc.id for doc in qs]
|
||||
qs.update(correspondent=correspondent)
|
||||
|
||||
async_task(
|
||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
|
||||
return "OK"
|
||||
|
||||
@ -25,13 +23,11 @@ def set_document_type(doc_ids, document_type):
|
||||
if document_type:
|
||||
document_type = DocumentType.objects.get(id=document_type)
|
||||
|
||||
qs = Document.objects.filter(
|
||||
Q(id__in=doc_ids) & ~Q(document_type=document_type))
|
||||
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(document_type=document_type))
|
||||
affected_docs = [doc.id for doc in qs]
|
||||
qs.update(document_type=document_type)
|
||||
|
||||
async_task(
|
||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
|
||||
return "OK"
|
||||
|
||||
@ -43,13 +39,11 @@ def add_tag(doc_ids, tag):
|
||||
|
||||
DocumentTagRelationship = Document.tags.through
|
||||
|
||||
DocumentTagRelationship.objects.bulk_create([
|
||||
DocumentTagRelationship(
|
||||
document_id=doc, tag_id=tag) for doc in affected_docs
|
||||
])
|
||||
DocumentTagRelationship.objects.bulk_create(
|
||||
[DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs]
|
||||
)
|
||||
|
||||
async_task(
|
||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
|
||||
return "OK"
|
||||
|
||||
@ -62,12 +56,10 @@ def remove_tag(doc_ids, tag):
|
||||
DocumentTagRelationship = Document.tags.through
|
||||
|
||||
DocumentTagRelationship.objects.filter(
|
||||
Q(document_id__in=affected_docs) &
|
||||
Q(tag_id=tag)
|
||||
Q(document_id__in=affected_docs) & Q(tag_id=tag)
|
||||
).delete()
|
||||
|
||||
async_task(
|
||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
|
||||
return "OK"
|
||||
|
||||
@ -83,13 +75,15 @@ def modify_tags(doc_ids, add_tags, remove_tags):
|
||||
tag_id__in=remove_tags,
|
||||
).delete()
|
||||
|
||||
DocumentTagRelationship.objects.bulk_create([DocumentTagRelationship(
|
||||
document_id=doc, tag_id=tag) for (doc, tag) in itertools.product(
|
||||
affected_docs, add_tags)
|
||||
], ignore_conflicts=True)
|
||||
DocumentTagRelationship.objects.bulk_create(
|
||||
[
|
||||
DocumentTagRelationship(document_id=doc, tag_id=tag)
|
||||
for (doc, tag) in itertools.product(affected_docs, add_tags)
|
||||
],
|
||||
ignore_conflicts=True,
|
||||
)
|
||||
|
||||
async_task(
|
||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||
|
||||
return "OK"
|
||||
|
||||
|
@ -16,28 +16,36 @@ def changed_password_check(app_configs, **kwargs):
|
||||
|
||||
try:
|
||||
encrypted_doc = Document.objects.filter(
|
||||
storage_type=Document.STORAGE_TYPE_GPG).first()
|
||||
storage_type=Document.STORAGE_TYPE_GPG
|
||||
).first()
|
||||
except (OperationalError, ProgrammingError, FieldError):
|
||||
return [] # No documents table yet
|
||||
|
||||
if encrypted_doc:
|
||||
|
||||
if not settings.PASSPHRASE:
|
||||
return [Error(
|
||||
"The database contains encrypted documents but no password "
|
||||
"is set."
|
||||
)]
|
||||
return [
|
||||
Error(
|
||||
"The database contains encrypted documents but no password "
|
||||
"is set."
|
||||
)
|
||||
]
|
||||
|
||||
if not GnuPG.decrypted(encrypted_doc.source_file):
|
||||
return [Error(textwrap.dedent(
|
||||
"""
|
||||
return [
|
||||
Error(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
The current password doesn't match the password of the
|
||||
existing documents.
|
||||
|
||||
If you intend to change your password, you must first export
|
||||
all of the old documents, start fresh with the new password
|
||||
and then re-import them."
|
||||
"""))]
|
||||
"""
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
return []
|
||||
|
||||
@ -50,7 +58,11 @@ def parser_check(app_configs, **kwargs):
|
||||
parsers.append(response[1])
|
||||
|
||||
if len(parsers) == 0:
|
||||
return [Error("No parsers found. This is a bug. The consumer won't be "
|
||||
"able to consume any documents without parsers.")]
|
||||
return [
|
||||
Error(
|
||||
"No parsers found. This is a bug. The consumer won't be "
|
||||
"able to consume any documents without parsers."
|
||||
)
|
||||
]
|
||||
else:
|
||||
return []
|
||||
|
@ -39,8 +39,7 @@ def load_classifier():
|
||||
try:
|
||||
classifier.load()
|
||||
|
||||
except (ClassifierModelCorruptError,
|
||||
IncompatibleClassifierVersionError):
|
||||
except (ClassifierModelCorruptError, IncompatibleClassifierVersionError):
|
||||
# there's something wrong with the model file.
|
||||
logger.exception(
|
||||
f"Unrecoverable error while loading document "
|
||||
@ -49,14 +48,10 @@ def load_classifier():
|
||||
os.unlink(settings.MODEL_FILE)
|
||||
classifier = None
|
||||
except OSError:
|
||||
logger.exception(
|
||||
f"IO error while loading document classification model"
|
||||
)
|
||||
logger.exception(f"IO error while loading document classification model")
|
||||
classifier = None
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Unknown error while loading document classification model"
|
||||
)
|
||||
logger.exception(f"Unknown error while loading document classification model")
|
||||
classifier = None
|
||||
|
||||
return classifier
|
||||
@ -83,7 +78,8 @@ class DocumentClassifier(object):
|
||||
|
||||
if schema_version != self.FORMAT_VERSION:
|
||||
raise IncompatibleClassifierVersionError(
|
||||
"Cannor load classifier, incompatible versions.")
|
||||
"Cannor load classifier, incompatible versions."
|
||||
)
|
||||
else:
|
||||
try:
|
||||
self.data_hash = pickle.load(f)
|
||||
@ -125,30 +121,37 @@ class DocumentClassifier(object):
|
||||
# Step 1: Extract and preprocess training data from the database.
|
||||
logger.debug("Gathering data from database...")
|
||||
m = hashlib.sha1()
|
||||
for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501
|
||||
for doc in Document.objects.order_by("pk").exclude(
|
||||
tags__is_inbox_tag=True
|
||||
): # NOQA: E501
|
||||
preprocessed_content = preprocess_content(doc.content)
|
||||
m.update(preprocessed_content.encode('utf-8'))
|
||||
m.update(preprocessed_content.encode("utf-8"))
|
||||
data.append(preprocessed_content)
|
||||
|
||||
y = -1
|
||||
dt = doc.document_type
|
||||
if dt and dt.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
y = dt.pk
|
||||
m.update(y.to_bytes(4, 'little', signed=True))
|
||||
m.update(y.to_bytes(4, "little", signed=True))
|
||||
labels_document_type.append(y)
|
||||
|
||||
y = -1
|
||||
cor = doc.correspondent
|
||||
if cor and cor.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||
y = cor.pk
|
||||
m.update(y.to_bytes(4, 'little', signed=True))
|
||||
m.update(y.to_bytes(4, "little", signed=True))
|
||||
labels_correspondent.append(y)
|
||||
|
||||
tags = sorted([tag.pk for tag in doc.tags.filter(
|
||||
matching_algorithm=MatchingModel.MATCH_AUTO
|
||||
)])
|
||||
tags = sorted(
|
||||
[
|
||||
tag.pk
|
||||
for tag in doc.tags.filter(
|
||||
matching_algorithm=MatchingModel.MATCH_AUTO
|
||||
)
|
||||
]
|
||||
)
|
||||
for tag in tags:
|
||||
m.update(tag.to_bytes(4, 'little', signed=True))
|
||||
m.update(tag.to_bytes(4, "little", signed=True))
|
||||
labels_tags.append(tags)
|
||||
|
||||
if not data:
|
||||
@ -174,10 +177,7 @@ class DocumentClassifier(object):
|
||||
logger.debug(
|
||||
"{} documents, {} tag(s), {} correspondent(s), "
|
||||
"{} document type(s).".format(
|
||||
len(data),
|
||||
num_tags,
|
||||
num_correspondents,
|
||||
num_document_types
|
||||
len(data), num_tags, num_correspondents, num_document_types
|
||||
)
|
||||
)
|
||||
|
||||
@ -188,9 +188,7 @@ class DocumentClassifier(object):
|
||||
# Step 2: vectorize data
|
||||
logger.debug("Vectorizing data...")
|
||||
self.data_vectorizer = CountVectorizer(
|
||||
analyzer="word",
|
||||
ngram_range=(1, 2),
|
||||
min_df=0.01
|
||||
analyzer="word", ngram_range=(1, 2), min_df=0.01
|
||||
)
|
||||
data_vectorized = self.data_vectorizer.fit_transform(data)
|
||||
|
||||
@ -201,54 +199,41 @@ class DocumentClassifier(object):
|
||||
if num_tags == 1:
|
||||
# Special case where only one tag has auto:
|
||||
# Fallback to binary classification.
|
||||
labels_tags = [label[0] if len(label) == 1 else -1
|
||||
for label in labels_tags]
|
||||
labels_tags = [
|
||||
label[0] if len(label) == 1 else -1 for label in labels_tags
|
||||
]
|
||||
self.tags_binarizer = LabelBinarizer()
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||
labels_tags).ravel()
|
||||
labels_tags
|
||||
).ravel()
|
||||
else:
|
||||
self.tags_binarizer = MultiLabelBinarizer()
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||
labels_tags)
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)
|
||||
|
||||
self.tags_classifier = MLPClassifier(tol=0.01)
|
||||
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
||||
else:
|
||||
self.tags_classifier = None
|
||||
logger.debug(
|
||||
"There are no tags. Not training tags classifier."
|
||||
)
|
||||
logger.debug("There are no tags. Not training tags classifier.")
|
||||
|
||||
if num_correspondents > 0:
|
||||
logger.debug(
|
||||
"Training correspondent classifier..."
|
||||
)
|
||||
logger.debug("Training correspondent classifier...")
|
||||
self.correspondent_classifier = MLPClassifier(tol=0.01)
|
||||
self.correspondent_classifier.fit(
|
||||
data_vectorized,
|
||||
labels_correspondent
|
||||
)
|
||||
self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
|
||||
else:
|
||||
self.correspondent_classifier = None
|
||||
logger.debug(
|
||||
"There are no correspondents. Not training correspondent "
|
||||
"classifier."
|
||||
"There are no correspondents. Not training correspondent " "classifier."
|
||||
)
|
||||
|
||||
if num_document_types > 0:
|
||||
logger.debug(
|
||||
"Training document type classifier..."
|
||||
)
|
||||
logger.debug("Training document type classifier...")
|
||||
self.document_type_classifier = MLPClassifier(tol=0.01)
|
||||
self.document_type_classifier.fit(
|
||||
data_vectorized,
|
||||
labels_document_type
|
||||
)
|
||||
self.document_type_classifier.fit(data_vectorized, labels_document_type)
|
||||
else:
|
||||
self.document_type_classifier = None
|
||||
logger.debug(
|
||||
"There are no document types. Not training document type "
|
||||
"classifier."
|
||||
"There are no document types. Not training document type " "classifier."
|
||||
)
|
||||
|
||||
self.data_hash = new_data_hash
|
||||
@ -284,10 +269,10 @@ class DocumentClassifier(object):
|
||||
X = self.data_vectorizer.transform([preprocess_content(content)])
|
||||
y = self.tags_classifier.predict(X)
|
||||
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
||||
if type_of_target(y).startswith('multilabel'):
|
||||
if type_of_target(y).startswith("multilabel"):
|
||||
# the usual case when there are multiple tags.
|
||||
return list(tags_ids)
|
||||
elif type_of_target(y) == 'binary' and tags_ids != -1:
|
||||
elif type_of_target(y) == "binary" and tags_ids != -1:
|
||||
# This is for when we have binary classification with only one
|
||||
# tag and the result is to assign this tag.
|
||||
return [tags_ids]
|
||||
|
@ -15,15 +15,11 @@ from filelock import FileLock
|
||||
from rest_framework.reverse import reverse
|
||||
|
||||
from .classifier import load_classifier
|
||||
from .file_handling import create_source_path_directory, \
|
||||
generate_unique_filename
|
||||
from .file_handling import create_source_path_directory, generate_unique_filename
|
||||
from .loggers import LoggingMixin
|
||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
|
||||
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
|
||||
from .signals import (
|
||||
document_consumption_finished,
|
||||
document_consumption_started
|
||||
)
|
||||
from .signals import document_consumption_finished, document_consumption_started
|
||||
|
||||
|
||||
class ConsumerError(Exception):
|
||||
@ -49,23 +45,26 @@ class Consumer(LoggingMixin):
|
||||
|
||||
logging_name = "paperless.consumer"
|
||||
|
||||
def _send_progress(self, current_progress, max_progress, status,
|
||||
message=None, document_id=None):
|
||||
def _send_progress(
|
||||
self, current_progress, max_progress, status, message=None, document_id=None
|
||||
):
|
||||
payload = {
|
||||
'filename': os.path.basename(self.filename) if self.filename else None, # NOQA: E501
|
||||
'task_id': self.task_id,
|
||||
'current_progress': current_progress,
|
||||
'max_progress': max_progress,
|
||||
'status': status,
|
||||
'message': message,
|
||||
'document_id': document_id
|
||||
"filename": os.path.basename(self.filename)
|
||||
if self.filename
|
||||
else None, # NOQA: E501
|
||||
"task_id": self.task_id,
|
||||
"current_progress": current_progress,
|
||||
"max_progress": max_progress,
|
||||
"status": status,
|
||||
"message": message,
|
||||
"document_id": document_id,
|
||||
}
|
||||
async_to_sync(self.channel_layer.group_send)("status_updates",
|
||||
{'type': 'status_update',
|
||||
'data': payload})
|
||||
async_to_sync(self.channel_layer.group_send)(
|
||||
"status_updates", {"type": "status_update", "data": payload}
|
||||
)
|
||||
|
||||
def _fail(self, message, log_message=None, exc_info=None):
|
||||
self._send_progress(100, 100, 'FAILED', message)
|
||||
self._send_progress(100, 100, "FAILED", message)
|
||||
self.log("error", log_message or message, exc_info=exc_info)
|
||||
raise ConsumerError(f"{self.filename}: {log_message or message}")
|
||||
|
||||
@ -84,19 +83,20 @@ class Consumer(LoggingMixin):
|
||||
def pre_check_file_exists(self):
|
||||
if not os.path.isfile(self.path):
|
||||
self._fail(
|
||||
MESSAGE_FILE_NOT_FOUND,
|
||||
f"Cannot consume {self.path}: File not found."
|
||||
MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
|
||||
)
|
||||
|
||||
def pre_check_duplicate(self):
|
||||
with open(self.path, "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
|
||||
if Document.objects.filter(
|
||||
Q(checksum=checksum) | Q(archive_checksum=checksum)
|
||||
).exists(): # NOQA: E501
|
||||
if settings.CONSUMER_DELETE_DUPLICATES:
|
||||
os.unlink(self.path)
|
||||
self._fail(
|
||||
MESSAGE_DOCUMENT_ALREADY_EXISTS,
|
||||
f"Not consuming {self.filename}: It is a duplicate."
|
||||
f"Not consuming {self.filename}: It is a duplicate.",
|
||||
)
|
||||
|
||||
def pre_check_directories(self):
|
||||
@ -113,10 +113,10 @@ class Consumer(LoggingMixin):
|
||||
self._fail(
|
||||
MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND,
|
||||
f"Configured pre-consume script "
|
||||
f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
|
||||
f"{settings.PRE_CONSUME_SCRIPT} does not exist.",
|
||||
)
|
||||
|
||||
self.log("info",
|
||||
f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||
|
||||
try:
|
||||
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
|
||||
@ -124,7 +124,7 @@ class Consumer(LoggingMixin):
|
||||
self._fail(
|
||||
MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
|
||||
f"Error while executing pre-consume script: {e}",
|
||||
exc_info=True
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def run_post_consume_script(self, document):
|
||||
@ -135,42 +135,44 @@ class Consumer(LoggingMixin):
|
||||
self._fail(
|
||||
MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND,
|
||||
f"Configured post-consume script "
|
||||
f"{settings.POST_CONSUME_SCRIPT} does not exist."
|
||||
f"{settings.POST_CONSUME_SCRIPT} does not exist.",
|
||||
)
|
||||
|
||||
self.log(
|
||||
"info",
|
||||
f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
|
||||
"info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
|
||||
)
|
||||
|
||||
try:
|
||||
Popen((
|
||||
settings.POST_CONSUME_SCRIPT,
|
||||
str(document.pk),
|
||||
document.get_public_filename(),
|
||||
os.path.normpath(document.source_path),
|
||||
os.path.normpath(document.thumbnail_path),
|
||||
reverse("document-download", kwargs={"pk": document.pk}),
|
||||
reverse("document-thumb", kwargs={"pk": document.pk}),
|
||||
str(document.correspondent),
|
||||
str(",".join(document.tags.all().values_list(
|
||||
"name", flat=True)))
|
||||
)).wait()
|
||||
Popen(
|
||||
(
|
||||
settings.POST_CONSUME_SCRIPT,
|
||||
str(document.pk),
|
||||
document.get_public_filename(),
|
||||
os.path.normpath(document.source_path),
|
||||
os.path.normpath(document.thumbnail_path),
|
||||
reverse("document-download", kwargs={"pk": document.pk}),
|
||||
reverse("document-thumb", kwargs={"pk": document.pk}),
|
||||
str(document.correspondent),
|
||||
str(",".join(document.tags.all().values_list("name", flat=True))),
|
||||
)
|
||||
).wait()
|
||||
except Exception as e:
|
||||
self._fail(
|
||||
MESSAGE_POST_CONSUME_SCRIPT_ERROR,
|
||||
f"Error while executing post-consume script: {e}",
|
||||
exc_info=True
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
def try_consume_file(self,
|
||||
path,
|
||||
override_filename=None,
|
||||
override_title=None,
|
||||
override_correspondent_id=None,
|
||||
override_document_type_id=None,
|
||||
override_tag_ids=None,
|
||||
task_id=None):
|
||||
def try_consume_file(
|
||||
self,
|
||||
path,
|
||||
override_filename=None,
|
||||
override_title=None,
|
||||
override_correspondent_id=None,
|
||||
override_document_type_id=None,
|
||||
override_tag_ids=None,
|
||||
task_id=None,
|
||||
):
|
||||
"""
|
||||
Return the document object if it was successfully created.
|
||||
"""
|
||||
@ -183,7 +185,7 @@ class Consumer(LoggingMixin):
|
||||
self.override_tag_ids = override_tag_ids
|
||||
self.task_id = task_id or str(uuid.uuid4())
|
||||
|
||||
self._send_progress(0, 100, 'STARTING', MESSAGE_NEW_FILE)
|
||||
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
|
||||
|
||||
# this is for grouping logging entries for this particular file
|
||||
# together.
|
||||
@ -206,17 +208,12 @@ class Consumer(LoggingMixin):
|
||||
|
||||
parser_class = get_parser_class_for_mime_type(mime_type)
|
||||
if not parser_class:
|
||||
self._fail(
|
||||
MESSAGE_UNSUPPORTED_TYPE,
|
||||
f"Unsupported mime type {mime_type}"
|
||||
)
|
||||
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
|
||||
|
||||
# Notify all listeners that we're going to do some work.
|
||||
|
||||
document_consumption_started.send(
|
||||
sender=self.__class__,
|
||||
filename=self.path,
|
||||
logging_group=self.logging_group
|
||||
sender=self.__class__, filename=self.path, logging_group=self.logging_group
|
||||
)
|
||||
|
||||
self.run_pre_consume_script()
|
||||
@ -243,21 +240,20 @@ class Consumer(LoggingMixin):
|
||||
archive_path = None
|
||||
|
||||
try:
|
||||
self._send_progress(20, 100, 'WORKING', MESSAGE_PARSING_DOCUMENT)
|
||||
self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT)
|
||||
self.log("debug", "Parsing {}...".format(self.filename))
|
||||
document_parser.parse(self.path, mime_type, self.filename)
|
||||
|
||||
self.log("debug", f"Generating thumbnail for {self.filename}...")
|
||||
self._send_progress(70, 100, 'WORKING',
|
||||
MESSAGE_GENERATING_THUMBNAIL)
|
||||
self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
|
||||
thumbnail = document_parser.get_optimised_thumbnail(
|
||||
self.path, mime_type, self.filename)
|
||||
self.path, mime_type, self.filename
|
||||
)
|
||||
|
||||
text = document_parser.get_text()
|
||||
date = document_parser.get_date()
|
||||
if not date:
|
||||
self._send_progress(90, 100, 'WORKING',
|
||||
MESSAGE_PARSE_DATE)
|
||||
self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE)
|
||||
date = parse_date(self.filename, text)
|
||||
archive_path = document_parser.get_archive_path()
|
||||
|
||||
@ -266,7 +262,7 @@ class Consumer(LoggingMixin):
|
||||
self._fail(
|
||||
str(e),
|
||||
f"Error while consuming document {self.filename}: {e}",
|
||||
exc_info=True
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Prepare the document classifier.
|
||||
@ -277,18 +273,14 @@ class Consumer(LoggingMixin):
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
self._send_progress(95, 100, 'WORKING', MESSAGE_SAVE_DOCUMENT)
|
||||
self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
|
||||
# now that everything is done, we can start to store the document
|
||||
# in the system. This will be a transaction and reasonably fast.
|
||||
try:
|
||||
with transaction.atomic():
|
||||
|
||||
# store the document.
|
||||
document = self._store(
|
||||
text=text,
|
||||
date=date,
|
||||
mime_type=mime_type
|
||||
)
|
||||
document = self._store(text=text, date=date, mime_type=mime_type)
|
||||
|
||||
# If we get here, it was successful. Proceed with post-consume
|
||||
# hooks. If they fail, nothing will get changed.
|
||||
@ -297,7 +289,7 @@ class Consumer(LoggingMixin):
|
||||
sender=self.__class__,
|
||||
document=document,
|
||||
logging_group=self.logging_group,
|
||||
classifier=classifier
|
||||
classifier=classifier,
|
||||
)
|
||||
|
||||
# After everything is in the database, copy the files into
|
||||
@ -306,24 +298,25 @@ class Consumer(LoggingMixin):
|
||||
document.filename = generate_unique_filename(document)
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
self._write(document.storage_type,
|
||||
self.path, document.source_path)
|
||||
self._write(document.storage_type, self.path, document.source_path)
|
||||
|
||||
self._write(document.storage_type,
|
||||
thumbnail, document.thumbnail_path)
|
||||
self._write(
|
||||
document.storage_type, thumbnail, document.thumbnail_path
|
||||
)
|
||||
|
||||
if archive_path and os.path.isfile(archive_path):
|
||||
document.archive_filename = generate_unique_filename(
|
||||
document,
|
||||
archive_filename=True
|
||||
document, archive_filename=True
|
||||
)
|
||||
create_source_path_directory(document.archive_path)
|
||||
self._write(document.storage_type,
|
||||
archive_path, document.archive_path)
|
||||
self._write(
|
||||
document.storage_type, archive_path, document.archive_path
|
||||
)
|
||||
|
||||
with open(archive_path, 'rb') as f:
|
||||
with open(archive_path, "rb") as f:
|
||||
document.archive_checksum = hashlib.md5(
|
||||
f.read()).hexdigest()
|
||||
f.read()
|
||||
).hexdigest()
|
||||
|
||||
# Don't save with the lock active. Saving will cause the file
|
||||
# renaming logic to aquire the lock as well.
|
||||
@ -335,8 +328,8 @@ class Consumer(LoggingMixin):
|
||||
|
||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||
shadow_file = os.path.join(
|
||||
os.path.dirname(self.path),
|
||||
"._" + os.path.basename(self.path))
|
||||
os.path.dirname(self.path), "._" + os.path.basename(self.path)
|
||||
)
|
||||
|
||||
if os.path.isfile(shadow_file):
|
||||
self.log("debug", "Deleting file {}".format(shadow_file))
|
||||
@ -345,21 +338,17 @@ class Consumer(LoggingMixin):
|
||||
except Exception as e:
|
||||
self._fail(
|
||||
str(e),
|
||||
f"The following error occured while consuming "
|
||||
f"{self.filename}: {e}",
|
||||
exc_info=True
|
||||
f"The following error occured while consuming " f"{self.filename}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
finally:
|
||||
document_parser.cleanup()
|
||||
|
||||
self.run_post_consume_script(document)
|
||||
|
||||
self.log(
|
||||
"info",
|
||||
"Document {} consumption finished".format(document)
|
||||
)
|
||||
self.log("info", "Document {} consumption finished".format(document))
|
||||
|
||||
self._send_progress(100, 100, 'SUCCESS', MESSAGE_FINISHED, document.id)
|
||||
self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id)
|
||||
|
||||
return document
|
||||
|
||||
@ -373,8 +362,11 @@ class Consumer(LoggingMixin):
|
||||
|
||||
self.log("debug", "Saving record to database")
|
||||
|
||||
created = file_info.created or date or timezone.make_aware(
|
||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
created = (
|
||||
file_info.created
|
||||
or date
|
||||
or timezone.make_aware(datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
)
|
||||
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
@ -386,7 +378,7 @@ class Consumer(LoggingMixin):
|
||||
checksum=hashlib.md5(f.read()).hexdigest(),
|
||||
created=created,
|
||||
modified=created,
|
||||
storage_type=storage_type
|
||||
storage_type=storage_type,
|
||||
)
|
||||
|
||||
self.apply_overrides(document)
|
||||
@ -398,11 +390,13 @@ class Consumer(LoggingMixin):
|
||||
def apply_overrides(self, document):
|
||||
if self.override_correspondent_id:
|
||||
document.correspondent = Correspondent.objects.get(
|
||||
pk=self.override_correspondent_id)
|
||||
pk=self.override_correspondent_id
|
||||
)
|
||||
|
||||
if self.override_document_type_id:
|
||||
document.document_type = DocumentType.objects.get(
|
||||
pk=self.override_document_type_id)
|
||||
pk=self.override_document_type_id
|
||||
)
|
||||
|
||||
if self.override_tag_ids:
|
||||
for tag_id in self.override_tag_ids:
|
||||
|
@ -12,7 +12,6 @@ logger = logging.getLogger("paperless.filehandling")
|
||||
|
||||
|
||||
class defaultdictNoStr(defaultdict):
|
||||
|
||||
def __str__(self):
|
||||
raise ValueError("Don't use {tags} directly.")
|
||||
|
||||
@ -63,24 +62,23 @@ def many_to_dictionary(field):
|
||||
mydictionary[index] = slugify(t.name)
|
||||
|
||||
# Find delimiter
|
||||
delimiter = t.name.find('_')
|
||||
delimiter = t.name.find("_")
|
||||
|
||||
if delimiter == -1:
|
||||
delimiter = t.name.find('-')
|
||||
delimiter = t.name.find("-")
|
||||
|
||||
if delimiter == -1:
|
||||
continue
|
||||
|
||||
key = t.name[:delimiter]
|
||||
value = t.name[delimiter + 1:]
|
||||
value = t.name[delimiter + 1 :]
|
||||
|
||||
mydictionary[slugify(key)] = slugify(value)
|
||||
|
||||
return mydictionary
|
||||
|
||||
|
||||
def generate_unique_filename(doc,
|
||||
archive_filename=False):
|
||||
def generate_unique_filename(doc, archive_filename=False):
|
||||
"""
|
||||
Generates a unique filename for doc in settings.ORIGINALS_DIR.
|
||||
|
||||
@ -104,14 +102,17 @@ def generate_unique_filename(doc,
|
||||
|
||||
if archive_filename and doc.filename:
|
||||
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
|
||||
if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)): # NOQA: E501
|
||||
if new_filename == old_filename or not os.path.exists(
|
||||
os.path.join(root, new_filename)
|
||||
): # NOQA: E501
|
||||
return new_filename
|
||||
|
||||
counter = 0
|
||||
|
||||
while True:
|
||||
new_filename = generate_filename(
|
||||
doc, counter, archive_filename=archive_filename)
|
||||
doc, counter, archive_filename=archive_filename
|
||||
)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
@ -127,14 +128,11 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
|
||||
try:
|
||||
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
||||
tags = defaultdictNoStr(lambda: slugify(None),
|
||||
many_to_dictionary(doc.tags))
|
||||
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
|
||||
|
||||
tag_list = pathvalidate.sanitize_filename(
|
||||
",".join(sorted(
|
||||
[tag.name for tag in doc.tags.all()]
|
||||
)),
|
||||
replacement_text="-"
|
||||
",".join(sorted([tag.name for tag in doc.tags.all()])),
|
||||
replacement_text="-",
|
||||
)
|
||||
|
||||
if doc.correspondent:
|
||||
@ -157,13 +155,14 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
asn = "none"
|
||||
|
||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||
title=pathvalidate.sanitize_filename(
|
||||
doc.title, replacement_text="-"),
|
||||
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
|
||||
correspondent=correspondent,
|
||||
document_type=document_type,
|
||||
created=datetime.date.isoformat(doc.created),
|
||||
created_year=doc.created.year if doc.created else "none",
|
||||
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
|
||||
created_month=f"{doc.created.month:02}"
|
||||
if doc.created
|
||||
else "none", # NOQA: E501
|
||||
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
||||
added=datetime.date.isoformat(doc.added),
|
||||
added_year=doc.added.year if doc.added else "none",
|
||||
@ -171,7 +170,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||
asn=asn,
|
||||
tags=tags,
|
||||
tag_list=tag_list
|
||||
tag_list=tag_list,
|
||||
).strip()
|
||||
|
||||
path = path.strip(os.sep)
|
||||
@ -179,7 +178,8 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
except (ValueError, KeyError, IndexError):
|
||||
logger.warning(
|
||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
|
||||
)
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
|
||||
|
@ -10,34 +10,24 @@ DATE_KWARGS = ["year", "month", "day", "date__gt", "gt", "date__lt", "lt"]
class CorrespondentFilterSet(FilterSet):
class Meta:
model = Correspondent
fields = {
"name": CHAR_KWARGS
}
fields = {"name": CHAR_KWARGS}

class TagFilterSet(FilterSet):
class Meta:
model = Tag
fields = {
"name": CHAR_KWARGS
}
fields = {"name": CHAR_KWARGS}

class DocumentTypeFilterSet(FilterSet):
class Meta:
model = DocumentType
fields = {
"name": CHAR_KWARGS
}
fields = {"name": CHAR_KWARGS}

class TagsFilter(Filter):
def __init__(self, exclude=False, in_list=False):
super(TagsFilter, self).__init__()
self.exclude = exclude

@ -48,7 +38,7 @@ class TagsFilter(Filter):
return qs

try:
tag_ids = [int(x) for x in value.split(',')]
tag_ids = [int(x) for x in value.split(",")]
except ValueError:
return qs

@ -65,22 +55,19 @@ class TagsFilter(Filter):
class InboxFilter(Filter):
def filter(self, qs, value):
if value == 'true':
if value == "true":
return qs.filter(tags__is_inbox_tag=True)
elif value == 'false':
elif value == "false":
return qs.exclude(tags__is_inbox_tag=True)
else:
return qs

class TitleContentFilter(Filter):
def filter(self, qs, value):
if value:
return qs.filter(Q(title__icontains=value) |
Q(content__icontains=value))
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
else:
return qs

@ -88,10 +75,7 @@ class TitleContentFilter(Filter):
class DocumentFilterSet(FilterSet):
is_tagged = BooleanFilter(
label="Is tagged",
field_name="tags",
lookup_expr="isnull",
exclude=True
label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True
)
tags__id__all = TagsFilter()

@ -107,38 +91,24 @@ class DocumentFilterSet(FilterSet):
class Meta:
model = Document
fields = {
"title": CHAR_KWARGS,
"content": CHAR_KWARGS,
"archive_serial_number": INT_KWARGS,
"created": DATE_KWARGS,
"added": DATE_KWARGS,
"modified": DATE_KWARGS,
"correspondent": ["isnull"],
"correspondent__id": ID_KWARGS,
"correspondent__name": CHAR_KWARGS,
"tags__id": ID_KWARGS,
"tags__name": CHAR_KWARGS,
"document_type": ["isnull"],
"document_type__id": ID_KWARGS,
"document_type__name": CHAR_KWARGS,
}

class LogFilterSet(FilterSet):
class Meta:
model = Log
fields = {
"level": INT_KWARGS,
"created": DATE_KWARGS,
"group": ID_KWARGS
}
fields = {"level": INT_KWARGS, "created": DATE_KWARGS, "group": ID_KWARGS}
@ -21,51 +21,22 @@ logger = logging.getLogger("paperless.index")
def get_schema():
return Schema(
id=NUMERIC(
stored=True,
unique=True
),
title=TEXT(
sortable=True
),
id=NUMERIC(stored=True, unique=True),
title=TEXT(sortable=True),
content=TEXT(),
asn=NUMERIC(
sortable=True
),
correspondent=TEXT(
sortable=True
),
asn=NUMERIC(sortable=True),
correspondent=TEXT(sortable=True),
correspondent_id=NUMERIC(),
has_correspondent=BOOLEAN(),
tag=KEYWORD(
commas=True,
scorable=True,
lowercase=True
),
tag_id=KEYWORD(
commas=True,
scorable=True
),
tag=KEYWORD(commas=True, scorable=True, lowercase=True),
tag_id=KEYWORD(commas=True, scorable=True),
has_tag=BOOLEAN(),
type=TEXT(
sortable=True
),
type=TEXT(sortable=True),
type_id=NUMERIC(),
has_type=BOOLEAN(),
created=DATETIME(
sortable=True
),
modified=DATETIME(
sortable=True
),
added=DATETIME(
sortable=True
),
created=DATETIME(sortable=True),
modified=DATETIME(sortable=True),
added=DATETIME(sortable=True),
)
@ -132,7 +103,7 @@ def remove_document(writer, doc):
def remove_document_by_id(writer, doc_id):
writer.delete_by_term('id', doc_id)
writer.delete_by_term("id", doc_id)

def add_or_update_document(document):

@ -146,48 +117,47 @@ def remove_document_from_index(document):
class DelayedQuery:
def _get_query(self):
raise NotImplementedError()

def _get_query_filter(self):
criterias = []
for k, v in self.query_params.items():
if k == 'correspondent__id':
criterias.append(query.Term('correspondent_id', v))
elif k == 'tags__id__all':
if k == "correspondent__id":
criterias.append(query.Term("correspondent_id", v))
elif k == "tags__id__all":
for tag_id in v.split(","):
criterias.append(query.Term('tag_id', tag_id))
elif k == 'document_type__id':
criterias.append(query.Term('type_id', v))
elif k == 'correspondent__isnull':
criterias.append(query.Term("tag_id", tag_id))
elif k == "document_type__id":
criterias.append(query.Term("type_id", v))
elif k == "correspondent__isnull":
criterias.append(query.Term("has_correspondent", v == "false"))
elif k == 'is_tagged':
elif k == "is_tagged":
criterias.append(query.Term("has_tag", v == "true"))
elif k == 'document_type__isnull':
elif k == "document_type__isnull":
criterias.append(query.Term("has_type", v == "false"))
elif k == 'created__date__lt':
elif k == "created__date__lt":
criterias.append(
query.DateRange("created", start=None, end=isoparse(v)))
elif k == 'created__date__gt':
query.DateRange("created", start=None, end=isoparse(v))
)
elif k == "created__date__gt":
criterias.append(
query.DateRange("created", start=isoparse(v), end=None))
elif k == 'added__date__gt':
criterias.append(
query.DateRange("added", start=isoparse(v), end=None))
elif k == 'added__date__lt':
criterias.append(
query.DateRange("added", start=None, end=isoparse(v)))
query.DateRange("created", start=isoparse(v), end=None)
)
elif k == "added__date__gt":
criterias.append(query.DateRange("added", start=isoparse(v), end=None))
elif k == "added__date__lt":
criterias.append(query.DateRange("added", start=None, end=isoparse(v)))
if len(criterias) > 0:
return query.And(criterias)
else:
return None

def _get_query_sortedby(self):
if 'ordering' not in self.query_params:
if "ordering" not in self.query_params:
return None, False

field: str = self.query_params['ordering']
field: str = self.query_params["ordering"]

sort_fields_map = {
"created": "created",
@ -196,10 +166,10 @@ class DelayedQuery:
"title": "title",
"correspondent__name": "correspondent",
"document_type__name": "type",
"archive_serial_number": "asn"
"archive_serial_number": "asn",
}

if field.startswith('-'):
if field.startswith("-"):
field = field[1:]
reverse = True
else:

@ -235,24 +205,23 @@ class DelayedQuery:
pagenum=math.floor(item.start / self.page_size) + 1,
pagelen=self.page_size,
sortedby=sortedby,
reverse=reverse
reverse=reverse,
)
page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
page.results.fragmenter = highlight.ContextFragmenter(surround=50)
page.results.formatter = HtmlFormatter(tagname="span", between=" ... ")

if (not self.first_score and
len(page.results) > 0 and
sortedby is None):
if not self.first_score and len(page.results) > 0 and sortedby is None:
self.first_score = page.results[0].score

page.results.top_n = list(map(
lambda hit: (
(hit[0] / self.first_score) if self.first_score else None,
hit[1]
),
page.results.top_n
))
page.results.top_n = list(
map(
lambda hit: (
(hit[0] / self.first_score) if self.first_score else None,
hit[1],
),
page.results.top_n,
)
)

self.saved_results[item.start] = page

@ -260,12 +229,12 @@ class DelayedQuery:
class DelayedFullTextQuery(DelayedQuery):
def _get_query(self):
q_str = self.query_params['query']
q_str = self.query_params["query"]
qp = MultifieldParser(
["content", "title", "correspondent", "tag", "type"],
self.searcher.ixreader.schema)
self.searcher.ixreader.schema,
)
qp.add_plugin(DateParserPlugin())
q = qp.parse(q_str)

@ -277,18 +246,17 @@ class DelayedFullTextQuery(DelayedQuery):
class DelayedMoreLikeThisQuery(DelayedQuery):
def _get_query(self):
more_like_doc_id = int(self.query_params['more_like_id'])
more_like_doc_id = int(self.query_params["more_like_id"])
content = Document.objects.get(id=more_like_doc_id).content

docnum = self.searcher.document_number(id=more_like_doc_id)
kts = self.searcher.key_terms_from_text(
'content', content, numterms=20,
model=classify.Bo1Model, normalize=False)
"content", content, numterms=20, model=classify.Bo1Model, normalize=False
)
q = query.Or(
[query.Term('content', word, boost=weight)
for word, weight in kts])
[query.Term("content", word, boost=weight) for word, weight in kts]
)
mask = {docnum}

return q, mask

@ -298,6 +266,7 @@ def autocomplete(ix, term, limit=10):
with ix.reader() as reader:
terms = []
for (score, t) in reader.most_distinctive_terms(
"content", number=limit, prefix=term.lower()):
"content", number=limit, prefix=term.lower()
):
terms.append(t)
return terms
|
||||
if self.logging_name:
|
||||
logger = logging.getLogger(self.logging_name)
|
||||
else:
|
||||
name = ".".join([
|
||||
self.__class__.__module__,
|
||||
self.__class__.__name__
|
||||
])
|
||||
name = ".".join([self.__class__.__module__, self.__class__.__name__])
|
||||
logger = logging.getLogger(name)
|
||||
|
||||
getattr(logger, level)(message, extra={
|
||||
"group": self.logging_group
|
||||
}, **kwargs)
|
||||
getattr(logger, level)(message, extra={"group": self.logging_group}, **kwargs)
|
||||
|
@ -19,7 +19,7 @@ class Command(BaseCommand):
|
||||
parser.add_argument(
|
||||
"--passphrase",
|
||||
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
|
||||
"specify it here"
|
||||
"specify it here",
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
@ -50,12 +50,12 @@ class Command(BaseCommand):
|
||||
def __gpg_to_unencrypted(passphrase):
|
||||
|
||||
encrypted_files = Document.objects.filter(
|
||||
storage_type=Document.STORAGE_TYPE_GPG)
|
||||
storage_type=Document.STORAGE_TYPE_GPG
|
||||
)
|
||||
|
||||
for document in encrypted_files:
|
||||
|
||||
print("Decrypting {}".format(
|
||||
document).encode('utf-8'))
|
||||
print("Decrypting {}".format(document).encode("utf-8"))
|
||||
|
||||
old_paths = [document.source_path, document.thumbnail_path]
|
||||
|
||||
@ -66,10 +66,11 @@ class Command(BaseCommand):
|
||||
|
||||
ext = os.path.splitext(document.filename)[1]
|
||||
|
||||
if not ext == '.gpg':
|
||||
if not ext == ".gpg":
|
||||
raise CommandError(
|
||||
f"Abort: encrypted file {document.source_path} does not "
|
||||
f"end with .gpg")
|
||||
f"end with .gpg"
|
||||
)
|
||||
|
||||
document.filename = os.path.splitext(document.filename)[0]
|
||||
|
||||
@ -80,7 +81,8 @@ class Command(BaseCommand):
|
||||
f.write(raw_thumb)
|
||||
|
||||
Document.objects.filter(id=document.id).update(
|
||||
storage_type=document.storage_type, filename=document.filename)
|
||||
storage_type=document.storage_type, filename=document.filename
|
||||
)
|
||||
|
||||
for path in old_paths:
|
||||
os.unlink(path)
|
||||
|
@ -16,8 +16,7 @@ from whoosh.writing import AsyncWriter
from documents.models import Document
from ... import index
from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...file_handling import create_source_path_directory, generate_unique_filename
from ...parsers import get_parser_class_for_mime_type

@ -32,51 +31,49 @@ def handle_document(document_id):
parser_class = get_parser_class_for_mime_type(mime_type)

if not parser_class:
logger.error(f"No parser found for mime type {mime_type}, cannot "
f"archive document {document} (ID: {document_id})")
logger.error(
f"No parser found for mime type {mime_type}, cannot "
f"archive document {document} (ID: {document_id})"
)
return

parser = parser_class(logging_group=uuid.uuid4())

try:
parser.parse(
document.source_path,
mime_type,
document.get_public_filename())
parser.parse(document.source_path, mime_type, document.get_public_filename())

thumbnail = parser.get_optimised_thumbnail(
document.source_path,
mime_type,
document.get_public_filename()
document.source_path, mime_type, document.get_public_filename()
)

if parser.get_archive_path():
with transaction.atomic():
with open(parser.get_archive_path(), 'rb') as f:
with open(parser.get_archive_path(), "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document, archive_filename=True)
document, archive_filename=True
)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text(),
archive_filename=document.archive_filename
archive_filename=document.archive_filename,
)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(),
document.archive_path)
shutil.move(parser.get_archive_path(), document.archive_path)
shutil.move(thumbnail, document.thumbnail_path)

with index.open_index_writer() as writer:
index.update_document(writer, document)

except Exception as e:
logger.exception(f"Error while parsing document {document} "
f"(ID: {document_id})")
logger.exception(
f"Error while parsing document {document} " f"(ID: {document_id})"
)
finally:
parser.cleanup()

@ -88,29 +85,33 @@ class Command(BaseCommand):
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument(
"-f", "--overwrite",
"-f",
"--overwrite",
default=False,
action="store_true",
help="Recreates the archived document for documents that already "
"have an archived version."
"have an archived version.",
)
parser.add_argument(
"-d", "--document",
"-d",
"--document",
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document."
"run on this specific document.",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def handle(self, *args, **options):

@ -119,18 +120,17 @@ class Command(BaseCommand):
overwrite = options["overwrite"]

if options['document']:
documents = Document.objects.filter(pk=options['document'])
if options["document"]:
documents = Document.objects.filter(pk=options["document"])
else:
documents = Document.objects.all()

document_ids = list(map(
lambda doc: doc.id,
filter(
lambda d: overwrite or not d.has_archive_version,
documents
document_ids = list(
map(
lambda doc: doc.id,
filter(lambda d: overwrite or not d.has_archive_version, documents),
)
))
)

# Note to future self: this prevents django from reusing database
# conncetions between processes, which is bad and does not work

@ -141,13 +141,12 @@ class Command(BaseCommand):
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
list(tqdm.tqdm(
pool.imap_unordered(
handle_document,
document_ids
),
total=len(document_ids),
disable=options['no_progress_bar']
))
list(
tqdm.tqdm(
pool.imap_unordered(handle_document, document_ids),
total=len(document_ids),
disable=options["no_progress_bar"],
)
)
except KeyboardInterrupt:
print("Aborting...")
@ -23,24 +23,21 @@ logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath):
"""Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory.
and get or create Tag IDs for every directory.
"""
tag_ids = set()
path_parts = Path(filepath).relative_to(
settings.CONSUMPTION_DIR).parent.parts
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
for part in path_parts:
tag_ids.add(Tag.objects.get_or_create(name__iexact=part, defaults={
"name": part
})[0].pk)
tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk
)

return tag_ids

def _is_ignored(filepath: str) -> bool:
filepath_relative = PurePath(filepath).relative_to(
settings.CONSUMPTION_DIR)
return any(
filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
return any(filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)

def _consume(filepath):

@ -48,13 +45,11 @@ def _consume(filepath):
return

if not os.path.isfile(filepath):
logger.debug(
f"Not consuming file {filepath}: File has moved.")
logger.debug(f"Not consuming file {filepath}: File has moved.")
return

if not is_file_ext_supported(os.path.splitext(filepath)[1]):
logger.warning(
f"Not consuming file {filepath}: Unknown file extension.")
logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
return

tag_ids = None

@ -66,10 +61,12 @@ def _consume(filepath):
try:
logger.info(f"Adding {filepath} to the task queue.")
async_task("documents.tasks.consume_file",
filepath,
override_tag_ids=tag_ids if tag_ids else None,
task_name=os.path.basename(filepath)[:100])
async_task(
"documents.tasks.consume_file",
filepath,
override_tag_ids=tag_ids if tag_ids else None,
task_name=os.path.basename(filepath)[:100],
)
except Exception as e:
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for

@ -88,8 +85,9 @@ def _consume_wait_unmodified(file):
try:
new_mtime = os.stat(file).st_mtime
except FileNotFoundError:
logger.debug(f"File {file} moved while waiting for it to remain "
f"unmodified.")
logger.debug(
f"File {file} moved while waiting for it to remain " f"unmodified."
)
return
if new_mtime == mtime:
_consume(file)

@ -102,16 +100,11 @@ def _consume_wait_unmodified(file):
class Handler(FileSystemEventHandler):
def on_created(self, event):
Thread(
target=_consume_wait_unmodified, args=(event.src_path,)
).start()
Thread(target=_consume_wait_unmodified, args=(event.src_path,)).start()

def on_moved(self, event):
Thread(
target=_consume_wait_unmodified, args=(event.dest_path,)
).start()
Thread(target=_consume_wait_unmodified, args=(event.dest_path,)).start()

class Command(BaseCommand):

@ -130,26 +123,19 @@ class Command(BaseCommand):
"directory",
default=settings.CONSUMPTION_DIR,
nargs="?",
help="The consumption directory."
)
parser.add_argument(
"--oneshot",
action="store_true",
help="Run only once."
help="The consumption directory.",
)
parser.add_argument("--oneshot", action="store_true", help="Run only once.")

def handle(self, *args, **options):
directory = options["directory"]
recursive = settings.CONSUMER_RECURSIVE

if not directory:
raise CommandError(
"CONSUMPTION_DIR does not appear to be set."
)
raise CommandError("CONSUMPTION_DIR does not appear to be set.")

if not os.path.isdir(directory):
raise CommandError(
f"Consumption directory {directory} does not exist")
raise CommandError(f"Consumption directory {directory} does not exist")

if recursive:
for dirpath, _, filenames in os.walk(directory):

@ -171,8 +157,7 @@ class Command(BaseCommand):
logger.debug("Consumer exiting.")

def handle_polling(self, directory, recursive):
logger.info(
f"Polling directory for changes: {directory}")
logger.info(f"Polling directory for changes: {directory}")
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
self.observer.schedule(Handler(), directory, recursive=recursive)
self.observer.start()

@ -186,8 +171,7 @@ class Command(BaseCommand):
self.observer.join()

def handle_inotify(self, directory, recursive):
logger.info(
f"Using inotify to watch directory for changes: {directory}")
logger.info(f"Using inotify to watch directory for changes: {directory}")

inotify = INotify()
inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO
@ -8,7 +8,9 @@ class Command(BaseCommand):
help = """
Trains the classifier on your data and saves the resulting models to a
file. The document consumer will then automatically use this new model.
""".replace(" ", "")
""".replace(
" ", ""
)

def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
@ -12,10 +12,19 @@ from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from filelock import FileLock

from documents.models import Document, Correspondent, Tag, DocumentType, \
SavedView, SavedViewFilterRule
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME
from documents.models import (
Document,
Correspondent,
Tag,
DocumentType,
SavedView,
SavedViewFilterRule,
)
from documents.settings import (
EXPORTER_FILE_NAME,
EXPORTER_THUMBNAIL_NAME,
EXPORTER_ARCHIVE_NAME,
)
from paperless.db import GnuPG
from paperless_mail.models import MailAccount, MailRule
from ...file_handling import generate_filename, delete_empty_directories

@ -27,41 +36,46 @@ class Command(BaseCommand):
Decrypt and rename all files in our collection into a given target
directory. And include a manifest file containing document data for
easy import.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument("target")

parser.add_argument(
"-c", "--compare-checksums",
"-c",
"--compare-checksums",
default=False,
action="store_true",
help="Compare file checksums when determining whether to export "
"a file or not. If not specified, file size and time "
"modified is used instead."
"a file or not. If not specified, file size and time "
"modified is used instead.",
)

parser.add_argument(
"-f", "--use-filename-format",
"-f",
"--use-filename-format",
default=False,
action="store_true",
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured."
"export directory, if configured.",
)

parser.add_argument(
"-d", "--delete",
"-d",
"--delete",
default=False,
action="store_true",
help="After exporting, delete files in the export directory that "
"do not belong to the current export, such as files from "
"deleted documents."
"do not belong to the current export, such as files from "
"deleted documents.",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def __init__(self, *args, **kwargs):
@ -76,9 +90,9 @@ class Command(BaseCommand):
def handle(self, *args, **options):

self.target = options["target"]
self.compare_checksums = options['compare_checksums']
self.use_filename_format = options['use_filename_format']
self.delete = options['delete']
self.compare_checksums = options["compare_checksums"]
self.use_filename_format = options["use_filename_format"]
self.delete = options["delete"]

if not os.path.exists(self.target):
raise CommandError("That path doesn't exist")

@ -87,7 +101,7 @@ class Command(BaseCommand):
raise CommandError("That path doesn't appear to be writable")

with FileLock(settings.MEDIA_LOCK):
self.dump(options['no_progress_bar'])
self.dump(options["no_progress_bar"])

def dump(self, progress_bar_disable=False):
# 1. Take a snapshot of what files exist in the current export folder

@ -100,43 +114,48 @@ class Command(BaseCommand):
# documents
with transaction.atomic():
manifest = json.loads(
serializers.serialize("json", Correspondent.objects.all()))
serializers.serialize("json", Correspondent.objects.all())
)

manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
manifest += json.loads(serializers.serialize("json", Tag.objects.all()))

manifest += json.loads(serializers.serialize(
"json", DocumentType.objects.all()))
manifest += json.loads(
serializers.serialize("json", DocumentType.objects.all())
)

documents = Document.objects.order_by("id")
document_map = {d.pk: d for d in documents}
document_manifest = json.loads(
serializers.serialize("json", documents))
document_manifest = json.loads(serializers.serialize("json", documents))
manifest += document_manifest

manifest += json.loads(serializers.serialize(
"json", MailAccount.objects.all()))
manifest += json.loads(
serializers.serialize("json", MailAccount.objects.all())
)

manifest += json.loads(serializers.serialize(
"json", MailRule.objects.all()))
manifest += json.loads(
serializers.serialize("json", MailRule.objects.all())
)

manifest += json.loads(serializers.serialize(
"json", SavedView.objects.all()))
manifest += json.loads(
serializers.serialize("json", SavedView.objects.all())
)

manifest += json.loads(serializers.serialize(
"json", SavedViewFilterRule.objects.all()))
manifest += json.loads(
serializers.serialize("json", SavedViewFilterRule.objects.all())
)

manifest += json.loads(serializers.serialize(
"json", User.objects.all()))
manifest += json.loads(serializers.serialize("json", User.objects.all()))

# 3. Export files from each document
for index, document_dict in tqdm.tqdm(
enumerate(document_manifest),
total=len(document_manifest),
disable=progress_bar_disable
disable=progress_bar_disable,
):
# 3.1. store files unencrypted
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
document_dict["fields"][
"storage_type"
] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501

document = document_map[document_dict["pk"]]
@ -145,11 +164,10 @@ class Command(BaseCommand):
while True:
if self.use_filename_format:
base_name = generate_filename(
document, counter=filename_counter,
append_gpg=False)
document, counter=filename_counter, append_gpg=False
)
else:
base_name = document.get_public_filename(
counter=filename_counter)
base_name = document.get_public_filename(counter=filename_counter)

if base_name not in self.exported_files:
self.exported_files.append(base_name)

@ -193,22 +211,19 @@ class Command(BaseCommand):
f.write(GnuPG.decrypted(document.archive_path))
os.utime(archive_target, times=(t, t))
else:
self.check_and_copy(document.source_path,
document.checksum,
original_target)
self.check_and_copy(
document.source_path, document.checksum, original_target
)

self.check_and_copy(document.thumbnail_path,
None,
thumbnail_target)
self.check_and_copy(document.thumbnail_path, None, thumbnail_target)

if archive_target:
self.check_and_copy(document.archive_path,
document.archive_checksum,
archive_target)
self.check_and_copy(
document.archive_path, document.archive_checksum, archive_target
)

# 4. write manifest to target forlder
manifest_path = os.path.abspath(
os.path.join(self.target, "manifest.json"))
manifest_path = os.path.abspath(os.path.join(self.target, "manifest.json"))

with open(manifest_path, "w") as f:
json.dump(manifest, f, indent=2)

@ -222,8 +237,9 @@ class Command(BaseCommand):
for f in self.files_in_export_dir:
os.remove(f)

delete_empty_directories(os.path.abspath(os.path.dirname(f)),
os.path.abspath(self.target))
delete_empty_directories(
os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target)
)

def check_and_copy(self, source, source_checksum, target):
if os.path.abspath(target) in self.files_in_export_dir:
@ -12,8 +12,11 @@ from django.db.models.signals import post_save, m2m_changed
from filelock import FileLock

from documents.models import Document
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME
from documents.settings import (
EXPORTER_FILE_NAME,
EXPORTER_THUMBNAIL_NAME,
EXPORTER_ARCHIVE_NAME,
)
from ...file_handling import create_source_path_directory
from ...signals.handlers import update_filename_and_move_files

@ -32,7 +35,9 @@ class Command(BaseCommand):
help = """
Using a manifest.json file, load the data from there, and import the
documents it refers to.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument("source")

@ -40,7 +45,7 @@ class Command(BaseCommand):
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def __init__(self, *args, **kwargs):

@ -67,26 +72,27 @@ class Command(BaseCommand):
self.manifest = json.load(f)

self._check_manifest()
with disable_signal(post_save,
receiver=update_filename_and_move_files,
sender=Document):
with disable_signal(m2m_changed,
receiver=update_filename_and_move_files,
sender=Document.tags.through):
with disable_signal(
post_save, receiver=update_filename_and_move_files, sender=Document
):
with disable_signal(
m2m_changed,
receiver=update_filename_and_move_files,
sender=Document.tags.through,
):
# Fill up the database with whatever is in the manifest
call_command("loaddata", manifest_path)

self._import_files_from_manifest(options['no_progress_bar'])
self._import_files_from_manifest(options["no_progress_bar"])

print("Updating search index...")
call_command('document_index', 'reindex')
call_command("document_index", "reindex")

@staticmethod
def _check_manifest_exists(path):
if not os.path.exists(path):
raise CommandError(
"That directory doesn't appear to contain a manifest.json "
"file."
"That directory doesn't appear to contain a manifest.json " "file."
)

def _check_manifest(self):
@ -98,15 +104,15 @@ class Command(BaseCommand):

if EXPORTER_FILE_NAME not in record:
raise CommandError(
'The manifest file contains a record which does not '
'refer to an actual document file.'
"The manifest file contains a record which does not "
"refer to an actual document file."
)

doc_file = record[EXPORTER_FILE_NAME]
if not os.path.exists(os.path.join(self.source, doc_file)):
raise CommandError(
'The manifest file refers to "{}" which does not '
'appear to be in the source directory.'.format(doc_file)
"appear to be in the source directory.".format(doc_file)
)

if EXPORTER_ARCHIVE_NAME in record:

@ -125,14 +131,11 @@ class Command(BaseCommand):
print("Copy files into paperless...")

manifest_documents = list(filter(
lambda r: r["model"] == "documents.document",
self.manifest))
manifest_documents = list(
filter(lambda r: r["model"] == "documents.document", self.manifest)
)

for record in tqdm.tqdm(
manifest_documents,
disable=progress_bar_disable
):
for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable):

document = Document.objects.get(pk=record["pk"])
@ -9,17 +9,17 @@ class Command(BaseCommand):
help = "Manages the document index."

def add_arguments(self, parser):
parser.add_argument("command", choices=['reindex', 'optimize'])
parser.add_argument("command", choices=["reindex", "optimize"])
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def handle(self, *args, **options):
with transaction.atomic():
if options['command'] == 'reindex':
index_reindex(progress_bar_disable=options['no_progress_bar'])
elif options['command'] == 'optimize':
if options["command"] == "reindex":
index_reindex(progress_bar_disable=options["no_progress_bar"])
elif options["command"] == "optimize":
index_optimize()
@ -11,14 +11,16 @@ class Command(BaseCommand):

help = """
This will rename all documents to match the latest filename format.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def handle(self, *args, **options):

@ -26,7 +28,6 @@ class Command(BaseCommand):
logging.getLogger().handlers[0].level = logging.ERROR

for document in tqdm.tqdm(
Document.objects.all(),
disable=options['no_progress_bar']
Document.objects.all(), disable=options["no_progress_bar"]
):
post_save.send(Document, instance=document)
@ -18,60 +18,46 @@ class Command(BaseCommand):
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument(
"-c", "--correspondent",
default=False,
action="store_true"
)
parser.add_argument(
"-T", "--tags",
default=False,
action="store_true"
)
parser.add_argument(
"-t", "--document_type",
default=False,
action="store_true"
)
parser.add_argument(
"-i", "--inbox-only",
default=False,
action="store_true"
)
parser.add_argument("-c", "--correspondent", default=False, action="store_true")
parser.add_argument("-T", "--tags", default=False, action="store_true")
parser.add_argument("-t", "--document_type", default=False, action="store_true")
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
parser.add_argument(
"--use-first",
default=False,
action="store_true",
help="By default this command won't try to assign a correspondent "
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds."
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds.",
)
parser.add_argument(
"-f", "--overwrite",
"-f",
"--overwrite",
default=False,
action="store_true",
help="If set, the document retagger will overwrite any previously"
"set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules."
"set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules.",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--suggest",
default=False,
action="store_true",
help="Return the suggestion, don't change anything."
help="Return the suggestion, don't change anything.",
)
parser.add_argument(
"--base-url",
help="The base URL to use to build the link to the documents."
"--base-url", help="The base URL to use to build the link to the documents."
)

def handle(self, *args, **options):
@ -86,38 +72,39 @@ class Command(BaseCommand):

classifier = load_classifier()

for document in tqdm.tqdm(
documents,
disable=options['no_progress_bar']
):
for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):

if options['correspondent']:
if options["correspondent"]:
set_correspondent(
sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'],
use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
color=color,
)

if options['document_type']:
set_document_type(sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'],
use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
if options["document_type"]:
set_document_type(
sender=None,
document=document,
classifier=classifier,
replace=options["overwrite"],
use_first=options["use_first"],
suggest=options["suggest"],
base_url=options["base_url"],
color=color,
)

if options['tags']:
if options["tags"]:
set_tags(
sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
replace=options["overwrite"],
suggest=options["suggest"],
base_url=options["base_url"],
color=color,
)
@ -6,18 +6,20 @@ class Command(BaseCommand):

help = """
This command checks your document archive for issues.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def handle(self, *args, **options):

messages = check_sanity(progress=not options['no_progress_bar'])
messages = check_sanity(progress=not options["no_progress_bar"])

messages.log_messages()
@ -22,9 +22,7 @@ def _process_document(doc_in):

try:
thumb = parser.get_optimised_thumbnail(
document.source_path,
document.mime_type,
document.get_public_filename()
document.source_path, document.mime_type, document.get_public_filename()
)

shutil.move(thumb, document.thumbnail_path)

@ -36,29 +34,32 @@ class Command(BaseCommand):

help = """
This will regenerate the thumbnails for all documents.
""".replace(" ", "")
""".replace(
" ", ""
)

def add_arguments(self, parser):
parser.add_argument(
"-d", "--document",
"-d",
"--document",
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document."
"run on this specific document.",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
help="If set, the progress bar will not be shown",
)

def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR

if options['document']:
documents = Document.objects.filter(pk=options['document'])
if options["document"]:
documents = Document.objects.filter(pk=options["document"])
else:
documents = Document.objects.all()

@ -70,8 +71,10 @@ class Command(BaseCommand):
db.connections.close_all()

with multiprocessing.Pool() as pool:
list(tqdm.tqdm(
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=options['no_progress_bar']
))
list(
tqdm.tqdm(
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=options["no_progress_bar"],
)
)
@ -10,11 +10,11 @@ class Command(LoadDataCommand):
"""

def parse_name(self, fixture_name):
self.compression_formats['stdin'] = (lambda x, y: sys.stdin, None)
if fixture_name == '-':
return '-', 'json', 'stdin'
self.compression_formats["stdin"] = (lambda x, y: sys.stdin, None)
if fixture_name == "-":
return "-", "json", "stdin"

def find_fixtures(self, fixture_label):
if fixture_label == '-':
return [('-', None, '-')]
if fixture_label == "-":
return [("-", None, "-")]
return super(Command, self).find_fixtures(fixture_label)
@ -12,16 +12,18 @@ class Command(BaseCommand):

help = """
Creates a Django superuser based on env variables.
""".replace(" ", "")
""".replace(
" ", ""
)

def handle(self, *args, **options):

username = os.getenv('PAPERLESS_ADMIN_USER')
username = os.getenv("PAPERLESS_ADMIN_USER")
if not username:
return

mail = os.getenv('PAPERLESS_ADMIN_MAIL', 'root@localhost')
password = os.getenv('PAPERLESS_ADMIN_PASSWORD')
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
password = os.getenv("PAPERLESS_ADMIN_PASSWORD")

# Check if user exists already, leave as is if it does
if User.objects.filter(username=username).exists():

@ -32,11 +34,10 @@ class Command(BaseCommand):
elif password:
# Create superuser based on env variables
User.objects.create_superuser(username, mail, password)
self.stdout.write(
f'Created superuser "{username}" with provided password.')
self.stdout.write(f'Created superuser "{username}" with provided password.')
else:
self.stdout.write(
f'Did not create superuser "{username}".')
self.stdout.write(f'Did not create superuser "{username}".')
self.stdout.write(
'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
'"docker-compose.env" file.')
'"docker-compose.env" file.'
)
@ -12,7 +12,8 @@ def log_reason(matching_model, document, reason):
class_name = type(matching_model).__name__
logger.debug(
f"{class_name} {matching_model.name} matched on document "
f"{document} because {reason}")
f"{document} because {reason}"
)

def match_correspondents(document, classifier):

@ -23,9 +24,9 @@ def match_correspondents(document, classifier):

correspondents = Correspondent.objects.all()

return list(filter(
lambda o: matches(o, document) or o.pk == pred_id,
correspondents))
return list(
filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
)

def match_document_types(document, classifier):

@ -36,9 +37,9 @@ def match_document_types(document, classifier):

document_types = DocumentType.objects.all()

return list(filter(
lambda o: matches(o, document) or o.pk == pred_id,
document_types))
return list(
filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
)

def match_tags(document, classifier):

@ -49,9 +50,9 @@ def match_tags(document, classifier):

tags = Tag.objects.all()

return list(filter(
lambda o: matches(o, document) or o.pk in predicted_tag_ids,
tags))
return list(
filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
)

def matches(matching_model, document):
@ -68,73 +69,73 @@ def matches(matching_model, document):

if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
for word in _split_match(matching_model):
search_result = re.search(
rf"\b{word}\b", document_content, **search_kwargs)
search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs)
if not search_result:
return False
log_reason(
matching_model, document,
f"it contains all of these words: {matching_model.match}"
matching_model,
document,
f"it contains all of these words: {matching_model.match}",
)
return True

elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
for word in _split_match(matching_model):
if re.search(rf"\b{word}\b", document_content, **search_kwargs):
log_reason(
matching_model, document,
f"it contains this word: {word}"
)
log_reason(matching_model, document, f"it contains this word: {word}")
return True
return False

elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
result = bool(re.search(
rf"\b{re.escape(matching_model.match)}\b",
document_content,
**search_kwargs
))
result = bool(
re.search(
rf"\b{re.escape(matching_model.match)}\b",
document_content,
**search_kwargs,
)
)
if result:
log_reason(
matching_model, document,
f"it contains this string: \"{matching_model.match}\""
matching_model,
document,
f'it contains this string: "{matching_model.match}"',
)
return result

elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
try:
match = re.search(
re.compile(matching_model.match, **search_kwargs),
document_content
re.compile(matching_model.match, **search_kwargs), document_content
)
except re.error:
logger.error(
f"Error while processing regular expression "
f"{matching_model.match}"
f"Error while processing regular expression " f"{matching_model.match}"
)
return False
if match:
log_reason(
matching_model, document,
matching_model,
document,
f"the string {match.group()} matches the regular expression "
f"{matching_model.match}"
f"{matching_model.match}",
)
return bool(match)

elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
from fuzzywuzzy import fuzz

match = re.sub(r'[^\w\s]', '', matching_model.match)
text = re.sub(r'[^\w\s]', '', document_content)
match = re.sub(r"[^\w\s]", "", matching_model.match)
text = re.sub(r"[^\w\s]", "", document_content)
if matching_model.is_insensitive:
match = match.lower()
text = text.lower()
if fuzz.partial_ratio(match, text) >= 90:
# TODO: make this better
log_reason(
matching_model, document,
matching_model,
document,
f"parts of the document content somehow match the string "
f"{matching_model.match}"
f"{matching_model.match}",
)
return True
else:
@ -162,8 +163,6 @@ def _split_match(matching_model):
normspace = re.compile(r"\s+").sub
return [
# normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
re.escape(
normspace(" ", (t[0] or t[1]).strip())
).replace(r"\ ", r"\s+")
re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
for t in findterms(matching_model.match)
]
@ -10,19 +10,33 @@ class Migration(migrations.Migration):

initial = True

dependencies = [
]
dependencies = []

operations = [
migrations.CreateModel(
name='Document',
name="Document",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('sender', models.CharField(blank=True, db_index=True, max_length=128)),
('title', models.CharField(blank=True, db_index=True, max_length=128)),
('content', models.TextField(db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]))),
('created', models.DateTimeField(auto_now_add=True)),
('modified', models.DateTimeField(auto_now=True)),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("sender", models.CharField(blank=True, db_index=True, max_length=128)),
("title", models.CharField(blank=True, db_index=True, max_length=128)),
(
"content",
models.TextField(
db_index=(
"mysql" not in settings.DATABASES["default"]["ENGINE"]
)
),
),
("created", models.DateTimeField(auto_now_add=True)),
("modified", models.DateTimeField(auto_now=True)),
],
),
]
@ -9,17 +9,19 @@ import django.utils.timezone
class Migration(migrations.Migration):

dependencies = [
('documents', '0001_initial'),
("documents", "0001_initial"),
]

operations = [
migrations.AlterModelOptions(
name='document',
options={'ordering': ('sender', 'title')},
name="document",
options={"ordering": ("sender", "title")},
),
migrations.AlterField(
model_name='document',
name='created',
field=models.DateTimeField(default=django.utils.timezone.now, editable=False),
model_name="document",
name="created",
field=models.DateTimeField(
default=django.utils.timezone.now, editable=False
),
),
]
@ -19,9 +19,11 @@ def move_sender_strings_to_sender_model(apps, schema_editor):
# Create the sender and log the relationship with the document
for document in document_model.objects.all():
if document.sender:
DOCUMENT_SENDER_MAP[document.pk], created = sender_model.objects.get_or_create(
name=document.sender,
defaults={"slug": slugify(document.sender)}
(
DOCUMENT_SENDER_MAP[document.pk],
created,
) = sender_model.objects.get_or_create(
name=document.sender, defaults={"slug": slugify(document.sender)}
)

@ -33,27 +35,39 @@ def realign_senders(apps, schema_editor):

class Migration(migrations.Migration):
dependencies = [
('documents', '0002_auto_20151226_1316'),
("documents", "0002_auto_20151226_1316"),
]

operations = [
migrations.CreateModel(
name='Sender',
name="Sender",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField()),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=128, unique=True)),
("slug", models.SlugField()),
],
),
migrations.RunPython(move_sender_strings_to_sender_model),
migrations.RemoveField(
model_name='document',
name='sender',
model_name="document",
name="sender",
),
migrations.AddField(
model_name='document',
name='sender',
field=models.ForeignKey(blank=True, on_delete=django.db.models.deletion.CASCADE, to='documents.Sender'),
model_name="document",
name="sender",
field=models.ForeignKey(
blank=True,
on_delete=django.db.models.deletion.CASCADE,
to="documents.Sender",
),
),
migrations.RunPython(realign_senders),
]
@ -9,13 +9,19 @@ import django.db.models.deletion
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0003_sender'),
|
||||
("documents", "0003_sender"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='sender',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='documents', to='documents.Sender'),
|
||||
model_name="document",
|
||||
name="sender",
|
||||
field=models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="documents",
|
||||
to="documents.Sender",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -8,12 +8,12 @@ from django.db import migrations
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0004_auto_20160114_1844'),
|
||||
("documents", "0004_auto_20160114_1844"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='sender',
|
||||
options={'ordering': ('name',)},
|
||||
name="sender",
|
||||
options={"ordering": ("name",)},
|
||||
),
|
||||
]
|
||||
|
@ -8,30 +8,59 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0005_auto_20160123_0313'),
|
||||
("documents", "0005_auto_20160123_0313"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='Tag',
|
||||
name="Tag",
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=128, unique=True)),
|
||||
('slug', models.SlugField(blank=True)),
|
||||
('colour', models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#ffff99'), (12, '#b15928'), (13, '#000000'), (14, '#cccccc')], default=1)),
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("name", models.CharField(max_length=128, unique=True)),
|
||||
("slug", models.SlugField(blank=True)),
|
||||
(
|
||||
"colour",
|
||||
models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "#a6cee3"),
|
||||
(2, "#1f78b4"),
|
||||
(3, "#b2df8a"),
|
||||
(4, "#33a02c"),
|
||||
(5, "#fb9a99"),
|
||||
(6, "#e31a1c"),
|
||||
(7, "#fdbf6f"),
|
||||
(8, "#ff7f00"),
|
||||
(9, "#cab2d6"),
|
||||
(10, "#6a3d9a"),
|
||||
(11, "#ffff99"),
|
||||
(12, "#b15928"),
|
||||
(13, "#000000"),
|
||||
(14, "#cccccc"),
|
||||
],
|
||||
default=1,
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
"abstract": False,
|
||||
},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='sender',
|
||||
name='slug',
|
||||
model_name="sender",
|
||||
name="slug",
|
||||
field=models.SlugField(blank=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(related_name='documents', to='documents.Tag'),
|
||||
model_name="document",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(related_name="documents", to="documents.Tag"),
|
||||
),
|
||||
]
|
||||
|
@ -8,23 +8,50 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0006_auto_20160123_0430'),
|
||||
("documents", "0006_auto_20160123_0430"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='match',
|
||||
model_name="tag",
|
||||
name="match",
|
||||
field=models.CharField(blank=True, max_length=256),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(blank=True, choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.', null=True),
|
||||
model_name="tag",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
blank=True,
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
],
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
|
||||
null=True,
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='colour',
|
||||
field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1),
|
||||
model_name="tag",
|
||||
name="colour",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "#a6cee3"),
|
||||
(2, "#1f78b4"),
|
||||
(3, "#b2df8a"),
|
||||
(4, "#33a02c"),
|
||||
(5, "#fb9a99"),
|
||||
(6, "#e31a1c"),
|
||||
(7, "#fdbf6f"),
|
||||
(8, "#ff7f00"),
|
||||
(9, "#cab2d6"),
|
||||
(10, "#6a3d9a"),
|
||||
(11, "#b15928"),
|
||||
(12, "#000000"),
|
||||
(13, "#cccccc"),
|
||||
],
|
||||
default=1,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -8,20 +8,32 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0007_auto_20160126_2114'),
|
||||
("documents", "0007_auto_20160126_2114"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='file_type',
|
||||
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF')], default='pdf', editable=False, max_length=4),
|
||||
model_name="document",
|
||||
name="file_type",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("pdf", "PDF"),
|
||||
("png", "PNG"),
|
||||
("jpg", "JPG"),
|
||||
("gif", "GIF"),
|
||||
("tiff", "TIFF"),
|
||||
],
|
||||
default="pdf",
|
||||
editable=False,
|
||||
max_length=4,
|
||||
),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag'),
|
||||
model_name="document",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(
|
||||
blank=True, related_name="documents", to="documents.Tag"
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -8,13 +8,22 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0008_document_file_type'),
|
||||
("documents", "0008_document_file_type"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.'),
|
||||
model_name="tag",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -8,23 +8,48 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0009_auto_20160214_0040'),
|
||||
("documents", "0009_auto_20160214_0040"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='Log',
|
||||
name="Log",
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('group', models.UUIDField(blank=True)),
|
||||
('message', models.TextField()),
|
||||
('level', models.PositiveIntegerField(choices=[(10, 'Debugging'), (20, 'Informational'), (30, 'Warning'), (40, 'Error'), (50, 'Critical')], default=20)),
|
||||
('component', models.PositiveIntegerField(choices=[(1, 'Consumer'), (2, 'Mail Fetcher')])),
|
||||
('created', models.DateTimeField(auto_now_add=True)),
|
||||
('modified', models.DateTimeField(auto_now=True)),
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("group", models.UUIDField(blank=True)),
|
||||
("message", models.TextField()),
|
||||
(
|
||||
"level",
|
||||
models.PositiveIntegerField(
|
||||
choices=[
|
||||
(10, "Debugging"),
|
||||
(20, "Informational"),
|
||||
(30, "Warning"),
|
||||
(40, "Error"),
|
||||
(50, "Critical"),
|
||||
],
|
||||
default=20,
|
||||
),
|
||||
),
|
||||
(
|
||||
"component",
|
||||
models.PositiveIntegerField(
|
||||
choices=[(1, "Consumer"), (2, "Mail Fetcher")]
|
||||
),
|
||||
),
|
||||
("created", models.DateTimeField(auto_now_add=True)),
|
||||
("modified", models.DateTimeField(auto_now=True)),
|
||||
],
|
||||
options={
|
||||
'ordering': ('-modified',),
|
||||
"ordering": ("-modified",),
|
||||
},
|
||||
),
|
||||
]
|
||||
|
@ -8,21 +8,21 @@ from django.db import migrations
|
||||
class Migration(migrations.Migration):
|
||||
atomic = False
|
||||
dependencies = [
|
||||
('documents', '0010_log'),
|
||||
("documents", "0010_log"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RenameModel(
|
||||
old_name='Sender',
|
||||
new_name='Correspondent',
|
||||
old_name="Sender",
|
||||
new_name="Correspondent",
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='document',
|
||||
options={'ordering': ('correspondent', 'title')},
|
||||
name="document",
|
||||
options={"ordering": ("correspondent", "title")},
|
||||
),
|
||||
migrations.RenameField(
|
||||
model_name='document',
|
||||
old_name='sender',
|
||||
new_name='correspondent',
|
||||
model_name="document",
|
||||
old_name="sender",
|
||||
new_name="correspondent",
|
||||
),
|
||||
]
|
||||
|
@ -23,37 +23,40 @@ class GnuPG(object):
|
||||
|
||||
@classmethod
|
||||
def decrypted(cls, file_handle):
|
||||
return cls.gpg.decrypt_file(
|
||||
file_handle, passphrase=settings.PASSPHRASE).data
|
||||
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
|
||||
|
||||
@classmethod
|
||||
def encrypted(cls, file_handle):
|
||||
return cls.gpg.encrypt_file(
|
||||
file_handle,
|
||||
recipients=None,
|
||||
passphrase=settings.PASSPHRASE,
|
||||
symmetric=True
|
||||
file_handle, recipients=None, passphrase=settings.PASSPHRASE, symmetric=True
|
||||
).data
|
||||
|
||||
|
||||
def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||
|
||||
os.makedirs(os.path.join(settings.MEDIA_ROOT, "documents", "originals"), exist_ok=True)
|
||||
os.makedirs(os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), exist_ok=True)
|
||||
os.makedirs(
|
||||
os.path.join(settings.MEDIA_ROOT, "documents", "originals"), exist_ok=True
|
||||
)
|
||||
os.makedirs(
|
||||
os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), exist_ok=True
|
||||
)
|
||||
|
||||
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
|
||||
|
||||
if set(documents) == {"originals", "thumbnails"}:
|
||||
return
|
||||
|
||||
print(colourise(
|
||||
"\n\n"
|
||||
" This is a one-time only migration to generate thumbnails for all of your\n"
|
||||
" documents so that future UIs will have something to work with. If you have\n"
|
||||
" a lot of documents though, this may take a while, so a coffee break may be\n"
|
||||
" in order."
|
||||
"\n", opts=("bold",)
|
||||
))
|
||||
print(
|
||||
colourise(
|
||||
"\n\n"
|
||||
" This is a one-time only migration to generate thumbnails for all of your\n"
|
||||
" documents so that future UIs will have something to work with. If you have\n"
|
||||
" a lot of documents though, this may take a while, so a coffee break may be\n"
|
||||
" in order."
|
||||
"\n",
|
||||
opts=("bold",),
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
os.makedirs(settings.SCRATCH_DIR)
|
||||
@ -65,16 +68,16 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||
if not f.endswith("gpg"):
|
||||
continue
|
||||
|
||||
print(" {} {} {}".format(
|
||||
colourise("*", fg="green"),
|
||||
colourise("Generating a thumbnail for", fg="white"),
|
||||
colourise(f, fg="cyan")
|
||||
))
|
||||
print(
|
||||
" {} {} {}".format(
|
||||
colourise("*", fg="green"),
|
||||
colourise("Generating a thumbnail for", fg="white"),
|
||||
colourise(f, fg="cyan"),
|
||||
)
|
||||
)
|
||||
|
||||
thumb_temp = tempfile.mkdtemp(
|
||||
prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||
orig_temp = tempfile.mkdtemp(
|
||||
prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||
thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||
orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||
|
||||
orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
|
||||
orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))
|
||||
@ -83,20 +86,24 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||
with open(orig_target, "wb") as unencrypted:
|
||||
unencrypted.write(GnuPG.decrypted(encrypted))
|
||||
|
||||
subprocess.Popen((
|
||||
settings.CONVERT_BINARY,
|
||||
"-scale", "500x5000",
|
||||
"-alpha", "remove",
|
||||
orig_target,
|
||||
os.path.join(thumb_temp, "convert-%04d.png")
|
||||
)).wait()
|
||||
subprocess.Popen(
|
||||
(
|
||||
settings.CONVERT_BINARY,
|
||||
"-scale",
|
||||
"500x5000",
|
||||
"-alpha",
|
||||
"remove",
|
||||
orig_target,
|
||||
os.path.join(thumb_temp, "convert-%04d.png"),
|
||||
)
|
||||
).wait()
|
||||
|
||||
thumb_source = os.path.join(thumb_temp, "convert-0000.png")
|
||||
thumb_target = os.path.join(
|
||||
settings.MEDIA_ROOT,
|
||||
"documents",
|
||||
"thumbnails",
|
||||
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
|
||||
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f),
|
||||
)
|
||||
with open(thumb_source, "rb") as unencrypted:
|
||||
with open(thumb_target, "wb") as encrypted:
|
||||
@ -113,7 +120,7 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
('documents', '0011_auto_20160303_1929'),
|
||||
("documents", "0011_auto_20160303_1929"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
@ -9,27 +9,36 @@ import django.utils.timezone
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0012_auto_20160305_0040'),
|
||||
("documents", "0012_auto_20160305_0040"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='correspondent',
|
||||
name='match',
|
||||
model_name="correspondent",
|
||||
name="match",
|
||||
field=models.CharField(blank=True, max_length=256),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.'),
|
||||
model_name="correspondent",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='created',
|
||||
model_name="document",
|
||||
name="created",
|
||||
field=models.DateTimeField(default=django.utils.timezone.now),
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='log',
|
||||
name='component',
|
||||
model_name="log",
|
||||
name="component",
|
||||
),
|
||||
]
|
||||
|
@ -22,16 +22,12 @@ class GnuPG(object):
|
||||
|
||||
@classmethod
|
||||
def decrypted(cls, file_handle):
|
||||
return cls.gpg.decrypt_file(
|
||||
file_handle, passphrase=settings.PASSPHRASE).data
|
||||
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
|
||||
|
||||
@classmethod
|
||||
def encrypted(cls, file_handle):
|
||||
return cls.gpg.encrypt_file(
|
||||
file_handle,
|
||||
recipients=None,
|
||||
passphrase=settings.PASSPHRASE,
|
||||
symmetric=True
|
||||
file_handle, recipients=None, passphrase=settings.PASSPHRASE, symmetric=True
|
||||
).data
|
||||
|
||||
|
||||
@ -53,8 +49,7 @@ class Document(object):
|
||||
def __str__(self):
|
||||
created = self.created.strftime("%Y%m%d%H%M%S")
|
||||
if self.correspondent and self.title:
|
||||
return "{}: {} - {}".format(
|
||||
created, self.correspondent, self.title)
|
||||
return "{}: {} - {}".format(created, self.correspondent, self.title)
|
||||
if self.correspondent or self.title:
|
||||
return "{}: {}".format(created, self.correspondent or self.title)
|
||||
return str(created)
|
||||
@ -65,7 +60,7 @@ class Document(object):
|
||||
settings.MEDIA_ROOT,
|
||||
"documents",
|
||||
"originals",
|
||||
"{:07}.{}.gpg".format(self.pk, self.file_type)
|
||||
"{:07}.{}.gpg".format(self.pk, self.file_type),
|
||||
)
|
||||
|
||||
@property
|
||||
@ -84,38 +79,62 @@ def set_checksums(apps, schema_editor):
|
||||
if not document_model.objects.all().exists():
|
||||
return
|
||||
|
||||
print(colourise(
|
||||
"\n\n"
|
||||
" This is a one-time only migration to generate checksums for all\n"
|
||||
" of your existing documents. If you have a lot of documents\n"
|
||||
" though, this may take a while, so a coffee break may be in\n"
|
||||
" order."
|
||||
"\n", opts=("bold",)
|
||||
))
|
||||
print(
|
||||
colourise(
|
||||
"\n\n"
|
||||
" This is a one-time only migration to generate checksums for all\n"
|
||||
" of your existing documents. If you have a lot of documents\n"
|
||||
" though, this may take a while, so a coffee break may be in\n"
|
||||
" order."
|
||||
"\n",
|
||||
opts=("bold",),
|
||||
)
|
||||
)
|
||||
|
||||
sums = {}
|
||||
for d in document_model.objects.all():
|
||||
|
||||
document = Document(d)
|
||||
|
||||
print(" {} {} {}".format(
|
||||
colourise("*", fg="green"),
|
||||
colourise("Generating a checksum for", fg="white"),
|
||||
colourise(document.file_name, fg="cyan")
|
||||
))
|
||||
print(
|
||||
" {} {} {}".format(
|
||||
colourise("*", fg="green"),
|
||||
colourise("Generating a checksum for", fg="white"),
|
||||
colourise(document.file_name, fg="cyan"),
|
||||
)
|
||||
)
|
||||
|
||||
with document.source_file as encrypted:
|
||||
checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()
|
||||
|
||||
if checksum in sums:
|
||||
error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
|
||||
p1=colourise("It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:", fg="yellow"),
|
||||
p2=colourise("To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:", fg="yellow"),
|
||||
p3=colourise("When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.", fg="yellow"),
|
||||
doc1=colourise(" * {} (id: {})".format(sums[checksum][1], sums[checksum][0]), fg="red"),
|
||||
doc2=colourise(" * {} (id: {})".format(document.file_name, document.pk), fg="red"),
|
||||
code=colourise(" $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(pk=document.pk), fg="green"),
|
||||
line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",))
|
||||
p1=colourise(
|
||||
"It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:",
|
||||
fg="yellow",
|
||||
),
|
||||
p2=colourise(
|
||||
"To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
|
||||
fg="yellow",
|
||||
),
|
||||
p3=colourise(
|
||||
"When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
|
||||
fg="yellow",
|
||||
),
|
||||
doc1=colourise(
|
||||
" * {} (id: {})".format(sums[checksum][1], sums[checksum][0]),
|
||||
fg="red",
|
||||
),
|
||||
doc2=colourise(
|
||||
" * {} (id: {})".format(document.file_name, document.pk), fg="red"
|
||||
),
|
||||
code=colourise(
|
||||
" $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(
|
||||
pk=document.pk
|
||||
),
|
||||
fg="green",
|
||||
),
|
||||
line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
|
||||
)
|
||||
raise RuntimeError(error)
|
||||
sums[checksum] = (document.pk, document.file_name)
|
||||
@ -129,33 +148,35 @@ def do_nothing(apps, schema_editor):
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
('documents', '0013_auto_20160325_2111'),
|
||||
("documents", "0013_auto_20160325_2111"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
model_name="document",
|
||||
name="checksum",
|
||||
field=models.CharField(
|
||||
default='-',
|
||||
default="-",
|
||||
db_index=True,
|
||||
editable=False,
|
||||
max_length=32,
|
||||
help_text='The checksum of the original document (before it '
|
||||
'was encrypted). We use this to prevent duplicate '
|
||||
'document imports.',
|
||||
help_text="The checksum of the original document (before it "
|
||||
"was encrypted). We use this to prevent duplicate "
|
||||
"document imports.",
|
||||
),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.RunPython(set_checksums, do_nothing),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='created',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
||||
model_name="document",
|
||||
name="created",
|
||||
field=models.DateTimeField(
|
||||
db_index=True, default=django.utils.timezone.now
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='modified',
|
||||
model_name="document",
|
||||
name="modified",
|
||||
field=models.DateTimeField(auto_now=True, db_index=True),
|
||||
),
|
||||
]
|
||||
|
@ -8,23 +8,28 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0014_document_checksum'),
|
||||
("documents", "0014_document_checksum"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True),
|
||||
model_name="document",
|
||||
name="checksum",
|
||||
field=models.CharField(
|
||||
editable=False,
|
||||
help_text="The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.",
|
||||
max_length=32,
|
||||
unique=True,
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='correspondent',
|
||||
name='is_insensitive',
|
||||
model_name="correspondent",
|
||||
name="is_insensitive",
|
||||
field=models.BooleanField(default=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='is_insensitive',
|
||||
model_name="tag",
|
||||
name="is_insensitive",
|
||||
field=models.BooleanField(default=True),
|
||||
),
|
||||
]
|
||||
|
@ -9,13 +9,17 @@ from django.conf import settings
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0015_add_insensitive_to_match'),
|
||||
("documents", "0015_add_insensitive_to_match"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='content',
|
||||
field=models.TextField(blank=True, db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]), help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
|
||||
model_name="document",
|
||||
name="content",
|
||||
field=models.TextField(
|
||||
blank=True,
|
||||
db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]),
|
||||
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -8,18 +8,38 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0016_auto_20170325_1558'),
|
||||
("documents", "0016_auto_20170325_1558"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
model_name="correspondent",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
(5, "Fuzzy Match"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
model_name="tag",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
(5, "Fuzzy Match"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -9,13 +9,19 @@ import django.db.models.deletion
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0017_auto_20170512_0507'),
|
||||
("documents", "0017_auto_20170512_0507"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='correspondent',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.Correspondent'),
|
||||
model_name="document",
|
||||
name="correspondent",
|
||||
field=models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="documents",
|
||||
to="documents.Correspondent",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -16,7 +16,7 @@ def reverse_func(apps, schema_editor):
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
('documents', '0018_auto_20170715_1712'),
|
||||
("documents", "0018_auto_20170715_1712"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
@ -14,14 +14,16 @@ def set_added_time_to_created_time(apps, schema_editor):
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
('documents', '0019_add_consumer_user'),
|
||||
("documents", "0019_add_consumer_user"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='added',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False),
|
||||
model_name="document",
|
||||
name="added",
|
||||
field=models.DateTimeField(
|
||||
db_index=True, default=django.utils.timezone.now, editable=False
|
||||
),
|
||||
),
|
||||
migrations.RunPython(set_added_time_to_created_time)
|
||||
migrations.RunPython(set_added_time_to_created_time),
|
||||
]
|
||||
|
@ -8,23 +8,36 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0020_document_added'),
|
||||
("documents", "0020_document_added"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
||||
# Add the field with the default GPG-encrypted value
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='storage_type',
|
||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='gpg', editable=False, max_length=11),
|
||||
model_name="document",
|
||||
name="storage_type",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("unencrypted", "Unencrypted"),
|
||||
("gpg", "Encrypted with GNU Privacy Guard"),
|
||||
],
|
||||
default="gpg",
|
||||
editable=False,
|
||||
max_length=11,
|
||||
),
|
||||
),
|
||||
|
||||
# Now that the field is added, change the default to unencrypted
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='storage_type',
|
||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11),
|
||||
model_name="document",
|
||||
name="storage_type",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("unencrypted", "Unencrypted"),
|
||||
("gpg", "Encrypted with GNU Privacy Guard"),
|
||||
],
|
||||
default="unencrypted",
|
||||
editable=False,
|
||||
max_length=11,
|
||||
),
|
||||
),
|
||||
|
||||
]
|
||||
|
@ -15,38 +15,47 @@ def re_slug_all_the_things(apps, schema_editor):
|
||||
|
||||
for klass in (Tag, Correspondent):
|
||||
for instance in klass.objects.all():
|
||||
klass.objects.filter(
|
||||
pk=instance.pk
|
||||
).update(
|
||||
slug=slugify(instance.slug)
|
||||
)
|
||||
klass.objects.filter(pk=instance.pk).update(slug=slugify(instance.slug))
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0021_document_storage_type'),
|
||||
("documents", "0021_document_storage_type"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'ordering': ('name',)},
|
||||
name="tag",
|
||||
options={"ordering": ("name",)},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='slug',
|
||||
model_name="correspondent",
|
||||
name="slug",
|
||||
field=models.SlugField(blank=True, editable=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='file_type',
|
||||
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
|
||||
model_name="document",
|
||||
name="file_type",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("pdf", "PDF"),
|
||||
("png", "PNG"),
|
||||
("jpg", "JPG"),
|
||||
("gif", "GIF"),
|
||||
("tiff", "TIFF"),
|
||||
("txt", "TXT"),
|
||||
("csv", "CSV"),
|
||||
("md", "MD"),
|
||||
],
|
||||
editable=False,
|
||||
max_length=4,
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
model_name="tag",
|
||||
name="slug",
|
||||
field=models.SlugField(blank=True, editable=False),
|
||||
),
|
||||
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
|
||||
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop),
|
||||
]
|
||||
|
@ -20,18 +20,20 @@ def set_filename(apps, schema_editor):
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0022_auto_20181007_1420'),
|
||||
("documents", "0022_auto_20181007_1420"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None,
|
||||
null=True,
|
||||
editable=False,
|
||||
help_text='Current filename in storage',
|
||||
max_length=256),
|
||||
model_name="document",
|
||||
name="filename",
|
||||
field=models.FilePathField(
|
||||
default=None,
|
||||
null=True,
|
||||
editable=False,
|
||||
help_text="Current filename in storage",
|
||||
max_length=256,
|
||||
),
|
||||
),
|
||||
migrations.RunPython(set_filename)
|
||||
migrations.RunPython(set_filename),
|
||||
]
|
||||
|
@ -6,7 +6,7 @@ import django.db.models.deletion
|
||||
|
||||
|
||||
def logs_set_default_group(apps, schema_editor):
|
||||
Log = apps.get_model('documents', 'Log')
|
||||
Log = apps.get_model("documents", "Log")
|
||||
for log in Log.objects.all():
|
||||
if log.group is None:
|
||||
log.group = uuid.uuid4()
|
||||
@ -16,70 +16,132 @@ def logs_set_default_group(apps, schema_editor):
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0023_document_current_filename'),
|
||||
("documents", "0023_document_current_filename"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_serial_number',
|
||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
|
||||
model_name="document",
|
||||
name="archive_serial_number",
|
||||
field=models.IntegerField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
help_text="The position of this document in your physical document archive.",
|
||||
null=True,
|
||||
unique=True,
|
||||
),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='is_inbox_tag',
|
||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
|
||||
model_name="tag",
|
||||
name="is_inbox_tag",
|
||||
field=models.BooleanField(
|
||||
default=False,
|
||||
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
|
||||
),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='DocumentType',
|
||||
name="DocumentType",
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=128, unique=True)),
|
||||
('slug', models.SlugField(blank=True, editable=False)),
|
||||
('match', models.CharField(blank=True, max_length=256)),
|
||||
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
|
||||
('is_insensitive', models.BooleanField(default=True)),
|
||||
(
|
||||
"id",
|
||||
models.AutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("name", models.CharField(max_length=128, unique=True)),
|
||||
("slug", models.SlugField(blank=True, editable=False)),
|
||||
("match", models.CharField(blank=True, max_length=256)),
|
||||
(
|
||||
"matching_algorithm",
|
||||
models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
(5, "Fuzzy Match"),
|
||||
(6, "Automatic Classification"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||
),
|
||||
),
|
||||
("is_insensitive", models.BooleanField(default=True)),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
'ordering': ('name',),
|
||||
"abstract": False,
|
||||
"ordering": ("name",),
|
||||
},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='document_type',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'),
|
||||
model_name="document",
|
||||
name="document_type",
|
||||
field=models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="documents",
|
||||
to="documents.documenttype",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
model_name="correspondent",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
(5, "Fuzzy Match"),
|
||||
(6, "Automatic Classification"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
model_name="tag",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any"),
|
||||
(2, "All"),
|
||||
(3, "Literal"),
|
||||
(4, "Regular Expression"),
|
||||
(5, "Fuzzy Match"),
|
||||
(6, "Automatic Classification"),
|
||||
],
|
||||
default=1,
|
||||
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='content',
|
||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
|
||||
model_name="document",
|
||||
name="content",
|
||||
field=models.TextField(
|
||||
blank=True,
|
||||
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
|
||||
),
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='log',
|
||||
options={'ordering': ('-created',)},
|
||||
name="log",
|
||||
options={"ordering": ("-created",)},
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='log',
|
||||
name='modified',
|
||||
model_name="log",
|
||||
name="modified",
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='group',
|
||||
model_name="log",
|
||||
name="group",
|
||||
field=models.UUIDField(blank=True, null=True),
|
||||
),
|
||||
migrations.RunPython(
|
||||
code=django.db.migrations.operations.special.RunPython.noop,
|
||||
reverse_code=logs_set_default_group
|
||||
reverse_code=logs_set_default_group,
|
||||
),
|
||||
]
|
||||
|
@ -7,22 +7,28 @@ from django_q.tasks import schedule
|
||||
|
||||
|
||||
def add_schedules(apps, schema_editor):
|
||||
schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY)
|
||||
schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY)
|
||||
schedule(
|
||||
"documents.tasks.train_classifier",
|
||||
name="Train the classifier",
|
||||
schedule_type=Schedule.HOURLY,
|
||||
)
|
||||
schedule(
|
||||
"documents.tasks.index_optimize",
|
||||
name="Optimize the index",
|
||||
schedule_type=Schedule.DAILY,
|
||||
)
|
||||
|
||||
|
||||
def remove_schedules(apps, schema_editor):
|
||||
Schedule.objects.filter(func='documents.tasks.train_classifier').delete()
|
||||
Schedule.objects.filter(func='documents.tasks.index_optimize').delete()
|
||||
Schedule.objects.filter(func="documents.tasks.train_classifier").delete()
|
||||
Schedule.objects.filter(func="documents.tasks.index_optimize").delete()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1000_update_paperless_all'),
|
||||
('django_q', '0013_task_attempt_count'),
|
||||
("documents", "1000_update_paperless_all"),
|
||||
("django_q", "0013_task_attempt_count"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
RunPython(add_schedules, remove_schedules)
|
||||
]
|
||||
operations = [RunPython(add_schedules, remove_schedules)]
|
||||
|
@ -6,13 +6,19 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1001_auto_20201109_1636'),
|
||||
("documents", "1001_auto_20201109_1636"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True),
|
||||
model_name="document",
|
||||
name="filename",
|
||||
field=models.FilePathField(
|
||||
default=None,
|
||||
editable=False,
|
||||
help_text="Current filename in storage",
|
||||
max_length=1024,
|
||||
null=True,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -20,10 +20,7 @@ def source_path(self):
|
||||
if self.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||
|
||||
|
||||
def add_mime_types(apps, schema_editor):
|
||||
@ -49,43 +46,51 @@ def add_file_extensions(apps, schema_editor):
|
||||
documents = Document.objects.all()
|
||||
|
||||
for d in documents:
|
||||
d.file_type = os.path.splitext(d.filename)[1].strip('.')
|
||||
d.file_type = os.path.splitext(d.filename)[1].strip(".")
|
||||
d.save()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1002_auto_20201111_1105'),
|
||||
("documents", "1002_auto_20201111_1105"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='mime_type',
|
||||
model_name="document",
|
||||
name="mime_type",
|
||||
field=models.CharField(default="-", editable=False, max_length=256),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.RunPython(add_mime_types, migrations.RunPython.noop),
|
||||
|
||||
# This operation is here so that we can revert the entire migration:
|
||||
# By allowing this field to be blank and null, we can revert the
|
||||
# remove operation further down and the database won't complain about
|
||||
# NOT NULL violations.
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='file_type',
|
||||
model_name="document",
|
||||
name="file_type",
|
||||
field=models.CharField(
|
||||
choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')],
|
||||
choices=[
|
||||
("pdf", "PDF"),
|
||||
("png", "PNG"),
|
||||
("jpg", "JPG"),
|
||||
("gif", "GIF"),
|
||||
("tiff", "TIFF"),
|
||||
("txt", "TXT"),
|
||||
("csv", "CSV"),
|
||||
("md", "MD"),
|
||||
],
|
||||
editable=False,
|
||||
max_length=4,
|
||||
null=True,
|
||||
blank=True
|
||||
blank=True,
|
||||
),
|
||||
),
|
||||
migrations.RunPython(migrations.RunPython.noop, add_file_extensions),
|
||||
migrations.RemoveField(
|
||||
model_name='document',
|
||||
name='file_type',
|
||||
model_name="document",
|
||||
name="file_type",
|
||||
),
|
||||
]
|
||||
|
@ -7,20 +7,22 @@ from django_q.tasks import schedule
|
||||
|
||||
|
||||
def add_schedules(apps, schema_editor):
|
||||
schedule('documents.tasks.sanity_check', name="Perform sanity check", schedule_type=Schedule.WEEKLY)
|
||||
schedule(
|
||||
"documents.tasks.sanity_check",
|
||||
name="Perform sanity check",
|
||||
schedule_type=Schedule.WEEKLY,
|
||||
)
|
||||
|
||||
|
||||
def remove_schedules(apps, schema_editor):
|
||||
Schedule.objects.filter(func='documents.tasks.sanity_check').delete()
|
||||
Schedule.objects.filter(func="documents.tasks.sanity_check").delete()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1003_mime_types'),
|
||||
('django_q', '0013_task_attempt_count'),
|
||||
("documents", "1003_mime_types"),
|
||||
("django_q", "0013_task_attempt_count"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
RunPython(add_schedules, remove_schedules)
|
||||
]
|
||||
operations = [RunPython(add_schedules, remove_schedules)]
|
||||
|
@ -6,18 +6,29 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1004_sanity_check_schedule'),
|
||||
("documents", "1004_sanity_check_schedule"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_checksum',
|
||||
field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True),
|
||||
model_name="document",
|
||||
name="archive_checksum",
|
||||
field=models.CharField(
|
||||
blank=True,
|
||||
editable=False,
|
||||
help_text="The checksum of the archived document.",
|
||||
max_length=32,
|
||||
null=True,
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True),
|
||||
model_name="document",
|
||||
name="checksum",
|
||||
field=models.CharField(
|
||||
editable=False,
|
||||
help_text="The checksum of the original document.",
|
||||
max_length=32,
|
||||
unique=True,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -6,20 +6,20 @@ from django.db import migrations
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1005_checksums'),
|
||||
("documents", "1005_checksums"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RemoveField(
|
||||
model_name='correspondent',
|
||||
name='slug',
|
||||
model_name="correspondent",
|
||||
name="slug",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='documenttype',
|
||||
name='slug',
|
||||
model_name="documenttype",
|
||||
name="slug",
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
model_name="tag",
|
||||
name="slug",
|
||||
),
|
||||
]
|
||||
|
@@ -9,29 +9,82 @@ class Migration(migrations.Migration):

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('documents', '1006_auto_20201208_2209'),
        ("documents", "1006_auto_20201208_2209"),
    ]

    operations = [
        migrations.CreateModel(
            name='SavedView',
            name="SavedView",
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=128)),
                ('show_on_dashboard', models.BooleanField()),
                ('show_in_sidebar', models.BooleanField()),
                ('sort_field', models.CharField(max_length=128)),
                ('sort_reverse', models.BooleanField(default=False)),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("name", models.CharField(max_length=128)),
                ("show_on_dashboard", models.BooleanField()),
                ("show_in_sidebar", models.BooleanField()),
                ("sort_field", models.CharField(max_length=128)),
                ("sort_reverse", models.BooleanField(default=False)),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
        ),
        migrations.CreateModel(
            name='SavedViewFilterRule',
            name="SavedViewFilterRule",
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('rule_type', models.PositiveIntegerField(choices=[(0, 'Title contains'), (1, 'Content contains'), (2, 'ASN is'), (3, 'Correspondent is'), (4, 'Document type is'), (5, 'Is in inbox'), (6, 'Has tag'), (7, 'Has any tag'), (8, 'Created before'), (9, 'Created after'), (10, 'Created year is'), (11, 'Created month is'), (12, 'Created day is'), (13, 'Added before'), (14, 'Added after'), (15, 'Modified before'), (16, 'Modified after'), (17, 'Does not have tag')])),
                ('value', models.CharField(max_length=128)),
                ('saved_view', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview')),
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "rule_type",
                    models.PositiveIntegerField(
                        choices=[
                            (0, "Title contains"),
                            (1, "Content contains"),
                            (2, "ASN is"),
                            (3, "Correspondent is"),
                            (4, "Document type is"),
                            (5, "Is in inbox"),
                            (6, "Has tag"),
                            (7, "Has any tag"),
                            (8, "Created before"),
                            (9, "Created after"),
                            (10, "Created year is"),
                            (11, "Created month is"),
                            (12, "Created day is"),
                            (13, "Added before"),
                            (14, "Added after"),
                            (15, "Modified before"),
                            (16, "Modified after"),
                            (17, "Does not have tag"),
                        ]
                    ),
                ),
                ("value", models.CharField(max_length=128)),
                (
                    "saved_view",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="filter_rules",
                        to="documents.savedview",
                    ),
                ),
            ],
        ),
    ]
@@ -7,28 +7,28 @@ import django.db.models.functions.text
class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1007_savedview_savedviewfilterrule'),
        ("documents", "1007_savedview_savedviewfilterrule"),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='correspondent',
            options={'ordering': (django.db.models.functions.text.Lower('name'),)},
            name="correspondent",
            options={"ordering": (django.db.models.functions.text.Lower("name"),)},
        ),
        migrations.AlterModelOptions(
            name='document',
            options={'ordering': ('-created',)},
            name="document",
            options={"ordering": ("-created",)},
        ),
        migrations.AlterModelOptions(
            name='documenttype',
            options={'ordering': (django.db.models.functions.text.Lower('name'),)},
            name="documenttype",
            options={"ordering": (django.db.models.functions.text.Lower("name"),)},
        ),
        migrations.AlterModelOptions(
            name='savedview',
            options={'ordering': (django.db.models.functions.text.Lower('name'),)},
            name="savedview",
            options={"ordering": (django.db.models.functions.text.Lower("name"),)},
        ),
        migrations.AlterModelOptions(
            name='tag',
            options={'ordering': (django.db.models.functions.text.Lower('name'),)},
            name="tag",
            options={"ordering": (django.db.models.functions.text.Lower("name"),)},
        ),
    ]
@@ -6,24 +6,24 @@ from django.db import migrations
class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1008_auto_20201216_1736'),
        ("documents", "1008_auto_20201216_1736"),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='correspondent',
            options={'ordering': ('name',)},
            name="correspondent",
            options={"ordering": ("name",)},
        ),
        migrations.AlterModelOptions(
            name='documenttype',
            options={'ordering': ('name',)},
            name="documenttype",
            options={"ordering": ("name",)},
        ),
        migrations.AlterModelOptions(
            name='savedview',
            options={'ordering': ('name',)},
            name="savedview",
            options={"ordering": ("name",)},
        ),
        migrations.AlterModelOptions(
            name='tag',
            options={'ordering': ('name',)},
            name="tag",
            options={"ordering": ("name",)},
        ),
    ]
@@ -6,13 +6,13 @@ from django.db import migrations, models
class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1009_auto_20201216_2005'),
        ("documents", "1009_auto_20201216_2005"),
    ]

    operations = [
        migrations.AlterField(
            model_name='savedviewfilterrule',
            name='value',
            model_name="savedviewfilterrule",
            name="value",
            field=models.CharField(blank=True, max_length=128, null=True),
        ),
    ]
@ -10,241 +10,433 @@ class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
('documents', '1010_auto_20210101_2159'),
|
||||
("documents", "1010_auto_20210101_2159"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='correspondent',
|
||||
options={'ordering': ('name',), 'verbose_name': 'correspondent', 'verbose_name_plural': 'correspondents'},
|
||||
name="correspondent",
|
||||
options={
|
||||
"ordering": ("name",),
|
||||
"verbose_name": "correspondent",
|
||||
"verbose_name_plural": "correspondents",
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='document',
|
||||
options={'ordering': ('-created',), 'verbose_name': 'document', 'verbose_name_plural': 'documents'},
|
||||
name="document",
|
||||
options={
|
||||
"ordering": ("-created",),
|
||||
"verbose_name": "document",
|
||||
"verbose_name_plural": "documents",
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='documenttype',
|
||||
options={'verbose_name': 'document type', 'verbose_name_plural': 'document types'},
|
||||
name="documenttype",
|
||||
options={
|
||||
"verbose_name": "document type",
|
||||
"verbose_name_plural": "document types",
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='log',
|
||||
options={'ordering': ('-created',), 'verbose_name': 'log', 'verbose_name_plural': 'logs'},
|
||||
name="log",
|
||||
options={
|
||||
"ordering": ("-created",),
|
||||
"verbose_name": "log",
|
||||
"verbose_name_plural": "logs",
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='savedview',
|
||||
options={'ordering': ('name',), 'verbose_name': 'saved view', 'verbose_name_plural': 'saved views'},
|
||||
name="savedview",
|
||||
options={
|
||||
"ordering": ("name",),
|
||||
"verbose_name": "saved view",
|
||||
"verbose_name_plural": "saved views",
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='savedviewfilterrule',
|
||||
options={'verbose_name': 'filter rule', 'verbose_name_plural': 'filter rules'},
|
||||
name="savedviewfilterrule",
|
||||
options={
|
||||
"verbose_name": "filter rule",
|
||||
"verbose_name_plural": "filter rules",
|
||||
},
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'verbose_name': 'tag', 'verbose_name_plural': 'tags'},
|
||||
name="tag",
|
||||
options={"verbose_name": "tag", "verbose_name_plural": "tags"},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='is_insensitive',
|
||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
||||
model_name="correspondent",
|
||||
name="is_insensitive",
|
||||
field=models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='match',
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
||||
model_name="correspondent",
|
||||
name="match",
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
||||
model_name="correspondent",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any word"),
|
||||
(2, "All words"),
|
||||
(3, "Exact match"),
|
||||
(4, "Regular expression"),
|
||||
(5, "Fuzzy word"),
|
||||
(6, "Automatic"),
|
||||
],
|
||||
default=1,
|
||||
verbose_name="matching algorithm",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
||||
model_name="correspondent",
|
||||
name="name",
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='added',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False, verbose_name='added'),
|
||||
model_name="document",
|
||||
name="added",
|
||||
field=models.DateTimeField(
|
||||
db_index=True,
|
||||
default=django.utils.timezone.now,
|
||||
editable=False,
|
||||
verbose_name="added",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='archive_checksum',
|
||||
field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True, verbose_name='archive checksum'),
|
||||
model_name="document",
|
||||
name="archive_checksum",
|
||||
field=models.CharField(
|
||||
blank=True,
|
||||
editable=False,
|
||||
help_text="The checksum of the archived document.",
|
||||
max_length=32,
|
||||
null=True,
|
||||
verbose_name="archive checksum",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='archive_serial_number',
|
||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True, verbose_name='archive serial number'),
|
||||
model_name="document",
|
||||
name="archive_serial_number",
|
||||
field=models.IntegerField(
|
||||
blank=True,
|
||||
db_index=True,
|
||||
help_text="The position of this document in your physical document archive.",
|
||||
null=True,
|
||||
unique=True,
|
||||
verbose_name="archive serial number",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True, verbose_name='checksum'),
|
||||
model_name="document",
|
||||
name="checksum",
|
||||
field=models.CharField(
|
||||
editable=False,
|
||||
help_text="The checksum of the original document.",
|
||||
max_length=32,
|
||||
unique=True,
|
||||
verbose_name="checksum",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='content',
|
||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.', verbose_name='content'),
|
||||
model_name="document",
|
||||
name="content",
|
||||
field=models.TextField(
|
||||
blank=True,
|
||||
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
|
||||
verbose_name="content",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='correspondent',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.correspondent', verbose_name='correspondent'),
|
||||
model_name="document",
|
||||
name="correspondent",
|
||||
field=models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="documents",
|
||||
to="documents.correspondent",
|
||||
verbose_name="correspondent",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='created',
|
||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, verbose_name='created'),
|
||||
model_name="document",
|
||||
name="created",
|
||||
field=models.DateTimeField(
|
||||
db_index=True, default=django.utils.timezone.now, verbose_name="created"
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='document_type',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype', verbose_name='document type'),
|
||||
model_name="document",
|
||||
name="document_type",
|
||||
field=models.ForeignKey(
|
||||
blank=True,
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="documents",
|
||||
to="documents.documenttype",
|
||||
verbose_name="document type",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, verbose_name='filename'),
|
||||
model_name="document",
|
||||
name="filename",
|
||||
field=models.FilePathField(
|
||||
default=None,
|
||||
editable=False,
|
||||
help_text="Current filename in storage",
|
||||
max_length=1024,
|
||||
null=True,
|
||||
verbose_name="filename",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='mime_type',
|
||||
field=models.CharField(editable=False, max_length=256, verbose_name='mime type'),
|
||||
model_name="document",
|
||||
name="mime_type",
|
||||
field=models.CharField(
|
||||
editable=False, max_length=256, verbose_name="mime type"
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True, db_index=True, verbose_name='modified'),
|
||||
model_name="document",
|
||||
name="modified",
|
||||
field=models.DateTimeField(
|
||||
auto_now=True, db_index=True, verbose_name="modified"
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='storage_type',
|
||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11, verbose_name='storage type'),
|
||||
model_name="document",
|
||||
name="storage_type",
|
||||
field=models.CharField(
|
||||
choices=[
|
||||
("unencrypted", "Unencrypted"),
|
||||
("gpg", "Encrypted with GNU Privacy Guard"),
|
||||
],
|
||||
default="unencrypted",
|
||||
editable=False,
|
||||
max_length=11,
|
||||
verbose_name="storage type",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='tags',
|
||||
field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag', verbose_name='tags'),
|
||||
model_name="document",
|
||||
name="tags",
|
||||
field=models.ManyToManyField(
|
||||
blank=True,
|
||||
related_name="documents",
|
||||
to="documents.Tag",
|
||||
verbose_name="tags",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='title',
|
||||
field=models.CharField(blank=True, db_index=True, max_length=128, verbose_name='title'),
|
||||
model_name="document",
|
||||
name="title",
|
||||
field=models.CharField(
|
||||
blank=True, db_index=True, max_length=128, verbose_name="title"
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='is_insensitive',
|
||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
||||
model_name="documenttype",
|
||||
name="is_insensitive",
|
||||
field=models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='match',
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
||||
model_name="documenttype",
|
||||
name="match",
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
||||
model_name="documenttype",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any word"),
|
||||
(2, "All words"),
|
||||
(3, "Exact match"),
|
||||
(4, "Regular expression"),
|
||||
(5, "Fuzzy word"),
|
||||
(6, "Automatic"),
|
||||
],
|
||||
default=1,
|
||||
verbose_name="matching algorithm",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
||||
model_name="documenttype",
|
||||
name="name",
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, verbose_name='created'),
|
||||
model_name="log",
|
||||
name="created",
|
||||
field=models.DateTimeField(auto_now_add=True, verbose_name="created"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='group',
|
||||
field=models.UUIDField(blank=True, null=True, verbose_name='group'),
|
||||
model_name="log",
|
||||
name="group",
|
||||
field=models.UUIDField(blank=True, null=True, verbose_name="group"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='level',
|
||||
field=models.PositiveIntegerField(choices=[(10, 'debug'), (20, 'information'), (30, 'warning'), (40, 'error'), (50, 'critical')], default=20, verbose_name='level'),
|
||||
model_name="log",
|
||||
name="level",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(10, "debug"),
|
||||
(20, "information"),
|
||||
(30, "warning"),
|
||||
(40, "error"),
|
||||
(50, "critical"),
|
||||
],
|
||||
default=20,
|
||||
verbose_name="level",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='message',
|
||||
field=models.TextField(verbose_name='message'),
|
||||
model_name="log",
|
||||
name="message",
|
||||
field=models.TextField(verbose_name="message"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, verbose_name='name'),
|
||||
model_name="savedview",
|
||||
name="name",
|
||||
field=models.CharField(max_length=128, verbose_name="name"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='show_in_sidebar',
|
||||
field=models.BooleanField(verbose_name='show in sidebar'),
|
||||
model_name="savedview",
|
||||
name="show_in_sidebar",
|
||||
field=models.BooleanField(verbose_name="show in sidebar"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='show_on_dashboard',
|
||||
field=models.BooleanField(verbose_name='show on dashboard'),
|
||||
model_name="savedview",
|
||||
name="show_on_dashboard",
|
||||
field=models.BooleanField(verbose_name="show on dashboard"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='sort_field',
|
||||
field=models.CharField(max_length=128, verbose_name='sort field'),
|
||||
model_name="savedview",
|
||||
name="sort_field",
|
||||
field=models.CharField(max_length=128, verbose_name="sort field"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='sort_reverse',
|
||||
field=models.BooleanField(default=False, verbose_name='sort reverse'),
|
||||
model_name="savedview",
|
||||
name="sort_reverse",
|
||||
field=models.BooleanField(default=False, verbose_name="sort reverse"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='user',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, verbose_name='user'),
|
||||
model_name="savedview",
|
||||
name="user",
|
||||
field=models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
to=settings.AUTH_USER_MODEL,
|
||||
verbose_name="user",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='rule_type',
|
||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag')], verbose_name='rule type'),
|
||||
model_name="savedviewfilterrule",
|
||||
name="rule_type",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(0, "title contains"),
|
||||
(1, "content contains"),
|
||||
(2, "ASN is"),
|
||||
(3, "correspondent is"),
|
||||
(4, "document type is"),
|
||||
(5, "is in inbox"),
|
||||
(6, "has tag"),
|
||||
(7, "has any tag"),
|
||||
(8, "created before"),
|
||||
(9, "created after"),
|
||||
(10, "created year is"),
|
||||
(11, "created month is"),
|
||||
(12, "created day is"),
|
||||
(13, "added before"),
|
||||
(14, "added after"),
|
||||
(15, "modified before"),
|
||||
(16, "modified after"),
|
||||
(17, "does not have tag"),
|
||||
],
|
||||
verbose_name="rule type",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='saved_view',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview', verbose_name='saved view'),
|
||||
model_name="savedviewfilterrule",
|
||||
name="saved_view",
|
||||
field=models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE,
|
||||
related_name="filter_rules",
|
||||
to="documents.savedview",
|
||||
verbose_name="saved view",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='value',
|
||||
field=models.CharField(blank=True, max_length=128, null=True, verbose_name='value'),
|
||||
model_name="savedviewfilterrule",
|
||||
name="value",
|
||||
field=models.CharField(
|
||||
blank=True, max_length=128, null=True, verbose_name="value"
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='colour',
|
||||
field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1, verbose_name='color'),
|
||||
model_name="tag",
|
||||
name="colour",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "#a6cee3"),
|
||||
(2, "#1f78b4"),
|
||||
(3, "#b2df8a"),
|
||||
(4, "#33a02c"),
|
||||
(5, "#fb9a99"),
|
||||
(6, "#e31a1c"),
|
||||
(7, "#fdbf6f"),
|
||||
(8, "#ff7f00"),
|
||||
(9, "#cab2d6"),
|
||||
(10, "#6a3d9a"),
|
||||
(11, "#b15928"),
|
||||
(12, "#000000"),
|
||||
(13, "#cccccc"),
|
||||
],
|
||||
default=1,
|
||||
verbose_name="color",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='is_inbox_tag',
|
||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.', verbose_name='is inbox tag'),
|
||||
model_name="tag",
|
||||
name="is_inbox_tag",
|
||||
field=models.BooleanField(
|
||||
default=False,
|
||||
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
|
||||
verbose_name="is inbox tag",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='is_insensitive',
|
||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
||||
model_name="tag",
|
||||
name="is_insensitive",
|
||||
field=models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='match',
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
||||
model_name="tag",
|
||||
name="match",
|
||||
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
||||
model_name="tag",
|
||||
name="matching_algorithm",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Any word"),
|
||||
(2, "All words"),
|
||||
(3, "Exact match"),
|
||||
(4, "Regular expression"),
|
||||
(5, "Fuzzy word"),
|
||||
(6, "Automatic"),
|
||||
],
|
||||
default=1,
|
||||
verbose_name="matching algorithm",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='name',
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
||||
model_name="tag",
|
||||
name="name",
|
||||
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
|
||||
),
|
||||
]
|
||||
|
@ -20,6 +20,7 @@ logger = logging.getLogger("paperless.migrations")
|
||||
# This is code copied straight from paperless before the change.
|
||||
###############################################################################
|
||||
|
||||
|
||||
def archive_name_from_filename(filename):
|
||||
return os.path.splitext(filename)[0] + ".pdf"
|
||||
|
||||
@ -30,10 +31,7 @@ def archive_path_old(doc):
|
||||
else:
|
||||
fname = "{:07}.pdf".format(doc.pk)
|
||||
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
fname
|
||||
)
|
||||
return os.path.join(settings.ARCHIVE_DIR, fname)
|
||||
|
||||
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
@ -41,10 +39,7 @@ STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
def archive_path_new(doc):
|
||||
if doc.archive_filename is not None:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(doc.archive_filename)
|
||||
)
|
||||
return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
|
||||
else:
|
||||
return None
|
||||
|
||||
@ -57,10 +52,7 @@ def source_path(doc):
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||
|
||||
|
||||
def generate_unique_filename(doc, archive_filename=False):
|
||||
@ -75,7 +67,8 @@ def generate_unique_filename(doc, archive_filename=False):
|
||||
|
||||
while True:
|
||||
new_filename = generate_filename(
|
||||
doc, counter, archive_filename=archive_filename)
|
||||
doc, counter, archive_filename=archive_filename
|
||||
)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
@ -91,14 +84,11 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
|
||||
try:
|
||||
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
||||
tags = defaultdictNoStr(lambda: slugify(None),
|
||||
many_to_dictionary(doc.tags))
|
||||
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
|
||||
|
||||
tag_list = pathvalidate.sanitize_filename(
|
||||
",".join(sorted(
|
||||
[tag.name for tag in doc.tags.all()]
|
||||
)),
|
||||
replacement_text="-"
|
||||
",".join(sorted([tag.name for tag in doc.tags.all()])),
|
||||
replacement_text="-",
|
||||
)
|
||||
|
||||
if doc.correspondent:
|
||||
@ -116,20 +106,21 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
document_type = "none"
|
||||
|
||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||
title=pathvalidate.sanitize_filename(
|
||||
doc.title, replacement_text="-"),
|
||||
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
|
||||
correspondent=correspondent,
|
||||
document_type=document_type,
|
||||
created=datetime.date.isoformat(doc.created),
|
||||
created_year=doc.created.year if doc.created else "none",
|
||||
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
|
||||
created_month=f"{doc.created.month:02}"
|
||||
if doc.created
|
||||
else "none", # NOQA: E501
|
||||
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
||||
added=datetime.date.isoformat(doc.added),
|
||||
added_year=doc.added.year if doc.added else "none",
|
||||
added_month=f"{doc.added.month:02}" if doc.added else "none",
|
||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||
tags=tags,
|
||||
tag_list=tag_list
|
||||
tag_list=tag_list,
|
||||
).strip()
|
||||
|
||||
path = path.strip(os.sep)
|
||||
@ -137,7 +128,8 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
||||
except (ValueError, KeyError, IndexError):
|
||||
logger.warning(
|
||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
|
||||
)
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
|
||||
@ -166,29 +158,29 @@ def parse_wrapper(parser, path, mime_type, file_name):
|
||||
|
||||
|
||||
def create_archive_version(doc, retry_count=3):
|
||||
from documents.parsers import get_parser_class_for_mime_type, \
|
||||
DocumentParser, \
|
||||
ParseError
|
||||
|
||||
logger.info(
|
||||
f"Regenerating archive document for document ID:{doc.id}"
|
||||
from documents.parsers import (
|
||||
get_parser_class_for_mime_type,
|
||||
DocumentParser,
|
||||
ParseError,
|
||||
)
|
||||
|
||||
logger.info(f"Regenerating archive document for document ID:{doc.id}")
|
||||
parser_class = get_parser_class_for_mime_type(doc.mime_type)
|
||||
for try_num in range(retry_count):
|
||||
parser: DocumentParser = parser_class(None, None)
|
||||
try:
|
||||
parse_wrapper(parser, source_path(doc), doc.mime_type,
|
||||
os.path.basename(doc.filename))
|
||||
parse_wrapper(
|
||||
parser, source_path(doc), doc.mime_type, os.path.basename(doc.filename)
|
||||
)
|
||||
doc.content = parser.get_text()
|
||||
|
||||
if parser.get_archive_path() and os.path.isfile(
|
||||
parser.get_archive_path()):
|
||||
if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()):
|
||||
doc.archive_filename = generate_unique_filename(
|
||||
doc, archive_filename=True)
|
||||
doc, archive_filename=True
|
||||
)
|
||||
with open(parser.get_archive_path(), "rb") as f:
|
||||
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
os.makedirs(os.path.dirname(archive_path_new(doc)),
|
||||
exist_ok=True)
|
||||
os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True)
|
||||
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
|
||||
else:
|
||||
doc.archive_checksum = None
|
||||
@ -241,8 +233,8 @@ def move_old_to_new_locations(apps, schema_editor):
|
||||
old_path = archive_path_old(doc)
|
||||
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
|
||||
raise ValueError(
|
||||
f"Archived document ID:{doc.id} does not exist at: "
|
||||
f"{old_path}")
|
||||
f"Archived document ID:{doc.id} does not exist at: " f"{old_path}"
|
||||
)
|
||||
|
||||
# check that we can regenerate affected archive versions
|
||||
for doc_id in affected_document_ids:
|
||||
@ -253,7 +245,8 @@ def move_old_to_new_locations(apps, schema_editor):
|
||||
if not parser_class:
|
||||
raise ValueError(
|
||||
f"Document ID:{doc.id} has an invalid archived document, "
|
||||
f"but no parsers are available. Cannot migrate.")
|
||||
f"but no parsers are available. Cannot migrate."
|
||||
)
|
||||
|
||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||
|
||||
@ -261,9 +254,7 @@ def move_old_to_new_locations(apps, schema_editor):
|
||||
old_path = archive_path_old(doc)
|
||||
# remove affected archive versions
|
||||
if os.path.isfile(old_path):
|
||||
logger.debug(
|
||||
f"Removing {old_path}"
|
||||
)
|
||||
logger.debug(f"Removing {old_path}")
|
||||
os.unlink(old_path)
|
||||
else:
|
||||
# Set archive path for unaffected files
|
||||
@ -290,7 +281,8 @@ def move_new_to_old_locations(apps, schema_editor):
|
||||
raise ValueError(
|
||||
f"Cannot migrate: Archive file name {old_archive_path} of "
|
||||
f"document {doc.filename} would clash with another archive "
|
||||
f"filename.")
|
||||
f"filename."
|
||||
)
|
||||
old_archive_paths.add(old_archive_path)
|
||||
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
|
||||
raise ValueError(
|
||||
@ -309,22 +301,35 @@ def move_new_to_old_locations(apps, schema_editor):
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1011_auto_20210101_2340'),
|
||||
("documents", "1011_auto_20210101_2340"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
|
||||
model_name="document",
|
||||
name="archive_filename",
|
||||
field=models.FilePathField(
|
||||
default=None,
|
||||
editable=False,
|
||||
help_text="Current archive filename in storage",
|
||||
max_length=1024,
|
||||
null=True,
|
||||
unique=True,
|
||||
verbose_name="archive filename",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
|
||||
),
|
||||
migrations.RunPython(
|
||||
move_old_to_new_locations,
|
||||
move_new_to_old_locations
|
||||
model_name="document",
|
||||
name="filename",
|
||||
field=models.FilePathField(
|
||||
default=None,
|
||||
editable=False,
|
||||
help_text="Current filename in storage",
|
||||
max_length=1024,
|
||||
null=True,
|
||||
unique=True,
|
||||
verbose_name="filename",
|
||||
),
|
||||
),
|
||||
migrations.RunPython(move_old_to_new_locations, move_new_to_old_locations),
|
||||
]
|
||||
|
@@ -20,7 +20,7 @@ COLOURS_OLD = {


def forward(apps, schema_editor):
    Tag = apps.get_model('documents', 'Tag')
    Tag = apps.get_model("documents", "Tag")

    for tag in Tag.objects.all():
        colour_old_id = tag.colour_old
@@ -30,7 +30,7 @@ def forward(apps, schema_editor):


def reverse(apps, schema_editor):
    Tag = apps.get_model('documents', 'Tag')
    Tag = apps.get_model("documents", "Tag")

    def _get_colour_id(rdb):
        for idx, rdbx in COLOURS_OLD.items():
@ -48,23 +48,25 @@ def reverse(apps, schema_editor):
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1012_fix_archive_files'),
|
||||
("documents", "1012_fix_archive_files"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RenameField(
|
||||
model_name='tag',
|
||||
old_name='colour',
|
||||
new_name='colour_old',
|
||||
model_name="tag",
|
||||
old_name="colour",
|
||||
new_name="colour_old",
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='color',
|
||||
field=models.CharField(default='#a6cee3', max_length=7, verbose_name='color'),
|
||||
model_name="tag",
|
||||
name="color",
|
||||
field=models.CharField(
|
||||
default="#a6cee3", max_length=7, verbose_name="color"
|
||||
),
|
||||
),
|
||||
migrations.RunPython(forward, reverse),
|
||||
migrations.RemoveField(
|
||||
model_name='tag',
|
||||
name='colour_old',
|
||||
)
|
||||
model_name="tag",
|
||||
name="colour_old",
|
||||
),
|
||||
]
|
||||
|
@ -6,13 +6,37 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1013_migrate_tag_colour'),
|
||||
("documents", "1013_migrate_tag_colour"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='rule_type',
|
||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag'), (18, 'does not have ASN'), (19, 'title or content contains')], verbose_name='rule type'),
|
||||
model_name="savedviewfilterrule",
|
||||
name="rule_type",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(0, "title contains"),
|
||||
(1, "content contains"),
|
||||
(2, "ASN is"),
|
||||
(3, "correspondent is"),
|
||||
(4, "document type is"),
|
||||
(5, "is in inbox"),
|
||||
(6, "has tag"),
|
||||
(7, "has any tag"),
|
||||
(8, "created before"),
|
||||
(9, "created after"),
|
||||
(10, "created year is"),
|
||||
(11, "created month is"),
|
||||
(12, "created day is"),
|
||||
(13, "added before"),
|
||||
(14, "added after"),
|
||||
(15, "modified before"),
|
||||
(16, "modified after"),
|
||||
(17, "does not have tag"),
|
||||
(18, "does not have ASN"),
|
||||
(19, "title or content contains"),
|
||||
],
|
||||
verbose_name="rule type",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -8,20 +8,20 @@ logger = logging.getLogger("paperless.migrations")
|
||||
|
||||
|
||||
def remove_null_characters(apps, schema_editor):
|
||||
Document = apps.get_model('documents', 'Document')
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
for doc in Document.objects.all():
|
||||
content: str = doc.content
|
||||
if '\0' in content:
|
||||
if "\0" in content:
|
||||
logger.info(f"Removing null characters from document {doc}...")
|
||||
doc.content = content.replace('\0', ' ')
|
||||
doc.content = content.replace("\0", " ")
|
||||
doc.save()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1014_auto_20210228_1614'),
|
||||
("documents", "1014_auto_20210228_1614"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
|
@ -6,18 +6,46 @@ from django.db import migrations, models
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1015_remove_null_characters'),
|
||||
("documents", "1015_remove_null_characters"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='savedview',
|
||||
name='sort_field',
|
||||
field=models.CharField(blank=True, max_length=128, null=True, verbose_name='sort field'),
|
||||
model_name="savedview",
|
||||
name="sort_field",
|
||||
field=models.CharField(
|
||||
blank=True, max_length=128, null=True, verbose_name="sort field"
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='rule_type',
|
||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag'), (18, 'does not have ASN'), (19, 'title or content contains'), (20, 'fulltext query'), (21, 'more like this')], verbose_name='rule type'),
|
||||
model_name="savedviewfilterrule",
|
||||
name="rule_type",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(0, "title contains"),
|
||||
(1, "content contains"),
|
||||
(2, "ASN is"),
|
||||
(3, "correspondent is"),
|
||||
(4, "document type is"),
|
||||
(5, "is in inbox"),
|
||||
(6, "has tag"),
|
||||
(7, "has any tag"),
|
||||
(8, "created before"),
|
||||
(9, "created after"),
|
||||
(10, "created year is"),
|
||||
(11, "created month is"),
|
||||
(12, "created day is"),
|
||||
(13, "added before"),
|
||||
(14, "added after"),
|
||||
(15, "modified before"),
|
||||
(16, "modified after"),
|
||||
(17, "does not have tag"),
|
||||
(18, "does not have ASN"),
|
||||
(19, "title or content contains"),
|
||||
(20, "fulltext query"),
|
||||
(21, "more like this"),
|
||||
],
|
||||
verbose_name="rule type",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
@ -37,23 +37,15 @@ class MatchingModel(models.Model):
|
||||
(MATCH_AUTO, _("Automatic")),
|
||||
)
|
||||
|
||||
name = models.CharField(
|
||||
_("name"),
|
||||
max_length=128, unique=True)
|
||||
name = models.CharField(_("name"), max_length=128, unique=True)
|
||||
|
||||
match = models.CharField(
|
||||
_("match"),
|
||||
max_length=256, blank=True)
|
||||
match = models.CharField(_("match"), max_length=256, blank=True)
|
||||
|
||||
matching_algorithm = models.PositiveIntegerField(
|
||||
_("matching algorithm"),
|
||||
choices=MATCHING_ALGORITHMS,
|
||||
default=MATCH_ANY
|
||||
_("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY
|
||||
)
|
||||
|
||||
is_insensitive = models.BooleanField(
|
||||
_("is insensitive"),
|
||||
default=True)
|
||||
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
@ -64,7 +56,6 @@ class MatchingModel(models.Model):
|
||||
|
||||
|
||||
class Correspondent(MatchingModel):
|
||||
|
||||
class Meta:
|
||||
ordering = ("name",)
|
||||
verbose_name = _("correspondent")
|
||||
@ -73,17 +64,15 @@ class Correspondent(MatchingModel):
|
||||
|
||||
class Tag(MatchingModel):
|
||||
|
||||
color = models.CharField(
|
||||
_("color"),
|
||||
max_length=7,
|
||||
default="#a6cee3"
|
||||
)
|
||||
color = models.CharField(_("color"), max_length=7, default="#a6cee3")
|
||||
|
||||
is_inbox_tag = models.BooleanField(
|
||||
_("is inbox tag"),
|
||||
default=False,
|
||||
help_text=_("Marks this tag as an inbox tag: All newly consumed "
|
||||
"documents will be tagged with inbox tags.")
|
||||
help_text=_(
|
||||
"Marks this tag as an inbox tag: All newly consumed "
|
||||
"documents will be tagged with inbox tags."
|
||||
),
|
||||
)
|
||||
|
||||
class Meta:
|
||||
@ -92,7 +81,6 @@ class Tag(MatchingModel):
|
||||
|
||||
|
||||
class DocumentType(MatchingModel):
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("document type")
|
||||
verbose_name_plural = _("document types")
|
||||
@ -104,7 +92,7 @@ class Document(models.Model):
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
STORAGE_TYPES = (
|
||||
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
|
||||
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard"))
|
||||
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard")),
|
||||
)
|
||||
|
||||
correspondent = models.ForeignKey(
|
||||
@ -113,12 +101,10 @@ class Document(models.Model):
|
||||
null=True,
|
||||
related_name="documents",
|
||||
on_delete=models.SET_NULL,
|
||||
verbose_name=_("correspondent")
|
||||
verbose_name=_("correspondent"),
|
||||
)
|
||||
|
||||
title = models.CharField(
|
||||
_("title"),
|
||||
max_length=128, blank=True, db_index=True)
|
||||
title = models.CharField(_("title"), max_length=128, blank=True, db_index=True)
|
||||
|
||||
document_type = models.ForeignKey(
|
||||
DocumentType,
|
||||
@ -126,25 +112,22 @@ class Document(models.Model):
|
||||
null=True,
|
||||
related_name="documents",
|
||||
on_delete=models.SET_NULL,
|
||||
verbose_name=_("document type")
|
||||
verbose_name=_("document type"),
|
||||
)
|
||||
|
||||
content = models.TextField(
|
||||
_("content"),
|
||||
blank=True,
|
||||
help_text=_("The raw, text-only data of the document. This field is "
|
||||
"primarily used for searching.")
|
||||
help_text=_(
|
||||
"The raw, text-only data of the document. This field is "
|
||||
"primarily used for searching."
|
||||
),
|
||||
)
|
||||
|
||||
mime_type = models.CharField(
|
||||
_("mime type"),
|
||||
max_length=256,
|
||||
editable=False
|
||||
)
|
||||
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
|
||||
|
||||
tags = models.ManyToManyField(
|
||||
Tag, related_name="documents", blank=True,
|
||||
verbose_name=_("tags")
|
||||
Tag, related_name="documents", blank=True, verbose_name=_("tags")
|
||||
)
|
||||
|
||||
checksum = models.CharField(
|
||||
@ -152,7 +135,7 @@ class Document(models.Model):
|
||||
max_length=32,
|
||||
editable=False,
|
||||
unique=True,
|
||||
help_text=_("The checksum of the original document.")
|
||||
help_text=_("The checksum of the original document."),
|
||||
)
|
||||
|
||||
archive_checksum = models.CharField(
|
||||
@ -161,28 +144,26 @@ class Document(models.Model):
|
||||
editable=False,
|
||||
blank=True,
|
||||
null=True,
|
||||
help_text=_("The checksum of the archived document.")
|
||||
help_text=_("The checksum of the archived document."),
|
||||
)
|
||||
|
||||
created = models.DateTimeField(
|
||||
_("created"),
|
||||
default=timezone.now, db_index=True)
|
||||
created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
|
||||
|
||||
modified = models.DateTimeField(
|
||||
_("modified"),
|
||||
auto_now=True, editable=False, db_index=True)
|
||||
_("modified"), auto_now=True, editable=False, db_index=True
|
||||
)
|
||||
|
||||
storage_type = models.CharField(
|
||||
_("storage type"),
|
||||
max_length=11,
|
||||
choices=STORAGE_TYPES,
|
||||
default=STORAGE_TYPE_UNENCRYPTED,
|
||||
editable=False
|
||||
editable=False,
|
||||
)
|
||||
|
||||
added = models.DateTimeField(
|
||||
_("added"),
|
||||
default=timezone.now, editable=False, db_index=True)
|
||||
_("added"), default=timezone.now, editable=False, db_index=True
|
||||
)
|
||||
|
||||
filename = models.FilePathField(
|
||||
_("filename"),
|
||||
@ -191,7 +172,7 @@ class Document(models.Model):
|
||||
default=None,
|
||||
unique=True,
|
||||
null=True,
|
||||
help_text=_("Current filename in storage")
|
||||
help_text=_("Current filename in storage"),
|
||||
)
|
||||
|
||||
archive_filename = models.FilePathField(
|
||||
@ -201,7 +182,7 @@ class Document(models.Model):
|
||||
default=None,
|
||||
unique=True,
|
||||
null=True,
|
||||
help_text=_("Current archive filename in storage")
|
||||
help_text=_("Current archive filename in storage"),
|
||||
)
|
||||
|
||||
archive_serial_number = models.IntegerField(
|
||||
@ -210,8 +191,9 @@ class Document(models.Model):
|
||||
null=True,
|
||||
unique=True,
|
||||
db_index=True,
|
||||
help_text=_("The position of this document in your physical document "
|
||||
"archive.")
|
||||
help_text=_(
|
||||
"The position of this document in your physical document " "archive."
|
||||
),
|
||||
)
|
||||
|
||||
class Meta:
|
||||
@ -238,10 +220,7 @@ class Document(models.Model):
|
||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||
|
||||
@property
|
||||
def source_file(self):
|
||||
@ -254,10 +233,7 @@ class Document(models.Model):
|
||||
@property
|
||||
def archive_path(self):
|
||||
if self.has_archive_version:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(self.archive_filename)
|
||||
)
|
||||
return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename))
|
||||
else:
|
||||
return None
|
||||
|
||||
@ -291,10 +267,7 @@ class Document(models.Model):
|
||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||
file_name += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
settings.THUMBNAIL_DIR,
|
||||
file_name
|
||||
)
|
||||
return os.path.join(settings.THUMBNAIL_DIR, file_name)
|
||||
|
||||
@property
|
||||
def thumbnail_file(self):
|
||||
@ -311,15 +284,13 @@ class Log(models.Model):
|
||||
(logging.CRITICAL, _("critical")),
|
||||
)
|
||||
|
||||
group = models.UUIDField(
|
||||
_("group"),
|
||||
blank=True, null=True)
|
||||
group = models.UUIDField(_("group"), blank=True, null=True)
|
||||
|
||||
message = models.TextField(_("message"))
|
||||
|
||||
level = models.PositiveIntegerField(
|
||||
_("level"),
|
||||
choices=LEVELS, default=logging.INFO)
|
||||
_("level"), choices=LEVELS, default=logging.INFO
|
||||
)
|
||||
|
||||
created = models.DateTimeField(_("created"), auto_now_add=True)
|
||||
|
||||
@ -333,18 +304,14 @@ class Log(models.Model):
|
||||
|
||||
|
||||
class SavedView(models.Model):
|
||||
|
||||
class Meta:
|
||||
|
||||
ordering = ("name",)
|
||||
verbose_name = _("saved view")
|
||||
verbose_name_plural = _("saved views")
|
||||
|
||||
user = models.ForeignKey(User, on_delete=models.CASCADE,
|
||||
verbose_name=_("user"))
|
||||
name = models.CharField(
|
||||
_("name"),
|
||||
max_length=128)
|
||||
user = models.ForeignKey(User, on_delete=models.CASCADE, verbose_name=_("user"))
|
||||
name = models.CharField(_("name"), max_length=128)
|
||||
|
||||
show_on_dashboard = models.BooleanField(
|
||||
_("show on dashboard"),
|
||||
@ -354,14 +321,9 @@ class SavedView(models.Model):
|
||||
)
|
||||
|
||||
sort_field = models.CharField(
|
||||
_("sort field"),
|
||||
max_length=128,
|
||||
null=True,
|
||||
blank=True
|
||||
_("sort field"), max_length=128, null=True, blank=True
|
||||
)
|
||||
sort_reverse = models.BooleanField(
|
||||
_("sort reverse"),
|
||||
default=False)
|
||||
sort_reverse = models.BooleanField(_("sort reverse"), default=False)
|
||||
|
||||
|
||||
class SavedViewFilterRule(models.Model):
|
||||
@ -388,25 +350,19 @@ class SavedViewFilterRule(models.Model):
|
||||
(19, _("title or content contains")),
|
||||
(20, _("fulltext query")),
|
||||
(21, _("more like this")),
|
||||
(22, _("has tags in"))
|
||||
(22, _("has tags in")),
|
||||
]
|
||||
|
||||
saved_view = models.ForeignKey(
|
||||
SavedView,
|
||||
on_delete=models.CASCADE,
|
||||
related_name="filter_rules",
|
||||
verbose_name=_("saved view")
|
||||
verbose_name=_("saved view"),
|
||||
)
|
||||
|
||||
rule_type = models.PositiveIntegerField(
|
||||
_("rule type"),
|
||||
choices=RULE_TYPES)
|
||||
rule_type = models.PositiveIntegerField(_("rule type"), choices=RULE_TYPES)
|
||||
|
||||
value = models.CharField(
|
||||
_("value"),
|
||||
max_length=128,
|
||||
blank=True,
|
||||
null=True)
|
||||
value = models.CharField(_("value"), max_length=128, blank=True, null=True)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("filter rule")
|
||||
@ -416,20 +372,23 @@ class SavedViewFilterRule(models.Model):
|
||||
# TODO: why is this in the models file?
|
||||
class FileInfo:
|
||||
|
||||
REGEXES = OrderedDict([
|
||||
("created-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("title", re.compile(
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
))
|
||||
])
|
||||
REGEXES = OrderedDict(
|
||||
[
|
||||
(
|
||||
"created-title",
|
||||
re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE,
|
||||
),
|
||||
),
|
||||
("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
|
||||
]
|
||||
)
|
||||
|
||||
def __init__(self, created=None, correspondent=None, title=None, tags=(),
|
||||
extension=None):
|
||||
def __init__(
|
||||
self, created=None, correspondent=None, title=None, tags=(), extension=None
|
||||
):
|
||||
|
||||
self.created = created
|
||||
self.title = title
|
||||
@ -451,9 +410,7 @@ class FileInfo:
|
||||
@classmethod
|
||||
def _mangle_property(cls, properties, name):
|
||||
if name in properties:
|
||||
properties[name] = getattr(cls, "_get_{}".format(name))(
|
||||
properties[name]
|
||||
)
|
||||
properties[name] = getattr(cls, "_get_{}".format(name))(properties[name])
|
||||
|
||||
@classmethod
|
||||
def from_filename(cls, filename):
|
||||
|
@ -27,11 +27,11 @@ from documents.signals import document_consumer_declaration
|
||||
# TODO: isnt there a date parsing library for this?
|
||||
|
||||
DATE_REGEX = re.compile(
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # NOQA: E501
|
||||
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # NOQA: E501
|
||||
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # NOQA: E501
|
||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
|
||||
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))"
|
||||
)
|
||||
|
||||
|
||||
@ -93,8 +93,7 @@ def get_parser_class_for_mime_type(mime_type):
|
||||
return None
|
||||
|
||||
# Return the parser with the highest weight.
|
||||
return sorted(
|
||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||
return sorted(options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||
|
||||
|
||||
def get_parser_class(path):
|
||||
@ -107,18 +106,20 @@ def get_parser_class(path):
|
||||
return get_parser_class_for_mime_type(mime_type)
|
||||
|
||||
|
||||
def run_convert(input_file,
|
||||
output_file,
|
||||
density=None,
|
||||
scale=None,
|
||||
alpha=None,
|
||||
strip=False,
|
||||
trim=False,
|
||||
type=None,
|
||||
depth=None,
|
||||
auto_orient=False,
|
||||
extra=None,
|
||||
logging_group=None):
|
||||
def run_convert(
|
||||
input_file,
|
||||
output_file,
|
||||
density=None,
|
||||
scale=None,
|
||||
alpha=None,
|
||||
strip=False,
|
||||
trim=False,
|
||||
type=None,
|
||||
depth=None,
|
||||
auto_orient=False,
|
||||
extra=None,
|
||||
logging_group=None,
|
||||
):
|
||||
|
||||
environment = os.environ.copy()
|
||||
if settings.CONVERT_MEMORY_LIMIT:
|
||||
@ -127,17 +128,17 @@ def run_convert(input_file,
|
||||
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
||||
|
||||
args = [settings.CONVERT_BINARY]
|
||||
args += ['-density', str(density)] if density else []
|
||||
args += ['-scale', str(scale)] if scale else []
|
||||
args += ['-alpha', str(alpha)] if alpha else []
|
||||
args += ['-strip'] if strip else []
|
||||
args += ['-trim'] if trim else []
|
||||
args += ['-type', str(type)] if type else []
|
||||
args += ['-depth', str(depth)] if depth else []
|
||||
args += ['-auto-orient'] if auto_orient else []
|
||||
args += ["-density", str(density)] if density else []
|
||||
args += ["-scale", str(scale)] if scale else []
|
||||
args += ["-alpha", str(alpha)] if alpha else []
|
||||
args += ["-strip"] if strip else []
|
||||
args += ["-trim"] if trim else []
|
||||
args += ["-type", str(type)] if type else []
|
||||
args += ["-depth", str(depth)] if depth else []
|
||||
args += ["-auto-orient"] if auto_orient else []
|
||||
args += [input_file, output_file]
|
||||
|
||||
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
|
||||
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
||||
|
||||
if not subprocess.Popen(args, env=environment).wait() == 0:
|
||||
raise ParseError("Convert failed at {}".format(args))
|
||||
@ -155,27 +156,25 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
|
||||
logger.warning(
|
||||
"Thumbnail generation with ImageMagick failed, falling back "
|
||||
"to ghostscript. Check your /etc/ImageMagick-x/policy.xml!",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
||||
cmd = [settings.GS_BINARY,
|
||||
"-q",
|
||||
"-sDEVICE=pngalpha",
|
||||
"-o", gs_out_path,
|
||||
in_path]
|
||||
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
||||
try:
|
||||
if not subprocess.Popen(cmd).wait() == 0:
|
||||
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
|
||||
# then run convert on the output from gs
|
||||
run_convert(density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file=gs_out_path,
|
||||
output_file=out_path,
|
||||
logging_group=logging_group)
|
||||
run_convert(
|
||||
density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file=gs_out_path,
|
||||
output_file=out_path,
|
||||
logging_group=logging_group,
|
||||
)
|
||||
|
||||
return out_path
|
||||
|
||||
@ -191,18 +190,19 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
|
||||
|
||||
# Run convert to get a decent thumbnail
|
||||
try:
|
||||
run_convert(density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file="{}[0]".format(in_path),
|
||||
output_file=out_path,
|
||||
logging_group=logging_group)
|
||||
run_convert(
|
||||
density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file="{}[0]".format(in_path),
|
||||
output_file=out_path,
|
||||
logging_group=logging_group,
|
||||
)
|
||||
except ParseError:
|
||||
out_path = make_thumbnail_from_pdf_gs_fallback(
|
||||
in_path, temp_dir, logging_group)
|
||||
out_path = make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group)
|
||||
|
||||
return out_path
|
||||
|
||||
@ -223,15 +223,17 @@ def parse_date(filename, text):
|
||||
settings={
|
||||
"DATE_ORDER": date_order,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE":
|
||||
True
|
||||
}
|
||||
"RETURN_AS_TIMEZONE_AWARE": True,
|
||||
},
|
||||
)
|
||||
|
||||
def __filter(date):
|
||||
if date and date.year > 1900 and \
|
||||
date <= timezone.now() and \
|
||||
date.date() not in settings.IGNORE_DATES:
|
||||
if (
|
||||
date
|
||||
and date.year > 1900
|
||||
and date <= timezone.now()
|
||||
and date.date() not in settings.IGNORE_DATES
|
||||
):
|
||||
return date
|
||||
return None
|
||||
|
||||
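The settings dictionary above is only reflowed. For context, a minimal sketch of how dateparser consumes these settings, assuming the dateparser package is installed (the sample string and the "DMY" order are illustrative):

import dateparser

date = dateparser.parse(
    "1 February 2020",
    settings={
        "DATE_ORDER": "DMY",
        "PREFER_DAY_OF_MONTH": "first",
        "RETURN_AS_TIMEZONE_AWARE": True,
    },
)
print(date)  # a timezone-aware datetime, or None if nothing could be parsed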
@ -285,8 +287,7 @@ class DocumentParser(LoggingMixin):
|
||||
super().__init__()
|
||||
self.logging_group = logging_group
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
self.tempdir = tempfile.mkdtemp(
|
||||
prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
|
||||
self.archive_path = None
|
||||
self.text = None
|
||||
@ -312,18 +313,21 @@ class DocumentParser(LoggingMixin):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_optimised_thumbnail(self,
|
||||
document_path,
|
||||
mime_type,
|
||||
file_name=None):
|
||||
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
|
||||
if settings.OPTIMIZE_THUMBNAILS:
|
||||
out_path = os.path.join(self.tempdir, "thumb_optipng.png")
|
||||
|
||||
args = (settings.OPTIPNG_BINARY,
|
||||
"-silent", "-o5", thumbnail, "-out", out_path)
|
||||
args = (
|
||||
settings.OPTIPNG_BINARY,
|
||||
"-silent",
|
||||
"-o5",
|
||||
thumbnail,
|
||||
"-out",
|
||||
out_path,
|
||||
)
|
||||
|
||||
self.log('debug', f"Execute: {' '.join(args)}")
|
||||
self.log("debug", f"Execute: {' '.join(args)}")
|
||||
|
||||
if not subprocess.Popen(args).wait() == 0:
|
||||
raise ParseError("Optipng failed at {}".format(args))
|
||||
|
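The hunks above are mechanical reformatting with no behavioural change. A minimal sketch of reproducing it locally through black's Python API, assuming black is installed (the snippet being formatted is illustrative):

import black

# A signature in the pre-black style seen throughout this diff.
unformatted = (
    "def run_convert(input_file,\n"
    "                output_file,\n"
    "                density=None):\n"
    "    pass\n"
)

# Prints the collapsed one-line signature, since it fits within 88 characters.
print(black.format_str(unformatted, mode=black.Mode()))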
@ -9,7 +9,6 @@ from documents.models import Document
|
||||
|
||||
|
||||
class SanityCheckMessages:
|
||||
|
||||
def __init__(self):
|
||||
self._messages = []
|
||||
|
||||
@ -29,7 +28,7 @@ class SanityCheckMessages:
|
||||
logger.info("Sanity checker detected no issues.")
|
||||
else:
|
||||
for msg in self._messages:
|
||||
logger.log(msg['level'], msg['message'])
|
||||
logger.log(msg["level"], msg["message"])
|
||||
|
||||
def __len__(self):
|
||||
return len(self._messages)
|
||||
@ -38,10 +37,10 @@ class SanityCheckMessages:
|
||||
return self._messages[item]
|
||||
|
||||
def has_error(self):
|
||||
return any([msg['level'] == logging.ERROR for msg in self._messages])
|
||||
return any([msg["level"] == logging.ERROR for msg in self._messages])
|
||||
|
||||
def has_warning(self):
|
||||
return any([msg['level'] == logging.WARNING for msg in self._messages])
|
||||
return any([msg["level"] == logging.WARNING for msg in self._messages])
|
||||
|
||||
|
||||
class SanityCheckFailedException(Exception):
|
||||
@ -71,9 +70,7 @@ def check_sanity(progress=False):
|
||||
with doc.thumbnail_file as f:
|
||||
f.read()
|
||||
except OSError as e:
|
||||
messages.error(
|
||||
f"Cannot read thumbnail file of document {doc.pk}: {e}"
|
||||
)
|
||||
messages.error(f"Cannot read thumbnail file of document {doc.pk}: {e}")
|
||||
|
||||
# Check sanity of the original file
|
||||
# TODO: extract method
|
||||
@ -86,8 +83,7 @@ def check_sanity(progress=False):
|
||||
with doc.source_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
except OSError as e:
|
||||
messages.error(
|
||||
f"Cannot read original file of document {doc.pk}: {e}")
|
||||
messages.error(f"Cannot read original file of document {doc.pk}: {e}")
|
||||
else:
|
||||
if not checksum == doc.checksum:
|
||||
messages.error(
|
||||
@ -108,9 +104,7 @@ def check_sanity(progress=False):
|
||||
)
|
||||
elif doc.has_archive_version:
|
||||
if not os.path.isfile(doc.archive_path):
|
||||
messages.error(
|
||||
f"Archived version of document {doc.pk} does not exist."
|
||||
)
|
||||
messages.error(f"Archived version of document {doc.pk} does not exist.")
|
||||
else:
|
||||
if os.path.normpath(doc.archive_path) in present_files:
|
||||
present_files.remove(os.path.normpath(doc.archive_path))
|
||||
|
@ -7,8 +7,15 @@ from rest_framework import serializers
|
||||
from rest_framework.fields import SerializerMethodField
|
||||
|
||||
from . import bulk_edit
|
||||
from .models import Correspondent, Tag, Document, DocumentType, \
|
||||
SavedView, SavedViewFilterRule, MatchingModel
|
||||
from .models import (
|
||||
Correspondent,
|
||||
Tag,
|
||||
Document,
|
||||
DocumentType,
|
||||
SavedView,
|
||||
SavedViewFilterRule,
|
||||
MatchingModel,
|
||||
)
|
||||
from .parsers import is_mime_type_supported
|
||||
|
||||
from django.utils.translation import gettext as _
|
||||
@ -23,7 +30,7 @@ class DynamicFieldsModelSerializer(serializers.ModelSerializer):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
# Don't pass the 'fields' arg up to the superclass
|
||||
fields = kwargs.pop('fields', None)
|
||||
fields = kwargs.pop("fields", None)
|
||||
|
||||
# Instantiate the superclass normally
|
||||
super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)
|
||||
@ -42,16 +49,19 @@ class MatchingModelSerializer(serializers.ModelSerializer):
|
||||
|
||||
def get_slug(self, obj):
|
||||
return slugify(obj.name)
|
||||
|
||||
slug = SerializerMethodField()
|
||||
|
||||
def validate_match(self, match):
|
||||
if 'matching_algorithm' in self.initial_data and self.initial_data['matching_algorithm'] == MatchingModel.MATCH_REGEX: # NOQA: E501
|
||||
if (
|
||||
"matching_algorithm" in self.initial_data
|
||||
and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX
|
||||
): # NOQA: E501
|
||||
try:
|
||||
re.compile(match)
|
||||
except Exception as e:
|
||||
raise serializers.ValidationError(
|
||||
_("Invalid regular expression: %(error)s") %
|
||||
{'error': str(e)}
|
||||
_("Invalid regular expression: %(error)s") % {"error": str(e)}
|
||||
)
|
||||
return match
|
||||
|
||||
@ -70,12 +80,11 @@ class CorrespondentSerializer(MatchingModelSerializer):
|
||||
"matching_algorithm",
|
||||
"is_insensitive",
|
||||
"document_count",
|
||||
"last_correspondence"
|
||||
"last_correspondence",
|
||||
)
|
||||
|
||||
|
||||
class DocumentTypeSerializer(MatchingModelSerializer):
|
||||
|
||||
class Meta:
|
||||
model = DocumentType
|
||||
fields = (
|
||||
@ -85,7 +94,7 @@ class DocumentTypeSerializer(MatchingModelSerializer):
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"is_insensitive",
|
||||
"document_count"
|
||||
"document_count",
|
||||
)
|
||||
|
||||
|
||||
@ -104,7 +113,7 @@ class ColorField(serializers.Field):
|
||||
(10, "#6a3d9a"),
|
||||
(11, "#b15928"),
|
||||
(12, "#000000"),
|
||||
(13, "#cccccc")
|
||||
(13, "#cccccc"),
|
||||
)
|
||||
|
||||
def to_internal_value(self, data):
|
||||
@ -122,7 +131,7 @@ class ColorField(serializers.Field):
|
||||
|
||||
class TagSerializerVersion1(MatchingModelSerializer):
|
||||
|
||||
colour = ColorField(source='color', default="#a6cee3")
|
||||
colour = ColorField(source="color", default="#a6cee3")
|
||||
|
||||
class Meta:
|
||||
model = Tag
|
||||
@ -135,20 +144,19 @@ class TagSerializerVersion1(MatchingModelSerializer):
|
||||
"matching_algorithm",
|
||||
"is_insensitive",
|
||||
"is_inbox_tag",
|
||||
"document_count"
|
||||
"document_count",
|
||||
)
|
||||
|
||||
|
||||
class TagSerializer(MatchingModelSerializer):
|
||||
|
||||
def get_text_color(self, obj):
|
||||
try:
|
||||
h = obj.color.lstrip('#')
|
||||
rgb = tuple(int(h[i:i + 2], 16)/256 for i in (0, 2, 4))
|
||||
h = obj.color.lstrip("#")
|
||||
rgb = tuple(int(h[i : i + 2], 16) / 256 for i in (0, 2, 4))
|
||||
luminance = math.sqrt(
|
||||
0.299 * math.pow(rgb[0], 2) +
|
||||
0.587 * math.pow(rgb[1], 2) +
|
||||
0.114 * math.pow(rgb[2], 2)
|
||||
0.299 * math.pow(rgb[0], 2)
|
||||
+ 0.587 * math.pow(rgb[1], 2)
|
||||
+ 0.114 * math.pow(rgb[2], 2)
|
||||
)
|
||||
return "#ffffff" if luminance < 0.53 else "#000000"
|
||||
except ValueError:
|
||||
@ -168,7 +176,7 @@ class TagSerializer(MatchingModelSerializer):
|
||||
"matching_algorithm",
|
||||
"is_insensitive",
|
||||
"is_inbox_tag",
|
||||
"document_count"
|
||||
"document_count",
|
||||
)
|
||||
|
||||
def validate_color(self, color):
|
||||
@ -231,7 +239,6 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
|
||||
|
||||
|
||||
class SavedViewFilterRuleSerializer(serializers.ModelSerializer):
|
||||
|
||||
class Meta:
|
||||
model = SavedViewFilterRule
|
||||
fields = ["rule_type", "value"]
|
||||
@ -244,28 +251,33 @@ class SavedViewSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = SavedView
|
||||
depth = 1
|
||||
fields = ["id", "name", "show_on_dashboard", "show_in_sidebar",
|
||||
"sort_field", "sort_reverse", "filter_rules"]
|
||||
fields = [
|
||||
"id",
|
||||
"name",
|
||||
"show_on_dashboard",
|
||||
"show_in_sidebar",
|
||||
"sort_field",
|
||||
"sort_reverse",
|
||||
"filter_rules",
|
||||
]
|
||||
|
||||
def update(self, instance, validated_data):
|
||||
if 'filter_rules' in validated_data:
|
||||
rules_data = validated_data.pop('filter_rules')
|
||||
if "filter_rules" in validated_data:
|
||||
rules_data = validated_data.pop("filter_rules")
|
||||
else:
|
||||
rules_data = None
|
||||
super(SavedViewSerializer, self).update(instance, validated_data)
|
||||
if rules_data is not None:
|
||||
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
|
||||
for rule_data in rules_data:
|
||||
SavedViewFilterRule.objects.create(
|
||||
saved_view=instance, **rule_data)
|
||||
SavedViewFilterRule.objects.create(saved_view=instance, **rule_data)
|
||||
return instance
|
||||
|
||||
def create(self, validated_data):
|
||||
rules_data = validated_data.pop('filter_rules')
|
||||
rules_data = validated_data.pop("filter_rules")
|
||||
saved_view = SavedView.objects.create(**validated_data)
|
||||
for rule_data in rules_data:
|
||||
SavedViewFilterRule.objects.create(
|
||||
saved_view=saved_view, **rule_data)
|
||||
SavedViewFilterRule.objects.create(saved_view=saved_view, **rule_data)
|
||||
return saved_view
|
||||
|
||||
|
||||
@ -275,20 +287,19 @@ class DocumentListSerializer(serializers.Serializer):
|
||||
required=True,
|
||||
label="Documents",
|
||||
write_only=True,
|
||||
child=serializers.IntegerField()
|
||||
child=serializers.IntegerField(),
|
||||
)
|
||||
|
||||
def _validate_document_id_list(self, documents, name="documents"):
|
||||
if not type(documents) == list:
|
||||
raise serializers.ValidationError(f"{name} must be a list")
|
||||
if not all([type(i) == int for i in documents]):
|
||||
raise serializers.ValidationError(
|
||||
f"{name} must be a list of integers")
|
||||
raise serializers.ValidationError(f"{name} must be a list of integers")
|
||||
count = Document.objects.filter(id__in=documents).count()
|
||||
if not count == len(documents):
|
||||
raise serializers.ValidationError(
|
||||
f"Some documents in {name} don't exist or were "
|
||||
f"specified twice.")
|
||||
f"Some documents in {name} don't exist or were " f"specified twice."
|
||||
)
|
||||
|
||||
def validate_documents(self, documents):
|
||||
self._validate_document_id_list(documents)
|
||||
@ -304,7 +315,7 @@ class BulkEditSerializer(DocumentListSerializer):
|
||||
"add_tag",
|
||||
"remove_tag",
|
||||
"modify_tags",
|
||||
"delete"
|
||||
"delete",
|
||||
],
|
||||
label="Method",
|
||||
write_only=True,
|
||||
@ -316,12 +327,12 @@ class BulkEditSerializer(DocumentListSerializer):
|
||||
if not type(tags) == list:
|
||||
raise serializers.ValidationError(f"{name} must be a list")
|
||||
if not all([type(i) == int for i in tags]):
|
||||
raise serializers.ValidationError(
|
||||
f"{name} must be a list of integers")
|
||||
raise serializers.ValidationError(f"{name} must be a list of integers")
|
||||
count = Tag.objects.filter(id__in=tags).count()
|
||||
if not count == len(tags):
|
||||
raise serializers.ValidationError(
|
||||
f"Some tags in {name} don't exist or were specified twice.")
|
||||
f"Some tags in {name} don't exist or were specified twice."
|
||||
)
|
||||
|
||||
def validate_method(self, method):
|
||||
if method == "set_correspondent":
|
||||
@ -340,8 +351,8 @@ class BulkEditSerializer(DocumentListSerializer):
|
||||
raise serializers.ValidationError("Unsupported method.")
|
||||
|
||||
def _validate_parameters_tags(self, parameters):
|
||||
if 'tag' in parameters:
|
||||
tag_id = parameters['tag']
|
||||
if "tag" in parameters:
|
||||
tag_id = parameters["tag"]
|
||||
try:
|
||||
Tag.objects.get(id=tag_id)
|
||||
except Tag.DoesNotExist:
|
||||
@ -350,48 +361,45 @@ class BulkEditSerializer(DocumentListSerializer):
|
||||
raise serializers.ValidationError("tag not specified")
|
||||
|
||||
def _validate_parameters_document_type(self, parameters):
|
||||
if 'document_type' in parameters:
|
||||
document_type_id = parameters['document_type']
|
||||
if "document_type" in parameters:
|
||||
document_type_id = parameters["document_type"]
|
||||
if document_type_id is None:
|
||||
# None is ok
|
||||
return
|
||||
try:
|
||||
DocumentType.objects.get(id=document_type_id)
|
||||
except DocumentType.DoesNotExist:
|
||||
raise serializers.ValidationError(
|
||||
"Document type does not exist")
|
||||
raise serializers.ValidationError("Document type does not exist")
|
||||
else:
|
||||
raise serializers.ValidationError("document_type not specified")
|
||||
|
||||
def _validate_parameters_correspondent(self, parameters):
|
||||
if 'correspondent' in parameters:
|
||||
correspondent_id = parameters['correspondent']
|
||||
if "correspondent" in parameters:
|
||||
correspondent_id = parameters["correspondent"]
|
||||
if correspondent_id is None:
|
||||
return
|
||||
try:
|
||||
Correspondent.objects.get(id=correspondent_id)
|
||||
except Correspondent.DoesNotExist:
|
||||
raise serializers.ValidationError(
|
||||
"Correspondent does not exist")
|
||||
raise serializers.ValidationError("Correspondent does not exist")
|
||||
else:
|
||||
raise serializers.ValidationError("correspondent not specified")
|
||||
|
||||
def _validate_parameters_modify_tags(self, parameters):
|
||||
if "add_tags" in parameters:
|
||||
self._validate_tag_id_list(parameters['add_tags'], "add_tags")
|
||||
self._validate_tag_id_list(parameters["add_tags"], "add_tags")
|
||||
else:
|
||||
raise serializers.ValidationError("add_tags not specified")
|
||||
|
||||
if "remove_tags" in parameters:
|
||||
self._validate_tag_id_list(parameters['remove_tags'],
|
||||
"remove_tags")
|
||||
self._validate_tag_id_list(parameters["remove_tags"], "remove_tags")
|
||||
else:
|
||||
raise serializers.ValidationError("remove_tags not specified")
|
||||
|
||||
def validate(self, attrs):
|
||||
|
||||
method = attrs['method']
|
||||
parameters = attrs['parameters']
|
||||
method = attrs["method"]
|
||||
parameters = attrs["parameters"]
|
||||
|
||||
if method == bulk_edit.set_correspondent:
|
||||
self._validate_parameters_correspondent(parameters)
|
||||
@ -448,8 +456,7 @@ class PostDocumentSerializer(serializers.Serializer):
|
||||
|
||||
if not is_mime_type_supported(mime_type):
|
||||
raise serializers.ValidationError(
|
||||
_("File type %(type)s not supported") %
|
||||
{'type': mime_type}
|
||||
_("File type %(type)s not supported") % {"type": mime_type}
|
||||
)
|
||||
|
||||
return document.name, document_data
|
||||
@ -476,13 +483,11 @@ class PostDocumentSerializer(serializers.Serializer):
|
||||
class BulkDownloadSerializer(DocumentListSerializer):
|
||||
|
||||
content = serializers.ChoiceField(
|
||||
choices=["archive", "originals", "both"],
|
||||
default="archive"
|
||||
choices=["archive", "originals", "both"], default="archive"
|
||||
)
|
||||
|
||||
compression = serializers.ChoiceField(
|
||||
choices=["none", "deflated", "bzip2", "lzma"],
|
||||
default="none"
|
||||
choices=["none", "deflated", "bzip2", "lzma"], default="none"
|
||||
)
|
||||
|
||||
def validate_compression(self, compression):
|
||||
@ -492,5 +497,5 @@ class BulkDownloadSerializer(DocumentListSerializer):
|
||||
"none": zipfile.ZIP_STORED,
|
||||
"deflated": zipfile.ZIP_DEFLATED,
|
||||
"bzip2": zipfile.ZIP_BZIP2,
|
||||
"lzma": zipfile.ZIP_LZMA
|
||||
"lzma": zipfile.ZIP_LZMA,
|
||||
}[compression]
|
||||
|
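The compression mapping at the end of this file only gains a trailing comma. For context, a minimal sketch of what those zipfile constants control when the archive is written (the file names and content are illustrative):

import zipfile

compression = {
    "none": zipfile.ZIP_STORED,
    "deflated": zipfile.ZIP_DEFLATED,
    "bzip2": zipfile.ZIP_BZIP2,
    "lzma": zipfile.ZIP_LZMA,
}["deflated"]

with zipfile.ZipFile("bulk_download.zip", "w", compression=compression) as archive:
    archive.writestr("readme.txt", "illustrative content")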
@ -13,9 +13,11 @@ from django.utils import termcolors, timezone
|
||||
from filelock import FileLock
|
||||
|
||||
from .. import matching
|
||||
from ..file_handling import delete_empty_directories, \
|
||||
create_source_path_directory, \
|
||||
generate_unique_filename
|
||||
from ..file_handling import (
|
||||
delete_empty_directories,
|
||||
create_source_path_directory,
|
||||
generate_unique_filename,
|
||||
)
|
||||
from ..models import Document, Tag, MatchingModel
|
||||
|
||||
|
||||
@ -27,21 +29,22 @@ def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
|
||||
document.tags.add(*inbox_tags)
|
||||
|
||||
|
||||
def set_correspondent(sender,
|
||||
document=None,
|
||||
logging_group=None,
|
||||
classifier=None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
color=False,
|
||||
**kwargs):
|
||||
def set_correspondent(
|
||||
sender,
|
||||
document=None,
|
||||
logging_group=None,
|
||||
classifier=None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
color=False,
|
||||
**kwargs,
|
||||
):
|
||||
if document.correspondent and not replace:
|
||||
return
|
||||
|
||||
potential_correspondents = matching.match_correspondents(document,
|
||||
classifier)
|
||||
potential_correspondents = matching.match_correspondents(document, classifier)
|
||||
|
||||
potential_count = len(potential_correspondents)
|
||||
if potential_correspondents:
|
||||
@ -53,13 +56,13 @@ def set_correspondent(sender,
|
||||
logger.debug(
|
||||
f"Detected {potential_count} potential correspondents, "
|
||||
f"so we've opted for {selected}",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
f"Detected {potential_count} potential correspondents, "
|
||||
f"not assigning any correspondent",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
|
||||
@ -67,7 +70,7 @@ def set_correspondent(sender,
|
||||
if suggest:
|
||||
if base_url:
|
||||
print(
|
||||
termcolors.colorize(str(document), fg='green')
|
||||
termcolors.colorize(str(document), fg="green")
|
||||
if color
|
||||
else str(document)
|
||||
)
|
||||
@ -75,37 +78,39 @@ def set_correspondent(sender,
|
||||
else:
|
||||
print(
|
||||
(
|
||||
termcolors.colorize(str(document), fg='green')
|
||||
termcolors.colorize(str(document), fg="green")
|
||||
if color
|
||||
else str(document)
|
||||
) + f" [{document.pk}]"
|
||||
)
|
||||
+ f" [{document.pk}]"
|
||||
)
|
||||
print(f"Suggest correspondent {selected}")
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning correspondent {selected} to {document}",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
document.correspondent = selected
|
||||
document.save(update_fields=("correspondent",))
|
||||
|
||||
|
||||
def set_document_type(sender,
|
||||
document=None,
|
||||
logging_group=None,
|
||||
classifier=None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
color=False,
|
||||
**kwargs):
|
||||
def set_document_type(
|
||||
sender,
|
||||
document=None,
|
||||
logging_group=None,
|
||||
classifier=None,
|
||||
replace=False,
|
||||
use_first=True,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
color=False,
|
||||
**kwargs,
|
||||
):
|
||||
if document.document_type and not replace:
|
||||
return
|
||||
|
||||
potential_document_type = matching.match_document_types(document,
|
||||
classifier)
|
||||
potential_document_type = matching.match_document_types(document, classifier)
|
||||
|
||||
potential_count = len(potential_document_type)
|
||||
if potential_document_type:
|
||||
@ -118,13 +123,13 @@ def set_document_type(sender,
|
||||
logger.info(
|
||||
f"Detected {potential_count} potential document types, "
|
||||
f"so we've opted for {selected}",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Detected {potential_count} potential document types, "
|
||||
f"not assigning any document type",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
|
||||
@ -132,7 +137,7 @@ def set_document_type(sender,
|
||||
if suggest:
|
||||
if base_url:
|
||||
print(
|
||||
termcolors.colorize(str(document), fg='green')
|
||||
termcolors.colorize(str(document), fg="green")
|
||||
if color
|
||||
else str(document)
|
||||
)
|
||||
@ -140,35 +145,39 @@ def set_document_type(sender,
|
||||
else:
|
||||
print(
|
||||
(
|
||||
termcolors.colorize(str(document), fg='green')
|
||||
termcolors.colorize(str(document), fg="green")
|
||||
if color
|
||||
else str(document)
|
||||
) + f" [{document.pk}]"
|
||||
)
|
||||
+ f" [{document.pk}]"
|
||||
)
|
||||
print(f"Suggest document type {selected}")
|
||||
else:
|
||||
logger.info(
|
||||
f"Assigning document type {selected} to {document}",
|
||||
extra={'group': logging_group}
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
document.document_type = selected
|
||||
document.save(update_fields=("document_type",))
|
||||
|
||||
|
||||
def set_tags(sender,
|
||||
document=None,
|
||||
logging_group=None,
|
||||
classifier=None,
|
||||
replace=False,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
color=False,
|
||||
**kwargs):
|
||||
def set_tags(
|
||||
sender,
|
||||
document=None,
|
||||
logging_group=None,
|
||||
classifier=None,
|
||||
replace=False,
|
||||
suggest=False,
|
||||
base_url=None,
|
||||
color=False,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
if replace:
|
||||
Document.tags.through.objects.filter(document=document).exclude(
|
||||
Q(tag__is_inbox_tag=True)).exclude(
|
||||
Q(tag__is_inbox_tag=True)
|
||||
).exclude(
|
||||
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
|
||||
).delete()
|
||||
|
||||
@ -181,14 +190,13 @@ def set_tags(sender,
|
||||
if suggest:
|
||||
extra_tags = current_tags - set(matched_tags)
|
||||
extra_tags = [
|
||||
t for t in extra_tags
|
||||
if t.matching_algorithm == MatchingModel.MATCH_AUTO
|
||||
t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
|
||||
]
|
||||
if not relevant_tags and not extra_tags:
|
||||
return
|
||||
if base_url:
|
||||
print(
|
||||
termcolors.colorize(str(document), fg='green')
|
||||
termcolors.colorize(str(document), fg="green")
|
||||
if color
|
||||
else str(document)
|
||||
)
|
||||
@ -196,15 +204,14 @@ def set_tags(sender,
|
||||
else:
|
||||
print(
|
||||
(
|
||||
termcolors.colorize(str(document), fg='green')
|
||||
termcolors.colorize(str(document), fg="green")
|
||||
if color
|
||||
else str(document)
|
||||
) + f" [{document.pk}]"
|
||||
)
|
||||
+ f" [{document.pk}]"
|
||||
)
|
||||
if relevant_tags:
|
||||
print(
|
||||
"Suggest tags: " + ", ".join([t.name for t in relevant_tags])
|
||||
)
|
||||
print("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
|
||||
if extra_tags:
|
||||
print("Extra tags: " + ", ".join([t.name for t in extra_tags]))
|
||||
else:
|
||||
@ -213,10 +220,8 @@ def set_tags(sender,
|
||||
|
||||
message = 'Tagging "{}" with "{}"'
|
||||
logger.info(
|
||||
message.format(
|
||||
document, ", ".join([t.name for t in relevant_tags])
|
||||
),
|
||||
extra={'group': logging_group}
|
||||
message.format(document, ", ".join([t.name for t in relevant_tags])),
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
document.tags.add(*relevant_tags)
|
||||
@ -235,9 +240,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
while True:
|
||||
new_file_path = os.path.join(
|
||||
settings.TRASH_DIR,
|
||||
old_filebase +
|
||||
(f"_{counter:02}" if counter else "") +
|
||||
old_fileext
|
||||
old_filebase + (f"_{counter:02}" if counter else "") + old_fileext,
|
||||
)
|
||||
|
||||
if os.path.exists(new_file_path):
|
||||
@ -245,8 +248,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
else:
|
||||
break
|
||||
|
||||
logger.debug(
|
||||
f"Moving {instance.source_path} to trash at {new_file_path}")
|
||||
logger.debug(f"Moving {instance.source_path} to trash at {new_file_path}")
|
||||
try:
|
||||
os.rename(instance.source_path, new_file_path)
|
||||
except OSError as e:
|
||||
@ -256,14 +258,15 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
)
|
||||
return
|
||||
|
||||
for filename in (instance.source_path,
|
||||
instance.archive_path,
|
||||
instance.thumbnail_path):
|
||||
for filename in (
|
||||
instance.source_path,
|
||||
instance.archive_path,
|
||||
instance.thumbnail_path,
|
||||
):
|
||||
if filename and os.path.isfile(filename):
|
||||
try:
|
||||
os.unlink(filename)
|
||||
logger.debug(
|
||||
f"Deleted file {filename}.")
|
||||
logger.debug(f"Deleted file {filename}.")
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
f"While deleting document {str(instance)}, the file "
|
||||
@ -271,14 +274,12 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
)
|
||||
|
||||
delete_empty_directories(
|
||||
os.path.dirname(instance.source_path),
|
||||
root=settings.ORIGINALS_DIR
|
||||
os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR
|
||||
)
|
||||
|
||||
if instance.has_archive_version:
|
||||
delete_empty_directories(
|
||||
os.path.dirname(instance.archive_path),
|
||||
root=settings.ARCHIVE_DIR
|
||||
os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR
|
||||
)
|
||||
|
||||
|
||||
@ -289,15 +290,15 @@ class CannotMoveFilesException(Exception):
|
||||
def validate_move(instance, old_path, new_path):
|
||||
if not os.path.isfile(old_path):
|
||||
# Can't do anything if the old file does not exist anymore.
|
||||
logger.fatal(
|
||||
f"Document {str(instance)}: File {old_path} has gone.")
|
||||
logger.fatal(f"Document {str(instance)}: File {old_path} has gone.")
|
||||
raise CannotMoveFilesException()
|
||||
|
||||
if os.path.isfile(new_path):
|
||||
# Can't do anything if the new file already exists. Skip updating file.
|
||||
logger.warning(
|
||||
f"Document {str(instance)}: Cannot rename file "
|
||||
f"since target path {new_path} already exists.")
|
||||
f"since target path {new_path} already exists."
|
||||
)
|
||||
raise CannotMoveFilesException()
|
||||
|
||||
|
||||
@ -333,7 +334,9 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
instance, archive_filename=True
|
||||
)
|
||||
|
||||
move_archive = old_archive_filename != instance.archive_filename # NOQA: E501
|
||||
move_archive = (
|
||||
old_archive_filename != instance.archive_filename
|
||||
) # NOQA: E501
|
||||
else:
|
||||
move_archive = False
|
||||
|
||||
@ -347,8 +350,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
os.rename(old_source_path, instance.source_path)
|
||||
|
||||
if move_archive:
|
||||
validate_move(
|
||||
instance, old_archive_path, instance.archive_path)
|
||||
validate_move(instance, old_archive_path, instance.archive_path)
|
||||
create_source_path_directory(instance.archive_path)
|
||||
os.rename(old_archive_path, instance.archive_path)
|
||||
|
||||
@ -390,12 +392,16 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
# finally, remove any empty sub folders. This will do nothing if
|
||||
# something has failed above.
|
||||
if not os.path.isfile(old_source_path):
|
||||
delete_empty_directories(os.path.dirname(old_source_path),
|
||||
root=settings.ORIGINALS_DIR)
|
||||
delete_empty_directories(
|
||||
os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR
|
||||
)
|
||||
|
||||
if instance.has_archive_version and not os.path.isfile(old_archive_path): # NOQA: E501
|
||||
delete_empty_directories(os.path.dirname(old_archive_path),
|
||||
root=settings.ARCHIVE_DIR)
|
||||
if instance.has_archive_version and not os.path.isfile(
|
||||
old_archive_path
|
||||
): # NOQA: E501
|
||||
delete_empty_directories(
|
||||
os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR
|
||||
)
|
||||
|
||||
|
||||
def set_log_entry(sender, document=None, logging_group=None, **kwargs):
|
||||
|
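The exploded signatures in this file show two black rules at once: a signature that no longer fits in 88 characters is broken onto one parameter per line, and the trailing comma black adds keeps it that way on later runs (the magic trailing comma). A minimal sketch, assuming black is installed (the call itself is illustrative):

import black

exploded = 'set_tags(document, classifier, replace=False,)\n'
collapsed = 'set_tags(document, classifier, replace=False)\n'

# The pre-existing trailing comma forces one argument per line even for a short call.
print(black.format_str(exploded, mode=black.Mode()))
# Without it, black keeps the call on a single line.
print(black.format_str(collapsed, mode=black.Mode()))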
@ -31,12 +31,11 @@ def index_reindex(progress_bar_disable=False):
|
||||
|
||||
|
||||
def train_classifier():
|
||||
if (not Tag.objects.filter(
|
||||
matching_algorithm=Tag.MATCH_AUTO).exists() and
|
||||
not DocumentType.objects.filter(
|
||||
matching_algorithm=Tag.MATCH_AUTO).exists() and
|
||||
not Correspondent.objects.filter(
|
||||
matching_algorithm=Tag.MATCH_AUTO).exists()):
|
||||
if (
|
||||
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
|
||||
):
|
||||
|
||||
return
|
||||
|
||||
@ -48,28 +47,25 @@ def train_classifier():
|
||||
try:
|
||||
if classifier.train():
|
||||
logger.info(
|
||||
"Saving updated classifier model to {}...".format(
|
||||
settings.MODEL_FILE)
|
||||
"Saving updated classifier model to {}...".format(settings.MODEL_FILE)
|
||||
)
|
||||
classifier.save()
|
||||
else:
|
||||
logger.debug(
|
||||
"Training data unchanged."
|
||||
)
|
||||
logger.debug("Training data unchanged.")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Classifier error: " + str(e)
|
||||
)
|
||||
logger.warning("Classifier error: " + str(e))
|
||||
|
||||
|
||||
def consume_file(path,
|
||||
override_filename=None,
|
||||
override_title=None,
|
||||
override_correspondent_id=None,
|
||||
override_document_type_id=None,
|
||||
override_tag_ids=None,
|
||||
task_id=None):
|
||||
def consume_file(
|
||||
path,
|
||||
override_filename=None,
|
||||
override_title=None,
|
||||
override_correspondent_id=None,
|
||||
override_document_type_id=None,
|
||||
override_tag_ids=None,
|
||||
task_id=None,
|
||||
):
|
||||
|
||||
document = Consumer().try_consume_file(
|
||||
path,
|
||||
@ -78,16 +74,16 @@ def consume_file(path,
|
||||
override_correspondent_id=override_correspondent_id,
|
||||
override_document_type_id=override_document_type_id,
|
||||
override_tag_ids=override_tag_ids,
|
||||
task_id=task_id
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
if document:
|
||||
return "Success. New document id {} created".format(
|
||||
document.pk
|
||||
)
|
||||
return "Success. New document id {} created".format(document.pk)
|
||||
else:
|
||||
raise ConsumerError("Unknown error: Returned document was null, but "
|
||||
"no error message was given.")
|
||||
raise ConsumerError(
|
||||
"Unknown error: Returned document was null, but "
|
||||
"no error message was given."
|
||||
)
|
||||
|
||||
|
||||
def sanity_check():
|
||||
@ -96,8 +92,7 @@ def sanity_check():
|
||||
messages.log_messages()
|
||||
|
||||
if messages.has_error():
|
||||
raise SanityCheckFailedException(
|
||||
"Sanity check failed with errors. See log.")
|
||||
raise SanityCheckFailedException("Sanity check failed with errors. See log.")
|
||||
elif messages.has_warning():
|
||||
return "Sanity check exited with warnings. See log."
|
||||
elif len(messages) > 0:
|
||||
|
@ -5,7 +5,6 @@ from ..models import Document, Correspondent
|
||||
|
||||
|
||||
class CorrespondentFactory(DjangoModelFactory):
|
||||
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
|
||||
@ -13,6 +12,5 @@ class CorrespondentFactory(DjangoModelFactory):
|
||||
|
||||
|
||||
class DocumentFactory(DjangoModelFactory):
|
||||
|
||||
class Meta:
|
||||
model = Document
|
||||
|
@ -11,7 +11,6 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
|
||||
def get_document_from_index(self, doc):
|
||||
ix = index.open_index()
|
||||
with ix.searcher() as searcher:
|
||||
@ -27,7 +26,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
doc.title = "new title"
|
||||
self.doc_admin.save_model(None, doc, None, None)
|
||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||
self.assertEqual(self.get_document_from_index(doc)['id'], doc.id)
|
||||
self.assertEqual(self.get_document_from_index(doc)["id"], doc.id)
|
||||
|
||||
def test_delete_model(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
@ -42,7 +41,9 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
def test_delete_queryset(self):
|
||||
docs = []
|
||||
for i in range(42):
|
||||
doc = Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
|
||||
doc = Document.objects.create(
|
||||
title="Many documents with the same title", checksum=f"{i:02}"
|
||||
)
|
||||
docs.append(doc)
|
||||
index.add_or_update_document(doc)
|
||||
|
||||
@ -59,5 +60,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||
self.assertIsNone(self.get_document_from_index(doc))
|
||||
|
||||
def test_created(self):
|
||||
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
|
||||
doc = Document.objects.create(
|
||||
title="test", created=timezone.datetime(2020, 4, 12)
|
||||
)
|
||||
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
||||
|
File diff suppressed because it is too large
@ -11,7 +11,6 @@ from ..models import Document
|
||||
|
||||
|
||||
class ChecksTestCase(TestCase):
|
||||
|
||||
def test_changed_password_check_empty_db(self):
|
||||
self.assertEqual(changed_password_check(None), [])
|
||||
|
||||
@ -23,8 +22,15 @@ class ChecksTestCase(TestCase):
|
||||
|
||||
self.assertEqual(parser_check(None), [])
|
||||
|
||||
with mock.patch('documents.checks.document_consumer_declaration.send') as m:
|
||||
with mock.patch("documents.checks.document_consumer_declaration.send") as m:
|
||||
m.return_value = []
|
||||
|
||||
self.assertEqual(parser_check(None), [Error("No parsers found. This is a bug. The consumer won't be "
|
||||
"able to consume any documents without parsers.")])
|
||||
self.assertEqual(
|
||||
parser_check(None),
|
||||
[
|
||||
Error(
|
||||
"No parsers found. This is a bug. The consumer won't be "
|
||||
"able to consume any documents without parsers."
|
||||
)
|
||||
],
|
||||
)
|
||||
|
@ -7,30 +7,60 @@ import pytest
|
||||
from django.conf import settings
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError, load_classifier
|
||||
from documents.classifier import (
|
||||
DocumentClassifier,
|
||||
IncompatibleClassifierVersionError,
|
||||
load_classifier,
|
||||
)
|
||||
from documents.models import Correspondent, Document, Tag, DocumentType
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestClassifier(DirectoriesMixin, TestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(TestClassifier, self).setUp()
|
||||
self.classifier = DocumentClassifier()
|
||||
|
||||
def generate_test_data(self):
|
||||
self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
self.c1 = Correspondent.objects.create(
|
||||
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
|
||||
)
|
||||
self.c2 = Correspondent.objects.create(name="c2")
|
||||
self.c3 = Correspondent.objects.create(name="c3", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
|
||||
self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
|
||||
self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
self.dt2 = DocumentType.objects.create(name="dt2", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
self.c3 = Correspondent.objects.create(
|
||||
name="c3", matching_algorithm=Correspondent.MATCH_AUTO
|
||||
)
|
||||
self.t1 = Tag.objects.create(
|
||||
name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12
|
||||
)
|
||||
self.t2 = Tag.objects.create(
|
||||
name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True
|
||||
)
|
||||
self.t3 = Tag.objects.create(
|
||||
name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45
|
||||
)
|
||||
self.dt = DocumentType.objects.create(
|
||||
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
|
||||
)
|
||||
self.dt2 = DocumentType.objects.create(
|
||||
name="dt2", matching_algorithm=DocumentType.MATCH_AUTO
|
||||
)
|
||||
|
||||
self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
|
||||
self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
|
||||
self.doc_inbox = Document.objects.create(title="doc235", content="aa", checksum="C")
|
||||
self.doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
content="this is a document from c1",
|
||||
correspondent=self.c1,
|
||||
checksum="A",
|
||||
document_type=self.dt,
|
||||
)
|
||||
self.doc2 = Document.objects.create(
|
||||
title="doc1",
|
||||
content="this is another document, but from c2",
|
||||
correspondent=self.c2,
|
||||
checksum="B",
|
||||
)
|
||||
self.doc_inbox = Document.objects.create(
|
||||
title="doc235", content="aa", checksum="C"
|
||||
)
|
||||
|
||||
self.doc1.tags.add(self.t1)
|
||||
self.doc2.tags.add(self.t1)
|
||||
@ -59,17 +89,29 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
def testTrain(self):
|
||||
self.generate_test_data()
|
||||
self.classifier.train()
|
||||
self.assertListEqual(list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk])
|
||||
self.assertListEqual(list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk])
|
||||
self.assertListEqual(
|
||||
list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk]
|
||||
)
|
||||
self.assertListEqual(
|
||||
list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk]
|
||||
)
|
||||
|
||||
def testPredict(self):
|
||||
self.generate_test_data()
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_correspondent(self.doc1.content), self.c1.pk
|
||||
)
|
||||
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
|
||||
self.assertListEqual(self.classifier.predict_tags(self.doc1.content), [self.t1.pk])
|
||||
self.assertListEqual(self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk])
|
||||
self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
|
||||
self.assertListEqual(
|
||||
self.classifier.predict_tags(self.doc1.content), [self.t1.pk]
|
||||
)
|
||||
self.assertListEqual(
|
||||
self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk]
|
||||
)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_document_type(self.doc1.content), self.dt.pk
|
||||
)
|
||||
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
|
||||
|
||||
def testDatasetHashing(self):
|
||||
@ -90,7 +132,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
classifier2 = DocumentClassifier()
|
||||
|
||||
current_ver = DocumentClassifier.FORMAT_VERSION
|
||||
with mock.patch("documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver+1):
|
||||
with mock.patch(
|
||||
"documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1
|
||||
):
|
||||
# assure that we won't load old classifiers.
|
||||
self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
|
||||
|
||||
@ -112,7 +156,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
new_classifier.load()
|
||||
self.assertFalse(new_classifier.train())
|
||||
|
||||
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
|
||||
@override_settings(
|
||||
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
|
||||
)
|
||||
def test_load_and_classify(self):
|
||||
self.generate_test_data()
|
||||
|
||||
@ -122,38 +168,67 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])
|
||||
|
||||
def test_one_correspondent_predict(self):
|
||||
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
||||
c1 = Correspondent.objects.create(
|
||||
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
|
||||
)
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
content="this is a document from c1",
|
||||
correspondent=c1,
|
||||
checksum="A",
|
||||
)
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||
|
||||
def test_one_correspondent_predict_manydocs(self):
|
||||
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
||||
doc2 = Document.objects.create(title="doc2", content="this is a document from noone", checksum="B")
|
||||
c1 = Correspondent.objects.create(
|
||||
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
|
||||
)
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
content="this is a document from c1",
|
||||
correspondent=c1,
|
||||
checksum="A",
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
title="doc2", content="this is a document from noone", checksum="B"
|
||||
)
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||
self.assertIsNone(self.classifier.predict_correspondent(doc2.content))
|
||||
|
||||
def test_one_type_predict(self):
|
||||
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
dt = DocumentType.objects.create(
|
||||
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
|
||||
)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
||||
checksum="A", document_type=dt)
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
content="this is a document from c1",
|
||||
checksum="A",
|
||||
document_type=dt,
|
||||
)
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||
|
||||
def test_one_type_predict_manydocs(self):
|
||||
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
dt = DocumentType.objects.create(
|
||||
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
|
||||
)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
||||
checksum="A", document_type=dt)
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1",
|
||||
content="this is a document from c1",
|
||||
checksum="A",
|
||||
document_type=dt,
|
||||
)
|
||||
|
||||
doc2 = Document.objects.create(title="doc1", content="this is a document from c2",
|
||||
checksum="B")
|
||||
doc2 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c2", checksum="B"
|
||||
)
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||
@ -162,7 +237,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
def test_one_tag_predict(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c1", checksum="A"
|
||||
)
|
||||
|
||||
doc1.tags.add(t1)
|
||||
self.classifier.train()
|
||||
@ -171,7 +248,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
def test_one_tag_predict_unassigned(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c1", checksum="A"
|
||||
)
|
||||
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [])
|
||||
@ -180,7 +259,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||
|
||||
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
||||
doc4 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c4", checksum="D"
|
||||
)
|
||||
|
||||
doc4.tags.add(t1)
|
||||
doc4.tags.add(t2)
|
||||
@ -191,10 +272,18 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc2 = Document.objects.create(title="doc1", content="this is a document from c2", checksum="B")
|
||||
doc3 = Document.objects.create(title="doc1", content="this is a document from c3", checksum="C")
|
||||
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c1", checksum="A"
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c2", checksum="B"
|
||||
)
|
||||
doc3 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c3", checksum="C"
|
||||
)
|
||||
doc4 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c4", checksum="D"
|
||||
)
|
||||
|
||||
doc1.tags.add(t1)
|
||||
doc2.tags.add(t2)
|
||||
@ -210,8 +299,12 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
def test_one_tag_predict_multi(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c1", checksum="A"
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
title="doc2", content="this is a document from c2", checksum="B"
|
||||
)
|
||||
|
||||
doc1.tags.add(t1)
|
||||
doc2.tags.add(t1)
|
||||
@ -222,8 +315,12 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
def test_one_tag_predict_multi_2(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", content="this is a document from c1", checksum="A"
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
title="doc2", content="this is a document from c2", checksum="B"
|
||||
)
|
||||
|
||||
doc1.tags.add(t1)
|
||||
self.classifier.train()
|
||||
@ -240,9 +337,15 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
self.assertIsNotNone(load_classifier())
|
||||
load.assert_called_once()
|
||||
|
||||
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
|
||||
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
|
||||
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
|
||||
@override_settings(
|
||||
CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}}
|
||||
)
|
||||
@override_settings(
|
||||
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
|
||||
)
|
||||
@pytest.mark.skip(
|
||||
reason="Disabled caching due to high memory usage - need to investigate."
|
||||
)
|
||||
def test_load_classifier_cached(self):
|
||||
classifier = load_classifier()
|
||||
self.assertIsNotNone(classifier)
|
||||
|
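The mock.patch call that bumps FORMAT_VERSION above is only reflowed. A minimal, self-contained sketch of the same pattern, temporarily replacing a class attribute for the duration of a with block (FakeClassifier is a stand-in, not the real DocumentClassifier):

from unittest import mock


class FakeClassifier:
    FORMAT_VERSION = 6


bumped = FakeClassifier.FORMAT_VERSION + 1
with mock.patch.object(FakeClassifier, "FORMAT_VERSION", bumped):
    print(FakeClassifier.FORMAT_VERSION)  # 7 while the patch is active

print(FakeClassifier.FORMAT_VERSION)  # 6 again once the block exits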
@ -31,21 +31,14 @@ class TestAttributes(TestCase):
|
||||
|
||||
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
||||
|
||||
|
||||
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
'- weird but should not break.pdf',
|
||||
None,
|
||||
'- weird but should not break',
|
||||
()
|
||||
"- weird but should not break.pdf", None, "- weird but should not break", ()
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
'weird but should not break -.pdf',
|
||||
None,
|
||||
'weird but should not break -',
|
||||
()
|
||||
"weird but should not break -.pdf", None, "weird but should not break -", ()
|
||||
)
|
||||
|
||||
|
||||
@ -55,19 +48,13 @@ class TestFieldPermutations(TestCase):
|
||||
"20150102030405Z",
|
||||
"20150102Z",
|
||||
)
|
||||
valid_correspondents = [
|
||||
"timmy",
|
||||
"Dr. McWheelie",
|
||||
"Dash Gor-don",
|
||||
"ο Θερμαστής",
|
||||
""
|
||||
]
|
||||
valid_correspondents = ["timmy", "Dr. McWheelie", "Dash Gor-don", "ο Θερμαστής", ""]
|
||||
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
|
||||
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
|
||||
|
||||
def _test_guessed_attributes(self, filename, created=None,
|
||||
correspondent=None, title=None,
|
||||
tags=None):
|
||||
def _test_guessed_attributes(
|
||||
self, filename, created=None, correspondent=None, title=None, tags=None
|
||||
):
|
||||
|
||||
info = FileInfo.from_filename(filename)
|
||||
|
||||
@ -92,13 +79,10 @@ class TestFieldPermutations(TestCase):
|
||||
if tags is None:
|
||||
self.assertEqual(info.tags, (), filename)
|
||||
else:
|
||||
self.assertEqual(
|
||||
[t.name for t in info.tags], tags.split(','),
|
||||
filename
|
||||
)
|
||||
self.assertEqual([t.name for t in info.tags], tags.split(","), filename)
|
||||
|
||||
def test_just_title(self):
|
||||
template = '{title}.pdf'
|
||||
template = "{title}.pdf"
|
||||
for title in self.valid_titles:
|
||||
spec = dict(title=title)
|
||||
filename = template.format(**spec)
|
||||
@ -109,12 +93,8 @@ class TestFieldPermutations(TestCase):
|
||||
|
||||
for created in self.valid_dates:
|
||||
for title in self.valid_titles:
|
||||
spec = {
|
||||
"created": created,
|
||||
"title": title
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
spec = {"created": created, "title": title}
|
||||
self._test_guessed_attributes(template.format(**spec), **spec)
|
||||
|
||||
def test_invalid_date_format(self):
|
||||
info = FileInfo.from_filename("06112017Z - title.pdf")
|
||||
@ -127,7 +107,7 @@ class TestFieldPermutations(TestCase):
|
||||
all_patt = re.compile("^.*$")
|
||||
none_patt = re.compile("$a")
|
||||
exact_patt = re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
|
||||
repl1 = " - \\4 - \\1." # (empty) corrspondent, title and tags
|
||||
repl1 = " - \\4 - \\1." # (empty) corrspondent, title and tags
|
||||
repl2 = "\\2Z - " + repl1 # creation date + repl1
|
||||
|
||||
# No transformations configured (= default)
|
||||
@ -137,36 +117,37 @@ class TestFieldPermutations(TestCase):
|
||||
self.assertIsNone(info.created)
|
||||
|
||||
# Pattern doesn't match (filename unaltered)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
||||
with self.settings(FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
||||
|
||||
# Simple transformation (match all)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
||||
with self.settings(FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "all")
|
||||
|
||||
# Multiple transformations configured (first pattern matches)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(all_patt, "all.gif"),
|
||||
(all_patt, "anotherall.gif")]):
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(all_patt, "all.gif"),
|
||||
(all_patt, "anotherall.gif"),
|
||||
]
|
||||
):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "all")
|
||||
|
||||
# Multiple transformations configured (second pattern matches)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(none_patt, "none.gif"),
|
||||
(all_patt, "anotherall.gif")]):
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(none_patt, "none.gif"),
|
||||
(all_patt, "anotherall.gif"),
|
||||
]
|
||||
):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "anotherall")
|
||||
|
||||
|
||||
class DummyParser(DocumentParser):
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||
# not important during tests
|
||||
raise NotImplementedError()
|
||||
@ -184,7 +165,6 @@ class DummyParser(DocumentParser):


class CopyParser(DocumentParser):

def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb

@ -202,7 +182,6 @@ class CopyParser(DocumentParser):


class FaultyParser(DocumentParser):

def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@ -233,8 +212,15 @@ def fake_magic_from_file(file, mime=False):

@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumer(DirectoriesMixin, TestCase):

def _assert_first_last_send_progress(self, first_status="STARTING", last_status="SUCCESS", first_progress=0, first_progress_max=100, last_progress=100, last_progress_max=100):
def _assert_first_last_send_progress(
self,
first_status="STARTING",
last_status="SUCCESS",
first_progress=0,
first_progress_max=100,
last_progress=100,
last_progress_max=100,
):

self._send_progress.assert_called()

@ -243,13 +229,17 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(args[1], first_progress_max)
self.assertEqual(args[2], first_status)

args, kwargs = self._send_progress.call_args_list[len(self._send_progress.call_args_list) - 1]
args, kwargs = self._send_progress.call_args_list[
len(self._send_progress.call_args_list) - 1
]
self.assertEqual(args[0], last_progress)
self.assertEqual(args[1], last_progress_max)
self.assertEqual(args[2], last_status)

def make_dummy_parser(self, logging_group, progress_callback=None):
return DummyParser(logging_group, self.dirs.scratch_dir, self.get_test_archive_file())
return DummyParser(
logging_group, self.dirs.scratch_dir, self.get_test_archive_file()
)

def make_faulty_parser(self, logging_group, progress_callback=None):
return FaultyParser(logging_group, self.dirs.scratch_dir)
@ -259,11 +249,16 @@ class TestConsumer(DirectoriesMixin, TestCase):

patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
m = patcher.start()
m.return_value = [(None, {
"parser": self.make_dummy_parser,
"mime_types": {"application/pdf": ".pdf"},
"weight": 0
})]
m.return_value = [
(
None,
{
"parser": self.make_dummy_parser,
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
)
]
self.addCleanup(patcher.stop)

# this prevents websocket message reports during testing.
@ -274,13 +269,21 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.consumer = Consumer()

def get_test_file(self):
src = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000001.pdf")
src = os.path.join(
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000001.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "sample.pdf")
shutil.copy(src, dst)
return dst

def get_test_archive_file(self):
src = os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf")
src = os.path.join(
os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"
)
dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
shutil.copy(src, dst)
return dst
@ -292,23 +295,19 @@ class TestConsumer(DirectoriesMixin, TestCase):
document = self.consumer.try_consume_file(filename)

self.assertEqual(document.content, "The Text")
self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
self.assertEqual(
document.title, os.path.splitext(os.path.basename(filename))[0]
)
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
self.assertEqual(document.filename, "0000001.pdf")
self.assertEqual(document.archive_filename, "0000001.pdf")

self.assertTrue(os.path.isfile(
document.source_path
))
self.assertTrue(os.path.isfile(document.source_path))

self.assertTrue(os.path.isfile(
document.thumbnail_path
))
self.assertTrue(os.path.isfile(document.thumbnail_path))

self.assertTrue(os.path.isfile(
document.archive_path
))
self.assertTrue(os.path.isfile(document.archive_path))

self.assertEqual(document.checksum, "42995833e01aea9b3edee44bbfdd7ce1")
self.assertEqual(document.archive_checksum, "62acb0bcbfbcaa62ca6ad3668e4e404b")
@ -330,40 +329,45 @@ class TestConsumer(DirectoriesMixin, TestCase):

document = self.consumer.try_consume_file(filename)

self.assertTrue(os.path.isfile(
document.source_path
))
self.assertTrue(os.path.isfile(document.source_path))

self.assertFalse(os.path.isfile(shadow_file))
self.assertFalse(os.path.isfile(filename))


def testOverrideFilename(self):
filename = self.get_test_file()
override_filename = "Statement for November.pdf"

document = self.consumer.try_consume_file(filename, override_filename=override_filename)
document = self.consumer.try_consume_file(
filename, override_filename=override_filename
)

self.assertEqual(document.title, "Statement for November")

self._assert_first_last_send_progress()

def testOverrideTitle(self):
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
document = self.consumer.try_consume_file(
self.get_test_file(), override_title="Override Title"
)
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()

def testOverrideCorrespondent(self):
c = Correspondent.objects.create(name="test")

document = self.consumer.try_consume_file(self.get_test_file(), override_correspondent_id=c.pk)
document = self.consumer.try_consume_file(
self.get_test_file(), override_correspondent_id=c.pk
)
self.assertEqual(document.correspondent.id, c.id)
self._assert_first_last_send_progress()

def testOverrideDocumentType(self):
dt = DocumentType.objects.create(name="test")

document = self.consumer.try_consume_file(self.get_test_file(), override_document_type_id=dt.pk)
document = self.consumer.try_consume_file(
self.get_test_file(), override_document_type_id=dt.pk
)
self.assertEqual(document.document_type.id, dt.id)
self._assert_first_last_send_progress()

@ -371,7 +375,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1")
t2 = Tag.objects.create(name="t2")
t3 = Tag.objects.create(name="t3")
document = self.consumer.try_consume_file(self.get_test_file(), override_tag_ids=[t1.id, t3.id])
document = self.consumer.try_consume_file(
self.get_test_file(), override_tag_ids=[t1.id, t3.id]
)

self.assertIn(t1, document.tags.all())
self.assertNotIn(t2, document.tags.all())
@ -384,7 +390,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"File not found",
self.consumer.try_consume_file,
"non-existing-file"
"non-existing-file",
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -396,7 +402,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"It is a duplicate",
self.consumer.try_consume_file,
self.get_test_file()
self.get_test_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -408,7 +414,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"It is a duplicate",
self.consumer.try_consume_file,
self.get_test_archive_file()
self.get_test_archive_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -425,25 +431,29 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"sample.pdf: Unsupported mime type application/pdf",
self.consumer.try_consume_file,
self.get_test_file()
self.get_test_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")


@mock.patch("documents.parsers.document_consumer_declaration.send")
def testFaultyParser(self, m):
m.return_value = [(None, {
"parser": self.make_faulty_parser,
"mime_types": {"application/pdf": ".pdf"},
"weight": 0
})]
m.return_value = [
(
None,
{
"parser": self.make_faulty_parser,
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
)
]

self.assertRaisesMessage(
ConsumerError,
"sample.pdf: Error while consuming document sample.pdf: Does not compute.",
self.consumer.try_consume_file,
self.get_test_file()
self.get_test_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -457,7 +467,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"sample.pdf: The following error occured while consuming sample.pdf: NO.",
self.consumer.try_consume_file,
filename
filename,
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -491,7 +501,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f)
return f

m.side_effect = lambda f, archive_filename = False: get_filename()
m.side_effect = lambda f, archive_filename=False: get_filename()

filename = self.get_test_file()

@ -565,17 +575,37 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||
def test_similar_filenames(self, m):
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
|
||||
m.return_value = [(None, {
|
||||
"parser": CopyParser,
|
||||
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
|
||||
"weight": 0
|
||||
})]
|
||||
doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
|
||||
doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
|
||||
doc3 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
|
||||
shutil.copy(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"),
|
||||
)
|
||||
shutil.copy(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
|
||||
os.path.join(settings.CONSUMPTION_DIR, "simple.png"),
|
||||
)
|
||||
shutil.copy(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"),
|
||||
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"),
|
||||
)
|
||||
m.return_value = [
|
||||
(
|
||||
None,
|
||||
{
|
||||
"parser": CopyParser,
|
||||
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
|
||||
"weight": 0,
|
||||
},
|
||||
)
|
||||
]
|
||||
doc1 = self.consumer.try_consume_file(
|
||||
os.path.join(settings.CONSUMPTION_DIR, "simple.png")
|
||||
)
|
||||
doc2 = self.consumer.try_consume_file(
|
||||
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")
|
||||
)
|
||||
doc3 = self.consumer.try_consume_file(
|
||||
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf")
|
||||
)
|
||||
|
||||
self.assertEqual(doc1.filename, "simple.png")
|
||||
self.assertEqual(doc1.archive_filename, "simple.pdf")
|
||||
@ -588,7 +618,6 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
|
||||
class PreConsumeTestCase(TestCase):
|
||||
|
||||
@mock.patch("documents.consumer.Popen")
|
||||
@override_settings(PRE_CONSUME_SCRIPT=None)
|
||||
def test_no_pre_consume_script(self, m):
|
||||
@ -625,7 +654,6 @@ class PreConsumeTestCase(TestCase):
|
||||
|
||||
|
||||
class PostConsumeTestCase(TestCase):
|
||||
|
||||
@mock.patch("documents.consumer.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||
def test_no_post_consume_script(self, m):
|
||||
@ -662,7 +690,9 @@ class PostConsumeTestCase(TestCase):
|
||||
with tempfile.NamedTemporaryFile() as script:
|
||||
with override_settings(POST_CONSUME_SCRIPT=script.name):
|
||||
c = Correspondent.objects.create(name="my_bank")
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
|
||||
doc = Document.objects.create(
|
||||
title="Test", mime_type="application/pdf", correspondent=c
|
||||
)
|
||||
tag1 = Tag.objects.create(name="a")
|
||||
tag2 = Tag.objects.create(name="b")
|
||||
doc.tags.add(tag1)
|
||||
|
@ -12,7 +12,9 @@ from documents.parsers import parse_date
|
||||
|
||||
class TestDate(TestCase):
|
||||
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "../../paperless_tesseract/tests/samples")
|
||||
SAMPLE_FILES = os.path.join(
|
||||
os.path.dirname(__file__), "../../paperless_tesseract/tests/samples"
|
||||
)
|
||||
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
|
||||
|
||||
def setUp(self):
|
||||
@ -38,24 +40,15 @@ class TestDate(TestCase):
|
||||
date = parse_date("", text)
|
||||
self.assertEqual(
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
|
||||
)
|
||||
|
||||
def test_date_format_5(self):
|
||||
text = (
|
||||
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
|
||||
"ipsum"
|
||||
)
|
||||
text = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem " "ipsum"
|
||||
date = parse_date("", text)
|
||||
self.assertEqual(
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
|
||||
)
|
||||
|
||||
def test_date_format_6(self):
|
||||
@ -73,18 +66,11 @@ class TestDate(TestCase):
|
||||
self.assertEqual(parse_date("", text), None)
|
||||
|
||||
def test_date_format_7(self):
|
||||
text = (
|
||||
"lorem ipsum\n"
|
||||
"März 2019\n"
|
||||
"lorem ipsum"
|
||||
)
|
||||
text = "lorem ipsum\n" "März 2019\n" "lorem ipsum"
|
||||
date = parse_date("", text)
|
||||
self.assertEqual(
|
||||
date,
|
||||
datetime.datetime(
|
||||
2019, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
|
||||
)
|
||||
|
||||
def test_date_format_8(self):
|
||||
@ -102,26 +88,15 @@ class TestDate(TestCase):
|
||||
)
|
||||
self.assertEqual(
|
||||
parse_date("", text),
|
||||
datetime.datetime(
|
||||
2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
datetime.datetime(2020, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
|
||||
)
|
||||
|
||||
@override_settings(SCRATCH_DIR=SCRATCH)
|
||||
def test_date_format_9(self):
|
||||
text = (
|
||||
"lorem ipsum\n"
|
||||
"27. Nullmonth 2020\n"
|
||||
"März 2020\n"
|
||||
"lorem ipsum"
|
||||
)
|
||||
text = "lorem ipsum\n" "27. Nullmonth 2020\n" "März 2020\n" "lorem ipsum"
|
||||
self.assertEqual(
|
||||
parse_date("", text),
|
||||
datetime.datetime(
|
||||
2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
datetime.datetime(2020, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
|
||||
)
|
||||
|
||||
def test_crazy_date_past(self, *args):
|
||||
@ -135,19 +110,17 @@ class TestDate(TestCase):
|
||||
|
||||
@override_settings(FILENAME_DATE_ORDER="YMD")
|
||||
def test_filename_date_parse_invalid(self, *args):
|
||||
self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"))
|
||||
|
||||
@override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)))
|
||||
def test_ignored_dates(self, *args):
|
||||
text = (
|
||||
"lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem "
|
||||
"ipsum"
|
||||
self.assertIsNone(
|
||||
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")
|
||||
)
|
||||
|
||||
@override_settings(
|
||||
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
|
||||
)
|
||||
def test_ignored_dates(self, *args):
|
||||
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"
|
||||
date = parse_date("", text)
|
||||
self.assertEqual(
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
|
||||
)
|
||||
|
@ -10,7 +10,6 @@ from ..models import Document, Correspondent
|
||||
|
||||
|
||||
class TestDocument(TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.originals_dir = tempfile.mkdtemp()
|
||||
self.thumb_dir = tempfile.mkdtemp()
|
||||
@ -30,7 +29,7 @@ class TestDocument(TestCase):
|
||||
title="Title",
|
||||
content="content",
|
||||
checksum="checksum",
|
||||
mime_type="application/pdf"
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
file_path = document.source_path
|
||||
@ -47,20 +46,36 @@ class TestDocument(TestCase):
|
||||
|
||||
def test_file_name(self):
|
||||
|
||||
doc = Document(mime_type="application/pdf", title="test", created=timezone.datetime(2020, 12, 25))
|
||||
doc = Document(
|
||||
mime_type="application/pdf",
|
||||
title="test",
|
||||
created=timezone.datetime(2020, 12, 25),
|
||||
)
|
||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
|
||||
|
||||
def test_file_name_jpg(self):
|
||||
|
||||
doc = Document(mime_type="image/jpeg", title="test", created=timezone.datetime(2020, 12, 25))
|
||||
doc = Document(
|
||||
mime_type="image/jpeg",
|
||||
title="test",
|
||||
created=timezone.datetime(2020, 12, 25),
|
||||
)
|
||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
|
||||
|
||||
def test_file_name_unknown(self):
|
||||
|
||||
doc = Document(mime_type="application/zip", title="test", created=timezone.datetime(2020, 12, 25))
|
||||
doc = Document(
|
||||
mime_type="application/zip",
|
||||
title="test",
|
||||
created=timezone.datetime(2020, 12, 25),
|
||||
)
|
||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
|
||||
|
||||
def test_file_name_invalid_type(self):
|
||||
|
||||
doc = Document(mime_type="image/jpegasd", title="test", created=timezone.datetime(2020, 12, 25))
|
||||
doc = Document(
|
||||
mime_type="image/jpegasd",
|
||||
title="test",
|
||||
created=timezone.datetime(2020, 12, 25),
|
||||
)
|
||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
|
||||
|
@ -13,13 +13,16 @@ from django.test import TestCase, override_settings
|
||||
from django.utils import timezone
|
||||
|
||||
from .utils import DirectoriesMixin
|
||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
|
||||
generate_unique_filename
|
||||
from ..file_handling import (
|
||||
generate_filename,
|
||||
create_source_path_directory,
|
||||
delete_empty_directories,
|
||||
generate_unique_filename,
|
||||
)
|
||||
from ..models import Document, Correspondent, Tag, DocumentType
|
||||
|
||||
|
||||
class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
def test_generate_source_filename(self):
|
||||
document = Document()
|
||||
@ -30,8 +33,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
self.assertEqual(generate_filename(document),
|
||||
"{:07d}.pdf.gpg".format(document.pk))
|
||||
self.assertEqual(
|
||||
generate_filename(document), "{:07d}.pdf.gpg".format(document.pk)
|
||||
)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming(self):
|
||||
@ -41,7 +45,10 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Test default source_path
|
||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk))
|
||||
self.assertEqual(
|
||||
document.source_path,
|
||||
settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk),
|
||||
)
|
||||
|
||||
document.filename = generate_filename(document)
|
||||
|
||||
@ -51,8 +58,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
# Enable encryption and check again
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none.pdf.gpg")
|
||||
self.assertEqual(document.filename, "none/none.pdf.gpg")
|
||||
|
||||
document.save()
|
||||
|
||||
@ -68,7 +74,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)
|
||||
self.assertEqual(
|
||||
os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True
|
||||
)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_missing_permissions(self):
|
||||
@ -79,13 +87,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none.pdf")
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Test source_path
|
||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")
|
||||
self.assertEqual(
|
||||
document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf"
|
||||
)
|
||||
|
||||
# Make the folder read- and execute-only (no writing and no renaming)
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
||||
@ -95,7 +104,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
|
||||
self.assertEqual(
|
||||
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
|
||||
)
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
||||
@ -103,7 +114,11 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_database_error(self):
|
||||
|
||||
document1 = Document.objects.create(mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
||||
document1 = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
storage_type=Document.STORAGE_TYPE_UNENCRYPTED,
|
||||
checksum="AAAAA",
|
||||
)
|
||||
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
@ -113,8 +128,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none.pdf")
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
@ -122,8 +136,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
|
||||
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
|
||||
m.side_effect = DatabaseError()
|
||||
@ -131,7 +144,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
|
||||
self.assertEqual(
|
||||
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
|
||||
)
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
@ -143,8 +158,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none.pdf")
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
@ -152,10 +166,15 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
# Ensure file deletion after delete
|
||||
pk = document.pk
|
||||
document.delete()
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
|
||||
self.assertEqual(
|
||||
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
|
||||
)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}", TRASH_DIR=tempfile.mkdtemp())
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}",
|
||||
TRASH_DIR=tempfile.mkdtemp(),
|
||||
)
|
||||
def test_document_delete_trash(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
@ -164,8 +183,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none.pdf")
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
@ -173,7 +191,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
# Ensure file was moved to trash after delete
|
||||
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
|
||||
document.delete()
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
|
||||
self.assertEqual(
|
||||
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
|
||||
)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
|
||||
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), False)
|
||||
@ -207,8 +227,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none.pdf")
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
@ -238,8 +257,18 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
|
||||
def test_asn(self):
|
||||
d1 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=652, checksum="A")
|
||||
d2 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=None, checksum="B")
|
||||
d1 = Document.objects.create(
|
||||
title="the_doc",
|
||||
mime_type="application/pdf",
|
||||
archive_serial_number=652,
|
||||
checksum="A",
|
||||
)
|
||||
d2 = Document.objects.create(
|
||||
title="the_doc",
|
||||
mime_type="application/pdf",
|
||||
archive_serial_number=None,
|
||||
checksum="B",
|
||||
)
|
||||
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
|
||||
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
|
||||
|
||||
@ -256,8 +285,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(generate_filename(document),
|
||||
"demo.pdf")
|
||||
self.assertEqual(generate_filename(document), "demo.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_dash(self):
|
||||
@ -272,8 +300,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(generate_filename(document),
|
||||
"demo.pdf")
|
||||
self.assertEqual(generate_filename(document), "demo.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_malformed(self):
|
||||
@ -288,8 +315,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(generate_filename(document),
|
||||
"none.pdf")
|
||||
self.assertEqual(generate_filename(document), "none.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
|
||||
def test_tags_all(self):
|
||||
@ -303,8 +329,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(generate_filename(document),
|
||||
"demo.pdf")
|
||||
self.assertEqual(generate_filename(document), "demo.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
|
||||
def test_tags_out_of_bounds(self):
|
||||
@ -318,8 +343,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(generate_filename(document),
|
||||
"none.pdf")
|
||||
self.assertEqual(generate_filename(document), "none.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags}")
|
||||
def test_tags_without_args(self):
|
||||
@ -338,7 +362,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
|
||||
|
||||
doc = Document.objects.create(title="doc2", checksum="B", mime_type="application/pdf")
|
||||
doc = Document.objects.create(
|
||||
title="doc2", checksum="B", mime_type="application/pdf"
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc), "doc2.pdf")
|
||||
|
||||
@ -348,12 +374,19 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
doc.filename = generate_filename(doc)
|
||||
doc.save()
|
||||
|
||||
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"))
|
||||
self.assertEqual(
|
||||
doc.source_path,
|
||||
os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"),
|
||||
)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}")
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}"
|
||||
)
|
||||
def test_created_year_month_day(self):
|
||||
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
|
||||
doc1 = Document.objects.create(title="doc1", mime_type="application/pdf", created=d1)
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", mime_type="application/pdf", created=d1
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
|
||||
|
||||
@ -361,10 +394,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}")
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}"
|
||||
)
|
||||
def test_added_year_month_day(self):
|
||||
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
|
||||
doc1 = Document.objects.create(title="doc1", mime_type="application/pdf", added=d1)
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", mime_type="application/pdf", added=d1
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
|
||||
|
||||
@ -372,7 +409,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}"
|
||||
)
|
||||
def test_nested_directory_cleanup(self):
|
||||
document = Document()
|
||||
document.mime_type = "application/pdf"
|
||||
@ -391,7 +430,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
pk = document.pk
|
||||
document.delete()
|
||||
|
||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
|
||||
self.assertEqual(
|
||||
os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False
|
||||
)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
|
||||
@ -414,12 +455,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
Path(os.path.join(tmp, "notempty", "file")).touch()
|
||||
os.makedirs(os.path.join(tmp, "notempty", "empty"))
|
||||
|
||||
delete_empty_directories(os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR)
|
||||
delete_empty_directories(
|
||||
os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR
|
||||
)
|
||||
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
|
||||
self.assertEqual(os.path.isfile(
|
||||
os.path.join(tmp, "notempty", "file")), True)
|
||||
self.assertEqual(os.path.isdir(
|
||||
os.path.join(tmp, "notempty", "empty")), False)
|
||||
self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
|
||||
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty", "empty")), False)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
|
||||
def test_invalid_format(self):
|
||||
@ -441,8 +482,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_duplicates(self):
|
||||
document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
|
||||
document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
|
||||
document = Document.objects.create(
|
||||
mime_type="application/pdf", title="qwe", checksum="A", pk=1
|
||||
)
|
||||
document2 = Document.objects.create(
|
||||
mime_type="application/pdf", title="qwe", checksum="B", pk=2
|
||||
)
|
||||
Path(document.source_path).touch()
|
||||
Path(document2.source_path).touch()
|
||||
document.filename = "0000001.pdf"
|
||||
@ -480,11 +525,17 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
self.assertEqual(document2.filename, "qwe.pdf")
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
@mock.patch("documents.signals.handlers.Document.objects.filter")
|
||||
def test_no_update_without_change(self, m):
|
||||
doc = Document.objects.create(title="document", filename="document.pdf", archive_filename="document.pdf", checksum="A", archive_checksum="B", mime_type="application/pdf")
|
||||
doc = Document.objects.create(
|
||||
title="document",
|
||||
filename="document.pdf",
|
||||
archive_filename="document.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.archive_path).touch()
|
||||
|
||||
@ -493,16 +544,20 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
m.assert_not_called()
|
||||
|
||||
|
||||
|
||||
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
||||
def test_create_no_format(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_filename="0000001.pdf",
|
||||
archive_checksum="B",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
@ -515,21 +570,39 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
self.assertFalse(os.path.isfile(original))
|
||||
self.assertFalse(os.path.isfile(archive))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
|
||||
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))
|
||||
self.assertEqual(
|
||||
doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")
|
||||
)
|
||||
self.assertEqual(
|
||||
doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
|
||||
)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
def test_move_archive_gone(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertFalse(os.path.isfile(archive))
|
||||
@ -545,7 +618,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
Path(archive).touch()
|
||||
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
|
||||
Path(existing_archive_file).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
self.assertFalse(os.path.isfile(original))
|
||||
self.assertFalse(os.path.isfile(archive))
|
||||
@ -561,8 +641,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
|
||||
archive_checksum="B", archive_filename="document.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="document",
|
||||
filename="document_01.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="document.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(doc.filename, "document.pdf")
|
||||
self.assertEqual(doc.archive_filename, "document.pdf")
|
||||
@ -577,8 +663,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
|
||||
archive_checksum="B", archive_filename="document_01.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="document",
|
||||
filename="document.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="document_01.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(doc.filename, "document.pdf")
|
||||
self.assertEqual(doc.archive_filename, "document.pdf")
|
||||
@ -589,7 +681,6 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
@mock.patch("documents.signals.handlers.os.rename")
|
||||
def test_move_archive_error(self, m):
|
||||
|
||||
def fake_rename(src, dst):
|
||||
if "archive" in src:
|
||||
raise OSError()
|
||||
@ -603,7 +694,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
m.assert_called()
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
@ -615,9 +713,16 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
def test_move_file_gone(self):
|
||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
#Path(original).touch()
|
||||
# Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
)
|
||||
|
||||
self.assertFalse(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
@ -627,7 +732,6 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
@mock.patch("documents.signals.handlers.os.rename")
|
||||
def test_move_file_error(self, m):
|
||||
|
||||
def fake_rename(src, dst):
|
||||
if "original" in src:
|
||||
raise OSError()
|
||||
@ -641,7 +745,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
)
|
||||
|
||||
m.assert_called()
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
@ -655,7 +766,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(original))
|
||||
self.assertTrue(os.path.isfile(archive))
|
||||
@ -678,8 +796,20 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
Path(original2).touch()
|
||||
Path(archive).touch()
|
||||
|
||||
doc1 = Document.objects.create(mime_type="image/png", title="document", filename="document.png", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
||||
doc2 = Document.objects.create(mime_type="application/pdf", title="0000001", filename="0000001.pdf", checksum="C")
|
||||
doc1 = Document.objects.create(
|
||||
mime_type="image/png",
|
||||
title="document",
|
||||
filename="document.png",
|
||||
checksum="A",
|
||||
archive_checksum="B",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
mime_type="application/pdf",
|
||||
title="0000001",
|
||||
filename="0000001.pdf",
|
||||
checksum="C",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(doc1.source_path))
|
||||
self.assertTrue(os.path.isfile(doc1.archive_path))
|
||||
@ -698,7 +828,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||
Path(original).touch()
|
||||
Path(archive).touch()
|
||||
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
|
||||
doc = Document(
|
||||
mime_type="application/pdf",
|
||||
title="my_doc",
|
||||
filename="0000001.pdf",
|
||||
checksum="A",
|
||||
archive_filename="0000001.pdf",
|
||||
archive_checksum="B",
|
||||
)
|
||||
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
|
||||
m.side_effect = DatabaseError()
|
||||
doc.save()
|
||||
@ -710,28 +847,38 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||
|
||||
|
||||
class TestFilenameGeneration(TestCase):
|
||||
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{title}"
|
||||
)
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_invalid_characters(self):
|
||||
|
||||
doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1")
|
||||
doc = Document.objects.create(
|
||||
title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1"
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), "This. is the title.pdf")
|
||||
|
||||
doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2")
|
||||
doc = Document.objects.create(
|
||||
title="my\\invalid/../title:yay",
|
||||
mime_type="application/pdf",
|
||||
pk=2,
|
||||
checksum="2",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
|
||||
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{created}"
|
||||
)
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{created}")
|
||||
def test_date(self):
|
||||
doc = Document.objects.create(title="does not matter", created=timezone.make_aware(datetime.datetime(2020,5,21, 7,36,51, 153)), mime_type="application/pdf", pk=2, checksum="2")
|
||||
doc = Document.objects.create(
|
||||
title="does not matter",
|
||||
created=timezone.make_aware(datetime.datetime(2020, 5, 21, 7, 36, 51, 153)),
|
||||
mime_type="application/pdf",
|
||||
pk=2,
|
||||
checksum="2",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
|
||||
|
||||
|
||||
def run():
|
||||
doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
|
||||
doc = Document.objects.create(
|
||||
checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow"
|
||||
)
|
||||
doc.filename = generate_unique_filename(doc)
|
||||
Path(doc.thumbnail_path).touch()
|
||||
with open(doc.source_path, "w") as f:
|
||||
|
@ -6,14 +6,14 @@ from ..management.commands.document_importer import Command
|
||||
|
||||
|
||||
class TestImporter(TestCase):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
TestCase.__init__(self, *args, **kwargs)
|
||||
|
||||
def test_check_manifest_exists(self):
|
||||
cmd = Command()
|
||||
self.assertRaises(
|
||||
CommandError, cmd._check_manifest_exists, "/tmp/manifest.json")
|
||||
CommandError, cmd._check_manifest_exists, "/tmp/manifest.json"
|
||||
)
|
||||
|
||||
def test_check_manifest(self):
|
||||
|
||||
@ -23,15 +23,14 @@ class TestImporter(TestCase):
|
||||
cmd.manifest = [{"model": "documents.document"}]
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
cmd._check_manifest()
|
||||
self.assertTrue(
|
||||
'The manifest file contains a record' in str(cm.exception))
|
||||
self.assertTrue("The manifest file contains a record" in str(cm.exception))
|
||||
|
||||
cmd.manifest = [{
|
||||
"model": "documents.document",
|
||||
EXPORTER_FILE_NAME: "noexist.pdf"
|
||||
}]
|
||||
cmd.manifest = [
|
||||
{"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}
|
||||
]
|
||||
# self.assertRaises(CommandError, cmd._check_manifest)
|
||||
with self.assertRaises(CommandError) as cm:
|
||||
cmd._check_manifest()
|
||||
self.assertTrue(
|
||||
'The manifest file refers to "noexist.pdf"' in str(cm.exception))
|
||||
'The manifest file refers to "noexist.pdf"' in str(cm.exception)
|
||||
)
|
||||
|
@ -6,10 +6,11 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_auto_complete(self):
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", checksum="A", content="test test2 test3")
|
||||
doc1 = Document.objects.create(
|
||||
title="doc1", checksum="A", content="test test2 test3"
|
||||
)
|
||||
doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
|
||||
doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
|
||||
|
||||
@ -19,7 +20,11 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
|
||||
ix = index.open_index()
|
||||
|
||||
self.assertListEqual(index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"])
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"])
|
||||
self.assertListEqual(
|
||||
index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"]
|
||||
)
|
||||
self.assertListEqual(
|
||||
index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"]
|
||||
)
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
|
||||
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
|
||||
|
@ -22,21 +22,29 @@ sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
|
||||
return Document.objects.create(
|
||||
checksum="A",
|
||||
title="A",
|
||||
content="first document",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
def test_archiver(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
shutil.copy(
|
||||
sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
|
||||
)
|
||||
|
||||
call_command('document_archiver')
|
||||
call_command("document_archiver")
|
||||
|
||||
def test_handle_document(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
shutil.copy(
|
||||
sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
|
||||
)
|
||||
|
||||
handle_document(doc.pk)
|
||||
|
||||
@ -66,10 +74,24 @@ class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||
def test_naming_priorities(self):
|
||||
doc1 = Document.objects.create(checksum="A", title="document", content="first document", mime_type="application/pdf", filename="document.pdf")
|
||||
doc2 = Document.objects.create(checksum="B", title="document", content="second document", mime_type="application/pdf", filename="document_01.pdf")
|
||||
doc1 = Document.objects.create(
|
||||
checksum="A",
|
||||
title="document",
|
||||
content="first document",
|
||||
mime_type="application/pdf",
|
||||
filename="document.pdf",
|
||||
)
|
||||
doc2 = Document.objects.create(
|
||||
checksum="B",
|
||||
title="document",
|
||||
content="second document",
|
||||
mime_type="application/pdf",
|
||||
filename="document_01.pdf",
|
||||
)
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf"))
|
||||
shutil.copy(
|
||||
sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf")
|
||||
)
|
||||
|
||||
handle_document(doc2.pk)
|
||||
handle_document(doc1.pk)
|
||||
@ -82,12 +104,11 @@ class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@override_settings(
|
||||
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
|
||||
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
|
||||
PASSPHRASE="test",
|
||||
PAPERLESS_FILENAME_FORMAT=None
|
||||
PAPERLESS_FILENAME_FORMAT=None,
|
||||
)
|
||||
@mock.patch("documents.management.commands.decrypt_documents.input")
|
||||
def test_decrypt(self, m):
|
||||
@ -99,17 +120,39 @@ class TestDecryptDocuments(TestCase):
|
||||
os.makedirs(thumb_dir, exist_ok=True)
|
||||
|
||||
override_settings(
|
||||
ORIGINALS_DIR=originals_dir,
|
||||
THUMBNAIL_DIR=thumb_dir,
|
||||
PASSPHRASE="test"
|
||||
ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test"
|
||||
).enable()
|
||||
|
||||
doc = Document.objects.create(checksum="82186aaa94f0b98697d704b90fd1c072", title="wow", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
doc = Document.objects.create(
|
||||
checksum="82186aaa94f0b98697d704b90fd1c072",
|
||||
title="wow",
|
||||
filename="0000004.pdf.gpg",
|
||||
mime_type="application/pdf",
|
||||
storage_type=Document.STORAGE_TYPE_GPG,
|
||||
)
|
||||
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), os.path.join(originals_dir, "0000004.pdf.gpg"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000004.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
|
||||
shutil.copy(
|
||||
os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"documents",
|
||||
"originals",
|
||||
"0000004.pdf.gpg",
|
||||
),
|
||||
os.path.join(originals_dir, "0000004.pdf.gpg"),
|
||||
)
|
||||
shutil.copy(
|
||||
os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"documents",
|
||||
"thumbnails",
|
||||
f"0000004.png.gpg",
|
||||
),
|
||||
os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"),
|
||||
)
|
||||
|
||||
call_command('decrypt_documents')
|
||||
call_command("decrypt_documents")
|
||||
|
||||
doc.refresh_from_db()
|
||||
|
||||
@ -126,7 +169,6 @@ class TestDecryptDocuments(TestCase):
|
||||
|
||||
|
||||
class TestMakeIndex(TestCase):
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m):
|
||||
call_command("document_index", "reindex")
|
||||
@ -139,7 +181,6 @@ class TestMakeIndex(TestCase):
|
||||
|
||||
|
||||
class TestRenamer(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
def test_rename(self):
|
||||
doc = Document.objects.create(title="test", mime_type="image/jpeg")
|
||||
@ -164,8 +205,9 @@ class TestRenamer(DirectoriesMixin, TestCase):
|
||||
|
||||
|
||||
class TestCreateClassifier(TestCase):
|
||||
|
||||
@mock.patch("documents.management.commands.document_create_classifier.train_classifier")
|
||||
@mock.patch(
|
||||
"documents.management.commands.document_create_classifier.train_classifier"
|
||||
)
|
||||
def test_create_classifier(self, m):
|
||||
call_command("document_create_classifier")
|
||||
|
||||
@ -173,7 +215,6 @@ class TestCreateClassifier(TestCase):
|
||||
|
||||
|
||||
class TestSanityChecker(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_no_issues(self):
|
||||
with self.assertLogs() as capture:
|
||||
call_command("document_sanity_checker")
|
||||
@ -182,7 +223,9 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
|
||||
self.assertIn("Sanity checker detected no issues.", capture.output[0])
|
||||
|
||||
def test_errors(self):
|
||||
doc = Document.objects.create(title="test", content="test", filename="test.pdf", checksum="abc")
|
||||
doc = Document.objects.create(
|
||||
title="test", content="test", filename="test.pdf", checksum="abc"
|
||||
)
|
||||
Path(doc.source_path).touch()
|
||||
Path(doc.thumbnail_path).touch()
|
||||
|
||||
@ -16,7 +16,6 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class ConsumerThread(Thread):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.cmd = document_consumer.Command()
|
||||
@ -31,7 +30,7 @@ class ConsumerThread(Thread):
|
||||
|
||||
def chunked(size, source):
|
||||
for i in range(0, len(source), size):
|
||||
yield source[i:i+size]
|
||||
yield source[i : i + size]
|
||||
|
||||
|
||||
class ConsumerMixin:
|
||||
@ -41,7 +40,9 @@ class ConsumerMixin:
|
||||
def setUp(self) -> None:
|
||||
super(ConsumerMixin, self).setUp()
|
||||
self.t = None
|
||||
patcher = mock.patch("documents.management.commands.document_consumer.async_task")
|
||||
patcher = mock.patch(
|
||||
"documents.management.commands.document_consumer.async_task"
|
||||
)
|
||||
self.task_mock = patcher.start()
|
||||
self.addCleanup(patcher.stop)
|
||||
|
||||
@ -81,13 +82,13 @@ class ConsumerMixin:
|
||||
print("Consumed a perfectly valid file.")
|
||||
|
||||
def slow_write_file(self, target, incomplete=False):
|
||||
with open(self.sample_file, 'rb') as f:
|
||||
with open(self.sample_file, "rb") as f:
|
||||
pdf_bytes = f.read()
|
||||
|
||||
if incomplete:
|
||||
pdf_bytes = pdf_bytes[:len(pdf_bytes) - 100]
|
||||
pdf_bytes = pdf_bytes[: len(pdf_bytes) - 100]
|
||||
|
||||
with open(target, 'wb') as f:
|
||||
with open(target, "wb") as f:
|
||||
# this will take 2 seconds, since the file is about 20k.
|
||||
print("Start writing file.")
|
||||
for b in chunked(1000, pdf_bytes):
|
||||
@ -97,7 +98,6 @@ class ConsumerMixin:
|
||||
|
||||
|
||||
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
def test_consume_file(self):
|
||||
self.t_start()
|
||||
|
||||
@ -195,23 +195,35 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
@override_settings(CONSUMPTION_DIR="does_not_exist")
|
||||
def test_consumption_directory_invalid(self):
|
||||
|
||||
self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')
|
||||
self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")
|
||||
|
||||
@override_settings(CONSUMPTION_DIR="")
|
||||
def test_consumption_directory_unset(self):
|
||||
|
||||
self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')
|
||||
self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")
|
||||
|
||||
def test_mac_write(self):
|
||||
self.task_mock.side_effect = self.bogus_task
|
||||
|
||||
self.t_start()
|
||||
|
||||
shutil.copy(self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE"))
|
||||
shutil.copy(self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
|
||||
shutil.copy(self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf"))
|
||||
shutil.copy(self.sample_file, os.path.join(self.dirs.consumption_dir, "my_second_file.pdf"))
|
||||
shutil.copy(self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_second_file.pdf"))
|
||||
shutil.copy(
|
||||
self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE")
|
||||
)
|
||||
shutil.copy(
|
||||
self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf")
|
||||
)
|
||||
shutil.copy(
|
||||
self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf")
|
||||
)
|
||||
shutil.copy(
|
||||
self.sample_file,
|
||||
os.path.join(self.dirs.consumption_dir, "my_second_file.pdf"),
|
||||
)
|
||||
shutil.copy(
|
||||
self.sample_file,
|
||||
os.path.join(self.dirs.consumption_dir, "._my_second_file.pdf"),
|
||||
)
|
||||
|
||||
sleep(5)
|
||||
|
||||
@ -219,15 +231,20 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
self.assertEqual(2, self.task_mock.call_count)
|
||||
|
||||
fnames = [os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list]
|
||||
fnames = [
|
||||
os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list
|
||||
]
|
||||
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
|
||||
|
||||
def test_is_ignored(self):
|
||||
test_paths = [
|
||||
(os.path.join(self.dirs.consumption_dir, "foo.pdf"), False),
|
||||
(os.path.join(self.dirs.consumption_dir, "foo","bar.pdf"), False),
|
||||
(os.path.join(self.dirs.consumption_dir, "foo", "bar.pdf"), False),
|
||||
(os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"), True),
|
||||
(os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"), True),
|
||||
(
|
||||
os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"),
|
||||
True,
|
||||
),
|
||||
(os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"), True),
|
||||
(os.path.join(self.dirs.consumption_dir, "._foo.pdf"), True),
|
||||
(os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"), False),
|
||||
@ -236,10 +253,13 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
self.assertEqual(
|
||||
expected_ignored,
|
||||
document_consumer._is_ignored(file_path),
|
||||
f'_is_ignored("{file_path}") != {expected_ignored}')
|
||||
f'_is_ignored("{file_path}") != {expected_ignored}',
|
||||
)
|
||||
|
||||
|
||||
@override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20)
|
||||
@override_settings(
|
||||
CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20
|
||||
)
|
||||
class TestConsumerPolling(TestConsumer):
|
||||
# just do all the tests with polling
|
||||
pass
|
||||
@ -251,21 +271,27 @@ class TestConsumerRecursive(TestConsumer):
|
||||
pass
|
||||
|
||||
|
||||
@override_settings(CONSUMER_RECURSIVE=True, CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20)
|
||||
@override_settings(
|
||||
CONSUMER_RECURSIVE=True,
|
||||
CONSUMER_POLLING=1,
|
||||
CONSUMER_POLLING_DELAY=3,
|
||||
CONSUMER_POLLING_RETRY_COUNT=20,
|
||||
)
|
||||
class TestConsumerRecursivePolling(TestConsumer):
|
||||
# just do all the tests with polling and recursive
|
||||
pass
|
||||
|
||||
|
||||
class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
@override_settings(CONSUMER_RECURSIVE=True)
|
||||
@override_settings(CONSUMER_SUBDIRS_AS_TAGS=True)
|
||||
def test_consume_file_with_path_tags(self):
|
||||
|
||||
tag_names = ("existingTag", "Space Tag")
|
||||
# Create a Tag prior to consuming a file using it in path
|
||||
tag_ids = [Tag.objects.create(name="existingtag").pk,]
|
||||
tag_ids = [
|
||||
Tag.objects.create(name="existingtag").pk,
|
||||
]
|
||||
|
||||
self.t_start()
|
||||
|
||||
@ -292,6 +318,8 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
# their order.
|
||||
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
|
||||
|
||||
@override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
|
||||
@override_settings(
|
||||
CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20
|
||||
)
|
||||
def test_consume_file_with_path_tags_polling(self):
|
||||
self.test_consume_file_with_path_tags()
|
||||
@ -17,15 +17,41 @@ from documents.tests.utils import DirectoriesMixin, paperless_environment
|
||||
|
||||
|
||||
class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.target = tempfile.mkdtemp()
|
||||
self.addCleanup(shutil.rmtree, self.target)
|
||||
|
||||
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
|
||||
self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
|
||||
self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
|
||||
self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
self.d1 = Document.objects.create(
|
||||
content="Content",
|
||||
checksum="42995833e01aea9b3edee44bbfdd7ce1",
|
||||
archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
|
||||
title="wow1",
|
||||
filename="0000001.pdf",
|
||||
mime_type="application/pdf",
|
||||
archive_filename="0000001.pdf",
|
||||
)
|
||||
self.d2 = Document.objects.create(
|
||||
content="Content",
|
||||
checksum="9c9691e51741c1f4f41a20896af31770",
|
||||
title="wow2",
|
||||
filename="0000002.pdf",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
self.d3 = Document.objects.create(
|
||||
content="Content",
|
||||
checksum="d38d7ed02e988e072caf924e0f3fcb76",
|
||||
title="wow2",
|
||||
filename="0000003.pdf",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
self.d4 = Document.objects.create(
|
||||
content="Content",
|
||||
checksum="82186aaa94f0b98697d704b90fd1c072",
|
||||
title="wow_dec",
|
||||
filename="0000004.pdf.gpg",
|
||||
mime_type="application/pdf",
|
||||
storage_type=Document.STORAGE_TYPE_GPG,
|
||||
)
|
||||
|
||||
self.t1 = Tag.objects.create(name="t")
|
||||
self.dt1 = DocumentType.objects.create(name="dt")
|
||||
@ -38,17 +64,21 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
super(TestExportImport, self).setUp()
|
||||
|
||||
def _get_document_from_manifest(self, manifest, id):
|
||||
f = list(filter(lambda d: d['model'] == "documents.document" and d['pk'] == id, manifest))
|
||||
f = list(
|
||||
filter(
|
||||
lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest
|
||||
)
|
||||
)
|
||||
if len(f) == 1:
|
||||
return f[0]
|
||||
else:
|
||||
raise ValueError(f"document with id {id} does not exist in manifest")
|
||||
|
||||
@override_settings(
|
||||
PASSPHRASE="test"
|
||||
)
|
||||
def _do_export(self, use_filename_format=False, compare_checksums=False, delete=False):
|
||||
args = ['document_exporter', self.target]
|
||||
@override_settings(PASSPHRASE="test")
|
||||
def _do_export(
|
||||
self, use_filename_format=False, compare_checksums=False, delete=False
|
||||
):
|
||||
args = ["document_exporter", self.target]
|
||||
if use_filename_format:
|
||||
args += ["--use-filename-format"]
|
||||
if compare_checksums:
|
||||
@ -65,39 +95,69 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_exporter(self, use_filename_format=False):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
manifest = self._do_export(use_filename_format=use_filename_format)
|
||||
|
||||
self.assertEqual(len(manifest), 8)
|
||||
self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
|
||||
|
||||
self.assertEqual(self._get_document_from_manifest(manifest, self.d1.id)['fields']['title'], "wow1")
|
||||
self.assertEqual(self._get_document_from_manifest(manifest, self.d2.id)['fields']['title'], "wow2")
|
||||
self.assertEqual(self._get_document_from_manifest(manifest, self.d3.id)['fields']['title'], "wow2")
|
||||
self.assertEqual(self._get_document_from_manifest(manifest, self.d4.id)['fields']['title'], "wow_dec")
|
||||
self.assertEqual(
|
||||
self._get_document_from_manifest(manifest, self.d1.id)["fields"]["title"],
|
||||
"wow1",
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_document_from_manifest(manifest, self.d2.id)["fields"]["title"],
|
||||
"wow2",
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_document_from_manifest(manifest, self.d3.id)["fields"]["title"],
|
||||
"wow2",
|
||||
)
|
||||
self.assertEqual(
|
||||
self._get_document_from_manifest(manifest, self.d4.id)["fields"]["title"],
|
||||
"wow_dec",
|
||||
)
|
||||
|
||||
for element in manifest:
|
||||
if element['model'] == 'documents.document':
|
||||
fname = os.path.join(self.target, element[document_exporter.EXPORTER_FILE_NAME])
|
||||
if element["model"] == "documents.document":
|
||||
fname = os.path.join(
|
||||
self.target, element[document_exporter.EXPORTER_FILE_NAME]
|
||||
)
|
||||
self.assertTrue(os.path.exists(fname))
|
||||
self.assertTrue(os.path.exists(os.path.join(self.target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
|
||||
self.assertTrue(
|
||||
os.path.exists(
|
||||
os.path.join(
|
||||
self.target,
|
||||
element[document_exporter.EXPORTER_THUMBNAIL_NAME],
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
with open(fname, "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, element['fields']['checksum'])
|
||||
self.assertEqual(checksum, element["fields"]["checksum"])
|
||||
|
||||
self.assertEqual(element['fields']['storage_type'], Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
self.assertEqual(
|
||||
element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED
|
||||
)
|
||||
|
||||
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
|
||||
fname = os.path.join(self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
|
||||
fname = os.path.join(
|
||||
self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME]
|
||||
)
|
||||
self.assertTrue(os.path.exists(fname))
|
||||
|
||||
with open(fname, "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, element['fields']['archive_checksum'])
|
||||
self.assertEqual(checksum, element["fields"]["archive_checksum"])
|
||||
|
||||
with paperless_environment() as dirs:
|
||||
self.assertEqual(Document.objects.count(), 4)
|
||||
@ -107,7 +167,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
Tag.objects.all().delete()
|
||||
self.assertEqual(Document.objects.count(), 0)
|
||||
|
||||
call_command('document_importer', self.target)
|
||||
call_command("document_importer", self.target)
|
||||
self.assertEqual(Document.objects.count(), 4)
|
||||
self.assertEqual(Tag.objects.count(), 1)
|
||||
self.assertEqual(Correspondent.objects.count(), 1)
|
||||
@ -122,21 +182,31 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_exporter_with_filename_format(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
with override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"):
|
||||
with override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"
|
||||
):
|
||||
self.test_exporter(use_filename_format=True)
|
||||
|
||||
def test_update_export_changed_time(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
self._do_export()
|
||||
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
|
||||
|
||||
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
|
||||
|
||||
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2"
|
||||
) as m:
|
||||
self._do_export()
|
||||
m.assert_not_called()
|
||||
|
||||
@ -145,7 +215,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
Path(self.d1.source_path).touch()
|
||||
|
||||
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2"
|
||||
) as m:
|
||||
self._do_export()
|
||||
self.assertEqual(m.call_count, 1)
|
||||
|
||||
@ -157,13 +229,18 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_update_export_changed_checksum(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
self._do_export()
|
||||
|
||||
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
|
||||
|
||||
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2"
|
||||
) as m:
|
||||
self._do_export()
|
||||
m.assert_not_called()
|
||||
|
||||
@ -172,7 +249,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
self.d2.checksum = "asdfasdgf3"
|
||||
self.d2.save()
|
||||
|
||||
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2"
|
||||
) as m:
|
||||
self._do_export(compare_checksums=True)
|
||||
self.assertEqual(m.call_count, 1)
|
||||
|
||||
@ -180,28 +259,48 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_update_export_deleted_document(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
manifest = self._do_export()
|
||||
|
||||
self.assertTrue(len(manifest), 7)
|
||||
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
|
||||
self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
|
||||
self.assertTrue(
|
||||
os.path.isfile(
|
||||
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
|
||||
)
|
||||
)
|
||||
self.d3.delete()
|
||||
|
||||
manifest = self._do_export()
|
||||
self.assertRaises(ValueError, self._get_document_from_manifest, manifest, self.d3.id)
|
||||
self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
|
||||
self.assertRaises(
|
||||
ValueError, self._get_document_from_manifest, manifest, self.d3.id
|
||||
)
|
||||
self.assertTrue(
|
||||
os.path.isfile(
|
||||
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
|
||||
)
|
||||
)
|
||||
|
||||
manifest = self._do_export(delete=True)
|
||||
self.assertFalse(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
|
||||
self.assertFalse(
|
||||
os.path.isfile(
|
||||
os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
|
||||
)
|
||||
)
|
||||
|
||||
self.assertTrue(len(manifest), 6)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
|
||||
def test_update_export_changed_location(self):
|
||||
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
|
||||
shutil.copytree(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "documents"),
|
||||
os.path.join(self.dirs.media_dir, "documents"),
|
||||
)
|
||||
|
||||
m = self._do_export(use_filename_format=True)
|
||||
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
|
||||
@ -216,11 +315,18 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
self.assertTrue(os.path.isfile(os.path.join(self.target, "new_title", "c.pdf")))
|
||||
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
|
||||
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
|
||||
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")))
|
||||
self.assertTrue(
|
||||
os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf"))
|
||||
)
|
||||
|
||||
def test_export_missing_files(self):
|
||||
|
||||
target = tempfile.mkdtemp()
|
||||
self.addCleanup(shutil.rmtree, target)
|
||||
Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", mime_type="application/pdf")
|
||||
self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target)
|
||||
Document.objects.create(
|
||||
checksum="AAAAAAAAAAAAAAAAA",
|
||||
title="wow",
|
||||
filename="0000004.pdf",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
|
||||
@ -6,44 +6,64 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestRetagger(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
self.d1 = Document.objects.create(checksum="A", title="A", content="first document")
|
||||
self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
|
||||
self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
|
||||
self.d4 = Document.objects.create(checksum="D", title="D", content="auto document")
|
||||
self.d1 = Document.objects.create(
|
||||
checksum="A", title="A", content="first document"
|
||||
)
|
||||
self.d2 = Document.objects.create(
|
||||
checksum="B", title="B", content="second document"
|
||||
)
|
||||
self.d3 = Document.objects.create(
|
||||
checksum="C", title="C", content="unrelated document"
|
||||
)
|
||||
self.d4 = Document.objects.create(
|
||||
checksum="D", title="D", content="auto document"
|
||||
)
|
||||
|
||||
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
|
||||
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
|
||||
self.tag_first = Tag.objects.create(
|
||||
name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY
|
||||
)
|
||||
self.tag_second = Tag.objects.create(
|
||||
name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY
|
||||
)
|
||||
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
self.tag_no_match = Tag.objects.create(name="test2")
|
||||
self.tag_auto = Tag.objects.create(name="tagauto", matching_algorithm=Tag.MATCH_AUTO)
|
||||
self.tag_auto = Tag.objects.create(
|
||||
name="tagauto", matching_algorithm=Tag.MATCH_AUTO
|
||||
)
|
||||
|
||||
self.d3.tags.add(self.tag_inbox)
|
||||
self.d3.tags.add(self.tag_no_match)
|
||||
self.d4.tags.add(self.tag_auto)
|
||||
|
||||
|
||||
self.correspondent_first = Correspondent.objects.create(
|
||||
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY)
|
||||
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY
|
||||
)
|
||||
self.correspondent_second = Correspondent.objects.create(
|
||||
name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY)
|
||||
name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY
|
||||
)
|
||||
|
||||
self.doctype_first = DocumentType.objects.create(
|
||||
name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY)
|
||||
name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY
|
||||
)
|
||||
self.doctype_second = DocumentType.objects.create(
|
||||
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY)
|
||||
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY
|
||||
)
|
||||
|
||||
def get_updated_docs(self):
|
||||
return Document.objects.get(title="A"), Document.objects.get(title="B"), \
|
||||
Document.objects.get(title="C"), Document.objects.get(title="D")
|
||||
return (
|
||||
Document.objects.get(title="A"),
|
||||
Document.objects.get(title="B"),
|
||||
Document.objects.get(title="C"),
|
||||
Document.objects.get(title="D"),
|
||||
)
|
||||
|
||||
def setUp(self) -> None:
|
||||
super(TestRetagger, self).setUp()
|
||||
self.make_models()
|
||||
|
||||
def test_add_tags(self):
|
||||
call_command('document_retagger', '--tags')
|
||||
call_command("document_retagger", "--tags")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 1)
|
||||
@ -55,14 +75,14 @@ class TestRetagger(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(d_second.tags.first(), self.tag_second)
|
||||
|
||||
def test_add_type(self):
|
||||
call_command('document_retagger', '--document_type')
|
||||
call_command("document_retagger", "--document_type")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.document_type, self.doctype_first)
|
||||
self.assertEqual(d_second.document_type, self.doctype_second)
|
||||
|
||||
def test_add_correspondent(self):
|
||||
call_command('document_retagger', '--correspondent')
|
||||
call_command("document_retagger", "--correspondent")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.correspondent, self.correspondent_first)
|
||||
@ -71,19 +91,26 @@ class TestRetagger(DirectoriesMixin, TestCase):
|
||||
def test_overwrite_preserve_inbox(self):
|
||||
self.d1.tags.add(self.tag_second)
|
||||
|
||||
call_command('document_retagger', '--tags', '--overwrite')
|
||||
call_command("document_retagger", "--tags", "--overwrite")
|
||||
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
|
||||
|
||||
self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
|
||||
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
|
||||
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_first.tags.all()], [self.tag_first.id]
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_second.tags.all()], [self.tag_second.id]
|
||||
)
|
||||
self.assertCountEqual(
|
||||
[tag.id for tag in d_unrelated.tags.all()],
|
||||
[self.tag_inbox.id, self.tag_no_match.id],
|
||||
)
|
||||
self.assertEqual(d_auto.tags.count(), 0)
|
||||
|
||||
def test_add_tags_suggest(self):
|
||||
call_command('document_retagger', '--tags', '--suggest')
|
||||
call_command("document_retagger", "--tags", "--suggest")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
@ -91,21 +118,23 @@ class TestRetagger(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
def test_add_type_suggest(self):
|
||||
call_command('document_retagger', '--document_type', '--suggest')
|
||||
call_command("document_retagger", "--document_type", "--suggest")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.document_type, None)
|
||||
self.assertEqual(d_second.document_type, None)
|
||||
|
||||
def test_add_correspondent_suggest(self):
|
||||
call_command('document_retagger', '--correspondent', '--suggest')
|
||||
call_command("document_retagger", "--correspondent", "--suggest")
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.correspondent, None)
|
||||
self.assertEqual(d_second.correspondent, None)
|
||||
|
||||
def test_add_tags_suggest_url(self):
|
||||
call_command('document_retagger', '--tags', '--suggest', '--base-url=http://localhost')
|
||||
call_command(
|
||||
"document_retagger", "--tags", "--suggest", "--base-url=http://localhost"
|
||||
)
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.tags.count(), 0)
|
||||
@ -113,14 +142,24 @@ class TestRetagger(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(d_auto.tags.count(), 1)
|
||||
|
||||
def test_add_type_suggest_url(self):
|
||||
call_command('document_retagger', '--document_type', '--suggest', '--base-url=http://localhost')
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--document_type",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.document_type, None)
|
||||
self.assertEqual(d_second.document_type, None)
|
||||
|
||||
def test_add_correspondent_suggest_url(self):
|
||||
call_command('document_retagger', '--correspondent', '--suggest', '--base-url=http://localhost')
|
||||
call_command(
|
||||
"document_retagger",
|
||||
"--correspondent",
|
||||
"--suggest",
|
||||
"--base-url=http://localhost",
|
||||
)
|
||||
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
|
||||
|
||||
self.assertEqual(d_first.correspondent, None)
|
||||
@ -12,7 +12,6 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestManageSuperUser(DirectoriesMixin, TestCase):
|
||||
|
||||
def reset_environment(self):
|
||||
if "PAPERLESS_ADMIN_USER" in os.environ:
|
||||
del os.environ["PAPERLESS_ADMIN_USER"]
|
||||
@ -11,13 +11,30 @@ from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestMakeThumbnails(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
self.d1 = Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf", filename="test.pdf")
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d1.source_path)
|
||||
self.d1 = Document.objects.create(
|
||||
checksum="A",
|
||||
title="A",
|
||||
content="first document",
|
||||
mime_type="application/pdf",
|
||||
filename="test.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
self.d1.source_path,
|
||||
)
|
||||
|
||||
self.d2 = Document.objects.create(checksum="Ass", title="A", content="first document", mime_type="application/pdf", filename="test2.pdf")
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d2.source_path)
|
||||
self.d2 = Document.objects.create(
|
||||
checksum="Ass",
|
||||
title="A",
|
||||
content="first document",
|
||||
mime_type="application/pdf",
|
||||
filename="test2.pdf",
|
||||
)
|
||||
shutil.copy(
|
||||
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||
self.d2.source_path,
|
||||
)
|
||||
|
||||
def setUp(self) -> None:
|
||||
super(TestMakeThumbnails, self).setUp()
|
||||
@ -40,13 +57,13 @@ class TestMakeThumbnails(DirectoriesMixin, TestCase):
|
||||
def test_command(self):
|
||||
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
|
||||
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
|
||||
call_command('document_thumbnails')
|
||||
call_command("document_thumbnails")
|
||||
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
|
||||
self.assertTrue(os.path.isfile(self.d2.thumbnail_path))
|
||||
|
||||
def test_command_documentid(self):
|
||||
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
|
||||
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
|
||||
call_command('document_thumbnails', '-d', f"{self.d1.id}")
|
||||
call_command("document_thumbnails", "-d", f"{self.d1.id}")
|
||||
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
|
||||
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
|
||||
@ -12,25 +12,24 @@ from ..signals import document_consumption_finished
|
||||
|
||||
|
||||
class TestMatching(TestCase):
|
||||
|
||||
def _test_matching(self, text, algorithm, true, false):
|
||||
for klass in (Tag, Correspondent, DocumentType):
|
||||
instance = klass.objects.create(
|
||||
name=str(randint(10000, 99999)),
|
||||
match=text,
|
||||
matching_algorithm=getattr(klass, algorithm)
|
||||
matching_algorithm=getattr(klass, algorithm),
|
||||
)
|
||||
for string in true:
|
||||
doc = Document(content=string)
|
||||
self.assertTrue(
|
||||
matching.matches(instance, doc),
|
||||
'"%s" should match "%s" but it does not' % (text, string)
|
||||
'"%s" should match "%s" but it does not' % (text, string),
|
||||
)
|
||||
for string in false:
|
||||
doc = Document(content=string)
|
||||
self.assertFalse(
|
||||
matching.matches(instance, doc),
|
||||
'"%s" should not match "%s" but it does' % (text, string)
|
||||
'"%s" should not match "%s" but it does' % (text, string),
|
||||
)
|
||||
|
||||
def test_match_all(self):
|
||||
@ -47,15 +46,13 @@ class TestMatching(TestCase):
|
||||
"I have alphas, charlie, and gamma in me",
|
||||
"I have alphas in me",
|
||||
"I have bravo in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
"12 34 56",
|
||||
"MATCH_ALL",
|
||||
(
|
||||
"I have 12 34, and 56 in me",
|
||||
),
|
||||
("I have 12 34, and 56 in me",),
|
||||
(
|
||||
"I have 12 in me",
|
||||
"I have 34 in me",
|
||||
@ -64,7 +61,7 @@ class TestMatching(TestCase):
|
||||
"I have 120, 34, and 56 in me",
|
||||
"I have 123456 in me",
|
||||
"I have 01234567 in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
@ -79,7 +76,7 @@ class TestMatching(TestCase):
|
||||
"the quick brown wolf jumped over the lazy dogs",
|
||||
"the quick brown fox jumped over the fat dogs",
|
||||
"the quick brown fox jumped over the lazy... dogs",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
def test_match_any(self):
|
||||
@ -97,7 +94,7 @@ class TestMatching(TestCase):
|
||||
(
|
||||
"I have alphas in me",
|
||||
"I have bravo in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
@ -114,7 +111,7 @@ class TestMatching(TestCase):
|
||||
(
|
||||
"I have 123456 in me",
|
||||
"I have 01234567 in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
@ -124,9 +121,7 @@ class TestMatching(TestCase):
|
||||
"the quick brown fox",
|
||||
"jumped over the lazy dogs.",
|
||||
),
|
||||
(
|
||||
"the lazy fox jumped over the brown dogs",
|
||||
)
|
||||
("the lazy fox jumped over the brown dogs",),
|
||||
)
|
||||
|
||||
def test_match_literal(self):
|
||||
@ -134,9 +129,7 @@ class TestMatching(TestCase):
|
||||
self._test_matching(
|
||||
"alpha charlie gamma",
|
||||
"MATCH_LITERAL",
|
||||
(
|
||||
"I have 'alpha charlie gamma' in me",
|
||||
),
|
||||
("I have 'alpha charlie gamma' in me",),
|
||||
(
|
||||
"I have alpha in me",
|
||||
"I have charlie in me",
|
||||
@ -146,15 +139,13 @@ class TestMatching(TestCase):
|
||||
"I have alphas, charlie, and gamma in me",
|
||||
"I have alphas in me",
|
||||
"I have bravo in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
self._test_matching(
|
||||
"12 34 56",
|
||||
"MATCH_LITERAL",
|
||||
(
|
||||
"I have 12 34 56 in me",
|
||||
),
|
||||
("I have 12 34 56 in me",),
|
||||
(
|
||||
"I have 12 in me",
|
||||
"I have 34 in me",
|
||||
@ -165,7 +156,7 @@ class TestMatching(TestCase):
|
||||
"I have 120, 340, and 560 in me",
|
||||
"I have 123456 in me",
|
||||
"I have 01234567 in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
def test_match_regex(self):
|
||||
@ -186,18 +177,11 @@ class TestMatching(TestCase):
|
||||
"I have alpha, charlie, and gamma in me",
|
||||
"I have alphas, charlie, and gamma in me",
|
||||
"I have alphas in me",
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
def test_tach_invalid_regex(self):
|
||||
self._test_matching(
|
||||
"[[",
|
||||
"MATCH_REGEX",
|
||||
[],
|
||||
[
|
||||
"Don't match this"
|
||||
]
|
||||
)
|
||||
self._test_matching("[[", "MATCH_REGEX", [], ["Don't match this"])
|
||||
|
||||
def test_match_fuzzy(self):
|
||||
|
||||
@ -210,9 +194,7 @@ class TestMatching(TestCase):
|
||||
"1220 Main Street, Springfeld, Miss.",
|
||||
"1220 Main Street Springfield Miss",
|
||||
),
|
||||
(
|
||||
"1220 Main Street, Springfield, Mich.",
|
||||
)
|
||||
("1220 Main Street, Springfield, Mich.",),
|
||||
)
|
||||
|
||||
|
||||
@ -225,9 +207,10 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
|
||||
|
||||
def setUp(self):
|
||||
TestCase.setUp(self)
|
||||
User.objects.create_user(username='test_consumer', password='12345')
|
||||
User.objects.create_user(username="test_consumer", password="12345")
|
||||
self.doc_contains = Document.objects.create(
|
||||
content="I contain the keyword.", mime_type="application/pdf")
|
||||
content="I contain the keyword.", mime_type="application/pdf"
|
||||
)
|
||||
|
||||
self.index_dir = tempfile.mkdtemp()
|
||||
# TODO: we should not need the index here.
|
||||
@ -238,40 +221,43 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
|
||||
|
||||
def test_tag_applied_any(self):
|
||||
t1 = Tag.objects.create(
|
||||
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY)
|
||||
name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
sender=self.__class__, document=self.doc_contains
|
||||
)
|
||||
self.assertTrue(list(self.doc_contains.tags.all()) == [t1])
|
||||
|
||||
def test_tag_not_applied(self):
|
||||
Tag.objects.create(
|
||||
name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY)
|
||||
name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
sender=self.__class__, document=self.doc_contains
|
||||
)
|
||||
self.assertTrue(list(self.doc_contains.tags.all()) == [])
|
||||
|
||||
def test_correspondent_applied(self):
|
||||
correspondent = Correspondent.objects.create(
|
||||
name="test",
|
||||
match="keyword",
|
||||
matching_algorithm=Correspondent.MATCH_ANY
|
||||
name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
sender=self.__class__, document=self.doc_contains
|
||||
)
|
||||
self.assertTrue(self.doc_contains.correspondent == correspondent)
|
||||
|
||||
def test_correspondent_not_applied(self):
|
||||
Tag.objects.create(
|
||||
name="test",
|
||||
match="no-match",
|
||||
matching_algorithm=Correspondent.MATCH_ANY
|
||||
name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
|
||||
)
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
sender=self.__class__, document=self.doc_contains
|
||||
)
|
||||
self.assertEqual(self.doc_contains.correspondent, None)
|
||||
|
||||
def test_logentry_created(self):
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, document=self.doc_contains)
|
||||
sender=self.__class__, document=self.doc_contains
|
||||
)
|
||||
|
||||
self.assertEqual(LogEntry.objects.count(), 1)
|
||||
@ -24,20 +24,14 @@ def archive_path_old(self):
|
||||
else:
|
||||
fname = "{:07}.pdf".format(self.pk)
|
||||
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
fname
|
||||
)
|
||||
return os.path.join(settings.ARCHIVE_DIR, fname)
|
||||
|
||||
|
||||
def archive_path_new(doc):
|
||||
if doc.archive_filename is not None:
|
||||
return os.path.join(
|
||||
settings.ARCHIVE_DIR,
|
||||
str(doc.archive_filename)
|
||||
)
|
||||
else:
|
||||
return None
|
||||
if doc.archive_filename is not None:
|
||||
return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def source_path(doc):
|
||||
@ -48,10 +42,7 @@ def source_path(doc):
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||
|
||||
|
||||
def thumbnail_path(doc):
|
||||
@ -59,13 +50,18 @@ def thumbnail_path(doc):
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
file_name += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
settings.THUMBNAIL_DIR,
|
||||
file_name
|
||||
)
|
||||
return os.path.join(settings.THUMBNAIL_DIR, file_name)
|
||||
|
||||
|
||||
def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
|
||||
def make_test_document(
|
||||
document_class,
|
||||
title: str,
|
||||
mime_type: str,
|
||||
original: str,
|
||||
original_filename: str,
|
||||
archive: str = None,
|
||||
archive_filename: str = None,
|
||||
):
|
||||
doc = document_class()
|
||||
doc.filename = original_filename
|
||||
doc.title = title
|
||||
@ -96,8 +92,12 @@ def make_test_document(document_class, title: str, mime_type: str, original: str
|
||||
|
||||
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
|
||||
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
simple_pdf2 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf")
|
||||
simple_pdf3 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf")
|
||||
simple_pdf2 = os.path.join(
|
||||
os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf"
|
||||
)
|
||||
simple_pdf3 = os.path.join(
|
||||
os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf"
|
||||
)
|
||||
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
|
||||
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
|
||||
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
||||
@ -106,26 +106,52 @@ simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1011_auto_20210101_2340'
|
||||
migrate_to = '1012_fix_archive_files'
|
||||
migrate_from = "1011_auto_20210101_2340"
|
||||
migrate_to = "1012_fix_archive_files"
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
|
||||
self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
|
||||
self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
|
||||
self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
|
||||
self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
|
||||
self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
|
||||
self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)
|
||||
self.unrelated = make_test_document(
|
||||
Document,
|
||||
"unrelated",
|
||||
"application/pdf",
|
||||
simple_pdf3,
|
||||
"unrelated.pdf",
|
||||
simple_pdf,
|
||||
)
|
||||
self.no_text = make_test_document(
|
||||
Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf
|
||||
)
|
||||
self.doc_no_archive = make_test_document(
|
||||
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
|
||||
)
|
||||
self.clash1 = make_test_document(
|
||||
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
|
||||
)
|
||||
self.clash2 = make_test_document(
|
||||
Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf
|
||||
)
|
||||
self.clash3 = make_test_document(
|
||||
Document, "clash", "image/png", simple_png, "clash.png", simple_pdf
|
||||
)
|
||||
self.clash4 = make_test_document(
|
||||
Document,
|
||||
"clash.png",
|
||||
"application/pdf",
|
||||
simple_pdf2,
|
||||
"clash.png.pdf",
|
||||
simple_pdf2,
|
||||
)
|
||||
|
||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
|
||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
|
||||
self.assertNotEqual(archive_path_old(self.clash1), archive_path_old(self.clash4))
|
||||
self.assertNotEqual(
|
||||
archive_path_old(self.clash1), archive_path_old(self.clash4)
|
||||
)
|
||||
|
||||
def testArchiveFilesMigrated(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
for doc in Document.objects.all():
|
||||
if doc.archive_checksum:
|
||||
@ -144,31 +170,65 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(archive_checksum, doc.archive_checksum)
|
||||
|
||||
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)
|
||||
self.assertEqual(
|
||||
Document.objects.filter(archive_checksum__isnull=False).count(), 6
|
||||
)
|
||||
|
||||
def test_filenames(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
|
||||
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, f"{self.clash1.id:07}.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, f"{self.clash2.id:07}.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, f"{self.clash3.id:07}.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash1.id).archive_filename,
|
||||
f"{self.clash1.id:07}.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash2.id).archive_filename,
|
||||
f"{self.clash2.id:07}.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash3.id).archive_filename,
|
||||
f"{self.clash3.id:07}.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
|
||||
)
|
||||
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
||||
|
||||
def test_filenames(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
|
||||
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, "none/clash_01.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, "none/clash_02.pdf")
|
||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf"
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash2.id).archive_filename,
|
||||
"none/clash_01.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash3.id).archive_filename,
|
||||
"none/clash_02.pdf",
|
||||
)
|
||||
self.assertEqual(
|
||||
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
|
||||
)
|
||||
|
||||
|
||||
def fake_parse_wrapper(parser, path, mime_type, file_name):
|
||||
@ -179,34 +239,63 @@ def fake_parse_wrapper(parser, path, mime_type, file_name):
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1011_auto_20210101_2340'
|
||||
migrate_to = '1012_fix_archive_files'
|
||||
migrate_from = "1011_auto_20210101_2340"
|
||||
migrate_to = "1012_fix_archive_files"
|
||||
auto_migrate = False
|
||||
|
||||
def test_archive_missing(self):
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
|
||||
doc = make_test_document(
|
||||
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
|
||||
)
|
||||
os.unlink(archive_path_old(doc))
|
||||
|
||||
self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
|
||||
self.assertRaisesMessage(
|
||||
ValueError, "does not exist at: ", self.performMigration
|
||||
)
|
||||
|
||||
def test_parser_missing(self):
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
|
||||
doc1 = make_test_document(
|
||||
Document,
|
||||
"document",
|
||||
"invalid/typesss768",
|
||||
simple_png,
|
||||
"document.png",
|
||||
simple_pdf,
|
||||
)
|
||||
doc2 = make_test_document(
|
||||
Document,
|
||||
"document",
|
||||
"invalid/typesss768",
|
||||
simple_jpg,
|
||||
"document.jpg",
|
||||
simple_pdf,
|
||||
)
|
||||
|
||||
self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
|
||||
self.assertRaisesMessage(
|
||||
ValueError, "no parsers are available", self.performMigration
|
||||
)
|
||||
|
||||
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
||||
def test_parser_error(self, m):
|
||||
m.side_effect = ParseError()
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
||||
doc1 = make_test_document(
|
||||
Document, "document", "image/png", simple_png, "document.png", simple_pdf
|
||||
)
|
||||
doc2 = make_test_document(
|
||||
Document,
|
||||
"document",
|
||||
"application/pdf",
|
||||
simple_jpg,
|
||||
"document.jpg",
|
||||
simple_pdf,
|
||||
)
|
||||
|
||||
self.assertIsNotNone(doc1.archive_checksum)
|
||||
self.assertIsNotNone(doc2.archive_checksum)
|
||||
@ -217,12 +306,29 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
||||
self.assertEqual(m.call_count, 6)
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
|
||||
4)
|
||||
len(
|
||||
list(
|
||||
filter(
|
||||
lambda log: "Parse error, will try again in 5 seconds" in log,
|
||||
capture.output,
|
||||
)
|
||||
)
|
||||
),
|
||||
4,
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
|
||||
2)
|
||||
len(
|
||||
list(
|
||||
filter(
|
||||
lambda log: "Unable to regenerate archive document for ID:"
|
||||
in log,
|
||||
capture.output,
|
||||
)
|
||||
)
|
||||
),
|
||||
2,
|
||||
)
|
||||
|
||||
Document = self.apps.get_model("documents", "Document")
|
||||
|
||||
@ -240,15 +346,33 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):

        Document = self.apps.get_model("documents", "Document")

        doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
        doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
        doc1 = make_test_document(
            Document, "document", "image/png", simple_png, "document.png", simple_pdf
        )
        doc2 = make_test_document(
            Document,
            "document",
            "application/pdf",
            simple_jpg,
            "document.jpg",
            simple_pdf,
        )

        with self.assertLogs() as capture:
            self.performMigration()

        self.assertEqual(
            len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
            2)
            len(
                list(
                    filter(
                        lambda log: "Parser did not return an archive document for document"
                        in log,
                        capture.output,
                    )
                )
            ),
            2,
        )

        Document = self.apps.get_model("documents", "Document")
@ -264,19 +388,37 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):

    migrate_from = '1012_fix_archive_files'
    migrate_to = '1011_auto_20210101_2340'
    migrate_from = "1012_fix_archive_files"
    migrate_to = "1011_auto_20210101_2340"

    def setUpBeforeMigration(self, apps):

        Document = apps.get_model("documents", "Document")

        doc_unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
        doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
        clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")
        doc_unrelated = make_test_document(
            Document,
            "unrelated",
            "application/pdf",
            simple_pdf2,
            "unrelated.txt",
            simple_pdf2,
            "unrelated.pdf",
        )
        doc_no_archive = make_test_document(
            Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
        )
        clashB = make_test_document(
            Document,
            "clash",
            "image/jpeg",
            simple_jpg,
            "clash.jpg",
            simple_pdf,
            "clash_02.pdf",
        )

    def testArchiveFilesReverted(self):
        Document = self.apps.get_model('documents', 'Document')
        Document = self.apps.get_model("documents", "Document")

        for doc in Document.objects.all():
            if doc.archive_checksum:
@ -291,35 +433,77 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
                    archive_checksum = hashlib.md5(f.read()).hexdigest()
                self.assertEqual(archive_checksum, doc.archive_checksum)

        self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
        self.assertEqual(
            Document.objects.filter(archive_checksum__isnull=False).count(), 2
        )


@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
    TestMigrateArchiveFilesBackwards
):
    pass

@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):

    migrate_from = '1012_fix_archive_files'
    migrate_to = '1011_auto_20210101_2340'
    migrate_from = "1012_fix_archive_files"
    migrate_to = "1011_auto_20210101_2340"
    auto_migrate = False

    def test_filename_clash(self):

        Document = self.apps.get_model("documents", "Document")

        self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
        self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
        self.clashA = make_test_document(
            Document,
            "clash",
            "application/pdf",
            simple_pdf,
            "clash.pdf",
            simple_pdf,
            "clash_02.pdf",
        )
        self.clashB = make_test_document(
            Document,
            "clash",
            "image/jpeg",
            simple_jpg,
            "clash.jpg",
            simple_pdf,
            "clash_01.pdf",
        )

        self.assertRaisesMessage(ValueError, "would clash with another archive filename", self.performMigration)
        self.assertRaisesMessage(
            ValueError,
            "would clash with another archive filename",
            self.performMigration,
        )

    def test_filename_exists(self):

        Document = self.apps.get_model("documents", "Document")

        self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
        self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
        self.clashA = make_test_document(
            Document,
            "clash",
            "application/pdf",
            simple_pdf,
            "clash.pdf",
            simple_pdf,
            "clash.pdf",
        )
        self.clashB = make_test_document(
            Document,
            "clash",
            "image/jpeg",
            simple_jpg,
            "clash.jpg",
            simple_pdf,
            "clash_01.pdf",
        )

        self.assertRaisesMessage(ValueError, "file already exists.", self.performMigration)
        self.assertRaisesMessage(
            ValueError, "file already exists.", self.performMigration
        )

@ -19,10 +19,7 @@ def source_path_before(self):
    if self.storage_type == STORAGE_TYPE_GPG:
        fname += ".gpg"

    return os.path.join(
        settings.ORIGINALS_DIR,
        fname
    )
    return os.path.join(settings.ORIGINALS_DIR, fname)


def file_type_after(self):
@ -37,30 +34,43 @@ def source_path_after(doc):
    if doc.storage_type == STORAGE_TYPE_GPG:
        fname += ".gpg"  # pragma: no cover

    return os.path.join(
        settings.ORIGINALS_DIR,
        fname
    )
    return os.path.join(settings.ORIGINALS_DIR, fname)


@override_settings(PASSPHRASE="test")
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):

    migrate_from = '1002_auto_20201111_1105'
    migrate_to = '1003_mime_types'
    migrate_from = "1002_auto_20201111_1105"
    migrate_to = "1003_mime_types"

    def setUpBeforeMigration(self, apps):
        Document = apps.get_model("documents", "Document")
        doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf")
        doc = Document.objects.create(
            title="test", file_type="pdf", filename="file1.pdf"
        )
        self.doc_id = doc.id
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc))
        shutil.copy(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            source_path_before(doc),
        )

        doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
        doc2 = Document.objects.create(
            checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG
        )
        self.doc2_id = doc2.id
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), source_path_before(doc2))
        shutil.copy(
            os.path.join(
                os.path.dirname(__file__),
                "samples",
                "documents",
                "originals",
                "0000004.pdf.gpg",
            ),
            source_path_before(doc2),
        )

    def testMimeTypesMigrated(self):
        Document = self.apps.get_model('documents', 'Document')
        Document = self.apps.get_model("documents", "Document")

        doc = Document.objects.get(id=self.doc_id)
        self.assertEqual(doc.mime_type, "application/pdf")
@ -72,17 +82,22 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
@override_settings(PASSPHRASE="test")
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):

    migrate_from = '1003_mime_types'
    migrate_to = '1002_auto_20201111_1105'
    migrate_from = "1003_mime_types"
    migrate_to = "1002_auto_20201111_1105"

    def setUpBeforeMigration(self, apps):
        Document = apps.get_model("documents", "Document")
        doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf")
        doc = Document.objects.create(
            title="test", mime_type="application/pdf", filename="file1.pdf"
        )
        self.doc_id = doc.id
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc))
        shutil.copy(
            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
            source_path_after(doc),
        )

    def testMimeTypesReverted(self):
        Document = self.apps.get_model('documents', 'Document')
        Document = self.apps.get_model("documents", "Document")

        doc = Document.objects.get(id=self.doc_id)
        self.assertEqual(doc.file_type, "pdf")
@ -3,13 +3,13 @@ from documents.tests.utils import DirectoriesMixin, TestMigrations

class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):

    migrate_from = '1014_auto_20210228_1614'
    migrate_to = '1015_remove_null_characters'
    migrate_from = "1014_auto_20210228_1614"
    migrate_to = "1015_remove_null_characters"

    def setUpBeforeMigration(self, apps):
        Document = apps.get_model("documents", "Document")
        self.doc = Document.objects.create(content="aaa\0bbb")

    def testMimeTypesMigrated(self):
        Document = self.apps.get_model('documents', 'Document')
        Document = self.apps.get_model("documents", "Document")
        self.assertNotIn("\0", Document.objects.get(id=self.doc.id).content)
Some files were not shown because too many files have changed in this diff.