diff --git a/Pipfile b/Pipfile index 2e86f2a42..48759307c 100644 --- a/Pipfile +++ b/Pipfile @@ -19,6 +19,7 @@ django-extensions = "*" django-filter = "~=2.4.0" django-q = "~=1.3.4" djangorestframework = "~=3.12.2" +filelock = "*" fuzzywuzzy = "*" gunicorn = "*" imap-tools = "*" @@ -26,6 +27,7 @@ langdetect = "*" pdftotext = "*" pathvalidate = "*" pillow = "*" +pikepdf = "*" python-gnupg = "*" python-dotenv = "*" python-dateutil = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 6158a70e0..1cfccb8ff 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b10db53eb22d917723aa6107ff0970dc4e2aa886ee03d3ae08a994a856d57986" + "sha256": "3d576f289958226a7583e4c471c7f8c11bff6933bf093185f623cfb381a92412" }, "pipfile-spec": 6, "requires": { @@ -197,6 +197,14 @@ "index": "pypi", "version": "==3.12.2" }, + "filelock": { + "hashes": [ + "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", + "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" + ], + "index": "pypi", + "version": "==3.0.12" + }, "fuzzywuzzy": { "hashes": [ "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", @@ -425,7 +433,7 @@ "sha256:fe0ca120e3347c851c34a91041d574f3c588d832023906d8ae18d66d042e8a52", "sha256:fe8e0152672f24d8bfdecc725f97e9013f2de1b41849150959526ca3562bd3ef" ], - "markers": "python_version < '3.9'", + "index": "pypi", "version": "==2.2.0" }, "pillow": { @@ -858,10 +866,10 @@ }, "certifi": { "hashes": [ - "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", - "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.11.8" + "version": "==2020.12.5" }, "chardet": { "hashes": [ @@ -961,17 +969,18 @@ }, "faker": { "hashes": [ - 
"sha256:7bca5b074299ac6532be2f72979e6793f1a2403ca8105cb4cf0b385a964469c4", - "sha256:fb21a76064847561033d8cab1cfd11af436ddf2c6fe72eb51b3cda51dff86bdc" + "sha256:1fcb415562ee6e2395b041e85fa6901d4708d30b84d54015226fa754ed0822c3", + "sha256:e8beccb398ee9b8cc1a91d9295121d66512b6753b4846eb1e7370545d46b3311" ], - "markers": "python_version >= '3.5'", - "version": "==5.0.0" + "markers": "python_version >= '3.6'", + "version": "==5.0.1" }, "filelock": { "hashes": [ "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" ], + "index": "pypi", "version": "==3.0.12" }, "idna": { @@ -1100,11 +1109,11 @@ }, "pygments": { "hashes": [ - "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", - "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" + "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716", + "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08" ], "markers": "python_version >= '3.5'", - "version": "==2.7.2" + "version": "==2.7.3" }, "pyparsing": { "hashes": [ @@ -1313,11 +1322,11 @@ }, "virtualenv": { "hashes": [ - "sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7", - "sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5" + "sha256:54b05fc737ea9c9ee9f8340f579e5da5b09fb64fd010ab5757eb90268616907c", + "sha256:b7a8ec323ee02fb2312f098b6b4c9de99559b462775bc8fe3627a73706603c1b" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.2.1" + "version": "==20.2.2" }, "zipp": { "hashes": [ diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index dfa7cfc65..13a0ba035 100644 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -23,8 +23,14 @@ wait_for_postgres() { echo "Waiting for PostgreSQL to start..." 
host="${PAPERLESS_DBHOST}" + port="${PAPERLESS_DBPORT}" - while !/download/``: Download the original document. -* ``/api/documents//thumb/``: Download the PNG thumbnail of a document. -* ``/api/documents//preview/``: Display the original document inline, +* ``/api/documents//download/``: Download the document. +* ``/api/documents//preview/``: Display the document inline, without downloading it. +* ``/api/documents//thumb/``: Download the PNG thumbnail of a document. + +Paperless generates archived PDF/A documents from consumed files and stores both +the original files as well as the archived files. By default, the endpoints +for previews and downloads serve the archived file, if it is available. +Otherwise, the original file is served. +Some documents cannot be archived. + +The endpoints correctly serve the response header fields ``Content-Disposition`` +and ``Content-Type`` to indicate the filename for download and the type of content of +the document. + +In order to download or preview the original document when an archived document is available, +supply the query parameter ``original=true``. .. hint:: @@ -38,13 +70,43 @@ individual documents: are in place. However, if you use these old URLs to access documents, you should update your app or script to use the new URLs. -.. note:: - The document endpoint provides tags, document types and correspondents as - ids in their corresponding fields. These are writeable. Paperless also - offers read-only objects for assigned tags, types and correspondents, - however, these might be removed in the future. As for now, the front end - requires them. +Getting document metadata +######################### + +The api also has an endpoint to retrieve read-only metadata about specific documents. This +information is not served along with the document objects, since it requires reading +files and would therefore slow down document lists considerably. 
+ +Access the metadata of a document with an ID ``id`` at ``/api/documents//metadata/``. + +The endpoint reports the following data: + +* ``original_checksum``: MD5 checksum of the original document. +* ``original_size``: Size of the original document, in bytes. +* ``original_mime_type``: Mime type of the original document. +* ``media_filename``: Current filename of the document, under which it is stored inside the media directory. +* ``has_archive_version``: True, if this document is archived, false otherwise. +* ``original_metadata``: A list of metadata associated with the original document. See below. +* ``archive_checksum``: MD5 checksum of the archived document, or null. +* ``archive_size``: Size of the archived document in bytes, or null. +* ``archive_metadata``: Metadata associated with the archived document, or null. See below. + +File metadata is reported as a list of objects in the following form: + +.. code:: json + + [ + { + "namespace": "http://ns.adobe.com/pdf/1.3/", + "prefix": "pdf", + "key": "Producer", + "value": "SparklePDF, Fancy edition" + }, + ] + +``namespace`` and ``prefix`` can be null. The actual metadata reported depends on the file type and the metadata +available in that specific document. Paperless only reports PDF metadata at this point. Authorization ############# @@ -54,11 +116,11 @@ The REST api provides three different forms of authentication. 1. Basic authentication Authorize by providing a HTTP header in the form - + .. code:: Authorization: Basic - + where ``credentials`` is a base64-encoded string of ``:`` 2. Session authentication @@ -79,7 +141,7 @@ The REST api provides three different forms of authentication. .. code:: Authorization: Token - + Tokens can be managed and revoked in the paperless admin. 
Searching for documents @@ -111,7 +173,7 @@ Result list object returned by the endpoint: "page_count": 1, "corrected_query": "", "results": [ - + ] } @@ -131,12 +193,12 @@ Result object: { "id": 1, "highlights": [ - + ], "score": 6.34234, "rank": 23, "document": { - + } } @@ -168,7 +230,7 @@ Each fragment contains a list of strings, and some of them are marked as a highl {"text": " fragment with a highlight."} ] ] - + When ``term`` is present within a string, the word within ``text`` should be highlighted. diff --git a/docs/changelog.rst b/docs/changelog.rst index 116c2e07c..a50fc31d5 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -5,6 +5,47 @@ Changelog ********* +paperless-ng 0.9.6 +################## + +This release focusses primarily on many small issues with the UI. + +* Front end + + * Paperless now has proper window titles. + * Fixed an issue with the small cards when more than 7 tags were used. + * Navigation of the "Show all" links adjusted. They navigate to the saved view now, if available in the sidebar. + * Some indication on the document lists that a filter is active was added. + * There's a new filter to filter for documents that do *not* have a certain tag. + * The file upload box now shows upload progress. + * The document edit page was reorganized. + * The document edit page shows various information about a document. + * An issue with the height of the preview was fixed. + * Table issues with too long document titles fixed. + +* API + + * The API now serves file names with documents. + * The API now serves various metadata about documents. + * API documentation updated. + +* Other + + * Fixed an issue with the docker image when a non-standard PostgreSQL port was used. + * The docker image was trying to check for installed languages before actually installing them. + * ``FILENAME_FORMAT`` placeholder for document types. 
+ * The filename formatter is now less restrictive with file names and tries to + conserve the original correspondents, types and titles as much as possible. + * The filename formatter does not include the document ID in filenames anymore. It will + rather append ``_01``, ``_02``, etc when it detects duplicate filenames. + +.. note:: + + The changes to the filename format will apply to newly added documents and changed documents. + If you want all files to reflect these changes, execute the ``document_renamer`` management + command. + + paperless-ng 0.9.5 ################## diff --git a/docs/usage_overview.rst b/docs/usage_overview.rst index db50d5706..bb9ecd452 100644 --- a/docs/usage_overview.rst +++ b/docs/usage_overview.rst @@ -57,7 +57,7 @@ Adding documents to paperless ############################# Once you've got Paperless setup, you need to start feeding documents into it. -Currently, there are three options: the consumption directory, IMAP (email), and +Currently, there are four options: the consumption directory, the dashboard, IMAP (email), and HTTP POST. When adding documents to paperless, it will perform the following operations on @@ -82,8 +82,7 @@ your documents: No matter which options you choose, Paperless will always store the original document that it found in the consumption directory or in the mail and will never overwrite that document. Archived versions are stored alongside the - digital versions. - + original versions. The consumption directory @@ -107,6 +106,12 @@ files from the scanner. Typically, you're looking at an FTP server like .. TODO: hyperref to configuration of the location of this magic folder. +Dashboard upload +================ + +The dashboard has a file drop field to upload documents to paperless. Simply drag a file +onto this field or select a file with the file dialog. Multiple files are supported. + .. 
_usage-email: IMAP (Email) @@ -183,6 +188,63 @@ You can also submit a document using the REST API, see :ref:`api-file_uploads` f .. _basic-searching: + +Best practices +############## + +Paperless offers a couple tools that help you organize your document collection. However, +it is up to you to use them in a way that helps you organize documents and find specific +documents when you need them. This section offers a couple ideas for managing your collection. + +Document types allow you to classify documents according to what they are. You can define +types such as "Receipt", "Invoice", or "Contract". If you used to collect all your receipts +in a single binder, you can recreate that system in paperless by defining a document type, +assigning documents to that type and then filtering by that type to only see all receipts. + +Not all documents need document types. Sometimes it's hard to determine what the type of a +document is or it is hard to justify creating a document type that you only need once or twice. +This is okay. As long as the types you define help you organize your collection in the way +you want, paperless is doing its job. + +Tags can be used in many different ways. Think of tags as more versatile folders or binders. +If you have a binder for documents related to university / your car or health care, you can +create these binders in paperless by creating tags and assigning them to relevant documents. +Just as with document types, you can filter the document list by tags and only see documents of +a certain topic. + +With physical documents, you'll often need to decide which folder the document belongs to. +The advantage of tags over folders and binders is that a single document can have multiple +tags. A physical document cannot magically appear in two different folders, but with tags, +this is entirely possible. + +.. hint:: + + This can be used in many different ways. 
One example: Imagine you're working on a particular + task, such as signing up for university. Usually you'll need to collect a bunch of different + documents that are already sorted into various folders. With the tag system of paperless, + you can create a new group of documents that are relevant to this task without destroying + the already existing organization. When you're done with the task, you could delete the + tag again, which would be equal to sorting documents back into the folder they belong in. + Or keep the tag, up to you. + +All of the logic above applies to correspondents as well. Attach them to documents if you +feel that they help you organize your collection. + +When you've started organizing your documents, create a couple saved views for document collections +you regularly access. This is equal to having labeled physical binders on your desk, except +that these saved views are dynamic and simply update themselves as you add documents to the system. + +Here are a couple examples of tags and types that you could use in your collection. + +* An ``inbox`` tag for newly added documents that you haven't manually edited yet. +* A tag ``car`` for everything car related (repairs, registration, insurance, etc.) +* A tag ``todo`` for documents that you still need to do something with, such as reply, or + perform some task online. +* A tag ``bank account x`` for all bank statements related to that account. +* A tag ``mail`` for anything that you added to paperless via its mail processing capabilities. +* A tag ``missing_metadata`` when you still need to add some metadata to a document, but can't + or don't want to do this right now. 
+ Searching ######### diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index 7f2e8414e..ad12c9c47 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -45,6 +45,9 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component'; import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component'; import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component'; +import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component'; +import { YesNoPipe } from './pipes/yes-no.pipe'; +import { FileSizePipe } from './pipes/file-size.pipe'; @NgModule({ declarations: [ @@ -82,7 +85,10 @@ import { WidgetFrameComponent } from './components/dashboard/widgets/widget-fram SavedViewWidgetComponent, StatisticsWidgetComponent, UploadFileWidgetComponent, - WidgetFrameComponent + WidgetFrameComponent, + WelcomeWidgetComponent, + YesNoPipe, + FileSizePipe ], imports: [ BrowserModule, diff --git a/src-ui/src/app/components/common/input/date-time/date-time.component.html b/src-ui/src/app/components/common/input/date-time/date-time.component.html index eaed0e185..7c002db1b 100644 --- a/src-ui/src/app/components/common/input/date-time/date-time.component.html +++ b/src-ui/src/app/components/common/input/date-time/date-time.component.html @@ -3,11 +3,10 @@ -
+
-
diff --git a/src-ui/src/app/components/common/input/date-time/date-time.component.ts b/src-ui/src/app/components/common/input/date-time/date-time.component.ts index 07238e94f..6a04c5b27 100644 --- a/src-ui/src/app/components/common/input/date-time/date-time.component.ts +++ b/src-ui/src/app/components/common/input/date-time/date-time.component.ts @@ -40,7 +40,7 @@ export class DateTimeComponent implements OnInit,ControlValueAccessor { titleDate: string = "Date" @Input() - titleTime: string = "Time" + titleTime: string @Input() disabled: boolean = false diff --git a/src-ui/src/app/components/common/input/tags/tags.component.html b/src-ui/src/app/components/common/input/tags/tags.component.html index b2ad0944f..8029dd860 100644 --- a/src-ui/src/app/components/common/input/tags/tags.component.html +++ b/src-ui/src/app/components/common/input/tags/tags.component.html @@ -8,7 +8,7 @@
-
+
diff --git a/src-ui/src/app/components/common/input/text/text.component.ts b/src-ui/src/app/components/common/input/text/text.component.ts index ffb8c0c3d..0a1a05749 100644 --- a/src-ui/src/app/components/common/input/text/text.component.ts +++ b/src-ui/src/app/components/common/input/text/text.component.ts @@ -1,6 +1,5 @@ -import { Component, forwardRef, Input, OnInit } from '@angular/core'; -import { ControlValueAccessor, NG_VALUE_ACCESSOR } from '@angular/forms'; -import { v4 as uuidv4 } from 'uuid'; +import { Component, forwardRef } from '@angular/core'; +import { NG_VALUE_ACCESSOR } from '@angular/forms'; import { AbstractInputComponent } from '../abstract-input'; @Component({ diff --git a/src-ui/src/app/components/dashboard/dashboard.component.html b/src-ui/src/app/components/dashboard/dashboard.component.html index 3e6438181..627e7ff22 100644 --- a/src-ui/src/app/components/dashboard/dashboard.component.html +++ b/src-ui/src/app/components/dashboard/dashboard.component.html @@ -4,11 +4,7 @@
- -

This space is reserved to display your saved views. Go to your documents and save a view - to have it displayed - here!

-
+ @@ -22,4 +18,4 @@
-
\ No newline at end of file +
diff --git a/src-ui/src/app/components/dashboard/dashboard.component.ts b/src-ui/src/app/components/dashboard/dashboard.component.ts index aa2426179..c7410c3f2 100644 --- a/src-ui/src/app/components/dashboard/dashboard.component.ts +++ b/src-ui/src/app/components/dashboard/dashboard.component.ts @@ -1,5 +1,7 @@ import { Component, OnInit } from '@angular/core'; +import { Title } from '@angular/platform-browser'; import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; +import { environment } from 'src/environments/environment'; @Component({ @@ -10,13 +12,15 @@ import { SavedViewConfigService } from 'src/app/services/saved-view-config.servi export class DashboardComponent implements OnInit { constructor( - public savedViewConfigService: SavedViewConfigService) { } + public savedViewConfigService: SavedViewConfigService, + private titleService: Title) { } savedViews = [] ngOnInit(): void { this.savedViews = this.savedViewConfigService.getDashboardConfigs() + this.titleService.setTitle(`Dashboard - ${environment.appTitle}`) } } diff --git a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts index 413df0ae4..a55bf57fc 100644 --- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts +++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.ts @@ -29,8 +29,12 @@ export class SavedViewWidgetComponent implements OnInit { } showAll() { - this.list.load(this.savedView) - this.router.navigate(["documents"]) + if (this.savedView.showInSideBar) { + this.router.navigate(['view', this.savedView.id]) + } else { + this.list.load(this.savedView) + this.router.navigate(["documents"]) + } } } diff --git a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html 
b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html index cb114e49e..013486a47 100644 --- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html +++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.html @@ -1,15 +1,18 @@ -
- +
+ + - - + + +
+

Uploading {{uploadStatus.length}} file(s)

+ + +
+
\ No newline at end of file diff --git a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts index a95d5f4db..2ea4825f1 100644 --- a/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts +++ b/src-ui/src/app/components/dashboard/widgets/upload-file-widget/upload-file-widget.component.ts @@ -1,8 +1,15 @@ +import { HttpEventType } from '@angular/common/http'; import { Component, OnInit } from '@angular/core'; import { FileSystemFileEntry, NgxFileDropEntry } from 'ngx-file-drop'; import { DocumentService } from 'src/app/services/rest/document.service'; import { Toast, ToastService } from 'src/app/services/toast.service'; + +interface UploadStatus { + loaded: number + total: number +} + @Component({ selector: 'app-upload-file-widget', templateUrl: './upload-file-widget.component.html', @@ -16,26 +23,59 @@ export class UploadFileWidgetComponent implements OnInit { } public fileOver(event){ - console.log(event); } - + public fileLeave(event){ - console.log(event); } - + + uploadStatus: UploadStatus[] = [] + completedFiles = 0 + + uploadVisible = false + + get loadedSum() { + return this.uploadStatus.map(s => s.loaded).reduce((a,b) => a+b, this.completedFiles > 0 ? 
1 : 0) + } + + get totalSum() { + return this.uploadStatus.map(s => s.total).reduce((a,b) => a+b, 1) + } + public dropped(files: NgxFileDropEntry[]) { for (const droppedFile of files) { if (droppedFile.fileEntry.isFile) { - const fileEntry = droppedFile.fileEntry as FileSystemFileEntry; - console.log(fileEntry) + let uploadStatusObject: UploadStatus = {loaded: 0, total: 1} + this.uploadStatus.push(uploadStatusObject) + this.uploadVisible = true + + const fileEntry = droppedFile.fileEntry as FileSystemFileEntry; fileEntry.file((file: File) => { - console.log(file) - const formData = new FormData() + let formData = new FormData() formData.append('document', file, file.name) - this.documentService.uploadDocument(formData).subscribe(result => { - this.toastService.showToast(Toast.make("Information", "The document has been uploaded and will be processed by the consumer shortly.")) + + this.documentService.uploadDocument(formData).subscribe(event => { + if (event.type == HttpEventType.UploadProgress) { + uploadStatusObject.loaded = event.loaded + uploadStatusObject.total = event.total + } else if (event.type == HttpEventType.Response) { + this.uploadStatus.splice(this.uploadStatus.indexOf(uploadStatusObject), 1) + this.completedFiles += 1 + this.toastService.showToast(Toast.make("Information", "The document has been uploaded and will be processed by the consumer shortly.")) + } + }, error => { - this.toastService.showToast(Toast.makeError("An error has occured while uploading the document. Sorry!")) + this.uploadStatus.splice(this.uploadStatus.indexOf(uploadStatusObject), 1) + this.completedFiles += 1 + switch (error.status) { + case 400: { + this.toastService.showToast(Toast.makeError(`There was an error while uploading the document: ${error.error.document}`)) + break; + } + default: { + this.toastService.showToast(Toast.makeError("An error has occurred while uploading the document. 
Sorry!")) + break; + } + } }) }); } diff --git a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.html b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.html new file mode 100644 index 000000000..0caf55f11 --- /dev/null +++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.html @@ -0,0 +1,16 @@ + + + + +

Paperless is running! :)

+

You can start uploading documents by dropping them in the file upload box to the right or by dropping them in the configured consumption folder and they'll start showing up in the documents list. + After you've added some metadata to your documents, use the filtering mechanisms of paperless to create custom views (such as 'Recently added', 'Tagged TODO') and have them displayed on the dashboard instead of this message.

+

Paperless offers some more features that try to make your life easier, such as:

+
    +
  • Once you've got a couple documents in paperless and added metadata to them, paperless can assign that metadata to new documents automatically.
  • +
  • You can configure paperless to read your mails and add documents from attached files.
  • +
+

Consult the documentation on how to use these features. The section on basic usage also has some information on how to use paperless in general.

+
+ +
\ No newline at end of file diff --git a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.scss b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.scss new file mode 100644 index 000000000..e69de29bb diff --git a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.spec.ts b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.spec.ts new file mode 100644 index 000000000..5e8c2494b --- /dev/null +++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { WelcomeWidgetComponent } from './welcome-widget.component'; + +describe('WelcomeWidgetComponent', () => { + let component: WelcomeWidgetComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ WelcomeWidgetComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(WelcomeWidgetComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.ts b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.ts new file mode 100644 index 000000000..71a87189c --- /dev/null +++ b/src-ui/src/app/components/dashboard/widgets/welcome-widget/welcome-widget.component.ts @@ -0,0 +1,15 @@ +import { Component, OnInit } from '@angular/core'; + +@Component({ + selector: 'app-welcome-widget', + templateUrl: './welcome-widget.component.html', + styleUrls: ['./welcome-widget.component.scss'] +}) +export class WelcomeWidgetComponent implements OnInit { + + constructor() { } + + ngOnInit(): void { + } + +} diff --git 
a/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html b/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html index d0f637935..1d7d2d906 100644 --- a/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html +++ b/src-ui/src/app/components/dashboard/widgets/widget-frame/widget-frame.component.html @@ -1,4 +1,4 @@ -
+
{{title}}
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index 5a5563571..e0b5c6da9 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -14,15 +14,15 @@ Download - -
- - + + - -
+ +
+ Original document metadata + + +
+ + + + + + + +
{{m.prefix}}:{{m.key}}{{m.value}}
+
+ +
+ + Archived document metadata +
+ +
+ + + + + + + +
{{m.prefix}}:{{m.key}}{{m.value}}
+
+ + + + + +
  -   +    
-
+

Your browser does not support PDFs. Download the PDF.

-
+
\ No newline at end of file diff --git a/src-ui/src/app/components/document-detail/document-detail.component.scss b/src-ui/src/app/components/document-detail/document-detail.component.scss index e69de29bb..b1e9fddfb 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.scss +++ b/src-ui/src/app/components/document-detail/document-detail.component.scss @@ -0,0 +1,5 @@ +.document-preview { + height: calc(100vh - 180px); + top: 70px; + position: sticky; +} \ No newline at end of file diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index 253833792..329077693 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -1,5 +1,6 @@ import { Component, OnInit } from '@angular/core'; import { FormControl, FormGroup } from '@angular/forms'; +import { Title } from '@angular/platform-browser'; import { ActivatedRoute, Router } from '@angular/router'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent'; @@ -11,6 +12,7 @@ import { OpenDocumentsService } from 'src/app/services/open-documents.service'; import { CorrespondentService } from 'src/app/services/rest/correspondent.service'; import { DocumentTypeService } from 'src/app/services/rest/document-type.service'; import { DocumentService } from 'src/app/services/rest/document.service'; +import { environment } from 'src/environments/environment'; import { DeleteDialogComponent } from '../common/delete-dialog/delete-dialog.component'; import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component'; import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component'; 
@@ -22,6 +24,9 @@ import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/do }) export class DocumentDetailComponent implements OnInit { + public expandOriginalMetadata = false; + public expandArchivedMetadata = false; + documentId: number document: PaperlessDocument metadata: PaperlessDocumentMetadata @@ -51,7 +56,8 @@ export class DocumentDetailComponent implements OnInit { private router: Router, private modalService: NgbModal, private openDocumentService: OpenDocumentsService, - private documentListViewService: DocumentListViewService) { } + private documentListViewService: DocumentListViewService, + private titleService: Title) { } ngOnInit(): void { this.documentForm.valueChanges.subscribe(wow => { @@ -80,6 +86,7 @@ export class DocumentDetailComponent implements OnInit { updateComponent(doc: PaperlessDocument) { this.document = doc + this.titleService.setTitle(`${doc.title} - ${environment.appTitle}`) this.documentsService.getMetadata(doc.id).subscribe(result => { this.metadata = result }) diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html index 71a7fb01a..da469ebc4 100644 --- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html +++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html @@ -1,8 +1,14 @@
-
-
- +
+ +
+
+ +
+
+ + {{moreTags}} +
diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss index ef00ad029..0068667d0 100644 --- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss +++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.scss @@ -1,5 +1,5 @@ .doc-img { - background-size: cover; - background-position: top; + object-fit: cover; + object-position: top; height: 200px; } \ No newline at end of file diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts index 08202bfc9..d60552d4f 100644 --- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts +++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.ts @@ -1,4 +1,5 @@ import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; +import { map } from 'rxjs/operators'; import { PaperlessDocument } from 'src/app/data/paperless-document'; import { PaperlessTag } from 'src/app/data/paperless-tag'; import { DocumentService } from 'src/app/services/rest/document.service'; @@ -21,6 +22,8 @@ export class DocumentCardSmallComponent implements OnInit { @Output() clickCorrespondent = new EventEmitter() + moreTags: number = null + ngOnInit(): void { } @@ -35,4 +38,18 @@ export class DocumentCardSmallComponent implements OnInit { getPreviewUrl() { return this.documentService.getPreviewUrl(this.document.id) } + + getTagsLimited$() { + return this.document.tags$.pipe( + map(tags => { + if (tags.length > 7) { + this.moreTags = tags.length - 6 + return tags.slice(0, 6) + } else { + return tags + } + }) + ) + } + } diff --git 
a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index cebe7c544..1a8c7a781 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -24,7 +24,7 @@
-
+
@@ -44,7 +44,7 @@
- - - +
@@ -105,7 +105,7 @@ - - + + diff --git a/src-ui/src/app/components/manage/settings/settings.component.ts b/src-ui/src/app/components/manage/settings/settings.component.ts index 1b93268fc..c7b976c65 100644 --- a/src-ui/src/app/components/manage/settings/settings.component.ts +++ b/src-ui/src/app/components/manage/settings/settings.component.ts @@ -1,9 +1,11 @@ import { Component, OnInit } from '@angular/core'; import { FormControl, FormGroup } from '@angular/forms'; +import { Title } from '@angular/platform-browser'; import { SavedViewConfig } from 'src/app/data/saved-view-config'; import { GENERAL_SETTINGS } from 'src/app/data/storage-keys'; import { DocumentListViewService } from 'src/app/services/document-list-view.service'; import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; +import { environment } from 'src/environments/environment'; @Component({ selector: 'app-settings', @@ -18,10 +20,12 @@ export class SettingsComponent implements OnInit { constructor( private savedViewConfigService: SavedViewConfigService, - private documentListViewService: DocumentListViewService + private documentListViewService: DocumentListViewService, + private titleService: Title ) { } ngOnInit(): void { + this.titleService.setTitle(`Settings - ${environment.appTitle}`) } deleteViewConfig(config: SavedViewConfig) { diff --git a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts index 761a9484c..efbe11321 100644 --- a/src-ui/src/app/components/manage/tag-list/tag-list.component.ts +++ b/src-ui/src/app/components/manage/tag-list/tag-list.component.ts @@ -1,8 +1,9 @@ -import { Component } from '@angular/core'; +import { Component, OnInit } from '@angular/core'; +import { Title } from '@angular/platform-browser'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag'; import { TagService } from 
'src/app/services/rest/tag.service'; -import { CorrespondentEditDialogComponent } from '../correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component'; +import { environment } from 'src/environments/environment'; import { GenericListComponent } from '../generic-list/generic-list.component'; import { TagEditDialogComponent } from './tag-edit-dialog/tag-edit-dialog.component'; @@ -11,11 +12,17 @@ import { TagEditDialogComponent } from './tag-edit-dialog/tag-edit-dialog.compon templateUrl: './tag-list.component.html', styleUrls: ['./tag-list.component.scss'] }) -export class TagListComponent extends GenericListComponent { +export class TagListComponent extends GenericListComponent implements OnInit { - constructor(tagService: TagService, modalService: NgbModal) { + constructor(tagService: TagService, modalService: NgbModal, private titleService: Title) { super(tagService, modalService, TagEditDialogComponent) - } + } + + + ngOnInit(): void { + super.ngOnInit() + this.titleService.setTitle(`Tags - ${environment.appTitle}`) + } getColor(id) { return TAG_COLOURS.find(c => c.id == id) diff --git a/src-ui/src/app/components/search/search.component.ts b/src-ui/src/app/components/search/search.component.ts index de8b4652f..3371debd2 100644 --- a/src-ui/src/app/components/search/search.component.ts +++ b/src-ui/src/app/components/search/search.component.ts @@ -1,7 +1,9 @@ import { Component, OnInit } from '@angular/core'; +import { Title } from '@angular/platform-browser'; import { ActivatedRoute, Router } from '@angular/router'; import { SearchHit } from 'src/app/data/search-result'; import { SearchService } from 'src/app/services/rest/search.service'; +import { environment } from 'src/environments/environment'; @Component({ selector: 'app-search', @@ -26,7 +28,7 @@ export class SearchComponent implements OnInit { errorMessage: string - constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { } + 
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router, private titleService: Title) { } ngOnInit(): void { this.route.queryParamMap.subscribe(paramMap => { @@ -34,6 +36,7 @@ export class SearchComponent implements OnInit { this.searching = true this.currentPage = 1 this.loadPage() + this.titleService.setTitle(`Search: ${this.query} - ${environment.appTitle}`) }) } diff --git a/src-ui/src/app/data/filter-rule-type.ts b/src-ui/src/app/data/filter-rule-type.ts index e1db34298..a35759f69 100644 --- a/src-ui/src/app/data/filter-rule-type.ts +++ b/src-ui/src/app/data/filter-rule-type.ts @@ -16,19 +16,22 @@ export const FILTER_ADDED_AFTER = 14 export const FILTER_MODIFIED_BEFORE = 15 export const FILTER_MODIFIED_AFTER = 16 +export const FILTER_DOES_NOT_HAVE_TAG = 17 + export const FILTER_RULE_TYPES: FilterRuleType[] = [ - {id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false}, - {id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false}, + {id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false, default: ""}, + {id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false, default: ""}, {id: FILTER_ASN, name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false}, {id: FILTER_CORRESPONDENT, name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent", multi: false}, {id: FILTER_DOCUMENT_TYPE, name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false}, - {id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false}, + {id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false, default: true}, {id: FILTER_HAS_TAG, name: "Has tag", filtervar: 
"tags__id__all", datatype: "tag", multi: true}, - {id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false}, + {id: FILTER_DOES_NOT_HAVE_TAG, name: "Does not have tag", filtervar: "tags__id__none", datatype: "tag", multi: true}, + {id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false, default: true}, {id: FILTER_CREATED_BEFORE, name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false}, {id: FILTER_CREATED_AFTER, name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false}, @@ -50,4 +53,5 @@ export interface FilterRuleType { filtervar: string datatype: string //number, string, boolean, date multi: boolean + default?: any } \ No newline at end of file diff --git a/src-ui/src/app/data/paperless-document-metadata.ts b/src-ui/src/app/data/paperless-document-metadata.ts index 22b3f692a..12f0a78d8 100644 --- a/src-ui/src/app/data/paperless-document-metadata.ts +++ b/src-ui/src/app/data/paperless-document-metadata.ts @@ -1,11 +1,13 @@ export interface PaperlessDocumentMetadata { - paperless__checksum?: string + original_checksum?: string - paperless__mime_type?: string + archived_checksum?: string - paperless__filename?: string + original_mime_type?: string - paperless__has_archive_version?: boolean + media_filename?: string + + has_archive_version?: boolean } \ No newline at end of file diff --git a/src-ui/src/app/pipes/file-size.pipe.spec.ts b/src-ui/src/app/pipes/file-size.pipe.spec.ts new file mode 100644 index 000000000..8c7a39d22 --- /dev/null +++ b/src-ui/src/app/pipes/file-size.pipe.spec.ts @@ -0,0 +1,8 @@ +import { FileSizePipe } from './file-size.pipe'; + +describe('FileSizePipe', () => { + it('create an instance', () => { + const pipe = new FileSizePipe(); + expect(pipe).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/pipes/file-size.pipe.ts b/src-ui/src/app/pipes/file-size.pipe.ts new file mode 100644 
index 000000000..7d742c876 --- /dev/null +++ b/src-ui/src/app/pipes/file-size.pipe.ts @@ -0,0 +1,77 @@ +/** + * https://gist.github.com/JonCatmull/ecdf9441aaa37336d9ae2c7f9cb7289a + * + * @license + * Copyright (c) 2019 Jonathan Catmull. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +import { Pipe, PipeTransform } from '@angular/core'; + +type unit = 'bytes' | 'KB' | 'MB' | 'GB' | 'TB' | 'PB'; +type unitPrecisionMap = { + [u in unit]: number; +}; + +const defaultPrecisionMap: unitPrecisionMap = { + bytes: 0, + KB: 0, + MB: 1, + GB: 1, + TB: 2, + PB: 2 +}; + +/* + * Convert bytes into largest possible unit. + * Takes a precision argument that can be a number or a map for each unit. 
+ * Usage: + * bytes | fileSize:precision + * @example + * // returns 1 KB + * {{ 1500 | fileSize }} + * @example + * // returns 2.1 GB + * {{ 2100000000 | fileSize }} + * @example + * // returns 1.46 KB + * {{ 1500 | fileSize:2 }} + */ +@Pipe({ name: 'fileSize' }) +export class FileSizePipe implements PipeTransform { + private readonly units: unit[] = ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB']; + + transform(bytes: number = 0, precision: number | unitPrecisionMap = defaultPrecisionMap): string { + if (isNaN(parseFloat(String(bytes))) || !isFinite(bytes)) return '?'; + + let unitIndex = 0; + + while (bytes >= 1024) { + bytes /= 1024; + unitIndex++; + } + + const unit = this.units[unitIndex]; + + if (typeof precision === 'number') { + return `${bytes.toFixed(+precision)} ${unit}`; + } + return `${bytes.toFixed(precision[unit])} ${unit}`; + } +} diff --git a/src-ui/src/app/pipes/yes-no.pipe.spec.ts b/src-ui/src/app/pipes/yes-no.pipe.spec.ts new file mode 100644 index 000000000..80acd8acd --- /dev/null +++ b/src-ui/src/app/pipes/yes-no.pipe.spec.ts @@ -0,0 +1,8 @@ +import { YesNoPipe } from './yes-no.pipe'; + +describe('YesNoPipe', () => { + it('create an instance', () => { + const pipe = new YesNoPipe(); + expect(pipe).toBeTruthy(); + }); +}); diff --git a/src-ui/src/app/pipes/yes-no.pipe.ts b/src-ui/src/app/pipes/yes-no.pipe.ts new file mode 100644 index 000000000..9a4ed56ef --- /dev/null +++ b/src-ui/src/app/pipes/yes-no.pipe.ts @@ -0,0 +1,12 @@ +import { Pipe, PipeTransform } from '@angular/core'; + +@Pipe({ + name: 'yesno' +}) +export class YesNoPipe implements PipeTransform { + + transform(value: boolean): unknown { + return value ? 
"Yes" : "No" + } + +} diff --git a/src-ui/src/app/services/rest/document.service.ts b/src-ui/src/app/services/rest/document.service.ts index 5bf2308d4..81693ec68 100644 --- a/src-ui/src/app/services/rest/document.service.ts +++ b/src-ui/src/app/services/rest/document.service.ts @@ -94,7 +94,7 @@ export class DocumentService extends AbstractPaperlessService } uploadDocument(formData) { - return this.http.post(this.getResourceUrl(null, 'post_document'), formData) + return this.http.post(this.getResourceUrl(null, 'post_document'), formData, {reportProgress: true, observe: "events"}) } getMetadata(id: number): Observable { diff --git a/src-ui/src/assets/save-filter.png b/src-ui/src/assets/save-filter.png new file mode 100644 index 000000000..dcaa41714 Binary files /dev/null and b/src-ui/src/assets/save-filter.png differ diff --git a/src-ui/src/environments/environment.prod.ts b/src-ui/src/environments/environment.prod.ts index 2d8d5261b..09154dfca 100644 --- a/src-ui/src/environments/environment.prod.ts +++ b/src-ui/src/environments/environment.prod.ts @@ -1,4 +1,5 @@ export const environment = { production: true, - apiBaseUrl: "/api/" + apiBaseUrl: "/api/", + appTitle: "Paperless-ng" }; diff --git a/src-ui/src/environments/environment.ts b/src-ui/src/environments/environment.ts index a0877d69f..5e4b148dc 100644 --- a/src-ui/src/environments/environment.ts +++ b/src-ui/src/environments/environment.ts @@ -4,7 +4,8 @@ export const environment = { production: false, - apiBaseUrl: "http://localhost:8000/api/" + apiBaseUrl: "http://localhost:8000/api/", + appTitle: "DEVELOPMENT P-NG" }; /* diff --git a/src/documents/admin.py b/src/documents/admin.py index 8b9f2fce9..055a6fd93 100755 --- a/src/documents/admin.py +++ b/src/documents/admin.py @@ -17,8 +17,6 @@ class CorrespondentAdmin(admin.ModelAdmin): list_filter = ("matching_algorithm",) list_editable = ("match", "matching_algorithm") - readonly_fields = ("slug",) - class TagAdmin(admin.ModelAdmin): @@ -31,8 +29,6 @@ class 
TagAdmin(admin.ModelAdmin): list_filter = ("colour", "matching_algorithm") list_editable = ("colour", "match", "matching_algorithm") - readonly_fields = ("slug", ) - class DocumentTypeAdmin(admin.ModelAdmin): @@ -44,13 +40,16 @@ class DocumentTypeAdmin(admin.ModelAdmin): list_filter = ("matching_algorithm",) list_editable = ("match", "matching_algorithm") - readonly_fields = ("slug",) - class DocumentAdmin(admin.ModelAdmin): search_fields = ("correspondent__name", "title", "content", "tags__name") - readonly_fields = ("added", "mime_type", "storage_type", "filename") + readonly_fields = ( + "added", + "modified", + "mime_type", + "storage_type", + "filename") list_display_links = ("title",) @@ -101,7 +100,7 @@ class DocumentAdmin(admin.ModelAdmin): for tag in obj.tags.all(): r += self._html_tag( "span", - tag.slug + ", " + tag.name + ", " ) return r diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 7bae5c2a9..e4da51f1d 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -8,13 +8,14 @@ from django.conf import settings from django.db import transaction from django.db.models import Q from django.utils import timezone +from filelock import FileLock from .classifier import DocumentClassifier, IncompatibleClassifierVersionError -from .file_handling import create_source_path_directory +from .file_handling import create_source_path_directory, \ + generate_unique_filename from .loggers import LoggingMixin from .models import Document, FileInfo, Correspondent, DocumentType, Tag -from .parsers import ParseError, get_parser_class_for_mime_type, \ - get_supported_file_extensions, parse_date +from .parsers import ParseError, get_parser_class_for_mime_type, parse_date from .signals import ( document_consumption_finished, document_consumption_started @@ -38,6 +39,10 @@ class Consumer(LoggingMixin): def pre_check_file_exists(self): if not os.path.isfile(self.path): + self.log( + "error", + "Cannot consume {}: It is not a 
file.".format(self.path) + ) raise ConsumerError("Cannot consume {}: It is not a file".format( self.path)) @@ -47,6 +52,10 @@ class Consumer(LoggingMixin): if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501 if settings.CONSUMER_DELETE_DUPLICATES: os.unlink(self.path) + self.log( + "error", + "Not consuming {}: It is a duplicate.".format(self.filename) + ) raise ConsumerError( "Not consuming {}: It is a duplicate.".format(self.filename) ) @@ -148,8 +157,9 @@ class Consumer(LoggingMixin): classifier = DocumentClassifier() classifier.reload() except (FileNotFoundError, IncompatibleClassifierVersionError) as e: - logging.getLogger(__name__).warning( - "Cannot classify documents: {}.".format(e)) + self.log( + "warning", + f"Cannot classify documents: {e}.") classifier = None # now that everything is done, we can start to store the document @@ -176,31 +186,28 @@ class Consumer(LoggingMixin): # After everything is in the database, copy the files into # place. If this fails, we'll also rollback the transaction. + with FileLock(settings.MEDIA_LOCK): + document.filename = generate_unique_filename( + document, settings.ORIGINALS_DIR) + create_source_path_directory(document.source_path) - # TODO: not required, since this is done by the file handling - # logic - create_source_path_directory(document.source_path) - - self._write(document.storage_type, - self.path, document.source_path) - - self._write(document.storage_type, - thumbnail, document.thumbnail_path) - - if archive_path and os.path.isfile(archive_path): self._write(document.storage_type, - archive_path, document.archive_path) + self.path, document.source_path) - with open(archive_path, 'rb') as f: - document.archive_checksum = hashlib.md5( - f.read()).hexdigest() - document.save() + self._write(document.storage_type, + thumbnail, document.thumbnail_path) - # Afte performing all database operations and moving files - # into place, tell paperless where the file is. 
- document.filename = os.path.basename(document.source_path) - # Saving the document now will trigger the filename handling - # logic. + if archive_path and os.path.isfile(archive_path): + create_source_path_directory(document.archive_path) + self._write(document.storage_type, + archive_path, document.archive_path) + + with open(archive_path, 'rb') as f: + document.archive_checksum = hashlib.md5( + f.read()).hexdigest() + + # Don't save with the lock active. Saving will cause the file + # renaming logic to acquire the lock as well. document.save() # Delete the file only if it was successfully consumed @@ -241,7 +248,7 @@ class Consumer(LoggingMixin): with open(self.path, "rb") as f: document = Document.objects.create( correspondent=file_info.correspondent, - title=file_info.title, + title=(self.override_title or file_info.title)[:127], content=text, mime_type=mime_type, checksum=hashlib.md5(f.read()).hexdigest(), @@ -252,18 +259,17 @@ class Consumer(LoggingMixin): relevant_tags = set(file_info.tags) if relevant_tags: - tag_names = ", ".join([t.slug for t in relevant_tags]) + tag_names = ", ".join([t.name for t in relevant_tags]) self.log("debug", "Tagging with {}".format(tag_names)) document.tags.add(*relevant_tags) self.apply_overrides(document) + document.save() + return document def apply_overrides(self, document): - if self.override_title: - document.title = self.override_title - if self.override_correspondent_id: document.correspondent = Correspondent.objects.get( pk=self.override_correspondent_id) diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index 85ee37d4d..c5efc33e4 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -1,7 +1,9 @@ +import datetime import logging import os from collections import defaultdict +import pathvalidate from django.conf import settings from django.template.defaultfilters import slugify @@ -68,21 +70,53 @@ def many_to_dictionary(field): return mydictionary -def 
generate_filename(doc): +def generate_unique_filename(doc, root): + counter = 0 + + while True: + new_filename = generate_filename(doc, counter) + if new_filename == doc.filename: + # still the same as before. + return new_filename + + if os.path.exists(os.path.join(root, new_filename)): + counter += 1 + else: + return new_filename + + +def generate_filename(doc, counter=0): path = "" try: if settings.PAPERLESS_FILENAME_FORMAT is not None: tags = defaultdict(lambda: slugify(None), many_to_dictionary(doc.tags)) + + if doc.correspondent: + correspondent = pathvalidate.sanitize_filename( + doc.correspondent.name, replacement_text="-" + ) + else: + correspondent = "none" + + if doc.document_type: + document_type = pathvalidate.sanitize_filename( + doc.document_type.name, replacement_text="-" + ) + else: + document_type = "none" + path = settings.PAPERLESS_FILENAME_FORMAT.format( - correspondent=slugify(doc.correspondent), - title=slugify(doc.title), - created=slugify(doc.created), + title=pathvalidate.sanitize_filename( + doc.title, replacement_text="-"), + correspondent=correspondent, + document_type=document_type, + created=datetime.date.isoformat(doc.created), created_year=doc.created.year if doc.created else "none", created_month=doc.created.month if doc.created else "none", created_day=doc.created.day if doc.created else "none", - added=slugify(doc.added), + added=datetime.date.isoformat(doc.added), added_year=doc.added.year if doc.added else "none", added_month=doc.added.month if doc.added else "none", added_day=doc.added.day if doc.added else "none", @@ -93,11 +127,11 @@ def generate_filename(doc): f"Invalid PAPERLESS_FILENAME_FORMAT: " f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default") - # Always append the primary key to guarantee uniqueness of filename + counter_str = f"_{counter:02}" if counter else "" if len(path) > 0: - filename = "%s-%07i%s" % (path, doc.pk, doc.file_type) + filename = f"{path}{counter_str}{doc.file_type}" else: - filename 
= "%07i%s" % (doc.pk, doc.file_type) + filename = f"{doc.pk:07}{counter_str}{doc.file_type}" # Append .gpg for encrypted files if doc.storage_type == doc.STORAGE_TYPE_GPG: diff --git a/src/documents/filters.py b/src/documents/filters.py index 770e0e5af..b3c92eba3 100755 --- a/src/documents/filters.py +++ b/src/documents/filters.py @@ -37,6 +37,10 @@ class DocumentTypeFilterSet(FilterSet): class TagsFilter(Filter): + def __init__(self, exclude=False): + super(TagsFilter, self).__init__() + self.exclude = exclude + def filter(self, qs, value): if not value: return qs @@ -47,7 +51,10 @@ class TagsFilter(Filter): return qs for tag_id in tag_ids: - qs = qs.filter(tags__id=tag_id) + if self.exclude: + qs = qs.exclude(tags__id=tag_id) + else: + qs = qs.filter(tags__id=tag_id) return qs @@ -74,6 +81,8 @@ class DocumentFilterSet(FilterSet): tags__id__all = TagsFilter() + tags__id__none = TagsFilter(exclude=True) + is_in_inbox = InboxFilter() class Meta: diff --git a/src/documents/management/commands/decrypt_documents.py b/src/documents/management/commands/decrypt_documents.py index 2287bfa72..918f1a175 100644 --- a/src/documents/management/commands/decrypt_documents.py +++ b/src/documents/management/commands/decrypt_documents.py @@ -82,7 +82,8 @@ class Command(BaseCommand): with open(document.thumbnail_path, "wb") as f: f.write(raw_thumb) - document.save(update_fields=("storage_type", "filename")) + Document.objects.filter(id=document.id).update( + storage_type=document.storage_type, filename=document.filename) for path in old_paths: os.unlink(path) diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 5cecd6bf9..8ac60aa6d 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -29,10 +29,9 @@ def _tags_from_path(filepath): path_parts = Path(filepath).relative_to( settings.CONSUMPTION_DIR).parent.parts for part in 
path_parts: - tag_ids.add(Tag.objects.get_or_create( - slug=slugify(part), - defaults={"name": part}, - )[0].pk) + tag_ids.add(Tag.objects.get_or_create(name__iexact=part, defaults={ + "name": part + })[0].pk) return tag_ids diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index f1ee74038..a7a17f124 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -38,6 +38,9 @@ class Command(Renderable, BaseCommand): if not os.access(self.target, os.W_OK): raise CommandError("That path doesn't appear to be writable") + if os.listdir(self.target): + raise CommandError("That directory is not empty.") + self.dump() def dump(self): @@ -54,31 +57,39 @@ class Command(Renderable, BaseCommand): document = document_map[document_dict["pk"]] - unique_filename = f"{document.pk:07}_{document.file_name}" - file_target = os.path.join(self.target, unique_filename) + print(f"Exporting: {document}") - thumbnail_name = unique_filename + "-thumbnail.png" + filename_counter = 0 + while True: + original_name = document.get_public_filename( + counter=filename_counter) + original_target = os.path.join(self.target, original_name) + + if not os.path.exists(original_target): + break + else: + filename_counter += 1 + + thumbnail_name = original_name + "-thumbnail.png" thumbnail_target = os.path.join(self.target, thumbnail_name) - document_dict[EXPORTER_FILE_NAME] = unique_filename + document_dict[EXPORTER_FILE_NAME] = original_name document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name if os.path.exists(document.archive_path): - archive_name = \ - f"{document.pk:07}_archive_{document.archive_file_name}" + archive_name = document.get_public_filename( + archive=True, counter=filename_counter, suffix="_archive") archive_target = os.path.join(self.target, archive_name) document_dict[EXPORTER_ARCHIVE_NAME] = archive_name else: archive_target = None - 
print(f"Exporting: {file_target}") - t = int(time.mktime(document.created.timetuple())) if document.storage_type == Document.STORAGE_TYPE_GPG: - with open(file_target, "wb") as f: + with open(original_target, "wb") as f: f.write(GnuPG.decrypted(document.source_file)) - os.utime(file_target, times=(t, t)) + os.utime(original_target, times=(t, t)) with open(thumbnail_target, "wb") as f: f.write(GnuPG.decrypted(document.thumbnail_file)) @@ -90,7 +101,7 @@ class Command(Renderable, BaseCommand): os.utime(archive_target, times=(t, t)) else: - shutil.copy(document.source_path, file_target) + shutil.copy(document.source_path, original_target) shutil.copy(document.thumbnail_path, thumbnail_target) if archive_target: diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index ca8c8bf06..70d05d98b 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -5,11 +5,13 @@ import shutil from django.conf import settings from django.core.management import call_command from django.core.management.base import BaseCommand, CommandError +from filelock import FileLock from documents.models import Document from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \ EXPORTER_ARCHIVE_NAME -from ...file_handling import generate_filename, create_source_path_directory +from ...file_handling import create_source_path_directory, \ + generate_unique_filename from ...mixins import Renderable @@ -114,17 +116,20 @@ class Command(Renderable, BaseCommand): document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED - document.filename = generate_filename(document) + with FileLock(settings.MEDIA_LOCK): + document.filename = generate_unique_filename( + document, settings.ORIGINALS_DIR) - if os.path.isfile(document.source_path): - raise FileExistsError(document.source_path) + if os.path.isfile(document.source_path): + raise 
FileExistsError(document.source_path) - create_source_path_directory(document.source_path) + create_source_path_directory(document.source_path) - print(f"Moving {document_path} to {document.source_path}") - shutil.copy(document_path, document.source_path) - shutil.copy(thumbnail_path, document.thumbnail_path) - if archive_path: - shutil.copy(archive_path, document.archive_path) + print(f"Moving {document_path} to {document.source_path}") + shutil.copy(document_path, document.source_path) + shutil.copy(thumbnail_path, document.thumbnail_path) + if archive_path: + create_source_path_directory(document.archive_path) + shutil.copy(archive_path, document.archive_path) document.save() diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py index ba9e74de5..5d7d0d90c 100644 --- a/src/documents/management/commands/document_renamer.py +++ b/src/documents/management/commands/document_renamer.py @@ -1,3 +1,6 @@ +import logging + +import tqdm from django.core.management.base import BaseCommand from documents.models import Document @@ -18,6 +21,8 @@ class Command(Renderable, BaseCommand): self.verbosity = options["verbosity"] - for document in Document.objects.all(): + logging.getLogger().handlers[0].level = logging.ERROR + + for document in tqdm.tqdm(Document.objects.all()): # Saving the document again will generate a new filename and rename document.save() diff --git a/src/documents/migrations/1006_auto_20201208_2209.py b/src/documents/migrations/1006_auto_20201208_2209.py new file mode 100644 index 000000000..49f8c8dfe --- /dev/null +++ b/src/documents/migrations/1006_auto_20201208_2209.py @@ -0,0 +1,25 @@ +# Generated by Django 3.1.4 on 2020-12-08 22:09 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '1005_checksums'), + ] + + operations = [ + migrations.RemoveField( + model_name='correspondent', + name='slug', + ), + 
migrations.RemoveField( + model_name='documenttype', + name='slug', + ), + migrations.RemoveField( + model_name='tag', + name='slug', + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index a4f887d77..f0678a843 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -1,10 +1,12 @@ # coding=utf-8 - +import datetime import logging import os import re from collections import OrderedDict +import pathvalidate + import dateutil.parser from django.conf import settings from django.db import models @@ -34,7 +36,6 @@ class MatchingModel(models.Model): ) name = models.CharField(max_length=128, unique=True) - slug = models.SlugField(blank=True, editable=False) match = models.CharField(max_length=256, blank=True) matching_algorithm = models.PositiveIntegerField( @@ -67,7 +68,6 @@ class MatchingModel(models.Model): def save(self, *args, **kwargs): self.match = self.match.lower() - self.slug = slugify(self.name) models.Model.save(self, *args, **kwargs) @@ -172,6 +172,7 @@ class Document(models.Model): created = models.DateTimeField( default=timezone.now, db_index=True) + modified = models.DateTimeField( auto_now=True, editable=False, db_index=True) @@ -206,13 +207,11 @@ class Document(models.Model): ordering = ("correspondent", "title") def __str__(self): - created = self.created.strftime("%Y%m%d") + created = datetime.date.isoformat(self.created) if self.correspondent and self.title: - return "{}: {} - {}".format( - created, self.correspondent, self.title) - if self.correspondent or self.title: - return "{}: {}".format(created, self.correspondent or self.title) - return str(created) + return f"{created} {self.correspondent} {self.title}" + else: + return f"{created} {self.title}" @property def source_path(self): @@ -248,13 +247,21 @@ class Document(models.Model): def archive_file(self): return open(self.archive_path, "rb") - @property - def file_name(self): - return slugify(str(self)) + self.file_type + def get_public_filename(self, 
archive=False, counter=0, suffix=None): + result = str(self) - @property - def archive_file_name(self): - return slugify(str(self)) + ".pdf" + if counter: + result += f"_{counter:02}" + + if suffix: + result += suffix + + if archive: + result += ".pdf" + else: + result += self.file_type + + return pathvalidate.sanitize_filename(result, replacement_text="-") @property def file_type(self): @@ -375,9 +382,7 @@ class FileInfo: def _get_correspondent(cls, name): if not name: return None - return Correspondent.objects.get_or_create(name=name, defaults={ - "slug": slugify(name) - })[0] + return Correspondent.objects.get_or_create(name=name)[0] @classmethod def _get_title(cls, title): @@ -387,10 +392,7 @@ class FileInfo: def _get_tags(cls, tags): r = [] for t in tags.split(","): - r.append(Tag.objects.get_or_create( - slug=slugify(t), - defaults={"name": t} - )[0]) + r.append(Tag.objects.get_or_create(name=t)[0]) return tuple(r) @classmethod diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py index e3c4b1aec..bc0b689d4 100644 --- a/src/documents/sanity_checker.py +++ b/src/documents/sanity_checker.py @@ -46,6 +46,10 @@ def check_sanity(): for f in files: present_files.append(os.path.normpath(os.path.join(root, f))) + lockfile = os.path.normpath(settings.MEDIA_LOCK) + if lockfile in present_files: + present_files.remove(lockfile) + for doc in Document.objects.all(): # Check sanity of the thumbnail if not os.path.isfile(doc.thumbnail_path): diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index c988b2137..600645061 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -1,17 +1,23 @@ import magic +from django.utils.text import slugify from pathvalidate import validate_filename, ValidationError from rest_framework import serializers +from rest_framework.fields import SerializerMethodField from .models import Correspondent, Tag, Document, Log, DocumentType from .parsers import is_mime_type_supported 
-class CorrespondentSerializer(serializers.HyperlinkedModelSerializer): +class CorrespondentSerializer(serializers.ModelSerializer): document_count = serializers.IntegerField(read_only=True) last_correspondence = serializers.DateTimeField(read_only=True) + def get_slug(self, obj): + return slugify(obj.name) + slug = SerializerMethodField() + class Meta: model = Correspondent fields = ( @@ -26,10 +32,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer): ) -class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer): +class DocumentTypeSerializer(serializers.ModelSerializer): document_count = serializers.IntegerField(read_only=True) + def get_slug(self, obj): + return slugify(obj.name) + slug = SerializerMethodField() + class Meta: model = DocumentType fields = ( @@ -43,10 +53,14 @@ class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer): ) -class TagSerializer(serializers.HyperlinkedModelSerializer): +class TagSerializer(serializers.ModelSerializer): document_count = serializers.IntegerField(read_only=True) + def get_slug(self, obj): + return slugify(obj.name) + slug = SerializerMethodField() + class Meta: model = Tag fields = ( @@ -83,6 +97,18 @@ class DocumentSerializer(serializers.ModelSerializer): tags = TagsField(many=True) document_type = DocumentTypeField(allow_null=True) + original_file_name = SerializerMethodField() + archived_file_name = SerializerMethodField() + + def get_original_file_name(self, obj): + return obj.get_public_filename() + + def get_archived_file_name(self, obj): + if obj.archive_checksum: + return obj.get_public_filename(archive=True) + else: + return None + class Meta: model = Document depth = 1 @@ -96,7 +122,9 @@ class DocumentSerializer(serializers.ModelSerializer): "created", "modified", "added", - "archive_serial_number" + "archive_serial_number", + "original_file_name", + "archived_file_name", ) @@ -150,8 +178,7 @@ class PostDocumentSerializer(serializers.Serializer): required=False, ) - 
def validate(self, attrs): - document = attrs.get('document') + def validate_document(self, document): try: validate_filename(document.name) @@ -163,32 +190,31 @@ class PostDocumentSerializer(serializers.Serializer): if not is_mime_type_supported(mime_type): raise serializers.ValidationError( - "This mime type is not supported.") + "This file type is not supported.") - attrs['document_data'] = document_data + return document.name, document_data - title = attrs.get('title') + def validate_title(self, title): + if title: + return title + else: + # do not return empty strings. + return None - if not title: - attrs['title'] = None - - correspondent = attrs.get('correspondent') + def validate_correspondent(self, correspondent): if correspondent: - attrs['correspondent_id'] = correspondent.id + return correspondent.id else: - attrs['correspondent_id'] = None + return None - document_type = attrs.get('document_type') + def validate_document_type(self, document_type): if document_type: - attrs['document_type_id'] = document_type.id + return document_type.id else: - attrs['document_type_id'] = None + return None - tags = attrs.get('tags') + def validate_tags(self, tags): if tags: - tag_ids = [tag.id for tag in tags] - attrs['tag_ids'] = tag_ids + return [tag.id for tag in tags] else: - attrs['tag_ids'] = None - - return attrs + return None diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 4d9dc9ccd..4fbbe8f8a 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -9,11 +9,13 @@ from django.contrib.contenttypes.models import ContentType from django.db import models, DatabaseError from django.dispatch import receiver from django.utils import timezone +from filelock import FileLock from rest_framework.reverse import reverse from .. 
import index, matching -from ..file_handling import delete_empty_directories, generate_filename, \ - create_source_path_directory, archive_name_from_filename +from ..file_handling import delete_empty_directories, \ + create_source_path_directory, archive_name_from_filename, \ + generate_unique_filename from ..models import Document, Tag @@ -134,7 +136,7 @@ def set_tags(sender, message = 'Tagging "{}" with "{}"' logger( - message.format(document, ", ".join([t.slug for t in relevant_tags])), + message.format(document, ", ".join([t.name for t in relevant_tags])), logging_group ) @@ -157,41 +159,42 @@ def run_post_consume_script(sender, document, **kwargs): Popen(( settings.POST_CONSUME_SCRIPT, str(document.pk), - document.file_name, + document.get_public_filename(), os.path.normpath(document.source_path), os.path.normpath(document.thumbnail_path), reverse("document-download", kwargs={"pk": document.pk}), reverse("document-thumb", kwargs={"pk": document.pk}), str(document.correspondent), - str(",".join(document.tags.all().values_list("slug", flat=True))) + str(",".join(document.tags.all().values_list("name", flat=True))) )).wait() @receiver(models.signals.post_delete, sender=Document) def cleanup_document_deletion(sender, instance, using, **kwargs): - for f in (instance.source_path, - instance.archive_path, - instance.thumbnail_path): - if os.path.isfile(f): - try: - os.unlink(f) - logging.getLogger(__name__).debug( - f"Deleted file {f}.") - except OSError as e: - logging.getLogger(__name__).warning( - f"While deleting document {instance.file_name}, the file " - f"{f} could not be deleted: {e}" - ) + with FileLock(settings.MEDIA_LOCK): + for f in (instance.source_path, + instance.archive_path, + instance.thumbnail_path): + if os.path.isfile(f): + try: + os.unlink(f) + logging.getLogger(__name__).debug( + f"Deleted file {f}.") + except OSError as e: + logging.getLogger(__name__).warning( + f"While deleting document {str(instance)}, the file " + f"{f} could not be 
deleted: {e}" + ) - delete_empty_directories( - os.path.dirname(instance.source_path), - root=settings.ORIGINALS_DIR - ) + delete_empty_directories( + os.path.dirname(instance.source_path), + root=settings.ORIGINALS_DIR + ) - delete_empty_directories( - os.path.dirname(instance.archive_path), - root=settings.ARCHIVE_DIR - ) + delete_empty_directories( + os.path.dirname(instance.archive_path), + root=settings.ARCHIVE_DIR + ) def validate_move(instance, old_path, new_path): @@ -226,81 +229,94 @@ def update_filename_and_move_files(sender, instance, **kwargs): # This will in turn cause this logic to move the file where it belongs. return - old_filename = instance.filename - new_filename = generate_filename(instance) + with FileLock(settings.MEDIA_LOCK): + old_filename = instance.filename + new_filename = generate_unique_filename( + instance, settings.ORIGINALS_DIR) - if new_filename == instance.filename: - # Don't do anything if its the same. - return - - old_source_path = instance.source_path - new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename) - - if not validate_move(instance, old_source_path, new_source_path): - return - - # archive files are optional, archive checksum tells us if we have one, - # since this is None for documents without archived files. - if instance.archive_checksum: - new_archive_filename = archive_name_from_filename(new_filename) - old_archive_path = instance.archive_path - new_archive_path = os.path.join(settings.ARCHIVE_DIR, - new_archive_filename) - - if not validate_move(instance, old_archive_path, new_archive_path): + if new_filename == instance.filename: + # Don't do anything if its the same. 
return - create_source_path_directory(new_archive_path) - else: - old_archive_path = None - new_archive_path = None + old_source_path = instance.source_path + new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename) - create_source_path_directory(new_source_path) + if not validate_move(instance, old_source_path, new_source_path): + return - try: - os.rename(old_source_path, new_source_path) + # archive files are optional, archive checksum tells us if we have one, + # since this is None for documents without archived files. if instance.archive_checksum: - os.rename(old_archive_path, new_archive_path) - instance.filename = new_filename - # Don't save here to prevent infinite recursion. - Document.objects.filter(pk=instance.pk).update(filename=new_filename) + new_archive_filename = archive_name_from_filename(new_filename) + old_archive_path = instance.archive_path + new_archive_path = os.path.join(settings.ARCHIVE_DIR, + new_archive_filename) - logging.getLogger(__name__).debug( - f"Moved file {old_source_path} to {new_source_path}.") + if not validate_move(instance, old_archive_path, new_archive_path): + return - if instance.archive_checksum: - logging.getLogger(__name__).debug( - f"Moved file {old_archive_path} to {new_archive_path}.") + create_source_path_directory(new_archive_path) + else: + old_archive_path = None + new_archive_path = None + + create_source_path_directory(new_source_path) - except OSError as e: - instance.filename = old_filename - # this happens when we can't move a file. If that's the case for the - # archive file, we try our best to revert the changes. try: + os.rename(old_source_path, new_source_path) + if instance.archive_checksum: + os.rename(old_archive_path, new_archive_path) + instance.filename = new_filename + + # Don't save() here to prevent infinite recursion. 
+ Document.objects.filter(pk=instance.pk).update( + filename=new_filename) + + logging.getLogger(__name__).debug( + f"Moved file {old_source_path} to {new_source_path}.") + + if instance.archive_checksum: + logging.getLogger(__name__).debug( + f"Moved file {old_archive_path} to {new_archive_path}.") + + except OSError as e: + instance.filename = old_filename + # this happens when we can't move a file. If that's the case for + # the archive file, we try our best to revert the changes. + # no need to save the instance, the update() has not happened yet. + try: + os.rename(new_source_path, old_source_path) + os.rename(new_archive_path, old_archive_path) + except Exception as e: + # This is fine, since: + # A: if we managed to move source from A to B, we will also + # manage to move it from B to A. If not, we have a serious + # issue that's going to get caught by the sanity checker. + # All files remain in place and will never be overwritten, + # so this is not the end of the world. + # B: if moving the original file failed, nothing has changed + # anyway. + pass + except DatabaseError as e: + # this happens after moving files, so move them back into place. + # since moving them once succeeded, it's very likely going to + # succeed again. os.rename(new_source_path, old_source_path) - os.rename(new_archive_path, old_archive_path) - except Exception as e: - # This is fine, since: - # A: if we managed to move source from A to B, we will also manage - # to move it from B to A. If not, we have a serious issue - # that's going to get caught by the santiy checker. - # all files remain in place and will never be overwritten, - # so this is not the end of the world. - # B: if moving the orignal file failed, nothing has changed anyway.
- pass - except DatabaseError as e: - os.rename(new_source_path, old_source_path) - if instance.archive_checksum: - os.rename(new_archive_path, old_archive_path) - instance.filename = old_filename + if instance.archive_checksum: + os.rename(new_archive_path, old_archive_path) + instance.filename = old_filename + # again, no need to save the instance, since the actual update() + # operation failed. - if not os.path.isfile(old_source_path): - delete_empty_directories(os.path.dirname(old_source_path), - root=settings.ORIGINALS_DIR) + # finally, remove any empty sub folders. This will do nothing if + # something has failed above. + if not os.path.isfile(old_source_path): + delete_empty_directories(os.path.dirname(old_source_path), + root=settings.ORIGINALS_DIR) - if old_archive_path and not os.path.isfile(old_archive_path): - delete_empty_directories(os.path.dirname(old_archive_path), - root=settings.ARCHIVE_DIR) + if old_archive_path and not os.path.isfile(old_archive_path): + delete_empty_directories(os.path.dirname(old_archive_path), + root=settings.ARCHIVE_DIR) def set_log_entry(sender, document=None, logging_group=None, **kwargs): diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 65d767efc..8c9b00dd6 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -1,5 +1,6 @@ import logging +import tqdm from django.conf import settings from whoosh.writing import AsyncWriter @@ -23,7 +24,7 @@ def index_reindex(): ix = index.open_index(recreate=True) with AsyncWriter(ix) as writer: - for document in documents: + for document in tqdm.tqdm(documents): index.update_document(writer, document) diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index b900ee653..572667406 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -1,4 +1,5 @@ import os +import shutil import tempfile from unittest import mock @@ -195,6 +196,24 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): results = 
response.data['results'] self.assertEqual(len(results), 3) + response = self.client.get("/api/documents/?tags__id__none={}".format(tag_3.id)) + self.assertEqual(response.status_code, 200) + results = response.data['results'] + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['id'], doc1.id) + self.assertEqual(results[1]['id'], doc2.id) + + response = self.client.get("/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id)) + self.assertEqual(response.status_code, 200) + results = response.data['results'] + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['id'], doc1.id) + + response = self.client.get("/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id)) + self.assertEqual(response.status_code, 200) + results = response.data['results'] + self.assertEqual(len(results), 0) + def test_search_no_query(self): response = self.client.get("/api/search/") results = response.data['results'] @@ -475,3 +494,34 @@ class TestDocumentApi(DirectoriesMixin, APITestCase): self.assertEqual(response.status_code, 400) async_task.assert_not_called() + + def test_get_metadata(self): + doc = Document.objects.create(title="test", filename="file.pdf", mime_type="image/png", archive_checksum="A") + + shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), doc.source_path) + shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.archive_path) + + response = self.client.get(f"/api/documents/{doc.pk}/metadata/") + self.assertEqual(response.status_code, 200) + + meta = response.data + + self.assertEqual(meta['original_mime_type'], "image/png") + self.assertTrue(meta['has_archive_version']) + self.assertEqual(len(meta['original_metadata']), 0) + self.assertGreater(len(meta['archive_metadata']), 0) + + def test_get_metadata_no_archive(self): + doc = Document.objects.create(title="test", filename="file.pdf", mime_type="application/pdf") + + 
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), doc.source_path) + + response = self.client.get(f"/api/documents/{doc.pk}/metadata/") + self.assertEqual(response.status_code, 200) + + meta = response.data + + self.assertEqual(meta['original_mime_type'], "application/pdf") + self.assertFalse(meta['has_archive_version']) + self.assertGreater(len(meta['original_metadata']), 0) + self.assertIsNone(meta['archive_metadata']) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 992d450db..b4b19be4c 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -27,7 +27,7 @@ class TestAttributes(TestCase): self.assertEqual(file_info.title, title, filename) - self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, filename) + self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename) def test_guess_attributes_from_name0(self): self._test_guess_attributes_from_name( @@ -188,7 +188,7 @@ class TestFieldPermutations(TestCase): self.assertEqual(info.tags, (), filename) else: self.assertEqual( - [t.slug for t in info.tags], tags.split(','), + [t.name for t in info.tags], tags.split(','), filename ) @@ -342,8 +342,8 @@ class TestFieldPermutations(TestCase): info = FileInfo.from_filename(filename) self.assertEqual(info.title, "0001") self.assertEqual(len(info.tags), 2) - self.assertEqual(info.tags[0].slug, "tag1") - self.assertEqual(info.tags[1].slug, "tag2") + self.assertEqual(info.tags[0].name, "tag1") + self.assertEqual(info.tags[1].name, "tag2") self.assertIsNone(info.created) # Complex transformation with date in replacement string @@ -356,8 +356,8 @@ class TestFieldPermutations(TestCase): info = FileInfo.from_filename(filename) self.assertEqual(info.title, "0001") self.assertEqual(len(info.tags), 2) - self.assertEqual(info.tags[0].slug, "tag1") - self.assertEqual(info.tags[1].slug, "tag2") + self.assertEqual(info.tags[0].name, "tag1") + 
self.assertEqual(info.tags[1].name, "tag2") self.assertEqual(info.created.year, 2019) self.assertEqual(info.created.month, 9) self.assertEqual(info.created.day, 8) @@ -598,10 +598,10 @@ class TestConsumer(DirectoriesMixin, TestCase): self.assertEqual(document.title, "new docs") self.assertEqual(document.correspondent.name, "Bank") - self.assertEqual(document.filename, "bank/new-docs-0000001.pdf") + self.assertEqual(document.filename, "Bank/new docs.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") - @mock.patch("documents.signals.handlers.generate_filename") + @mock.patch("documents.signals.handlers.generate_unique_filename") def testFilenameHandlingUnstableFormat(self, m): filenames = ["this", "that", "now this", "i cant decide"] @@ -611,7 +611,7 @@ class TestConsumer(DirectoriesMixin, TestCase): filenames.insert(0, f) return f - m.side_effect = lambda f: get_filename() + m.side_effect = lambda f, root: get_filename() filename = self.get_test_file() diff --git a/src/documents/tests/test_document_model.py b/src/documents/tests/test_document_model.py index 8764c7ec8..74bd9a2a7 100644 --- a/src/documents/tests/test_document_model.py +++ b/src/documents/tests/test_document_model.py @@ -48,19 +48,19 @@ class TestDocument(TestCase): def test_file_name(self): doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test.pdf") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf") def test_file_name_jpg(self): doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test.jpg") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg") def test_file_name_unknown(self): doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test.zip") + self.assertEqual(doc.get_public_filename(), "2020-12-25 
test.zip") - def test_file_name_invalid(self): + def test_file_name_invalid_type(self): doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25)) - self.assertEqual(doc.file_name, "20201225-test") + self.assertEqual(doc.get_public_filename(), "2020-12-25 test") diff --git a/src/documents/tests/test_file_handling.py b/src/documents/tests/test_file_handling.py index cc4bf8053..719b0078a 100644 --- a/src/documents/tests/test_file_handling.py +++ b/src/documents/tests/test_file_handling.py @@ -1,5 +1,8 @@ +import datetime +import hashlib import os -import shutil +import random +import uuid from pathlib import Path from unittest import mock @@ -8,7 +11,8 @@ from django.db import DatabaseError from django.test import TestCase, override_settings from .utils import DirectoriesMixin -from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories +from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \ + generate_unique_filename from ..models import Document, Correspondent @@ -40,13 +44,13 @@ class TestFileHandling(DirectoriesMixin, TestCase): document.filename = generate_filename(document) # Ensure that filename is properly generated - self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(document.filename, "none/none.pdf") # Enable encryption and check again document.storage_type = Document.STORAGE_TYPE_GPG document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf.gpg".format(document.pk)) + "none/none.pdf.gpg") document.save() @@ -62,7 +66,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Check proper handling of files self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + 
"/test/test-{:07d}.pdf.gpg".format(document.pk)), True) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_file_renaming_missing_permissions(self): @@ -74,12 +78,12 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() # Test source_path - self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf") # Make the folder read- and execute-only (no writing and no renaming) os.chmod(settings.ORIGINALS_DIR + "/none", 0o555) @@ -89,8 +93,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): document.save() # Check proper handling of files - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) - self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True) + self.assertEqual(document.filename, "none/none.pdf") os.chmod(settings.ORIGINALS_DIR + "/none", 0o777) @@ -108,7 +112,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() @@ -125,8 +129,8 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Check proper handling of files self.assertTrue(os.path.isfile(document.source_path)) - 
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True) - self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True) + self.assertEqual(document.filename, "none/none.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") def test_document_delete(self): @@ -138,7 +142,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() @@ -146,7 +150,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure file deletion after delete pk = document.pk document.delete() - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}") @@ -168,7 +172,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) self.assertEqual(document.filename, - "none/none-{:07d}.pdf".format(document.pk)) + "none/none.pdf") create_source_path_directory(document.source_path) @@ -199,7 +203,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "demo-{:07d}.pdf".format(document.pk)) + "demo.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_with_dash(self): @@ -215,7 +219,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename 
is properly generated self.assertEqual(generate_filename(document), - "demo-{:07d}.pdf".format(document.pk)) + "demo.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}") def test_tags_malformed(self): @@ -231,7 +235,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "none-{:07d}.pdf".format(document.pk)) + "none.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}") def test_tags_all(self): @@ -246,7 +250,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "demo-{:07d}.pdf".format(document.pk)) + "demo.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}") def test_tags_out_of_bounds(self): @@ -261,7 +265,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated self.assertEqual(generate_filename(document), - "none-{:07d}.pdf".format(document.pk)) + "none.pdf") @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}") def test_nested_directory_cleanup(self): @@ -272,7 +276,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): # Ensure that filename is properly generated document.filename = generate_filename(document) - self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk)) + self.assertEqual(document.filename, "none/none/none.pdf") create_source_path_directory(document.source_path) Path(document.source_path).touch() @@ -282,7 +286,7 @@ class TestFileHandling(DirectoriesMixin, TestCase): pk = document.pk document.delete() - self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False) + self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False) 
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False) self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True) @@ -330,6 +334,48 @@ class TestFileHandling(DirectoriesMixin, TestCase): self.assertEqual(generate_filename(document), "0000001.pdf") + @override_settings(PAPERLESS_FILENAME_FORMAT="{title}") + def test_duplicates(self): + document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1) + document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2) + Path(document.source_path).touch() + Path(document2.source_path).touch() + document.filename = "0000001.pdf" + document.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document.filename, "qwe.pdf") + + document2.filename = "0000002.pdf" + document2.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document2.filename, "qwe_01.pdf") + + # saving should not change the file names. 
+ + document.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document.filename, "qwe.pdf") + + document2.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document2.filename, "qwe_01.pdf") + + document.delete() + + self.assertFalse(os.path.isfile(document.source_path)) + + # filename free, should remove _01 suffix + + document2.save() + + self.assertTrue(os.path.isfile(document.source_path)) + self.assertEqual(document2.filename, "qwe.pdf") + + class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): @@ -358,15 +404,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): self.assertFalse(os.path.isfile(archive)) self.assertTrue(os.path.isfile(doc.source_path)) self.assertTrue(os.path.isfile(doc.archive_path)) - self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf")) - self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")) + self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")) + self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")) @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") def test_move_archive_gone(self): original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf") archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf") Path(original).touch() - #Path(archive).touch() doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") self.assertTrue(os.path.isfile(original)) @@ -381,7 +426,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): Path(original).touch() Path(archive).touch() os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none")) - Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch() + Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")).touch() 
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B") self.assertTrue(os.path.isfile(original)) @@ -485,3 +530,44 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase): self.assertTrue(os.path.isfile(archive)) self.assertTrue(os.path.isfile(doc.source_path)) self.assertTrue(os.path.isfile(doc.archive_path)) + +class TestFilenameGeneration(TestCase): + + @override_settings( + PAPERLESS_FILENAME_FORMAT="{title}" + ) + def test_invalid_characters(self): + + doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1") + self.assertEqual(generate_filename(doc), "This. is the title.pdf") + + doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2") + self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf") + + @override_settings( + PAPERLESS_FILENAME_FORMAT="{created}" + ) + def test_date(self): + doc = Document.objects.create(title="does not matter", created=datetime.datetime(2020,5,21, 7,36,51, 153), mime_type="application/pdf", pk=2, checksum="2") + self.assertEqual(generate_filename(doc), "2020-05-21.pdf") + + +def run(): + doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow") + doc.filename = generate_unique_filename(doc, settings.ORIGINALS_DIR) + Path(doc.thumbnail_path).touch() + with open(doc.source_path, "w") as f: + f.write(str(uuid.uuid4())) + with open(doc.source_path, "rb") as f: + doc.checksum = hashlib.md5(f.read()).hexdigest() + + with open(doc.archive_path, "w") as f: + f.write(str(uuid.uuid4())) + with open(doc.archive_path, "rb") as f: + doc.archive_checksum = hashlib.md5(f.read()).hexdigest() + + doc.save() + + for i in range(30): + doc.title = str(random.randrange(1, 5)) + doc.save() diff --git a/src/documents/tests/test_management_archiver.py b/src/documents/tests/test_management_archiver.py index 
fdb588acf..0828f05ff 100644 --- a/src/documents/tests/test_management_archiver.py +++ b/src/documents/tests/test_management_archiver.py @@ -16,25 +16,23 @@ sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") class TestArchiver(DirectoriesMixin, TestCase): def make_models(self): - self.d1 = Document.objects.create(checksum="A", title="A", content="first document", pk=1, mime_type="application/pdf") - #self.d2 = Document.objects.create(checksum="B", title="B", content="second document") - #self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document") + return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf") def test_archiver(self): - shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf")) - self.make_models() + doc = self.make_models() + shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) call_command('document_archiver') def test_handle_document(self): - shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf")) - self.make_models() + doc = self.make_models() + shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")) - handle_document(self.d1.pk) + handle_document(doc.pk) - doc = Document.objects.get(id=self.d1.id) + doc = Document.objects.get(id=doc.id) self.assertIsNotNone(doc.checksum) self.assertTrue(os.path.isfile(doc.archive_path)) diff --git a/src/documents/tests/test_management_consumer.py b/src/documents/tests/test_management_consumer.py index 6973fdacf..b6a61a167 100644 --- a/src/documents/tests/test_management_consumer.py +++ b/src/documents/tests/test_management_consumer.py @@ -230,7 +230,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase): tag_names = ("existingTag", "Space Tag") # Create a Tag prior to consuming a file using it in path - tag_ids = [Tag.objects.create(name=tag_names[0]).pk,] + tag_ids = 
[Tag.objects.create(name="existingtag").pk,] self.t_start() diff --git a/src/documents/tests/test_management_decrypt.py b/src/documents/tests/test_management_decrypt.py index f68ea7cc1..1d64b1105 100644 --- a/src/documents/tests/test_management_decrypt.py +++ b/src/documents/tests/test_management_decrypt.py @@ -35,20 +35,20 @@ class TestDecryptDocuments(TestCase): PASSPHRASE="test" ).enable() - shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg")) - shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000002.png.gpg"), os.path.join(thumb_dir, "0000002.png.gpg")) + doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) - Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) + shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg")) + shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg")) call_command('decrypt_documents') - doc = Document.objects.get(id=2) + doc.refresh_from_db() self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED) self.assertEqual(doc.filename, "0000002.pdf") self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf"))) self.assertTrue(os.path.isfile(doc.source_path)) - self.assertTrue(os.path.isfile(os.path.join(thumb_dir, "0000002.png"))) + self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png"))) self.assertTrue(os.path.isfile(doc.thumbnail_path)) with doc.source_file as f: diff --git 
a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 284d6108d..22d6fc7f6 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -24,13 +24,14 @@ class TestExportImport(DirectoriesMixin, TestCase): file = os.path.join(self.dirs.originals_dir, "0000001.pdf") - Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", id=1, mime_type="application/pdf") - Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) + Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf") + Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG) Tag.objects.create(name="t") DocumentType.objects.create(name="dt") Correspondent.objects.create(name="c") target = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, target) call_command('document_exporter', target) @@ -66,6 +67,6 @@ class TestExportImport(DirectoriesMixin, TestCase): def test_export_missing_files(self): target = tempfile.mkdtemp() - call_command('document_exporter', target) - Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf") + self.addCleanup(shutil.rmtree, target) + Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", mime_type="application/pdf") self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target) diff --git 
a/src/documents/views.py b/src/documents/views.py index adef757ef..8dbb61dc7 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -1,8 +1,11 @@ +import logging import os +import re import tempfile from datetime import datetime from time import mktime +import pikepdf from django.conf import settings from django.db.models import Count, Max from django.http import HttpResponse, HttpResponseBadRequest, Http404 @@ -145,11 +148,11 @@ class DocumentViewSet(RetrieveModelMixin, doc = Document.objects.get(id=pk) if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501 file_handle = doc.archive_file - filename = doc.archive_file_name + filename = doc.get_public_filename(archive=True) mime_type = 'application/pdf' else: file_handle = doc.source_file - filename = doc.file_name + filename = doc.get_public_filename() mime_type = doc.mime_type if doc.storage_type == Document.STORAGE_TYPE_GPG: @@ -160,17 +163,61 @@ class DocumentViewSet(RetrieveModelMixin, disposition, filename) return response + def get_metadata(self, file, type): + if not os.path.isfile(file): + return None + + namespace_pattern = re.compile(r"\{(.*)\}(.*)") + + result = [] + if type == 'application/pdf': + pdf = pikepdf.open(file) + meta = pdf.open_metadata() + for key, value in meta.items(): + if isinstance(value, list): + value = " ".join([str(e) for e in value]) + value = str(value) + try: + m = namespace_pattern.match(key) + result.append({ + "namespace": m.group(1), + "prefix": meta.REVERSE_NS[m.group(1)], + "key": m.group(2), + "value": value + }) + except Exception as e: + logging.getLogger(__name__).warning( + f"Error while reading metadata {key}: {value}. 
Error: " + f"{e}" + ) + return result + @action(methods=['get'], detail=True) def metadata(self, request, pk=None): try: doc = Document.objects.get(pk=pk) - return Response({ - "paperless__checksum": doc.checksum, - "paperless__mime_type": doc.mime_type, - "paperless__filename": doc.filename, - "paperless__has_archive_version": - os.path.isfile(doc.archive_path) - }) + + meta = { + "original_checksum": doc.checksum, + "original_size": os.stat(doc.source_path).st_size, + "original_mime_type": doc.mime_type, + "media_filename": doc.filename, + "has_archive_version": os.path.isfile(doc.archive_path), + "original_metadata": self.get_metadata( + doc.source_path, doc.mime_type) + } + + if doc.archive_checksum and os.path.isfile(doc.archive_path): + meta['archive_checksum'] = doc.archive_checksum + meta['archive_size'] = os.stat(doc.archive_path).st_size, + meta['archive_metadata'] = self.get_metadata( + doc.archive_path, "application/pdf") + else: + meta['archive_checksum'] = None + meta['archive_size'] = None + meta['archive_metadata'] = None + + return Response(meta) except Document.DoesNotExist: raise Http404() @@ -235,12 +282,11 @@ class PostDocumentView(APIView): serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) - document = serializer.validated_data['document'] - document_data = serializer.validated_data['document_data'] - correspondent_id = serializer.validated_data['correspondent_id'] - document_type_id = serializer.validated_data['document_type_id'] - tag_ids = serializer.validated_data['tag_ids'] - title = serializer.validated_data['title'] + doc_name, doc_data = serializer.validated_data.get('document') + correspondent_id = serializer.validated_data.get('correspondent') + document_type_id = serializer.validated_data.get('document_type') + tag_ids = serializer.validated_data.get('tags') + title = serializer.validated_data.get('title') t = int(mktime(datetime.now().timetuple())) @@ -249,17 +295,17 @@ class 
PostDocumentView(APIView): with tempfile.NamedTemporaryFile(prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False) as f: - f.write(document_data) + f.write(doc_data) os.utime(f.name, times=(t, t)) async_task("documents.tasks.consume_file", f.name, - override_filename=document.name, + override_filename=doc_name, override_title=title, override_correspondent_id=correspondent_id, override_document_type_id=document_type_id, override_tag_ids=tag_ids, - task_name=os.path.basename(document.name)[:100]) + task_name=os.path.basename(doc_name)[:100]) return Response("OK") diff --git a/src/paperless/settings.py b/src/paperless/settings.py index c7ecf7645..cf0c3e28d 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -53,6 +53,10 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive") THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails") DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data")) + +# Lock file for synchronizing changes to the MEDIA directory across multiple +# threads. +MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock") INDEX_DIR = os.path.join(DATA_DIR, "index") MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle") diff --git a/src/paperless/version.py b/src/paperless/version.py index 26e46fea8..527e0668d 100644 --- a/src/paperless/version.py +++ b/src/paperless/version.py @@ -1 +1 @@ -__version__ = (0, 9, 5) +__version__ = (0, 9, 6) diff --git a/src/paperless_mail/mail.py b/src/paperless_mail/mail.py index 08f7365da..a82c34f15 100644 --- a/src/paperless_mail/mail.py +++ b/src/paperless_mail/mail.py @@ -103,10 +103,7 @@ class MailAccountHandler(LoggingMixin): def _correspondent_from_name(self, name): try: - return Correspondent.objects.get_or_create( - name=name, defaults={ - "slug": slugify(name) - })[0] + return Correspondent.objects.get_or_create(name=name)[0] except DatabaseError as e: self.log( "error",
ASN Correspondent - {{d.title}} + {{d.title}} diff --git a/src-ui/src/app/components/document-list/document-list.component.ts b/src-ui/src/app/components/document-list/document-list.component.ts index fe6c8a894..09e73dd96 100644 --- a/src-ui/src/app/components/document-list/document-list.component.ts +++ b/src-ui/src/app/components/document-list/document-list.component.ts @@ -1,4 +1,5 @@ import { Component, OnInit } from '@angular/core'; +import { Title } from '@angular/platform-browser'; import { ActivatedRoute } from '@angular/router'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { cloneFilterRules, FilterRule } from 'src/app/data/filter-rule'; @@ -8,6 +9,7 @@ import { DocumentListViewService } from 'src/app/services/document-list-view.ser import { DOCUMENT_SORT_FIELDS } from 'src/app/services/rest/document.service'; import { SavedViewConfigService } from 'src/app/services/saved-view-config.service'; import { Toast, ToastService } from 'src/app/services/toast.service'; +import { environment } from 'src/environments/environment'; import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-view-config-dialog.component'; @Component({ @@ -22,13 +24,18 @@ export class DocumentListComponent implements OnInit { public savedViewConfigService: SavedViewConfigService, public route: ActivatedRoute, private toastService: ToastService, - public modalService: NgbModal) { } + public modalService: NgbModal, + private titleService: Title) { } displayMode = 'smallCards' // largeCards, smallCards, details filterRules: FilterRule[] = [] showFilter = false + get isFiltered() { + return this.list.filterRules?.length > 0 + } + getTitle() { return this.list.savedViewTitle || "Documents" } @@ -50,10 +57,12 @@ export class DocumentListComponent implements OnInit { this.list.savedView = this.savedViewConfigService.getConfig(params.get('id')) this.filterRules = this.list.filterRules this.showFilter = false + 
this.titleService.setTitle(`${this.list.savedView.title} - ${environment.appTitle}`) } else { this.list.savedView = null this.filterRules = this.list.filterRules this.showFilter = this.filterRules.length > 0 + this.titleService.setTitle(`Documents - ${environment.appTitle}`) } this.list.clear() this.list.reload() diff --git a/src-ui/src/app/components/filter-editor/filter-editor.component.ts b/src-ui/src/app/components/filter-editor/filter-editor.component.ts index 2eeac7dcd..b04127287 100644 --- a/src-ui/src/app/components/filter-editor/filter-editor.component.ts +++ b/src-ui/src/app/components/filter-editor/filter-editor.component.ts @@ -34,7 +34,7 @@ export class FilterEditorComponent implements OnInit { documentTypes: PaperlessDocumentType[] = [] newRuleClicked() { - this.filterRules.push({type: this.selectedRuleType, value: null}) + this.filterRules.push({type: this.selectedRuleType, value: this.selectedRuleType.default}) this.selectedRuleType = this.getRuleTypes().length > 0 ? this.getRuleTypes()[0] : null } diff --git a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts index 83aa5d2cc..11027c60f 100644 --- a/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts +++ b/src-ui/src/app/components/manage/correspondent-list/correspondent-list.component.ts @@ -1,7 +1,9 @@ -import { Component } from '@angular/core'; +import { Component, OnInit } from '@angular/core'; +import { Title } from '@angular/platform-browser'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent'; import { CorrespondentService } from 'src/app/services/rest/correspondent.service'; +import { environment } from 'src/environments/environment'; import { GenericListComponent } from '../generic-list/generic-list.component'; import { CorrespondentEditDialogComponent } from 
'./correspondent-edit-dialog/correspondent-edit-dialog.component'; @@ -10,14 +12,19 @@ import { CorrespondentEditDialogComponent } from './correspondent-edit-dialog/co templateUrl: './correspondent-list.component.html', styleUrls: ['./correspondent-list.component.scss'] }) -export class CorrespondentListComponent extends GenericListComponent { +export class CorrespondentListComponent extends GenericListComponent implements OnInit { - constructor(correspondentsService: CorrespondentService, - modalService: NgbModal) { - super(correspondentsService,modalService,CorrespondentEditDialogComponent) - } + constructor(correspondentsService: CorrespondentService, modalService: NgbModal, private titleService: Title) { + super(correspondentsService,modalService,CorrespondentEditDialogComponent) + } + + getObjectName(object: PaperlessCorrespondent) { + return `correspondent '${object.name}'` + } + + ngOnInit(): void { + super.ngOnInit() + this.titleService.setTitle(`Correspondents - ${environment.appTitle}`) + } - getObjectName(object: PaperlessCorrespondent) { - return `correspondent '${object.name}'` - } } diff --git a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts index 733d2c44b..316024514 100644 --- a/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts +++ b/src-ui/src/app/components/manage/document-type-list/document-type-list.component.ts @@ -1,7 +1,9 @@ import { Component, OnInit } from '@angular/core'; +import { Title } from '@angular/platform-browser'; import { NgbModal } from '@ng-bootstrap/ng-bootstrap'; import { PaperlessDocumentType } from 'src/app/data/paperless-document-type'; import { DocumentTypeService } from 'src/app/services/rest/document-type.service'; +import { environment } from 'src/environments/environment'; import { GenericListComponent } from '../generic-list/generic-list.component'; import { 
DocumentTypeEditDialogComponent } from './document-type-edit-dialog/document-type-edit-dialog.component'; @@ -10,13 +12,18 @@ import { DocumentTypeEditDialogComponent } from './document-type-edit-dialog/doc templateUrl: './document-type-list.component.html', styleUrls: ['./document-type-list.component.scss'] }) -export class DocumentTypeListComponent extends GenericListComponent { +export class DocumentTypeListComponent extends GenericListComponent implements OnInit { - constructor(service: DocumentTypeService, modalService: NgbModal) { + constructor(service: DocumentTypeService, modalService: NgbModal, private titleService: Title) { super(service, modalService, DocumentTypeEditDialogComponent) - } + } - getObjectName(object: PaperlessDocumentType) { + getObjectName(object: PaperlessDocumentType) { return `document type '${object.name}'` } + + ngOnInit(): void { + super.ngOnInit() + this.titleService.setTitle(`Document types - ${environment.appTitle}`) + } } diff --git a/src-ui/src/app/components/manage/logs/logs.component.ts b/src-ui/src/app/components/manage/logs/logs.component.ts index d52b90a5a..44d0fa24d 100644 --- a/src-ui/src/app/components/manage/logs/logs.component.ts +++ b/src-ui/src/app/components/manage/logs/logs.component.ts @@ -1,7 +1,8 @@ import { Component, OnInit } from '@angular/core'; -import { kMaxLength } from 'buffer'; +import { Title } from '@angular/platform-browser'; import { LOG_LEVELS, LOG_LEVEL_INFO, PaperlessLog } from 'src/app/data/paperless-log'; import { LogService } from 'src/app/services/rest/log.service'; +import { environment } from 'src/environments/environment'; @Component({ selector: 'app-logs', @@ -10,13 +11,14 @@ import { LogService } from 'src/app/services/rest/log.service'; }) export class LogsComponent implements OnInit { - constructor(private logService: LogService) { } + constructor(private logService: LogService, private titleService: Title) { } logs: PaperlessLog[] = [] level: number = LOG_LEVEL_INFO ngOnInit(): void 
{ this.reload() + this.titleService.setTitle(`Logs - ${environment.appTitle}`) } reload() { diff --git a/src-ui/src/app/components/manage/settings/settings.component.html b/src-ui/src/app/components/manage/settings/settings.component.html index 91eab807b..7a500e6eb 100644 --- a/src-ui/src/app/components/manage/settings/settings.component.html +++ b/src-ui/src/app/components/manage/settings/settings.component.html @@ -46,8 +46,8 @@
{{ config.title }}{{ config.showInDashboard }}{{ config.showInSideBar }}{{ config.showInDashboard | yesno }}{{ config.showInSideBar | yesno }}