Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-12-24 02:05:48 -06:00)

Compare commits: feature-do... → 8e0d574e99 (43 commits)
Commits in this comparison (SHA1):

8e0d574e99, 4449dbadb5, 43b4f36026, 0e35acaef5, 19ff339804, 6b868a5ecb, 8a5820328e, 809d62a2f4,
0d87f94b9b, 3e4aa87cc5, fc95d42b35, 315b90f8e5, 47b2d2964b, e05639ae4e, f400a8cb2f, 26abcf5612,
afde52430d, 716f2da652, c54073b7c2, 247e6f39dc, 1e6dfc4481, 7cc0750066, bd6585d3b4, 717e828a1d,
07381d48e6, dd0ffaf312, 264504affc, 4feedf2add, 2f76cf9831, 1002d37f6b, d260a94740, 88c69b83ea,
2557ee2014, 3c75deed80, d05343c927, e7972b7eaf, 75a091cc0d, dca74803fd, 3cf3d868d0, bf4fc6604a,
e8c1eb86fa, c3dad3cf69, 811bd66088
@@ -2,9 +2,11 @@
 
 If you feel like contributing to the project, please do! Bug fixes and improvements are always welcome.
 
+⚠️ Please note: Pull requests that implement a new feature or enhancement _should almost always target an existing feature request_ with evidence of community interest and discussion. This is in order to balance the work of implementing and maintaining new features / enhancements. Pull requests that are opened without meeting this requirement may not be merged.
+
 If you want to implement something big:
 
-- Please start a discussion about that in the issues! Maybe something similar is already in development and we can make it happen together.
+- As above, please start with a discussion! Maybe something similar is already in development and we can make it happen together.
 - When making additions to the project, consider if the majority of users will benefit from your change. If not, you're probably better of forking the project.
 - Also consider if your change will get in the way of other users. A good change is a change that enhances the experience of some users who want that change and does not affect users who do not care about the change.
 - Please see the [paperless-ngx merge process](#merging-prs) below.
@@ -1759,6 +1759,11 @@ started by the container.
 
 : Path to an image file in the /media/logo directory, must include 'logo', e.g. `/logo/Atari_logo.svg`
 
+!!! note
+
+    The logo file will be viewable by anyone with access to the Paperless instance login page,
+    so consider your choice of logo carefully and removing exif data from images before uploading.
+
 #### [`PAPERLESS_ENABLE_UPDATE_CHECK=<bool>`](#PAPERLESS_ENABLE_UPDATE_CHECK) {#PAPERLESS_ENABLE_UPDATE_CHECK}
 
 !!! note
@@ -1800,3 +1805,23 @@ password. All of these options come from their similarly-named [Django settings]
 #### [`PAPERLESS_EMAIL_USE_SSL=<bool>`](#PAPERLESS_EMAIL_USE_SSL) {#PAPERLESS_EMAIL_USE_SSL}
 
 : Defaults to false.
+
+## Remote OCR
+
+#### [`PAPERLESS_REMOTE_OCR_ENGINE=<str>`](#PAPERLESS_REMOTE_OCR_ENGINE) {#PAPERLESS_REMOTE_OCR_ENGINE}
+
+: The remote OCR engine to use. Currently only Azure AI is supported as "azureai".
+
+    Defaults to None, which disables remote OCR.
+
+#### [`PAPERLESS_REMOTE_OCR_API_KEY=<str>`](#PAPERLESS_REMOTE_OCR_API_KEY) {#PAPERLESS_REMOTE_OCR_API_KEY}
+
+: The API key to use for the remote OCR engine.
+
+    Defaults to None.
+
+#### [`PAPERLESS_REMOTE_OCR_ENDPOINT=<str>`](#PAPERLESS_REMOTE_OCR_ENDPOINT) {#PAPERLESS_REMOTE_OCR_ENDPOINT}
+
+: The endpoint to use for the remote OCR engine. This is required for Azure AI.
+
+    Defaults to None.
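Taken together, remote OCR stays disabled unless `PAPERLESS_REMOTE_OCR_ENGINE` is set, and the Azure engine additionally needs both the API key and the endpoint. A minimal, illustrative sketch of that relationship — this helper is not part of Paperless-ngx, it only mirrors the settings documented in the hunk above:

```python
# Hypothetical helper (not Paperless-ngx code) showing how the three settings relate.
import os


def resolve_remote_ocr_settings() -> dict | None:
    engine = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
    if engine is None:
        return None  # remote OCR disabled (the default)
    if engine != "azureai":
        raise ValueError(f"Unsupported remote OCR engine: {engine!r}")

    api_key = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
    endpoint = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
    if not api_key or not endpoint:
        raise ValueError("Azure AI requires both an API key and an endpoint")
    return {"engine": engine, "api_key": api_key, "endpoint": endpoint}
```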
@@ -25,9 +25,10 @@ physical documents into a searchable online archive so you can keep, well, _less
 ## Features
 
 - **Organize and index** your scanned documents with tags, correspondents, types, and more.
-- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way.
+- _Your_ data is stored locally on _your_ server and is never transmitted or shared in any way, unless you explicitly choose to do so.
 - Performs **OCR** on your documents, adding searchable and selectable text, even to documents scanned with only images.
 - Utilizes the open-source Tesseract engine to recognize more than 100 languages.
+- _New!_ Supports remote OCR with Azure AI (opt-in).
 - Documents are saved as PDF/A format which is designed for long term storage, alongside the unaltered originals.
 - Uses machine-learning to automatically add tags, correspondents and document types to your documents.
 - Supports PDF documents, images, plain text files, Office documents (Word, Excel, PowerPoint, and LibreOffice equivalents)[^1] and more.
@@ -878,6 +878,21 @@ how regularly you intend to scan documents and use paperless.
 performed the task associated with the document, move it to the
 inbox.
 
+## Remote OCR
+
+!!! important
+
+    This feature is disabled by default and will always remain strictly "opt-in".
+
+Paperless-ngx supports performing OCR on documents using remote services. At the moment, this is limited to
+[Microsoft's Azure "Document Intelligence" service](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence).
+This is of course a paid service (with a free tier) which requires an Azure account and subscription. Azure AI is not affiliated with
+Paperless-ngx in any way. When enabled, Paperless-ngx will automatically send appropriate documents to Azure for OCR processing, bypassing
+the local OCR engine. See the [configuration](configuration.md#PAPERLESS_REMOTE_OCR_ENGINE) options for more details.
+
+Additionally, when using a commercial service with this feature, consider both potential costs as well as any associated file size
+or page limitations (e.g. with a free tier).
+
 ## Architecture
 
 Paperless-ngx consists of the following components:
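For readers who want to see what such a remote call looks like in isolation, here is a hedged sketch using the public `azure-ai-documentintelligence` SDK that this branch adds as a dependency. It is not Paperless-ngx's internal parser code, keyword names may vary slightly between SDK releases, and the endpoint and key stand in for `PAPERLESS_REMOTE_OCR_ENDPOINT` and `PAPERLESS_REMOTE_OCR_API_KEY`:

```python
# Standalone sketch: OCR a PDF with Azure Document Intelligence.
# "prebuilt-read" is Azure's plain text-extraction model.
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential

client = DocumentIntelligenceClient(
    endpoint="https://<resource>.cognitiveservices.azure.com/",  # hypothetical endpoint
    credential=AzureKeyCredential("<api-key>"),                  # hypothetical key
)

with open("scan.pdf", "rb") as fh:
    poller = client.begin_analyze_document(
        "prebuilt-read",
        fh,
        content_type="application/octet-stream",
    )
result = poller.result()
print(result.content)  # the extracted, searchable text
```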
@@ -15,6 +15,7 @@ classifiers = [
 # This will allow testing to not install a webserver, mysql, etc
 
 dependencies = [
+    "azure-ai-documentintelligence>=1.0.2",
     "babel>=2.17",
     "bleach~=6.2.0",
     "celery[redis]~=5.5.1",
@@ -233,6 +234,7 @@ testpaths = [
     "src/paperless_tesseract/tests/",
     "src/paperless_tika/tests",
     "src/paperless_text/tests/",
+    "src/paperless_remote/tests/",
 ]
 addopts = [
     "--pythonwarnings=all",
@@ -1,30 +1,7 @@
 <pngx-page-header [(title)]="title">
-
-  @if (document?.versions?.length > 0) {
-    <div class="btn-group" ngbDropdown role="group">
-      <div class="btn-group" ngbDropdown role="group">
-        <button class="btn btn-sm btn-outline-secondary dropdown-toggle" ngbDropdownToggle [disabled]="!hasVersions">
-          <i-bs name="layers"></i-bs>
-          <span class="d-none d-lg-inline ps-1" i18n>Version</span>
-        </button>
-        <div class="dropdown-menu shadow" ngbDropdownMenu>
-          @for (vid of document.versions; track vid) {
-            <button ngbDropdownItem (click)="selectVersion(vid)">
-              <span i18n>Version</span> {{vid}}
-              @if (selectedVersionId === vid) { <span> ✓</span> }
-            </button>
-          }
-        </div>
-      </div>
-      <input #versionFileInput type="file" class="visually-hidden" (change)="onVersionFileSelected($event)" />
-      <button class="btn btn-sm btn-outline-secondary" title="Upload new version" i18n-title (click)="versionFileInput.click()" [disabled]="!userIsOwner || !userCanEdit">
-        <i-bs name="file-earmark-plus"></i-bs><span class="visually-hidden" i18n>Upload new version</span>
-      </button>
-    </div>
-  }
   @if (archiveContentRenderType === ContentRenderType.PDF && !useNativePdfViewer) {
     @if (previewNumPages) {
-      <div class="input-group input-group-sm ms-2 d-none d-md-flex">
+      <div class="input-group input-group-sm d-none d-md-flex">
         <div class="input-group-text" i18n>Page</div>
         <input class="form-control flex-grow-0 w-auto" type="number" min="1" [max]="previewNumPages" [(ngModel)]="previewCurrentPage" />
         <div class="input-group-text" i18n>of {{previewNumPages}}</div>
@@ -222,8 +222,6 @@ export class DocumentDetailComponent
   titleSubject: Subject<string> = new Subject()
   previewUrl: string
   thumbUrl: string
-  // Versioning: which document ID to use for file preview/download
-  selectedVersionId: number
   previewText: string
   previewLoaded: boolean = false
   tiffURL: string
@@ -272,7 +270,6 @@ export class DocumentDetailComponent
   public readonly DataType = DataType
 
   @ViewChild('nav') nav: NgbNav
-  @ViewChild('versionFileInput') versionFileInput
   @ViewChild('pdfPreview') set pdfPreview(element) {
     // this gets called when component added or removed from DOM
     if (
@@ -405,10 +402,7 @@ export class DocumentDetailComponent
   }
 
   private loadDocument(documentId: number): void {
-    this.selectedVersionId = documentId
-    this.previewUrl = this.documentsService.getPreviewUrl(
-      this.selectedVersionId
-    )
+    this.previewUrl = this.documentsService.getPreviewUrl(documentId)
     this.http
       .get(this.previewUrl, { responseType: 'text' })
       .pipe(
@@ -423,7 +417,7 @@ export class DocumentDetailComponent
             err.message ?? err.toString()
           }`),
       })
-    this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId)
+    this.thumbUrl = this.documentsService.getThumbUrl(documentId)
     this.documentsService
       .get(documentId)
       .pipe(
@@ -644,10 +638,6 @@ export class DocumentDetailComponent
 
   updateComponent(doc: Document) {
     this.document = doc
-    // Default selected version is the newest version
-    this.selectedVersionId = doc.versions?.length
-      ? Math.max(...doc.versions)
-      : doc.id
    this.requiresPassword = false
     this.updateFormForCustomFields()
     if (this.archiveContentRenderType === ContentRenderType.TIFF) {
@@ -712,36 +702,6 @@ export class DocumentDetailComponent
     this.prepareForm(doc)
   }
 
-  get hasVersions(): boolean {
-    return this.document?.versions?.length > 1
-  }
-
-  // Update file preview and download target to a specific version (by document id)
-  selectVersion(versionId: number) {
-    this.selectedVersionId = versionId
-    this.previewUrl = this.documentsService.getPreviewUrl(
-      this.documentId,
-      false,
-      this.selectedVersionId
-    )
-    this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId)
-    // For text previews, refresh content
-    this.http
-      .get(this.previewUrl, { responseType: 'text' })
-      .pipe(
-        first(),
-        takeUntil(this.unsubscribeNotifier),
-        takeUntil(this.docChangeNotifier)
-      )
-      .subscribe({
-        next: (res) => (this.previewText = res.toString()),
-        error: (err) =>
-          (this.previewText = $localize`An error occurred loading content: ${
-            err.message ?? err.toString()
-          }`),
-      })
-  }
-
   get customFieldFormFields(): FormArray {
     return this.documentForm.get('custom_fields') as FormArray
   }
@@ -1089,36 +1049,10 @@ export class DocumentDetailComponent
       })
   }
 
-  onVersionFileSelected(event: Event) {
-    const input = event.target as HTMLInputElement
-    if (!input?.files || input.files.length === 0) return
-    const file = input.files[0]
-    // Reset input to allow re-selection of the same file later
-    input.value = ''
-    this.documentsService
-      .uploadVersion(this.documentId, file)
-      .pipe(first())
-      .subscribe({
-        next: () => {
-          this.toastService.showInfo(
-            $localize`Uploading new version. Processing will happen in the background.`
-          )
-          // Refresh metadata to reflect that versions changed (when ready)
-          this.openDocumentService.refreshDocument(this.documentId)
-        },
-        error: (error) => {
-          this.toastService.showError(
-            $localize`Error uploading new version`,
-            error
-          )
-        },
-      })
-  }
-
   download(original: boolean = false) {
     this.downloading = true
     const downloadUrl = this.documentsService.getDownloadUrl(
-      this.selectedVersionId || this.documentId,
+      this.documentId,
       original
     )
     this.http
@@ -71,4 +71,20 @@ describe('TagListComponent', () => {
       'Do you really want to delete the tag "Tag1"?'
     )
   })
+
+  it('should filter out child tags if name filter is empty, otherwise show all', () => {
+    const tags = [
+      { id: 1, name: 'Tag1', parent: null },
+      { id: 2, name: 'Tag2', parent: 1 },
+      { id: 3, name: 'Tag3', parent: null },
+    ]
+    component['_nameFilter'] = null // Simulate empty name filter
+    const filtered = component.filterData(tags as any)
+    expect(filtered.length).toBe(2)
+    expect(filtered.find((t) => t.id === 2)).toBeUndefined()
+
+    component['_nameFilter'] = 'Tag2' // Simulate non-empty name filter
+    const filteredWithName = component.filterData(tags as any)
+    expect(filteredWithName.length).toBe(3)
+  })
 })
@@ -62,6 +62,8 @@ export class TagListComponent extends ManagementListComponent<Tag> {
   }
 
   filterData(data: Tag[]) {
-    return data.filter((tag) => !tag.parent)
+    return this.nameFilter?.length
+      ? [...data]
+      : data.filter((tag) => !tag.parent)
   }
 }
@@ -159,10 +159,6 @@ export interface Document extends ObjectWithPermissions {
 
   page_count?: number
 
-  // Versioning
-  head_version?: number
-  versions?: number[]
-
   // Frontend only
   __changedFields?: string[]
 }
@@ -163,19 +163,12 @@ export class DocumentService extends AbstractPaperlessService<Document> {
       })
   }
 
-  getPreviewUrl(
-    id: number,
-    original: boolean = false,
-    versionID: number = null
-  ): string {
+  getPreviewUrl(id: number, original: boolean = false): string {
     let url = new URL(this.getResourceUrl(id, 'preview'))
     if (this._searchQuery) url.hash = `#search="${this.searchQuery}"`
     if (original) {
       url.searchParams.append('original', 'true')
     }
-    if (versionID) {
-      url.searchParams.append('version', versionID.toString())
-    }
     return url.toString()
   }
 
@@ -191,16 +184,6 @@ export class DocumentService extends AbstractPaperlessService<Document> {
     return url
   }
 
-  uploadVersion(documentId: number, file: File) {
-    const formData = new FormData()
-    formData.append('document', file, file.name)
-    return this.http.post(
-      this.getResourceUrl(documentId, 'update_version'),
-      formData,
-      { reportProgress: true, observe: 'events' }
-    )
-  }
-
   getNextAsn(): Observable<number> {
     return this.http.get<number>(this.getResourceUrl(null, 'next_asn'))
   }
@@ -78,7 +78,6 @@ import {
   fileEarmarkFill,
   fileEarmarkLock,
   fileEarmarkMinus,
-  fileEarmarkPlus,
   fileEarmarkRichtext,
   fileText,
   files,
@@ -95,7 +94,6 @@ import {
   house,
   infoCircle,
   journals,
-  layers,
   link,
   listNested,
   listTask,
@@ -292,7 +290,6 @@ const icons = {
   fileEarmarkFill,
   fileEarmarkLock,
   fileEarmarkMinus,
-  fileEarmarkPlus,
   fileEarmarkRichtext,
   files,
   fileText,
@@ -309,7 +306,6 @@ const icons = {
   house,
   infoCircle,
   journals,
-  layers,
   link,
   listNested,
   listTask,
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import hashlib
 import logging
 import tempfile
 from pathlib import Path
@@ -332,8 +333,10 @@ def rotate(doc_ids: list[int], degrees: int) -> Literal["OK"]:
         f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
     )
     qs = Document.objects.filter(id__in=doc_ids)
+    affected_docs: list[int] = []
     import pikepdf
 
+    rotate_tasks = []
     for doc in qs:
         if doc.mime_type != "application/pdf":
             logger.warning(
@@ -341,34 +344,28 @@ def rotate(doc_ids: list[int], degrees: int) -> Literal["OK"]:
             )
             continue
         try:
-            # Write rotated output to a temp file and create a new version via consume pipeline
-            filepath: Path = (
-                Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
-                / f"{doc.id}_rotated.pdf"
-            )
-            with pikepdf.open(doc.source_path) as pdf:
+            with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
                 for page in pdf.pages:
                     page.rotate(degrees, relative=True)
-                pdf.remove_unreferenced_resources()
-                pdf.save(filepath)
-            # Preserve metadata/permissions via overrides; mark as new version
-            overrides = DocumentMetadataOverrides().from_document(doc)
-            consume_file.delay(
-                ConsumableDocument(
-                    source=DocumentSource.ConsumeFolder,
-                    original_file=filepath,
-                    head_version_id=doc.id,
-                ),
-                overrides,
-            )
-            logger.info(
-                f"Queued new rotated version for document {doc.id} by {degrees} degrees",
-            )
+                pdf.save()
+                doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
+                doc.save()
+                rotate_tasks.append(
+                    update_document_content_maybe_archive_file.s(
+                        document_id=doc.id,
+                    ),
+                )
+                logger.info(
+                    f"Rotated document {doc.id} by {degrees} degrees",
+                )
+                affected_docs.append(doc.id)
         except Exception as e:
             logger.exception(f"Error rotating document {doc.id}: {e}")
 
+    if len(affected_docs) > 0:
+        bulk_update_task = bulk_update_documents.si(document_ids=affected_docs)
+        chord(header=rotate_tasks, body=bulk_update_task).delay()
+
     return "OK"
 
 
@@ -531,31 +528,19 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
     import pikepdf
 
     try:
-        # Produce edited PDF to a temp file and create a new version
-        filepath: Path = (
-            Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
-            / f"{doc.id}_pages_deleted.pdf"
-        )
-        with pikepdf.open(doc.source_path) as pdf:
+        with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
             offset = 1  # pages are 1-indexed
             for page_num in pages:
                 pdf.pages.remove(pdf.pages[page_num - offset])
                 offset += 1  # remove() changes the index of the pages
             pdf.remove_unreferenced_resources()
-            pdf.save(filepath)
-        overrides = DocumentMetadataOverrides().from_document(doc)
-        consume_file.delay(
-            ConsumableDocument(
-                source=DocumentSource.ConsumeFolder,
-                original_file=filepath,
-                head_version_id=doc.id,
-            ),
-            overrides,
-        )
-        logger.info(
-            f"Queued new version for document {doc.id} after deleting pages {pages}",
-        )
+            pdf.save()
+            doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
+            if doc.page_count is not None:
+                doc.page_count = doc.page_count - len(pages)
+            doc.save()
+            update_document_content_maybe_archive_file.delay(document_id=doc.id)
+            logger.info(f"Deleted pages {pages} from document {doc.id}")
     except Exception as e:
         logger.exception(f"Error deleting pages from document {doc.id}: {e}")
 
@@ -607,29 +592,17 @@ def edit_pdf(
                     dst.pages[-1].rotate(op["rotate"], relative=True)
 
         if update_document:
-            # Create a new version from the edited PDF rather than replacing in-place
+            temp_path = doc.source_path.with_suffix(".tmp.pdf")
             pdf = pdf_docs[0]
             pdf.remove_unreferenced_resources()
-            filepath: Path = (
-                Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
-                / f"{doc.id}_edited.pdf"
-            )
-            pdf.save(filepath)
-            overrides = (
-                DocumentMetadataOverrides().from_document(doc)
-                if include_metadata
-                else DocumentMetadataOverrides()
-            )
-            if user is not None:
-                overrides.owner_id = user.id
-            consume_file.delay(
-                ConsumableDocument(
-                    source=DocumentSource.ConsumeFolder,
-                    original_file=filepath,
-                    head_version_id=doc.id,
-                ),
-                overrides,
-            )
+            # save the edited PDF to a temporary file in case of errors
+            pdf.save(temp_path)
+            # replace the original document with the edited one
+            temp_path.replace(doc.source_path)
+            doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
+            doc.page_count = len(pdf.pages)
+            doc.save()
+            update_document_content_maybe_archive_file.delay(document_id=doc.id)
         else:
             consume_tasks = []
             overrides = (
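The rewritten `rotate()` above queues one `update_document_content_maybe_archive_file` signature per rotated document and only runs `bulk_update_documents` once all of them complete, by dispatching a Celery chord. A generic, hedged sketch of that pattern with placeholder task names (not the real Paperless-ngx tasks) follows; it assumes a broker and worker are available:

```python
# Generic Celery chord: run a group of header tasks, then one callback afterwards.
from celery import Celery, chord

app = Celery("sketch", broker="memory://")  # placeholder broker for illustration


@app.task
def rotate_one(doc_id: int) -> int:
    # stand-in for the per-document work
    return doc_id


@app.task
def after_all(doc_ids: list[int]) -> None:
    # stand-in for the bulk_update_documents callback
    print(f"all done: {doc_ids}")


# The header tasks may run in parallel; the body runs once, after every header task.
chord([rotate_one.s(i) for i in (1, 2, 3)])(after_all.s())
```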
@@ -14,51 +14,6 @@ from documents.classifier import DocumentClassifier
 from documents.models import Document
 
 
-def _resolve_effective_doc(pk: int, request) -> Document | None:
-    """
-    Resolve which Document row should be considered for caching keys:
-    - If a version is requested, use that version
-    - If pk is a head doc, use its newest child version if present, else the head.
-    - Else, pk is a version, use that version.
-    Returns None if resolution fails (treat as no-cache).
-    """
-    try:
-        request_doc = Document.objects.only("id", "head_version_id").get(pk=pk)
-    except Document.DoesNotExist:
-        return None
-
-    head_doc = (
-        request_doc
-        if request_doc.head_version_id is None
-        else Document.objects.only("id").get(id=request_doc.head_version_id)
-    )
-
-    version_param = (
-        request.query_params.get("version")
-        if hasattr(request, "query_params")
-        else None
-    )
-    if version_param:
-        try:
-            version_id = int(version_param)
-            candidate = Document.objects.only("id", "head_version_id").get(
-                id=version_id,
-            )
-            if candidate.id != head_doc.id and candidate.head_version_id != head_doc.id:
-                return None
-            return candidate
-        except Exception:
-            return None
-
-    # Default behavior: if pk is a head doc, prefer its newest child version
-    if request_doc.head_version_id is None:
-        latest = head_doc.versions.only("id").order_by("id").last()
-        return latest or head_doc
-
-    # pk is already a version
-    return request_doc
-
-
 def suggestions_etag(request, pk: int) -> str | None:
     """
     Returns an optional string for the ETag, allowing browser caching of
@@ -116,10 +71,11 @@ def metadata_etag(request, pk: int) -> str | None:
     Metadata is extracted from the original file, so use its checksum as the
     ETag
     """
-    doc = _resolve_effective_doc(pk, request)
-    if doc is None:
+    try:
+        doc = Document.objects.only("checksum").get(pk=pk)
+        return doc.checksum
+    except Document.DoesNotExist:  # pragma: no cover
         return None
-    return doc.checksum
     return None
 
 
@@ -129,10 +85,11 @@ def metadata_last_modified(request, pk: int) -> datetime | None:
     not the modification of the original file, but of the database object, but might as well
     error on the side of more cautious
     """
-    doc = _resolve_effective_doc(pk, request)
-    if doc is None:
+    try:
+        doc = Document.objects.only("modified").get(pk=pk)
+        return doc.modified
+    except Document.DoesNotExist:  # pragma: no cover
         return None
-    return doc.modified
     return None
 
 
@@ -140,15 +97,15 @@ def preview_etag(request, pk: int) -> str | None:
     """
     ETag for the document preview, using the original or archive checksum, depending on the request
     """
-    doc = _resolve_effective_doc(pk, request)
-    if doc is None:
+    try:
+        doc = Document.objects.only("checksum", "archive_checksum").get(pk=pk)
+        use_original = (
+            "original" in request.query_params
+            and request.query_params["original"] == "true"
+        )
+        return doc.checksum if use_original else doc.archive_checksum
+    except Document.DoesNotExist:  # pragma: no cover
         return None
-    use_original = (
-        hasattr(request, "query_params")
-        and "original" in request.query_params
-        and request.query_params["original"] == "true"
-    )
-    return doc.checksum if use_original else doc.archive_checksum
     return None
 
 
@@ -157,10 +114,11 @@ def preview_last_modified(request, pk: int) -> datetime | None:
     Uses the documents modified time to set the Last-Modified header. Not strictly
     speaking correct, but close enough and quick
     """
-    doc = _resolve_effective_doc(pk, request)
-    if doc is None:
+    try:
+        doc = Document.objects.only("modified").get(pk=pk)
+        return doc.modified
+    except Document.DoesNotExist:  # pragma: no cover
         return None
-    return doc.modified
     return None
 
 
@@ -170,13 +128,10 @@ def thumbnail_last_modified(request, pk: int) -> datetime | None:
     Cache should be (slightly?) faster than filesystem
     """
     try:
-        doc = _resolve_effective_doc(pk, request)
-        if doc is None:
-            return None
+        doc = Document.objects.only("storage_type").get(pk=pk)
         if not doc.thumbnail_path.exists():
             return None
-        # Use the effective document id for cache key
-        doc_key = get_thumbnail_modified_key(doc.id)
+        doc_key = get_thumbnail_modified_key(pk)
 
         cache_hit = cache.get(doc_key)
         if cache_hit is not None:
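These `*_etag` / `*_last_modified` helpers take `(request, pk)` precisely so they can be handed to Django's conditional-response machinery, which answers with 304 Not Modified when the client's cached copy is still valid. A small sketch of that wiring — the view below is a stand-in for illustration, not the actual Paperless-ngx endpoint:

```python
# Sketch: plugging an ETag/Last-Modified pair into a Django view.
from django.http import HttpResponse
from django.views.decorators.http import condition

from documents.caching import preview_etag, preview_last_modified


@condition(etag_func=preview_etag, last_modified_func=preview_last_modified)
def preview_view(request, pk: int) -> HttpResponse:
    # Only reached when the client's cache is stale; otherwise Django replies
    # with 304 based on the ETag/Last-Modified values computed above.
    return HttpResponse(b"...preview bytes...", content_type="application/pdf")
```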
@@ -113,12 +113,6 @@ class ConsumerPluginMixin:
 
         self.filename = self.metadata.filename or self.input_doc.original_file.name
 
-        if input_doc.head_version_id:
-            self.log.debug(f"Document head version id: {input_doc.head_version_id}")
-            head_version = Document.objects.get(pk=input_doc.head_version_id)
-            version_index = head_version.versions.count()
-            self.filename += f"_v{version_index}"
-
     def _send_progress(
         self,
         current_progress: int,
@@ -476,44 +470,12 @@ class ConsumerPlugin(
         try:
             with transaction.atomic():
                 # store the document.
-                if self.input_doc.head_version_id:
-                    # If this is a new version of an existing document, we need
-                    # to make sure we're not creating a new document, but updating
-                    # the existing one.
-                    original_document = Document.objects.get(
-                        pk=self.input_doc.head_version_id,
-                    )
-                    self.log.debug("Saving record for updated version to database")
-                    original_document.pk = None
-                    original_document.head_version = Document.objects.get(
-                        pk=self.input_doc.head_version_id,
-                    )
-                    file_for_checksum = (
-                        self.unmodified_original
-                        if self.unmodified_original is not None
-                        else self.working_copy
-                    )
-                    original_document.checksum = hashlib.md5(
-                        file_for_checksum.read_bytes(),
-                    ).hexdigest()
-                    original_document.content = text
-                    original_document.page_count = page_count
-                    original_document.mime_type = mime_type
-                    original_document.original_filename = self.filename
-                    # Clear unique file path fields so they can be generated uniquely later
-                    original_document.filename = None
-                    original_document.archive_filename = None
-                    original_document.archive_checksum = None
-                    original_document.modified = timezone.now()
-                    original_document.save()
-                    document = original_document
-                else:
-                    document = self._store(
-                        text=text,
-                        date=date,
-                        page_count=page_count,
-                        mime_type=mime_type,
-                    )
+                document = self._store(
+                    text=text,
+                    date=date,
+                    page_count=page_count,
+                    mime_type=mime_type,
+                )
 
                 # If we get here, it was successful. Proceed with post-consume
                 # hooks. If they fail, nothing will get changed.
@@ -156,7 +156,6 @@ class ConsumableDocument:
 
     source: DocumentSource
     original_file: Path
-    head_version_id: int | None = None
    mailrule_id: int | None = None
     mime_type: str = dataclasses.field(init=False, default=None)
 
@@ -82,6 +82,13 @@ def _is_ignored(filepath: Path) -> bool:
 
 
 def _consume(filepath: Path) -> None:
+    # Check permissions early
+    try:
+        filepath.stat()
+    except (PermissionError, OSError):
+        logger.warning(f"Not consuming file {filepath}: Permission denied.")
+        return
+
     if filepath.is_dir() or _is_ignored(filepath):
         return
 
@@ -323,7 +330,12 @@ class Command(BaseCommand):
 
             # Also make sure the file exists still, some scanners might write a
             # temporary file first
-            file_still_exists = filepath.exists() and filepath.is_file()
+            try:
+                file_still_exists = filepath.exists() and filepath.is_file()
+            except (PermissionError, OSError):  # pragma: no cover
+                # If we can't check, let it fail in the _consume function
+                file_still_exists = True
+                continue
 
             if waited_long_enough and file_still_exists:
                 _consume(filepath)
@@ -1,26 +0,0 @@
-# Generated by Django 5.1.6 on 2025-02-26 17:08
-
-import django.db.models.deletion
-from django.db import migrations
-from django.db import models
-
-
-class Migration(migrations.Migration):
-    dependencies = [
-        ("documents", "1071_tag_tn_ancestors_count_tag_tn_ancestors_pks_and_more"),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name="document",
-            name="head_version",
-            field=models.ForeignKey(
-                blank=True,
-                null=True,
-                on_delete=django.db.models.deletion.CASCADE,
-                related_name="versions",
-                to="documents.document",
-                verbose_name="head version of document",
-            ),
-        ),
-    ]
@@ -313,15 +313,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
         ),
     )
 
-    head_version = models.ForeignKey(
-        "self",
-        blank=True,
-        null=True,
-        related_name="versions",
-        on_delete=models.CASCADE,
-        verbose_name=_("head version of document"),
-    )
-
     class Meta:
         ordering = ("-created",)
         verbose_name = _("document")
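The migration and model field removed above defined document versioning as a self-referential foreign key: a version row points at its head document via `head_version`, and the head reaches its versions through the `versions` related name. As a hedged sketch of how such a relation is traversed (illustrative only, mirroring the caching and view code removed elsewhere in this comparison; the field no longer exists after these changes):

```python
# Illustrative traversal of the removed self-referential FK.
# A head document has head_version=None; stored versions point back at it.
from documents.models import Document

head = Document.objects.get(pk=123)           # hypothetical head document id
assert head.head_version_id is None           # heads do not reference another row

newest = head.versions.order_by("id").last()  # children via related_name="versions"
effective = newest or head                    # fall back to the head itself
```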
@@ -974,8 +974,6 @@ class DocumentSerializer(
     page_count = SerializerMethodField()
 
     notes = NotesSerializer(many=True, required=False, read_only=True)
-    head_version = serializers.PrimaryKeyRelatedField(read_only=True)
-    versions = serializers.PrimaryKeyRelatedField(many=True, read_only=True)
 
     custom_fields = CustomFieldInstanceSerializer(
         many=True,
@@ -1018,10 +1016,6 @@ class DocumentSerializer(
             request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
         )
 
-        if doc.get("versions") is not None:
-            doc["versions"] = sorted(doc["versions"], reverse=True)
-            doc["versions"].append(doc["id"])
-
         if api_version < 9:
             # provide created as a datetime for backwards compatibility
             from django.utils import timezone
@@ -1190,8 +1184,6 @@ class DocumentSerializer(
             "remove_inbox_tags",
             "page_count",
             "mime_type",
-            "head_version",
-            "versions",
         )
         list_serializer_class = OwnedObjectListSerializer
 
@@ -1875,15 +1867,6 @@ class PostDocumentSerializer(serializers.Serializer):
         return created.date()
 
 
-class DocumentVersionSerializer(serializers.Serializer):
-    document = serializers.FileField(
-        label="Document",
-        write_only=True,
-    )
-
-    validate_document = PostDocumentSerializer().validate_document
-
-
 class BulkDownloadSerializer(DocumentListSerializer):
     content = serializers.ChoiceField(
         choices=["archive", "originals", "both"],
@@ -145,17 +145,13 @@ def consume_file(
     if overrides is None:
         overrides = DocumentMetadataOverrides()
 
-    plugins: list[type[ConsumeTaskPlugin]] = (
-        [ConsumerPreflightPlugin, ConsumerPlugin]
-        if input_doc.head_version_id is not None
-        else [
-            ConsumerPreflightPlugin,
-            CollatePlugin,
-            BarcodePlugin,
-            WorkflowTriggerPlugin,
-            ConsumerPlugin,
-        ]
-    )
+    plugins: list[type[ConsumeTaskPlugin]] = [
+        ConsumerPreflightPlugin,
+        CollatePlugin,
+        BarcodePlugin,
+        WorkflowTriggerPlugin,
+        ConsumerPlugin,
+    ]
 
     with (
         ProgressManager(
@@ -787,8 +787,10 @@ class TestPDFActions(DirectoriesMixin, TestCase):
 
         mock_consume_file.assert_not_called()
 
-    @mock.patch("documents.tasks.consume_file.delay")
-    def test_rotate(self, mock_consume_delay):
+    @mock.patch("documents.tasks.bulk_update_documents.si")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
+    @mock.patch("celery.chord.delay")
+    def test_rotate(self, mock_chord, mock_update_document, mock_update_documents):
         """
         GIVEN:
             - Existing documents
@@ -799,22 +801,19 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         """
         doc_ids = [self.doc1.id, self.doc2.id]
         result = bulk_edit.rotate(doc_ids, 90)
-        self.assertEqual(mock_consume_delay.call_count, 2)
-        for call, expected_id in zip(
-            mock_consume_delay.call_args_list,
-            doc_ids,
-        ):
-            consumable, overrides = call.args
-            self.assertEqual(consumable.head_version_id, expected_id)
-            self.assertIsNotNone(overrides)
+        self.assertEqual(mock_update_document.call_count, 2)
+        mock_update_documents.assert_called_once()
+        mock_chord.assert_called_once()
         self.assertEqual(result, "OK")
 
-    @mock.patch("documents.tasks.consume_file.delay")
+    @mock.patch("documents.tasks.bulk_update_documents.si")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
     @mock.patch("pikepdf.Pdf.save")
     def test_rotate_with_error(
         self,
         mock_pdf_save,
-        mock_consume_delay,
+        mock_update_archive_file,
+        mock_update_documents,
     ):
         """
         GIVEN:
@@ -833,12 +832,16 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         error_str = cm.output[0]
         expected_str = "Error rotating document"
         self.assertIn(expected_str, error_str)
-        mock_consume_delay.assert_not_called()
+        mock_update_archive_file.assert_not_called()
 
-    @mock.patch("documents.tasks.consume_file.delay")
+    @mock.patch("documents.tasks.bulk_update_documents.si")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
+    @mock.patch("celery.chord.delay")
     def test_rotate_non_pdf(
         self,
-        mock_consume_delay,
+        mock_chord,
+        mock_update_document,
+        mock_update_documents,
     ):
         """
         GIVEN:
@@ -853,16 +856,14 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         output_str = cm.output[1]
         expected_str = "Document 4 is not a PDF, skipping rotation"
         self.assertIn(expected_str, output_str)
-        self.assertEqual(mock_consume_delay.call_count, 1)
-        consumable, overrides = mock_consume_delay.call_args[0]
-        self.assertEqual(consumable.head_version_id, self.doc2.id)
-        self.assertIsNotNone(overrides)
+        self.assertEqual(mock_update_document.call_count, 1)
+        mock_update_documents.assert_called_once()
+        mock_chord.assert_called_once()
         self.assertEqual(result, "OK")
 
-    @mock.patch("documents.tasks.consume_file.delay")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
     @mock.patch("pikepdf.Pdf.save")
-    @mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
-    def test_delete_pages(self, mock_magic, mock_pdf_save, mock_consume_delay):
+    def test_delete_pages(self, mock_pdf_save, mock_update_archive_file):
         """
         GIVEN:
             - Existing documents
@@ -870,22 +871,24 @@ class TestPDFActions(DirectoriesMixin, TestCase):
             - Delete pages action is called with 1 document and 2 pages
         THEN:
             - Save should be called once
-            - A new version should be enqueued via consume_file
+            - Archive file should be updated once
+            - The document's page_count should be reduced by the number of deleted pages
         """
         doc_ids = [self.doc2.id]
+        initial_page_count = self.doc2.page_count
         pages = [1, 3]
         result = bulk_edit.delete_pages(doc_ids, pages)
         mock_pdf_save.assert_called_once()
-        mock_consume_delay.assert_called_once()
-        consumable, overrides = mock_consume_delay.call_args[0]
-        self.assertEqual(consumable.head_version_id, self.doc2.id)
-        self.assertTrue(str(consumable.original_file).endswith("_pages_deleted.pdf"))
-        self.assertIsNotNone(overrides)
+        mock_update_archive_file.assert_called_once()
         self.assertEqual(result, "OK")
 
-    @mock.patch("documents.tasks.consume_file.delay")
+        expected_page_count = initial_page_count - len(pages)
+        self.doc2.refresh_from_db()
+        self.assertEqual(self.doc2.page_count, expected_page_count)
+
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
     @mock.patch("pikepdf.Pdf.save")
-    def test_delete_pages_with_error(self, mock_pdf_save, mock_consume_delay):
+    def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
         """
         GIVEN:
             - Existing documents
@@ -894,7 +897,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
             - PikePDF raises an error
         THEN:
             - Save should be called once
-            - No new version should be enqueued
+            - Archive file should not be updated
         """
         mock_pdf_save.side_effect = Exception("Error saving PDF")
         doc_ids = [self.doc2.id]
@@ -905,7 +908,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         error_str = cm.output[0]
         expected_str = "Error deleting pages from document"
         self.assertIn(expected_str, error_str)
-        mock_consume_delay.assert_not_called()
+        mock_update_archive_file.assert_not_called()
 
     @mock.patch("documents.bulk_edit.group")
     @mock.patch("documents.tasks.consume_file.s")
@@ -965,18 +968,21 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         self.assertEqual(result, "OK")
         mock_chord.assert_called_once()
 
-    @mock.patch("documents.tasks.consume_file.delay")
-    def test_edit_pdf_with_update_document(self, mock_consume_delay):
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
+    def test_edit_pdf_with_update_document(self, mock_update_document):
         """
         GIVEN:
             - A single existing PDF document
         WHEN:
            - edit_pdf is called with update_document=True and a single output
         THEN:
-            - A version update is enqueued targeting the existing document
+            - The original document is updated in-place
+            - The update_document_content_maybe_archive_file task is triggered
         """
         doc_ids = [self.doc2.id]
         operations = [{"page": 1}, {"page": 2}]
+        original_checksum = self.doc2.checksum
+        original_page_count = self.doc2.page_count
+
         result = bulk_edit.edit_pdf(
             doc_ids,
@@ -986,11 +992,10 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         )
 
         self.assertEqual(result, "OK")
-        mock_consume_delay.assert_called_once()
-        consumable, overrides = mock_consume_delay.call_args[0]
-        self.assertEqual(consumable.head_version_id, self.doc2.id)
-        self.assertTrue(str(consumable.original_file).endswith("_edited.pdf"))
-        self.assertIsNotNone(overrides)
+        self.doc2.refresh_from_db()
+        self.assertNotEqual(self.doc2.checksum, original_checksum)
+        self.assertNotEqual(self.doc2.page_count, original_page_count)
+        mock_update_document.assert_called_once_with(document_id=self.doc2.id)
 
     @mock.patch("documents.bulk_edit.group")
     @mock.patch("documents.tasks.consume_file.s")
@@ -209,6 +209,26 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
             # assert that we have an error logged with this invalid file.
             error_logger.assert_called_once()
 
+    @mock.patch("documents.management.commands.document_consumer.logger.warning")
+    def test_permission_error_on_prechecks(self, warning_logger):
+        filepath = Path(self.dirs.consumption_dir) / "selinux.txt"
+        filepath.touch()
+
+        original_stat = Path.stat
+
+        def raising_stat(self, *args, **kwargs):
+            if self == filepath:
+                raise PermissionError("Permission denied")
+            return original_stat(self, *args, **kwargs)
+
+        with mock.patch("pathlib.Path.stat", new=raising_stat):
+            document_consumer._consume(filepath)
+
+        warning_logger.assert_called_once()
+        (args, _) = warning_logger.call_args
+        self.assertIn("Permission denied", args[0])
+        self.consume_file_mock.assert_not_called()
+
     @override_settings(CONSUMPTION_DIR="does_not_exist")
     def test_consumption_directory_invalid(self):
         self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")
@@ -147,7 +147,6 @@ from documents.serialisers import CustomFieldSerializer
|
|||||||
from documents.serialisers import DocumentListSerializer
|
from documents.serialisers import DocumentListSerializer
|
||||||
from documents.serialisers import DocumentSerializer
|
from documents.serialisers import DocumentSerializer
|
||||||
from documents.serialisers import DocumentTypeSerializer
|
from documents.serialisers import DocumentTypeSerializer
|
||||||
from documents.serialisers import DocumentVersionSerializer
|
|
||||||
from documents.serialisers import NotesSerializer
|
from documents.serialisers import NotesSerializer
|
||||||
from documents.serialisers import PostDocumentSerializer
|
from documents.serialisers import PostDocumentSerializer
|
||||||
from documents.serialisers import RunTaskViewSerializer
|
from documents.serialisers import RunTaskViewSerializer
|
||||||
@@ -568,7 +567,7 @@ class DocumentViewSet(
     GenericViewSet,
 ):
     model = Document
-    queryset = Document.objects.all()
+    queryset = Document.objects.annotate(num_notes=Count("notes"))
     serializer_class = DocumentSerializer
     pagination_class = StandardPagination
     permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
@@ -597,8 +596,7 @@ class DocumentViewSet(

     def get_queryset(self):
         return (
-            Document.objects.filter(head_version__isnull=True)
-            .distinct()
+            Document.objects.distinct()
             .order_by("-created")
             .annotate(num_notes=Count("notes"))
             .select_related("correspondent", "storage_path", "document_type", "owner")
@@ -660,55 +658,18 @@ class DocumentViewSet(
             and request.query_params["original"] == "true"
         )

-    def _resolve_file_doc(self, head_doc: Document, request):
-        version_param = request.query_params.get("version")
-        if version_param:
-            try:
-                version_id = int(version_param)
-            except (TypeError, ValueError):
-                raise NotFound("Invalid version parameter")
-            try:
-                candidate = Document.global_objects.select_related("owner").get(
-                    id=version_id,
-                )
-            except Document.DoesNotExist:
-                raise Http404
-            if candidate.id != head_doc.id and candidate.head_version_id != head_doc.id:
-                raise Http404
-            return candidate
-        latest = head_doc.versions.order_by("id").last()
-        return latest or head_doc
-
     def file_response(self, pk, request, disposition):
-        request_doc = Document.global_objects.select_related("owner").get(id=pk)
-        head_doc = (
-            request_doc
-            if request_doc.head_version_id is None
-            else Document.global_objects.select_related("owner").get(
-                id=request_doc.head_version_id,
-            )
-        )
+        doc = Document.global_objects.select_related("owner").get(id=pk)
         if request.user is not None and not has_perms_owner_aware(
             request.user,
             "view_document",
-            head_doc,
+            doc,
         ):
             return HttpResponseForbidden("Insufficient permissions")
-        # If a version is explicitly requested, use it. Otherwise:
-        # - if pk is a head document: serve newest version
-        # - if pk is a version: serve that version
-        if "version" in request.query_params:
-            file_doc = self._resolve_file_doc(head_doc, request)
-        else:
-            file_doc = (
-                self._resolve_file_doc(head_doc, request)
-                if request_doc.head_version_id is None
-                else request_doc
-            )
         return serve_file(
-            doc=file_doc,
+            doc=doc,
             use_archive=not self.original_requested(request)
-            and file_doc.has_archive_version,
+            and doc.has_archive_version,
             disposition=disposition,
         )

@@ -743,33 +704,16 @@ class DocumentViewSet(
     )
     def metadata(self, request, pk=None):
         try:
-            request_doc = Document.objects.select_related("owner").get(pk=pk)
-            head_doc = (
-                request_doc
-                if request_doc.head_version_id is None
-                else Document.objects.select_related("owner").get(
-                    id=request_doc.head_version_id,
-                )
-            )
+            doc = Document.objects.select_related("owner").get(pk=pk)
             if request.user is not None and not has_perms_owner_aware(
                 request.user,
                 "view_document",
-                head_doc,
+                doc,
             ):
                 return HttpResponseForbidden("Insufficient permissions")
         except Document.DoesNotExist:
             raise Http404

-        # Choose the effective document (newest version by default, or explicit via ?version=)
-        if "version" in request.query_params:
-            doc = self._resolve_file_doc(head_doc, request)
-        else:
-            doc = (
-                self._resolve_file_doc(head_doc, request)
-                if request_doc.head_version_id is None
-                else request_doc
-            )
-
         document_cached_metadata = get_metadata_cache(doc.pk)

         archive_metadata = None
@@ -871,36 +815,8 @@ class DocumentViewSet(
     )
     def preview(self, request, pk=None):
         try:
-            request_doc = Document.objects.select_related("owner").get(id=pk)
-            head_doc = (
-                request_doc
-                if request_doc.head_version_id is None
-                else Document.objects.select_related("owner").get(
-                    id=request_doc.head_version_id,
-                )
-            )
-            if request.user is not None and not has_perms_owner_aware(
-                request.user,
-                "view_document",
-                head_doc,
-            ):
-                return HttpResponseForbidden("Insufficient permissions")
-
-            if "version" in request.query_params:
-                file_doc = self._resolve_file_doc(head_doc, request)
-            else:
-                file_doc = (
-                    self._resolve_file_doc(head_doc, request)
-                    if request_doc.head_version_id is None
-                    else request_doc
-                )
-
-            return serve_file(
-                doc=file_doc,
-                use_archive=not self.original_requested(request)
-                and file_doc.has_archive_version,
-                disposition="inline",
-            )
+            response = self.file_response(pk, request, "inline")
+            return response
         except (FileNotFoundError, Document.DoesNotExist):
             raise Http404

@@ -909,32 +825,17 @@ class DocumentViewSet(
     @method_decorator(last_modified(thumbnail_last_modified))
     def thumb(self, request, pk=None):
         try:
-            request_doc = Document.objects.select_related("owner").get(id=pk)
-            head_doc = (
-                request_doc
-                if request_doc.head_version_id is None
-                else Document.objects.select_related("owner").get(
-                    id=request_doc.head_version_id,
-                )
-            )
+            doc = Document.objects.select_related("owner").get(id=pk)
             if request.user is not None and not has_perms_owner_aware(
                 request.user,
                 "view_document",
-                head_doc,
+                doc,
             ):
                 return HttpResponseForbidden("Insufficient permissions")
-            if "version" in request.query_params:
-                file_doc = self._resolve_file_doc(head_doc, request)
+            if doc.storage_type == Document.STORAGE_TYPE_GPG:
+                handle = GnuPG.decrypted(doc.thumbnail_file)
             else:
-                file_doc = (
-                    self._resolve_file_doc(head_doc, request)
-                    if request_doc.head_version_id is None
-                    else request_doc
-                )
-            if file_doc.storage_type == Document.STORAGE_TYPE_GPG:
-                handle = GnuPG.decrypted(file_doc.thumbnail_file)
-            else:
-                handle = file_doc.thumbnail_file
+                handle = doc.thumbnail_file

             return HttpResponse(handle, content_type="image/webp")
         except (FileNotFoundError, Document.DoesNotExist):
@@ -1202,56 +1103,6 @@ class DocumentViewSet(
                 "Error emailing document, check logs for more detail.",
             )

-    @action(methods=["post"], detail=True)
-    def update_version(self, request, pk=None):
-        serializer = DocumentVersionSerializer(data=request.data)
-        serializer.is_valid(raise_exception=True)
-
-        try:
-            doc = Document.objects.select_related("owner").get(pk=pk)
-            if request.user is not None and not has_perms_owner_aware(
-                request.user,
-                "change_document",
-                doc,
-            ):
-                return HttpResponseForbidden("Insufficient permissions")
-        except Document.DoesNotExist:
-            raise Http404
-
-        try:
-            doc_name, doc_data = serializer.validated_data.get("document")
-
-            t = int(mktime(datetime.now().timetuple()))
-
-            settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
-
-            temp_file_path = Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) / Path(
-                pathvalidate.sanitize_filename(doc_name),
-            )
-
-            temp_file_path.write_bytes(doc_data)
-
-            os.utime(temp_file_path, times=(t, t))
-
-            input_doc = ConsumableDocument(
-                source=DocumentSource.ApiUpload,
-                original_file=temp_file_path,
-                head_version_id=doc.pk,
-            )
-
-            async_task = consume_file.delay(
-                input_doc,
-            )
-            logger.debug(
-                f"Updated document {doc.id} with new version",
-            )
-            return Response(async_task.id)
-        except Exception as e:
-            logger.warning(f"An error occurred updating document: {e!s}")
-            return HttpResponseServerError(
-                "Error updating document, check logs for more detail.",
-            )
-

 @extend_schema_view(
     list=extend_schema(
@@ -322,6 +322,7 @@ INSTALLED_APPS = [
     "paperless_tesseract.apps.PaperlessTesseractConfig",
     "paperless_text.apps.PaperlessTextConfig",
     "paperless_mail.apps.PaperlessMailConfig",
+    "paperless_remote.apps.PaperlessRemoteParserConfig",
     "django.contrib.admin",
     "rest_framework",
     "rest_framework.authtoken",
@@ -922,7 +923,7 @@ CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
 CELERY_BEAT_SCHEDULE = _parse_beat_schedule()

 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
-CELERY_BEAT_SCHEDULE_FILENAME = DATA_DIR / "celerybeat-schedule.db"
+CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")


 # Cachalot: Database read cache.
@@ -1389,3 +1390,10 @@ WEBHOOKS_ALLOW_INTERNAL_REQUESTS = __get_boolean(
     "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
     "true",
 )
+
+###############################################################################
+# Remote Parser #
+###############################################################################
+REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
+REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
+REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")
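The three REMOTE_OCR_* settings above are plain os.getenv() look-ups, so enabling the Azure engine comes down to exporting the matching PAPERLESS_* variables before Paperless starts. A minimal sketch follows (not part of the diff; the key and resource name are placeholders, and the endpoint shape mirrors the one used in the tests further down):

    import os

    # Placeholders only - substitute a real Document Intelligence key and resource name.
    os.environ["PAPERLESS_REMOTE_OCR_ENGINE"] = "azureai"
    os.environ["PAPERLESS_REMOTE_OCR_API_KEY"] = "<azure-document-intelligence-key>"
    os.environ["PAPERLESS_REMOTE_OCR_ENDPOINT"] = "https://<resource>.cognitiveservices.azure.com"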
4  src/paperless_remote/__init__.py  Normal file
@@ -0,0 +1,4 @@
+# this is here so that django finds the checks.
+from paperless_remote.checks import check_remote_parser_configured
+
+__all__ = ["check_remote_parser_configured"]
14  src/paperless_remote/apps.py  Normal file
@@ -0,0 +1,14 @@
+from django.apps import AppConfig
+
+from paperless_remote.signals import remote_consumer_declaration
+
+
+class PaperlessRemoteParserConfig(AppConfig):
+    name = "paperless_remote"
+
+    def ready(self):
+        from documents.signals import document_consumer_declaration
+
+        document_consumer_declaration.connect(remote_consumer_declaration)
+
+        AppConfig.ready(self)
17  src/paperless_remote/checks.py  Normal file
@@ -0,0 +1,17 @@
+from django.conf import settings
+from django.core.checks import Error
+from django.core.checks import register
+
+
+@register()
+def check_remote_parser_configured(app_configs, **kwargs):
+    if settings.REMOTE_OCR_ENGINE == "azureai" and not (
+        settings.REMOTE_OCR_ENDPOINT and settings.REMOTE_OCR_API_KEY
+    ):
+        return [
+            Error(
+                "Azure AI remote parser requires endpoint and API key to be configured.",
+            ),
+        ]
+
+    return []
113  src/paperless_remote/parsers.py  Normal file
@@ -0,0 +1,113 @@
+from pathlib import Path
+
+from django.conf import settings
+
+from paperless_tesseract.parsers import RasterisedDocumentParser
+
+
+class RemoteEngineConfig:
+    def __init__(
+        self,
+        engine: str,
+        api_key: str | None = None,
+        endpoint: str | None = None,
+    ):
+        self.engine = engine
+        self.api_key = api_key
+        self.endpoint = endpoint
+
+    def engine_is_valid(self):
+        valid = self.engine in ["azureai"] and self.api_key is not None
+        if self.engine == "azureai":
+            valid = valid and self.endpoint is not None
+        return valid
+
+
+class RemoteDocumentParser(RasterisedDocumentParser):
+    """
+    This parser uses a remote OCR engine to parse documents. Currently, it supports Azure AI Vision
+    as this is the only service that provides a remote OCR API with text-embedded PDF output.
+    """
+
+    logging_name = "paperless.parsing.remote"
+
+    def get_settings(self) -> RemoteEngineConfig:
+        """
+        Returns the configuration for the remote OCR engine, loaded from Django settings.
+        """
+        return RemoteEngineConfig(
+            engine=settings.REMOTE_OCR_ENGINE,
+            api_key=settings.REMOTE_OCR_API_KEY,
+            endpoint=settings.REMOTE_OCR_ENDPOINT,
+        )
+
+    def supported_mime_types(self):
+        if self.settings.engine_is_valid():
+            return {
+                "application/pdf": ".pdf",
+                "image/png": ".png",
+                "image/jpeg": ".jpg",
+                "image/tiff": ".tiff",
+                "image/bmp": ".bmp",
+                "image/gif": ".gif",
+                "image/webp": ".webp",
+            }
+        else:
+            return {}
+
+    def azure_ai_vision_parse(
+        self,
+        file: Path,
+    ) -> str | None:
+        """
+        Uses Azure AI Vision to parse the document and return the text content.
+        It requests a searchable PDF output with embedded text.
+        The PDF is saved to the archive_path attribute.
+        Returns the text content extracted from the document.
+        If the parsing fails, it returns None.
+        """
+        from azure.ai.documentintelligence import DocumentIntelligenceClient
+        from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
+        from azure.ai.documentintelligence.models import AnalyzeOutputOption
+        from azure.ai.documentintelligence.models import DocumentContentFormat
+        from azure.core.credentials import AzureKeyCredential
+
+        client = DocumentIntelligenceClient(
+            endpoint=self.settings.endpoint,
+            credential=AzureKeyCredential(self.settings.api_key),
+        )
+
+        with file.open("rb") as f:
+            analyze_request = AnalyzeDocumentRequest(bytes_source=f.read())
+            poller = client.begin_analyze_document(
+                model_id="prebuilt-read",
+                body=analyze_request,
+                output_content_format=DocumentContentFormat.TEXT,
+                output=[AnalyzeOutputOption.PDF],  # request searchable PDF output
+                content_type="application/json",
+            )
+
+        poller.wait()
+        result_id = poller.details["operation_id"]
+        result = poller.result()
+
+        # Download the PDF with embedded text
+        self.archive_path = self.tempdir / "archive.pdf"
+        with self.archive_path.open("wb") as f:
+            for chunk in client.get_analyze_result_pdf(
+                model_id="prebuilt-read",
+                result_id=result_id,
+            ):
+                f.write(chunk)
+
+        client.close()
+        return result.content
+
+    def parse(self, document_path: Path, mime_type, file_name=None):
+        if not self.settings.engine_is_valid():
+            self.log.warning(
+                "No valid remote parser engine is configured, content will be empty.",
+            )
+            self.text = ""
+        elif self.settings.engine == "azureai":
+            self.text = self.azure_ai_vision_parse(document_path)
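For orientation, here is a rough usage sketch of the new parser, mirroring how the tests below drive it. It is not part of the change set; it assumes a configured Paperless environment with valid REMOTE_OCR_* settings, and the input path is hypothetical:

    import uuid
    from pathlib import Path

    from paperless_remote.signals import get_parser

    # get_parser() builds a RemoteDocumentParser; the UUID is only a logging group id,
    # the same way the tests instantiate it.
    parser = get_parser(uuid.uuid4())
    parser.parse(Path("/tmp/scan.pdf"), "application/pdf")  # hypothetical input file

    print(parser.text)          # OCR text returned by Azure Document Intelligence
    print(parser.archive_path)  # searchable PDF written by azure_ai_vision_parse()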
18  src/paperless_remote/signals.py  Normal file
@@ -0,0 +1,18 @@
+def get_parser(*args, **kwargs):
+    from paperless_remote.parsers import RemoteDocumentParser
+
+    return RemoteDocumentParser(*args, **kwargs)
+
+
+def get_supported_mime_types():
+    from paperless_remote.parsers import RemoteDocumentParser
+
+    return RemoteDocumentParser(None).supported_mime_types()
+
+
+def remote_consumer_declaration(sender, **kwargs):
+    return {
+        "parser": get_parser,
+        "weight": 5,
+        "mime_types": get_supported_mime_types(),
+    }
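remote_consumer_declaration is the piece that apps.py above connects to the documents app's document_consumer_declaration signal, which is how the new parser gets discovered alongside the existing ones. A small sketch of what the declaration hands back, run inside a configured Paperless environment (again not part of the diff; the mime-type map is empty whenever the REMOTE_OCR_* settings are missing, which effectively disables the parser):

    from paperless_remote.signals import remote_consumer_declaration

    decl = remote_consumer_declaration(sender=None)
    print(decl["weight"])              # 5
    print(sorted(decl["mime_types"]))  # [] unless the REMOTE_OCR_* settings are valid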
0  src/paperless_remote/tests/__init__.py  Normal file
BIN  src/paperless_remote/tests/samples/simple-digital.pdf  Normal file
Binary file not shown.
24  src/paperless_remote/tests/test_checks.py  Normal file
@@ -0,0 +1,24 @@
+from unittest import TestCase
+
+from django.test import override_settings
+
+from paperless_remote import check_remote_parser_configured
+
+
+class TestChecks(TestCase):
+    @override_settings(REMOTE_OCR_ENGINE=None)
+    def test_no_engine(self):
+        msgs = check_remote_parser_configured(None)
+        self.assertEqual(len(msgs), 0)
+
+    @override_settings(REMOTE_OCR_ENGINE="azureai")
+    @override_settings(REMOTE_OCR_API_KEY="somekey")
+    @override_settings(REMOTE_OCR_ENDPOINT=None)
+    def test_azure_no_endpoint(self):
+        msgs = check_remote_parser_configured(None)
+        self.assertEqual(len(msgs), 1)
+        self.assertTrue(
+            msgs[0].msg.startswith(
+                "Azure AI remote parser requires endpoint and API key to be configured.",
+            ),
+        )
101  src/paperless_remote/tests/test_parser.py  Normal file
@@ -0,0 +1,101 @@
+import uuid
+from pathlib import Path
+from unittest import mock
+
+from django.test import TestCase
+from django.test import override_settings
+
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import FileSystemAssertsMixin
+from paperless_remote.parsers import RemoteDocumentParser
+from paperless_remote.signals import get_parser
+
+
+class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
+    SAMPLE_FILES = Path(__file__).resolve().parent / "samples"
+
+    def assertContainsStrings(self, content: str, strings: list[str]):
+        # Asserts that all strings appear in content, in the given order.
+        indices = []
+        for s in strings:
+            if s in content:
+                indices.append(content.index(s))
+            else:
+                self.fail(f"'{s}' is not in '{content}'")
+        self.assertListEqual(indices, sorted(indices))
+
+    @mock.patch("paperless_tesseract.parsers.run_subprocess")
+    @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
+    def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
+        # Arrange mock Azure client
+        mock_client = mock.Mock()
+        mock_client_cls.return_value = mock_client
+
+        # Simulate poller result and its `.details`
+        mock_poller = mock.Mock()
+        mock_poller.wait.return_value = None
+        mock_poller.details = {"operation_id": "fake-op-id"}
+        mock_client.begin_analyze_document.return_value = mock_poller
+        mock_poller.result.return_value.content = "This is a test document."
+
+        # Return dummy PDF bytes
+        mock_client.get_analyze_result_pdf.return_value = [
+            b"%PDF-",
+            b"1.7 ",
+            b"FAKEPDF",
+        ]
+
+        # Simulate pdftotext by writing dummy text to sidecar file
+        def fake_run(cmd, *args, **kwargs):
+            with Path(cmd[-1]).open("w", encoding="utf-8") as f:
+                f.write("This is a test document.")
+
+        mock_subprocess.side_effect = fake_run
+
+        with override_settings(
+            REMOTE_OCR_ENGINE="azureai",
+            REMOTE_OCR_API_KEY="somekey",
+            REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
+        ):
+            parser = get_parser(uuid.uuid4())
+            parser.parse(
+                self.SAMPLE_FILES / "simple-digital.pdf",
+                "application/pdf",
+            )
+
+            self.assertContainsStrings(
+                parser.text.strip(),
+                ["This is a test document."],
+            )
+
+    @override_settings(
+        REMOTE_OCR_ENGINE="azureai",
+        REMOTE_OCR_API_KEY="key",
+        REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
+    )
+    def test_supported_mime_types_valid_config(self):
+        parser = RemoteDocumentParser(uuid.uuid4())
+        expected_types = {
+            "application/pdf": ".pdf",
+            "image/png": ".png",
+            "image/jpeg": ".jpg",
+            "image/tiff": ".tiff",
+            "image/bmp": ".bmp",
+            "image/gif": ".gif",
+            "image/webp": ".webp",
+        }
+        self.assertEqual(parser.supported_mime_types(), expected_types)
+
+    def test_supported_mime_types_invalid_config(self):
+        parser = get_parser(uuid.uuid4())
+        self.assertEqual(parser.supported_mime_types(), {})
+
+    @override_settings(
+        REMOTE_OCR_ENGINE=None,
+        REMOTE_OCR_API_KEY=None,
+        REMOTE_OCR_ENDPOINT=None,
+    )
+    def test_parse_with_invalid_config(self):
+        parser = get_parser(uuid.uuid4())
+        parser.parse(self.SAMPLE_FILES / "simple-digital.pdf", "application/pdf")
+        self.assertEqual(parser.text, "")
39  uv.lock  generated
@@ -95,6 +95,34 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/af/cc/55a32a2c98022d88812b5986d2a92c4ff3ee087e83b712ebc703bba452bf/Automat-24.8.1-py3-none-any.whl", hash = "sha256:bf029a7bc3da1e2c24da2343e7598affaa9f10bf0ab63ff808566ce90551e02a", size = 42585, upload-time = "2024-08-19T17:31:56.729Z" },
 ]

+[[package]]
+name = "azure-ai-documentintelligence"
+version = "1.0.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "isodate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/44/7b/8115cd713e2caa5e44def85f2b7ebd02a74ae74d7113ba20bdd41fd6dd80/azure_ai_documentintelligence-1.0.2.tar.gz", hash = "sha256:4d75a2513f2839365ebabc0e0e1772f5601b3a8c9a71e75da12440da13b63484", size = 170940 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d9/75/c9ec040f23082f54ffb1977ff8f364c2d21c79a640a13d1c1809e7fd6b1a/azure_ai_documentintelligence-1.0.2-py3-none-any.whl", hash = "sha256:e1fb446abbdeccc9759d897898a0fe13141ed29f9ad11fc705f951925822ed59", size = 106005 },
+]
+
+[[package]]
+name = "azure-core"
+version = "1.33.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "six", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/75/aa/7c9db8edd626f1a7d99d09ef7926f6f4fb34d5f9fa00dc394afdfe8e2a80/azure_core-1.33.0.tar.gz", hash = "sha256:f367aa07b5e3005fec2c1e184b882b0b039910733907d001c20fb08ebb8c0eb9", size = 295633 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/b7/76b7e144aa53bd206bf1ce34fa75350472c3f69bf30e5c8c18bc9881035d/azure_core-1.33.0-py3-none-any.whl", hash = "sha256:9b5b6d0223a1d38c37500e6971118c1e0f13f54951e6893968b38910bc9cda8f", size = 207071 },
+]
+
 [[package]]
 name = "babel"
 version = "2.17.0"
@@ -1412,6 +1440,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/fc/4e5a141c3f7c7bed550ac1f69e599e92b6be449dd4677ec09f325cad0955/inotifyrecursive-0.3.5-py3-none-any.whl", hash = "sha256:7e5f4a2e1dc2bef0efa3b5f6b339c41fb4599055a2b54909d020e9e932cc8d2f", size = 8009, upload-time = "2020-11-20T12:38:46.981Z" },
 ]

+[[package]]
+name = "isodate"
+version = "0.7.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -2032,6 +2069,7 @@ name = "paperless-ngx"
 version = "2.18.4"
 source = { virtual = "." }
 dependencies = [
+    { name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "bleach", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "celery", extra = ["redis"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2169,6 +2207,7 @@ typing = [

 [package.metadata]
 requires-dist = [
+    { name = "azure-ai-documentintelligence", specifier = ">=1.0.2" },
     { name = "babel", specifier = ">=2.17" },
     { name = "bleach", specifier = "~=6.2.0" },
     { name = "celery", extras = ["redis"], specifier = "~=5.5.1" },