Merge branch 'dev' into feature-permissions

This commit is contained in:
shamoon 2023-02-16 20:44:51 -08:00
commit 3559e27cdd
47 changed files with 8045 additions and 7686 deletions

View File

@ -7,6 +7,7 @@ import subprocess
from argparse import ArgumentParser
from typing import Dict
from typing import Final
from typing import Iterator
from typing import List
from typing import Optional
@ -15,16 +16,17 @@ from github import ContainerPackage
from github import GithubBranchApi
from github import GithubContainerRegistryApi
import docker
logger = logging.getLogger("cleanup-tags")
class DockerManifest2:
class ImageProperties:
"""
Data class wrapping the Docker Image Manifest Version 2.
Data class wrapping the properties of an entry in the image index
manifests list. It is NOT an actual image with layers, etc
See https://docs.docker.com/registry/spec/manifest-v2-2/
https://docs.docker.com/registry/spec/manifest-v2-2/
https://github.com/opencontainers/image-spec/blob/main/manifest.md
https://github.com/opencontainers/image-spec/blob/main/descriptor.md
"""
def __init__(self, data: Dict) -> None:
@ -41,6 +43,45 @@ class DockerManifest2:
self.platform = f"{platform_data_os}/{platform_arch}{platform_variant}"
class ImageIndex:
    """
    Data class wrapping up logic for an OCI Image Index
    JSON data.  Primary use is to access the manifests listing

    See https://github.com/opencontainers/image-spec/blob/main/image-index.md
    """

    def __init__(self, package_url: str, tag: str) -> None:
        """
        Fetch and parse the raw image index for ``package_url:tag``.

        Runs ``docker buildx imagetools inspect --raw`` against the qualified
        name and stores the decoded JSON for later access.

        Raises:
            FileNotFoundError: no ``docker`` executable was found on PATH.
            subprocess.CalledProcessError: the inspect command exited
                non-zero (the captured stderr is logged before re-raising).
            json.JSONDecodeError: the command output was not valid JSON.
        """
        self.qualified_name = f"{package_url}:{tag}"
        logger.info(f"Getting image index for {self.qualified_name}")

        # shutil.which returns None when the executable is missing; passing
        # None inside the argument list would otherwise surface as an
        # unhelpful TypeError from subprocess
        docker_exe = shutil.which("docker")
        if docker_exe is None:
            raise FileNotFoundError("docker executable not found on PATH")

        try:
            proc = subprocess.run(
                [
                    docker_exe,
                    "buildx",
                    "imagetools",
                    "inspect",
                    "--raw",
                    self.qualified_name,
                ],
                capture_output=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            logger.error(
                f"Failed to get image index for {self.qualified_name}: {e.stderr}",
            )
            # Bare raise keeps the original traceback intact
            raise

        self._data = json.loads(proc.stdout)

    @property
    def image_pointers(self) -> Iterator["ImageProperties"]:
        """
        Yields one ImageProperties per entry of the index's "manifests" list
        """
        for manifest_data in self._data["manifests"]:
            yield ImageProperties(manifest_data)
class RegistryTagsCleaner:
"""
This is the base class for the image registry cleaning. Given a package
@ -87,7 +128,10 @@ class RegistryTagsCleaner:
def clean(self):
"""
This method will delete image versions, based on the selected tags to delete
This method will delete image versions, based on the selected tags to delete.
It behaves more like an unlinking than actual deletion. Removing the tag
simply removes a pointer to an image, but the actual image data remains accessible
if one has the sha256 digest of it.
"""
for tag_to_delete in self.tags_to_delete:
package_version_info = self.all_pkgs_tags_to_version[tag_to_delete]
@ -151,31 +195,17 @@ class RegistryTagsCleaner:
# Parse manifests to locate digests pointed to
for tag in sorted(self.tags_to_keep):
full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
logger.info(f"Checking manifest for {full_name}")
# TODO: It would be nice to use RegistryData from docker
# except the ID doesn't map to anything in the manifest
try:
proc = subprocess.run(
[
shutil.which("docker"),
"buildx",
"imagetools",
"inspect",
"--raw",
full_name,
],
capture_output=True,
image_index = ImageIndex(
f"ghcr.io/{self.repo_owner}/{self.package_name}",
tag,
)
manifest_list = json.loads(proc.stdout)
for manifest_data in manifest_list["manifests"]:
manifest = DockerManifest2(manifest_data)
for manifest in image_index.image_pointers:
if manifest.digest in untagged_versions:
logger.info(
f"Skipping deletion of {manifest.digest},"
f" referred to by {full_name}"
f" referred to by {image_index.qualified_name}"
f" for {manifest.platform}",
)
del untagged_versions[manifest.digest]
@ -247,64 +277,54 @@ class RegistryTagsCleaner:
# By default, keep anything which is tagged
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
def check_tags_pull(self):
def check_remaining_tags_valid(self):
"""
This method uses the Docker Python SDK to confirm all tags which were
kept still pull, for all platforms.
Checks the non-deleted tags are still valid. The assumption is if the
manifest can be inspected and each image manifest it points to can be
inspected, the image will still pull.
TODO: This is much slower (although more comprehensive). Maybe a Pool?
https://github.com/opencontainers/image-spec/blob/main/image-index.md
"""
logger.info("Beginning confirmation step")
client = docker.from_env()
imgs = []
a_tag_failed = False
for tag in sorted(self.tags_to_keep):
repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]:
# From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64
# skip others in this case
if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0":
continue
# Skip beta and release candidate tags
elif "beta" in tag:
continue
# Build the platform name
if variant is not None:
platform = f"linux/{arch}/{variant}"
else:
platform = f"linux/{arch}"
try:
image_index = ImageIndex(
f"ghcr.io/{self.repo_owner}/{self.package_name}",
tag,
)
for manifest in image_index.image_pointers:
logger.info(f"Checking {manifest.digest} for {manifest.platform}")
try:
logger.info(f"Pulling {repository}:{tag} for {platform}")
image = client.images.pull(
repository=repository,
tag=tag,
platform=platform,
)
imgs.append(image)
except docker.errors.APIError as e:
logger.error(
f"Failed to pull {repository}:{tag}: {e}",
)
# This follows the pointer from the index to an actual image, layers and all
# Note the separator is "@" (name@digest), not ":" as used for tags
digest_name = f"ghcr.io/{self.repo_owner}/{self.package_name}@{manifest.digest}"
# Prevent out of space errors by removing after a few
# pulls
if len(imgs) > 50:
for image in imgs:
try:
client.images.remove(image.id)
except docker.errors.APIError as e:
err_str = str(e)
# Ignore attempts to remove images that are partly shared
# Ignore images which are somehow gone already
if (
"must be forced" not in err_str
and "No such image" not in err_str
):
logger.error(
f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}",
)
imgs = []
subprocess.run(
[
shutil.which("docker"),
"buildx",
"imagetools",
"inspect",
"--raw",
digest_name,
],
capture_output=True,
check=True,
)
except subprocess.CalledProcessError as e:
logger.error(f"Failed to inspect digest: {e.stderr}")
a_tag_failed = True
except subprocess.CalledProcessError as e:
a_tag_failed = True
logger.error(f"Failed to inspect: {e.stderr}")
continue
if a_tag_failed:
raise Exception("At least one image tag failed to inspect")
class MainImageTagsCleaner(RegistryTagsCleaner):
@ -366,7 +386,7 @@ class MainImageTagsCleaner(RegistryTagsCleaner):
class LibraryTagsCleaner(RegistryTagsCleaner):
"""
Exists for the off change that someday, the installer library images
Exists for the off chance that someday, the installer library images
will need their own logic
"""
@ -464,7 +484,7 @@ def _main():
# Verify remaining tags still pull
if args.is_manifest:
cleaner.check_tags_pull()
cleaner.check_remaining_tags_valid()
if __name__ == "__main__":

View File

@ -6,7 +6,7 @@
version: "3.7"
services:
gotenberg:
image: docker.io/gotenberg/gotenberg:7.6
image: docker.io/gotenberg/gotenberg:7.8
hostname: gotenberg
container_name: gotenberg
network_mode: host

View File

@ -83,7 +83,7 @@ services:
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
gotenberg:
image: docker.io/gotenberg/gotenberg:7.6
image: docker.io/gotenberg/gotenberg:7.8
restart: unless-stopped
# The gotenberg chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even javascript.

View File

@ -77,7 +77,7 @@ services:
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
gotenberg:
image: docker.io/gotenberg/gotenberg:7.6
image: docker.io/gotenberg/gotenberg:7.8
restart: unless-stopped
# The gotenberg chromium route is used to convert .eml files. We do not

View File

@ -65,7 +65,7 @@ services:
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
gotenberg:
image: docker.io/gotenberg/gotenberg:7.6
image: docker.io/gotenberg/gotenberg:7.8
restart: unless-stopped
# The gotenberg chromium route is used to convert .eml files. We do not

View File

@ -501,3 +501,9 @@ You can also set the default for new tables (this does NOT affect
existing tables) with:
`ALTER DATABASE <db_name> CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;`
!!! warning
Using MariaDB version 10.4+ is recommended. Using the `utf8mb3` character set on
an older system may fix issues that can arise while setting up Paperless-ngx, but
`utf8mb3` can cause issues with consumption (where `utf8mb4` does not).

View File

@ -1,12 +1,83 @@
# Changelog
## paperless-ngx 1.12.1
## paperless-ngx 1.13.0
### Features
- Feature: allow disable warn on close saved view with changes [@shamoon](https://github.com/shamoon) ([#2681](https://github.com/paperless-ngx/paperless-ngx/pull/2681))
- Feature: Add option to enable response compression [@stumpylog](https://github.com/stumpylog) ([#2621](https://github.com/paperless-ngx/paperless-ngx/pull/2621))
- Feature: split documents on ASN barcode [@muued](https://github.com/muued) ([#2554](https://github.com/paperless-ngx/paperless-ngx/pull/2554))
### Bug Fixes
- Fix: Ignore path filtering didn't handle sub directories [@stumpylog](https://github.com/stumpylog) ([#2674](https://github.com/paperless-ngx/paperless-ngx/pull/2674))
- Bugfix: Generation of secret key hangs during install script [@stumpylog](https://github.com/stumpylog) ([#2657](https://github.com/paperless-ngx/paperless-ngx/pull/2657))
- Fix: Remove files produced by barcode splitting when completed [@stumpylog](https://github.com/stumpylog) ([#2648](https://github.com/paperless-ngx/paperless-ngx/pull/2648))
- Fix: add missing storage path placeholders [@shamoon](https://github.com/shamoon) ([#2651](https://github.com/paperless-ngx/paperless-ngx/pull/2651))
- Fix long dropdown contents break document detail column view [@shamoon](https://github.com/shamoon) ([#2638](https://github.com/paperless-ngx/paperless-ngx/pull/2638))
- Fix: tags dropdown should stay closed when removing [@shamoon](https://github.com/shamoon) ([#2625](https://github.com/paperless-ngx/paperless-ngx/pull/2625))
- Bugfix: Configure scheduled tasks to expire after some time [@stumpylog](https://github.com/stumpylog) ([#2614](https://github.com/paperless-ngx/paperless-ngx/pull/2614))
- Bugfix: Limit management list pagination maxSize to 5 [@Kaaybi](https://github.com/Kaaybi) ([#2618](https://github.com/paperless-ngx/paperless-ngx/pull/2618))
- Fix: Don't crash on bad ASNs during indexing [@stumpylog](https://github.com/stumpylog) ([#2586](https://github.com/paperless-ngx/paperless-ngx/pull/2586))
- Fix: Prevent mktime OverflowError except in even more rare cases [@stumpylog](https://github.com/stumpylog) ([#2574](https://github.com/paperless-ngx/paperless-ngx/pull/2574))
- Bugfix: Whoosh relative date queries weren't handling timezones [@stumpylog](https://github.com/stumpylog) ([#2566](https://github.com/paperless-ngx/paperless-ngx/pull/2566))
- Fix importing files with non-ascii names [@Kexogg](https://github.com/Kexogg) ([#2555](https://github.com/paperless-ngx/paperless-ngx/pull/2555))
### Documentation
- Chore: update recommended Gotenberg to 7.8, docs note possible incompatibility [@shamoon](https://github.com/shamoon) ([#2608](https://github.com/paperless-ngx/paperless-ngx/pull/2608))
- [Documentation] Add v1.12.2 changelog [@github-actions](https://github.com/github-actions) ([#2553](https://github.com/paperless-ngx/paperless-ngx/pull/2553))
### Maintenance
- Chore: Faster Docker image cleanup [@stumpylog](https://github.com/stumpylog) ([#2687](https://github.com/paperless-ngx/paperless-ngx/pull/2687))
- Chore: Remove duplicated folder [@stumpylog](https://github.com/stumpylog) ([#2561](https://github.com/paperless-ngx/paperless-ngx/pull/2561))
- Chore: Switch test coverage to Codecov [@stumpylog](https://github.com/stumpylog) ([#2582](https://github.com/paperless-ngx/paperless-ngx/pull/2582))
- Bump docker/build-push-action from 3 to 4 [@dependabot](https://github.com/dependabot) ([#2576](https://github.com/paperless-ngx/paperless-ngx/pull/2576))
- Chore: Run tests which require convert in the CI [@stumpylog](https://github.com/stumpylog) ([#2570](https://github.com/paperless-ngx/paperless-ngx/pull/2570))
- Feature: split documents on ASN barcode [@muued](https://github.com/muued) ([#2554](https://github.com/paperless-ngx/paperless-ngx/pull/2554))
- Bugfix: Whoosh relative date queries weren't handling timezones [@stumpylog](https://github.com/stumpylog) ([#2566](https://github.com/paperless-ngx/paperless-ngx/pull/2566))
- Fix importing files with non-ascii names [@Kexogg](https://github.com/Kexogg) ([#2555](https://github.com/paperless-ngx/paperless-ngx/pull/2555))
## paperless-ngx 1.12.2
_Note: Version 1.12.x introduced searching of comments which will work for comments added after the upgrade but a reindex of the search index is required in order to be able to search
older comments. The Docker image will automatically perform this reindex, bare metal installations will have to perform this manually, see [the docs](https://docs.paperless-ngx.com/administration/#index)._
### Bug Fixes
- Bugfix: Allow pre-consume scripts to modify incoming file [@stumpylog](https://github.com/stumpylog) ([#2547](https://github.com/paperless-ngx/paperless-ngx/pull/2547))
- Bugfix: Return to page based barcode scanning [@stumpylog](https://github.com/stumpylog) ([#2544](https://github.com/paperless-ngx/paperless-ngx/pull/2544))
- Fix: Try to prevent title debounce overwriting [@shamoon](https://github.com/shamoon) ([#2543](https://github.com/paperless-ngx/paperless-ngx/pull/2543))
- Fix comment search highlight + multi-word search [@shamoon](https://github.com/shamoon) ([#2542](https://github.com/paperless-ngx/paperless-ngx/pull/2542))
- Bugfix: Request PDF/A format from Gotenberg [@stumpylog](https://github.com/stumpylog) ([#2530](https://github.com/paperless-ngx/paperless-ngx/pull/2530))
- Fix: Trigger reindex for pre-existing comments [@shamoon](https://github.com/shamoon) ([#2519](https://github.com/paperless-ngx/paperless-ngx/pull/2519))
### Documentation
- Bugfix: Allow pre-consume scripts to modify incoming file [@stumpylog](https://github.com/stumpylog) ([#2547](https://github.com/paperless-ngx/paperless-ngx/pull/2547))
- Fix: Trigger reindex for pre-existing comments [@shamoon](https://github.com/shamoon) ([#2519](https://github.com/paperless-ngx/paperless-ngx/pull/2519))
- Minor updates to development documentation [@clemensrieder](https://github.com/clemensrieder) ([#2474](https://github.com/paperless-ngx/paperless-ngx/pull/2474))
- [Documentation] Add v1.12.1 changelog [@github-actions](https://github.com/github-actions) ([#2515](https://github.com/paperless-ngx/paperless-ngx/pull/2515))
### Maintenance
- Chore: Fix tag cleaner to work with attestations [@stumpylog](https://github.com/stumpylog) ([#2532](https://github.com/paperless-ngx/paperless-ngx/pull/2532))
- Chore: Make installers statically versioned [@stumpylog](https://github.com/stumpylog) ([#2517](https://github.com/paperless-ngx/paperless-ngx/pull/2517))
### All App Changes
- Bugfix: Allow pre-consume scripts to modify incoming file [@stumpylog](https://github.com/stumpylog) ([#2547](https://github.com/paperless-ngx/paperless-ngx/pull/2547))
- Bugfix: Return to page based barcode scanning [@stumpylog](https://github.com/stumpylog) ([#2544](https://github.com/paperless-ngx/paperless-ngx/pull/2544))
- Fix: Try to prevent title debounce overwriting [@shamoon](https://github.com/shamoon) ([#2543](https://github.com/paperless-ngx/paperless-ngx/pull/2543))
- Fix comment search highlight + multi-word search [@shamoon](https://github.com/shamoon) ([#2542](https://github.com/paperless-ngx/paperless-ngx/pull/2542))
- Bugfix: Request PDF/A format from Gotenberg [@stumpylog](https://github.com/stumpylog) ([#2530](https://github.com/paperless-ngx/paperless-ngx/pull/2530))
## paperless-ngx 1.12.1
### Bug Fixes
- Fix: comments not showing in search until after manual reindex in v1.12 [@shamoon](https://github.com/shamoon) ([#2513](https://github.com/paperless-ngx/paperless-ngx/pull/2513))
- Fix: date range search broken in 1.12 [@shamoon](https://github.com/shamoon) ([#2509](https://github.com/paperless-ngx/paperless-ngx/pull/2509))

View File

@ -626,7 +626,7 @@ services:
# ...
gotenberg:
image: gotenberg/gotenberg:7.6
image: gotenberg/gotenberg:7.8
restart: unless-stopped
# The gotenberg chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even javascript.
@ -999,13 +999,20 @@ within your documents.
`PAPERLESS_CONSUMER_IGNORE_PATTERNS=<json>`
: By default, paperless ignores certain files and folders in the
consumption directory, such as system files created by the Mac OS.
consumption directory, such as system files created by macOS
or hidden folders some tools use to store data.
This can be adjusted by configuring a custom json array with
patterns to exclude.
For example, `.DS_STORE/*` will ignore any files found in a folder
named `.DS_STORE`, including `.DS_STORE/bar.pdf` and `foo/.DS_STORE/bar.pdf`.
A pattern like `._*` will ignore anything starting with `._`, including
`._foo.pdf` and `._bar/foo.pdf`.
Defaults to
`[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]`.
`[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*"]`.
## Binaries

View File

@ -708,6 +708,12 @@ below use PostgreSQL, but are applicable to MySQL/MariaDB with the
MySQL also enforces limits on maximum lengths, but does so differently than
PostgreSQL. It may not be possible to migrate to MySQL due to this.
!!! warning
Using MariaDB version 10.4+ is recommended. Using the `utf8mb3` character set on
an older system may fix issues that can arise while setting up Paperless-ngx, but
`utf8mb3` can cause issues with consumption (where `utf8mb4` does not).
1. Stop paperless, if it is running.
2. Tell paperless to use PostgreSQL:

View File

@ -321,7 +321,7 @@ fi
wget "https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/docker/compose/docker-compose.$DOCKER_COMPOSE_VERSION.yml" -O docker-compose.yml
wget "https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/docker/compose/.env" -O .env
SECRET_KEY=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 64 | head -n 1)
SECRET_KEY=$(tr --delete --complement 'a-zA-Z0-9' < /dev/urandom 2>/dev/null | head --bytes 64)
DEFAULT_LANGUAGES=("deu eng fra ita spa")

View File

@ -2,5 +2,5 @@
docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13
docker run -d -p 6379:6379 redis:latest
docker run -p 3000:3000 -d gotenberg/gotenberg:7.6 gotenberg --chromium-disable-javascript=true --chromium-allow-list="file:///tmp/.*"
docker run -p 3000:3000 -d gotenberg/gotenberg:7.8 gotenberg --chromium-disable-javascript=true --chromium-allow-list="file:///tmp/.*"
docker run -p 9998:9998 -d ghcr.io/paperless-ngx/tika:latest

File diff suppressed because it is too large Load Diff

View File

@ -189,6 +189,14 @@
<a ngbNavLink i18n>Saved views</a>
<ng-template ngbNavContent>
<h4 i18n>Settings</h4>
<div class="row mb-3">
<div class="offset-md-3 col">
<app-input-check i18n-title title="Show warning when closing saved views with unsaved changes" formControlName="savedViewsWarnOnUnsavedChange"></app-input-check>
</div>
</div>
<h4 i18n>Views</h4>
<div formGroupName="savedViews">
<div *ngFor="let view of savedViews" [formGroupName]="view.id" class="row">

View File

@ -95,6 +95,7 @@ export class SettingsComponent
usersGroup: this.usersGroup,
groupsGroup: this.groupsGroup,
savedViewsWarnOnUnsavedChange: new FormControl(null),
savedViews: this.savedViewGroup,
mailAccounts: this.mailAccountGroup,
@ -211,6 +212,9 @@ export class SettingsComponent
notificationsConsumerSuppressOnDashboard: this.settings.get(
SETTINGS_KEYS.NOTIFICATIONS_CONSUMER_SUPPRESS_ON_DASHBOARD
),
savedViewsWarnOnUnsavedChange: this.settings.get(
SETTINGS_KEYS.SAVED_VIEWS_WARN_ON_UNSAVED_CHANGE
),
usersGroup: {},
groupsGroup: {},
savedViews: {},
@ -536,6 +540,10 @@ export class SettingsComponent
SETTINGS_KEYS.UPDATE_CHECKING_ENABLED,
this.settingsForm.value.updateCheckingEnabled
)
this.settings.set(
SETTINGS_KEYS.SAVED_VIEWS_WARN_ON_UNSAVED_CHANGE,
this.settingsForm.value.savedViewsWarnOnUnsavedChange
)
this.settings.setLanguage(this.settingsForm.value.displayLanguage)
this.settings
.storeSettings()

View File

@ -38,6 +38,8 @@ export const SETTINGS_KEYS = {
UPDATE_CHECKING_ENABLED: 'general-settings:update-checking:enabled',
UPDATE_CHECKING_BACKEND_SETTING:
'general-settings:update-checking:backend-setting',
SAVED_VIEWS_WARN_ON_UNSAVED_CHANGE:
'general-settings:saved-views:warn-on-unsaved-change',
}
export const SETTINGS: PaperlessUiSetting[] = [
@ -136,4 +138,9 @@ export const SETTINGS: PaperlessUiSetting[] = [
type: 'string',
default: '',
},
{
key: SETTINGS_KEYS.SAVED_VIEWS_WARN_ON_UNSAVED_CHANGE,
type: 'boolean',
default: true,
},
]

View File

@ -4,17 +4,25 @@ import { first, Observable, Subject } from 'rxjs'
import { DocumentListComponent } from '../components/document-list/document-list.component'
import { NgbModal } from '@ng-bootstrap/ng-bootstrap'
import { ConfirmDialogComponent } from '../components/common/confirm-dialog/confirm-dialog.component'
import { SettingsService } from '../services/settings.service'
import { SETTINGS_KEYS } from '../data/paperless-uisettings'
@Injectable()
export class DirtySavedViewGuard
implements CanDeactivate<DocumentListComponent>
{
constructor(private modalService: NgbModal) {}
constructor(
private modalService: NgbModal,
private settings: SettingsService
) {}
canDeactivate(
component: DocumentListComponent
): boolean | Observable<boolean> {
return component.savedViewIsModified ? this.warn(component) : true
return component.savedViewIsModified &&
this.settings.get(SETTINGS_KEYS.SAVED_VIEWS_WARN_ON_UNSAVED_CHANGE)
? this.warn(component)
: true
}
warn(component: DocumentListComponent) {

View File

@ -5,7 +5,7 @@ export const environment = {
apiBaseUrl: document.baseURI + 'api/',
apiVersion: '2',
appTitle: 'Paperless-ngx',
version: '1.12.2-dev',
version: '1.13.0-dev',
webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/',

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
import logging
import os
from fnmatch import filter
from pathlib import Path
from pathlib import PurePath
from threading import Event
@ -7,6 +8,7 @@ from threading import Thread
from time import monotonic
from time import sleep
from typing import Final
from typing import Set
from django.conf import settings
from django.core.management.base import BaseCommand
@ -25,15 +27,15 @@ except ImportError: # pragma: nocover
logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath):
"""Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory.
def _tags_from_path(filepath) -> Set[Tag]:
"""
Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory.
Returns set of Tag models
"""
normalized_consumption_dir = os.path.abspath(
os.path.normpath(settings.CONSUMPTION_DIR),
)
tag_ids = set()
path_parts = Path(filepath).relative_to(normalized_consumption_dir).parent.parts
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
for part in path_parts:
tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
@ -43,14 +45,41 @@ def _tags_from_path(filepath):
def _is_ignored(filepath: str) -> bool:
normalized_consumption_dir = os.path.abspath(
os.path.normpath(settings.CONSUMPTION_DIR),
"""
Checks if the given file should be ignored, based on configured
patterns.
Returns True if the file is ignored, False otherwise
"""
filepath = os.path.abspath(
os.path.normpath(filepath),
)
filepath_relative = PurePath(filepath).relative_to(normalized_consumption_dir)
return any(filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)
# Trim out the consume directory, leaving only the filename and its
# path relative to the consume directory
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
# March through the components of the path, including directories and the filename
# looking for anything matching
# foo/bar/baz/file.pdf -> (foo, bar, baz, file.pdf)
parts = []
for part in filepath_relative.parts:
# If the part is not the name (ie, it's a dir)
# Need to append the trailing slash or fnmatch doesn't match
# fnmatch("dir", "dir/*") == False
# fnmatch("dir/", "dir/*") == True
if part != filepath_relative.name:
part = part + "/"
parts.append(part)
for pattern in settings.CONSUMER_IGNORE_PATTERNS:
if len(filter(parts, pattern)):
return True
return False
def _consume(filepath):
def _consume(filepath: str) -> None:
if os.path.isdir(filepath) or _is_ignored(filepath):
return
@ -103,7 +132,13 @@ def _consume(filepath):
logger.exception("Error while consuming document")
def _consume_wait_unmodified(file):
def _consume_wait_unmodified(file: str) -> None:
"""
Waits for the given file to appear unmodified based on file size
and modification time. Will wait a configured number of seconds
and retry a configured number of times before either consuming or
giving up
"""
if _is_ignored(file):
return

View File

@ -247,22 +247,85 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
def test_is_ignored(self):
test_paths = [
(os.path.join(self.dirs.consumption_dir, "foo.pdf"), False),
(os.path.join(self.dirs.consumption_dir, "foo", "bar.pdf"), False),
(os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"), True),
(
os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"),
True,
),
(os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "._foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"), False),
{
"path": os.path.join(self.dirs.consumption_dir, "foo.pdf"),
"ignore": False,
},
{
"path": os.path.join(self.dirs.consumption_dir, "foo", "bar.pdf"),
"ignore": False,
},
{
"path": os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"),
"ignore": True,
},
{
"path": os.path.join(
self.dirs.consumption_dir,
"foo",
".DS_STORE",
"bar.pdf",
),
"ignore": True,
},
{
"path": os.path.join(
self.dirs.consumption_dir,
".DS_STORE",
"foo",
"bar.pdf",
),
"ignore": True,
},
{
"path": os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"),
"ignore": True,
},
{
"path": os.path.join(self.dirs.consumption_dir, ".stfolder.pdf"),
"ignore": False,
},
{
"path": os.path.join(
self.dirs.consumption_dir,
".stversions",
"foo.pdf",
),
"ignore": True,
},
{
"path": os.path.join(self.dirs.consumption_dir, ".stversions.pdf"),
"ignore": False,
},
{
"path": os.path.join(self.dirs.consumption_dir, "._foo.pdf"),
"ignore": True,
},
{
"path": os.path.join(self.dirs.consumption_dir, "my_foo.pdf"),
"ignore": False,
},
{
"path": os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"),
"ignore": True,
},
{
"path": os.path.join(
self.dirs.consumption_dir,
"@eaDir",
"SYNO@.fileindexdb",
"_1jk.fnm",
),
"ignore": True,
},
]
for file_path, expected_ignored in test_paths:
for test_setup in test_paths:
filepath = test_setup["path"]
expected_ignored_result = test_setup["ignore"]
self.assertEqual(
expected_ignored,
document_consumer._is_ignored(file_path),
f'_is_ignored("{file_path}") != {expected_ignored}',
expected_ignored_result,
document_consumer._is_ignored(filepath),
f'_is_ignored("{filepath}") != {expected_ignored_result}',
)
@mock.patch("documents.management.commands.document_consumer.open")

View File

@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-11-09 21:50+0000\n"
"PO-Revision-Date: 2023-01-23 12:37\n"
"PO-Revision-Date: 2023-01-27 19:22\n"
"Last-Translator: \n"
"Language-Team: Dutch\n"
"Language: nl_NL\n"
@ -368,15 +368,15 @@ msgstr "heeft tags in"
#: documents/models.py:410
msgid "ASN greater than"
msgstr ""
msgstr "ASN groter dan"
#: documents/models.py:411
msgid "ASN less than"
msgstr ""
msgstr "ASN kleiner dan"
#: documents/models.py:412
msgid "storage path is"
msgstr ""
msgstr "opslagpad is"
#: documents/models.py:422
msgid "rule type"
@ -396,99 +396,99 @@ msgstr "filterregels"
#: documents/models.py:536
msgid "Task ID"
msgstr ""
msgstr "Taak ID"
#: documents/models.py:537
msgid "Celery ID for the Task that was run"
msgstr ""
msgstr "Celery ID voor de taak die werd uitgevoerd"
#: documents/models.py:542
msgid "Acknowledged"
msgstr ""
msgstr "Bevestigd"
#: documents/models.py:543
msgid "If the task is acknowledged via the frontend or API"
msgstr ""
msgstr "Of de taak is bevestigd via de frontend of de API"
#: documents/models.py:549 documents/models.py:556
msgid "Task Name"
msgstr ""
msgstr "Taaknaam"
#: documents/models.py:550
msgid "Name of the file which the Task was run for"
msgstr ""
msgstr "Naam van het bestand waarvoor de taak werd uitgevoerd"
#: documents/models.py:557
msgid "Name of the Task which was run"
msgstr ""
msgstr "Naam van de uitgevoerde taak"
#: documents/models.py:562
msgid "Task Positional Arguments"
msgstr ""
msgstr "Positionele argumenten voor taak"
#: documents/models.py:564
msgid "JSON representation of the positional arguments used with the task"
msgstr ""
msgstr "JSON weergave van de positionele argumenten die gebruikt worden voor de taak"
#: documents/models.py:569
msgid "Task Named Arguments"
msgstr ""
msgstr "Argumenten met naam voor taak"
#: documents/models.py:571
msgid "JSON representation of the named arguments used with the task"
msgstr ""
msgstr "JSON weergave van de argumenten met naam die gebruikt worden voor de taak"
#: documents/models.py:578
msgid "Task State"
msgstr ""
msgstr "Taakstatus"
#: documents/models.py:579
msgid "Current state of the task being run"
msgstr ""
msgstr "Huidige status van de taak die wordt uitgevoerd"
#: documents/models.py:584
msgid "Created DateTime"
msgstr ""
msgstr "Aangemaakt DateTime"
#: documents/models.py:585
msgid "Datetime field when the task result was created in UTC"
msgstr ""
msgstr "Datetime veld wanneer het resultaat van de taak werd aangemaakt in UTC"
#: documents/models.py:590
msgid "Started DateTime"
msgstr ""
msgstr "Gestart DateTime"
#: documents/models.py:591
msgid "Datetime field when the task was started in UTC"
msgstr ""
msgstr "Datetime veld wanneer de taak werd gestart in UTC"
#: documents/models.py:596
msgid "Completed DateTime"
msgstr ""
msgstr "Voltooid DateTime"
#: documents/models.py:597
msgid "Datetime field when the task was completed in UTC"
msgstr ""
msgstr "Datetime veld wanneer de taak werd voltooid in UTC"
#: documents/models.py:602
msgid "Result Data"
msgstr ""
msgstr "Resultaatgegevens"
#: documents/models.py:604
msgid "The data returned by the task"
msgstr ""
msgstr "Gegevens geretourneerd door de taak"
#: documents/models.py:613
msgid "Comment for the document"
msgstr ""
msgstr "Commentaar op het document"
#: documents/models.py:642
msgid "comment"
msgstr ""
msgstr "opmerking"
#: documents/models.py:643
msgid "comments"
msgstr ""
msgstr "opmerkingen"
#: documents/serialisers.py:72
#, python-format

View File

@ -676,7 +676,7 @@ CONSUMER_IGNORE_PATTERNS = list(
json.loads(
os.getenv(
"PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]', # noqa: E501
'[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini", "@eaDir/*"]', # noqa: E501
),
),
)

View File

@ -1,7 +1,7 @@
from typing import Final
from typing import Tuple
__version__: Final[Tuple[int, int, int]] = (1, 12, 2)
__version__: Final[Tuple[int, int, int]] = (1, 13, 0)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y