paperless-ngx/src/documents/data_models.py

63 lines
1.8 KiB
Python

import dataclasses
import datetime
import enum
from pathlib import Path
from typing import List
from typing import Optional
import magic
@dataclasses.dataclass
class DocumentMetadataOverrides:
    """
    Optional per-document values that replace what would otherwise be
    derived from the document content or from matching.

    Every attribute defaults to None; a None value means that field is
    not being overridden.
    """

    # Textual fields
    filename: Optional[str] = None
    title: Optional[str] = None

    # Integer identifiers (a list of them in the case of tags)
    correspondent_id: Optional[int] = None
    document_type_id: Optional[int] = None
    tag_ids: Optional[List[int]] = None

    # Remaining overrides
    created: Optional[datetime.datetime] = None
    asn: Optional[int] = None
    owner_id: Optional[int] = None
class DocumentSource(enum.IntEnum):
    """
    Identifies where an incoming document originated. Other uses may be
    added in the future.
    """

    # Explicit values, identical to what enum.auto() assigns for an
    # IntEnum (counting up from 1 in declaration order)
    ConsumeFolder = 1
    ApiUpload = 2
    MailFetch = 3
@dataclasses.dataclass
class ConsumableDocument:
    """
    Wraps a document arriving from the consume folder, an API upload or
    mail fetching, together with some useful operations on it.

    mime_type is not accepted by the constructor; it is filled in by
    __post_init__.
    """

    source: DocumentSource
    original_file: Path
    mime_type: str = dataclasses.field(default=None, init=False)

    def __post_init__(self):
        """
        Finalize the instance once the generated __init__ has run:

        1. Replace original_file with its resolved, absolute path
        2. Detect the file's mime type and store it in mime_type
        """
        # Resolve before anything else; wrapping in Path() also covers
        # the case where a plain str was handed in
        resolved_path = Path(self.original_file).resolve()
        self.original_file = resolved_path

        # The mime type is looked up a single time, at init
        # Note this method isn't called when the object is unpickled
        detected_type = magic.from_file(resolved_path, mime=True)
        self.mime_type = detected_type