mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation
This commit is contained in:
parent
d6fedbec52
commit
4849249d86
@ -188,7 +188,11 @@ class Document(models.Model):
|
|||||||
TYPE_JPG = "jpg"
|
TYPE_JPG = "jpg"
|
||||||
TYPE_GIF = "gif"
|
TYPE_GIF = "gif"
|
||||||
TYPE_TIF = "tiff"
|
TYPE_TIF = "tiff"
|
||||||
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
|
TYPE_TXT = "txt"
|
||||||
|
TYPE_CSV = "csv"
|
||||||
|
TYPE_MD = "md"
|
||||||
|
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
|
||||||
|
TYPE_TXT, TYPE_CSV, TYPE_MD)
|
||||||
|
|
||||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
@ -361,51 +365,52 @@ class FileInfo:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
|
||||||
REGEXES = OrderedDict([
|
REGEXES = OrderedDict([
|
||||||
("created-correspondent-title-tags", re.compile(
|
("created-correspondent-title-tags", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-title-tags", re.compile(
|
("created-title-tags", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-correspondent-title", re.compile(
|
("created-correspondent-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-title", re.compile(
|
("created-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title-tags", re.compile(
|
("correspondent-title-tags", re.compile(
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title", re.compile(
|
("correspondent-title", re.compile(
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*)?"
|
r"(?P<title>.*)?"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("title", re.compile(
|
("title", re.compile(
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)"
|
||||||
r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
|
r"\.(?P<extension>{})$".format(formats),
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
))
|
))
|
||||||
])
|
])
|
||||||
|
@ -48,6 +48,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
|
|||||||
Document.TYPE_JPG: "image/jpeg",
|
Document.TYPE_JPG: "image/jpeg",
|
||||||
Document.TYPE_GIF: "image/gif",
|
Document.TYPE_GIF: "image/gif",
|
||||||
Document.TYPE_TIF: "image/tiff",
|
Document.TYPE_TIF: "image/tiff",
|
||||||
|
Document.TYPE_CSV: "text/csv",
|
||||||
|
Document.TYPE_MD: "text/markdown",
|
||||||
|
Document.TYPE_TXT: "text/plain"
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.kwargs["kind"] == "thumb":
|
if self.kwargs["kind"] == "thumb":
|
||||||
|
@ -10,7 +10,7 @@ from documents.parsers import DocumentParser, ParseError
|
|||||||
|
|
||||||
class TextDocumentParser(DocumentParser):
|
class TextDocumentParser(DocumentParser):
|
||||||
"""
|
"""
|
||||||
This parser directly parses a text document (.txt or .md)
|
This parser directly parses a text document (.txt, .md, or .csv)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@ -30,18 +30,50 @@ class TextDocumentParser(DocumentParser):
|
|||||||
The thumbnail of a txt is just a 500px wide image of the text
|
The thumbnail of a txt is just a 500px wide image of the text
|
||||||
rendered onto a letter-sized page.
|
rendered onto a letter-sized page.
|
||||||
"""
|
"""
|
||||||
|
# The below is heavily cribbed from https://askubuntu.com/a/590951
|
||||||
|
|
||||||
run_convert(
|
bg_color = "white" # bg color
|
||||||
self.CONVERT,
|
text_color = "black" # text color
|
||||||
"-size", "500x647",
|
psize = [500, 647] # icon size
|
||||||
"xc:white",
|
n_lines = 50 # number of lines to show
|
||||||
"-pointsize", "12",
|
output_file = os.path.join(self.tempdir, "convert-txt.png")
|
||||||
"-fill", "black",
|
|
||||||
"-draw", "\"text 0,12 \'$(cat {})\'\"".format(self.document_path),
|
|
||||||
os.path.join(self.tempdir, "convert-txt.png")
|
|
||||||
)
|
|
||||||
|
|
||||||
return os.path.join(self.tempdir, "convert-txt.png")
|
temp_bg = os.path.join(self.tempdir, "bg.png")
|
||||||
|
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
||||||
|
picsize = "x".join([str(n) for n in psize])
|
||||||
|
txsize = "x".join([str(n - 8) for n in psize])
|
||||||
|
|
||||||
|
def create_bg():
|
||||||
|
work_size = ",".join([str(n - 1) for n in psize])
|
||||||
|
r = str(round(psize[0] / 10));
|
||||||
|
rounded = ",".join([r, r])
|
||||||
|
run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
|
||||||
|
'"fill ', bg_color, ' roundrectangle 0,0,',
|
||||||
|
work_size, ",", rounded, '" ', temp_bg)
|
||||||
|
|
||||||
|
def read_text():
|
||||||
|
with open(self.document_path, 'r') as src:
|
||||||
|
lines = [l.strip() for l in src.readlines()]
|
||||||
|
text = "\n".join([l for l in lines[:n_lines]])
|
||||||
|
return text.replace('"', "'")
|
||||||
|
|
||||||
|
def create_txlayer():
|
||||||
|
run_command(self.CONVERT,
|
||||||
|
"-background none",
|
||||||
|
"-fill",
|
||||||
|
text_color,
|
||||||
|
"-pointsize", "12",
|
||||||
|
"-border 4 -bordercolor none",
|
||||||
|
"-size ", txsize,
|
||||||
|
' caption:"', read_text(), '" ',
|
||||||
|
temp_txlayer)
|
||||||
|
|
||||||
|
create_txlayer()
|
||||||
|
create_bg()
|
||||||
|
run_command(self.CONVERT, temp_bg, temp_txlayer,
|
||||||
|
"-background None -layers merge ", output_file)
|
||||||
|
|
||||||
|
return output_file
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
|
|
||||||
@ -102,12 +134,13 @@ class TextDocumentParser(DocumentParser):
|
|||||||
return date
|
return date
|
||||||
|
|
||||||
|
|
||||||
def run_convert(*args):
|
def run_command(*args):
|
||||||
environment = os.environ.copy()
|
environment = os.environ.copy()
|
||||||
if settings.CONVERT_MEMORY_LIMIT:
|
if settings.CONVERT_MEMORY_LIMIT:
|
||||||
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
||||||
if settings.CONVERT_TMPDIR:
|
if settings.CONVERT_TMPDIR:
|
||||||
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
||||||
|
|
||||||
if not subprocess.Popen(args, env=environment).wait() == 0:
|
if not subprocess.Popen(' '.join(args), env=environment,
|
||||||
|
shell=True).wait() == 0:
|
||||||
raise ParseError("Convert failed at {}".format(args))
|
raise ParseError("Convert failed at {}".format(args))
|
@ -5,7 +5,7 @@ from .parsers import TextDocumentParser
|
|||||||
|
|
||||||
class ConsumerDeclaration:
|
class ConsumerDeclaration:
|
||||||
|
|
||||||
MATCHING_FILES = re.compile("^.*\.(txt|md)$")
|
MATCHING_FILES = re.compile("^.*\.(te?xt|md|csv)$")
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def handle(cls, sender, **kwargs):
|
def handle(cls, sender, **kwargs):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user