mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-28 03:46:06 -05:00 
			
		
		
		
	explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation
This commit is contained in:
		| @@ -188,7 +188,11 @@ class Document(models.Model): | ||||
|     TYPE_JPG = "jpg" | ||||
|     TYPE_GIF = "gif" | ||||
|     TYPE_TIF = "tiff" | ||||
|     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,) | ||||
|     TYPE_TXT = "txt" | ||||
|     TYPE_CSV = "csv" | ||||
|     TYPE_MD  = "md" | ||||
|     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF, | ||||
|              TYPE_TXT, TYPE_CSV, TYPE_MD) | ||||
|  | ||||
|     STORAGE_TYPE_UNENCRYPTED = "unencrypted" | ||||
|     STORAGE_TYPE_GPG = "gpg" | ||||
| @@ -361,51 +365,52 @@ class FileInfo: | ||||
|         ) | ||||
|     ) | ||||
|  | ||||
|     formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv" | ||||
|     REGEXES = OrderedDict([ | ||||
|         ("created-correspondent-title-tags", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-title-tags", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-correspondent-title", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*)" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("created-title", re.compile( | ||||
|             r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - " | ||||
|             r"(?P<title>.*)" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("correspondent-title-tags", re.compile( | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*) - " | ||||
|             r"(?P<tags>[a-z0-9\-,]*)" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("correspondent-title", re.compile( | ||||
|             r"(?P<correspondent>.*) - " | ||||
|             r"(?P<title>.*)?" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )), | ||||
|         ("title", re.compile( | ||||
|             r"(?P<title>.*)" | ||||
|             r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$", | ||||
|             r"\.(?P<extension>{})$".format(formats), | ||||
|             flags=re.IGNORECASE | ||||
|         )) | ||||
|     ]) | ||||
|   | ||||
| @@ -48,6 +48,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView): | ||||
|             Document.TYPE_JPG: "image/jpeg", | ||||
|             Document.TYPE_GIF: "image/gif", | ||||
|             Document.TYPE_TIF: "image/tiff", | ||||
|             Document.TYPE_CSV: "text/csv", | ||||
|             Document.TYPE_MD:  "text/markdown", | ||||
|             Document.TYPE_TXT: "text/plain" | ||||
|         } | ||||
|  | ||||
|         if self.kwargs["kind"] == "thumb": | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from documents.parsers import DocumentParser, ParseError | ||||
|  | ||||
| class TextDocumentParser(DocumentParser): | ||||
|     """ | ||||
|     This parser directly parses a text document (.txt or .md) | ||||
|     This parser directly parses a text document (.txt, .md, or .csv) | ||||
|     """ | ||||
|  | ||||
|  | ||||
| @@ -30,18 +30,50 @@ class TextDocumentParser(DocumentParser): | ||||
|         The thumbnail of a txt is just a 500px wide image of the text | ||||
|         rendered onto a letter-sized page. | ||||
|         """ | ||||
|         # The below is heavily cribbed from https://askubuntu.com/a/590951 | ||||
|  | ||||
|         run_convert( | ||||
|             self.CONVERT, | ||||
|             "-size", "500x647", | ||||
|             "xc:white", | ||||
|         bg_color = "white"  # bg color | ||||
|         text_color = "black"  # text color | ||||
|         psize = [500, 647]  # icon size | ||||
|         n_lines = 50  # number of lines to show | ||||
|         output_file = os.path.join(self.tempdir, "convert-txt.png") | ||||
|  | ||||
|         temp_bg = os.path.join(self.tempdir, "bg.png") | ||||
|         temp_txlayer = os.path.join(self.tempdir, "tx.png") | ||||
|         picsize = "x".join([str(n) for n in psize]) | ||||
|         txsize = "x".join([str(n - 8) for n in psize]) | ||||
|  | ||||
|         def create_bg(): | ||||
|             work_size = ",".join([str(n - 1) for n in psize]) | ||||
|             r = str(round(psize[0] / 10)); | ||||
|             rounded = ",".join([r, r]) | ||||
|             run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ', | ||||
|                         '"fill ', bg_color, ' roundrectangle 0,0,', | ||||
|                         work_size, ",", rounded, '" ', temp_bg) | ||||
|  | ||||
|         def read_text(): | ||||
|             with open(self.document_path, 'r') as src: | ||||
|                 lines = [l.strip() for l in src.readlines()] | ||||
|                 text = "\n".join([l for l in lines[:n_lines]]) | ||||
|                 return text.replace('"', "'") | ||||
|  | ||||
|         def create_txlayer(): | ||||
|             run_command(self.CONVERT, | ||||
|                         "-background none", | ||||
|                         "-fill", | ||||
|                         text_color, | ||||
|                         "-pointsize", "12", | ||||
|             "-fill", "black", | ||||
|             "-draw", "\"text 0,12 \'$(cat {})\'\"".format(self.document_path), | ||||
|             os.path.join(self.tempdir, "convert-txt.png") | ||||
|         ) | ||||
|                         "-border 4 -bordercolor none", | ||||
|                         "-size ", txsize, | ||||
|                         ' caption:"', read_text(), '" ', | ||||
|                         temp_txlayer) | ||||
|  | ||||
|         return os.path.join(self.tempdir, "convert-txt.png") | ||||
|         create_txlayer() | ||||
|         create_bg() | ||||
|         run_command(self.CONVERT, temp_bg, temp_txlayer, | ||||
|                     "-background None -layers merge ", output_file) | ||||
|  | ||||
|         return output_file | ||||
|  | ||||
|     def get_text(self): | ||||
|  | ||||
| @@ -102,12 +134,13 @@ class TextDocumentParser(DocumentParser): | ||||
|         return date | ||||
|  | ||||
|  | ||||
| def run_convert(*args): | ||||
| def run_command(*args): | ||||
|     environment = os.environ.copy() | ||||
|     if settings.CONVERT_MEMORY_LIMIT: | ||||
|         environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT | ||||
|     if settings.CONVERT_TMPDIR: | ||||
|         environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR | ||||
|  | ||||
|     if not subprocess.Popen(args, env=environment).wait() == 0: | ||||
|     if not subprocess.Popen(' '.join(args), env=environment, | ||||
|                             shell=True).wait() == 0: | ||||
|         raise ParseError("Convert failed at {}".format(args)) | ||||
| @@ -5,7 +5,7 @@ from .parsers import TextDocumentParser | ||||
|  | ||||
| class ConsumerDeclaration: | ||||
|  | ||||
|     MATCHING_FILES = re.compile("^.*\.(txt|md)$") | ||||
|     MATCHING_FILES = re.compile("^.*\.(te?xt|md|csv)$") | ||||
|  | ||||
|     @classmethod | ||||
|     def handle(cls, sender, **kwargs): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Joshua Taillon
					Joshua Taillon