Compare commits
641 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
0a47fba9ae | ||
![]() |
9aea8a7d7c | ||
![]() |
7b9c0d65b9 | ||
![]() |
7dd9a4e089 | ||
![]() |
9784ea4a60 | ||
![]() |
4fce5aba63 | ||
![]() |
2ab77fbaf7 | ||
![]() |
94ad290e14 | ||
![]() |
d2b290f789 | ||
![]() |
583f05af2d | ||
![]() |
1b2cb13a21 | ||
![]() |
4dc0c7bbe2 | ||
![]() |
44212d492d | ||
![]() |
3ccb83e49c | ||
![]() |
215691ac1a | ||
![]() |
a884647a7c | ||
![]() |
590d129cd3 | ||
![]() |
8fcb7efbd2 | ||
![]() |
f1204d2749 | ||
![]() |
b07b8d65a6 | ||
![]() |
dadd7472fd | ||
![]() |
2801b60b0e | ||
![]() |
e625ac21c3 | ||
![]() |
7ace9eb325 | ||
![]() |
02465672f9 | ||
![]() |
6ea6c79575 | ||
![]() |
c430b9f8cf | ||
![]() |
92fb390f7b | ||
![]() |
8164840cba | ||
![]() |
8e8b2d7e8a | ||
![]() |
459de80124 | ||
![]() |
b38aacd1ce | ||
![]() |
a4535c11e4 | ||
![]() |
f78e93a364 | ||
![]() |
75d2a3a45f | ||
![]() |
1871ef1a72 | ||
![]() |
5e9a7b94ba | ||
![]() |
51a5746611 | ||
![]() |
16fc7ebecc | ||
![]() |
10a5d50ce9 | ||
![]() |
454264a87f | ||
![]() |
7ecb76dddc | ||
![]() |
64eabbe8d0 | ||
![]() |
197938eaab | ||
![]() |
02a40055f5 | ||
![]() |
72bacc016a | ||
![]() |
aeecc10e45 | ||
![]() |
2b3edbaa46 | ||
![]() |
270f8677a7 | ||
![]() |
447edd1355 | ||
![]() |
024921212a | ||
![]() |
5d08a34365 | ||
![]() |
20763e7c26 | ||
![]() |
b33ba4c902 | ||
![]() |
fae5e834b9 | ||
![]() |
4cb4bd13ad | ||
![]() |
896304ccaa | ||
![]() |
9ae186e6f9 | ||
![]() |
c7690c05f5 | ||
![]() |
7273a8c7a5 | ||
![]() |
4195d5746f | ||
![]() |
8b90b51b1a | ||
![]() |
e74af5c73c | ||
![]() |
99c2442b28 | ||
![]() |
3c2df48a1a | ||
![]() |
a0c1c48dca | ||
![]() |
4e05aba0a5 | ||
![]() |
299a69a2de | ||
![]() |
7bc077ac08 | ||
![]() |
64752f6b57 | ||
![]() |
c2880bcf9a | ||
![]() |
159dcdbda5 | ||
![]() |
1838fa971e | ||
![]() |
d8d111f093 | ||
![]() |
31a03b1d30 | ||
![]() |
5004771d79 | ||
![]() |
92b9fc1ba9 | ||
![]() |
585cc24dd5 | ||
![]() |
f261c70f1e | ||
![]() |
8c9dfa449c | ||
![]() |
d94ca2962e | ||
![]() |
3c7eacf923 | ||
![]() |
643486b14b | ||
![]() |
87045da1e2 | ||
![]() |
a109723ada | ||
![]() |
151573a26e | ||
![]() |
284e0d3f60 | ||
![]() |
7048af276a | ||
![]() |
e6cd3c1970 | ||
![]() |
623ac441d5 | ||
![]() |
003201bc1b | ||
![]() |
1bf6d9165f | ||
![]() |
4b49bd9de8 | ||
![]() |
69f82d503a | ||
![]() |
6c7ff54aad | ||
![]() |
0b53a8981c | ||
![]() |
c4dbd58efd | ||
![]() |
959f80604a | ||
![]() |
dee691b72b | ||
![]() |
a4829ce26a | ||
![]() |
7ed4dedd5e | ||
![]() |
93d272f50b | ||
![]() |
6fe5674ac3 | ||
![]() |
6024a862d6 | ||
![]() |
195f3a5dbf | ||
![]() |
94f0808a2f | ||
![]() |
e3f062b981 | ||
![]() |
22142203ce | ||
![]() |
412d9f5cd2 | ||
![]() |
133532a463 | ||
![]() |
c9683808c9 | ||
![]() |
b25f083687 | ||
![]() |
62ba4b9730 | ||
![]() |
150c7f26a5 | ||
![]() |
4b4111ec03 | ||
![]() |
9e33344808 | ||
![]() |
bba1fc7194 | ||
![]() |
efaa1c4dd7 | ||
![]() |
a88b318d7d | ||
![]() |
2460c3e076 | ||
![]() |
9763b72f81 | ||
![]() |
19ab62c06c | ||
![]() |
eb8f37d846 | ||
![]() |
5c9e2d7070 | ||
![]() |
da9f2b1a8c | ||
![]() |
985f298c46 | ||
![]() |
2bb63b2d02 | ||
![]() |
ac75c61c8c | ||
![]() |
f8f0915a32 | ||
![]() |
7b87511e88 | ||
![]() |
bb05c2218f | ||
![]() |
e96e8472d9 | ||
![]() |
3191c15889 | ||
![]() |
d4af7aa411 | ||
![]() |
7b3719101a | ||
![]() |
4def3bf5c2 | ||
![]() |
3daee46c3d | ||
![]() |
fbebd8d7c0 | ||
![]() |
af5cb35531 | ||
![]() |
61a2dca81f | ||
![]() |
4aa8e9b800 | ||
![]() |
b81fe1695d | ||
![]() |
3625e5080c | ||
![]() |
c21775980f | ||
![]() |
33e597f5bb | ||
![]() |
6fa2ca648a | ||
![]() |
adecf5d927 | ||
![]() |
e69d7d804b | ||
![]() |
a0eecb83cf | ||
![]() |
9955315a10 | ||
![]() |
ee7097b497 | ||
![]() |
387c23d27a | ||
![]() |
359593728e | ||
![]() |
9708832ccd | ||
![]() |
aa2ae8fe4c | ||
![]() |
729845662f | ||
![]() |
6ff28c92a4 | ||
![]() |
d19bf59f47 | ||
![]() |
a340b9c8a1 | ||
![]() |
d7939ca958 | ||
![]() |
00d67d53bf | ||
![]() |
b869ad02a1 | ||
![]() |
91d4941438 | ||
![]() |
5746e8b56d | ||
![]() |
8e83f90952 | ||
![]() |
80910c72cf | ||
![]() |
ca4ece3ccd | ||
![]() |
ac6c0484ed | ||
![]() |
1e4923835b | ||
![]() |
7be9ae9c02 | ||
![]() |
da38efebdf | ||
![]() |
0fd51e35e1 | ||
![]() |
59e0c1fe4e | ||
![]() |
cfe9528884 | ||
![]() |
1b45637e9c | ||
![]() |
76acf2b01d | ||
![]() |
eda2bd2dbd | ||
![]() |
6819decec3 | ||
![]() |
c2220aa1ef | ||
![]() |
0d87e529f3 | ||
![]() |
24ce1830eb | ||
![]() |
dfed4176ed | ||
![]() |
be8615741e | ||
![]() |
fd1f6aa960 | ||
![]() |
067a6107f5 | ||
![]() |
62782be08e | ||
![]() |
428fe4a372 | ||
![]() |
91e3302e54 | ||
![]() |
906d5d0bab | ||
![]() |
06c62abfbd | ||
![]() |
31e4a0a88b | ||
![]() |
cf82cb35c9 | ||
![]() |
53fff1d54a | ||
![]() |
60cf260b71 | ||
![]() |
b9d1499d04 | ||
![]() |
3fe68d7bbe | ||
![]() |
2eeb02638b | ||
![]() |
cb4beb5e71 | ||
![]() |
729f25c435 | ||
![]() |
d8e02c6fa0 | ||
![]() |
26c7fad005 | ||
![]() |
28b26eb4c7 | ||
![]() |
4032315851 | ||
![]() |
3c8d7f2dee | ||
![]() |
c76460bd96 | ||
![]() |
25f0a79d06 | ||
![]() |
20e586fa60 | ||
![]() |
384a118672 | ||
![]() |
7d3110f392 | ||
![]() |
ecd345f3e1 | ||
![]() |
ea637b292d | ||
![]() |
82c0b657c4 | ||
![]() |
0476be0ef0 | ||
![]() |
b12ab5fe04 | ||
![]() |
50c0c65c60 | ||
![]() |
a83058ab11 | ||
![]() |
16d5daa867 | ||
![]() |
e44fcd4e84 | ||
![]() |
67d7b4cef5 | ||
![]() |
83734c3bee | ||
![]() |
06dab340dd | ||
![]() |
58532eeb69 | ||
![]() |
2bc208cd6e | ||
![]() |
eaa7ae2fb5 | ||
![]() |
f4e5023d22 | ||
![]() |
cd5432fec0 | ||
![]() |
b1410a854e | ||
![]() |
8ec9c77e51 | ||
![]() |
f9ce4d8f6a | ||
![]() |
8c9a74ee0c | ||
![]() |
0b59ef2cfa | ||
![]() |
0099631905 | ||
![]() |
06c6f33d97 | ||
![]() |
4548038525 | ||
![]() |
a2b7687c3b | ||
![]() |
15cba8e14d | ||
![]() |
605f86f0cf | ||
![]() |
8cbaca22c1 | ||
![]() |
4269074944 | ||
![]() |
7b7331683d | ||
![]() |
a83637b2bf | ||
![]() |
721447999e | ||
![]() |
72cbdca6e8 | ||
![]() |
22e060e00e | ||
![]() |
23fb5c2a1f | ||
![]() |
d6e6f49c15 | ||
![]() |
fc259c8bfd | ||
![]() |
ecf90c4718 | ||
![]() |
f0b359889e | ||
![]() |
b0fb44db86 | ||
![]() |
bfd955b210 | ||
![]() |
8af21d6fe3 | ||
![]() |
e9f25190e9 | ||
![]() |
e81b829eb0 | ||
![]() |
5f4e5c2cfb | ||
![]() |
383358376f | ||
![]() |
00f0b55729 | ||
![]() |
b10b981cb5 | ||
![]() |
f805407bce | ||
![]() |
4f169da4a8 | ||
![]() |
4031381c31 | ||
![]() |
b7bc3830cc | ||
![]() |
f2872d6475 | ||
![]() |
f5219c101c | ||
![]() |
f329b5a3d0 | ||
![]() |
6f6a5f2eed | ||
![]() |
266a8cd1a9 | ||
![]() |
7675014c90 | ||
![]() |
ec5971c134 | ||
![]() |
d7fedfcd87 | ||
![]() |
cb99a8741e | ||
![]() |
a63ed236a4 | ||
![]() |
4594a5c41c | ||
![]() |
9109c25b3e | ||
![]() |
5435dc2499 | ||
![]() |
1d300fafad | ||
![]() |
b1194f9524 | ||
![]() |
2532bd1e2c | ||
![]() |
800e842ab3 | ||
![]() |
6f6f365e2b | ||
![]() |
43b863b816 | ||
![]() |
94e32005ca | ||
![]() |
204e14877d | ||
![]() |
92f05e051f | ||
![]() |
17bdf2a233 | ||
![]() |
ce37100a0a | ||
![]() |
81c371d66b | ||
![]() |
c114653977 | ||
![]() |
6280b9948a | ||
![]() |
3a322a7b33 | ||
![]() |
327ae03589 | ||
![]() |
cffbea9053 | ||
![]() |
ad0ef9a5a8 | ||
![]() |
03e7299925 | ||
![]() |
d3ba910f2d | ||
![]() |
329e649878 | ||
![]() |
206ee97554 | ||
![]() |
e512d4af8c | ||
![]() |
6a5e752172 | ||
![]() |
6ba527ef55 | ||
![]() |
220cc1927c | ||
![]() |
9956f4cb47 | ||
![]() |
fe055f6391 | ||
![]() |
eec506a13c | ||
![]() |
1530bbd1cb | ||
![]() |
db6afdd926 | ||
![]() |
5642715721 | ||
![]() |
3bd22f0b0f | ||
![]() |
c92c7e1ced | ||
![]() |
940f5d5b50 | ||
![]() |
4dc893a4fa | ||
![]() |
6d324dbd8e | ||
![]() |
8ddf05e573 | ||
![]() |
d869a6bcca | ||
![]() |
40bdeffa38 | ||
![]() |
5bf5710d39 | ||
![]() |
551a7e606c | ||
![]() |
feec36939b | ||
![]() |
554bba839e | ||
![]() |
ebaaa3a1e8 | ||
![]() |
bae715cd34 | ||
![]() |
2a3b8f5a7f | ||
![]() |
6a023507e2 | ||
![]() |
1551052cde | ||
![]() |
b6dd36a439 | ||
![]() |
ce38e4ae08 | ||
![]() |
97d6503fef | ||
![]() |
b0625cdced | ||
![]() |
31e8c44c18 | ||
![]() |
0472dfe25a | ||
![]() |
8b36c9ad64 | ||
![]() |
1266f2d5b9 | ||
![]() |
8196051959 | ||
![]() |
d198142a1e | ||
![]() |
5e15ede849 | ||
![]() |
06a6eb0326 | ||
![]() |
28819d6d0f | ||
![]() |
8a9e564dac | ||
![]() |
534704693b | ||
![]() |
bc40607c51 | ||
![]() |
6fdc17cc72 | ||
![]() |
69a5ba0618 | ||
![]() |
3c71a9160f | ||
![]() |
48ef8eca80 | ||
![]() |
812df3782a | ||
![]() |
54bb1ae27d | ||
![]() |
ff4a8b37bd | ||
![]() |
37d3a624b7 | ||
![]() |
493f6173da | ||
![]() |
272e87b741 | ||
![]() |
2b5e6f7a9d | ||
![]() |
70960f86ba | ||
![]() |
ee4d25567c | ||
![]() |
80a126e838 | ||
![]() |
c02bd66b3f | ||
![]() |
cea6720c1a | ||
![]() |
700d58058c | ||
![]() |
33e413af65 | ||
![]() |
45a13523d4 | ||
![]() |
95257d5723 | ||
![]() |
8da3ae2c53 | ||
![]() |
f17b541a5b | ||
![]() |
2b2e518dea | ||
![]() |
3f6e3a2750 | ||
![]() |
14784d5832 | ||
![]() |
8cd5e25364 | ||
![]() |
7788d93227 | ||
![]() |
826503802a | ||
![]() |
6db1e36e14 | ||
![]() |
3bc4d7dad7 | ||
![]() |
32d546740b | ||
![]() |
24da3e5034 | ||
![]() |
9e295ddf4f | ||
![]() |
eff6f2fb01 | ||
![]() |
c597da495c | ||
![]() |
de5e9c95ec | ||
![]() |
4e27242373 | ||
![]() |
7bf1e24616 | ||
![]() |
fd0759bf6f | ||
![]() |
d6bbf2cc8d | ||
![]() |
80495d42de | ||
![]() |
eac21f773f | ||
![]() |
52f5831657 | ||
![]() |
f35f33539a | ||
![]() |
46f310603b | ||
![]() |
531d3f03f9 | ||
![]() |
85cfd7610d | ||
![]() |
201b77189a | ||
![]() |
5b76b45e33 | ||
![]() |
bf2fac9393 | ||
![]() |
5a3affe8c0 | ||
![]() |
a5834393b3 | ||
![]() |
cd6e37c520 | ||
![]() |
af51165229 | ||
![]() |
d480620be9 | ||
![]() |
d470de3576 | ||
![]() |
538249b26c | ||
![]() |
fb9d3f736b | ||
![]() |
a6b7beaf6b | ||
![]() |
4d4d545343 | ||
![]() |
049dc17902 | ||
![]() |
b0ca57a7f0 | ||
![]() |
cdd49c5142 | ||
![]() |
4b31e5d0b4 | ||
![]() |
8076ebd78c | ||
![]() |
c864b3cd19 | ||
![]() |
2704bcb979 | ||
![]() |
59f6074093 | ||
![]() |
b1da7f3491 | ||
![]() |
adde88e7b9 | ||
![]() |
8e876ef2d1 | ||
![]() |
2ea0f83a91 | ||
![]() |
05d8ea5a9d | ||
![]() |
967248233f | ||
![]() |
b4c4b9fb6a | ||
![]() |
adb6483abc | ||
![]() |
908db55bb7 | ||
![]() |
610f20de28 | ||
![]() |
b2513a5cde | ||
![]() |
bfa1c13d01 | ||
![]() |
12aaff431f | ||
![]() |
547e5ea55e | ||
![]() |
c301127096 | ||
![]() |
19147855e7 | ||
![]() |
4e7c7ea1d6 | ||
![]() |
fcf8a49160 | ||
![]() |
c6d658a954 | ||
![]() |
a78cd6526c | ||
![]() |
bf895b54f4 | ||
![]() |
e5f84ef583 | ||
![]() |
8c690a9a51 | ||
![]() |
56526b970a | ||
![]() |
94fbf92916 | ||
![]() |
37f5e46d09 | ||
![]() |
38be817637 | ||
![]() |
17303f41da | ||
![]() |
55ef0d4a1b | ||
![]() |
a8f3c4be54 | ||
![]() |
1b9de2be5a | ||
![]() |
0e8265f1ae | ||
![]() |
5b45a140b9 | ||
![]() |
72fb9a475d | ||
![]() |
bf97f5807f | ||
![]() |
a707818b4d | ||
![]() |
fb46c1b96a | ||
![]() |
3226d8b25b | ||
![]() |
5c4363cbea | ||
![]() |
fa62ae820b | ||
![]() |
17891bafaf | ||
![]() |
15fdadadef | ||
![]() |
ce9f604d81 | ||
![]() |
4f876db5d1 | ||
![]() |
5e5f56dc67 | ||
![]() |
93fab8bb95 | ||
![]() |
35ca2195fe | ||
![]() |
7ace66d7fd | ||
![]() |
4f9a31244b | ||
![]() |
14cf4f7095 | ||
![]() |
8bd7c27826 | ||
![]() |
8c4f486fe9 | ||
![]() |
2849414445 | ||
![]() |
ea1ea0816f | ||
![]() |
52d3a8703c | ||
![]() |
4cb4d6adcd | ||
![]() |
24444237f2 | ||
![]() |
40c8629aef | ||
![]() |
98cdf614a5 | ||
![]() |
2eb2d99a91 | ||
![]() |
18ad9bcbf2 | ||
![]() |
997bff4917 | ||
![]() |
78f9a80895 | ||
![]() |
9231df7a4a | ||
![]() |
6f25917c86 | ||
![]() |
c41d1a78a8 | ||
![]() |
c3331086d5 | ||
![]() |
6bd9ccd8f6 | ||
![]() |
68c7cecb07 | ||
![]() |
bcc029a2c7 | ||
![]() |
ea38eb01b2 | ||
![]() |
01d070b882 | ||
![]() |
1727eb00cc | ||
![]() |
9d4180553c | ||
![]() |
8049af4b22 | ||
![]() |
7c6142643d | ||
![]() |
2e8706f4e2 | ||
![]() |
d39d32d555 | ||
![]() |
6f52945449 | ||
![]() |
37025297b5 | ||
![]() |
aa023ea2e3 | ||
![]() |
78bf0b63a5 | ||
![]() |
dc9e9e3b48 | ||
![]() |
ab29c49b7a | ||
![]() |
1c0ac474b8 | ||
![]() |
29391c1c7b | ||
![]() |
693834971c | ||
![]() |
97376d4b72 | ||
![]() |
3ee1d2a9a9 | ||
![]() |
605f885e19 | ||
![]() |
25fb8d9c3b | ||
![]() |
a96ecd673b | ||
![]() |
58a01a57ee | ||
![]() |
c18fc03ef3 | ||
![]() |
a96f79f6a3 | ||
![]() |
d6f1d004a3 | ||
![]() |
da72d3571b | ||
![]() |
8241da0eb3 | ||
![]() |
51562667bf | ||
![]() |
97eeae65a3 | ||
![]() |
1aee2988f7 | ||
![]() |
a63a8dd488 | ||
![]() |
06a9df6dbd | ||
![]() |
49933bb5a8 | ||
![]() |
7d7d9630c1 | ||
![]() |
6f0077efac | ||
![]() |
39be68a1a4 | ||
![]() |
ac69babfce | ||
![]() |
02c782a127 | ||
![]() |
4e90fda80f | ||
![]() |
88e3e556a1 | ||
![]() |
88cf6ef843 | ||
![]() |
9b602a4bf0 | ||
![]() |
fe2db4dbf7 | ||
![]() |
47c88a6bdd | ||
![]() |
a3bc3b78d5 | ||
![]() |
fed7d3e993 | ||
![]() |
3a74f24e49 | ||
![]() |
52afab39cf | ||
![]() |
8659292852 | ||
![]() |
ce73f159fd | ||
![]() |
71382e9c62 | ||
![]() |
a1a802fc92 | ||
![]() |
4200fc610d | ||
![]() |
32d212cd9f | ||
![]() |
5d3a6e230d | ||
![]() |
b33fcc117e | ||
![]() |
e96d65f945 | ||
![]() |
cfeed0ce6e | ||
![]() |
b89ecf7d77 | ||
![]() |
5ca25d44ba | ||
![]() |
2c1333a75f | ||
![]() |
3c48ce0225 | ||
![]() |
1e11c12d96 | ||
![]() |
3e22e8e0b9 | ||
![]() |
dba45f93a4 | ||
![]() |
18f3f44ae9 | ||
![]() |
85a6a271dc | ||
![]() |
abb515d4ea | ||
![]() |
309d1f2b67 | ||
![]() |
fa2f09bc4b | ||
![]() |
c51590cd12 | ||
![]() |
8e01406acf | ||
![]() |
7cce2f0fe6 | ||
![]() |
95091c2f39 | ||
![]() |
4a0aa12bd9 | ||
![]() |
9a0329746a | ||
![]() |
8392a6fd4a | ||
![]() |
8fa18bb8a6 | ||
![]() |
0095b593fb | ||
![]() |
b1e5135e21 | ||
![]() |
e88755e7ac | ||
![]() |
c582947291 | ||
![]() |
98fe3a2cb7 | ||
![]() |
61647606fa | ||
![]() |
95a1e5c645 | ||
![]() |
8ead77f128 | ||
![]() |
b9e9e82f33 | ||
![]() |
487fd3a5dd | ||
![]() |
657786a2fe | ||
![]() |
e74d7dadfb | ||
![]() |
a2937cd54d | ||
![]() |
f0497e7744 | ||
![]() |
4aa318598f | ||
![]() |
00f39d8b58 | ||
![]() |
0b1a16908f | ||
![]() |
d9796e5003 | ||
![]() |
3599bb52c0 | ||
![]() |
af8a6c3764 | ||
![]() |
6d37ebf79e | ||
![]() |
f6a70b85f4 | ||
![]() |
538a4219bd | ||
![]() |
85c41b79be | ||
![]() |
9b01aa9202 | ||
![]() |
df101f5e7a | ||
![]() |
1fa735eb23 | ||
![]() |
ebe21a0114 | ||
![]() |
d132eba143 | ||
![]() |
073c3c8fed | ||
![]() |
e3c1bde793 | ||
![]() |
b68906b14e | ||
![]() |
681eecc46e | ||
![]() |
1578e8de2d | ||
![]() |
b01cbc9aa0 | ||
![]() |
acd3832417 | ||
![]() |
82b2ba3cc2 | ||
![]() |
3de6e0bcf1 | ||
![]() |
6df73ae940 | ||
![]() |
2204090151 | ||
![]() |
3c81a7468b | ||
![]() |
5ef86f9489 | ||
![]() |
90cb0836bb | ||
![]() |
ef1d4264b5 | ||
![]() |
e1fa59122d | ||
![]() |
5bf26369e2 | ||
![]() |
36239ba09f | ||
![]() |
318c1d2fbd | ||
![]() |
e7c40fc3dc | ||
![]() |
0da0b1c062 | ||
![]() |
08988e11f8 | ||
![]() |
30372b0e85 | ||
![]() |
567e89d1c7 | ||
![]() |
f1f5227ccd | ||
![]() |
09b5bd17f2 | ||
![]() |
e384bd78c5 | ||
![]() |
fda844f64c | ||
![]() |
daf90399bd | ||
![]() |
3d37e49c1a | ||
![]() |
261c6fb990 | ||
![]() |
cdd2b99b6b | ||
![]() |
d0a0ae91c4 | ||
![]() |
c04b9fd7f6 | ||
![]() |
6809b15ce1 | ||
![]() |
c317eca1ca | ||
![]() |
466afa8203 | ||
![]() |
c2e3dc76d9 | ||
![]() |
5a899664f8 | ||
![]() |
990e905a04 | ||
![]() |
6b7155a849 | ||
![]() |
47851ddd3f | ||
![]() |
47189643ff | ||
![]() |
c1efe11cf3 | ||
![]() |
0e40ef5f35 | ||
![]() |
c8081595c4 | ||
![]() |
a2b5b3b253 | ||
![]() |
790bcf05ed | ||
![]() |
d8d2d53c59 | ||
![]() |
027897ff03 | ||
![]() |
cca576f518 | ||
![]() |
5fcf1b5434 | ||
![]() |
942b5aa9df | ||
![]() |
c05b39a056 | ||
![]() |
3c8196527f |
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"qpdf": {
|
||||
"version": "11.1.1"
|
||||
"version": "11.2.0"
|
||||
},
|
||||
"jbig2enc": {
|
||||
"version": "0.29",
|
||||
|
6
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@@ -6,12 +6,12 @@ body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Have a question? 👉 [Start a new discussion](https://github.com/paperless-ngx/paperless-ngx/discussions/new) or [ask in chat](https://matrix.to/#/#paperless:adnidor.de).
|
||||
Have a question? 👉 [Start a new discussion](https://github.com/paperless-ngx/paperless-ngx/discussions/new) or [ask in chat](https://matrix.to/#/#paperlessngx:matrix.org).
|
||||
|
||||
Before opening an issue, please double check:
|
||||
|
||||
- [The troubleshooting documentation](https://paperless-ngx.readthedocs.io/en/latest/troubleshooting.html).
|
||||
- [The installation instructions](https://paperless-ngx.readthedocs.io/en/latest/setup.html#installation).
|
||||
- [The troubleshooting documentation](https://docs.paperless-ngx.com/troubleshooting/).
|
||||
- [The installation instructions](https://docs.paperless-ngx.com/setup/#installation).
|
||||
- [Existing issues and discussions](https://github.com/paperless-ngx/paperless-ngx/search?q=&type=issues).
|
||||
- Disable any custom container initialization scripts, if using any
|
||||
|
||||
|
2
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -4,7 +4,7 @@ contact_links:
|
||||
url: https://github.com/paperless-ngx/paperless-ngx/discussions
|
||||
about: This issue tracker is not for support questions. Please refer to our Discussions.
|
||||
- name: 💬 Chat
|
||||
url: https://matrix.to/#/#paperless:adnidor.de
|
||||
url: https://matrix.to/#/#paperlessngx:matrix.org
|
||||
about: Want to discuss Paperless-ngx with others? Check out our chat.
|
||||
- name: 🚀 Feature Request
|
||||
url: https://github.com/paperless-ngx/paperless-ngx/discussions/new?category=feature-requests
|
||||
|
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -26,7 +26,7 @@ NOTE: Please check only one box!
|
||||
|
||||
- [ ] I have read & agree with the [contributing guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md).
|
||||
- [ ] If applicable, I have tested my code for new features & regressions on both mobile & desktop devices, using the latest version of major browsers.
|
||||
- [ ] If applicable, I have checked that all tests pass, see [documentation](https://paperless-ngx.readthedocs.io/en/latest/extending.html#back-end-development).
|
||||
- [ ] I have run all `pre-commit` hooks, see [documentation](https://paperless-ngx.readthedocs.io/en/latest/extending.html#code-formatting-with-pre-commit-hooks).
|
||||
- [ ] If applicable, I have checked that all tests pass, see [documentation](https://docs.paperless-ngx.com/development/#back-end-development).
|
||||
- [ ] I have run all `pre-commit` hooks, see [documentation](https://docs.paperless-ngx.com/development/#code-formatting-with-pre-commit-hooks).
|
||||
- [ ] I have made corresponding changes to the documentation as needed.
|
||||
- [ ] I have checked my modifications for any breaking changes.
|
||||
|
12
.github/release-drafter.yml
vendored
@@ -4,6 +4,7 @@ autolabeler:
|
||||
- '/^fix/'
|
||||
title:
|
||||
- "/^fix/i"
|
||||
- "/^Bugfix/i"
|
||||
- label: "enhancement"
|
||||
branch:
|
||||
- '/^feature/'
|
||||
@@ -13,6 +14,9 @@ categories:
|
||||
- title: 'Breaking Changes'
|
||||
labels:
|
||||
- 'breaking-change'
|
||||
- title: 'Notable Changes'
|
||||
labels:
|
||||
- 'notable'
|
||||
- title: 'Features'
|
||||
labels:
|
||||
- 'enhancement'
|
||||
@@ -20,7 +24,8 @@ categories:
|
||||
labels:
|
||||
- 'bug'
|
||||
- title: 'Documentation'
|
||||
label: 'documentation'
|
||||
labels:
|
||||
- 'documentation'
|
||||
- title: 'Maintenance'
|
||||
labels:
|
||||
- 'chore'
|
||||
@@ -29,7 +34,8 @@ categories:
|
||||
- 'ci-cd'
|
||||
- title: 'Dependencies'
|
||||
collapse-after: 3
|
||||
label: 'dependencies'
|
||||
labels:
|
||||
- 'dependencies'
|
||||
- title: 'All App Changes'
|
||||
labels:
|
||||
- 'frontend'
|
||||
@@ -46,6 +52,8 @@ include-labels:
|
||||
- 'frontend'
|
||||
- 'backend'
|
||||
- 'ci-cd'
|
||||
- 'breaking-change'
|
||||
- 'notable'
|
||||
category-template: '### $TITLE'
|
||||
change-template: '- $TITLE @$AUTHOR ([#$NUMBER]($URL))'
|
||||
change-title-escapes: '\<*_&#@'
|
||||
|
71
.github/scripts/cleanup-tags.py
vendored
@@ -15,6 +15,8 @@ from github import ContainerPackage
|
||||
from github import GithubBranchApi
|
||||
from github import GithubContainerRegistryApi
|
||||
|
||||
import docker
|
||||
|
||||
logger = logging.getLogger("cleanup-tags")
|
||||
|
||||
|
||||
@@ -151,12 +153,16 @@ class RegistryTagsCleaner:
|
||||
for tag in sorted(self.tags_to_keep):
|
||||
full_name = f"ghcr.io/{self.repo_owner}/{self.package_name}:{tag}"
|
||||
logger.info(f"Checking manifest for {full_name}")
|
||||
# TODO: It would be nice to use RegistryData from docker
|
||||
# except the ID doesn't map to anything in the manifest
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[
|
||||
shutil.which("docker"),
|
||||
"manifest",
|
||||
"buildx",
|
||||
"imagetools",
|
||||
"inspect",
|
||||
"--raw",
|
||||
full_name,
|
||||
],
|
||||
capture_output=True,
|
||||
@@ -241,6 +247,65 @@ class RegistryTagsCleaner:
|
||||
# By default, keep anything which is tagged
|
||||
self.tags_to_keep = list(set(self.all_pkgs_tags_to_version.keys()))
|
||||
|
||||
def check_tags_pull(self):
|
||||
"""
|
||||
This method uses the Docker Python SDK to confirm all tags which were
|
||||
kept still pull, for all platforms.
|
||||
|
||||
TODO: This is much slower (although more comprehensive). Maybe a Pool?
|
||||
"""
|
||||
logger.info("Beginning confirmation step")
|
||||
client = docker.from_env()
|
||||
imgs = []
|
||||
for tag in sorted(self.tags_to_keep):
|
||||
repository = f"ghcr.io/{self.repo_owner}/{self.package_name}"
|
||||
for arch, variant in [("amd64", None), ("arm64", None), ("arm", "v7")]:
|
||||
# From 11.2.0 onwards, qpdf is cross compiled, so there is a single arch, amd64
|
||||
# skip others in this case
|
||||
if "qpdf" in self.package_name and arch != "amd64" and tag == "11.2.0":
|
||||
continue
|
||||
# Skip beta and release candidate tags
|
||||
elif "beta" in tag:
|
||||
continue
|
||||
|
||||
# Build the platform name
|
||||
if variant is not None:
|
||||
platform = f"linux/{arch}/{variant}"
|
||||
else:
|
||||
platform = f"linux/{arch}"
|
||||
|
||||
try:
|
||||
logger.info(f"Pulling {repository}:{tag} for {platform}")
|
||||
image = client.images.pull(
|
||||
repository=repository,
|
||||
tag=tag,
|
||||
platform=platform,
|
||||
)
|
||||
imgs.append(image)
|
||||
except docker.errors.APIError as e:
|
||||
logger.error(
|
||||
f"Failed to pull {repository}:{tag}: {e}",
|
||||
)
|
||||
|
||||
# Prevent out of space errors by removing after a few
|
||||
# pulls
|
||||
if len(imgs) > 50:
|
||||
for image in imgs:
|
||||
try:
|
||||
client.images.remove(image.id)
|
||||
except docker.errors.APIError as e:
|
||||
err_str = str(e)
|
||||
# Ignore attempts to remove images that are partly shared
|
||||
# Ignore images which are somehow gone already
|
||||
if (
|
||||
"must be forced" not in err_str
|
||||
and "No such image" not in err_str
|
||||
):
|
||||
logger.error(
|
||||
f"Remove image ghcr.io/{self.repo_owner}/{self.package_name}:{tag} failed: {e}",
|
||||
)
|
||||
imgs = []
|
||||
|
||||
|
||||
class MainImageTagsCleaner(RegistryTagsCleaner):
|
||||
def decide_what_tags_to_keep(self):
|
||||
@@ -397,6 +462,10 @@ def _main():
|
||||
# Clean images which are untagged
|
||||
cleaner.clean_untagged(args.is_manifest)
|
||||
|
||||
# Verify remaining tags still pull
|
||||
if args.is_manifest:
|
||||
cleaner.check_tags_pull()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_main()
|
||||
|
164
.github/workflows/ci.yml
vendored
@@ -13,83 +13,99 @@ on:
|
||||
branches-ignore:
|
||||
- 'translations**'
|
||||
|
||||
env:
|
||||
# This is the version of pipenv all the steps will use
|
||||
# If changing this, change Dockerfile
|
||||
DEFAULT_PIP_ENV_VERSION: "2022.11.30"
|
||||
# This is the default version of Python to use in most steps
|
||||
# If changing this, change Dockerfile
|
||||
DEFAULT_PYTHON_VERSION: "3.9"
|
||||
|
||||
jobs:
|
||||
pre-commit:
|
||||
name: Linting Checks
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
-
|
||||
name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
-
|
||||
name: Install tools
|
||||
name: Install python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.9"
|
||||
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
-
|
||||
name: Check files
|
||||
uses: pre-commit/action@v3.0.0
|
||||
|
||||
documentation:
|
||||
name: "Build Documentation"
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
needs:
|
||||
- pre-commit
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Install pipenv
|
||||
run: |
|
||||
pipx install pipenv==2022.10.12
|
||||
-
|
||||
name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
cache: "pipenv"
|
||||
cache-dependency-path: 'Pipfile.lock'
|
||||
-
|
||||
name: Install pipenv
|
||||
run: |
|
||||
pip install --user pipenv==${DEFAULT_PIP_ENV_VERSION}
|
||||
-
|
||||
name: Install dependencies
|
||||
run: |
|
||||
pipenv sync --dev
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} sync --dev
|
||||
-
|
||||
name: List installed Python dependencies
|
||||
run: |
|
||||
pipenv run pip list
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pip list
|
||||
-
|
||||
name: Make documentation
|
||||
run: |
|
||||
cd docs/
|
||||
pipenv run make html
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run mkdocs build --config-file ./mkdocs.yml
|
||||
-
|
||||
name: Upload artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: documentation
|
||||
path: docs/_build/html/
|
||||
path: site/
|
||||
|
||||
documentation-deploy:
|
||||
name: "Deploy Documentation"
|
||||
runs-on: ubuntu-22.04
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
needs:
|
||||
- documentation
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Deploy docs
|
||||
uses: mhausenblas/mkdocs-deploy-gh-pages@master
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CUSTOM_DOMAIN: docs.paperless-ngx.com
|
||||
CONFIG_FILE: mkdocs.yml
|
||||
EXTRA_PACKAGES: build-base
|
||||
|
||||
tests-backend:
|
||||
name: "Tests (${{ matrix.python-version }})"
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
needs:
|
||||
- pre-commit
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.8', '3.9', '3.10']
|
||||
fail-fast: false
|
||||
services:
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
ports:
|
||||
- "9998:9998/tcp"
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
ports:
|
||||
- "3000:3000/tcp"
|
||||
env:
|
||||
# Enable Tika end to end testing
|
||||
TIKA_LIVE: 1
|
||||
@@ -97,6 +113,10 @@ jobs:
|
||||
PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
|
||||
PAPERLESS_MAIL_TEST_USER: ${{ secrets.TEST_MAIL_USER }}
|
||||
PAPERLESS_MAIL_TEST_PASSWD: ${{ secrets.TEST_MAIL_PASSWD }}
|
||||
# Skip Tests which require convert
|
||||
PAPERLESS_TEST_SKIP_CONVERT: 1
|
||||
# Enable Gotenberg end to end testing
|
||||
GOTENBERG_LIVE: 1
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
@@ -104,16 +124,22 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
-
|
||||
name: Install pipenv
|
||||
name: Start containers
|
||||
run: |
|
||||
pipx install pipenv==2022.10.12
|
||||
docker compose --file ${GITHUB_WORKSPACE}/docker/compose/docker-compose.ci-test.yml pull --quiet
|
||||
docker compose --file ${GITHUB_WORKSPACE}/docker/compose/docker-compose.ci-test.yml up --detach
|
||||
-
|
||||
name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "${{ matrix.python-version }}"
|
||||
cache: "pipenv"
|
||||
cache-dependency-path: 'Pipfile.lock'
|
||||
-
|
||||
name: Install pipenv
|
||||
run: |
|
||||
pip install --user pipenv==${DEFAULT_PIP_ENV_VERSION}
|
||||
-
|
||||
name: Install system dependencies
|
||||
run: |
|
||||
@@ -122,20 +148,21 @@ jobs:
|
||||
-
|
||||
name: Install Python dependencies
|
||||
run: |
|
||||
pipenv sync --dev
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run python --version
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} sync --dev
|
||||
-
|
||||
name: List installed Python dependencies
|
||||
run: |
|
||||
pipenv run pip list
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pip list
|
||||
-
|
||||
name: Tests
|
||||
run: |
|
||||
cd src/
|
||||
pipenv run pytest -rfEp
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run pytest -ra
|
||||
-
|
||||
name: Get changed files
|
||||
id: changed-files-specific
|
||||
uses: tj-actions/changed-files@v34
|
||||
uses: tj-actions/changed-files@v35
|
||||
with:
|
||||
files: |
|
||||
src/**
|
||||
@@ -147,17 +174,23 @@ jobs:
|
||||
done
|
||||
-
|
||||
name: Publish coverage results
|
||||
if: matrix.python-version == '3.9' && steps.changed-files-specific.outputs.any_changed == 'true'
|
||||
if: matrix.python-version == ${{ env.DEFAULT_PYTHON_VERSION }} && steps.changed-files-specific.outputs.any_changed == 'true'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# https://github.com/coveralls-clients/coveralls-python/issues/251
|
||||
run: |
|
||||
cd src/
|
||||
pipenv run coveralls --service=github
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run coveralls --service=github
|
||||
-
|
||||
name: Stop containers
|
||||
if: always()
|
||||
run: |
|
||||
docker compose --file ${GITHUB_WORKSPACE}/docker/compose/docker-compose.ci-test.yml logs
|
||||
docker compose --file ${GITHUB_WORKSPACE}/docker/compose/docker-compose.ci-test.yml down
|
||||
|
||||
tests-frontend:
|
||||
name: "Tests Frontend"
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
needs:
|
||||
- pre-commit
|
||||
strategy:
|
||||
@@ -171,19 +204,14 @@ jobs:
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
- run: cd src-ui && npm ci
|
||||
- run: cd src-ui && npm run lint
|
||||
- run: cd src-ui && npm run test
|
||||
- run: cd src-ui && npm run e2e:ci
|
||||
|
||||
prepare-docker-build:
|
||||
name: Prepare Docker Pipeline Data
|
||||
if: github.event_name == 'push' && (startsWith(github.ref, 'refs/heads/feature-') || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/beta' || contains(github.ref, 'beta.rc') || startsWith(github.ref, 'refs/tags/v'))
|
||||
runs-on: ubuntu-20.04
|
||||
# If the push triggered the installer library workflow, wait for it to
|
||||
# complete here. This ensures the required versions for the final
|
||||
# image have been built, while not waiting at all if the versions haven't changed
|
||||
concurrency:
|
||||
group: build-installer-library
|
||||
cancel-in-progress: false
|
||||
runs-on: ubuntu-22.04
|
||||
needs:
|
||||
- documentation
|
||||
- tests-backend
|
||||
@@ -202,7 +230,7 @@ jobs:
|
||||
name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.9"
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
-
|
||||
name: Setup qpdf image
|
||||
id: qpdf-setup
|
||||
@@ -254,7 +282,7 @@ jobs:
|
||||
|
||||
# build and push image to docker hub.
|
||||
build-docker-image:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-build-docker-image-${{ github.ref_name }}
|
||||
cancel-in-progress: true
|
||||
@@ -359,27 +387,27 @@ jobs:
|
||||
build-release:
|
||||
needs:
|
||||
- build-docker-image
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Install pipenv
|
||||
run: |
|
||||
pip3 install --upgrade pip setuptools wheel pipx
|
||||
pipx install pipenv
|
||||
-
|
||||
name: Set up Python
|
||||
id: setup-python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
cache: "pipenv"
|
||||
cache-dependency-path: 'Pipfile.lock'
|
||||
-
|
||||
name: Install pipenv + tools
|
||||
run: |
|
||||
pip install --upgrade --user pipenv==${DEFAULT_PIP_ENV_VERSION} setuptools wheel
|
||||
-
|
||||
name: Install Python dependencies
|
||||
run: |
|
||||
pipenv sync --dev
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} sync --dev
|
||||
-
|
||||
name: Install system dependencies
|
||||
run: |
|
||||
@@ -400,17 +428,17 @@ jobs:
|
||||
-
|
||||
name: Generate requirements file
|
||||
run: |
|
||||
pipenv requirements > requirements.txt
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} requirements > requirements.txt
|
||||
-
|
||||
name: Compile messages
|
||||
run: |
|
||||
cd src/
|
||||
pipenv run python3 manage.py compilemessages
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run python3 manage.py compilemessages
|
||||
-
|
||||
name: Collect static files
|
||||
run: |
|
||||
cd src/
|
||||
pipenv run python3 manage.py collectstatic --no-input
|
||||
pipenv --python ${{ steps.setup-python.outputs.python-version }} run python3 manage.py collectstatic --no-input
|
||||
-
|
||||
name: Move files
|
||||
run: |
|
||||
@@ -421,7 +449,7 @@ jobs:
|
||||
cp paperless.conf.example dist/paperless-ngx/paperless.conf
|
||||
cp gunicorn.conf.py dist/paperless-ngx/gunicorn.conf.py
|
||||
cp -r docker/ dist/paperless-ngx/docker
|
||||
cp scripts/*.service scripts/*.sh dist/paperless-ngx/scripts/
|
||||
cp scripts/*.service scripts/*.sh scripts/*.socket dist/paperless-ngx/scripts/
|
||||
cp -r src/ dist/paperless-ngx/src
|
||||
cp -r docs/_build/html/ dist/paperless-ngx/docs
|
||||
mv static dist/paperless-ngx
|
||||
@@ -438,7 +466,7 @@ jobs:
|
||||
path: dist/paperless-ngx.tar.xz
|
||||
|
||||
publish-release:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
prerelease: ${{ steps.get_version.outputs.prerelease }}
|
||||
changelog: ${{ steps.create-release.outputs.body }}
|
||||
@@ -466,7 +494,7 @@ jobs:
|
||||
-
|
||||
name: Create Release and Changelog
|
||||
id: create-release
|
||||
uses: paperless-ngx/release-drafter@master
|
||||
uses: release-drafter/release-drafter@v5
|
||||
with:
|
||||
name: Paperless-ngx ${{ steps.get_version.outputs.version }}
|
||||
tag: ${{ steps.get_version.outputs.version }}
|
||||
@@ -478,17 +506,16 @@ jobs:
|
||||
-
|
||||
name: Upload release archive
|
||||
id: upload-release-asset
|
||||
uses: actions/upload-release-asset@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
uses: shogo82148/actions-upload-release-asset@v1
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
upload_url: ${{ steps.create-release.outputs.upload_url }}
|
||||
asset_path: ./paperless-ngx.tar.xz
|
||||
asset_name: paperless-ngx-${{ steps.get_version.outputs.version }}.tar.xz
|
||||
asset_content_type: application/x-xz
|
||||
|
||||
append-changelog:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
needs:
|
||||
- publish-release
|
||||
if: needs.publish-release.outputs.prerelease == 'false'
|
||||
@@ -498,18 +525,17 @@ jobs:
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: main
|
||||
-
|
||||
name: Install pipenv
|
||||
run: |
|
||||
pip3 install --upgrade pip setuptools wheel pipx
|
||||
pipx install pipenv
|
||||
-
|
||||
name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
|
||||
cache: "pipenv"
|
||||
cache-dependency-path: 'Pipfile.lock'
|
||||
-
|
||||
name: Install pipenv + tools
|
||||
run: |
|
||||
pip install --upgrade --user pipenv==${DEFAULT_PIP_ENV_VERSION} setuptools wheel
|
||||
-
|
||||
name: Append Changelog to docs
|
||||
id: append-Changelog
|
||||
@@ -523,7 +549,7 @@ jobs:
|
||||
CURRENT_CHANGELOG=`tail --lines +2 changelog.md`
|
||||
echo -e "$CURRENT_CHANGELOG" >> changelog-new.md
|
||||
mv changelog-new.md changelog.md
|
||||
pipenv run pre-commit --files changelog.md
|
||||
pipenv run pre-commit run --files changelog.md || true
|
||||
git config --global user.name "github-actions"
|
||||
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git commit -am "Changelog ${{ needs.publish-release.outputs.version }} - GHA"
|
||||
|
26
.github/workflows/cleanup-tags.yml
vendored
@@ -1,17 +1,14 @@
|
||||
# This workflow runs on certain conditions to check for and potentially
|
||||
# delete container images from the GHCR which no longer have an associated
|
||||
# code branch.
|
||||
# Requires a PAT with the correct scope set in the secrets
|
||||
# Requires a PAT with the correct scope set in the secrets.
|
||||
#
|
||||
# This workflow will not trigger runs on forked repos.
|
||||
|
||||
name: Cleanup Image Tags
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * SAT'
|
||||
delete:
|
||||
pull_request:
|
||||
types:
|
||||
- closed
|
||||
push:
|
||||
paths:
|
||||
- ".github/workflows/cleanup-tags.yml"
|
||||
@@ -26,7 +23,8 @@ concurrency:
|
||||
jobs:
|
||||
cleanup-images:
|
||||
name: Cleanup Image Tags for ${{ matrix.primary-name }}
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository_owner == 'paperless-ngx'
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
@@ -64,9 +62,9 @@ jobs:
|
||||
with:
|
||||
python-version: "3.10"
|
||||
-
|
||||
name: Install httpx
|
||||
name: Install Python libraries
|
||||
run: |
|
||||
python -m pip install httpx
|
||||
python -m pip install httpx docker
|
||||
#
|
||||
# Clean up primary package
|
||||
#
|
||||
@@ -83,13 +81,3 @@ jobs:
|
||||
if: "${{ env.TOKEN != '' }}"
|
||||
run: |
|
||||
python ${GITHUB_WORKSPACE}/.github/scripts/cleanup-tags.py --untagged --delete "${{ matrix.cache-name }}"
|
||||
#
|
||||
# Verify tags which are left still pull
|
||||
#
|
||||
-
|
||||
name: Check all tags still pull
|
||||
run: |
|
||||
ghcr_name=$(echo "ghcr.io/${GITHUB_REPOSITORY_OWNER}/${{ matrix.primary-name }}" | awk '{ print tolower($0) }')
|
||||
echo "Pulling all tags of ${ghcr_name}"
|
||||
docker pull --quiet --all-tags ${ghcr_name}
|
||||
docker image list
|
||||
|
2
.github/workflows/codeql-analysis.yml
vendored
@@ -23,7 +23,7 @@ on:
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
|
146
.github/workflows/installer-library.yml
vendored
@@ -34,7 +34,7 @@ concurrency:
|
||||
jobs:
|
||||
prepare-docker-build:
|
||||
name: Prepare Docker Image Version Data
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
-
|
||||
name: Set ghcr repository name
|
||||
@@ -95,8 +95,8 @@ jobs:
|
||||
name: Setup other versions
|
||||
id: cache-bust-setup
|
||||
run: |
|
||||
pillow_version=$(jq ".default.pillow.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g')
|
||||
lxml_version=$(jq ".default.lxml.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g')
|
||||
pillow_version=$(jq -r '.default.pillow.version | gsub("=";"")' Pipfile.lock)
|
||||
lxml_version=$(jq -r '.default.lxml.version | gsub("=";"")' Pipfile.lock)
|
||||
|
||||
echo "Pillow is ${pillow_version}"
|
||||
echo "lxml is ${lxml_version}"
|
||||
@@ -127,6 +127,7 @@ jobs:
|
||||
uses: ./.github/workflows/reusable-workflow-builder.yml
|
||||
with:
|
||||
dockerfile: ./docker-builders/Dockerfile.qpdf
|
||||
build-platforms: linux/amd64
|
||||
build-json: ${{ needs.prepare-docker-build.outputs.qpdf-json }}
|
||||
build-args: |
|
||||
QPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }}
|
||||
@@ -168,3 +169,142 @@ jobs:
|
||||
PIKEPDF_VERSION=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
|
||||
PILLOW_VERSION=${{ needs.prepare-docker-build.outputs.pillow-version }}
|
||||
LXML_VERSION=${{ needs.prepare-docker-build.outputs.lxml-version }}
|
||||
|
||||
commit-binary-files:
|
||||
name: Store installers
|
||||
needs:
|
||||
- prepare-docker-build
|
||||
- build-qpdf-debs
|
||||
- build-jbig2enc
|
||||
- build-psycopg2-wheel
|
||||
- build-pikepdf-wheel
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: binary-library
|
||||
-
|
||||
name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.9"
|
||||
-
|
||||
name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -qq --no-install-recommends tree
|
||||
-
|
||||
name: Extract qpdf files
|
||||
run: |
|
||||
version=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).version }}
|
||||
tag=${{ fromJSON(needs.prepare-docker-build.outputs.qpdf-json).image_tag }}
|
||||
|
||||
docker pull --quiet ${tag}
|
||||
docker create --name qpdf-extract ${tag}
|
||||
|
||||
mkdir --parents qpdf/${version}/amd64
|
||||
docker cp qpdf-extract:/usr/src/qpdf/${version}/amd64 qpdf/${version}
|
||||
|
||||
mkdir --parents qpdf/${version}/arm64
|
||||
docker cp qpdf-extract:/usr/src/qpdf/${version}/arm64 qpdf/${version}
|
||||
|
||||
mkdir --parents qpdf/${version}/armv7
|
||||
docker cp qpdf-extract:/usr/src/qpdf/${version}/armv7 qpdf/${version}
|
||||
-
|
||||
name: Extract psycopg2 files
|
||||
run: |
|
||||
version=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).version }}
|
||||
tag=${{ fromJSON(needs.prepare-docker-build.outputs.psycopg2-json).image_tag }}
|
||||
|
||||
docker pull --quiet --platform linux/amd64 ${tag}
|
||||
docker create --platform linux/amd64 --name psycopg2-extract ${tag}
|
||||
mkdir --parents psycopg2/${version}/amd64
|
||||
docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/amd64
|
||||
mv psycopg2/${version}/amd64/wheels/* psycopg2/${version}/amd64
|
||||
rm -r psycopg2/${version}/amd64/wheels/
|
||||
docker rm psycopg2-extract
|
||||
|
||||
docker pull --quiet --platform linux/arm64 ${tag}
|
||||
docker create --platform linux/arm64 --name psycopg2-extract ${tag}
|
||||
mkdir --parents psycopg2/${version}/arm64
|
||||
docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/arm64
|
||||
mv psycopg2/${version}/arm64/wheels/* psycopg2/${version}/arm64
|
||||
rm -r psycopg2/${version}/arm64/wheels/
|
||||
docker rm psycopg2-extract
|
||||
|
||||
docker pull --quiet --platform linux/arm/v7 ${tag}
|
||||
docker create --platform linux/arm/v7 --name psycopg2-extract ${tag}
|
||||
mkdir --parents psycopg2/${version}/armv7
|
||||
docker cp psycopg2-extract:/usr/src/wheels/ psycopg2/${version}/armv7
|
||||
mv psycopg2/${version}/armv7/wheels/* psycopg2/${version}/armv7
|
||||
rm -r psycopg2/${version}/armv7/wheels/
|
||||
docker rm psycopg2-extract
|
||||
-
|
||||
name: Extract pikepdf files
|
||||
run: |
|
||||
version=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).version }}
|
||||
tag=${{ fromJSON(needs.prepare-docker-build.outputs.pikepdf-json).image_tag }}
|
||||
|
||||
docker pull --quiet --platform linux/amd64 ${tag}
|
||||
docker create --platform linux/amd64 --name pikepdf-extract ${tag}
|
||||
mkdir --parents pikepdf/${version}/amd64
|
||||
docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/amd64
|
||||
mv pikepdf/${version}/amd64/wheels/* pikepdf/${version}/amd64
|
||||
rm -r pikepdf/${version}/amd64/wheels/
|
||||
docker rm pikepdf-extract
|
||||
|
||||
docker pull --quiet --platform linux/arm64 ${tag}
|
||||
docker create --platform linux/arm64 --name pikepdf-extract ${tag}
|
||||
mkdir --parents pikepdf/${version}/arm64
|
||||
docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/arm64
|
||||
mv pikepdf/${version}/arm64/wheels/* pikepdf/${version}/arm64
|
||||
rm -r pikepdf/${version}/arm64/wheels/
|
||||
docker rm pikepdf-extract
|
||||
|
||||
docker pull --quiet --platform linux/arm/v7 ${tag}
|
||||
docker create --platform linux/arm/v7 --name pikepdf-extract ${tag}
|
||||
mkdir --parents pikepdf/${version}/armv7
|
||||
docker cp pikepdf-extract:/usr/src/wheels/ pikepdf/${version}/armv7
|
||||
mv pikepdf/${version}/armv7/wheels/* pikepdf/${version}/armv7
|
||||
rm -r pikepdf/${version}/armv7/wheels/
|
||||
docker rm pikepdf-extract
|
||||
-
|
||||
name: Extract jbig2enc files
|
||||
run: |
|
||||
version=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).version }}
|
||||
tag=${{ fromJSON(needs.prepare-docker-build.outputs.jbig2enc-json).image_tag }}
|
||||
|
||||
docker pull --quiet --platform linux/amd64 ${tag}
|
||||
docker create --platform linux/amd64 --name jbig2enc-extract ${tag}
|
||||
mkdir --parents jbig2enc/${version}/amd64
|
||||
docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/amd64/
|
||||
mv jbig2enc/${version}/amd64/build/* jbig2enc/${version}/amd64/
|
||||
docker rm jbig2enc-extract
|
||||
|
||||
docker pull --quiet --platform linux/arm64 ${tag}
|
||||
docker create --platform linux/arm64 --name jbig2enc-extract ${tag}
|
||||
mkdir --parents jbig2enc/${version}/arm64
|
||||
docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/arm64
|
||||
mv jbig2enc/${version}/arm64/build/* jbig2enc/${version}/arm64/
|
||||
docker rm jbig2enc-extract
|
||||
|
||||
docker pull --quiet --platform linux/arm/v7 ${tag}
|
||||
docker create --platform linux/arm/v7 --name jbig2enc-extract ${tag}
|
||||
mkdir --parents jbig2enc/${version}/armv7
|
||||
docker cp jbig2enc-extract:/usr/src/jbig2enc/build jbig2enc/${version}/armv7
|
||||
mv jbig2enc/${version}/armv7/build/* jbig2enc/${version}/armv7/
|
||||
docker rm jbig2enc-extract
|
||||
-
|
||||
name: Show file structure
|
||||
run: |
|
||||
tree .
|
||||
-
|
||||
name: Commit files
|
||||
run: |
|
||||
git config --global user.name "github-actions"
|
||||
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git add pikepdf/ qpdf/ psycopg2/ jbig2enc/
|
||||
git commit -m "Updating installer packages" || true
|
||||
git push origin || true
|
||||
|
4
.github/workflows/project-actions.yml
vendored
@@ -24,7 +24,7 @@ env:
|
||||
jobs:
|
||||
issue_opened_or_reopened:
|
||||
name: issue_opened_or_reopened
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
if: github.event_name == 'issues' && (github.event.action == 'opened' || github.event.action == 'reopened')
|
||||
steps:
|
||||
- name: Add issue to project and set status to ${{ env.todo }}
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
status_value: ${{ env.todo }} # Target status
|
||||
pr_opened_or_reopened:
|
||||
name: pr_opened_or_reopened
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
# write permission is required for autolabeler
|
||||
pull-requests: write
|
||||
|
@@ -13,6 +13,10 @@ on:
|
||||
required: false
|
||||
default: ""
|
||||
type: string
|
||||
build-platforms:
|
||||
required: false
|
||||
default: linux/amd64,linux/arm64,linux/arm/v7
|
||||
type: string
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ fromJSON(inputs.build-json).name }}-${{ fromJSON(inputs.build-json).version }}
|
||||
@@ -21,7 +25,7 @@ concurrency:
|
||||
jobs:
|
||||
build-image:
|
||||
name: Build ${{ fromJSON(inputs.build-json).name }} @ ${{ fromJSON(inputs.build-json).version }}
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
-
|
||||
name: Checkout
|
||||
@@ -46,7 +50,7 @@ jobs:
|
||||
context: .
|
||||
file: ${{ inputs.dockerfile }}
|
||||
tags: ${{ fromJSON(inputs.build-json).image_tag }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||
platforms: ${{ inputs.build-platforms }}
|
||||
build-args: ${{ inputs.build-args }}
|
||||
push: true
|
||||
cache-from: type=registry,ref=${{ fromJSON(inputs.build-json).cache_tag }}
|
||||
|
4
.gitignore
vendored
@@ -51,8 +51,8 @@ coverage.xml
|
||||
# Django stuff:
|
||||
*.log
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
# MkDocs documentation
|
||||
site/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
@@ -5,7 +5,7 @@
|
||||
repos:
|
||||
# General hooks
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.3.0
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: check-docstring-first
|
||||
- id: check-json
|
||||
@@ -47,23 +47,23 @@ repos:
|
||||
- id: yesqa
|
||||
exclude: "(migrations)"
|
||||
- repo: https://github.com/asottile/add-trailing-comma
|
||||
rev: "v2.3.0"
|
||||
rev: "v2.4.0"
|
||||
hooks:
|
||||
- id: add-trailing-comma
|
||||
exclude: "(migrations)"
|
||||
- repo: https://github.com/PyCQA/flake8
|
||||
rev: 5.0.4
|
||||
rev: 6.0.0
|
||||
hooks:
|
||||
- id: flake8
|
||||
files: ^src/
|
||||
args:
|
||||
- "--config=./src/setup.cfg"
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.10.0
|
||||
rev: 22.12.0
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/asottile/pyupgrade
|
||||
rev: v3.2.2
|
||||
rev: v3.3.1
|
||||
hooks:
|
||||
- id: pyupgrade
|
||||
exclude: "(migrations)"
|
||||
@@ -82,6 +82,6 @@ repos:
|
||||
args:
|
||||
- "--tab"
|
||||
- repo: https://github.com/shellcheck-py/shellcheck-py
|
||||
rev: "v0.8.0.4"
|
||||
rev: "v0.9.0.2"
|
||||
hooks:
|
||||
- id: shellcheck
|
||||
|
1
.python-version
Normal file
@@ -0,0 +1 @@
|
||||
3.8.15
|
@@ -1,16 +0,0 @@
|
||||
# .readthedocs.yml
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
# Optionally set the version of Python and requirements required to build your docs
|
||||
python:
|
||||
version: "3.8"
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
@@ -27,11 +27,11 @@ Please format and test your code! I know it's a hassle, but it makes sure that y
|
||||
|
||||
To test your code, execute `pytest` in the src/ directory. This also generates an HTML coverage report, which you can use to see if you missed anything important during testing.
|
||||
|
||||
Before you can run `pytest`, ensure to [properly set up your local environment](https://paperless-ngx.readthedocs.io/en/latest/extending.html#initial-setup-and-first-start).
|
||||
Before you can run `pytest`, make sure to [properly set up your local environment](https://docs.paperless-ngx.com/development/#initial-setup-and-first-start).
|
||||
|
||||
## More info:
|
||||
|
||||
... is available in the documentation. https://paperless-ngx.readthedocs.io/en/latest/extending.html
|
||||
... is available [in the documentation](https://docs.paperless-ngx.com/development).
|
||||
|
||||
# Merging PRs
|
||||
|
||||
|
113
Dockerfile
@@ -1,19 +1,5 @@
|
||||
# syntax=docker/dockerfile:1.4
|
||||
|
||||
# Pull the installer images from the library
|
||||
# These are all built previously
|
||||
# They provide either a .deb or .whl
|
||||
|
||||
ARG JBIG2ENC_VERSION
|
||||
ARG QPDF_VERSION
|
||||
ARG PIKEPDF_VERSION
|
||||
ARG PSYCOPG2_VERSION
|
||||
|
||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/jbig2enc:${JBIG2ENC_VERSION} as jbig2enc-builder
|
||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/qpdf:${QPDF_VERSION} as qpdf-builder
|
||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/pikepdf:${PIKEPDF_VERSION} as pikepdf-builder
|
||||
FROM ghcr.io/paperless-ngx/paperless-ngx/builder/psycopg2:${PSYCOPG2_VERSION} as psycopg2-builder
|
||||
|
||||
FROM --platform=$BUILDPLATFORM node:16-bullseye-slim AS compile-frontend
|
||||
|
||||
# This stage compiles the frontend
|
||||
@@ -45,61 +31,67 @@ COPY Pipfile* ./
|
||||
|
||||
RUN set -eux \
|
||||
&& echo "Installing pipenv" \
|
||||
&& python3 -m pip install --no-cache-dir --upgrade pipenv \
|
||||
&& python3 -m pip install --no-cache-dir --upgrade pipenv==2022.11.30 \
|
||||
&& echo "Generating requirement.txt" \
|
||||
&& pipenv requirements > requirements.txt
|
||||
|
||||
FROM python:3.9-slim-bullseye as main-app
|
||||
|
||||
LABEL org.opencontainers.image.authors="paperless-ngx team <hello@paperless-ngx.com>"
|
||||
LABEL org.opencontainers.image.documentation="https://paperless-ngx.readthedocs.io/en/latest/"
|
||||
LABEL org.opencontainers.image.documentation="https://docs.paperless-ngx.com/"
|
||||
LABEL org.opencontainers.image.source="https://github.com/paperless-ngx/paperless-ngx"
|
||||
LABEL org.opencontainers.image.url="https://github.com/paperless-ngx/paperless-ngx"
|
||||
LABEL org.opencontainers.image.licenses="GPL-3.0-only"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
# Buildx provided, must be defined to use though
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
# Workflow provided
|
||||
ARG JBIG2ENC_VERSION
|
||||
ARG QPDF_VERSION
|
||||
ARG PIKEPDF_VERSION
|
||||
ARG PSYCOPG2_VERSION
|
||||
|
||||
#
|
||||
# Begin installation and configuration
|
||||
# Order the steps below from least often changed to most
|
||||
#
|
||||
|
||||
# copy jbig2enc
|
||||
# Basically will never change again
|
||||
COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/.libs/libjbig2enc* /usr/local/lib/
|
||||
COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/jbig2 /usr/local/bin/
|
||||
COPY --from=jbig2enc-builder /usr/src/jbig2enc/src/*.h /usr/local/include/
|
||||
|
||||
# Packages needed for running
|
||||
ARG RUNTIME_PACKAGES="\
|
||||
# Python
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-setuptools \
|
||||
# General utils
|
||||
curl \
|
||||
file \
|
||||
# Docker specific
|
||||
gosu \
|
||||
# Timezones support
|
||||
tzdata \
|
||||
# fonts for text file thumbnail generation
|
||||
fonts-liberation \
|
||||
gettext \
|
||||
ghostscript \
|
||||
gnupg \
|
||||
gosu \
|
||||
icc-profiles-free \
|
||||
imagemagick \
|
||||
media-types \
|
||||
# Image processing
|
||||
liblept5 \
|
||||
libpq5 \
|
||||
libxml2 \
|
||||
liblcms2-2 \
|
||||
libtiff5 \
|
||||
libxslt1.1 \
|
||||
libfreetype6 \
|
||||
libwebp6 \
|
||||
libopenjp2-7 \
|
||||
libimagequant0 \
|
||||
libraqm0 \
|
||||
libgnutls30 \
|
||||
libjpeg62-turbo \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-setuptools \
|
||||
# PostgreSQL
|
||||
libpq5 \
|
||||
postgresql-client \
|
||||
# MySQL / MariaDB
|
||||
mariadb-client \
|
||||
# For Numpy
|
||||
libatlas3-base \
|
||||
@@ -110,17 +102,23 @@ ARG RUNTIME_PACKAGES="\
|
||||
tesseract-ocr-fra \
|
||||
tesseract-ocr-ita \
|
||||
tesseract-ocr-spa \
|
||||
# Suggested for OCRmyPDF
|
||||
pngquant \
|
||||
# Suggested for pikepdf
|
||||
jbig2dec \
|
||||
tzdata \
|
||||
unpaper \
|
||||
pngquant \
|
||||
# pikepdf / qpdf
|
||||
jbig2dec \
|
||||
libxml2 \
|
||||
libxslt1.1 \
|
||||
libgnutls30 \
|
||||
# Mime type detection
|
||||
file \
|
||||
libmagic1 \
|
||||
media-types \
|
||||
zlib1g \
|
||||
# Barcode splitter
|
||||
libzbar0 \
|
||||
poppler-utils"
|
||||
poppler-utils \
|
||||
# RapidFuzz on armv7
|
||||
libatomic1"
|
||||
|
||||
# Install basic runtime packages.
|
||||
# These change very infrequently
|
||||
@@ -150,6 +148,7 @@ COPY [ \
|
||||
"docker/docker-prepare.sh", \
|
||||
"docker/paperless_cmd.sh", \
|
||||
"docker/wait-for-redis.py", \
|
||||
"docker/env-from-file.sh", \
|
||||
"docker/management_script.sh", \
|
||||
"docker/flower-conditional.sh", \
|
||||
"docker/install_management_commands.sh", \
|
||||
@@ -169,6 +168,8 @@ RUN set -eux \
|
||||
&& chmod 755 /sbin/docker-prepare.sh \
|
||||
&& mv wait-for-redis.py /sbin/wait-for-redis.py \
|
||||
&& chmod 755 /sbin/wait-for-redis.py \
|
||||
&& mv env-from-file.sh /sbin/env-from-file.sh \
|
||||
&& chmod 755 /sbin/env-from-file.sh \
|
||||
&& mv paperless_cmd.sh /usr/local/bin/paperless_cmd.sh \
|
||||
&& chmod 755 /usr/local/bin/paperless_cmd.sh \
|
||||
&& mv flower-conditional.sh /usr/local/bin/flower-conditional.sh \
|
||||
@@ -180,23 +181,29 @@ RUN set -eux \
|
||||
# Install the built packages from the installer library images
|
||||
# Use mounts to avoid copying installer files into the image
|
||||
# These change sometimes
|
||||
RUN --mount=type=bind,from=qpdf-builder,target=/qpdf \
|
||||
--mount=type=bind,from=psycopg2-builder,target=/psycopg2 \
|
||||
--mount=type=bind,from=pikepdf-builder,target=/pikepdf \
|
||||
set -eux \
|
||||
RUN set -eux \
|
||||
&& echo "Getting binaries" \
|
||||
&& mkdir paperless-ngx \
|
||||
&& curl --fail --silent --show-error --output paperless-ngx.tar.gz --location https://github.com/paperless-ngx/paperless-ngx/archive/41d6e7e407af09a0882736d50c89b6e015997bff.tar.gz \
|
||||
&& tar -xf paperless-ngx.tar.gz --directory paperless-ngx --strip-components=1 \
|
||||
&& cd paperless-ngx \
|
||||
# Setting a specific revision ensures we know what this installed
|
||||
# and ensures cache breaking on changes
|
||||
&& echo "Installing jbig2enc" \
|
||||
&& cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/jbig2 /usr/local/bin/ \
|
||||
&& cp ./jbig2enc/${JBIG2ENC_VERSION}/${TARGETARCH}${TARGETVARIANT}/libjbig2enc* /usr/local/lib/ \
|
||||
&& echo "Installing qpdf" \
|
||||
&& apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/libqpdf29_*.deb \
|
||||
&& apt-get install --yes --no-install-recommends /qpdf/usr/src/qpdf/qpdf_*.deb \
|
||||
&& apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/libqpdf29_*.deb \
|
||||
&& apt-get install --yes --no-install-recommends ./qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/qpdf_*.deb \
|
||||
&& echo "Installing pikepdf and dependencies" \
|
||||
&& python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/pyparsing*.whl \
|
||||
&& python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/packaging*.whl \
|
||||
&& python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/lxml*.whl \
|
||||
&& python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/Pillow*.whl \
|
||||
&& python3 -m pip install --no-cache-dir /pikepdf/usr/src/wheels/pikepdf*.whl \
|
||||
&& python3 -m pip install --no-cache-dir ./pikepdf/${PIKEPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.whl \
|
||||
&& python3 -m pip list \
|
||||
&& echo "Installing psycopg2" \
|
||||
&& python3 -m pip install --no-cache-dir /psycopg2/usr/src/wheels/psycopg2*.whl \
|
||||
&& python3 -m pip list
|
||||
&& python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
|
||||
&& python3 -m pip list \
|
||||
&& echo "Cleaning up image layer" \
|
||||
&& cd ../ \
|
||||
&& rm -rf paperless-ngx
|
||||
|
||||
WORKDIR /usr/src/paperless/src/
|
||||
|
||||
@@ -219,6 +226,10 @@ RUN set -eux \
|
||||
&& python3 -m pip install --no-cache-dir --upgrade wheel \
|
||||
&& echo "Installing Python requirements" \
|
||||
&& python3 -m pip install --default-timeout=1000 --no-cache-dir --requirement requirements.txt \
|
||||
&& echo "Installing NLTK data" \
|
||||
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \
|
||||
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \
|
||||
&& python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt \
|
||||
&& echo "Cleaning up image" \
|
||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||
&& apt-get -y autoremove --purge \
|
||||
|
31
Pipfile
@@ -30,8 +30,6 @@ psycopg2 = "*"
|
||||
rapidfuzz = "*"
|
||||
redis = {extras = ["hiredis"], version = "*"}
|
||||
scikit-learn = "~=1.1"
|
||||
# Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/)
|
||||
scipy = "==1.8.1"
|
||||
numpy = "*"
|
||||
whitenoise = "~=6.2"
|
||||
watchdog = "~=2.1"
|
||||
@@ -43,15 +41,9 @@ tika = "*"
|
||||
# TODO: This will sadly also install daphne+dependencies,
|
||||
# which is an ASGI server we don't need. Adds about 15MB to the image size.
|
||||
channels = "~=3.0"
|
||||
# Locked version until https://github.com/django/channels_redis/issues/332
|
||||
# is resolved
|
||||
channels-redis = "==3.4.1"
|
||||
uvicorn = {extras = ["standard"], version = "*"}
|
||||
concurrent-log-handler = "*"
|
||||
"pdfminer.six" = "*"
|
||||
"backports.zoneinfo" = {version = "*", markers = "python_version < '3.9'"}
|
||||
"importlib-resources" = {version = "*", markers = "python_version < '3.9'"}
|
||||
zipp = {version = "*", markers = "python_version < '3.9'"}
|
||||
pyzbar = "*"
|
||||
mysqlclient = "*"
|
||||
celery = {extras = ["redis"], version = "*"}
|
||||
@@ -60,21 +52,32 @@ setproctitle = "*"
|
||||
nltk = "*"
|
||||
pdf2image = "*"
|
||||
flower = "*"
|
||||
bleach = "*"
|
||||
|
||||
#
|
||||
# Packages locked due to issues (try to check if these are fixed in a release every so often)
|
||||
#
|
||||
|
||||
# Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/)
|
||||
scipy = "==1.8.1"
|
||||
|
||||
# Newer versions aren't building yet (see https://www.piwheels.org/project/cryptography/)
|
||||
cryptography = "==38.0.1"
|
||||
|
||||
# Locked version until https://github.com/django/channels_redis/issues/332
|
||||
# is resolved
|
||||
channels-redis = "==3.4.1"
|
||||
|
||||
[dev-packages]
|
||||
coveralls = "*"
|
||||
factory-boy = "*"
|
||||
pycodestyle = "*"
|
||||
pytest = "*"
|
||||
pytest-cov = "*"
|
||||
pytest-django = "*"
|
||||
pytest-env = "*"
|
||||
pytest-sugar = "*"
|
||||
pytest-xdist = "*"
|
||||
sphinx = "~=5.3"
|
||||
sphinx_rtd_theme = "*"
|
||||
tox = "*"
|
||||
black = "*"
|
||||
pre-commit = "*"
|
||||
sphinx-autobuild = "*"
|
||||
myst-parser = "*"
|
||||
imagehash = "*"
|
||||
mkdocs-material = "*"
|
||||
|
2038
Pipfile.lock
generated
28
README.md
@@ -1,8 +1,9 @@
|
||||
[](https://github.com/paperless-ngx/paperless-ngx/actions)
|
||||
[](https://crowdin.com/project/paperless-ngx)
|
||||
[](https://paperless-ngx.readthedocs.io/en/latest/?badge=latest)
|
||||
[](https://docs.paperless-ngx.com)
|
||||
[](https://coveralls.io/github/paperless-ngx/paperless-ngx?branch=master)
|
||||
[](https://matrix.to/#/%23paperlessngx%3Amatrix.org)
|
||||
[](https://demo.paperless-ngx.com)
|
||||
|
||||
<p align="center">
|
||||
<img src="https://github.com/paperless-ngx/paperless-ngx/raw/main/resources/logo/web/png/Black%20logo%20-%20no%20background.png#gh-light-mode-only" width="50%" />
|
||||
@@ -32,13 +33,13 @@ A demo is available at [demo.paperless-ngx.com](https://demo.paperless-ngx.com)
|
||||
|
||||
# Features
|
||||
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
- Organize and index your scanned documents with tags, correspondents, types, and more.
|
||||
- Performs OCR on your documents, adds selectable text to image only documents and adds tags, correspondents and document types to your documents.
|
||||
- Supports PDF documents, images, plain text files, and Office documents (Word, Excel, Powerpoint, and LibreOffice equivalents).
|
||||
- Office document support is optional and provided by Apache Tika (see [configuration](https://paperless-ngx.readthedocs.io/en/latest/configuration.html#tika-settings))
|
||||
- Office document support is optional and provided by Apache Tika (see [configuration](https://docs.paperless-ngx.com/configuration/#tika))
|
||||
- Paperless stores your documents plain on disk. Filenames and folders are managed by paperless and their format can be configured freely.
|
||||
- Single page application front end.
|
||||
- Includes a dashboard that shows basic statistics and has document upload.
|
||||
@@ -56,7 +57,7 @@ A demo is available at [demo.paperless-ngx.com](https://demo.paperless-ngx.com)
|
||||
- Paperless-ngx learns from your documents and will be able to automatically assign tags, correspondents and types to documents once you've stored a few documents in paperless.
|
||||
- Optimized for multi core systems: Paperless-ngx consumes multiple documents in parallel.
|
||||
- The integrated sanity checker makes sure that your document archive is in good health.
|
||||
- [More screenshots are available in the documentation](https://paperless-ngx.readthedocs.io/en/latest/screenshots.html).
|
||||
- [More screenshots are available in the documentation](https://docs.paperless-ngx.com/#screenshots).
|
||||
|
||||
# Getting started
|
||||
|
||||
@@ -68,19 +69,19 @@ If you'd like to jump right in, you can configure a docker-compose environment w
|
||||
bash -c "$(curl -L https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/install-paperless-ngx.sh)"
|
||||
```
|
||||
|
||||
Alternatively, you can install the dependencies and setup apache and a database server yourself. The [documentation](https://paperless-ngx.readthedocs.io/en/latest/setup.html#installation) has a step by step guide on how to do it.
|
||||
Alternatively, you can install the dependencies and set up apache and a database server yourself. The [documentation](https://docs.paperless-ngx.com/setup/#installation) has a step by step guide on how to do it.
|
||||
|
||||
Migrating from Paperless-ng is easy, just drop in the new docker image! See the [documentation on migrating](https://paperless-ngx.readthedocs.io/en/latest/setup.html#migrating-from-paperless-ng) for more details.
|
||||
Migrating from Paperless-ng is easy, just drop in the new docker image! See the [documentation on migrating](https://docs.paperless-ngx.com/setup/#migrating-to-paperless-ngx) for more details.
|
||||
|
||||
<!-- omit in toc -->
|
||||
|
||||
### Documentation
|
||||
|
||||
The documentation for Paperless-ngx is available on [ReadTheDocs](https://paperless-ngx.readthedocs.io/).
|
||||
The documentation for Paperless-ngx is available at [https://docs.paperless-ngx.com](https://docs.paperless-ngx.com/).
|
||||
|
||||
# Contributing
|
||||
|
||||
If you feel like contributing to the project, please do! Bug fixes, enhancements, visual fixes etc. are always welcome. If you want to implement something big: Please start a discussion about that! The [documentation](https://paperless-ngx.readthedocs.io/en/latest/extending.html) has some basic information on how to get started.
|
||||
If you feel like contributing to the project, please do! Bug fixes, enhancements, visual fixes etc. are always welcome. If you want to implement something big: Please start a discussion about that! The [documentation](https://docs.paperless-ngx.com/development/) has some basic information on how to get started.
|
||||
|
||||
## Community Support
|
||||
|
||||
@@ -107,15 +108,6 @@ Paperless has been around a while now, and people are starting to build stuff on
|
||||
- [Scan to Paperless](https://github.com/sbrunner/scan-to-paperless): Scan and prepare (crop, deskew, OCR, ...) your documents for Paperless.
|
||||
- [Paperless Mobile](https://github.com/astubenbord/paperless-mobile): A modern, feature rich mobile application for Paperless.
|
||||
|
||||
These projects also exist, but their status and compatibility with paperless-ngx is unknown.
|
||||
|
||||
- [paperless-cli](https://github.com/stgarf/paperless-cli): A golang command line binary to interact with a Paperless instance.
|
||||
|
||||
This project also exists, but needs updates to be compatible with paperless-ngx.
|
||||
|
||||
- [Paperless Desktop](https://github.com/thomasbrueggemann/paperless-desktop): A desktop UI for your Paperless installation. Runs on Mac, Linux, and Windows.
|
||||
Known issues on Mac: (Could not load reminders and documents)
|
||||
|
||||
# Important Note
|
||||
|
||||
Document scanners are typically used to scan sensitive documents. Things like your social insurance number, tax records, invoices, etc. Everything is stored in the clear without encryption. This means that Paperless should never be run on an untrusted host. Instead, I recommend that if you do want to use it, run it locally on a server in your own home.
|
||||
|
@@ -10,9 +10,9 @@
|
||||
# Example Usage:
|
||||
# ./build-docker-image.sh Dockerfile -t paperless-ngx:my-awesome-feature
|
||||
|
||||
set -eux
|
||||
set -eu
|
||||
|
||||
if ! command -v jq; then
|
||||
if ! command -v jq &> /dev/null ; then
|
||||
echo "jq required"
|
||||
exit 1
|
||||
elif [ ! -f "$1" ]; then
|
||||
@@ -20,28 +20,62 @@ elif [ ! -f "$1" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Parse what we can from Pipfile.lock
|
||||
pikepdf_version=$(jq ".default.pikepdf.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g')
|
||||
psycopg2_version=$(jq ".default.psycopg2.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g')
|
||||
pillow_version=$(jq ".default.pillow.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g')
|
||||
lxml_version=$(jq ".default.lxml.version" Pipfile.lock | sed 's/=//g' | sed 's/"//g')
|
||||
# Read this from the other config file
|
||||
qpdf_version=$(jq ".qpdf.version" .build-config.json | sed 's/"//g')
|
||||
jbig2enc_version=$(jq ".jbig2enc.version" .build-config.json | sed 's/"//g')
|
||||
# Get the branch name (used for caching)
|
||||
branch_name=$(git rev-parse --abbrev-ref HEAD)
|
||||
|
||||
# https://docs.docker.com/develop/develop-images/build_enhancements/
|
||||
# Required to use cache-from
|
||||
export DOCKER_BUILDKIT=1
|
||||
# Parse either Pipfile.lock or the .build-config.json
|
||||
jbig2enc_version=$(jq -r '.jbig2enc.version' .build-config.json)
|
||||
qpdf_version=$(jq -r '.qpdf.version' .build-config.json)
|
||||
psycopg2_version=$(jq -r '.default.psycopg2.version | gsub("=";"")' Pipfile.lock)
|
||||
pikepdf_version=$(jq -r '.default.pikepdf.version | gsub("=";"")' Pipfile.lock)
|
||||
pillow_version=$(jq -r '.default.pillow.version | gsub("=";"")' Pipfile.lock)
|
||||
lxml_version=$(jq -r '.default.lxml.version | gsub("=";"")' Pipfile.lock)
|
||||
|
||||
docker build --file "$1" \
|
||||
base_filename="$(basename -- "${1}")"
|
||||
build_args_str=""
|
||||
cache_from_str=""
|
||||
|
||||
case "${base_filename}" in
|
||||
|
||||
*.jbig2enc)
|
||||
build_args_str="--build-arg JBIG2ENC_VERSION=${jbig2enc_version}"
|
||||
cache_from_str="--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/jbig2enc:${jbig2enc_version}"
|
||||
;;
|
||||
|
||||
*.psycopg2)
|
||||
build_args_str="--build-arg PSYCOPG2_VERSION=${psycopg2_version}"
|
||||
cache_from_str="--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/psycopg2:${psycopg2_version}"
|
||||
;;
|
||||
|
||||
*.qpdf)
|
||||
build_args_str="--build-arg QPDF_VERSION=${qpdf_version}"
|
||||
cache_from_str="--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/qpdf:${qpdf_version}"
|
||||
;;
|
||||
|
||||
*.pikepdf)
|
||||
build_args_str="--build-arg QPDF_VERSION=${qpdf_version} --build-arg PIKEPDF_VERSION=${pikepdf_version} --build-arg PILLOW_VERSION=${pillow_version} --build-arg LXML_VERSION=${lxml_version}"
|
||||
cache_from_str="--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/pikepdf:${pikepdf_version}"
|
||||
;;
|
||||
|
||||
Dockerfile)
|
||||
build_args_str="--build-arg QPDF_VERSION=${qpdf_version} --build-arg PIKEPDF_VERSION=${pikepdf_version} --build-arg PSYCOPG2_VERSION=${psycopg2_version} --build-arg JBIG2ENC_VERSION=${jbig2enc_version}"
|
||||
cache_from_str="--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/app:${branch_name} --cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/app:dev"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unable to match ${base_filename}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
read -r -a build_args_arr <<< "${build_args_str}"
|
||||
read -r -a cache_from_arr <<< "${cache_from_str}"
|
||||
|
||||
set -eux
|
||||
|
||||
docker buildx build --file "${1}" \
|
||||
--progress=plain \
|
||||
--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/app:"${branch_name}" \
|
||||
--cache-from ghcr.io/paperless-ngx/paperless-ngx/builder/cache/app:dev \
|
||||
--build-arg JBIG2ENC_VERSION="${jbig2enc_version}" \
|
||||
--build-arg QPDF_VERSION="${qpdf_version}" \
|
||||
--build-arg PIKEPDF_VERSION="${pikepdf_version}" \
|
||||
--build-arg PILLOW_VERSION="${pillow_version}" \
|
||||
--build-arg LXML_VERSION="${lxml_version}" \
|
||||
--build-arg PSYCOPG2_VERSION="${psycopg2_version}" "${@:2}" .
|
||||
--output=type=docker \
|
||||
"${cache_from_arr[@]}" \
|
||||
"${build_args_arr[@]}" \
|
||||
"${@:2}" .
|
||||
|
@@ -29,7 +29,20 @@ RUN set -eux \
|
||||
&& ./autogen.sh \
|
||||
&& ./configure \
|
||||
&& make \
|
||||
&& echo "Gathering package data" \
|
||||
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ./pkg-list.txt \
|
||||
&& echo "Cleaning up image" \
|
||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& echo "Moving files around" \
|
||||
&& mkdir build \
|
||||
# Unlink a symlink that causes problems
|
||||
&& unlink ./src/.libs/libjbig2enc.la \
|
||||
# Move what the link pointed to
|
||||
&& mv ./src/libjbig2enc.la ./build/ \
|
||||
# Move the shared library .so files
|
||||
&& mv ./src/.libs/libjbig2enc* ./build/ \
|
||||
# And move the cli binary
|
||||
&& mv ./src/jbig2 ./build/ \
|
||||
&& mv ./pkg-list.txt ./build/
|
||||
|
@@ -7,16 +7,27 @@
|
||||
# Default to pulling from the main repo registry when manually building
|
||||
ARG REPO="paperless-ngx/paperless-ngx"
|
||||
|
||||
ARG QPDF_VERSION
|
||||
FROM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder
|
||||
|
||||
# This does nothing, except provide a name for a copy below
|
||||
ARG QPDF_VERSION
|
||||
FROM --platform=$BUILDPLATFORM ghcr.io/${REPO}/builder/qpdf:${QPDF_VERSION} as qpdf-builder
|
||||
|
||||
FROM python:3.9-slim-bullseye as main
|
||||
#
|
||||
# Stage: builder
|
||||
# Purpose:
|
||||
# - Build the pikepdf wheel
|
||||
# - Build any dependent wheels which can't be found
|
||||
#
|
||||
FROM python:3.9-slim-bullseye as builder
|
||||
|
||||
LABEL org.opencontainers.image.description="A intermediate image with pikepdf wheel built"
|
||||
|
||||
# Buildx provided
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
# Workflow provided
|
||||
ARG QPDF_VERSION
|
||||
ARG PIKEPDF_VERSION
|
||||
# These are not used, but will still bust the cache if one changes
|
||||
# Otherwise, the main image will try to build things (and fail)
|
||||
@@ -54,7 +65,7 @@ ARG BUILD_PACKAGES="\
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
COPY --from=qpdf-builder /usr/src/qpdf/*.deb ./
|
||||
COPY --from=qpdf-builder /usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/*.deb ./
|
||||
|
||||
# As this is a base image for a multi-stage final image
|
||||
# the added size of the install is basically irrelevant
|
||||
@@ -77,6 +88,8 @@ RUN set -eux \
|
||||
&& python3 -m pip wheel \
|
||||
# Build the package at the required version
|
||||
pikepdf==${PIKEPDF_VERSION} \
|
||||
# Look to piwheels for additional pre-built wheels
|
||||
--extra-index-url https://www.piwheels.org/simple \
|
||||
# Output the *.whl into this directory
|
||||
--wheel-dir wheels \
|
||||
# Do not use a binary package for the package being built
|
||||
@@ -86,7 +99,20 @@ RUN set -eux \
|
||||
# Don't cache build files
|
||||
--no-cache-dir \
|
||||
&& ls -ahl wheels \
|
||||
&& echo "Gathering package data" \
|
||||
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ./wheels/pkg-list.txt \
|
||||
&& echo "Cleaning up image" \
|
||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
#
|
||||
# Stage: package
|
||||
# Purpose: Holds the compiled .whl files in a tiny image to pull
|
||||
#
|
||||
FROM alpine:3.17 as package
|
||||
|
||||
WORKDIR /usr/src/wheels/
|
||||
|
||||
COPY --from=builder /usr/src/wheels/*.whl ./
|
||||
COPY --from=builder /usr/src/wheels/pkg-list.txt ./
|
||||
|
@@ -2,7 +2,12 @@
|
||||
# Inputs:
|
||||
# - PSYCOPG2_VERSION - Version to build
|
||||
|
||||
FROM python:3.9-slim-bullseye as main
|
||||
#
|
||||
# Stage: builder
|
||||
# Purpose:
|
||||
# - Build the psycopg2 wheel
|
||||
#
|
||||
FROM python:3.9-slim-bullseye as builder
|
||||
|
||||
LABEL org.opencontainers.image.description="A intermediate image with psycopg2 wheel built"
|
||||
|
||||
@@ -42,7 +47,20 @@ RUN set -eux \
|
||||
# Don't cache build files
|
||||
--no-cache-dir \
|
||||
&& ls -ahl wheels/ \
|
||||
&& echo "Gathering package data" \
|
||||
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ./wheels/pkg-list.txt \
|
||||
&& echo "Cleaning up image" \
|
||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
#
|
||||
# Stage: package
|
||||
# Purpose: Holds the compiled .whl files in a tiny image to pull
|
||||
#
|
||||
FROM alpine:3.17 as package
|
||||
|
||||
WORKDIR /usr/src/wheels/
|
||||
|
||||
COPY --from=builder /usr/src/wheels/*.whl ./
|
||||
COPY --from=builder /usr/src/wheels/pkg-list.txt ./
|
||||
|
@@ -1,48 +1,156 @@
|
||||
# This Dockerfile builds the qpdf .deb installers
|
||||
# Inputs:
|
||||
# - QPDF_VERSION - the version of qpdf to build a .deb.
|
||||
# Must be present as a deb-src in bookworm
|
||||
#
|
||||
# Stage: pre-build
|
||||
# Purpose:
|
||||
# - Installs common packages
|
||||
# - Sets common environment variables related to dpkg
|
||||
# - Acquires the qpdf source from bookworm
|
||||
# Useful Links:
|
||||
# - https://qpdf.readthedocs.io/en/stable/installation.html#system-requirements
|
||||
# - https://wiki.debian.org/Multiarch/HOWTO
|
||||
# - https://wiki.debian.org/CrossCompiling
|
||||
#
|
||||
|
||||
FROM debian:bullseye-slim as main
|
||||
FROM debian:bullseye-slim as pre-build
|
||||
|
||||
LABEL org.opencontainers.image.description="A intermediate image with qpdf built"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
# This must be at least the minimum qpdf version required by pikepdf
|
||||
ARG QPDF_VERSION
|
||||
|
||||
ARG BUILD_PACKAGES="\
|
||||
build-essential \
|
||||
debhelper \
|
||||
ARG COMMON_BUILD_PACKAGES="\
|
||||
cmake \
|
||||
debhelper\
|
||||
debian-keyring \
|
||||
devscripts \
|
||||
dpkg-dev \
|
||||
equivs \
|
||||
libtool \
|
||||
# https://qpdf.readthedocs.io/en/stable/installation.html#system-requirements
|
||||
libjpeg62-turbo-dev \
|
||||
libgnutls28-dev \
|
||||
packaging-dev \
|
||||
cmake \
|
||||
zlib1g-dev"
|
||||
libtool"
|
||||
|
||||
ENV DEB_BUILD_OPTIONS="terse nocheck nodoc parallel=2"
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN set -eux \
|
||||
&& echo "Installing build tools" \
|
||||
&& echo "Installing common packages" \
|
||||
&& apt-get update --quiet \
|
||||
&& apt-get install --yes --quiet --no-install-recommends $BUILD_PACKAGES \
|
||||
&& echo "Getting qpdf src" \
|
||||
&& apt-get install --yes --quiet --no-install-recommends ${COMMON_BUILD_PACKAGES} \
|
||||
&& echo "Getting qpdf source" \
|
||||
&& echo "deb-src http://deb.debian.org/debian/ bookworm main" > /etc/apt/sources.list.d/bookworm-src.list \
|
||||
&& apt-get update \
|
||||
&& mkdir qpdf \
|
||||
&& cd qpdf \
|
||||
&& apt-get source --yes --quiet qpdf=${QPDF_VERSION}-1/bookworm \
|
||||
&& echo "Building qpdf" \
|
||||
&& cd qpdf-$QPDF_VERSION \
|
||||
&& export DEB_BUILD_OPTIONS="terse nocheck nodoc parallel=2" \
|
||||
&& apt-get update --quiet \
|
||||
&& apt-get source --yes --quiet qpdf=${QPDF_VERSION}-1/bookworm
|
||||
|
||||
#
|
||||
# Stage: amd64-builder
|
||||
# Purpose: Builds qpdf for x86_64 (native build)
|
||||
#
|
||||
FROM pre-build as amd64-builder
|
||||
|
||||
ARG AMD64_BUILD_PACKAGES="\
|
||||
build-essential \
|
||||
libjpeg62-turbo-dev:amd64 \
|
||||
libgnutls28-dev:amd64 \
|
||||
zlib1g-dev:amd64"
|
||||
|
||||
WORKDIR /usr/src/qpdf-${QPDF_VERSION}
|
||||
|
||||
RUN set -eux \
|
||||
&& echo "Beginning amd64" \
|
||||
&& echo "Install amd64 packages" \
|
||||
&& apt-get update --quiet \
|
||||
&& apt-get install --yes --quiet --no-install-recommends ${AMD64_BUILD_PACKAGES} \
|
||||
&& echo "Building amd64" \
|
||||
&& dpkg-buildpackage --build=binary --unsigned-source --unsigned-changes --post-clean \
|
||||
&& ls -ahl ../*.deb \
|
||||
&& echo "Cleaning up image" \
|
||||
&& apt-get -y purge ${BUILD_PACKAGES} \
|
||||
&& apt-get -y autoremove --purge \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
&& echo "Removing debug files" \
|
||||
&& rm -f ../libqpdf29-dbgsym* \
|
||||
&& rm -f ../qpdf-dbgsym* \
|
||||
&& echo "Gathering package data" \
|
||||
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ../pkg-list.txt
|
||||
#
|
||||
# Stage: armhf-builder
|
||||
# Purpose:
|
||||
# - Sets armhf specific environment
|
||||
# - Builds qpdf for armhf (cross compile)
|
||||
#
|
||||
FROM pre-build as armhf-builder
|
||||
|
||||
ARG ARMHF_PACKAGES="\
|
||||
crossbuild-essential-armhf \
|
||||
libjpeg62-turbo-dev:armhf \
|
||||
libgnutls28-dev:armhf \
|
||||
zlib1g-dev:armhf"
|
||||
|
||||
WORKDIR /usr/src/qpdf-${QPDF_VERSION}
|
||||
|
||||
ENV CXX="/usr/bin/arm-linux-gnueabihf-g++" \
|
||||
CC="/usr/bin/arm-linux-gnueabihf-gcc"
|
||||
|
||||
RUN set -eux \
|
||||
&& echo "Beginning armhf" \
|
||||
&& echo "Install armhf packages" \
|
||||
&& dpkg --add-architecture armhf \
|
||||
&& apt-get update --quiet \
|
||||
&& apt-get install --yes --quiet --no-install-recommends ${ARMHF_PACKAGES} \
|
||||
&& echo "Building armhf" \
|
||||
&& dpkg-buildpackage --build=binary --unsigned-source --unsigned-changes --post-clean --host-arch armhf \
|
||||
&& echo "Removing debug files" \
|
||||
&& rm -f ../libqpdf29-dbgsym* \
|
||||
&& rm -f ../qpdf-dbgsym* \
|
||||
&& echo "Gathering package data" \
|
||||
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ../pkg-list.txt
|
||||
|
||||
#
|
||||
# Stage: aarch64-builder
|
||||
# Purpose:
|
||||
# - Sets aarch64 specific environment
|
||||
# - Builds qpdf for aarch64 (cross compile)
|
||||
#
|
||||
FROM pre-build as aarch64-builder
|
||||
|
||||
ARG ARM64_PACKAGES="\
|
||||
crossbuild-essential-arm64 \
|
||||
libjpeg62-turbo-dev:arm64 \
|
||||
libgnutls28-dev:arm64 \
|
||||
zlib1g-dev:arm64"
|
||||
|
||||
ENV CXX="/usr/bin/aarch64-linux-gnu-g++" \
|
||||
CC="/usr/bin/aarch64-linux-gnu-gcc"
|
||||
|
||||
WORKDIR /usr/src/qpdf-${QPDF_VERSION}
|
||||
|
||||
RUN set -eux \
|
||||
&& echo "Beginning arm64" \
|
||||
&& echo "Install arm64 packages" \
|
||||
&& dpkg --add-architecture arm64 \
|
||||
&& apt-get update --quiet \
|
||||
&& apt-get install --yes --quiet --no-install-recommends ${ARM64_PACKAGES} \
|
||||
&& echo "Building arm64" \
|
||||
&& dpkg-buildpackage --build=binary --unsigned-source --unsigned-changes --post-clean --host-arch arm64 \
|
||||
&& echo "Removing debug files" \
|
||||
&& rm -f ../libqpdf29-dbgsym* \
|
||||
&& rm -f ../qpdf-dbgsym* \
|
||||
&& echo "Gathering package data" \
|
||||
&& dpkg-query -f '${Package;-40}${Version}\n' -W > ../pkg-list.txt
|
||||
|
||||
#
|
||||
# Stage: package
|
||||
# Purpose: Holds the compiled .deb files in arch/variant specific folders
|
||||
#
|
||||
FROM alpine:3.17 as package
|
||||
|
||||
LABEL org.opencontainers.image.description="A image with qpdf installers stored in architecture & version specific folders"
|
||||
|
||||
ARG QPDF_VERSION
|
||||
|
||||
WORKDIR /usr/src/qpdf/${QPDF_VERSION}/amd64
|
||||
|
||||
COPY --from=amd64-builder /usr/src/*.deb ./
|
||||
COPY --from=amd64-builder /usr/src/pkg-list.txt ./
|
||||
|
||||
# Note this is ${TARGETARCH}${TARGETVARIANT} for armv7
|
||||
WORKDIR /usr/src/qpdf/${QPDF_VERSION}/armv7
|
||||
|
||||
COPY --from=armhf-builder /usr/src/*.deb ./
|
||||
COPY --from=armhf-builder /usr/src/pkg-list.txt ./
|
||||
|
||||
WORKDIR /usr/src/qpdf/${QPDF_VERSION}/arm64
|
||||
|
||||
COPY --from=aarch64-builder /usr/src/*.deb ./
|
||||
COPY --from=aarch64-builder /usr/src/pkg-list.txt ./
|
||||
|
57
docker-builders/README.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# Installer Library
|
||||
|
||||
This folder contains the Dockerfiles for building certain installers or libraries, which are then pulled into the main image.
|
||||
|
||||
## [jbig2enc](https://github.com/agl/jbig2enc)
|
||||
|
||||
### Why
|
||||
|
||||
JBIG is an image coding which can achieve better compression of images for PDFs.
|
||||
|
||||
### What
|
||||
|
||||
The Docker image builds a shared library file and utility, which is copied into the correct location in the final image.
|
||||
|
||||
### Updating
|
||||
|
||||
1. Ensure the given qpdf version is present in [Debian bookworm](https://packages.debian.org/bookworm/qpdf)
|
||||
2. Update `.build-config.json` to the given version
|
||||
3. If the Debian specific version has incremented, update `Dockerfile.qpdf`
|
||||
|
||||
See Also:
|
||||
|
||||
- [OCRMyPDF Documentation](https://ocrmypdf.readthedocs.io/en/latest/jbig2.html)
|
||||
|
||||
## [psycopg2](https://www.psycopg.org/)
|
||||
|
||||
### Why
|
||||
|
||||
The pre-built wheels of psycopg2 are built on Debian 9, which provides a quite old version of libpq-dev. This causes issues with authentication methods.
|
||||
|
||||
### What
|
||||
|
||||
The image builds psycopg2 wheels on Debian 11 (bullseye) and places the produced wheels into `/usr/src/wheels/`.
|
||||
|
||||
See Also:
|
||||
|
||||
- [Issue 266](https://github.com/paperless-ngx/paperless-ngx/issues/266)
|
||||
|
||||
## [qpdf](https://qpdf.readthedocs.io/en/stable/index.html)
|
||||
|
||||
### Why
|
||||
|
||||
qpdf and its library provide tools to read, manipulate and fix up PDFs. Version 11 is also required by `pikepdf` 6+ and Debian 9 does not provide a version above 10.
|
||||
|
||||
### What
|
||||
|
||||
The Docker image cross compiles .deb installers for each supported architecture of the main image. The installers are placed in `/usr/src/qpdf/${QPDF_VERSION}/${TARGETARCH}${TARGETVARIANT}/`
|
||||
|
||||
## [pikepdf](https://pikepdf.readthedocs.io/en/latest/)
|
||||
|
||||
### Why
|
||||
|
||||
Required by OCRMyPdf, this is a general purpose library for PDF manipulation in Python via the qpdf libraries.
|
||||
|
||||
### What
|
||||
|
||||
The built wheels are placed into `/usr/src/wheels/`
|
25
docker/compose/docker-compose.ci-test.yml
Normal file
@@ -0,0 +1,25 @@
|
||||
# docker-compose file for running paperless testing with actual gotenberg
|
||||
# and Tika containers for a more end to end test of the Tika related functionality
|
||||
# Can be used locally or by the CI to start the necessary containers with the
|
||||
# correct networking for the tests
|
||||
|
||||
version: "3.7"
|
||||
services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
hostname: gotenberg
|
||||
container_name: gotenberg
|
||||
network_mode: host
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
hostname: tika
|
||||
container_name: tika
|
||||
network_mode: host
|
||||
restart: unless-stopped
|
@@ -49,8 +49,6 @@ services:
|
||||
MARIADB_USER: paperless
|
||||
MARIADB_PASSWORD: paperless
|
||||
MARIADB_ROOT_PASSWORD: paperless
|
||||
ports:
|
||||
- "3306:3306"
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
@@ -87,9 +85,12 @@ services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@@ -45,8 +45,6 @@ services:
|
||||
MARIADB_USER: paperless
|
||||
MARIADB_PASSWORD: paperless
|
||||
MARIADB_ROOT_PASSWORD: paperless
|
||||
ports:
|
||||
- "3306:3306"
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
|
@@ -79,9 +79,13 @@ services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@@ -67,9 +67,13 @@ services:
|
||||
gotenberg:
|
||||
image: docker.io/gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--chromium-disable-javascript=true"
|
||||
- "--chromium-allow-list=file:///tmp/.*"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
|
@@ -2,37 +2,6 @@
|
||||
|
||||
set -e
|
||||
|
||||
# Adapted from:
|
||||
# https://github.com/docker-library/postgres/blob/master/docker-entrypoint.sh
|
||||
# usage: file_env VAR
|
||||
# ie: file_env 'XYZ_DB_PASSWORD' will allow for "$XYZ_DB_PASSWORD_FILE" to
|
||||
# fill in the value of "$XYZ_DB_PASSWORD" from a file, especially for Docker's
|
||||
# secrets feature
|
||||
file_env() {
|
||||
local -r var="$1"
|
||||
local -r fileVar="${var}_FILE"
|
||||
|
||||
# Basic validation
|
||||
if [ "${!var:-}" ] && [ "${!fileVar:-}" ]; then
|
||||
echo >&2 "error: both $var and $fileVar are set (but are exclusive)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Only export var if the _FILE exists
|
||||
if [ "${!fileVar:-}" ]; then
|
||||
# And the file exists
|
||||
if [[ -f ${!fileVar} ]]; then
|
||||
echo "Setting ${var} from file"
|
||||
val="$(< "${!fileVar}")"
|
||||
export "$var"="$val"
|
||||
else
|
||||
echo "File ${!fileVar} doesn't exist"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
# Source: https://github.com/sameersbn/docker-gitlab/
|
||||
map_uidgid() {
|
||||
local -r usermap_original_uid=$(id -u paperless)
|
||||
@@ -53,46 +22,54 @@ map_folders() {
|
||||
export CONSUME_DIR="${PAPERLESS_CONSUMPTION_DIR:-/usr/src/paperless/consume}"
|
||||
}
|
||||
|
||||
nltk_data () {
|
||||
# Store the NLTK data outside the Docker container
|
||||
local -r nltk_data_dir="${DATA_DIR}/nltk"
|
||||
local -r truthy_things=("yes y 1 t true")
|
||||
|
||||
# If not set, or it looks truthy
|
||||
if [[ -z "${PAPERLESS_ENABLE_NLTK}" ]] || [[ "${truthy_things[*]}" =~ ${PAPERLESS_ENABLE_NLTK,} ]]; then
|
||||
|
||||
# Download or update the snowball stemmer data
|
||||
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" snowball_data
|
||||
|
||||
# Download or update the stopwords corpus
|
||||
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" stopwords
|
||||
|
||||
# Download or update the punkt tokenizer data
|
||||
python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" punkt
|
||||
|
||||
else
|
||||
echo "Skipping NLTK data download"
|
||||
|
||||
custom_container_init() {
|
||||
# Mostly borrowed from the LinuxServer.io base image
|
||||
# https://github.com/linuxserver/docker-baseimage-ubuntu/tree/bionic/root/etc/cont-init.d
|
||||
local -r custom_script_dir="/custom-cont-init.d"
|
||||
# Tamper checking.
|
||||
# Don't run files which are owned by anyone except root
|
||||
# Don't run files which are writeable by others
|
||||
if [ -d "${custom_script_dir}" ]; then
|
||||
if [ -n "$(/usr/bin/find "${custom_script_dir}" -maxdepth 1 ! -user root)" ]; then
|
||||
echo "**** Potential tampering with custom scripts detected ****"
|
||||
echo "**** The folder '${custom_script_dir}' must be owned by root ****"
|
||||
return 0
|
||||
fi
|
||||
if [ -n "$(/usr/bin/find "${custom_script_dir}" -maxdepth 1 -perm -o+w)" ]; then
|
||||
echo "**** The folder '${custom_script_dir}' or some of contents have write permissions for others, which is a security risk. ****"
|
||||
echo "**** Please review the permissions and their contents to make sure they are owned by root, and can only be modified by root. ****"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Make sure custom init directory has files in it
|
||||
if [ -n "$(/bin/ls -A "${custom_script_dir}" 2>/dev/null)" ]; then
|
||||
echo "[custom-init] files found in ${custom_script_dir} executing"
|
||||
# Loop over files in the directory
|
||||
for SCRIPT in "${custom_script_dir}"/*; do
|
||||
NAME="$(basename "${SCRIPT}")"
|
||||
if [ -f "${SCRIPT}" ]; then
|
||||
echo "[custom-init] ${NAME}: executing..."
|
||||
/bin/bash "${SCRIPT}"
|
||||
echo "[custom-init] ${NAME}: exited $?"
|
||||
elif [ ! -f "${SCRIPT}" ]; then
|
||||
echo "[custom-init] ${NAME}: is not a file"
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "[custom-init] no custom files found exiting..."
|
||||
fi
|
||||
|
||||
fi
|
||||
}
|
||||
|
||||
initialize() {
|
||||
|
||||
# Setup environment from secrets before anything else
|
||||
for env_var in \
|
||||
PAPERLESS_DBUSER \
|
||||
PAPERLESS_DBPASS \
|
||||
PAPERLESS_SECRET_KEY \
|
||||
PAPERLESS_AUTO_LOGIN_USERNAME \
|
||||
PAPERLESS_ADMIN_USER \
|
||||
PAPERLESS_ADMIN_MAIL \
|
||||
PAPERLESS_ADMIN_PASSWORD \
|
||||
PAPERLESS_REDIS; do
|
||||
# Check for a version of this var with _FILE appended
|
||||
# and convert the contents to the env var value
|
||||
file_env ${env_var}
|
||||
done
|
||||
# Source it so export is persistent
|
||||
# shellcheck disable=SC1091
|
||||
source /sbin/env-from-file.sh
|
||||
|
||||
# Change the user and group IDs if needed
|
||||
map_uidgid
|
||||
@@ -117,8 +94,6 @@ initialize() {
|
||||
echo "Creating directory ${tmp_dir}"
|
||||
mkdir -p "${tmp_dir}"
|
||||
|
||||
nltk_data
|
||||
|
||||
set +e
|
||||
echo "Adjusting permissions of paperless files. This may take a while."
|
||||
chown -R paperless:paperless ${tmp_dir}
|
||||
@@ -132,6 +107,10 @@ initialize() {
|
||||
set -e
|
||||
|
||||
"${gosu_cmd[@]}" /sbin/docker-prepare.sh
|
||||
|
||||
# Leave this as the last thing
|
||||
custom_container_init
|
||||
|
||||
}
|
||||
|
||||
install_languages() {
|
||||
@@ -147,10 +126,6 @@ install_languages() {
|
||||
|
||||
for lang in "${langs[@]}"; do
|
||||
pkg="tesseract-ocr-$lang"
|
||||
# English is installed by default
|
||||
#if [[ "$lang" == "eng" ]]; then
|
||||
# continue
|
||||
#fi
|
||||
|
||||
if dpkg -s "$pkg" &>/dev/null; then
|
||||
echo "Package $pkg already installed!"
|
||||
|
@@ -20,7 +20,6 @@ wait_for_postgres() {
|
||||
exit 1
|
||||
else
|
||||
echo "Attempt $attempt_num failed! Trying again in 5 seconds..."
|
||||
|
||||
fi
|
||||
|
||||
attempt_num=$(("$attempt_num" + 1))
|
||||
@@ -37,6 +36,8 @@ wait_for_mariadb() {
|
||||
local attempt_num=1
|
||||
local -r max_attempts=5
|
||||
|
||||
# Disable warning, host and port can't have spaces
|
||||
# shellcheck disable=SC2086
|
||||
while ! true > /dev/tcp/$host/$port; do
|
||||
|
||||
if [ $attempt_num -eq $max_attempts ]; then
|
||||
@@ -67,13 +68,19 @@ migrations() {
|
||||
# of the current container starts.
|
||||
flock 200
|
||||
echo "Apply database migrations..."
|
||||
python3 manage.py migrate
|
||||
python3 manage.py migrate --skip-checks --no-input
|
||||
) 200>"${DATA_DIR}/migration_lock"
|
||||
}
|
||||
|
||||
django_checks() {
|
||||
# Explicitly run the Django system checks
|
||||
echo "Running Django checks"
|
||||
python3 manage.py check
|
||||
}
|
||||
|
||||
search_index() {
|
||||
|
||||
local -r index_version=1
|
||||
local -r index_version=2
|
||||
local -r index_version_file=${DATA_DIR}/.index_version
|
||||
|
||||
if [[ (! -f "${index_version_file}") || $(<"${index_version_file}") != "$index_version" ]]; then
|
||||
@@ -89,46 +96,6 @@ superuser() {
|
||||
fi
|
||||
}
|
||||
|
||||
custom_container_init() {
    # Runs every regular file found directly inside /custom-cont-init.d with
    # bash, after checking the folder and its direct children for signs of
    # tampering. Always returns 0, whether or not anything was executed.
    #
    # Mostly borrowed from the LinuxServer.io base image
    # https://github.com/linuxserver/docker-baseimage-ubuntu/tree/bionic/root/etc/cont-init.d
    local -r custom_script_dir="/custom-cont-init.d"

    # Nothing to do when the folder is absent.
    [[ -d "${custom_script_dir}" ]] || return 0

    # Tamper check 1: refuse to run anything when the folder or one of its
    # direct children is owned by anyone except root.
    if [[ -n "$(/usr/bin/find "${custom_script_dir}" -maxdepth 1 ! -user root)" ]]; then
        echo "**** Potential tampering with custom scripts detected ****"
        echo "**** The folder '${custom_script_dir}' must be owned by root ****"
        return 0
    fi

    # Tamper check 2: refuse to run anything that is writeable by others.
    if [[ -n "$(/usr/bin/find "${custom_script_dir}" -maxdepth 1 -perm -o+w)" ]]; then
        echo "**** The folder '${custom_script_dir}' or some of contents have write permissions for others, which is a security risk. ****"
        echo "**** Please review the permissions and their contents to make sure they are owned by root, and can only be modified by root. ****"
        return 0
    fi

    # Make sure the custom init directory has something in it.
    if [[ -z "$(/bin/ls -A "${custom_script_dir}" 2>/dev/null)" ]]; then
        echo "[custom-init] no custom files found exiting..."
        return 0
    fi

    echo "[custom-init] files found in ${custom_script_dir} executing"
    # SCRIPT and NAME are intentionally not declared local, matching the
    # original function's effect on the caller's scope.
    for SCRIPT in "${custom_script_dir}"/*; do
        NAME="$(basename "${SCRIPT}")"
        if [[ ! -f "${SCRIPT}" ]]; then
            echo "[custom-init] ${NAME}: is not a file"
            continue
        fi
        echo "[custom-init] ${NAME}: executing..."
        /bin/bash "${SCRIPT}"
        # $? here is still the exit status of the script that just ran.
        echo "[custom-init] ${NAME}: exited $?"
    done
}
|
||||
|
||||
do_work() {
|
||||
if [[ "${PAPERLESS_DBENGINE}" == "mariadb" ]]; then
|
||||
wait_for_mariadb
|
||||
@@ -140,13 +107,12 @@ do_work() {
|
||||
|
||||
migrations
|
||||
|
||||
django_checks
|
||||
|
||||
search_index
|
||||
|
||||
superuser
|
||||
|
||||
# Leave this last thing
|
||||
custom_container_init
|
||||
|
||||
}
|
||||
|
||||
do_work
|
||||
|
38
docker/env-from-file.sh
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash

# Scans the environment variables for those with the suffix _FILE
# When located, checks the file exists, and exports the contents
# of the file as the same name, minus the suffix
# This allows the use of Docker secrets or mounted files
# to fill in any of the settings configurable via environment
# variables
#
# Variable names are enumerated with "${!PAPERLESS_@}" instead of
# word-splitting the output of printenv, so a value that contains
# whitespace or glob characters can no longer be misread as a separate
# NAME=value entry.

set -eu

for env_name in "${!PAPERLESS_@}"; do
    # Only names of the form PAPERLESS_<something>_FILE are handled
    if [[ ${env_name} != PAPERLESS_*_FILE ]]; then
        continue
    fi

    # The variable's value is the path of the file holding the real setting
    env_value=${!env_name}

    # Check the file exists
    if [[ -f ${env_value} ]]; then

        # Trim off the _FILE suffix
        non_file_env_name=${env_name%"_FILE"}
        echo "Setting ${non_file_env_name} from file"

        # Reads the value from the file
        val="$(< "${env_value}")"

        # Sets the normal name to the read file contents
        export "${non_file_env_name}"="${val}"

    else
        echo "File ${env_value} referenced by ${env_name} doesn't exist"
    fi
done
|
@@ -3,6 +3,9 @@
|
||||
set -e
|
||||
|
||||
cd /usr/src/paperless/src/
|
||||
# This ensures environment is setup
|
||||
# shellcheck disable=SC1091
|
||||
source /sbin/env-from-file.sh
|
||||
|
||||
if [[ $(id -u) == 0 ]] ;
|
||||
then
|
||||
|
181
docs/Makefile
@@ -1,181 +0,0 @@
|
||||
# Makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
PAPER =
|
||||
BUILDDIR = _build
|
||||
|
||||
# User-friendly check for sphinx-build
|
||||
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
||||
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
|
||||
endif
|
||||
|
||||
# Internal variables.
|
||||
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||
PAPEROPT_letter = -D latex_paper_size=letter
|
||||
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
# the i18n builder cannot share the environment and doctrees with the others
|
||||
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
|
||||
# The targets listed here are commands, not files to be built. The list
# includes livehtml, latexpdfja, texinfo, info, xml and pseudoxml, which are
# defined further down but were missing from the original declaration.
.PHONY: help clean html livehtml dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml
|
||||
|
||||
help:
|
||||
@echo "Please use \`make <target>' where <target> is one of"
|
||||
@echo " html to make standalone HTML files"
|
||||
@echo " livehtml to preview changes with live reload in your browser"
|
||||
@echo " dirhtml to make HTML files named index.html in directories"
|
||||
@echo " singlehtml to make a single large HTML file"
|
||||
@echo " pickle to make pickle files"
|
||||
@echo " json to make JSON files"
|
||||
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||
@echo " qthelp to make HTML files and a qthelp project"
|
||||
@echo " devhelp to make HTML files and a Devhelp project"
|
||||
@echo " epub to make an epub"
|
||||
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
||||
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
|
||||
@echo " text to make text files"
|
||||
@echo " man to make manual pages"
|
||||
@echo " texinfo to make Texinfo files"
|
||||
@echo " info to make Texinfo files and run them through makeinfo"
|
||||
@echo " gettext to make PO message catalogs"
|
||||
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||
@echo " xml to make Docutils-native XML files"
|
||||
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
|
||||
@echo " linkcheck to check all external links for integrity"
|
||||
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILDDIR)/*
|
||||
|
||||
html:
|
||||
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||
|
||||
livehtml:
|
||||
sphinx-autobuild "./" "$(BUILDDIR)" $(O)
|
||||
|
||||
dirhtml:
|
||||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||
|
||||
singlehtml:
|
||||
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
||||
|
||||
pickle:
|
||||
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||
@echo
|
||||
@echo "Build finished; now you can process the pickle files."
|
||||
|
||||
json:
|
||||
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||
@echo
|
||||
@echo "Build finished; now you can process the JSON files."
|
||||
|
||||
htmlhelp:
|
||||
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||
|
||||
qthelp:
|
||||
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/RIPEAtlasToolsMagellan.qhcp"
|
||||
@echo "To view the help file:"
|
||||
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/RIPEAtlasToolsMagellan.qhc"
|
||||
|
||||
devhelp:
|
||||
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
||||
@echo
|
||||
@echo "Build finished."
|
||||
@echo "To view the help file:"
|
||||
@echo "# mkdir -p $$HOME/.local/share/devhelp/RIPEAtlasToolsMagellan"
|
||||
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/RIPEAtlasToolsMagellan"
|
||||
@echo "# devhelp"
|
||||
|
||||
epub:
|
||||
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
||||
@echo
|
||||
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
||||
|
||||
latex:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo
|
||||
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
||||
"(use \`make latexpdf' here to do that automatically)."
|
||||
|
||||
latexpdf:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo "Running LaTeX files through pdflatex..."
|
||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||
|
||||
latexpdfja:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo "Running LaTeX files through platex and dvipdfmx..."
|
||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
|
||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||
|
||||
text:
|
||||
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
||||
@echo
|
||||
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
||||
|
||||
man:
|
||||
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
||||
@echo
|
||||
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
||||
|
||||
texinfo:
|
||||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||
@echo
|
||||
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
||||
@echo "Run \`make' in that directory to run these through makeinfo" \
|
||||
"(use \`make info' here to do that automatically)."
|
||||
|
||||
# Builds the Texinfo sources, then runs the generated Makefile's `info` target.
# The recursive call goes through $(MAKE), as the latexpdf targets do, so that
# command-line flags and the jobserver are passed on to the sub-make.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
||||
|
||||
gettext:
|
||||
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
||||
@echo
|
||||
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
||||
|
||||
changes:
|
||||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||
@echo
|
||||
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||
|
||||
linkcheck:
|
||||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||
@echo
|
||||
@echo "Link check complete; look for any errors in the above output " \
|
||||
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||
|
||||
doctest:
|
||||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||
@echo "Testing of doctests in the sources finished, look at the " \
|
||||
"results in $(BUILDDIR)/doctest/output.txt."
|
||||
|
||||
xml:
|
||||
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
|
||||
@echo
|
||||
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
|
||||
|
||||
pseudoxml:
|
||||
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
|
||||
@echo
|
||||
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
597
docs/_static/css/custom.css
vendored
@@ -1,597 +0,0 @@
|
||||
/* Variables */
|
||||
:root {
|
||||
--color-text-body: #5c5962;
|
||||
--color-text-body-light: #fcfcfc;
|
||||
--color-text-anchor: #7253ed;
|
||||
--color-text-alt: rgba(0, 0, 0, 0.3);
|
||||
--color-text-title: #27262b;
|
||||
--color-text-code-inline: #e74c3c;
|
||||
--color-text-code-nt: #062873;
|
||||
--color-text-selection: #b19eff;
|
||||
--color-bg-body: #fcfcfc;
|
||||
--color-bg-body-alt: #f3f6f6;
|
||||
--color-bg-side-nav: #f5f6fa;
|
||||
--color-bg-side-nav-hover: #ebedf5;
|
||||
--color-bg-code-block: var(--color-bg-side-nav);
|
||||
--color-border: #eeebee;
|
||||
--color-btn-neutral-bg: #f3f6f6;
|
||||
--color-btn-neutral-bg-hover: #e5ebeb;
|
||||
--color-success-title: #1abc9c;
|
||||
--color-success-body: #dbfaf4;
|
||||
--color-warning-title: #f0b37e;
|
||||
--color-warning-body: #ffedcc;
|
||||
--color-danger-title: #f29f97;
|
||||
--color-danger-body: #fdf3f2;
|
||||
--color-info-title: #6ab0de;
|
||||
--color-info-body: #e7f2fa;
|
||||
}
|
||||
|
||||
/* Dark theme: overrides the colour variables declared on :root.
   --color-text-anchor is not overridden here. */
.dark-mode {
  --color-text-body: #abb2bf;
  --color-text-body-light: #9499a2;
  /* was rgba(0255, ...): stray leading zero removed from the red channel */
  --color-text-alt: rgba(255, 255, 255, 0.5);
  --color-text-title: var(--color-text-anchor);
  --color-text-code-inline: #abb2bf;
  --color-text-code-nt: #2063f3;
  --color-text-selection: #030303;
  --color-bg-body: #1d1d20 !important;
  --color-bg-body-alt: #131315;
  --color-bg-side-nav: #18181a;
  --color-bg-side-nav-hover: #101216;
  --color-bg-code-block: #101216;
  --color-border: #47494f;
  --color-btn-neutral-bg: #242529;
  --color-btn-neutral-bg-hover: #101216;
  --color-success-title: #02120f;
  --color-success-body: #041b17;
  --color-warning-title: #1b0e03;
  --color-warning-body: #371d06;
  --color-danger-title: #120902;
  --color-danger-body: #1b0503;
  --color-info-title: #020608;
  --color-info-body: #06141e;
}
|
||||
|
||||
* {
|
||||
transition: background-color 0.3s ease, border-color 0.3s ease;
|
||||
}
|
||||
|
||||
/* Typography */
|
||||
body {
|
||||
font-family: system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,sans-serif;
|
||||
font-size: inherit;
|
||||
line-height: 1.4;
|
||||
color: var(--color-text-body);
|
||||
}
|
||||
|
||||
.rst-content p {
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
font-family: inherit;
|
||||
}
|
||||
|
||||
.rst-content .toctree-wrapper>p.caption, .rst-content h1, .rst-content h2, .rst-content h3, .rst-content h4, .rst-content h5, .rst-content h6 {
|
||||
padding-top: .5em;
|
||||
}
|
||||
|
||||
p, .main-content-wrap, .rst-content .section ul, .rst-content .toctree-wrapper ul, .rst-content section ul, .wy-plain-list-disc, article ul {
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
pre, .code, .rst-content .linenodiv pre, .rst-content div[class^=highlight] pre, .rst-content pre.literal-block {
|
||||
font-family: "SFMono-Regular", Menlo,Consolas, Monospace;
|
||||
font-size: 0.75em;
|
||||
line-height: 1.8;
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.toctree-l3,.wy-menu-vertical li.toctree-l4 {
|
||||
font-size: 1rem
|
||||
}
|
||||
|
||||
.rst-versions {
|
||||
font-family: inherit;
|
||||
line-height: 1;
|
||||
}
|
||||
|
||||
footer, footer p {
|
||||
font-size: .8rem;
|
||||
}
|
||||
|
||||
footer .rst-footer-buttons {
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
@media (max-width: 400px) {
|
||||
/* break code lines on mobile */
|
||||
pre, code {
|
||||
word-break: break-word;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Layout */
|
||||
.wy-side-nav-search, .wy-menu-vertical {
|
||||
width: auto;
|
||||
}
|
||||
|
||||
.wy-nav-side {
|
||||
z-index: 0;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
background-color: var(--color-bg-side-nav);
|
||||
}
|
||||
|
||||
.wy-side-scroll {
|
||||
width: 100%;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
@media (min-width: 66.5rem) {
|
||||
.wy-side-scroll {
|
||||
width:264px
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 50rem) {
|
||||
.wy-nav-side {
|
||||
flex-wrap: nowrap;
|
||||
position: fixed;
|
||||
width: 248px;
|
||||
height: 100%;
|
||||
flex-direction: column;
|
||||
border-right: 1px solid var(--color-border);
|
||||
align-items:flex-end
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 66.5rem) {
|
||||
.wy-nav-side {
|
||||
width: calc((100% - 1064px) / 2 + 264px);
|
||||
min-width:264px
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 50rem) {
|
||||
.wy-nav-content-wrap {
|
||||
position: relative;
|
||||
max-width: 800px;
|
||||
margin-left:248px
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 66.5rem) {
|
||||
.wy-nav-content-wrap {
|
||||
margin-left:calc((100% - 1064px) / 2 + 264px)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Colors */
|
||||
body.wy-body-for-nav,
|
||||
.wy-nav-content {
|
||||
background: var(--color-bg-body);
|
||||
}
|
||||
|
||||
.wy-nav-side {
|
||||
border-right: 1px solid var(--color-border);
|
||||
}
|
||||
|
||||
.wy-side-nav-search, .wy-nav-top {
|
||||
background: var(--color-bg-side-nav);
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
}
|
||||
|
||||
.wy-nav-content-wrap {
|
||||
background: inherit;
|
||||
}
|
||||
|
||||
.wy-side-nav-search > a, .wy-nav-top a, .wy-nav-top i {
|
||||
color: var(--color-text-title);
|
||||
}
|
||||
|
||||
.wy-side-nav-search > a:hover, .wy-nav-top a:hover {
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
.wy-side-nav-search > div.version {
|
||||
color: var(--color-text-alt)
|
||||
}
|
||||
|
||||
.wy-side-nav-search > div[role="search"] {
|
||||
border-top: 1px solid var(--color-border);
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.toctree-l2.current>a, .wy-menu-vertical li.toctree-l2.current li.toctree-l3>a,
|
||||
.wy-menu-vertical li.toctree-l3.current>a, .wy-menu-vertical li.toctree-l3.current li.toctree-l4>a {
|
||||
background: var(--color-bg-side-nav);
|
||||
}
|
||||
|
||||
.rst-content .highlighted {
|
||||
background: #eedd85;
|
||||
box-shadow: 0 0 0 2px #eedd85;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.wy-side-nav-search input[type=text],
|
||||
html.writer-html5 .rst-content table.docutils th {
|
||||
color: var(--color-text-body);
|
||||
}
|
||||
|
||||
.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td,
|
||||
.wy-table-backed,
|
||||
.wy-table-odd td,
|
||||
.wy-table-striped tr:nth-child(2n-1) td {
|
||||
background-color: var(--color-bg-body-alt);
|
||||
}
|
||||
|
||||
.rst-content table.docutils,
|
||||
.wy-table-bordered-all,
|
||||
html.writer-html5 .rst-content table.docutils th,
|
||||
.rst-content table.docutils td,
|
||||
.wy-table-bordered-all td,
|
||||
hr {
|
||||
border-color: var(--color-border) !important;
|
||||
}
|
||||
|
||||
::selection {
|
||||
background: var(--color-text-selection);
|
||||
}
|
||||
|
||||
/* Ridiculous rules are taken from sphinx_rtd */
|
||||
.rst-content .admonition-title,
|
||||
.wy-alert-title {
|
||||
color: var(--color-text-body-light);
|
||||
}
|
||||
|
||||
.rst-content .hint,
|
||||
.rst-content .important,
|
||||
.rst-content .tip,
|
||||
.rst-content .wy-alert-success,
|
||||
.wy-alert.wy-alert-success {
|
||||
background: var(--color-success-body);
|
||||
}
|
||||
|
||||
.rst-content .hint .admonition-title,
|
||||
.rst-content .hint .wy-alert-title,
|
||||
.rst-content .important .admonition-title,
|
||||
.rst-content .important .wy-alert-title,
|
||||
.rst-content .tip .admonition-title,
|
||||
.rst-content .tip .wy-alert-title,
|
||||
.rst-content .wy-alert-success .admonition-title,
|
||||
.rst-content .wy-alert-success .wy-alert-title,
|
||||
.wy-alert.wy-alert-success .rst-content .admonition-title,
|
||||
.wy-alert.wy-alert-success .wy-alert-title {
|
||||
background-color: var(--color-success-title);
|
||||
}
|
||||
|
||||
.rst-content .admonition-todo,
|
||||
.rst-content .attention,
|
||||
.rst-content .caution,
|
||||
.rst-content .warning,
|
||||
.rst-content .wy-alert-warning,
|
||||
.wy-alert.wy-alert-warning {
|
||||
background: var(--color-warning-body);
|
||||
}
|
||||
|
||||
.rst-content .admonition-todo .admonition-title,
|
||||
.rst-content .admonition-todo .wy-alert-title,
|
||||
.rst-content .attention .admonition-title,
|
||||
.rst-content .attention .wy-alert-title,
|
||||
.rst-content .caution .admonition-title,
|
||||
.rst-content .caution .wy-alert-title,
|
||||
.rst-content .warning .admonition-title,
|
||||
.rst-content .warning .wy-alert-title,
|
||||
.rst-content .wy-alert-warning .admonition-title,
|
||||
.rst-content .wy-alert-warning .wy-alert-title,
|
||||
.rst-content .wy-alert.wy-alert-warning .admonition-title,
|
||||
.wy-alert.wy-alert-warning .rst-content .admonition-title,
|
||||
.wy-alert.wy-alert-warning .wy-alert-title {
|
||||
background: var(--color-warning-title);
|
||||
}
|
||||
|
||||
.rst-content .danger,
|
||||
.rst-content .error,
|
||||
.rst-content .wy-alert-danger,
|
||||
.wy-alert.wy-alert-danger {
|
||||
background: var(--color-danger-body);
|
||||
}
|
||||
|
||||
.rst-content .danger .admonition-title,
|
||||
.rst-content .danger .wy-alert-title,
|
||||
.rst-content .error .admonition-title,
|
||||
.rst-content .error .wy-alert-title,
|
||||
.rst-content .wy-alert-danger .admonition-title,
|
||||
.rst-content .wy-alert-danger .wy-alert-title,
|
||||
.wy-alert.wy-alert-danger .rst-content .admonition-title,
|
||||
.wy-alert.wy-alert-danger .wy-alert-title {
|
||||
background: var(--color-danger-title);
|
||||
}
|
||||
|
||||
.rst-content .note,
|
||||
.rst-content .seealso,
|
||||
.rst-content .wy-alert-info,
|
||||
.wy-alert.wy-alert-info {
|
||||
background: var(--color-info-body);
|
||||
}
|
||||
|
||||
.rst-content .note .admonition-title,
|
||||
.rst-content .note .wy-alert-title,
|
||||
.rst-content .seealso .admonition-title,
|
||||
.rst-content .seealso .wy-alert-title,
|
||||
.rst-content .wy-alert-info .admonition-title,
|
||||
.rst-content .wy-alert-info .wy-alert-title,
|
||||
.wy-alert.wy-alert-info .rst-content .admonition-title,
|
||||
.wy-alert.wy-alert-info .wy-alert-title {
|
||||
background: var(--color-info-title);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Links */
|
||||
a, a:visited,
|
||||
.wy-menu-vertical a,
|
||||
a.icon.icon-home,
|
||||
.wy-menu-vertical li.toctree-l1.current > a.current {
|
||||
color: var(--color-text-anchor);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
a:hover, .wy-breadcrumbs-aside a {
|
||||
color: var(--color-text-anchor); /* reset */
|
||||
}
|
||||
|
||||
/* Version switcher links use the anchor colour.
   The original value "#var(...)" was not a valid colour, so the whole
   declaration was being ignored. */
.rst-versions a, .rst-versions .rst-current-version {
  color: var(--color-text-anchor);
}
|
||||
|
||||
.wy-nav-content a.reference, .wy-nav-content a:not([class]) {
|
||||
background-image: linear-gradient(var(--color-border) 0%, var(--color-border) 100%);
|
||||
background-repeat: repeat-x;
|
||||
background-position: 0 100%;
|
||||
background-size: 1px 1px;
|
||||
}
|
||||
|
||||
.wy-nav-content a.reference:hover, .wy-nav-content a:not([class]):hover {
|
||||
background-image: linear-gradient(rgba(114,83,237,0.45) 0%, rgba(114,83,237,0.45) 100%);
|
||||
background-size: 1px 1px;
|
||||
}
|
||||
|
||||
.wy-menu-vertical a:hover,
|
||||
.wy-menu-vertical li.current a:hover,
|
||||
.wy-menu-vertical a:active {
|
||||
background: var(--color-bg-side-nav-hover) !important;
|
||||
color: var(--color-text-body);
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.toctree-l1.current>a,
|
||||
.wy-menu-vertical li.current>a,
|
||||
.wy-menu-vertical li.on a {
|
||||
background-color: var(--color-bg-side-nav-hover);
|
||||
border: none;
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.current {
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.current a {
|
||||
border-right: none;
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.toctree-l2 a,
|
||||
.wy-menu-vertical li.toctree-l3 a,
|
||||
.wy-menu-vertical li.toctree-l4 a,
|
||||
.wy-menu-vertical li.toctree-l5 a,
|
||||
.wy-menu-vertical li.toctree-l6 a,
|
||||
.wy-menu-vertical li.toctree-l7 a,
|
||||
.wy-menu-vertical li.toctree-l8 a,
|
||||
.wy-menu-vertical li.toctree-l9 a,
|
||||
.wy-menu-vertical li.toctree-l10 a {
|
||||
color: var(--color-text-body);
|
||||
}
|
||||
|
||||
a.image-reference, a.image-reference:hover {
|
||||
background: none !important;
|
||||
}
|
||||
|
||||
a.image-reference img {
|
||||
cursor: zoom-in;
|
||||
}
|
||||
|
||||
|
||||
/* Code blocks */
|
||||
.rst-content code, .rst-content tt, code {
|
||||
padding: 0.25em;
|
||||
font-weight: 400;
|
||||
background-color: var(--color-bg-code-block);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.rst-content div[class^=highlight], .rst-content pre.literal-block {
|
||||
padding: 0.7rem;
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.75rem;
|
||||
overflow-x: auto;
|
||||
background-color: var(--color-bg-side-nav);
|
||||
border-color: var(--color-border);
|
||||
border-radius: 4px;
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.rst-content .admonition-title,
|
||||
.rst-content div.admonition,
|
||||
.wy-alert-title {
|
||||
padding: 10px 12px;
|
||||
border-top-left-radius: 4px;
|
||||
border-top-right-radius: 4px;
|
||||
}
|
||||
|
||||
.highlight .go {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.highlight .nt {
|
||||
color: var(--color-text-code-nt);
|
||||
}
|
||||
|
||||
.rst-content code.literal,
|
||||
.rst-content tt.literal,
|
||||
html.writer-html5 .rst-content dl.footnote code {
|
||||
border-color: var(--color-border);
|
||||
background-color: var(--color-border);
|
||||
color: var(--color-text-code-inline)
|
||||
}
|
||||
|
||||
|
||||
/* Search */
|
||||
.wy-side-nav-search input[type=text] {
|
||||
border: none;
|
||||
border-radius: 0;
|
||||
background-color: transparent;
|
||||
font-family: inherit;
|
||||
font-size: .85rem;
|
||||
box-shadow: none;
|
||||
padding: .7rem 1rem .7rem 2.8rem;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
#rtd-search-form {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
#rtd-search-form:before {
|
||||
font: normal normal normal 14px/1 FontAwesome;
|
||||
font-size: inherit;
|
||||
text-rendering: auto;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
content: "\f002";
|
||||
color: var(--color-text-alt);
|
||||
position: absolute;
|
||||
left: 1.5rem;
|
||||
top: .7rem;
|
||||
}
|
||||
|
||||
/* Side nav */
|
||||
.wy-side-nav-search {
|
||||
padding: 1rem 0 0 0;
|
||||
}
|
||||
|
||||
.wy-menu-vertical li a button.toctree-expand {
|
||||
float: right;
|
||||
margin-right: -1.5em;
|
||||
padding: 0 .5em;
|
||||
}
|
||||
|
||||
.wy-menu-vertical a,
|
||||
.wy-menu-vertical li.current>a,
|
||||
.wy-menu-vertical li.current li>a {
|
||||
padding-right: 1.5em !important;
|
||||
}
|
||||
|
||||
.wy-menu-vertical li.current li>a.current {
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
/* Misc spacing */
|
||||
.rst-content .admonition-title, .wy-alert-title {
|
||||
padding: 10px 12px;
|
||||
}
|
||||
|
||||
/* Buttons */
|
||||
/* Base button appearance. */
.btn {
  display: inline-block;
  box-sizing: border-box;
  padding: 0.3em 1em;
  margin: 0;
  font-family: inherit;
  font-size: inherit;
  font-weight: 500;
  line-height: 1.5;
  /* was "#var(...)", which is not a valid colour and was being ignored */
  color: var(--color-text-anchor);
  text-decoration: none;
  vertical-align: baseline;
  background-color: #f7f7f7;
  border-width: 0;
  border-radius: 4px;
  box-shadow: 0 1px 2px rgba(0,0,0,0.12),0 3px 10px rgba(0,0,0,0.08);
  appearance: none;
}
|
||||
|
||||
.btn:active {
|
||||
padding: 0.3em 1em;
|
||||
}
|
||||
|
||||
.rst-content .btn:focus {
|
||||
outline: 1px solid #ccc;
|
||||
}
|
||||
|
||||
.rst-content .btn-neutral, .rst-content .btn span.fa {
|
||||
color: var(--color-text-body) !important;
|
||||
}
|
||||
|
||||
/* Neutral button colours.
   --color-btn-neutral-text is not declared in the :root or .dark-mode
   variable blocks of this stylesheet, so a fallback to the body text colour
   is supplied. If the variable is defined elsewhere, it still wins. */
.btn-neutral {
  background-color: var(--color-btn-neutral-bg) !important;
  color: var(--color-btn-neutral-text, var(--color-text-body)) !important;
  border: 1px solid var(--color-btn-neutral-bg);
}
|
||||
|
||||
.btn:hover, .btn-neutral:hover {
|
||||
background-color: var(--color-btn-neutral-bg-hover) !important;
|
||||
}
|
||||
|
||||
|
||||
/* Icon overrides */
|
||||
.wy-side-nav-search a.icon-home:before {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.fa-minus-square-o:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before {
|
||||
content: "\f106"; /* fa-angle-up */
|
||||
}
|
||||
|
||||
.fa-plus-square-o:before, .wy-menu-vertical li button.toctree-expand:before {
|
||||
content: "\f107"; /* fa-angle-down */
|
||||
}
|
||||
|
||||
|
||||
/* Misc */
|
||||
.wy-nav-top {
|
||||
line-height: 36px;
|
||||
}
|
||||
|
||||
/* Icon in the top navigation bar. */
.wy-nav-top > i {
  font-size: 24px;
  padding: 8px 0 0 2px;
  /* was "#var(...)", which is not a valid colour and was being ignored */
  color: var(--color-text-anchor);
}
|
||||
|
||||
.rst-content table.docutils td,
|
||||
.rst-content table.docutils th,
|
||||
.rst-content table.field-list td,
|
||||
.rst-content table.field-list th,
|
||||
.wy-table td,
|
||||
.wy-table th {
|
||||
padding: 8px 14px;
|
||||
}
|
||||
|
||||
.dark-mode-toggle {
|
||||
position: absolute;
|
||||
top: 14px;
|
||||
right: 12px;
|
||||
height: 20px;
|
||||
width: 24px;
|
||||
z-index: 10;
|
||||
border: none;
|
||||
background-color: transparent;
|
||||
color: inherit;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.wy-nav-content-wrap {
|
||||
z-index: 20;
|
||||
}
|
47
docs/_static/js/darkmode.js
vendored
@@ -1,47 +0,0 @@
|
||||
let toggleButton
|
||||
let icon
|
||||
|
||||
/**
 * Builds the dark-mode toggle button, inserts it as the first child of
 * <body>, and wires up the OS-preference and click handlers.
 *
 * Relies on names defined outside this function: `toggleButton` and `icon`
 * (declared at the top of this file), `toggleDarkMode`, and
 * `prefersDarkQuery` / `darkModeState`, which are expected to be set by an
 * inline script in the document <head>.
 */
function load() {
  'use strict';

  toggleButton = document.createElement('button');
  toggleButton.setAttribute('title', 'Toggle dark mode');
  toggleButton.classList.add('dark-mode-toggle');
  icon = document.createElement('i');
  icon.classList.add('fa', darkModeState ? 'fa-sun-o' : 'fa-moon-o');
  toggleButton.appendChild(icon);
  document.body.prepend(toggleButton);

  // Listen for changes in the OS settings.
  // prefersDarkQuery is set in <head>.
  if (prefersDarkQuery) {
    const onPreferenceChange = function (evt) {
      toggleDarkMode(evt.matches);
    };
    if (typeof prefersDarkQuery.addEventListener === 'function') {
      prefersDarkQuery.addEventListener('change', onPreferenceChange);
    } else {
      // Older versions of Safari only provide the deprecated addListener.
      prefersDarkQuery.addListener(onPreferenceChange);
    }
  }

  // Initial setting depending on the prefers-color-scheme query or localStorage.
  // darkModeState should be set in the document <head> to prevent flash.
  if (darkModeState === undefined || darkModeState === null) {
    darkModeState = false;
  }
  toggleDarkMode(darkModeState);

  // Flips the mode on click and remembers the choice in localStorage.
  toggleButton.addEventListener('click', () => {
    darkModeState = !darkModeState;

    toggleDarkMode(darkModeState);
    localStorage.setItem('dark-mode', darkModeState);
  });
}
|
||||
|
||||
/**
 * Applies the given dark-mode state to the page: sets the `dark-mode` /
 * `light-mode` class on the root element, swaps the toggle icon, and records
 * the state in the outer `darkModeState` variable.
 *
 * Uses the `icon` element created by `load()`, so it must not be called
 * before `load()` has run.
 *
 * @param {boolean} state - true for dark mode, false for light mode. The
 *   value is coerced with Boolean(), so `undefined` means light mode.
 */
function toggleDarkMode(state) {
  // classList.toggle() treats an undefined `force` argument as "flip the
  // class", which could leave both mode classes set. Coerce first.
  const isDark = Boolean(state);
  const rootClasses = document.documentElement.classList;

  rootClasses.toggle('dark-mode', isDark);
  rootClasses.toggle('light-mode', !isDark);

  // The sun icon is shown while dark mode is active, the moon otherwise.
  icon.classList.remove('fa-sun-o', 'fa-moon-o');
  icon.classList.add(isDark ? 'fa-sun-o' : 'fa-moon-o');

  darkModeState = isDark;
}
|
||||
|
||||
document.addEventListener('DOMContentLoaded', load)
|
BIN
docs/_static/screenshots/mail-rules-edited.png
vendored
Before Width: | Height: | Size: 96 KiB |
13
docs/_templates/layout.html
vendored
@@ -1,13 +0,0 @@
|
||||
{% extends "!layout.html" %}
{% block extrahead %}
<script>
  // Runs inline in <head> so the mode classes are on the root element before
  // the page body is rendered.

  // MediaQueryList object
  const prefersDarkQuery = window.matchMedia("(prefers-color-scheme: dark)");
  // A stored choice ("true" / "false") takes precedence over the OS preference
  const lsDark = localStorage.getItem("dark-mode");
  let darkModeState = lsDark !== null ? lsDark === "true" : prefersDarkQuery.matches;

  document.documentElement.classList.toggle("dark-mode", darkModeState);
  document.documentElement.classList.toggle("light-mode", !darkModeState);
</script>
{{ super() }}
{% endblock %}
|
550
docs/administration.md
Normal file
@@ -0,0 +1,550 @@
|
||||
# Administration
|
||||
|
||||
## Making backups {#backup}
|
||||
|
||||
Multiple options exist for making backups of your paperless instance,
|
||||
depending on how you installed paperless.
|
||||
|
||||
Before making backups, make sure that paperless is not running.
|
||||
|
||||
Options available to any installation of paperless:
|
||||
|
||||
- Use the [document exporter](#exporter). The document exporter exports all your documents,
|
||||
thumbnails and metadata to a specific folder. You may import your
|
||||
documents into a fresh instance of paperless again or store your
|
||||
documents in another DMS with this export.
|
||||
- The document exporter is also able to update an already existing
|
||||
export. Therefore, incremental backups with `rsync` are entirely
|
||||
possible.
|
||||
|
||||
!!! caution
|
||||
|
||||
You cannot import the export generated with one version of paperless in
|
||||
a different version of paperless. The export contains an exact image of
|
||||
the database, and migrations may change the database layout.
|
||||
|
||||
Options available to docker installations:
|
||||
|
||||
- Backup the docker volumes. These usually reside within
|
||||
`/var/lib/docker/volumes` on the host and you need to be root in
|
||||
order to access them.
|
||||
|
||||
Paperless uses 4 volumes:
|
||||
|
||||
- `paperless_media`: This is where your documents are stored.
|
||||
- `paperless_data`: This is where auxiliary data is stored. This
|
||||
folder also contains the SQLite database, if you use it.
|
||||
- `paperless_pgdata`: Exists only if you use PostgreSQL and
|
||||
contains the database.
|
||||
- `paperless_dbdata`: Exists only if you use MariaDB and contains
|
||||
the database.
|
||||
|
||||
Options available to bare-metal and non-docker installations:
|
||||
|
||||
- Backup the entire paperless folder. This ensures that if your
|
||||
paperless instance crashes at some point or your disk fails, you can
|
||||
simply copy the folder back into place and it works.
|
||||
|
||||
When using PostgreSQL or MariaDB, you'll also have to back up the
|
||||
database.
|
||||
|
||||
### Restoring {#migrating-restoring}
|
||||
|
||||
## Updating Paperless {#updating}
|
||||
|
||||
### Docker Route {#docker-updating}
|
||||
|
||||
If a new release of paperless-ngx is available, upgrading depends on how
|
||||
you installed paperless-ngx in the first place. The releases are
|
||||
available at the [release
|
||||
page](https://github.com/paperless-ngx/paperless-ngx/releases).
|
||||
|
||||
First of all, ensure that paperless is stopped.
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose down
|
||||
```
|
||||
|
||||
After that, [make a backup](#backup).
|
||||
|
||||
1. If you pull the image from the docker hub, all you need to do is:
|
||||
|
||||
```shell-session
|
||||
$ docker-compose pull
|
||||
$ docker-compose up
|
||||
```
|
||||
|
||||
The docker-compose files refer to the `latest` version, which is
|
||||
always the latest stable release.
|
||||
|
||||
2. If you built the image yourself, do the following:
|
||||
|
||||
```shell-session
|
||||
$ git pull
|
||||
$ docker-compose build
|
||||
$ docker-compose up
|
||||
```
|
||||
|
||||
Running `docker-compose up` will also apply any new database migrations.
|
||||
If you see everything working, press CTRL+C once to gracefully stop
|
||||
paperless. Then you can start paperless-ngx with `-d` to have it run in
|
||||
the background.
|
||||
|
||||
!!! note
|
||||
|
||||
In version 0.9.14, the update process was changed. In 0.9.13 and
|
||||
earlier, the docker-compose files specified exact versions and pull
|
||||
won't automatically update to newer versions. In order to enable
|
||||
updates as described above, either get the new `docker-compose.yml`
|
||||
file from
|
||||
[here](https://github.com/paperless-ngx/paperless-ngx/tree/master/docker/compose)
|
||||
or edit the `docker-compose.yml` file, find the line that says
|
||||
|
||||
```
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:0.9.x
|
||||
```
|
||||
|
||||
and replace the version with `latest`:
|
||||
|
||||
```
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
In version 1.7.1 and onwards, the Docker image can now be pinned to a
|
||||
release series. This is often combined with automatic updaters such as
|
||||
Watchtower to allow safer unattended upgrading to new bugfix releases
|
||||
only. It is still recommended to always review release notes before
|
||||
upgrading. To pin your install to a release series, edit the
|
||||
`docker-compose.yml` file and find the line that says
|
||||
|
||||
```
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
```
|
||||
|
||||
and replace the version with the series you want to track, for
|
||||
example:
|
||||
|
||||
```
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:1.7
|
||||
```
|
||||
|
||||
### Bare Metal Route {#bare-metal-updating}
|
||||
|
||||
After grabbing the new release and unpacking the contents, do the
|
||||
following:
|
||||
|
||||
1. Update dependencies. New paperless versions may require additional
|
||||
dependencies. The dependencies required are listed in the section
|
||||
about
|
||||
[bare metal installations](/setup#bare_metal).
|
||||
|
||||
2. Update python requirements. Keep in mind to activate your virtual
|
||||
environment before that, if you use one.
|
||||
|
||||
```shell-session
|
||||
$ pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Migrate the database.
|
||||
|
||||
```shell-session
|
||||
$ cd src
|
||||
$ python3 manage.py migrate # (1)
|
||||
```
|
||||
|
||||
1. Including `sudo -Hu <paperless_user>` may be required
|
||||
|
||||
This might not actually do anything. Not every new paperless version
|
||||
comes with new database migrations.
|
||||
|
||||
## Downgrading Paperless {#downgrade-paperless}
|
||||
|
||||
Downgrades are possible. However, some updates also contain database
|
||||
migrations (these change the layout of the database and may move data).
|
||||
In order to move back from a version that applied database migrations,
|
||||
you'll have to revert the database migration _before_ downgrading, and
|
||||
then downgrade paperless.
|
||||
|
||||
This table lists the compatible versions for each database migration
|
||||
number.
|
||||
|
||||
| Migration number | Version range |
|
||||
| ---------------- | --------------- |
|
||||
| 1011 | 1.0.0 |
|
||||
| 1012 | 1.1.0 - 1.2.1 |
|
||||
| 1014 | 1.3.0 - 1.3.1 |
|
||||
| 1016 | 1.3.2 - current |
|
||||
|
||||
Execute the following management command to migrate your database:
|
||||
|
||||
```shell-session
|
||||
$ python3 manage.py migrate documents <migration number>
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
Some migrations cannot be undone. The command will issue errors if that
|
||||
happens.
|
||||
|
||||
## Management utilities {#management-commands}
|
||||
|
||||
Paperless comes with some management commands that perform various
|
||||
maintenance tasks on your paperless instance. You can invoke these
|
||||
commands in the following way:
|
||||
|
||||
With docker-compose, while paperless is running:
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose exec webserver <command> <arguments>
|
||||
```
|
||||
|
||||
With docker, while paperless is running:
|
||||
|
||||
```shell-session
|
||||
$ docker exec -it <container-name> <command> <arguments>
|
||||
```
|
||||
|
||||
Bare metal:
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py <command> <arguments> # (1)
|
||||
```
|
||||
|
||||
1. Including `sudo -Hu <paperless_user>` may be required
|
||||
|
||||
All commands have built-in help, which can be accessed by executing them
|
||||
with the argument `--help`.
|
||||
|
||||
### Document exporter {#exporter}
|
||||
|
||||
The document exporter exports all your data from paperless into a folder
|
||||
for backup or migration to another DMS.
|
||||
|
||||
If you use the document exporter within a cronjob to backup your data
|
||||
you might use the `-T` flag behind exec to suppress "The input device
|
||||
is not a TTY" errors. For example:
|
||||
`docker-compose exec -T webserver document_exporter ../export`
|
||||
|
||||
```
|
||||
document_exporter target [-c] [-d] [-f] [-na] [-nt] [-p] [-sm] [-z]
|
||||
|
||||
optional arguments:
|
||||
-c, --compare-checksums
|
||||
-d, --delete
|
||||
-f, --use-filename-format
|
||||
-na, --no-archive
|
||||
-nt, --no-thumbnail
|
||||
-p, --use-folder-prefix
|
||||
-sm, --split-manifest
|
||||
-z, --zip
|
||||
```
|
||||
|
||||
`target` is a folder to which the data gets written. This includes
|
||||
documents, thumbnails and a `manifest.json` file. The manifest contains
|
||||
all metadata from the database (correspondents, tags, etc).
|
||||
|
||||
When you use the provided docker compose script, specify `../export` as
|
||||
the target. This path inside the container is automatically mounted on
|
||||
your host on the folder `export`.
|
||||
|
||||
If the target directory already exists and contains files, paperless
|
||||
will assume that the contents of the export directory are a previous
|
||||
export and will attempt to update the previous export. Paperless will
|
||||
only export changed and added files. Paperless determines whether a file
|
||||
has changed by inspecting the file attributes "date/time modified" and
|
||||
"size". If that does not work out for you, specify `-c` or
|
||||
`--compare-checksums` and paperless will attempt to compare file
|
||||
checksums instead. This is slower.
|
||||
|
||||
Paperless will not remove any existing files in the export directory. If
|
||||
you want paperless to also remove files that do not belong to the
|
||||
current export such as files from deleted documents, specify `-d` or `--delete`.
|
||||
Be careful when pointing paperless to a directory that already contains
|
||||
other files.
|
||||
|
||||
The filenames generated by this command follow the format
|
||||
`[date created] [correspondent] [title].[extension]`. If you want
|
||||
paperless to use `PAPERLESS_FILENAME_FORMAT` for exported filenames
|
||||
instead, specify `-f` or `--use-filename-format`.
|
||||
|
||||
If `-na` or `--no-archive` is provided, no archive files will be exported,
|
||||
only the original files.
|
||||
|
||||
If `-nt` or `--no-thumbnail` is provided, thumbnail files will not be exported.
|
||||
|
||||
!!! note
|
||||
|
||||
When using the `-na`/`--no-archive` or `-nt`/`--no-thumbnail` options
|
||||
the exporter will not output these files for backup. After importing,
|
||||
the [sanity checker](#sanity-checker) will warn about missing thumbnails and archive files
|
||||
until they are regenerated with `document_thumbnails` or [`document_archiver`](#archiver).
|
||||
It can make sense to omit these files from backup as their content and checksum
|
||||
can change (new archiver algorithm) and may then cause additional used space in
|
||||
a deduplicated backup.
|
||||
|
||||
If `-p` or `--use-folder-prefix` is provided, files will be exported
|
||||
in dedicated folders according to their nature: `archive`, `originals`,
|
||||
`thumbnails` or `json`
|
||||
|
||||
If `-sm` or `--split-manifest` is provided, information about each document
|
||||
will be placed in individual json files, instead of a single JSON file. The main
|
||||
manifest.json will still contain application wide information (e.g. tags, correspondent,
|
||||
documenttype, etc)
|
||||
|
||||
If `-z` or `--zip` is provided, the export will be a zipfile
|
||||
in the target directory, named according to the current date.
|
||||
|
||||
!!! warning
|
||||
|
||||
If exporting with the file name format, there may be errors due to
|
||||
your operating system's maximum path lengths. Try adjusting the export
|
||||
target or consider not using the filename format.
|
||||
|
||||
### Document importer {#importer}
|
||||
|
||||
The document importer takes the export produced by the [Document
|
||||
exporter](#exporter) and imports it into paperless.
|
||||
|
||||
The importer works just like the exporter. You point it at a directory,
|
||||
and the script does the rest of the work:
|
||||
|
||||
```
|
||||
document_importer source
|
||||
```
|
||||
|
||||
When you use the provided docker compose script, put the export inside
|
||||
the `export` folder in your paperless source directory. Specify
|
||||
`../export` as the `source`.
|
||||
|
||||
!!! note
|
||||
|
||||
Importing from a previous version of Paperless may work, but for best
|
||||
results it is suggested to match the versions.
|
||||
|
||||
### Document retagger {#retagger}
|
||||
|
||||
Say you've imported a few hundred documents and now want to introduce a
|
||||
tag or set up a new correspondent, and apply its matching to all of the
|
||||
currently-imported docs. This problem is common enough that there are
|
||||
tools for it.
|
||||
|
||||
```
|
||||
document_retagger [-h] [-c] [-T] [-t] [-s] [-i] [--use-first] [-f]
|
||||
|
||||
optional arguments:
|
||||
-c, --correspondent
|
||||
-T, --tags
|
||||
-t, --document_type
|
||||
-s, --storage_path
|
||||
-i, --inbox-only
|
||||
--use-first
|
||||
-f, --overwrite
|
||||
```
|
||||
|
||||
Run this after changing or adding matching rules. It'll loop over all
|
||||
of the documents in your database and attempt to match documents
|
||||
according to the new rules.
|
||||
|
||||
Specify any combination of `-c`, `-T`, `-t` and `-s` to have the
|
||||
retagger perform matching of the specified metadata type. If you don't
|
||||
specify any of these options, the document retagger won't do anything.
|
||||
|
||||
Specify `-i` to have the document retagger work on documents tagged with
|
||||
inbox tags only. This is useful when you don't want to mess with your
|
||||
already processed documents.
|
||||
|
||||
When multiple document types or correspondents match a single document,
|
||||
the retagger won't assign these to the document. Specify `--use-first`
|
||||
to override this behavior and just use the first correspondent or type
|
||||
it finds. This option does not apply to tags, since any amount of tags
|
||||
can be applied to a document.
|
||||
|
||||
Finally, `-f` specifies that you wish to overwrite already assigned
|
||||
correspondents, types and/or tags. The default behavior is to not assign
|
||||
correspondents and types to documents that have this data already
|
||||
assigned. `-f` works differently for tags: By default, only additional
|
||||
tags get added to documents, no tags will be removed. With `-f`, tags
|
||||
that don't match a document anymore get removed as well.
|
||||
|
||||
### Managing the Automatic matching algorithm
|
||||
|
||||
The _Auto_ matching algorithm requires a trained neural network to work.
|
||||
This network needs to be updated whenever something in your data
|
||||
changes. The docker image takes care of that automatically with the task
|
||||
scheduler. You can manually renew the classifier by invoking the
|
||||
following management command:
|
||||
|
||||
```
|
||||
document_create_classifier
|
||||
```
|
||||
|
||||
This command takes no arguments.
|
||||
|
||||
### Document thumbnails {#thumbnails}
|
||||
|
||||
Use this command to re-create document thumbnails. Optionally include the `--document {id}` option to generate thumbnails for a specific document only.
|
||||
|
||||
```
|
||||
document_thumbnails
|
||||
```
|
||||
|
||||
### Managing the document search index {#index}
|
||||
|
||||
The document search index is responsible for delivering search results
|
||||
for the website. The document index is automatically updated whenever
|
||||
documents get added to, changed, or removed from paperless. However, if
|
||||
the search yields non-existing documents or won't find anything, you
|
||||
may need to recreate the index manually.
|
||||
|
||||
```
|
||||
document_index {reindex,optimize}
|
||||
```
|
||||
|
||||
Specify `reindex` to have the index created from scratch. This may take
|
||||
some time.
|
||||
|
||||
Specify `optimize` to optimize the index. This updates certain aspects
|
||||
of the index and usually makes queries faster and also ensures that the
|
||||
autocompletion works properly. This command is regularly invoked by the
|
||||
task scheduler.
|
||||
|
||||
### Managing filenames {#renamer}
|
||||
|
||||
If you use paperless' feature to
|
||||
[assign custom filenames to your documents](/advanced_usage#file-name-handling), you can use this command to move all your files after
|
||||
changing the naming scheme.
|
||||
|
||||
!!! warning
|
||||
|
||||
Since this command moves your documents, it is advised to do a backup
|
||||
beforehand. The renaming logic is robust and will never overwrite or
|
||||
delete a file, but you can't ever be careful enough.
|
||||
|
||||
```
|
||||
document_renamer
|
||||
```
|
||||
|
||||
The command takes no arguments and processes all your documents at once.
|
||||
|
||||
Learn how to use
|
||||
[Management Utilities](#management-commands).
|
||||
|
||||
### Sanity checker {#sanity-checker}
|
||||
|
||||
Paperless has a built-in sanity checker that inspects your document
|
||||
collection for issues.
|
||||
|
||||
The issues detected by the sanity checker are as follows:
|
||||
|
||||
- Missing original files.
|
||||
- Missing archive files.
|
||||
- Inaccessible original files due to improper permissions.
|
||||
- Inaccessible archive files due to improper permissions.
|
||||
- Corrupted original documents by comparing their checksum against
|
||||
what is stored in the database.
|
||||
- Corrupted archive documents by comparing their checksum against what
|
||||
is stored in the database.
|
||||
- Missing thumbnails.
|
||||
- Inaccessible thumbnails due to improper permissions.
|
||||
- Documents without any content (warning).
|
||||
- Orphaned files in the media directory (warning). These are files
|
||||
that are not referenced by any document in paperless.
|
||||
|
||||
```
|
||||
document_sanity_checker
|
||||
```
|
||||
|
||||
The command takes no arguments. Depending on the size of your document
|
||||
archive, this may take some time.
|
||||
|
||||
### Fetching e-mail
|
||||
|
||||
Paperless automatically fetches your e-mail every 10 minutes by default.
|
||||
If you want to invoke the email consumer manually, call the following
|
||||
management command:
|
||||
|
||||
```
|
||||
mail_fetcher
|
||||
```
|
||||
|
||||
The command takes no arguments and processes all your mail accounts and
|
||||
rules.
|
||||
|
||||
!!! note
|
||||
|
||||
As of October 2022 Microsoft no longer supports IMAP authentication
|
||||
for Exchange servers, thus Exchange is no longer supported until a
|
||||
solution is implemented in the Python IMAP library used by Paperless.
|
||||
See [learn.microsoft.com](https://learn.microsoft.com/en-us/exchange/clients-and-mobile-in-exchange-online/deprecation-of-basic-authentication-exchange-online)
|
||||
|
||||
### Creating archived documents {#archiver}
|
||||
|
||||
Paperless stores archived PDF/A documents alongside your original
|
||||
documents. These archived documents will also contain selectable text
|
||||
for image-only originals. These documents are derived from the
|
||||
originals, which are always stored unmodified. If coming from an earlier
|
||||
version of paperless, your documents won't have archived versions.
|
||||
|
||||
This command creates PDF/A documents for your documents.
|
||||
|
||||
```
|
||||
document_archiver --overwrite --document <id>
|
||||
```
|
||||
|
||||
This command will only attempt to create archived documents when no
|
||||
archived document exists yet, unless `--overwrite` is specified. If
|
||||
`--document <id>` is specified, the archiver will only process that
|
||||
document.
|
||||
|
||||
!!! note
|
||||
|
||||
This command essentially performs OCR on all your documents again,
|
||||
according to your settings. If you run this with
|
||||
`PAPERLESS_OCR_MODE=redo`, it will potentially run for a very long time.
|
||||
You can cancel the command at any time, since this command will skip
|
||||
already archived versions the next time it is run.
|
||||
|
||||
!!! note
|
||||
|
||||
Some documents will cause errors and cannot be converted into PDF/A
|
||||
documents, such as encrypted PDF documents. The archiver will skip over
|
||||
these documents each time it sees them.
|
||||
|
||||
### Managing encryption {#encryption}
|
||||
|
||||
Documents can be stored in Paperless using GnuPG encryption.
|
||||
|
||||
!!! warning
|
||||
|
||||
Encryption is deprecated since [paperless-ng 0.9](/changelog#paperless-ng-090) and doesn't really
|
||||
provide any additional security, since you have to store the passphrase
|
||||
in a configuration file on the same system as the encrypted documents
|
||||
for paperless to work. Furthermore, the entire text content of the
|
||||
documents is stored plain in the database, even if your documents are
|
||||
encrypted. Filenames are not encrypted either.
|
||||
|
||||
Also, the web server provides transparent access to your encrypted
|
||||
documents.
|
||||
|
||||
Consider running paperless on an encrypted filesystem instead, which
|
||||
will then at least provide security against physical hardware theft.
|
||||
|
||||
#### Enabling encryption
|
||||
|
||||
Enabling encryption is no longer supported.
|
||||
|
||||
#### Disabling encryption
|
||||
|
||||
Basic usage to disable encryption of your document store:
|
||||
|
||||
(Note: If `PAPERLESS_PASSPHRASE` isn't set already, you need to specify
|
||||
it here)
|
||||
|
||||
```
|
||||
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
|
||||
```
|
@@ -1,531 +0,0 @@
|
||||
|
||||
**************
|
||||
Administration
|
||||
**************
|
||||
|
||||
.. _administration-backup:
|
||||
|
||||
Making backups
|
||||
##############
|
||||
|
||||
Multiple options exist for making backups of your paperless instance,
|
||||
depending on how you installed paperless.
|
||||
|
||||
Before making backups, make sure that paperless is not running.
|
||||
|
||||
Options available to any installation of paperless:
|
||||
|
||||
* Use the :ref:`document exporter <utilities-exporter>`.
|
||||
The document exporter exports all your documents, thumbnails and
|
||||
metadata to a specific folder. You may import your documents into a
|
||||
fresh instance of paperless again or store your documents in another
|
||||
DMS with this export.
|
||||
* The document exporter is also able to update an already existing export.
|
||||
Therefore, incremental backups with ``rsync`` are entirely possible.
|
||||
|
||||
.. caution::
|
||||
|
||||
You cannot import the export generated with one version of paperless in a
|
||||
different version of paperless. The export contains an exact image of the
|
||||
database, and migrations may change the database layout.
|
||||
|
||||
Options available to docker installations:
|
||||
|
||||
* Backup the docker volumes. These usually reside within
|
||||
``/var/lib/docker/volumes`` on the host and you need to be root in order
|
||||
to access them.
|
||||
|
||||
Paperless uses 4 volumes:
|
||||
|
||||
* ``paperless_media``: This is where your documents are stored.
|
||||
* ``paperless_data``: This is where auxiliary data is stored. This
|
||||
folder also contains the SQLite database, if you use it.
|
||||
* ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
|
||||
the database.
|
||||
* ``paperless_dbdata``: Exists only if you use MariaDB and contains
|
||||
the database.
|
||||
|
||||
Options available to bare-metal and non-docker installations:
|
||||
|
||||
* Backup the entire paperless folder. This ensures that if your paperless instance
|
||||
crashes at some point or your disk fails, you can simply copy the folder back
|
||||
into place and it works.
|
||||
|
||||
When using PostgreSQL or MariaDB, you'll also have to backup the database.
|
||||
|
||||
.. _migrating-restoring:
|
||||
|
||||
Restoring
|
||||
=========
|
||||
|
||||
.. _administration-updating:
|
||||
|
||||
Updating Paperless
|
||||
##################
|
||||
|
||||
Docker Route
|
||||
============
|
||||
|
||||
If a new release of paperless-ngx is available, upgrading depends on how you
|
||||
installed paperless-ngx in the first place. The releases are available at the
|
||||
`release page <https://github.com/paperless-ngx/paperless-ngx/releases>`_.
|
||||
|
||||
First of all, ensure that paperless is stopped.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose down
|
||||
|
||||
After that, :ref:`make a backup <administration-backup>`.
|
||||
|
||||
A. If you pull the image from the docker hub, all you need to do is:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker-compose pull
|
||||
$ docker-compose up
|
||||
|
||||
The docker-compose files refer to the ``latest`` version, which is always the latest
|
||||
stable release.
|
||||
|
||||
B. If you built the image yourself, do the following:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ git pull
|
||||
$ docker-compose build
|
||||
$ docker-compose up
|
||||
|
||||
Running ``docker-compose up`` will also apply any new database migrations.
|
||||
If you see everything working, press CTRL+C once to gracefully stop paperless.
|
||||
Then you can start paperless-ngx with ``-d`` to have it run in the background.
|
||||
|
||||
.. note::
|
||||
|
||||
In version 0.9.14, the update process was changed. In 0.9.13 and earlier, the
|
||||
docker-compose files specified exact versions and pull won't automatically
|
||||
update to newer versions. In order to enable updates as described above, either
|
||||
get the new ``docker-compose.yml`` file from `here <https://github.com/paperless-ngx/paperless-ngx/tree/master/docker/compose>`_
|
||||
or edit the ``docker-compose.yml`` file, find the line that says
|
||||
|
||||
.. code::
|
||||
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:0.9.x
|
||||
|
||||
and replace the version with ``latest``:
|
||||
|
||||
.. code::
|
||||
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
|
||||
.. note::
|
||||
In version 1.7.1 and onwards, the Docker image can now be pinned to a release series.
|
||||
This is often combined with automatic updaters such as Watchtower to allow safer
|
||||
unattended upgrading to new bugfix releases only. It is still recommended to always
|
||||
review release notes before upgrading. To pin your install to a release series, edit
|
||||
the ``docker-compose.yml`` find the line that says
|
||||
|
||||
.. code::
|
||||
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
|
||||
and replace the version with the series you want to track, for example:
|
||||
|
||||
.. code::
|
||||
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:1.7
|
||||
|
||||
Bare Metal Route
|
||||
================
|
||||
|
||||
After grabbing the new release and unpacking the contents, do the following:
|
||||
|
||||
1. Update dependencies. New paperless version may require additional
|
||||
dependencies. The dependencies required are listed in the section about
|
||||
:ref:`bare metal installations <setup-bare_metal>`.
|
||||
|
||||
2. Update python requirements. Keep in mind to activate your virtual environment
|
||||
before that, if you use one.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
3. Migrate the database.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd src
|
||||
$ python3 manage.py migrate
|
||||
|
||||
This might not actually do anything. Not every new paperless version comes with new
|
||||
database migrations.
|
||||
|
||||
Downgrading Paperless
|
||||
#####################
|
||||
|
||||
Downgrades are possible. However, some updates also contain database migrations (these change the layout of the database and may move data).
|
||||
In order to move back from a version that applied database migrations, you'll have to revert the database migration *before* downgrading,
|
||||
and then downgrade paperless.
|
||||
|
||||
This table lists the compatible versions for each database migration number.
|
||||
|
||||
+------------------+-----------------+
|
||||
| Migration number | Version range |
|
||||
+------------------+-----------------+
|
||||
| 1011 | 1.0.0 |
|
||||
+------------------+-----------------+
|
||||
| 1012 | 1.1.0 - 1.2.1 |
|
||||
+------------------+-----------------+
|
||||
| 1014 | 1.3.0 - 1.3.1 |
|
||||
+------------------+-----------------+
|
||||
| 1016 | 1.3.2 - current |
|
||||
+------------------+-----------------+
|
||||
|
||||
Execute the following management command to migrate your database:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ python3 manage.py migrate documents <migration number>
|
||||
|
||||
.. note::
|
||||
|
||||
Some migrations cannot be undone. The command will issue errors if that happens.
|
||||
|
||||
.. _utilities-management-commands:
|
||||
|
||||
Management utilities
|
||||
####################
|
||||
|
||||
Paperless comes with some management commands that perform various maintenance
|
||||
tasks on your paperless instance. You can invoke these commands in the following way:
|
||||
|
||||
With docker-compose, while paperless is running:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose exec webserver <command> <arguments>
|
||||
|
||||
With docker, while paperless is running:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker exec -it <container-name> <command> <arguments>
|
||||
|
||||
Bare metal:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py <command> <arguments>
|
||||
|
||||
All commands have built-in help, which can be accessed by executing them with
|
||||
the argument ``--help``.
|
||||
|
||||
.. _utilities-exporter:
|
||||
|
||||
Document exporter
|
||||
=================
|
||||
|
||||
The document exporter exports all your data from paperless into a folder for
|
||||
backup or migration to another DMS.
|
||||
|
||||
If you use the document exporter within a cronjob to backup your data you might use the ``-T`` flag behind exec to suppress "The input device is not a TTY" errors. For example: ``docker-compose exec -T webserver document_exporter ../export``
|
||||
|
||||
.. code::
|
||||
|
||||
document_exporter target [-c] [-f] [-d]
|
||||
|
||||
optional arguments:
|
||||
-c, --compare-checksums
|
||||
-f, --use-filename-format
|
||||
-d, --delete
|
||||
|
||||
``target`` is a folder to which the data gets written. This includes documents,
|
||||
thumbnails and a ``manifest.json`` file. The manifest contains all metadata from
|
||||
the database (correspondents, tags, etc).
|
||||
|
||||
When you use the provided docker compose script, specify ``../export`` as the
|
||||
target. This path inside the container is automatically mounted on your host on
|
||||
the folder ``export``.
|
||||
|
||||
If the target directory already exists and contains files, paperless will assume
|
||||
that the contents of the export directory are a previous export and will attempt
|
||||
to update the previous export. Paperless will only export changed and added files.
|
||||
Paperless determines whether a file has changed by inspecting the file attributes
|
||||
"date/time modified" and "size". If that does not work out for you, specify
|
||||
``--compare-checksums`` and paperless will attempt to compare file checksums instead.
|
||||
This is slower.
|
||||
|
||||
Paperless will not remove any existing files in the export directory. If you want
|
||||
paperless to also remove files that do not belong to the current export such as files
|
||||
from deleted documents, specify ``--delete``. Be careful when pointing paperless to
|
||||
a directory that already contains other files.
|
||||
|
||||
The filenames generated by this command follow the format
|
||||
``[date created] [correspondent] [title].[extension]``.
|
||||
If you want paperless to use ``PAPERLESS_FILENAME_FORMAT`` for exported filenames
|
||||
instead, specify ``--use-filename-format``.
|
||||
|
||||
|
||||
.. _utilities-importer:
|
||||
|
||||
Document importer
|
||||
=================
|
||||
|
||||
The document importer takes the export produced by the `Document exporter`_ and
|
||||
imports it into paperless.
|
||||
|
||||
The importer works just like the exporter. You point it at a directory, and
|
||||
the script does the rest of the work:
|
||||
|
||||
.. code::
|
||||
|
||||
document_importer source
|
||||
|
||||
When you use the provided docker compose script, put the export inside the
|
||||
``export`` folder in your paperless source directory. Specify ``../export``
|
||||
as the ``source``.
|
||||
|
||||
.. note::
|
||||
|
||||
Importing from a previous version of Paperless may work, but for best results
|
||||
it is suggested to match the versions.
|
||||
|
||||
.. _utilities-retagger:
|
||||
|
||||
Document retagger
|
||||
=================
|
||||
|
||||
Say you've imported a few hundred documents and now want to introduce
|
||||
a tag or set up a new correspondent, and apply its matching to all of
|
||||
the currently-imported docs. This problem is common enough that
|
||||
there are tools for it.
|
||||
|
||||
.. code::
|
||||
|
||||
document_retagger [-h] [-c] [-T] [-t] [-s] [-i] [--use-first] [-f]
|
||||
|
||||
optional arguments:
|
||||
-c, --correspondent
|
||||
-T, --tags
|
||||
-t, --document_type
|
||||
-s, --storage_path
|
||||
-i, --inbox-only
|
||||
--use-first
|
||||
-f, --overwrite
|
||||
|
||||
Run this after changing or adding matching rules. It'll loop over all
|
||||
of the documents in your database and attempt to match documents
|
||||
according to the new rules.
|
||||
|
||||
Specify any combination of ``-c``, ``-T``, ``-t`` and ``-s`` to have the
|
||||
retagger perform matching of the specified metadata type. If you don't
|
||||
specify any of these options, the document retagger won't do anything.
|
||||
|
||||
Specify ``-i`` to have the document retagger work on documents tagged
|
||||
with inbox tags only. This is useful when you don't want to mess with
|
||||
your already processed documents.
|
||||
|
||||
When multiple document types or correspondents match a single document,
|
||||
the retagger won't assign these to the document. Specify ``--use-first``
|
||||
to override this behavior and just use the first correspondent or type
|
||||
it finds. This option does not apply to tags, since any amount of tags
|
||||
can be applied to a document.
|
||||
|
||||
Finally, ``-f`` specifies that you wish to overwrite already assigned
|
||||
correspondents, types and/or tags. The default behavior is to not
|
||||
assign correspondents and types to documents that have this data already
|
||||
assigned. ``-f`` works differently for tags: By default, only additional tags get
|
||||
added to documents, no tags will be removed. With ``-f``, tags that don't
|
||||
match a document anymore get removed as well.
|
||||
|
||||
|
||||
Managing the Automatic matching algorithm
|
||||
=========================================
|
||||
|
||||
The *Auto* matching algorithm requires a trained neural network to work.
|
||||
This network needs to be updated whenever something in your data
|
||||
changes. The docker image takes care of that automatically with the task
|
||||
scheduler. You can manually renew the classifier by invoking the following
|
||||
management command:
|
||||
|
||||
.. code::
|
||||
|
||||
document_create_classifier
|
||||
|
||||
This command takes no arguments.
|
||||
|
||||
.. _`administration-index`:
|
||||
|
||||
Managing the document search index
|
||||
==================================
|
||||
|
||||
The document search index is responsible for delivering search results for the
|
||||
website. The document index is automatically updated whenever documents get
|
||||
added to, changed, or removed from paperless. However, if the search yields
|
||||
non-existing documents or won't find anything, you may need to recreate the
|
||||
index manually.
|
||||
|
||||
.. code::
|
||||
|
||||
document_index {reindex,optimize}
|
||||
|
||||
Specify ``reindex`` to have the index created from scratch. This may take some
|
||||
time.
|
||||
|
||||
Specify ``optimize`` to optimize the index. This updates certain aspects of
|
||||
the index and usually makes queries faster and also ensures that the
|
||||
autocompletion works properly. This command is regularly invoked by the task
|
||||
scheduler.
|
||||
|
||||
.. _utilities-renamer:
|
||||
|
||||
Managing filenames
|
||||
==================
|
||||
|
||||
If you use paperless' feature to
|
||||
:ref:`assign custom filenames to your documents <advanced-file_name_handling>`,
|
||||
you can use this command to move all your files after changing
|
||||
the naming scheme.
|
||||
|
||||
.. warning::
|
||||
|
||||
Since this command moves your documents, it is advised to do
|
||||
a backup beforehand. The renaming logic is robust and will never overwrite
|
||||
or delete a file, but you can't ever be careful enough.
|
||||
|
||||
.. code::
|
||||
|
||||
document_renamer
|
||||
|
||||
The command takes no arguments and processes all your documents at once.
|
||||
|
||||
Learn how to use :ref:`Management Utilities<utilities-management-commands>`.
|
||||
|
||||
|
||||
.. _utilities-sanity-checker:
|
||||
|
||||
Sanity checker
|
||||
==============
|
||||
|
||||
Paperless has a built-in sanity checker that inspects your document collection for issues.
|
||||
|
||||
The issues detected by the sanity checker are as follows:
|
||||
|
||||
* Missing original files.
|
||||
* Missing archive files.
|
||||
* Inaccessible original files due to improper permissions.
|
||||
* Inaccessible archive files due to improper permissions.
|
||||
* Corrupted original documents by comparing their checksum against what is stored in the database.
|
||||
* Corrupted archive documents by comparing their checksum against what is stored in the database.
|
||||
* Missing thumbnails.
|
||||
* Inaccessible thumbnails due to improper permissions.
|
||||
* Documents without any content (warning).
|
||||
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.
|
||||
|
||||
|
||||
.. code::
|
||||
|
||||
document_sanity_checker
|
||||
|
||||
The command takes no arguments. Depending on the size of your document archive, this may take some time.
|
||||
|
||||
|
||||
Fetching e-mail
|
||||
===============
|
||||
|
||||
Paperless automatically fetches your e-mail every 10 minutes by default. If
|
||||
you want to invoke the email consumer manually, call the following management
|
||||
command:
|
||||
|
||||
.. code::
|
||||
|
||||
mail_fetcher
|
||||
|
||||
The command takes no arguments and processes all your mail accounts and rules.
|
||||
|
||||
.. note::
|
||||
|
||||
As of October 2022 Microsoft no longer supports IMAP authentication for Exchange
|
||||
servers, thus Exchange is no longer supported until a solution is implemented in
|
||||
the Python IMAP library used by Paperless. See `learn.microsoft.com`_
|
||||
|
||||
.. _learn.microsoft.com: https://learn.microsoft.com/en-us/exchange/clients-and-mobile-in-exchange-online/deprecation-of-basic-authentication-exchange-online
|
||||
|
||||
.. _utilities-archiver:
|
||||
|
||||
Creating archived documents
|
||||
===========================
|
||||
|
||||
Paperless stores archived PDF/A documents alongside your original documents.
|
||||
These archived documents will also contain selectable text for image-only
|
||||
originals.
|
||||
These documents are derived from the originals, which are always stored
|
||||
unmodified. If coming from an earlier version of paperless, your documents
|
||||
won't have archived versions.
|
||||
|
||||
This command creates PDF/A documents for your documents.
|
||||
|
||||
.. code::
|
||||
|
||||
document_archiver --overwrite --document <id>
|
||||
|
||||
This command will only attempt to create archived documents when no archived
|
||||
document exists yet, unless ``--overwrite`` is specified. If ``--document <id>``
|
||||
is specified, the archiver will only process that document.
|
||||
|
||||
.. note::
|
||||
|
||||
This command essentially performs OCR on all your documents again,
|
||||
according to your settings. If you run this with ``PAPERLESS_OCR_MODE=redo``,
|
||||
it will potentially run for a very long time. You can cancel the command
|
||||
at any time, since this command will skip already archived versions the next time
|
||||
it is run.
|
||||
|
||||
.. note::
|
||||
|
||||
Some documents will cause errors and cannot be converted into PDF/A documents,
|
||||
such as encrypted PDF documents. The archiver will skip over these documents
|
||||
each time it sees them.
|
||||
|
||||
.. _utilities-encyption:
|
||||
|
||||
Managing encryption
|
||||
===================
|
||||
|
||||
Documents can be stored in Paperless using GnuPG encryption.
|
||||
|
||||
.. danger::
|
||||
|
||||
Encryption is deprecated since paperless-ngx 0.9 and doesn't really provide any
|
||||
additional security, since you have to store the passphrase in a configuration
|
||||
file on the same system as the encrypted documents for paperless to work.
|
||||
Furthermore, the entire text content of the documents is stored plain in the
|
||||
database, even if your documents are encrypted. Filenames are not encrypted as
|
||||
well.
|
||||
|
||||
Also, the web server provides transparent access to your encrypted documents.
|
||||
|
||||
Consider running paperless on an encrypted filesystem instead, which will then
|
||||
at least provide security against physical hardware theft.
|
||||
|
||||
|
||||
Enabling encryption
|
||||
-------------------
|
||||
|
||||
Enabling encryption is no longer supported.
|
||||
|
||||
|
||||
Disabling encryption
|
||||
--------------------
|
||||
|
||||
Basic usage to disable encryption of your document store:
|
||||
|
||||
(Note: If ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
||||
|
||||
.. code::
|
||||
|
||||
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
|
503
docs/advanced_usage.md
Normal file
@@ -0,0 +1,503 @@
|
||||
# Advanced Topics
|
||||
|
||||
Paperless offers a couple features that automate certain tasks and make
|
||||
your life easier.
|
||||
|
||||
## Matching tags, correspondents, document types, and storage paths {#matching}
|
||||
|
||||
Paperless will compare the matching algorithms defined by every tag,
|
||||
correspondent, document type, and storage path in your database to see
|
||||
if they apply to the text in a document. In other words, if you define a
|
||||
tag called `Home Utility` that had a `match` property of `bc hydro` and
|
||||
a `matching_algorithm` of `literal`, Paperless will automatically tag
|
||||
your newly-consumed document with your `Home Utility` tag so long as the
|
||||
text `bc hydro` appears in the body of the document somewhere.
|
||||
|
||||
The matching logic is quite powerful. It supports searching the text of
|
||||
your document with different algorithms, and as such, some
|
||||
experimentation may be necessary to get things right.
|
||||
|
||||
In order to have a tag, correspondent, document type, or storage path
|
||||
assigned automatically to newly consumed documents, assign a match and
|
||||
matching algorithm using the web interface. These settings define when
|
||||
to assign tags, correspondents, document types, and storage paths to
|
||||
documents.
|
||||
|
||||
The following algorithms are available:
|
||||
|
||||
- **Any:** Looks for any occurrence of any word provided in match in
|
||||
the PDF. If you define the match as `Bank1 Bank2`, it will match
|
||||
documents containing either of these terms.
|
||||
- **All:** Requires that every word provided appears in the PDF,
|
||||
albeit not in the order provided.
|
||||
- **Literal:** Matches only if the match appears exactly as provided
|
||||
(i.e. preserve ordering) in the PDF.
|
||||
- **Regular expression:** Parses the match as a regular expression and
|
||||
tries to find a match within the document.
|
||||
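- **Fuzzy match:** Matches approximately, tolerating small differences between the match text and the document text. See the source for the exact behavior.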
|
||||
- **Auto:** Tries to automatically match new documents. This does not
|
||||
require you to set a match. See the notes below.
|
||||
|
||||
When using the _any_ or _all_ matching algorithms, you can search for
|
||||
terms that consist of multiple words by enclosing them in double quotes.
|
||||
For example, defining a match text of `"Bank of America" BofA` using the
|
||||
_any_ algorithm, will match documents that contain either "Bank of
|
||||
America" or "BofA", but will not match documents containing "Bank of
|
||||
South America".
|
||||
|
||||
Then just save your tag, correspondent, document type, or storage path
|
||||
and run another document through the consumer. Once complete, you should
|
||||
see the newly-created document, automatically tagged with the
|
||||
appropriate data.
|
||||
|
||||
### Automatic matching {#automatic-matching}
|
||||
|
||||
Paperless-ngx comes with a new matching algorithm called _Auto_. This
|
||||
matching algorithm tries to assign tags, correspondents, document types,
|
||||
and storage paths to your documents based on how you have already
|
||||
assigned these on existing documents. It uses a neural network under the
|
||||
hood.
|
||||
|
||||
If, for example, all your bank statements of your account 123 at the
|
||||
Bank of America are tagged with the tag "bofa123" and the matching
|
||||
algorithm of this tag is set to _Auto_, this neural network will examine
|
||||
your documents and automatically learn when to assign this tag.
|
||||
|
||||
Paperless tries to hide much of the involved complexity with this
|
||||
approach. However, there are a couple caveats you need to keep in mind
|
||||
when using this feature:
|
||||
|
||||
- Changes to your documents are not immediately reflected by the
|
||||
matching algorithm. The neural network needs to be _trained_ on your
|
||||
documents after changes. Paperless periodically (default: once each
|
||||
hour) checks for changes and does this automatically for you.
|
||||
- The Auto matching algorithm only takes documents into account which
|
||||
are NOT placed in your inbox (i.e. do not have any inbox tags assigned to
|
||||
them). This ensures that the neural network only learns from
|
||||
documents which you have correctly tagged before.
|
||||
- The matching algorithm can only work if there is a correlation
|
||||
between the tag, correspondent, document type, or storage path and
|
||||
the document itself. Your bank statements usually contain your bank
|
||||
account number and the name of the bank, so this works reasonably
|
||||
well. However, tags such as "TODO" cannot be automatically
|
||||
assigned.
|
||||
- The matching algorithm needs a reasonable number of documents to
|
||||
identify when to assign tags, correspondents, storage paths, and
|
||||
types. If one out of a thousand documents has the correspondent
|
||||
"Very obscure web shop I bought something five years ago", it will
|
||||
probably not assign this correspondent automatically if you buy
|
||||
something from them again. The more documents, the better.
|
||||
- Paperless also needs a reasonable amount of negative examples to
|
||||
decide when not to assign a certain tag, correspondent, document
|
||||
type, or storage path. This will usually be the case as you start
|
||||
filling up paperless with documents. Example: If all your documents
|
||||
are either from "Webshop" or "Bank", paperless will assign one
|
||||
of these correspondents to ANY new document, if both are set to
|
||||
automatic matching.
|
||||
|
||||
## Hooking into the consumption process {#consume-hooks}
|
||||
|
||||
Sometimes you may want to do something arbitrary whenever a document is
|
||||
consumed. Rather than try to predict what you may want to do, Paperless
|
||||
lets you execute scripts of your own choosing just before or after a
|
||||
document is consumed using a couple simple hooks.
|
||||
|
||||
Just write a script, put it somewhere that Paperless can read & execute,
|
||||
and then put the path to that script in `paperless.conf` or
|
||||
`docker-compose.env` with the variable name of either
|
||||
`PAPERLESS_PRE_CONSUME_SCRIPT` or `PAPERLESS_POST_CONSUME_SCRIPT`.
|
||||
|
||||
!!! info
|
||||
|
||||
These scripts are executed in a **blocking** process, which means that
|
||||
if a script takes a long time to run, it can significantly slow down
|
||||
your document consumption flow. If you want things to run
|
||||
asynchronously, you'll have to fork the process in your script and
|
||||
exit.
|
||||
|
||||
### Pre-consumption script {#pre-consume-script}
|
||||
|
||||
Executed after the consumer sees a new document in the consumption
|
||||
folder, but before any processing of the document is performed. This
|
||||
script can access the following relevant environment variables:
|
||||
|
||||
| Environment Variable | Description |
|
||||
| ----------------------- | ------------------------------------------------------------ |
|
||||
| `DOCUMENT_SOURCE_PATH` | Original path of the consumed document |
|
||||
| `DOCUMENT_WORKING_PATH` | Path to a copy of the original that consumption will work on |
|
||||
|
||||
!!! note
|
||||
|
||||
Pre-consume scripts which modify the document should only change
|
||||
the `DOCUMENT_WORKING_PATH` file or a second consume task may
|
||||
be triggered, leading to failures as two tasks work on the
|
||||
same document path
|
||||
|
||||
A simple but common example for this would be creating a simple script
|
||||
like this:
|
||||
|
||||
`/usr/local/bin/ocr-pdf`
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
pdf2pdfocr.py -i ${DOCUMENT_WORKING_PATH}
|
||||
```
|
||||
|
||||
`/etc/paperless.conf`
|
||||
|
||||
```bash
|
||||
...
|
||||
PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf"
|
||||
...
|
||||
```
|
||||
|
||||
This will pass the path to the document about to be consumed to
|
||||
`/usr/local/bin/ocr-pdf`, which will in turn call
|
||||
[pdf2pdfocr.py](https://github.com/LeoFCardoso/pdf2pdfocr) on your
|
||||
document, which will then overwrite the file with an OCR'd version of
|
||||
the file and exit. At which point, the consumption process will begin
|
||||
with the newly modified file.
|
||||
|
||||
The script's stdout and stderr will be logged line by line to the
|
||||
webserver log, along with the exit code of the script.
|
||||
|
||||
### Post-consumption script {#post-consume-script}
|
||||
|
||||
Executed after the consumer has successfully processed a document and
|
||||
has moved it into paperless. It receives the following environment
|
||||
variables:
|
||||
|
||||
| Environment Variable | Description |
|
||||
| ---------------------------- | --------------------------------------------- |
|
||||
| `DOCUMENT_ID` | Database primary key of the document |
|
||||
| `DOCUMENT_FILE_NAME` | Formatted filename, not including paths |
|
||||
| `DOCUMENT_CREATED` | Date & time when document was created |
|
||||
| `DOCUMENT_MODIFIED` | Date & time when document was last modified |
|
||||
| `DOCUMENT_ADDED` | Date & time when document was added |
|
||||
| `DOCUMENT_SOURCE_PATH` | Path to the original document file |
|
||||
| `DOCUMENT_ARCHIVE_PATH` | Path to the generated archive file (if any) |
|
||||
| `DOCUMENT_THUMBNAIL_PATH` | Path to the generated thumbnail |
|
||||
| `DOCUMENT_DOWNLOAD_URL` | URL for document download |
|
||||
| `DOCUMENT_THUMBNAIL_URL` | URL for the document thumbnail |
|
||||
| `DOCUMENT_CORRESPONDENT` | Assigned correspondent (if any) |
|
||||
| `DOCUMENT_TAGS` | Comma separated list of tags applied (if any) |
|
||||
| `DOCUMENT_ORIGINAL_FILENAME` | Filename of original document |
|
||||
|
||||
The script can be in any language. A simple shell script example:
|
||||
|
||||
```bash title="post-consumption-example"
|
||||
--8<-- "./scripts/post-consumption-example.sh"
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
The post consumption script cannot cancel the consumption process.
|
||||
|
||||
!!! warning
|
||||
|
||||
The post consumption script should not modify the document files
|
||||
directly
|
||||
|
||||
The script's stdout and stderr will be logged line by line to the
|
||||
webserver log, along with the exit code of the script.
|
||||
|
||||
### Docker {#docker-consume-hooks}
|
||||
|
||||
To hook into the consumption process when using Docker, you
|
||||
will need to pass the scripts into the container via a host mount
|
||||
in your `docker-compose.yml`.
|
||||
|
||||
Assuming you have
|
||||
`/home/paperless-ngx/scripts/post-consumption-example.sh` as a
|
||||
script which you'd like to run.
|
||||
|
||||
You can pass that script into the consumer container via a host mount:
|
||||
|
||||
```yaml
|
||||
...
|
||||
webserver:
|
||||
...
|
||||
volumes:
|
||||
...
|
||||
- /home/paperless-ngx/scripts:/path/in/container/scripts/ # (1)!
|
||||
environment: # (3)!
|
||||
...
|
||||
PAPERLESS_POST_CONSUME_SCRIPT: /path/in/container/scripts/post-consumption-example.sh # (2)!
|
||||
...
|
||||
```
|
||||
|
||||
1. The external scripts directory is mounted to a location inside the container.
|
||||
2. The internal location of the script is used to set the script to run
|
||||
3. This can also be set in `docker-compose.env`
|
||||
|
||||
Troubleshooting:
|
||||
|
||||
- Monitor the docker-compose log
|
||||
`cd ~/paperless-ngx; docker-compose logs -f`
|
||||
- Check your script's permission e.g. in case of permission error
|
||||
`sudo chmod 755 post-consumption-example.sh`
|
||||
- Pipe your script's output to a log file e.g.
|
||||
`echo "${DOCUMENT_ID}" | tee --append /usr/src/paperless/scripts/post-consumption-example.log`
|
||||
|
||||
## File name handling {#file-name-handling}
|
||||
|
||||
By default, paperless stores your documents in the media directory and
|
||||
renames them using the identifier which it has assigned to each
|
||||
document. You will end up getting files like `0000123.pdf` in your media
|
||||
directory. This isn't necessarily a bad thing, because you normally
|
||||
don't have to access these files manually. However, if you wish to name
|
||||
your files differently, you can do that by adjusting the
|
||||
`PAPERLESS_FILENAME_FORMAT` configuration option. Paperless adds the
|
||||
correct file extension e.g. `.pdf`, `.jpg` automatically.
|
||||
|
||||
This variable allows you to configure the filename (folders are allowed)
|
||||
using placeholders. For example, configuring this to
|
||||
|
||||
```bash
|
||||
PAPERLESS_FILENAME_FORMAT={created_year}/{correspondent}/{title}
|
||||
```
|
||||
|
||||
will create a directory structure as follows:
|
||||
|
||||
```
|
||||
2019/
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Statement February.pdf
|
||||
2020/
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Letter.pdf
|
||||
Letter_01.pdf
|
||||
Shoe store/
|
||||
My new shoes.pdf
|
||||
```
|
||||
|
||||
!!! warning
|
||||
|
||||
Do not manually move your files in the media folder. Paperless remembers
|
||||
the last filename a document was stored as. If you do rename a file,
|
||||
paperless will report your files as missing and won't be able to find
|
||||
them.
|
||||
|
||||
Paperless provides the following placeholders within filenames:
|
||||
|
||||
- `{asn}`: The archive serial number of the document, or "none".
|
||||
- `{correspondent}`: The name of the correspondent, or "none".
|
||||
- `{document_type}`: The name of the document type, or "none".
|
||||
- `{tag_list}`: A comma separated list of all tags assigned to the
|
||||
document.
|
||||
- `{title}`: The title of the document.
|
||||
- `{created}`: The full date (ISO format) the document was created.
|
||||
- `{created_year}`: Year created only, formatted as the year with
|
||||
century.
|
||||
- `{created_year_short}`: Year created only, formatted as the year
|
||||
without century, zero padded.
|
||||
- `{created_month}`: Month created only (number 01-12).
|
||||
- `{created_month_name}`: Month created name, as per locale
|
||||
- `{created_month_name_short}`: Month created abbreviated name, as per
|
||||
locale
|
||||
- `{created_day}`: Day created only (number 01-31).
|
||||
- `{added}`: The full date (ISO format) the document was added to
|
||||
paperless.
|
||||
- `{added_year}`: Year added only.
|
||||
- `{added_year_short}`: Year added only, formatted as the year without
|
||||
century, zero padded.
|
||||
- `{added_month}`: Month added only (number 01-12).
|
||||
- `{added_month_name}`: Month added name, as per locale
|
||||
- `{added_month_name_short}`: Month added abbreviated name, as per
|
||||
locale
|
||||
- `{added_day}`: Day added only (number 01-31).
|
||||
|
||||
Paperless will try to conserve the information from your database as
|
||||
much as possible. However, some characters that you can use in document
|
||||
titles and correspondent names (such as `: \ /` and a couple more) are
|
||||
not allowed in filenames and will be replaced with dashes.
|
||||
|
||||
If paperless detects that two documents share the same filename,
|
||||
paperless will automatically append `_01`, `_02`, etc to the filename.
|
||||
This happens if all the placeholders in a filename evaluate to the same
|
||||
value.
|
||||
|
||||
!!! tip
|
||||
|
||||
You can affect how empty placeholders are treated by changing the
|
||||
following setting to `true`.
|
||||
|
||||
```
|
||||
PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=True
|
||||
```
|
||||
|
||||
Doing this results in all empty placeholders resolving to "" instead
|
||||
of "none" as stated above. Spaces before empty placeholders are
|
||||
removed as well, empty directories are omitted.
|
||||
|
||||
!!! tip
|
||||
|
||||
Paperless checks the filename of a document whenever it is saved.
|
||||
Therefore, you need to update the filenames of your documents and move
|
||||
them after altering this setting by invoking the
|
||||
[`document renamer`](/administration#renamer).
|
||||
|
||||
!!! warning
|
||||
|
||||
Make absolutely sure you get the spelling of the placeholders right, or
|
||||
else paperless will use the default naming scheme instead.
|
||||
|
||||
!!! caution
|
||||
|
||||
As of now, you could totally tell paperless to store your files anywhere
|
||||
outside the media directory by setting
|
||||
|
||||
```
|
||||
PAPERLESS_FILENAME_FORMAT=../../my/custom/location/{title}
|
||||
```
|
||||
|
||||
However, keep in mind that inside docker, if files get stored outside of
|
||||
the predefined volumes, they will be lost after a restart of paperless.
|
||||
|
||||
!!! warning
|
||||
|
||||
When using file name handling, in particular when using `{tag_list}`,
|
||||
you may run into the limits of your operating system's maximum
|
||||
path lengths. Files will retain the previous path instead and
|
||||
the issue will be logged.
|
||||
|
||||
## Storage paths
|
||||
|
||||
One of the best things in Paperless is that you can not only access the
|
||||
documents via the web interface, but also via the file system.
|
||||
|
||||
When a single storage layout is not sufficient for your use case,
|
||||
storage paths come to the rescue. Storage paths allow you to configure
|
||||
more precisely where each document is stored in the file system.
|
||||
|
||||
- Each storage path is a `PAPERLESS_FILENAME_FORMAT` and
|
||||
follows the rules described above
|
||||
- Each document is assigned a storage path using the matching
|
||||
algorithms described above, but can be overwritten at any time
|
||||
|
||||
For example, you could define the following two storage paths:
|
||||
|
||||
1. Normal communications are put into a folder structure sorted by
|
||||
`year/correspondent`
|
||||
2. Communications with insurance companies are stored in a flat
|
||||
structure with longer file names, but containing the full date of
|
||||
the correspondence.
|
||||
|
||||
```
|
||||
By Year = {created_year}/{correspondent}/{title}
|
||||
Insurances = Insurances/{correspondent}/{created_year}-{created_month}-{created_day} {title}
|
||||
```
|
||||
|
||||
If you then map these storage paths to the documents, you might get the
|
||||
following result. For simplicity, `By Year` defines the same
|
||||
structure as in the previous example above.
|
||||
|
||||
```text
|
||||
2019/ # By Year
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Statement February.pdf
|
||||
|
||||
Insurances/ # Insurances
|
||||
Healthcare 123/
|
||||
2022-01-01 Statement January.pdf
|
||||
2022-02-02 Letter.pdf
|
||||
2022-02-03 Letter.pdf
|
||||
Dental 456/
|
||||
2021-12-01 New Conditions.pdf
|
||||
```
|
||||
|
||||
!!! tip
|
||||
|
||||
Defining a storage path is optional. If no storage path is defined for a
|
||||
document, the global `PAPERLESS_FILENAME_FORMAT` is applied.
|
||||
|
||||
!!! warning
|
||||
|
||||
If you adjust the format of an existing storage path, old documents
|
||||
don't get relocated automatically. You need to run the
|
||||
[document renamer](/administration#renamer) to
|
||||
adjust their paths.
|
||||
|
||||
## Celery Monitoring {#celery-monitoring}
|
||||
|
||||
The monitoring tool
|
||||
[Flower](https://flower.readthedocs.io/en/latest/index.html) can be used
|
||||
to view more detailed information about the health of the celery workers
|
||||
used for asynchronous tasks. This includes details on currently running,
|
||||
queued and completed tasks, timing and more. Flower can also be used
|
||||
with Prometheus, as it exports metrics. For details on its capabilities,
|
||||
refer to the Flower documentation.
|
||||
|
||||
To configure Flower further, create a `flowerconfig.py` and
|
||||
place it into the `src/paperless` directory. For a Docker
|
||||
installation, you can use volumes to accomplish this:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
# ...
|
||||
webserver:
|
||||
ports:
|
||||
- 5555:5555 # (2)!
|
||||
# ...
|
||||
volumes:
|
||||
- /path/to/my/flowerconfig.py:/usr/src/paperless/src/paperless/flowerconfig.py:ro # (1)!
|
||||
```
|
||||
|
||||
1. Note the `:ro` tag means the file will be mounted as read only.
|
||||
2. `flower` runs by default on port 5555, but this can be configured
|
||||
|
||||
## Custom Container Initialization
|
||||
|
||||
The Docker image includes the ability to run custom user scripts during
|
||||
startup. This could be utilized for installing additional tools or
|
||||
Python packages, for example. Scripts are expected to be shell scripts.
|
||||
|
||||
To utilize this, mount a folder containing your scripts to the custom
|
||||
initialization directory, `/custom-cont-init.d` and place
|
||||
scripts you wish to run inside. For security, the folder must be owned
|
||||
by `root` and should have permissions of `a=rx`. Additionally, scripts
|
||||
must only be writable by `root`.
|
||||
|
||||
Your scripts will be run directly before the webserver completes
|
||||
startup. Scripts will be run by the `root` user.
|
||||
If you would like to switch users, the utility `gosu` is available and
|
||||
preferred over `sudo`.
|
||||
|
||||
This is an advanced functionality with which you could break functionality
|
||||
or lose data. If you experience issues, please disable any custom scripts
|
||||
and try again before reporting an issue.
|
||||
|
||||
For example, using Docker Compose:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
# ...
|
||||
webserver:
|
||||
# ...
|
||||
volumes:
|
||||
- /path/to/my/scripts:/custom-cont-init.d:ro # (1)!
|
||||
```
|
||||
|
||||
1. Note the `:ro` tag means the folder will be mounted as read only. This is for extra security against changes
|
||||
|
||||
## MySQL Caveats {#mysql-caveats}
|
||||
|
||||
### Case Sensitivity
|
||||
|
||||
The database interface does not provide a method to configure a MySQL
|
||||
database to be case sensitive. This would prevent a user from creating a
|
||||
tag `Name` and `NAME` as they are considered the same.
|
||||
|
||||
Per Django documentation, to enable this requires manual intervention.
|
||||
To enable case-sensitive tables, you can execute the following command
|
||||
against each table:
|
||||
|
||||
`ALTER TABLE <table_name> CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;`
|
||||
|
||||
You can also set the default for new tables (this does NOT affect
|
||||
existing tables) with:
|
||||
|
||||
`ALTER DATABASE <db_name> CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;`
|
@@ -1,447 +0,0 @@
|
||||
***************
|
||||
Advanced topics
|
||||
***************
|
||||
|
||||
Paperless offers a couple features that automate certain tasks and make your life
|
||||
easier.
|
||||
|
||||
.. _advanced-matching:
|
||||
|
||||
Matching tags, correspondents, document types, and storage paths
|
||||
################################################################
|
||||
|
||||
Paperless will compare the matching algorithms defined by every tag, correspondent,
|
||||
document type, and storage path in your database to see if they apply to the text
|
||||
in a document. In other words, if you define a tag called ``Home Utility``
|
||||
that had a ``match`` property of ``bc hydro`` and a ``matching_algorithm`` of
|
||||
``literal``, Paperless will automatically tag your newly-consumed document with
|
||||
your ``Home Utility`` tag so long as the text ``bc hydro`` appears in the body
|
||||
of the document somewhere.
|
||||
|
||||
The matching logic is quite powerful. It supports searching the text of your
|
||||
document with different algorithms, and as such, some experimentation may be
|
||||
necessary to get things right.
|
||||
|
||||
In order to have a tag, correspondent, document type, or storage path assigned
|
||||
automatically to newly consumed documents, assign a match and matching algorithm
|
||||
using the web interface. These settings define when to assign tags, correspondents,
|
||||
document types, and storage paths to documents.
|
||||
|
||||
The following algorithms are available:
|
||||
|
||||
* **Any:** Looks for any occurrence of any word provided in match in the PDF.
|
||||
If you define the match as ``Bank1 Bank2``, it will match documents containing
|
||||
either of these terms.
|
||||
* **All:** Requires that every word provided appears in the PDF, albeit not in the
|
||||
order provided.
|
||||
* **Literal:** Matches only if the match appears exactly as provided (i.e. preserve ordering) in the PDF.
|
||||
* **Regular expression:** Parses the match as a regular expression and tries to
|
||||
find a match within the document.
|
||||
* **Fuzzy match:** Matches approximately rather than exactly, tolerating small differences in the text. See the source for the exact behavior.
|
||||
* **Auto:** Tries to automatically match new documents. This does not require you
|
||||
to set a match. See the notes below.
|
||||
|
||||
When using the *any* or *all* matching algorithms, you can search for terms
|
||||
that consist of multiple words by enclosing them in double quotes. For example,
|
||||
defining a match text of ``"Bank of America" BofA`` using the *any* algorithm,
|
||||
will match documents that contain either "Bank of America" or "BofA", but will
|
||||
not match documents containing "Bank of South America".
|
||||
|
||||
Then just save your tag, correspondent, document type, or storage path and run
|
||||
another document through the consumer. Once complete, you should see the
|
||||
newly-created document, automatically tagged with the appropriate data.
|
||||
|
||||
|
||||
.. _advanced-automatic_matching:
|
||||
|
||||
Automatic matching
|
||||
==================
|
||||
|
||||
Paperless-ngx comes with a new matching algorithm called *Auto*. This matching
|
||||
algorithm tries to assign tags, correspondents, document types, and storage paths
|
||||
to your documents based on how you have already assigned these on existing documents.
|
||||
It uses a neural network under the hood.
|
||||
|
||||
If, for example, all your bank statements of your account 123 at the Bank of
|
||||
America are tagged with the tag "bofa_123" and the matching algorithm of this
|
||||
tag is set to *Auto*, this neural network will examine your documents and
|
||||
automatically learn when to assign this tag.
|
||||
|
||||
Paperless tries to hide much of the involved complexity with this approach.
|
||||
However, there are a couple caveats you need to keep in mind when using this
|
||||
feature:
|
||||
|
||||
* Changes to your documents are not immediately reflected by the matching
|
||||
algorithm. The neural network needs to be *trained* on your documents after
|
||||
changes. Paperless periodically (default: once each hour) checks for changes
|
||||
and does this automatically for you.
|
||||
* The Auto matching algorithm only takes documents into account which are NOT
|
||||
placed in your inbox (i.e. have any inbox tags assigned to them). This ensures
|
||||
that the neural network only learns from documents which you have correctly
|
||||
tagged before.
|
||||
* The matching algorithm can only work if there is a correlation between the
|
||||
tag, correspondent, document type, or storage path and the document itself.
|
||||
Your bank statements usually contain your bank account number and the name
|
||||
of the bank, so this works reasonably well. However, tags such as "TODO"
|
||||
cannot be automatically assigned.
|
||||
* The matching algorithm needs a reasonable number of documents to identify when
|
||||
to assign tags, correspondents, storage paths, and types. If one out of a
|
||||
thousand documents has the correspondent "Very obscure web shop I bought
|
||||
something five years ago", it will probably not assign this correspondent
|
||||
automatically if you buy something from them again. The more documents, the better.
|
||||
* Paperless also needs a reasonable amount of negative examples to decide when
|
||||
not to assign a certain tag, correspondent, document type, or storage path. This will
|
||||
usually be the case as you start filling up paperless with documents.
|
||||
Example: If all your documents are from either "Webshop" or "Bank", paperless
|
||||
will assign one of these correspondents to ANY new document, if both are set
|
||||
to automatic matching.
|
||||
|
||||
Hooking into the consumption process
|
||||
####################################
|
||||
|
||||
Sometimes you may want to do something arbitrary whenever a document is
|
||||
consumed. Rather than try to predict what you may want to do, Paperless lets
|
||||
you execute scripts of your own choosing just before or after a document is
|
||||
consumed using a couple simple hooks.
|
||||
|
||||
Just write a script, put it somewhere that Paperless can read & execute, and
|
||||
then put the path to that script in ``paperless.conf`` or ``docker-compose.env`` with the variable name
|
||||
of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or
|
||||
``PAPERLESS_POST_CONSUME_SCRIPT``.
|
||||
|
||||
.. important::
|
||||
|
||||
These scripts are executed in a **blocking** process, which means that if
|
||||
a script takes a long time to run, it can significantly slow down your
|
||||
document consumption flow. If you want things to run asynchronously,
|
||||
you'll have to fork the process in your script and exit.
|
||||
|
||||
|
||||
Pre-consumption script
|
||||
======================
|
||||
|
||||
Executed after the consumer sees a new document in the consumption folder, but
|
||||
before any processing of the document is performed. This script can access the
|
||||
following relevant environment variables:
|
||||
|
||||
* ``DOCUMENT_SOURCE_PATH``
|
||||
|
||||
A simple but common example for this would be creating a simple script like
|
||||
this:
|
||||
|
||||
``/usr/local/bin/ocr-pdf``
|
||||
|
||||
.. code:: bash
|
||||
|
||||
#!/usr/bin/env bash
|
||||
pdf2pdfocr.py -i ${DOCUMENT_SOURCE_PATH}
|
||||
|
||||
``/etc/paperless.conf``
|
||||
|
||||
.. code:: bash
|
||||
|
||||
...
|
||||
PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf"
|
||||
...
|
||||
|
||||
This will pass the path to the document about to be consumed to ``/usr/local/bin/ocr-pdf``,
|
||||
which will in turn call `pdf2pdfocr.py`_ on your document, which will then
|
||||
overwrite the file with an OCR'd version of the file and exit. At which point,
|
||||
the consumption process will begin with the newly modified file.
|
||||
|
||||
The script's stdout and stderr will be logged line by line to the webserver log, along
|
||||
with the exit code of the script.
|
||||
|
||||
.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr
|
||||
|
||||
.. _advanced-post_consume_script:
|
||||
|
||||
Post-consumption script
|
||||
=======================
|
||||
|
||||
Executed after the consumer has successfully processed a document and has moved it
|
||||
into paperless. It receives the following environment variables:
|
||||
|
||||
* ``DOCUMENT_ID``
|
||||
* ``DOCUMENT_FILE_NAME``
|
||||
* ``DOCUMENT_CREATED``
|
||||
* ``DOCUMENT_MODIFIED``
|
||||
* ``DOCUMENT_ADDED``
|
||||
* ``DOCUMENT_SOURCE_PATH``
|
||||
* ``DOCUMENT_ARCHIVE_PATH``
|
||||
* ``DOCUMENT_THUMBNAIL_PATH``
|
||||
* ``DOCUMENT_DOWNLOAD_URL``
|
||||
* ``DOCUMENT_THUMBNAIL_URL``
|
||||
* ``DOCUMENT_CORRESPONDENT``
|
||||
* ``DOCUMENT_TAGS``
|
||||
* ``DOCUMENT_ORIGINAL_FILENAME``
|
||||
|
||||
The script can be in any language, but for a simple shell script
|
||||
example, you can take a look at `post-consumption-example.sh`_ in this project.
|
||||
|
||||
The post consumption script cannot cancel the consumption process.
|
||||
|
||||
The script's stdout and stderr will be logged line by line to the webserver log, along
|
||||
with the exit code of the script.
|
||||
|
||||
|
||||
Docker
|
||||
------
|
||||
Assume you have ``/home/foo/paperless-ngx/scripts/post-consumption-example.sh``.
|
||||
|
||||
You can pass that script into the consumer container via a host mount in your ``docker-compose.yml``.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
...
|
||||
consumer:
|
||||
...
|
||||
volumes:
|
||||
...
|
||||
- /home/paperless-ngx/scripts:/path/in/container/scripts/
|
||||
...
|
||||
|
||||
Example (docker-compose.yml): ``- /home/foo/paperless-ngx/scripts:/usr/src/paperless/scripts``
|
||||
|
||||
which in turn requires the variable ``PAPERLESS_POST_CONSUME_SCRIPT`` in ``docker-compose.env`` to point to ``/path/in/container/scripts/post-consumption-example.sh``.
|
||||
|
||||
Example (docker-compose.env): ``PAPERLESS_POST_CONSUME_SCRIPT=/usr/src/paperless/scripts/post-consumption-example.sh``
|
||||
|
||||
Troubleshooting:
|
||||
|
||||
- Monitor the docker-compose log ``cd ~/paperless-ngx; docker-compose logs -f``
|
||||
- Check your script's permission e.g. in case of permission error ``sudo chmod 755 post-consumption-example.sh``
|
||||
- Pipe your script's output to a log file e.g. ``echo "${DOCUMENT_ID}" | tee --append /usr/src/paperless/scripts/post-consumption-example.log``
|
||||
|
||||
.. _post-consumption-example.sh: https://github.com/paperless-ngx/paperless-ngx/blob/main/scripts/post-consumption-example.sh
|
||||
|
||||
.. _advanced-file_name_handling:
|
||||
|
||||
File name handling
|
||||
##################
|
||||
|
||||
By default, paperless stores your documents in the media directory and renames them
|
||||
using the identifier which it has assigned to each document. You will end up getting
|
||||
files like ``0000123.pdf`` in your media directory. This isn't necessarily a bad
|
||||
thing, because you normally don't have to access these files manually. However, if
|
||||
you wish to name your files differently, you can do that by adjusting the
|
||||
``PAPERLESS_FILENAME_FORMAT`` configuration option. Paperless adds the correct
|
||||
file extension e.g. ``.pdf``, ``.jpg`` automatically.
|
||||
|
||||
This variable allows you to configure the filename (folders are allowed) using
|
||||
placeholders. For example, configuring this to
|
||||
|
||||
.. code:: bash
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT={created_year}/{correspondent}/{title}
|
||||
|
||||
will create a directory structure as follows:
|
||||
|
||||
.. code::
|
||||
|
||||
2019/
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Statement February.pdf
|
||||
2020/
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Letter.pdf
|
||||
Letter_01.pdf
|
||||
Shoe store/
|
||||
My new shoes.pdf
|
||||
|
||||
.. danger::
|
||||
|
||||
Do not manually move your files in the media folder. Paperless remembers the
|
||||
last filename a document was stored as. If you do rename a file, paperless will
|
||||
report your files as missing and won't be able to find them.
|
||||
|
||||
Paperless provides the following placeholders within filenames:
|
||||
|
||||
* ``{asn}``: The archive serial number of the document, or "none".
|
||||
* ``{correspondent}``: The name of the correspondent, or "none".
|
||||
* ``{document_type}``: The name of the document type, or "none".
|
||||
* ``{tag_list}``: A comma separated list of all tags assigned to the document.
|
||||
* ``{title}``: The title of the document.
|
||||
* ``{created}``: The full date (ISO format) the document was created.
|
||||
* ``{created_year}``: Year created only, formatted as the year with century.
|
||||
* ``{created_year_short}``: Year created only, formatted as the year without century, zero padded.
|
||||
* ``{created_month}``: Month created only (number 01-12).
|
||||
* ``{created_month_name}``: Month created name, as per locale
|
||||
* ``{created_month_name_short}``: Month created abbreviated name, as per locale
|
||||
* ``{created_day}``: Day created only (number 01-31).
|
||||
* ``{added}``: The full date (ISO format) the document was added to paperless.
|
||||
* ``{added_year}``: Year added only.
|
||||
* ``{added_year_short}``: Year added only, formatted as the year without century, zero padded.
|
||||
* ``{added_month}``: Month added only (number 01-12).
|
||||
* ``{added_month_name}``: Month added name, as per locale
|
||||
* ``{added_month_name_short}``: Month added abbreviated name, as per locale
|
||||
* ``{added_day}``: Day added only (number 01-31).
|
||||
|
||||
|
||||
Paperless will try to conserve the information from your database as much as possible.
|
||||
However, some characters that you can use in document titles and correspondent names (such
|
||||
as ``: \ /`` and a couple more) are not allowed in filenames and will be replaced with dashes.
|
||||
|
||||
If paperless detects that two documents share the same filename, paperless will automatically
|
||||
append ``_01``, ``_02``, etc to the filename. This happens if all the placeholders in a filename
|
||||
evaluate to the same value.
|
||||
|
||||
.. hint::
|
||||
You can affect how empty placeholders are treated by changing the following setting to
|
||||
`true`.
|
||||
|
||||
.. code::
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=True
|
||||
|
||||
Doing this results in all empty placeholders resolving to "" instead of "none" as stated above.
|
||||
Spaces before empty placeholders are removed as well, empty directories are omitted.
|
||||
|
||||
.. hint::
|
||||
|
||||
Paperless checks the filename of a document whenever it is saved. Therefore,
|
||||
you need to update the filenames of your documents and move them after altering
|
||||
this setting by invoking the :ref:`document renamer <utilities-renamer>`.
|
||||
|
||||
.. warning::
|
||||
|
||||
Make absolutely sure you get the spelling of the placeholders right, or else
|
||||
paperless will use the default naming scheme instead.
|
||||
|
||||
.. caution::
|
||||
|
||||
As of now, you could totally tell paperless to store your files anywhere outside
|
||||
the media directory by setting
|
||||
|
||||
.. code::
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT=../../my/custom/location/{title}
|
||||
|
||||
However, keep in mind that inside docker, if files get stored outside of the
|
||||
predefined volumes, they will be lost after a restart of paperless.
|
||||
|
||||
|
||||
Storage paths
|
||||
#############
|
||||
|
||||
One of the best things in Paperless is that you can not only access the documents via the
|
||||
web interface, but also via the file system.
|
||||
|
||||
When a single storage layout is not sufficient for your use case, storage paths come to
|
||||
the rescue. Storage paths allow you to configure more precisely where each document is stored
|
||||
in the file system.
|
||||
|
||||
- Each storage path is a `PAPERLESS_FILENAME_FORMAT` and follows the rules described above
|
||||
- Each document is assigned a storage path using the matching algorithms described above, but
|
||||
can be overwritten at any time
|
||||
|
||||
For example, you could define the following two storage paths:
|
||||
|
||||
1. Normal communications are put into a folder structure sorted by `year/correspondent`
|
||||
2. Communications with insurance companies are stored in a flat structure with longer file names,
|
||||
but containing the full date of the correspondence.
|
||||
|
||||
.. code::
|
||||
|
||||
By Year = {created_year}/{correspondent}/{title}
|
||||
Insurances = Insurances/{correspondent}/{created_year}-{created_month}-{created_day} {title}
|
||||
|
||||
|
||||
If you then map these storage paths to the documents, you might get the following result.
|
||||
For simplicity, `By Year` defines the same structure as in the previous example above.
|
||||
|
||||
.. code:: text
|
||||
|
||||
2019/ # By Year
|
||||
My bank/
|
||||
Statement January.pdf
|
||||
Statement February.pdf
|
||||
|
||||
Insurances/ # Insurances
|
||||
Healthcare 123/
|
||||
2022-01-01 Statement January.pdf
|
||||
2022-02-02 Letter.pdf
|
||||
2022-02-03 Letter.pdf
|
||||
Dental 456/
|
||||
2021-12-01 New Conditions.pdf
|
||||
|
||||
|
||||
.. hint::
|
||||
|
||||
Defining a storage path is optional. If no storage path is defined for a document, the global
|
||||
`PAPERLESS_FILENAME_FORMAT` is applied.
|
||||
|
||||
.. caution::
|
||||
|
||||
If you adjust the format of an existing storage path, old documents don't get relocated automatically.
|
||||
You need to run the :ref:`document renamer <utilities-renamer>` to adjust their paths.
|
||||
|
||||
.. _advanced-celery-monitoring:
|
||||
|
||||
Celery Monitoring
|
||||
#################
|
||||
|
||||
The monitoring tool `Flower <https://flower.readthedocs.io/en/latest/index.html>`_ can be used to view more
|
||||
detailed information about the health of the celery workers used for asynchronous tasks. This includes details
|
||||
on currently running, queued and completed tasks, timing and more. Flower can also be used with Prometheus, as it
|
||||
exports metrics. For details on its capabilities, refer to the Flower documentation.
|
||||
|
||||
To configure Flower further, create a `flowerconfig.py` and place it into the `src/paperless` directory. For
|
||||
a Docker installation, you can use volumes to accomplish this:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
services:
|
||||
# ...
|
||||
webserver:
|
||||
# ...
|
||||
volumes:
|
||||
- /path/to/my/flowerconfig.py:/usr/src/paperless/src/paperless/flowerconfig.py:ro
|
||||
|
||||
Custom Container Initialization
|
||||
###############################
|
||||
|
||||
The Docker image includes the ability to run custom user scripts during startup. This could be
|
||||
utilized for installing additional tools or Python packages, for example.
|
||||
|
||||
To utilize this, mount a folder containing your scripts to the custom initialization directory, `/custom-cont-init.d`
|
||||
and place scripts you wish to run inside. For security, the folder and its contents must be owned by `root`.
|
||||
Additionally, scripts must only be writable by `root`.
|
||||
|
||||
Your scripts will be run directly before the webserver completes startup. Scripts will be run by the `root` user.
|
||||
This is an advanced functionality with which you could break functionality or lose data.
|
||||
|
||||
For example, using Docker Compose:
|
||||
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
services:
|
||||
# ...
|
||||
webserver:
|
||||
# ...
|
||||
volumes:
|
||||
- /path/to/my/scripts:/custom-cont-init.d:ro
|
||||
|
||||
.. _advanced-mysql-caveats:
|
||||
|
||||
MySQL Caveats
|
||||
#############
|
||||
|
||||
Case Sensitivity
|
||||
================
|
||||
|
||||
The database interface does not provide a method to configure a MySQL database to
|
||||
be case sensitive. This would prevent a user from creating a tag ``Name`` and ``NAME``
|
||||
as they are considered the same.
|
||||
|
||||
Per Django documentation, to enable this requires manual intervention. To enable
|
||||
case sensitive tables, you can execute the following command against each table:
|
||||
|
||||
``ALTER TABLE <table_name> CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;``
|
||||
|
||||
You can also set the default for new tables (this does NOT affect existing tables) with:
|
||||
|
||||
``ALTER DATABASE <db_name> CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;``
|
318
docs/api.md
Normal file
@@ -0,0 +1,318 @@
|
||||
# The REST API
|
||||
|
||||
Paperless makes use of the [Django REST
|
||||
Framework](https://django-rest-framework.org/) standard API interface. It
|
||||
provides a browsable API for most of its endpoints, which you can
|
||||
inspect at `http://<paperless-host>:<port>/api/`. This also documents
|
||||
most of the available filters and ordering fields.
|
||||
|
||||
The API provides 7 main endpoints:
|
||||
|
||||
- `/api/documents/`: Full CRUD support, except POSTing new documents.
|
||||
See below.
|
||||
- `/api/correspondents/`: Full CRUD support.
|
||||
- `/api/document_types/`: Full CRUD support.
|
||||
- `/api/logs/`: Read-Only.
|
||||
- `/api/tags/`: Full CRUD support.
|
||||
- `/api/mail_accounts/`: Full CRUD support.
|
||||
- `/api/mail_rules/`: Full CRUD support.
|
||||
|
||||
All of these endpoints except for the logging endpoint allow you to
|
||||
fetch, edit and delete individual objects by appending their primary key
|
||||
to the path, for example `/api/documents/454/`.
|
||||
|
||||
The objects served by the document endpoint contain the following
|
||||
fields:
|
||||
|
||||
- `id`: ID of the document. Read-only.
|
||||
- `title`: Title of the document.
|
||||
- `content`: Plain text content of the document.
|
||||
- `tags`: List of IDs of tags assigned to this document, or empty
|
||||
list.
|
||||
- `document_type`: Document type of this document, or null.
|
||||
- `correspondent`: Correspondent of this document or null.
|
||||
- `created`: The date time at which this document was created.
|
||||
- `created_date`: The date (YYYY-MM-DD) at which this document was
|
||||
created. Optional. If also passed with created, this is ignored.
|
||||
- `modified`: The date at which this document was last edited in
|
||||
paperless. Read-only.
|
||||
- `added`: The date at which this document was added to paperless.
|
||||
Read-only.
|
||||
- `archive_serial_number`: The identifier of this document in a
|
||||
physical document archive.
|
||||
- `original_file_name`: Verbose filename of the original document.
|
||||
Read-only.
|
||||
- `archived_file_name`: Verbose filename of the archived document.
|
||||
Read-only. Null if no archived document is available.
|
||||
|
||||
## Downloading documents
|
||||
|
||||
In addition to that, the document endpoint offers these additional
|
||||
actions on individual documents:
|
||||
|
||||
- `/api/documents/<pk>/download/`: Download the document.
|
||||
- `/api/documents/<pk>/preview/`: Display the document inline, without
|
||||
downloading it.
|
||||
- `/api/documents/<pk>/thumb/`: Download the PNG thumbnail of a
|
||||
document.
|
||||
|
||||
Paperless generates archived PDF/A documents from consumed files and
|
||||
stores both the original files as well as the archived files. By
|
||||
default, the endpoints for previews and downloads serve the archived
|
||||
file, if it is available. Otherwise, the original file is served. Some
|
||||
documents cannot be archived.
|
||||
|
||||
The endpoints correctly serve the response header fields
|
||||
`Content-Disposition` and `Content-Type` to indicate the filename for
|
||||
download and the type of content of the document.
|
||||
|
||||
In order to download or preview the original document when an archived
|
||||
document is available, supply the query parameter `original=true`.
|
||||
|
||||
!!! tip
|
||||
|
||||
Paperless used to provide this functionality at `/fetch/<pk>/preview`,
|
||||
`/fetch/<pk>/thumb` and `/fetch/<pk>/doc`. Redirects to the new URLs are
|
||||
in place. However, if you use these old URLs to access documents, you
|
||||
should update your app or script to use the new URLs.
|
||||
|
||||
## Getting document metadata
|
||||
|
||||
The API also has an endpoint to retrieve read-only metadata about
|
||||
specific documents. This information is not served along with the
|
||||
document objects, since it requires reading files and would therefore
|
||||
slow down document lists considerably.
|
||||
|
||||
Access the metadata of a document with an ID `id` at
|
||||
`/api/documents/<id>/metadata/`.
|
||||
|
||||
The endpoint reports the following data:
|
||||
|
||||
- `original_checksum`: MD5 checksum of the original document.
|
||||
- `original_size`: Size of the original document, in bytes.
|
||||
- `original_mime_type`: Mime type of the original document.
|
||||
- `media_filename`: Current filename of the document, under which it
|
||||
is stored inside the media directory.
|
||||
- `has_archive_version`: True, if this document is archived, false
|
||||
otherwise.
|
||||
- `original_metadata`: A list of metadata associated with the original
|
||||
document. See below.
|
||||
- `archive_checksum`: MD5 checksum of the archived document, or null.
|
||||
- `archive_size`: Size of the archived document in bytes, or null.
|
||||
- `archive_metadata`: Metadata associated with the archived document,
|
||||
or null. See below.
|
||||
|
||||
File metadata is reported as a list of objects in the following form:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"namespace": "http://ns.adobe.com/pdf/1.3/",
|
||||
"prefix": "pdf",
|
||||
"key": "Producer",
|
||||
"value": "SparklePDF, Fancy edition"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
`namespace` and `prefix` can be null. The actual metadata reported
|
||||
depends on the file type and the metadata available in that specific
|
||||
document. Paperless only reports PDF metadata at this point.
|
||||
|
||||
## Authorization
|
||||
|
||||
The REST API provides three different forms of authentication.
|
||||
|
||||
1. Basic authentication
|
||||
|
||||
Authorize by providing an HTTP header in the form
|
||||
|
||||
```
|
||||
Authorization: Basic <credentials>
|
||||
```
|
||||
|
||||
where `credentials` is a base64-encoded string of
|
||||
`<username>:<password>`
|
||||
|
||||
2. Session authentication
|
||||
|
||||
When you're logged into paperless in your browser, you're
|
||||
automatically logged into the API as well and don't need to provide
|
||||
any authorization headers.
|
||||
|
||||
3. Token authentication
|
||||
|
||||
Paperless also offers an endpoint to acquire authentication tokens.
|
||||
|
||||
POST a username and password as a form or json string to
|
||||
`/api/token/` and paperless will respond with a token, if the login
|
||||
data is correct. This token can be used to authenticate other
|
||||
requests with the following HTTP header:
|
||||
|
||||
```
|
||||
Authorization: Token <token>
|
||||
```
|
||||
|
||||
Tokens can be managed and revoked in the paperless admin.
|
||||
|
||||
## Searching for documents
|
||||
|
||||
Full text searching is available on the `/api/documents/` endpoint. Two
|
||||
specific query parameters cause the API to return full text search
|
||||
results:
|
||||
|
||||
- `/api/documents/?query=your%20search%20query`: Search for a document
|
||||
using a full text query. For details on the syntax, see [Basic Usage - Searching](/usage#basic-usage_searching).
|
||||
- `/api/documents/?more_like=1234`: Search for documents similar to
|
||||
the document with id 1234.
|
||||
|
||||
Pagination works exactly the same as it does for normal requests on this
|
||||
endpoint.
|
||||
|
||||
Certain limitations apply to full text queries:
|
||||
|
||||
- Results are always sorted by search score. The results matching the
|
||||
query best will show up first.
|
||||
- Only a small subset of filtering parameters are supported.
|
||||
|
||||
Furthermore, each returned document has an additional `__search_hit__`
|
||||
attribute with various information about the search results:
|
||||
|
||||
```
|
||||
{
|
||||
"count": 31,
|
||||
"next": "http://localhost:8000/api/documents/?page=2&query=test",
|
||||
"previous": null,
|
||||
"results": [
|
||||
|
||||
...
|
||||
|
||||
{
|
||||
"id": 123,
|
||||
"title": "title",
|
||||
"content": "content",
|
||||
|
||||
...
|
||||
|
||||
"__search_hit__": {
|
||||
"score": 0.343,
|
||||
"highlights": "text <span class=\"match\">Test</span> text",
|
||||
"rank": 23
|
||||
}
|
||||
},
|
||||
|
||||
...
|
||||
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
- `score` is an indication of how well this document matches the query
|
||||
relative to the other search results.
|
||||
- `highlights` is an excerpt from the document content and highlights
|
||||
the search terms with `<span>` tags as shown above.
|
||||
- `rank` is the index of the search results. The first result will
|
||||
have rank 0.
|
||||
|
||||
### `/api/search/autocomplete/`
|
||||
|
||||
Get auto completions for a partial search term.
|
||||
|
||||
Query parameters:
|
||||
|
||||
- `term`: The incomplete term.
|
||||
- `limit`: Amount of results. Defaults to 10.
|
||||
|
||||
Results returned by the endpoint are ordered by importance of the term
|
||||
in the document index. The first result is the term that has the highest
|
||||
[Tf/Idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) score in the index.
|
||||
|
||||
```json
|
||||
["term1", "term3", "term6", "term4"]
|
||||
```
|
||||
|
||||
## POSTing documents {#file-uploads}
|
||||
|
||||
The API provides a special endpoint for file uploads:
|
||||
|
||||
`/api/documents/post_document/`
|
||||
|
||||
POST a multipart form to this endpoint, where the form field `document`
|
||||
contains the document that you want to upload to paperless. The filename
|
||||
is sanitized and then used to store the document in a temporary
|
||||
directory, and the consumer will be instructed to consume the document
|
||||
from there.
|
||||
|
||||
The endpoint supports the following optional form fields:
|
||||
|
||||
- `title`: Specify a title that the consumer should use for the
|
||||
document.
|
||||
- `created`: Specify a DateTime when the document was created (e.g.
|
||||
"2016-04-19" or "2016-04-19 06:15:00+02:00").
|
||||
- `correspondent`: Specify the ID of a correspondent that the consumer
|
||||
should use for the document.
|
||||
- `document_type`: Similar to correspondent.
|
||||
- `tags`: Similar to correspondent. Specify this multiple times to
|
||||
have multiple tags added to the document.
|
||||
|
||||
The endpoint will immediately return "OK" if the document consumption
|
||||
process was started successfully. No additional status information about
|
||||
the consumption process itself is available, since that happens in a
|
||||
different process.
|
||||
|
||||
## API Versioning
|
||||
|
||||
The REST API is versioned since Paperless-ngx 1.3.0.
|
||||
|
||||
- Versioning ensures that changes to the API don't break older
|
||||
clients.
|
||||
- Clients specify the specific version of the API they wish to use
|
||||
with every request and Paperless will handle the request using the
|
||||
specified API version.
|
||||
- Even if the underlying data model changes, older API versions will
|
||||
always serve compatible data.
|
||||
- If no version is specified, Paperless will serve version 1 to ensure
|
||||
compatibility with older clients that do not request a specific API
|
||||
version.
|
||||
|
||||
API versions are specified by submitting an additional HTTP `Accept`
|
||||
header with every request:
|
||||
|
||||
```
|
||||
Accept: application/json; version=6
|
||||
```
|
||||
|
||||
If an invalid version is specified, Paperless 1.3.0 will respond with
|
||||
"406 Not Acceptable" and an error message in the body. Earlier
|
||||
versions of Paperless will serve API version 1 regardless of whether a
|
||||
version is specified via the `Accept` header.
|
||||
|
||||
If a client wishes to verify whether it is compatible with any given
|
||||
server, the following procedure should be performed:
|
||||
|
||||
1. Perform an _authenticated_ request against any API endpoint. If the
|
||||
server is on version 1.3.0 or newer, the server will add two custom
|
||||
headers to the response:
|
||||
|
||||
```
|
||||
X-Api-Version: 2
|
||||
X-Version: 1.3.0
|
||||
```
|
||||
|
||||
2. Determine whether the client is compatible with this server based on
|
||||
the presence/absence of these headers and their values if present.
|
||||
|
||||
### API Changelog
|
||||
|
||||
#### Version 1
|
||||
|
||||
Initial API version.
|
||||
|
||||
#### Version 2
|
||||
|
||||
- Added field `Tag.color`. This read/write string field contains a hex
|
||||
color such as `#a6cee3`.
|
||||
- Added read-only field `Tag.text_color`. This field contains the text
|
||||
color to use for a specific tag, which is either black or white
|
||||
depending on the brightness of `Tag.color`.
|
||||
- Removed field `Tag.colour`.
|
303
docs/api.rst
@@ -1,303 +0,0 @@
|
||||
|
||||
************
|
||||
The REST API
|
||||
************
|
||||
|
||||
|
||||
Paperless makes use of the `Django REST Framework`_ standard API interface.
|
||||
It provides a browsable API for most of its endpoints, which you can inspect
|
||||
at ``http://<paperless-host>:<port>/api/``. This also documents most of the
|
||||
available filters and ordering fields.
|
||||
|
||||
.. _Django REST Framework: http://django-rest-framework.org/
|
||||
|
||||
The API provides 5 main endpoints:
|
||||
|
||||
* ``/api/documents/``: Full CRUD support, except POSTing new documents. See below.
|
||||
* ``/api/correspondents/``: Full CRUD support.
|
||||
* ``/api/document_types/``: Full CRUD support.
|
||||
* ``/api/logs/``: Read-Only.
|
||||
* ``/api/tags/``: Full CRUD support.
|
||||
|
||||
All of these endpoints except for the logging endpoint
|
||||
allow you to fetch, edit and delete individual objects
|
||||
by appending their primary key to the path, for example ``/api/documents/454/``.
|
||||
|
||||
The objects served by the document endpoint contain the following fields:
|
||||
|
||||
* ``id``: ID of the document. Read-only.
|
||||
* ``title``: Title of the document.
|
||||
* ``content``: Plain text content of the document.
|
||||
* ``tags``: List of IDs of tags assigned to this document, or empty list.
|
||||
* ``document_type``: Document type of this document, or null.
|
||||
* ``correspondent``: Correspondent of this document or null.
|
||||
* ``created``: The date time at which this document was created.
|
||||
* ``created_date``: The date (YYYY-MM-DD) at which this document was created. Optional. If also passed with created, this is ignored.
|
||||
* ``modified``: The date at which this document was last edited in paperless. Read-only.
|
||||
* ``added``: The date at which this document was added to paperless. Read-only.
|
||||
* ``archive_serial_number``: The identifier of this document in a physical document archive.
|
||||
* ``original_file_name``: Verbose filename of the original document. Read-only.
|
||||
* ``archived_file_name``: Verbose filename of the archived document. Read-only. Null if no archived document is available.
|
||||
|
||||
|
||||
Downloading documents
|
||||
#####################
|
||||
|
||||
In addition to that, the document endpoint offers these additional actions on
|
||||
individual documents:
|
||||
|
||||
* ``/api/documents/<pk>/download/``: Download the document.
|
||||
* ``/api/documents/<pk>/preview/``: Display the document inline,
|
||||
without downloading it.
|
||||
* ``/api/documents/<pk>/thumb/``: Download the PNG thumbnail of a document.
|
||||
|
||||
Paperless generates archived PDF/A documents from consumed files and stores both
|
||||
the original files as well as the archived files. By default, the endpoints
|
||||
for previews and downloads serve the archived file, if it is available.
|
||||
Otherwise, the original file is served.
|
||||
Some documents cannot be archived.
|
||||
|
||||
The endpoints correctly serve the response header fields ``Content-Disposition``
|
||||
and ``Content-Type`` to indicate the filename for download and the type of content of
|
||||
the document.
|
||||
|
||||
In order to download or preview the original document when an archived document is available,
|
||||
supply the query parameter ``original=true``.
|
||||
|
||||
.. hint::
|
||||
|
||||
Paperless used to provide this functionality at ``/fetch/<pk>/preview``,
|
||||
``/fetch/<pk>/thumb`` and ``/fetch/<pk>/doc``. Redirects to the new URLs
|
||||
are in place. However, if you use these old URLs to access documents, you
|
||||
should update your app or script to use the new URLs.
|
||||
|
||||
|
||||
Getting document metadata
|
||||
#########################
|
||||
|
||||
The API also has an endpoint to retrieve read-only metadata about specific documents. This
|
||||
information is not served along with the document objects, since it requires reading
|
||||
files and would therefore slow down document lists considerably.
|
||||
|
||||
Access the metadata of a document with an ID ``id`` at ``/api/documents/<id>/metadata/``.
|
||||
|
||||
The endpoint reports the following data:
|
||||
|
||||
* ``original_checksum``: MD5 checksum of the original document.
|
||||
* ``original_size``: Size of the original document, in bytes.
|
||||
* ``original_mime_type``: Mime type of the original document.
|
||||
* ``media_filename``: Current filename of the document, under which it is stored inside the media directory.
|
||||
* ``has_archive_version``: True, if this document is archived, false otherwise.
|
||||
* ``original_metadata``: A list of metadata associated with the original document. See below.
|
||||
* ``archive_checksum``: MD5 checksum of the archived document, or null.
|
||||
* ``archive_size``: Size of the archived document in bytes, or null.
|
||||
* ``archive_metadata``: Metadata associated with the archived document, or null. See below.
|
||||
|
||||
File metadata is reported as a list of objects in the following form:
|
||||
|
||||
.. code:: json
|
||||
|
||||
[
|
||||
{
|
||||
"namespace": "http://ns.adobe.com/pdf/1.3/",
|
||||
"prefix": "pdf",
|
||||
"key": "Producer",
|
||||
"value": "SparklePDF, Fancy edition"
|
||||
},
|
||||
]
|
||||
|
||||
``namespace`` and ``prefix`` can be null. The actual metadata reported depends on the file type and the metadata
|
||||
available in that specific document. Paperless only reports PDF metadata at this point.
|
||||
|
||||
Authorization
|
||||
#############
|
||||
|
||||
The REST API provides three different forms of authentication.
|
||||
|
||||
1. Basic authentication
|
||||
|
||||
Authorize by providing an HTTP header in the form
|
||||
|
||||
.. code::
|
||||
|
||||
Authorization: Basic <credentials>
|
||||
|
||||
where ``credentials`` is a base64-encoded string of ``<username>:<password>``
|
||||
|
||||
2. Session authentication
|
||||
|
||||
When you're logged into paperless in your browser, you're automatically
|
||||
logged into the API as well and don't need to provide any authorization
|
||||
headers.
|
||||
|
||||
3. Token authentication
|
||||
|
||||
Paperless also offers an endpoint to acquire authentication tokens.
|
||||
|
||||
POST a username and password as a form or json string to ``/api/token/``
|
||||
and paperless will respond with a token, if the login data is correct.
|
||||
This token can be used to authenticate other requests with the
|
||||
following HTTP header:
|
||||
|
||||
.. code::
|
||||
|
||||
Authorization: Token <token>
|
||||
|
||||
Tokens can be managed and revoked in the paperless admin.
|
||||
|
||||
Searching for documents
|
||||
#######################
|
||||
|
||||
Full text searching is available on the ``/api/documents/`` endpoint. Two specific
|
||||
query parameters cause the API to return full text search results:
|
||||
|
||||
* ``/api/documents/?query=your%20search%20query``: Search for a document using a full text query.
|
||||
For details on the syntax, see :ref:`basic-usage_searching`.
|
||||
|
||||
* ``/api/documents/?more_like=1234``: Search for documents similar to the document with id 1234.
|
||||
|
||||
Pagination works exactly the same as it does for normal requests on this endpoint.
|
||||
|
||||
Certain limitations apply to full text queries:
|
||||
|
||||
* Results are always sorted by search score. The results matching the query best will show up first.
|
||||
|
||||
* Only a small subset of filtering parameters are supported.
|
||||
|
||||
Furthermore, each returned document has an additional ``__search_hit__`` attribute with various information
|
||||
about the search results:
|
||||
|
||||
.. code::
|
||||
|
||||
{
|
||||
"count": 31,
|
||||
"next": "http://localhost:8000/api/documents/?page=2&query=test",
|
||||
"previous": null,
|
||||
"results": [
|
||||
|
||||
...
|
||||
|
||||
{
|
||||
"id": 123,
|
||||
"title": "title",
|
||||
"content": "content",
|
||||
|
||||
...
|
||||
|
||||
"__search_hit__": {
|
||||
"score": 0.343,
|
||||
"highlights": "text <span class=\"match\">Test</span> text",
|
||||
"rank": 23
|
||||
}
|
||||
},
|
||||
|
||||
...
|
||||
|
||||
]
|
||||
}
|
||||
|
||||
* ``score`` is an indication of how well this document matches the query relative to the other search results.
|
||||
* ``highlights`` is an excerpt from the document content and highlights the search terms with ``<span>`` tags as shown above.
|
||||
* ``rank`` is the index of the search results. The first result will have rank 0.
|
||||
|
||||
``/api/search/autocomplete/``
|
||||
=============================
|
||||
|
||||
Get auto completions for a partial search term.
|
||||
|
||||
Query parameters:
|
||||
|
||||
* ``term``: The incomplete term.
|
||||
* ``limit``: Amount of results. Defaults to 10.
|
||||
|
||||
Results returned by the endpoint are ordered by importance of the term in the
|
||||
document index. The first result is the term that has the highest Tf/Idf score
|
||||
in the index.
|
||||
|
||||
.. code:: json
|
||||
|
||||
[
|
||||
"term1",
|
||||
"term3",
|
||||
"term6",
|
||||
"term4"
|
||||
]
|
||||
|
||||
|
||||
.. _api-file_uploads:
|
||||
|
||||
POSTing documents
|
||||
#################
|
||||
|
||||
The API provides a special endpoint for file uploads:
|
||||
|
||||
``/api/documents/post_document/``
|
||||
|
||||
POST a multipart form to this endpoint, where the form field ``document`` contains
|
||||
the document that you want to upload to paperless. The filename is sanitized and
|
||||
then used to store the document in a temporary directory, and the consumer will
|
||||
be instructed to consume the document from there.
|
||||
|
||||
The endpoint supports the following optional form fields:
|
||||
|
||||
* ``title``: Specify a title that the consumer should use for the document.
|
||||
* ``created``: Specify the date and time at which the document was created (e.g. "2016-04-19" or "2016-04-19 06:15:00+02:00").
|
||||
* ``correspondent``: Specify the ID of a correspondent that the consumer should use for the document.
|
||||
* ``document_type``: Similar to correspondent.
|
||||
* ``tags``: Similar to correspondent. Specify this multiple times to have multiple tags added
|
||||
to the document.
|
||||
|
||||
|
||||
The endpoint will immediately return "OK" if the document consumption process
|
||||
was started successfully. No additional status information about the consumption
|
||||
process itself is available, since that happens in a different process.
|
||||
|
||||
|
||||
.. _api-versioning:
|
||||
|
||||
API Versioning
|
||||
##############
|
||||
|
||||
The REST API is versioned since Paperless-ngx 1.3.0.
|
||||
|
||||
* Versioning ensures that changes to the API don't break older clients.
|
||||
* Clients specify the specific version of the API they wish to use with every request and Paperless will handle the request using the specified API version.
|
||||
* Even if the underlying data model changes, older API versions will always serve compatible data.
|
||||
* If no version is specified, Paperless will serve version 1 to ensure compatibility with older clients that do not request a specific API version.
|
||||
|
||||
API versions are specified by submitting an additional HTTP ``Accept`` header with every request:
|
||||
|
||||
.. code::
|
||||
|
||||
Accept: application/json; version=6
|
||||
|
||||
If an invalid version is specified, Paperless 1.3.0 will respond with "406 Not Acceptable" and an error message in the body.
|
||||
Earlier versions of Paperless will serve API version 1 regardless of whether a version is specified via the ``Accept`` header.
|
||||
|
||||
If a client wishes to verify whether it is compatible with any given server, the following procedure should be performed:
|
||||
|
||||
1. Perform an *authenticated* request against any API endpoint. If the server is on version 1.3.0 or newer, the server will
|
||||
add two custom headers to the response:
|
||||
|
||||
.. code::
|
||||
|
||||
X-Api-Version: 2
|
||||
X-Version: 1.3.0
|
||||
|
||||
2. Determine whether the client is compatible with this server based on the presence/absence of these headers and their values if present.
|
||||
|
||||
|
||||
API Changelog
|
||||
=============
|
||||
|
||||
Version 1
|
||||
---------
|
||||
|
||||
Initial API version.
|
||||
|
||||
Version 2
|
||||
---------
|
||||
|
||||
* Added field ``Tag.color``. This read/write string field contains a hex color such as ``#a6cee3``.
|
||||
* Added read-only field ``Tag.text_color``. This field contains the text color to use for a specific tag, which is either black or white depending on the brightness of ``Tag.color``.
|
||||
* Removed field ``Tag.colour``.
|
36
docs/assets/extra.css
Normal file
@@ -0,0 +1,36 @@
|
||||
:root > * {
|
||||
--md-primary-fg-color: #17541f;
|
||||
--md-primary-fg-color--dark: #17541f;
|
||||
--md-primary-fg-color--light: #17541f;
|
||||
--md-accent-fg-color: #2b8a38;
|
||||
--md-typeset-a-color: #21652a;
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] {
|
||||
--md-hue: 222;
|
||||
}
|
||||
|
||||
@media (min-width: 400px) {
|
||||
.grid-left {
|
||||
width: 33%;
|
||||
float: left;
|
||||
}
|
||||
.grid-right {
|
||||
width: 62%;
|
||||
margin-left: 4%;
|
||||
float: left;
|
||||
}
|
||||
}
|
||||
|
||||
.grid-left > p {
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
|
||||
.grid-right p {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.index-callout {
|
||||
margin-right: .5rem;
|
||||
}
|
BIN
docs/assets/favicon.png
Normal file
After Width: | Height: | Size: 768 B |
12
docs/assets/logo.svg
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Generator: Adobe Illustrator 27.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||
viewBox="0 0 1000 1000" style="enable-background:new 0 0 1000 1000;" xml:space="preserve">
|
||||
<style type="text/css">
|
||||
.st0{fill:#FFFFFF;}
|
||||
</style>
|
||||
<path class="st0" d="M299,891.7c-4.2-19.8-12.5-59.6-13.6-59.6c-176.7-105.7-155.8-288.7-97.3-393.4
|
||||
c12.5,131.8,245.8,222.8,109.8,383.9c-1.1,2,6.2,27.2,12.5,50.2c27.2-46,68-101.4,65.8-106.7C208.9,358.2,731.9,326.9,840.6,73.7
|
||||
c49.1,244.8-25.1,623.5-445.5,719.7c-2,1.1-76.3,131.8-79.5,132.9c0-2-31.4-1.1-27.2-11.5C290.7,908.4,294.8,900.1,299,891.7
|
||||
L299,891.7z M293.8,793.4c53.3-61.8-9.4-167.4-47.1-201.9C310.5,701.3,306.3,765.1,293.8,793.4L293.8,793.4z"/>
|
||||
</svg>
|
After Width: | Height: | Size: 869 B |
68
docs/assets/logo_full_black.svg
Normal file
@@ -0,0 +1,68 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Generator: Adobe Illustrator 27.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||
viewBox="0 0 2962.2 860.2" style="enable-background:new 0 0 2962.2 860.2;" xml:space="preserve">
|
||||
<style type="text/css">
|
||||
.st0{fill:#17541F;stroke:#000000;stroke-miterlimit:10;}
|
||||
</style>
|
||||
<path d="M1055.6,639.7v-20.6c-18,20-43.1,30.1-75.4,30.1c-22.4,0-42.8-5.8-61-17.5c-18.3-11.7-32.5-27.8-42.9-48.3
|
||||
c-10.3-20.5-15.5-43.3-15.5-68.4c0-25.1,5.2-48,15.5-68.5s24.6-36.6,42.9-48.3s38.6-17.5,61-17.5c32.3,0,57.5,10,75.4,30.1v-20.6
|
||||
h85.3v249.6L1055.6,639.7L1055.6,639.7z M1059.1,514.9c0-17.4-5.2-31.9-15.5-43.8c-10.3-11.8-23.9-17.7-40.6-17.7
|
||||
c-16.8,0-30.2,5.9-40.4,17.7c-10.2,11.8-15.3,26.4-15.3,43.8c0,17.4,5.1,31.9,15.3,43.8c10.2,11.8,23.6,17.7,40.4,17.7
|
||||
c16.8,0,30.3-5.9,40.6-17.7C1054,546.9,1059.1,532.3,1059.1,514.9z"/>
|
||||
<path d="M1417.8,398.2c18.3,11.7,32.5,27.8,42.9,48.3c10.3,20.5,15.5,43.3,15.5,68.5c0,25.1-5.2,48-15.5,68.4
|
||||
c-10.3,20.5-24.6,36.6-42.9,48.3s-38.6,17.5-61,17.5c-32.3,0-57.5-10-75.4-30.1v165.6h-85.3V390.2h85.3v20.6
|
||||
c18-20,43.1-30.1,75.4-30.1C1379.2,380.7,1399.5,386.6,1417.8,398.2z M1389.5,514.9c0-17.4-5.1-31.9-15.3-43.8
|
||||
c-10.2-11.8-23.6-17.7-40.4-17.7s-30.2,5.9-40.4,17.7c-10.2,11.8-15.3,26.4-15.3,43.8c0,17.4,5.1,31.9,15.3,43.8
|
||||
c10.2,11.8,23.6,17.7,40.4,17.7s30.2-5.9,40.4-17.7S1389.5,532.3,1389.5,514.9z"/>
|
||||
<path d="M1713.6,555.3l53,49.4c-28.1,29.6-66.7,44.4-115.8,44.4c-28.1,0-53-5.8-74.5-17.5s-38.2-27.7-49.8-48
|
||||
c-11.7-20.3-17.7-43.2-18-68.7c0-24.8,5.9-47.5,17.7-68c11.8-20.5,28.1-36.7,48.7-48.5s43.5-17.7,68.7-17.7
|
||||
c24.8,0,47.6,6.1,68.2,18.2s37,29.5,49.1,52.3c12.1,22.7,18.2,49.1,18.2,79l-0.4,11.7h-181.8c3.6,11.4,10.5,20.7,20.9,28.1
|
||||
c10.3,7.3,21.3,11,33,11c14.4,0,26.3-2.2,35.7-6.5C1695.8,570.1,1704.9,563.7,1713.6,555.3z M1596.9,486.2h92.9
|
||||
c-2.1-12.3-7.5-22.1-16.2-29.4s-18.7-11-30.1-11s-21.5,3.7-30.3,11S1599,473.9,1596.9,486.2z"/>
|
||||
<path d="M1908.8,418.4c7.8-10.8,17.2-19,28.3-24.7s22-8.5,32.8-8.5c11.4,0,20,1.6,26,4.9l-10.8,72.7c-8.4-2.1-15.7-3.1-22-3.1
|
||||
c-17.1,0-30.4,4.3-39.9,12.8c-9.6,8.5-14.4,24.2-14.4,46.9v120.3h-85.3V390.2h85.3V418.4L1908.8,418.4z"/>
|
||||
<path d="M2113,258.2v381.5h-85.3V258.2H2113z"/>
|
||||
<path d="M2360.8,555.3l53,49.4c-28.1,29.6-66.7,44.4-115.8,44.4c-28.1,0-53-5.8-74.5-17.5s-38.2-27.7-49.8-48
|
||||
c-11.7-20.3-17.7-43.2-18-68.7c0-24.8,5.9-47.5,17.7-68s28.1-36.7,48.7-48.5c20.6-11.8,43.5-17.7,68.7-17.7
|
||||
c24.8,0,47.6,6.1,68.2,18.2c20.6,12.1,37,29.5,49.1,52.3c12.1,22.7,18.2,49.1,18.2,79l-0.4,11.7h-181.8
|
||||
c3.6,11.4,10.5,20.7,20.9,28.1c10.3,7.3,21.3,11,33,11c14.4,0,26.3-2.2,35.7-6.5C2343.1,570.1,2352.1,563.7,2360.8,555.3z
|
||||
M2244.1,486.2h92.9c-2.1-12.3-7.5-22.1-16.2-29.4s-18.7-11-30.1-11s-21.5,3.7-30.3,11C2251.7,464.1,2246.2,473.9,2244.1,486.2z"/>
|
||||
<path d="M2565.9,446.3c-9.9,0-17.1,1.1-21.5,3.4c-4.5,2.2-6.7,5.9-6.7,11s3.4,8.8,10.3,11.2c6.9,2.4,18,4.9,33.2,7.6
|
||||
c20,3,37,6.7,50.9,11.2s26,12.1,36.1,22.9c10.2,10.8,15.3,25.9,15.3,45.3c0,29.9-10.9,52.4-32.8,67.6
|
||||
c-21.8,15.1-50.3,22.7-85.3,22.7c-25.7,0-49.5-3.7-71.4-11c-21.8-7.3-37.4-14.7-46.7-22.2l33.7-60.6c10.2,9,23.4,15.8,39.7,20.4
|
||||
c16.3,4.6,31.3,7,45.1,7c19.7,0,29.6-5.2,29.6-15.7c0-5.4-3.3-9.4-9.9-11.9c-6.6-2.5-17.2-5.2-31.9-7.9c-18.9-3.3-34.9-7.2-48-11.7
|
||||
c-13.2-4.5-24.6-12.2-34.3-23.1c-9.7-10.9-14.6-26-14.6-45.1c0-27.2,9.7-48.5,29-63.7c19.3-15.3,46-22.9,80.1-22.9
|
||||
c23.3,0,44.4,3.6,63.3,10.8c18.9,7.2,34,14.5,45.3,22l-32.8,58.8c-10.8-7.5-23.2-13.7-37.3-18.6
|
||||
C2590.5,448.7,2577.6,446.3,2565.9,446.3z"/>
|
||||
<path d="M2817.3,446.3c-9.9,0-17.1,1.1-21.5,3.4c-4.5,2.2-6.7,5.9-6.7,11s3.4,8.8,10.3,11.2c6.9,2.4,18,4.9,33.2,7.6
|
||||
c20,3,37,6.7,50.9,11.2s26,12.1,36.1,22.9c10.2,10.8,15.3,25.9,15.3,45.3c0,29.9-10.9,52.4-32.8,67.6
|
||||
c-21.8,15.1-50.3,22.7-85.3,22.7c-25.7,0-49.5-3.7-71.4-11c-21.8-7.3-37.4-14.7-46.7-22.2l33.7-60.6c10.2,9,23.4,15.8,39.7,20.4
|
||||
c16.3,4.6,31.3,7,45.1,7c19.8,0,29.6-5.2,29.6-15.7c0-5.4-3.3-9.4-9.9-11.9c-6.6-2.5-17.2-5.2-31.9-7.9c-18.9-3.3-34.9-7.2-48-11.7
|
||||
c-13.2-4.5-24.6-12.2-34.3-23.1c-9.7-10.9-14.6-26-14.6-45.1c0-27.2,9.7-48.5,29-63.7c19.3-15.3,46-22.9,80.1-22.9
|
||||
c23.3,0,44.4,3.6,63.3,10.8c18.9,7.2,34,14.5,45.3,22l-32.8,58.8c-10.8-7.5-23.2-13.7-37.3-18.6
|
||||
C2841.8,448.7,2828.9,446.3,2817.3,446.3z"/>
|
||||
<g>
|
||||
<path d="M2508,724h60.2v17.3H2508V724z"/>
|
||||
<path d="M2629.2,694.4c4.9-2,10.2-3.1,16-3.1c10.9,0,19.5,3.4,25.9,10.2s9.6,16.7,9.6,29.6v57.3h-19.6v-52.6
|
||||
c0-9.3-1.7-16.2-5.1-20.7c-3.4-4.5-9.1-6.7-17-6.7c-6.5,0-11.8,2.4-16.1,7.1c-4.3,4.8-6.4,11.5-6.4,20.2v52.6h-19.6v-94.6h19.6v9.5
|
||||
C2620.2,699.4,2624.4,696.4,2629.2,694.4z"/>
|
||||
<path d="M2790.3,833.2c-8.6,6.8-19.4,10.2-32.3,10.2c-7.9,0-15.2-1.4-21.9-4.1s-12.1-6.8-16.3-12.2s-6.6-11.9-7.1-19.6h19.6
|
||||
c0.7,6.1,3.5,10.8,8.4,13.9c4.9,3.2,10.7,4.8,17.4,4.8c7,0,13.1-2,18.2-6c5.1-4,7.7-10.3,7.7-18.9v-24.7c-3.6,3.4-8,6.2-13.3,8.2
|
||||
c-5.2,2.1-10.7,3.1-16.3,3.1c-8.7,0-16.6-2.1-23.7-6.4c-7.1-4.3-12.6-10-16.7-17.3c-4-7.3-6-15.5-6-24.6s2-17.3,6-24.7
|
||||
s9.6-13.2,16.7-17.4c7.1-4.3,15-6.4,23.7-6.4c5.7,0,11.1,1,16.3,3.1s9.6,4.8,13.3,8.2v-8.8h19.4v107.8
|
||||
C2803.2,815.9,2798.9,826.4,2790.3,833.2z M2782.2,755.7c2.6-4.7,3.8-10,3.8-15.9s-1.3-11.2-3.8-16c-2.6-4.8-6.1-8.5-10.5-11.1
|
||||
c-4.5-2.7-9.5-4-15.1-4c-5.8,0-10.9,1.4-15.4,4.3c-4.5,2.8-7.9,6.6-10.3,11.4c-2.4,4.8-3.6,9.9-3.6,15.5c0,5.4,1.2,10.5,3.6,15.3
|
||||
c2.4,4.8,5.8,8.6,10.3,11.5s9.6,4.3,15.4,4.3c5.6,0,10.6-1.4,15.1-4.1C2776.1,764.1,2779.6,760.4,2782.2,755.7z"/>
|
||||
<path d="M2843.5,788.4h-21.6l37.9-48l-36.4-46.6h22.6l25.7,33.3l25.8-33.3h21.6l-36.2,45.9l37.9,48.6h-22.6l-27.4-35L2843.5,788.4z
|
||||
"/>
|
||||
</g>
|
||||
<path d="M835.8,319.2c-11.5-18.9-27.4-33.7-47.6-44.7c-20.2-10.9-43-16.4-68.5-16.4h-90.6c-8.6,39.6-21.3,77.2-38,112.4
|
||||
c-10,21-21.3,41-33.9,59.9v209.2H647v-135h72.7c25.4,0,48.3-5.5,68.5-16.4s36.1-25.8,47.6-44.7c11.5-18.9,17.3-39.5,17.3-61.9
|
||||
C853.1,358.9,847.4,338.1,835.8,319.2z M747,416.6c-9.4,9-21.8,13.5-37,13.5l-62.8,0.4v-93.4l62.8-0.4c15.3,0,27.6,4.5,37,13.5
|
||||
s14.1,20,14.1,33.2C761.1,396.6,756.4,407.7,747,416.6z"/>
|
||||
<path class="st0" d="M164.7,698.7c-3.5-16.5-10.4-49.6-11.3-49.6c-147.1-88-129.7-240.3-81-327.4C82.8,431.4,277,507.1,163.8,641.2
|
||||
c-0.9,1.7,5.2,22.6,10.4,41.8c22.6-38.3,56.6-84.4,54.8-88.8C89.7,254.7,525,228.6,615.5,17.9c40.9,203.7-20.9,518.9-370.8,599
|
||||
c-1.7,0.9-63.5,109.7-66.2,110.6c0-1.7-26.1-0.9-22.6-9.6C157.8,712.6,161.2,705.7,164.7,698.7L164.7,698.7z M160.4,616.9
|
||||
c44.4-51.4-7.8-139.3-39.2-168C174.3,540.2,170.8,593.3,160.4,616.9L160.4,616.9z"/>
|
||||
</svg>
|
After Width: | Height: | Size: 6.3 KiB |
69
docs/assets/logo_full_white.svg
Normal file
@@ -0,0 +1,69 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Generator: Adobe Illustrator 27.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||
viewBox="0 0 2962.2 860.2" style="enable-background:new 0 0 2962.2 860.2;" xml:space="preserve">
|
||||
<style type="text/css">
|
||||
.st0{fill:#FFFFFF;stroke:#000000;stroke-miterlimit:10;}
|
||||
.st1{fill:#17541F;stroke:#000000;stroke-miterlimit:10;}
|
||||
</style>
|
||||
<path class="st0" d="M1055.6,639.7v-20.6c-18,20-43.1,30.1-75.4,30.1c-22.4,0-42.8-5.8-61-17.5c-18.3-11.7-32.5-27.8-42.9-48.3
|
||||
c-10.3-20.5-15.5-43.3-15.5-68.4c0-25.1,5.2-48,15.5-68.5s24.6-36.6,42.9-48.3s38.6-17.5,61-17.5c32.3,0,57.5,10,75.4,30.1v-20.6
|
||||
h85.3v249.6L1055.6,639.7L1055.6,639.7z M1059.1,514.9c0-17.4-5.2-31.9-15.5-43.8c-10.3-11.8-23.9-17.7-40.6-17.7
|
||||
c-16.8,0-30.2,5.9-40.4,17.7c-10.2,11.8-15.3,26.4-15.3,43.8c0,17.4,5.1,31.9,15.3,43.8c10.2,11.8,23.6,17.7,40.4,17.7
|
||||
c16.8,0,30.3-5.9,40.6-17.7C1054,546.9,1059.1,532.3,1059.1,514.9z"/>
|
||||
<path class="st0" d="M1417.8,398.2c18.3,11.7,32.5,27.8,42.9,48.3c10.3,20.5,15.5,43.3,15.5,68.5c0,25.1-5.2,48-15.5,68.4
|
||||
c-10.3,20.5-24.6,36.6-42.9,48.3s-38.6,17.5-61,17.5c-32.3,0-57.5-10-75.4-30.1v165.6h-85.3V390.2h85.3v20.6
|
||||
c18-20,43.1-30.1,75.4-30.1C1379.2,380.7,1399.5,386.6,1417.8,398.2z M1389.5,514.9c0-17.4-5.1-31.9-15.3-43.8
|
||||
c-10.2-11.8-23.6-17.7-40.4-17.7s-30.2,5.9-40.4,17.7c-10.2,11.8-15.3,26.4-15.3,43.8c0,17.4,5.1,31.9,15.3,43.8
|
||||
c10.2,11.8,23.6,17.7,40.4,17.7s30.2-5.9,40.4-17.7S1389.5,532.3,1389.5,514.9z"/>
|
||||
<path class="st0" d="M1713.6,555.3l53,49.4c-28.1,29.6-66.7,44.4-115.8,44.4c-28.1,0-53-5.8-74.5-17.5s-38.2-27.7-49.8-48
|
||||
c-11.7-20.3-17.7-43.2-18-68.7c0-24.8,5.9-47.5,17.7-68c11.8-20.5,28.1-36.7,48.7-48.5s43.5-17.7,68.7-17.7
|
||||
c24.8,0,47.6,6.1,68.2,18.2s37,29.5,49.1,52.3c12.1,22.7,18.2,49.1,18.2,79l-0.4,11.7h-181.8c3.6,11.4,10.5,20.7,20.9,28.1
|
||||
c10.3,7.3,21.3,11,33,11c14.4,0,26.3-2.2,35.7-6.5C1695.8,570.1,1704.9,563.7,1713.6,555.3z M1596.9,486.2h92.9
|
||||
c-2.1-12.3-7.5-22.1-16.2-29.4s-18.7-11-30.1-11s-21.5,3.7-30.3,11S1599,473.9,1596.9,486.2z"/>
|
||||
<path class="st0" d="M1908.8,418.4c7.8-10.8,17.2-19,28.3-24.7s22-8.5,32.8-8.5c11.4,0,20,1.6,26,4.9l-10.8,72.7
|
||||
c-8.4-2.1-15.7-3.1-22-3.1c-17.1,0-30.4,4.3-39.9,12.8c-9.6,8.5-14.4,24.2-14.4,46.9v120.3h-85.3V390.2h85.3V418.4L1908.8,418.4z"/>
|
||||
<path class="st0" d="M2113,258.2v381.5h-85.3V258.2H2113z"/>
|
||||
<path class="st0" d="M2360.8,555.3l53,49.4c-28.1,29.6-66.7,44.4-115.8,44.4c-28.1,0-53-5.8-74.5-17.5s-38.2-27.7-49.8-48
|
||||
c-11.7-20.3-17.7-43.2-18-68.7c0-24.8,5.9-47.5,17.7-68s28.1-36.7,48.7-48.5c20.6-11.8,43.5-17.7,68.7-17.7
|
||||
c24.8,0,47.6,6.1,68.2,18.2c20.6,12.1,37,29.5,49.1,52.3c12.1,22.7,18.2,49.1,18.2,79l-0.4,11.7h-181.8
|
||||
c3.6,11.4,10.5,20.7,20.9,28.1c10.3,7.3,21.3,11,33,11c14.4,0,26.3-2.2,35.7-6.5C2343.1,570.1,2352.1,563.7,2360.8,555.3z
|
||||
M2244.1,486.2h92.9c-2.1-12.3-7.5-22.1-16.2-29.4s-18.7-11-30.1-11s-21.5,3.7-30.3,11C2251.7,464.1,2246.2,473.9,2244.1,486.2z"/>
|
||||
<path class="st0" d="M2565.9,446.3c-9.9,0-17.1,1.1-21.5,3.4c-4.5,2.2-6.7,5.9-6.7,11s3.4,8.8,10.3,11.2c6.9,2.4,18,4.9,33.2,7.6
|
||||
c20,3,37,6.7,50.9,11.2s26,12.1,36.1,22.9c10.2,10.8,15.3,25.9,15.3,45.3c0,29.9-10.9,52.4-32.8,67.6
|
||||
c-21.8,15.1-50.3,22.7-85.3,22.7c-25.7,0-49.5-3.7-71.4-11c-21.8-7.3-37.4-14.7-46.7-22.2l33.7-60.6c10.2,9,23.4,15.8,39.7,20.4
|
||||
c16.3,4.6,31.3,7,45.1,7c19.7,0,29.6-5.2,29.6-15.7c0-5.4-3.3-9.4-9.9-11.9c-6.6-2.5-17.2-5.2-31.9-7.9c-18.9-3.3-34.9-7.2-48-11.7
|
||||
c-13.2-4.5-24.6-12.2-34.3-23.1c-9.7-10.9-14.6-26-14.6-45.1c0-27.2,9.7-48.5,29-63.7c19.3-15.3,46-22.9,80.1-22.9
|
||||
c23.3,0,44.4,3.6,63.3,10.8c18.9,7.2,34,14.5,45.3,22l-32.8,58.8c-10.8-7.5-23.2-13.7-37.3-18.6
|
||||
C2590.5,448.7,2577.6,446.3,2565.9,446.3z"/>
|
||||
<path class="st0" d="M2817.3,446.3c-9.9,0-17.1,1.1-21.5,3.4c-4.5,2.2-6.7,5.9-6.7,11s3.4,8.8,10.3,11.2c6.9,2.4,18,4.9,33.2,7.6
|
||||
c20,3,37,6.7,50.9,11.2s26,12.1,36.1,22.9c10.2,10.8,15.3,25.9,15.3,45.3c0,29.9-10.9,52.4-32.8,67.6
|
||||
c-21.8,15.1-50.3,22.7-85.3,22.7c-25.7,0-49.5-3.7-71.4-11c-21.8-7.3-37.4-14.7-46.7-22.2l33.7-60.6c10.2,9,23.4,15.8,39.7,20.4
|
||||
c16.3,4.6,31.3,7,45.1,7c19.8,0,29.6-5.2,29.6-15.7c0-5.4-3.3-9.4-9.9-11.9c-6.6-2.5-17.2-5.2-31.9-7.9c-18.9-3.3-34.9-7.2-48-11.7
|
||||
c-13.2-4.5-24.6-12.2-34.3-23.1c-9.7-10.9-14.6-26-14.6-45.1c0-27.2,9.7-48.5,29-63.7c19.3-15.3,46-22.9,80.1-22.9
|
||||
c23.3,0,44.4,3.6,63.3,10.8c18.9,7.2,34,14.5,45.3,22l-32.8,58.8c-10.8-7.5-23.2-13.7-37.3-18.6
|
||||
C2841.8,448.7,2828.9,446.3,2817.3,446.3z"/>
|
||||
<g>
|
||||
<path class="st0" d="M2508,724h60.2v17.3H2508V724z"/>
|
||||
<path class="st0" d="M2629.2,694.4c4.9-2,10.2-3.1,16-3.1c10.9,0,19.5,3.4,25.9,10.2s9.6,16.7,9.6,29.6v57.3h-19.6v-52.6
|
||||
c0-9.3-1.7-16.2-5.1-20.7c-3.4-4.5-9.1-6.7-17-6.7c-6.5,0-11.8,2.4-16.1,7.1c-4.3,4.8-6.4,11.5-6.4,20.2v52.6h-19.6v-94.6h19.6v9.5
|
||||
C2620.2,699.4,2624.4,696.4,2629.2,694.4z"/>
|
||||
<path class="st0" d="M2790.3,833.2c-8.6,6.8-19.4,10.2-32.3,10.2c-7.9,0-15.2-1.4-21.9-4.1s-12.1-6.8-16.3-12.2s-6.6-11.9-7.1-19.6
|
||||
h19.6c0.7,6.1,3.5,10.8,8.4,13.9c4.9,3.2,10.7,4.8,17.4,4.8c7,0,13.1-2,18.2-6c5.1-4,7.7-10.3,7.7-18.9v-24.7
|
||||
c-3.6,3.4-8,6.2-13.3,8.2c-5.2,2.1-10.7,3.1-16.3,3.1c-8.7,0-16.6-2.1-23.7-6.4c-7.1-4.3-12.6-10-16.7-17.3c-4-7.3-6-15.5-6-24.6
|
||||
s2-17.3,6-24.7s9.6-13.2,16.7-17.4c7.1-4.3,15-6.4,23.7-6.4c5.7,0,11.1,1,16.3,3.1s9.6,4.8,13.3,8.2v-8.8h19.4v107.8
|
||||
C2803.2,815.9,2798.9,826.4,2790.3,833.2z M2782.2,755.7c2.6-4.7,3.8-10,3.8-15.9s-1.3-11.2-3.8-16c-2.6-4.8-6.1-8.5-10.5-11.1
|
||||
c-4.5-2.7-9.5-4-15.1-4c-5.8,0-10.9,1.4-15.4,4.3c-4.5,2.8-7.9,6.6-10.3,11.4c-2.4,4.8-3.6,9.9-3.6,15.5c0,5.4,1.2,10.5,3.6,15.3
|
||||
c2.4,4.8,5.8,8.6,10.3,11.5s9.6,4.3,15.4,4.3c5.6,0,10.6-1.4,15.1-4.1C2776.1,764.1,2779.6,760.4,2782.2,755.7z"/>
|
||||
<path class="st0" d="M2843.5,788.4h-21.6l37.9-48l-36.4-46.6h22.6l25.7,33.3l25.8-33.3h21.6l-36.2,45.9l37.9,48.6h-22.6l-27.4-35
|
||||
L2843.5,788.4z"/>
|
||||
</g>
|
||||
<path class="st0" d="M835.8,319.2c-11.5-18.9-27.4-33.7-47.6-44.7c-20.2-10.9-43-16.4-68.5-16.4h-90.6c-8.6,39.6-21.3,77.2-38,112.4
|
||||
c-10,21-21.3,41-33.9,59.9v209.2H647v-135h72.7c25.4,0,48.3-5.5,68.5-16.4s36.1-25.8,47.6-44.7c11.5-18.9,17.3-39.5,17.3-61.9
|
||||
C853.1,358.9,847.4,338.1,835.8,319.2z M747,416.6c-9.4,9-21.8,13.5-37,13.5l-62.8,0.4v-93.4l62.8-0.4c15.3,0,27.6,4.5,37,13.5
|
||||
s14.1,20,14.1,33.2C761.1,396.6,756.4,407.7,747,416.6z"/>
|
||||
<path class="st1" d="M164.7,698.7c-3.5-16.5-10.4-49.6-11.3-49.6c-147.1-88-129.7-240.3-81-327.4C82.8,431.4,277,507.1,163.8,641.2
|
||||
c-0.9,1.7,5.2,22.6,10.4,41.8c22.6-38.3,56.6-84.4,54.8-88.8C89.7,254.7,525,228.6,615.5,17.9c40.9,203.7-20.9,518.9-370.8,599
|
||||
c-1.7,0.9-63.5,109.7-66.2,110.6c0-1.7-26.1-0.9-22.6-9.6C157.8,712.6,161.2,705.7,164.7,698.7L164.7,698.7z M160.4,616.9
|
||||
c44.4-51.4-7.8-139.3-39.2-168C174.3,540.2,170.8,593.3,160.4,616.9L160.4,616.9z"/>
|
||||
</svg>
|
After Width: | Height: | Size: 6.5 KiB |
Before Width: | Height: | Size: 67 KiB After Width: | Height: | Size: 67 KiB |
Before Width: | Height: | Size: 661 KiB After Width: | Height: | Size: 661 KiB |
Before Width: | Height: | Size: 457 KiB After Width: | Height: | Size: 457 KiB |
Before Width: | Height: | Size: 436 KiB After Width: | Height: | Size: 436 KiB |
Before Width: | Height: | Size: 462 KiB After Width: | Height: | Size: 462 KiB |
Before Width: | Height: | Size: 608 KiB After Width: | Height: | Size: 608 KiB |
Before Width: | Height: | Size: 698 KiB After Width: | Height: | Size: 698 KiB |
Before Width: | Height: | Size: 706 KiB After Width: | Height: | Size: 706 KiB |
Before Width: | Height: | Size: 480 KiB After Width: | Height: | Size: 480 KiB |
Before Width: | Height: | Size: 680 KiB After Width: | Height: | Size: 680 KiB |
Before Width: | Height: | Size: 686 KiB After Width: | Height: | Size: 686 KiB |
Before Width: | Height: | Size: 848 KiB After Width: | Height: | Size: 848 KiB |
Before Width: | Height: | Size: 703 KiB After Width: | Height: | Size: 703 KiB |
BIN
docs/assets/screenshots/mail-rules-edited.png
Normal file
After Width: | Height: | Size: 76 KiB |
Before Width: | Height: | Size: 388 KiB After Width: | Height: | Size: 388 KiB |
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 517 KiB After Width: | Height: | Size: 517 KiB |
337
docs/conf.py
@@ -1,337 +0,0 @@
|
||||
import sphinx_rtd_theme
|
||||
|
||||
|
||||
__version__ = None
|
||||
__full_version_str__ = None
|
||||
__major_minor_version_str__ = None
|
||||
exec(open("../src/paperless/version.py").read())
|
||||
|
||||
|
||||
extensions = [
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.intersphinx",
|
||||
"sphinx.ext.todo",
|
||||
"sphinx.ext.imgmath",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx_rtd_theme",
|
||||
"myst_parser",
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["_templates"]
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
".md": "markdown",
|
||||
}
|
||||
|
||||
# The encoding of source files.
|
||||
# source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = "index"
|
||||
|
||||
# General information about the project.
|
||||
project = "Paperless-ngx"
|
||||
copyright = "2015-2022, Daniel Quinn, Jonas Winkler, and the paperless-ngx team"
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
|
||||
#
|
||||
# If the build process ever explodes here, it's because you've set the version
|
||||
# number in paperless.version to a tuple with 3 numbers in it.
|
||||
#
|
||||
|
||||
# The short X.Y version.
|
||||
version = __major_minor_version_str__
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = __full_version_str__
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
# language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
# today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
# today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ["_build"]
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all
|
||||
# documents.
|
||||
# default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
# add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
# add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
# show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = "sphinx"
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
# modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
# keep_warnings = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
# html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
html_theme_path = []
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
# html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
# html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
# html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
# html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ["_static"]
|
||||
|
||||
# These paths are either relative to html_static_path
|
||||
# or fully qualified paths (eg. https://...)
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
|
||||
html_js_files = [
|
||||
"js/darkmode.js",
|
||||
]
|
||||
|
||||
# Add any extra paths that contain custom files (such as robots.txt or
|
||||
# .htaccess) here, relative to this directory. These files are copied
|
||||
# directly to the root of the documentation.
|
||||
# html_extra_path = []
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
# html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
# html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
# html_sidebars = {}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
# html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
# html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
# html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
# html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
# html_show_sourcelink = True
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
# html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
# html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
# html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
# html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = "paperless"
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
("index", "paperless.tex", "Paperless Documentation", "Daniel Quinn", "manual"),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
# latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
# latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
# latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
# latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
# latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
# latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [("index", "paperless", "Paperless Documentation", ["Daniel Quinn"], 1)]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
# man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
(
|
||||
"index",
|
||||
"Paperless",
|
||||
"Paperless Documentation",
|
||||
"Daniel Quinn",
|
||||
"paperless",
|
||||
"Scan, index, and archive all of your paper documents.",
|
||||
"Miscellaneous",
|
||||
),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
# texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
# texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
# texinfo_show_urls = 'footnote'
|
||||
|
||||
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||
# texinfo_no_detailmenu = False
|
||||
|
||||
|
||||
# -- Options for Epub output ----------------------------------------------
|
||||
|
||||
# Bibliographic Dublin Core info.
|
||||
epub_title = "Paperless"
|
||||
epub_author = "Daniel Quinn"
|
||||
epub_publisher = "Daniel Quinn"
|
||||
epub_copyright = "2015, Daniel Quinn"
|
||||
|
||||
# The basename for the epub file. It defaults to the project name.
|
||||
# epub_basename = u'Paperless'
|
||||
|
||||
# The HTML theme for the epub output. Since the default themes are not optimized
|
||||
# for small screen space, using the same theme for HTML and epub output is
|
||||
# usually not wise. This defaults to 'epub', a theme designed to save visual
|
||||
# space.
|
||||
# epub_theme = 'epub'
|
||||
|
||||
# The language of the text. It defaults to the language option
|
||||
# or en if the language is not set.
|
||||
# epub_language = ''
|
||||
|
||||
# The scheme of the identifier. Typical schemes are ISBN or URL.
|
||||
# epub_scheme = ''
|
||||
|
||||
# The unique identifier of the text. This can be a ISBN number
|
||||
# or the project homepage.
|
||||
# epub_identifier = ''
|
||||
|
||||
# A unique identification for the text.
|
||||
# epub_uid = ''
|
||||
|
||||
# A tuple containing the cover image and cover page html template filenames.
|
||||
# epub_cover = ()
|
||||
|
||||
# A sequence of (type, uri, title) tuples for the guide element of content.opf.
|
||||
# epub_guide = ()
|
||||
|
||||
# HTML files that should be inserted before the pages created by sphinx.
|
||||
# The format is a list of tuples containing the path and title.
|
||||
# epub_pre_files = []
|
||||
|
||||
# HTML files that should be inserted after the pages created by sphinx.
|
||||
# The format is a list of tuples containing the path and title.
|
||||
# epub_post_files = []
|
||||
|
||||
# A list of files that should not be packed into the epub file.
|
||||
epub_exclude_files = ["search.html"]
|
||||
|
||||
# The depth of the table of contents in toc.ncx.
|
||||
# epub_tocdepth = 3
|
||||
|
||||
# Allow duplicate toc entries.
|
||||
# epub_tocdup = True
|
||||
|
||||
# Choose between 'default' and 'includehidden'.
|
||||
# epub_tocscope = 'default'
|
||||
|
||||
# Fix unsupported image types using the PIL.
|
||||
# epub_fix_images = False
|
||||
|
||||
# Scale large images.
|
||||
# epub_max_image_width = 0
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
# epub_show_urls = 'inline'
|
||||
|
||||
# If false, no index is generated.
|
||||
# epub_use_index = True
|
||||
|
||||
|
||||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
intersphinx_mapping = {"http://docs.python.org/": None}
|
1119
docs/configuration.md
Normal file
@@ -1,931 +0,0 @@
|
||||
.. _configuration:
|
||||
|
||||
*************
|
||||
Configuration
|
||||
*************
|
||||
|
||||
Paperless provides a wide range of customizations.
|
||||
Depending on how you run paperless, these settings have to be defined in different
|
||||
places.
|
||||
|
||||
* If you run paperless on docker, ``paperless.conf`` is not used. Rather, configure
|
||||
paperless by copying necessary options to ``docker-compose.env``.
|
||||
* If you are running paperless on anything else, paperless will search for the
|
||||
configuration file in these locations and use the first one it finds:
|
||||
|
||||
.. code::
|
||||
|
||||
/path/to/paperless/paperless.conf
|
||||
/etc/paperless.conf
|
||||
/usr/local/etc/paperless.conf
|
||||
|
||||
|
||||
Required services
|
||||
#################
|
||||
|
||||
PAPERLESS_REDIS=<url>
|
||||
This is required for processing scheduled tasks such as email fetching, index
|
||||
optimization and for training the automatic document matcher.
|
||||
|
||||
* If your Redis server needs login credentials PAPERLESS_REDIS = ``redis://<username>:<password>@<host>:<port>``
|
||||
|
||||
* With the requirepass option PAPERLESS_REDIS = ``redis://:<password>@<host>:<port>``
|
||||
|
||||
`More information on securing your Redis Instance <https://redis.io/docs/getting-started/#securing-redis>`_.
|
||||
|
||||
Defaults to redis://localhost:6379.
|
||||
|
||||
PAPERLESS_DBENGINE=<engine_name>
|
||||
Optional, gives the ability to choose Postgres or MariaDB for database engine.
|
||||
Available options are `postgresql` and `mariadb`.
|
||||
|
||||
Default is `postgresql`.
|
||||
|
||||
.. warning::
|
||||
|
||||
Using MariaDB comes with some caveats. See :ref:`advanced-mysql-caveats` for details.
|
||||
|
||||
|
||||
PAPERLESS_DBHOST=<hostname>
|
||||
By default, sqlite is used as the database backend. This can be changed here.
|
||||
|
||||
Set PAPERLESS_DBHOST and another database will be used instead of sqlite.
|
||||
|
||||
PAPERLESS_DBPORT=<port>
|
||||
Adjust port if necessary.
|
||||
|
||||
Default is 5432.
|
||||
|
||||
PAPERLESS_DBNAME=<name>
|
||||
Database name in PostgreSQL or MariaDB.
|
||||
|
||||
Defaults to "paperless".
|
||||
|
||||
PAPERLESS_DBUSER=<name>
|
||||
Database user in PostgreSQL or MariaDB.
|
||||
|
||||
Defaults to "paperless".
|
||||
|
||||
PAPERLESS_DBPASS=<password>
|
||||
Database password for PostgreSQL or MariaDB.
|
||||
|
||||
Defaults to "paperless".
|
||||
|
||||
PAPERLESS_DBSSLMODE=<mode>
|
||||
SSL mode to use when connecting to PostgreSQL.
|
||||
|
||||
See `the official documentation about sslmode <https://www.postgresql.org/docs/current/libpq-ssl.html>`_.
|
||||
|
||||
Default is ``prefer``.
|
||||
|
||||
PAPERLESS_DB_TIMEOUT=<float>
|
||||
Amount of time for a database connection to wait for the database to unlock.
|
||||
Mostly applicable for an sqlite based installation, consider changing to postgresql
|
||||
if you need to increase this.
|
||||
|
||||
Defaults to unset, keeping the Django defaults.
|
||||
|
||||
Paths and folders
|
||||
#################
|
||||
|
||||
PAPERLESS_CONSUMPTION_DIR=<path>
|
||||
This is where your documents should go to be consumed. Make sure that it exists
|
||||
and that the user running the paperless service can read/write its contents
|
||||
before you start Paperless.
|
||||
|
||||
Don't change this when using docker, as it only changes the path within the
|
||||
container. Change the local consumption directory in the docker-compose.yml
|
||||
file instead.
|
||||
|
||||
Defaults to "../consume/", relative to the "src" directory.
|
||||
|
||||
PAPERLESS_DATA_DIR=<path>
|
||||
This is where paperless stores all its data (search index, SQLite database,
|
||||
classification model, etc).
|
||||
|
||||
Defaults to "../data/", relative to the "src" directory.
|
||||
|
||||
PAPERLESS_TRASH_DIR=<path>
|
||||
Instead of removing deleted documents, they are moved to this directory.
|
||||
|
||||
This must be writeable by the user running paperless. When running inside
|
||||
docker, ensure that this path is within a permanent volume (such as
|
||||
"../media/trash") so it won't get lost on upgrades.
|
||||
|
||||
Defaults to empty (i.e. really delete documents).
|
||||
|
||||
PAPERLESS_MEDIA_ROOT=<path>
|
||||
This is where your documents and thumbnails are stored.
|
||||
|
||||
You can set this and PAPERLESS_DATA_DIR to the same folder to have paperless
|
||||
store all its data within the same volume.
|
||||
|
||||
Defaults to "../media/", relative to the "src" directory.
|
||||
|
||||
PAPERLESS_STATICDIR=<path>
|
||||
Override the default STATIC_ROOT here. This is where all static files
|
||||
created using "collectstatic" manager command are stored.
|
||||
|
||||
Unless you're doing something fancy, there is no need to override this.
|
||||
|
||||
Defaults to "../static/", relative to the "src" directory.
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT=<format>
|
||||
Changes the filenames paperless uses to store documents in the media directory.
|
||||
See :ref:`advanced-file_name_handling` for details.
|
||||
|
||||
Default is none, which disables this feature.
|
||||
|
||||
PAPERLESS_FILENAME_FORMAT_REMOVE_NONE=<bool>
|
||||
Tells paperless to replace placeholders in `PAPERLESS_FILENAME_FORMAT` that would resolve
|
||||
to 'none' to be omitted from the resulting filename. This also holds true for directory
|
||||
names.
|
||||
See :ref:`advanced-file_name_handling` for details.
|
||||
|
||||
Defaults to `false` which disables this feature.
|
||||
|
||||
PAPERLESS_LOGGING_DIR=<path>
|
||||
This is where paperless will store log files.
|
||||
|
||||
Defaults to "``PAPERLESS_DATA_DIR``/log/".
|
||||
|
||||
|
||||
Logging
|
||||
#######
|
||||
|
||||
PAPERLESS_LOGROTATE_MAX_SIZE=<num>
|
||||
Maximum file size for log files before they are rotated, in bytes.
|
||||
|
||||
Defaults to 1 MiB.
|
||||
|
||||
PAPERLESS_LOGROTATE_MAX_BACKUPS=<num>
|
||||
Number of rotated log files to keep.
|
||||
|
||||
Defaults to 20.
|
||||
|
||||
.. _hosting-and-security:
|
||||
|
||||
Hosting & Security
|
||||
##################
|
||||
|
||||
PAPERLESS_SECRET_KEY=<key>
|
||||
Paperless uses this to make session tokens. If you expose paperless on the
|
||||
internet, you need to change this, since the default secret is well known.
|
||||
|
||||
Use any sequence of characters. The more, the better. You don't need to
|
||||
remember this. Just face-roll your keyboard.
|
||||
|
||||
Default is listed in the file ``src/paperless/settings.py``.
|
||||
|
||||
PAPERLESS_URL=<url>
|
||||
This setting can be used to set the three options below (ALLOWED_HOSTS,
|
||||
CORS_ALLOWED_HOSTS and CSRF_TRUSTED_ORIGINS). If the other options are
|
||||
set the values will be combined with this one. Do not include a trailing
|
||||
slash. E.g. https://paperless.domain.com
|
||||
|
||||
Defaults to empty string, leaving the other settings unaffected.
|
||||
|
||||
PAPERLESS_CSRF_TRUSTED_ORIGINS=<comma-separated-list>
|
||||
A list of trusted origins for unsafe requests (e.g. POST). As of Django 4.0
|
||||
this is required to access the Django admin via the web.
|
||||
See https://docs.djangoproject.com/en/4.0/ref/settings/#csrf-trusted-origins
|
||||
|
||||
Can also be set using PAPERLESS_URL (see above).
|
||||
|
||||
Defaults to empty string, which does not add any origins to the trusted list.
|
||||
|
||||
PAPERLESS_ALLOWED_HOSTS=<comma-separated-list>
|
||||
If you're planning on putting Paperless on the open internet, then you
|
||||
really should set this value to the domain name you're using. Failing to do
|
||||
so leaves you open to HTTP host header attacks:
|
||||
https://docs.djangoproject.com/en/3.1/topics/security/#host-header-validation
|
||||
|
||||
Just remember that this is a comma-separated list, so "example.com" is fine,
|
||||
as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
|
||||
|
||||
Can also be set using PAPERLESS_URL (see above).
|
||||
|
||||
If manually set, please remember to include "localhost". Otherwise docker
|
||||
healthcheck will fail.
|
||||
|
||||
Defaults to "*", which is all hosts.
|
||||
|
||||
PAPERLESS_CORS_ALLOWED_HOSTS=<comma-separated-list>
|
||||
You need to add your servers to the list of allowed hosts that can do CORS
|
||||
calls. Set this to your public domain name.
|
||||
|
||||
Can also be set using PAPERLESS_URL (see above).
|
||||
|
||||
Defaults to "http://localhost:8000".
|
||||
|
||||
PAPERLESS_FORCE_SCRIPT_NAME=<path>
|
||||
To host paperless under a subpath url like example.com/paperless you set
|
||||
this value to /paperless. No trailing slash!
|
||||
|
||||
Defaults to none, which hosts paperless at "/".
|
||||
|
||||
PAPERLESS_STATIC_URL=<path>
|
||||
Override the STATIC_URL here. Unless you're hosting Paperless off a
|
||||
subdomain like /paperless/, you probably don't need to change this.
|
||||
If you do change it, be sure to include the trailing slash.
|
||||
|
||||
Defaults to "/static/".
|
||||
|
||||
.. note::
|
||||
|
||||
When hosting paperless behind a reverse proxy like Traefik or Nginx at a subpath e.g.
|
||||
example.com/paperlessngx you will also need to set ``PAPERLESS_FORCE_SCRIPT_NAME``
|
||||
(see above).
|
||||
|
||||
PAPERLESS_AUTO_LOGIN_USERNAME=<username>
|
||||
Specify a username here so that paperless will automatically perform login
|
||||
with the selected user.
|
||||
|
||||
.. danger::
|
||||
|
||||
Do not use this when exposing paperless on the internet. There are no
|
||||
checks in place that would prevent you from doing this.
|
||||
|
||||
Defaults to none, which disables this feature.
|
||||
|
||||
PAPERLESS_ADMIN_USER=<username>
|
||||
If this environment variable is specified, Paperless automatically creates
|
||||
a superuser with the provided username at start. This is useful in cases
|
||||
where you can not run the `createsuperuser` command separately, such as Kubernetes
|
||||
or AWS ECS.
|
||||
|
||||
Requires `PAPERLESS_ADMIN_PASSWORD` to be set.
|
||||
|
||||
.. note::
|
||||
|
||||
This will not change an existing [super]user's password, nor will
|
||||
it recreate a user that already exists. You can leave this throughout
|
||||
the lifecycle of the containers.
|
||||
|
||||
PAPERLESS_ADMIN_MAIL=<email>
|
||||
(Optional) Specify superuser email address. Only used when
|
||||
`PAPERLESS_ADMIN_USER` is set.
|
||||
|
||||
Defaults to ``root@localhost``.
|
||||
|
||||
PAPERLESS_ADMIN_PASSWORD=<password>
|
||||
Only used when `PAPERLESS_ADMIN_USER` is set.
|
||||
This will be the password of the automatically created superuser.
|
||||
|
||||
|
||||
PAPERLESS_COOKIE_PREFIX=<str>
|
||||
Specify a prefix that is added to the cookies used by paperless to identify
|
||||
the currently logged in user. This is useful for when you're running two
|
||||
instances of paperless on the same host.
|
||||
|
||||
After changing this, you will have to login again.
|
||||
|
||||
Defaults to ``""``, which does not alter the cookie names.
|
||||
|
||||
PAPERLESS_ENABLE_HTTP_REMOTE_USER=<bool>
|
||||
Allows authentication via HTTP_REMOTE_USER which is used by some SSO
|
||||
applications.
|
||||
|
||||
.. warning::
|
||||
|
||||
This will allow authentication by simply adding a ``Remote-User: <username>`` header
|
||||
to a request. Use with care! You especially *must* ensure that any such header is not
|
||||
passed from your proxy server to paperless.
|
||||
|
||||
If you're exposing paperless to the internet directly, do not use this.
|
||||
|
||||
Also see the warning `in the official documentation <https://docs.djangoproject.com/en/3.1/howto/auth-remote-user/#configuration>`_.
|
||||
|
||||
Defaults to `false` which disables this feature.
|
||||
|
||||
PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME=<str>
|
||||
If `PAPERLESS_ENABLE_HTTP_REMOTE_USER` is enabled, this property lets you
|
||||
customize the name of the HTTP header from which the authenticated username
|
||||
is extracted. Values are in terms of
|
||||
`HttpRequest.META <https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpRequest.META>`_.
|
||||
Thus, the configured value must start with `HTTP_` followed by the
|
||||
normalized actual header name.
|
||||
|
||||
Defaults to `HTTP_REMOTE_USER`.
|
||||
|
||||
PAPERLESS_LOGOUT_REDIRECT_URL=<str>
|
||||
URL to redirect the user to after a logout. This can be used together with
|
||||
`PAPERLESS_ENABLE_HTTP_REMOTE_USER` to redirect the user back to the SSO
|
||||
application's logout page.
|
||||
|
||||
Defaults to None, which disables this feature.
|
||||
|
||||
.. _configuration-ocr:
|
||||
|
||||
OCR settings
|
||||
############
|
||||
|
||||
Paperless uses `OCRmyPDF <https://ocrmypdf.readthedocs.io/en/latest/>`_ for
|
||||
performing OCR on documents and images. Paperless uses sensible defaults for
|
||||
most settings, but all of them can be configured to your needs.
|
||||
|
||||
PAPERLESS_OCR_LANGUAGE=<lang>
|
||||
Customize the language that paperless will attempt to use when
|
||||
parsing documents.
|
||||
|
||||
It should be a 3-letter language code consistent with ISO
|
||||
639: https://www.loc.gov/standards/iso639-2/php/code_list.php
|
||||
|
||||
Set this to the language most of your documents are written in.
|
||||
|
||||
This can be a combination of multiple languages such as ``deu+eng``,
|
||||
in which case tesseract will use whatever language matches best.
|
||||
Keep in mind that tesseract uses much more cpu time with multiple
|
||||
languages enabled.
|
||||
|
||||
Defaults to "eng".
|
||||
|
||||
Note: If your language contains a '-' such as chi-sim, you must use chi_sim
|
||||
|
||||
PAPERLESS_OCR_MODE=<mode>
|
||||
Tell paperless when and how to perform ocr on your documents. Four modes
|
||||
are available:
|
||||
|
||||
* ``skip``: Paperless skips pages that already contain text and will perform ocr only on pages
|
||||
where no text is present. This is the safest option.
|
||||
* ``skip_noarchive``: In addition to skip, paperless won't create an
|
||||
archived version of your documents when it finds any text in them.
|
||||
This is useful if you don't want to have two almost-identical versions
|
||||
of your digital documents in the media folder. This is the fastest option.
|
||||
* ``redo``: Paperless will OCR all pages of your documents and attempt to
|
||||
replace any existing text layers with new text. This will be useful for
|
||||
documents from scanners that already performed OCR with insufficient
|
||||
results. It will also perform OCR on purely digital documents.
|
||||
|
||||
This option may fail on some documents that have features that cannot
|
||||
be removed, such as forms. In this case, the text from the document is
|
||||
used instead.
|
||||
* ``force``: Paperless rasterizes your documents, converting any text
|
||||
into images and puts the OCRed text on top. This works for all documents,
|
||||
however, the resulting document may be significantly larger and text
|
||||
won't appear as sharp when zoomed in.
|
||||
|
||||
The default is ``skip``, which only performs OCR when necessary and always
|
||||
creates archived documents.
|
||||
|
||||
Read more about this in the `OCRmyPDF documentation <https://ocrmypdf.readthedocs.io/en/latest/advanced.html#when-ocr-is-skipped>`_.
|
||||
|
||||
PAPERLESS_OCR_CLEAN=<mode>
|
||||
Tells paperless to use ``unpaper`` to clean any input document before
|
||||
sending it to tesseract. This uses more resources, but generally results
|
||||
in better OCR results. The following modes are available:
|
||||
|
||||
* ``clean``: Apply unpaper.
|
||||
* ``clean-final``: Apply unpaper, and use the cleaned images to build the
|
||||
output file instead of the original images.
|
||||
* ``none``: Do not apply unpaper.
|
||||
|
||||
Defaults to ``clean``.
|
||||
|
||||
.. note::
|
||||
|
||||
``clean-final`` is incompatible with ocr mode ``redo``. When both
|
||||
``clean-final`` and the ocr mode ``redo`` are configured, ``clean``
|
||||
is used instead.
|
||||
|
||||
PAPERLESS_OCR_DESKEW=<bool>
|
||||
Tells paperless to correct skewing (slight rotation of input images mainly
|
||||
due to improper scanning)
|
||||
|
||||
Defaults to ``true``, which enables this feature.
|
||||
|
||||
.. note::
|
||||
|
||||
Deskewing is incompatible with ocr mode ``redo``. Deskewing will get
|
||||
disabled automatically if ``redo`` is used as the ocr mode.
|
||||
|
||||
PAPERLESS_OCR_ROTATE_PAGES=<bool>
|
||||
Tells paperless to correct page rotation (90°, 180° and 270° rotation).
|
||||
|
||||
If you notice that paperless is not rotating incorrectly rotated
|
||||
pages (or vice versa), try adjusting the threshold up or down (see below).
|
||||
|
||||
Defaults to ``true``, which enables this feature.
|
||||
|
||||
|
||||
PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD=<num>
|
||||
Adjust the threshold for automatic page rotation by ``PAPERLESS_OCR_ROTATE_PAGES``.
|
||||
This is an arbitrary value reported by tesseract. "15" is a very conservative value,
|
||||
whereas "2" is a very aggressive option and will often result in correctly rotated pages
|
||||
being rotated as well.
|
||||
|
||||
Defaults to "12".
|
||||
|
||||
PAPERLESS_OCR_OUTPUT_TYPE=<type>
|
||||
Specify the type of PDF documents that paperless should produce.
|
||||
|
||||
* ``pdf``: Modify the PDF document as little as possible.
|
||||
* ``pdfa``: Convert PDF documents into PDF/A-2b documents, which is a
|
||||
subset of the entire PDF specification and meant for storing
|
||||
documents long term.
|
||||
* ``pdfa-1``, ``pdfa-2``, ``pdfa-3`` to specify the exact version of
|
||||
PDF/A you wish to use.
|
||||
|
||||
If not specified, ``pdfa`` is used. Remember that paperless also keeps
|
||||
the original input file as well as the archived version.
|
||||
|
||||
|
||||
PAPERLESS_OCR_PAGES=<num>
|
||||
Tells paperless to use only the specified amount of pages for OCR. Documents
|
||||
with less than the specified amount of pages get OCR'ed completely.
|
||||
|
||||
Specifying 1 here will only use the first page.
|
||||
|
||||
When combined with ``PAPERLESS_OCR_MODE=redo`` or ``PAPERLESS_OCR_MODE=force``,
|
||||
paperless will not modify any text it finds on excluded pages and copy it
|
||||
verbatim.
|
||||
|
||||
Defaults to 0, which disables this feature and always uses all pages.
|
||||
|
||||
PAPERLESS_OCR_IMAGE_DPI=<num>
|
||||
Paperless will OCR any images you put into the system and convert them
|
||||
into PDF documents. This is useful if your scanner produces images.
|
||||
In order to do so, paperless needs to know the DPI of the image.
|
||||
Most images from scanners will have this information embedded and
|
||||
paperless will detect and use that information. In case this fails, it
|
||||
uses this value as a fallback.
|
||||
|
||||
Set this to the DPI your scanner produces images at.
|
||||
|
||||
Default is none, which will automatically calculate image DPI so that
|
||||
the produced PDF documents are A4 sized.
|
||||
|
||||
PAPERLESS_OCR_MAX_IMAGE_PIXELS=<num>
|
||||
Paperless will raise a warning when OCRing images which are over this limit and
|
||||
will not OCR images which are more than twice this limit. Note this does not
|
||||
prevent the document from being consumed, but could result in missing text content.
|
||||
|
||||
If unset, will default to the value determined by
|
||||
`Pillow <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS>`_.
|
||||
|
||||
.. note::
|
||||
|
||||
Increasing this limit could cause Paperless to consume additional resources
|
||||
when consuming a file. Be sure you have sufficient system resources.
|
||||
|
||||
.. caution::
|
||||
|
||||
The limit is intended to prevent malicious files from consuming system resources
|
||||
and causing crashes and other errors. Only increase this value if you are certain
|
||||
your documents are not malicious and you need the text which was not OCRed
|
||||
|
||||
PAPERLESS_OCR_USER_ARGS=<json>
|
||||
OCRmyPDF offers many more options. Use this parameter to specify any
|
||||
additional arguments you wish to pass to OCRmyPDF. Since Paperless uses
|
||||
the API of OCRmyPDF, you have to specify these in a format that can be
|
||||
passed to the API. See `the API reference of OCRmyPDF <https://ocrmypdf.readthedocs.io/en/latest/api.html#reference>`_
|
||||
for valid parameters. All command line options are supported, but they
|
||||
use underscores instead of dashes.
|
||||
|
||||
.. caution::
|
||||
|
||||
Paperless has been tested to work with the OCR options provided
|
||||
above. There are many options that are incompatible with each other,
|
||||
so specifying invalid options may prevent paperless from consuming
|
||||
any documents.
|
||||
|
||||
Specify arguments as a JSON dictionary. Keep note of lower case booleans
|
||||
and double quoted parameter names and strings. Examples:
|
||||
|
||||
.. code:: json
|
||||
|
||||
{"deskew": true, "optimize": 3, "unpaper_args": "--pre-rotate 90"}
|
||||
|
||||
.. _configuration-tika:
|
||||
|
||||
Tika settings
|
||||
#############
|
||||
|
||||
Paperless can make use of `Tika <https://tika.apache.org/>`_ and
|
||||
`Gotenberg <https://gotenberg.dev/>`_ for parsing and
|
||||
converting "Office" documents (such as ".doc", ".xlsx" and ".odt"). If you
|
||||
wish to use this, you must provide a Tika server and a Gotenberg server,
|
||||
configure their endpoints, and enable the feature.
|
||||
|
||||
PAPERLESS_TIKA_ENABLED=<bool>
|
||||
Enable (or disable) the Tika parser.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
PAPERLESS_TIKA_ENDPOINT=<url>
|
||||
Set the endpoint URL where Paperless can reach your Tika server.
|
||||
|
||||
Defaults to "http://localhost:9998".
|
||||
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT=<url>
|
||||
Set the endpoint URL where Paperless can reach your Gotenberg server.
|
||||
|
||||
Defaults to "http://localhost:3000".
|
||||
|
||||
If you run paperless on docker, you can add those services to the docker-compose
|
||||
file (see the provided ``docker-compose.sqlite-tika.yml`` file for reference). The changes
|
||||
required are as follows:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
services:
|
||||
# ...
|
||||
|
||||
webserver:
|
||||
# ...
|
||||
|
||||
environment:
|
||||
# ...
|
||||
|
||||
PAPERLESS_TIKA_ENABLED: 1
|
||||
PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
|
||||
PAPERLESS_TIKA_ENDPOINT: http://tika:9998
|
||||
|
||||
# ...
|
||||
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
|
||||
tika:
|
||||
image: ghcr.io/paperless-ngx/tika:latest
|
||||
restart: unless-stopped
|
||||
|
||||
Add the configuration variables to the environment of the webserver (alternatively
|
||||
put the configuration in the ``docker-compose.env`` file) and add the additional
|
||||
services below the webserver service. Watch out for indentation.
|
||||
|
||||
Make sure to use the correct format `PAPERLESS_TIKA_ENABLED = 1` so python_dotenv can parse the statement correctly.
|
||||
|
||||
Software tweaks
|
||||
###############
|
||||
|
||||
PAPERLESS_TASK_WORKERS=<num>
|
||||
Paperless does multiple things in the background: Maintain the search index,
|
||||
maintain the automatic matching algorithm, check emails, consume documents,
|
||||
etc. This variable specifies how many things it will do in parallel.
|
||||
|
||||
Defaults to 1
|
||||
|
||||
|
||||
PAPERLESS_THREADS_PER_WORKER=<num>
|
||||
Furthermore, paperless uses multiple threads when consuming documents to
|
||||
speed up OCR. This variable specifies how many pages paperless will process
|
||||
in parallel on a single document.
|
||||
|
||||
.. caution::
|
||||
|
||||
Ensure that the product
|
||||
|
||||
PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER
|
||||
|
||||
does not exceed your CPU core count or else paperless will be extremely slow.
|
||||
If you want paperless to process many documents in parallel, choose a high
|
||||
worker count. If you want paperless to process very large documents faster,
|
||||
use a higher thread per worker count.
|
||||
|
||||
The default is a balance between the two, according to your CPU core count,
|
||||
with a slight favor towards threads per worker:
|
||||
|
||||
+----------------+---------+---------+
|
||||
| CPU core count | Workers | Threads |
|
||||
+----------------+---------+---------+
|
||||
| 1 | 1 | 1 |
|
||||
+----------------+---------+---------+
|
||||
| 2 | 2 | 1 |
|
||||
+----------------+---------+---------+
|
||||
| 4 | 2 | 2 |
|
||||
+----------------+---------+---------+
|
||||
| 6 | 2 | 3 |
|
||||
+----------------+---------+---------+
|
||||
| 8 | 2 | 4 |
|
||||
+----------------+---------+---------+
|
||||
| 12 | 3 | 4 |
|
||||
+----------------+---------+---------+
|
||||
| 16 | 4 | 4 |
|
||||
+----------------+---------+---------+
|
||||
|
||||
If you only specify PAPERLESS_TASK_WORKERS, paperless will adjust
|
||||
PAPERLESS_THREADS_PER_WORKER automatically.
|
||||
|
||||
|
||||
PAPERLESS_WORKER_TIMEOUT=<num>
|
||||
Machines with few cores or weak ones might not be able to finish OCR on
|
||||
large documents within the default 1800 seconds. So extending this timeout
|
||||
may prove to be useful on weak hardware setups.
|
||||
|
||||
PAPERLESS_WORKER_RETRY=<num>
|
||||
If PAPERLESS_WORKER_TIMEOUT has been configured, the retry time for a task can
|
||||
also be configured. By default, this value will be set to 10s more than the
|
||||
worker timeout. This value should never be set less than the worker timeout.
|
||||
|
||||
PAPERLESS_TIME_ZONE=<timezone>
|
||||
Set the time zone here.
|
||||
See https://docs.djangoproject.com/en/3.1/ref/settings/#std:setting-TIME_ZONE
|
||||
for details on how to set it.
|
||||
|
||||
Defaults to UTC.
|
||||
|
||||
|
||||
.. _configuration-polling:
|
||||
|
||||
PAPERLESS_CONSUMER_POLLING=<num>
|
||||
If paperless won't find documents added to your consume folder, it might
|
||||
not be able to automatically detect filesystem changes. In that case,
|
||||
specify a polling interval in seconds here, which will then cause paperless
|
||||
to periodically check your consumption directory for changes. This will also
|
||||
disable listening for file system changes with ``inotify``.
|
||||
|
||||
Defaults to 0, which disables polling and uses filesystem notifications.
|
||||
|
||||
PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=<num>
|
||||
If consumer polling is enabled, sets the number of times paperless will check for a
|
||||
file to remain unmodified.
|
||||
|
||||
Defaults to 5.
|
||||
|
||||
PAPERLESS_CONSUMER_POLLING_DELAY=<num>
|
||||
If consumer polling is enabled, sets the delay in seconds between each check (above) paperless
|
||||
will do while waiting for a file to remain unmodified.
|
||||
|
||||
Defaults to 5.
|
||||
|
||||
.. _configuration-inotify:
|
||||
|
||||
PAPERLESS_CONSUMER_INOTIFY_DELAY=<num>
|
||||
Sets the time in seconds the consumer will wait for additional events
|
||||
from inotify before the consumer will consider a file ready and begin consumption.
|
||||
Certain scanners or network setups may generate multiple events for a single file,
|
||||
leading to multiple consumers working on the same file. Configure this to
|
||||
prevent that.
|
||||
|
||||
Defaults to 0.5 seconds.
|
||||
|
||||
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
||||
When the consumer detects a duplicate document, it will not touch the
|
||||
original document. This default behavior can be changed here.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
|
||||
PAPERLESS_CONSUMER_RECURSIVE=<bool>
|
||||
Enable recursive watching of the consumption directory. Paperless will
|
||||
then pick up files from subdirectories within your consumption
|
||||
directory as well.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
|
||||
PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
|
||||
Set the names of subdirectories as tags for consumed files.
|
||||
E.g. <CONSUMPTION_DIR>/foo/bar/file.pdf will add the tags "foo" and "bar" to
|
||||
the consumed file. Paperless will create any tags that don't exist yet.
|
||||
|
||||
This is useful for sorting documents with certain tags such as ``car`` or
|
||||
``todo`` prior to consumption. These folders won't be deleted.
|
||||
|
||||
PAPERLESS_CONSUMER_RECURSIVE must be enabled for this to work.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
|
||||
Enables the scanning and page separation based on detected barcodes.
|
||||
This allows for scanning and adding multiple documents per uploaded
|
||||
file, which are separated by one or multiple barcode pages.
|
||||
|
||||
For ease of use, it is suggested to use a standardized separation page,
|
||||
e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_.
|
||||
|
||||
If no barcodes are detected in the uploaded file, no page separation
|
||||
will happen.
|
||||
|
||||
The original document will be removed and the separated pages will be
|
||||
saved as pdf.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
|
||||
PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT=<bool>
|
||||
Whether TIFF image files should be scanned for barcodes.
|
||||
This will automatically convert any TIFF image(s) to pdfs for later
|
||||
processing.
|
||||
This only has an effect, if PAPERLESS_CONSUMER_ENABLE_BARCODES has been
|
||||
enabled.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
|
||||
Defines the string to be detected as a separator barcode.
|
||||
If paperless is used with the PATCH-T separator pages, users
|
||||
shouldn't change this.
|
||||
|
||||
Defaults to "PATCHT"
|
||||
|
||||
PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
|
||||
On smaller systems, or even in the case of Very Large Documents, the consumer
|
||||
may explode, complaining about how it's "unable to extend pixel cache". In
|
||||
such cases, try setting this to a reasonably low value, like 32. The
|
||||
default is to use whatever is necessary to do everything without writing to
|
||||
disk, and units are in megabytes.
|
||||
|
||||
For more information on how to use this value, you should search
|
||||
the web for "MAGICK_MEMORY_LIMIT".
|
||||
|
||||
Defaults to 0, which disables the limit.
|
||||
|
||||
PAPERLESS_CONVERT_TMPDIR=<path>
|
||||
Similar to the memory limit, if you've got a small system and your OS mounts
|
||||
/tmp as tmpfs, you should set this to a path that's on a physical disk, like
|
||||
/home/your_user/tmp or something. ImageMagick will use this as scratch space
|
||||
when crunching through very large documents.
|
||||
|
||||
For more information on how to use this value, you should search
|
||||
the web for "MAGICK_TMPDIR".
|
||||
|
||||
Default is none, which disables the temporary directory.
|
||||
|
||||
PAPERLESS_POST_CONSUME_SCRIPT=<filename>
|
||||
After a document is consumed, Paperless can trigger an arbitrary script if
|
||||
you like. This script will be passed a number of arguments for you to work
|
||||
with. For more information, take a look at :ref:`advanced-post_consume_script`.
|
||||
|
||||
The default is blank, which means nothing will be executed.
|
||||
|
||||
PAPERLESS_FILENAME_DATE_ORDER=<format>
|
||||
Paperless will check the document text for document date information.
|
||||
Use this setting to enable checking the document filename for date
|
||||
information. The date order can be set to any option as specified in
|
||||
https://dateparser.readthedocs.io/en/latest/settings.html#date-order.
|
||||
The filename will be checked first, and if nothing is found, the document
|
||||
text will be checked as normal.
|
||||
|
||||
A date in a filename must have some separators (`.`, `-`, `/`, etc)
|
||||
for it to be parsed.
|
||||
|
||||
Defaults to none, which disables this feature.
|
||||
|
||||
PAPERLESS_NUMBER_OF_SUGGESTED_DATES=<num>
|
||||
Paperless searches an entire document for dates. The first date found will
|
||||
be used as the initial value for the created date. When this variable is
|
||||
greater than 0 (or left to its default value), paperless will also suggest
|
||||
other dates found in the document, up to a maximum of this setting. Note that
|
||||
duplicates will be removed, which can result in fewer dates displayed in the
|
||||
frontend than this setting value.
|
||||
|
||||
The task to find all dates can be time-consuming and increases with a higher
|
||||
(maximum) number of suggested dates and slower hardware.
|
||||
|
||||
Defaults to 3. Set to 0 to disable this feature.
|
||||
|
||||
PAPERLESS_THUMBNAIL_FONT_NAME=<filename>
|
||||
Paperless creates thumbnails for plain text files by rendering the content
|
||||
of the file on an image and uses a predefined font for that. This
|
||||
font can be changed here.
|
||||
|
||||
Note that this won't have any effect on already generated thumbnails.
|
||||
|
||||
Defaults to ``/usr/share/fonts/liberation/LiberationSerif-Regular.ttf``.
|
||||
|
||||
PAPERLESS_IGNORE_DATES=<string>
|
||||
Paperless parses a document's creation date from filename and file content.
|
||||
You may specify a comma separated list of dates that should be ignored during
|
||||
this process. This is useful for special dates (like date of birth) that appear
|
||||
in documents regularly but are very unlikely to be the document's creation date.
|
||||
|
||||
The date is parsed using the order specified in PAPERLESS_DATE_ORDER
|
||||
|
||||
Defaults to an empty string to not ignore any dates.
|
||||
|
||||
PAPERLESS_DATE_ORDER=<format>
|
||||
Paperless will try to determine the document creation date from its contents.
|
||||
Specify the date format Paperless should expect to see within your documents.
|
||||
|
||||
This option defaults to DMY which translates to day first, month second, and year
|
||||
last order. Characters D, M, or Y can be shuffled to meet the required order.
|
||||
|
||||
PAPERLESS_CONSUMER_IGNORE_PATTERNS=<json>
|
||||
By default, paperless ignores certain files and folders in the consumption
|
||||
directory, such as system files created by the Mac OS.
|
||||
|
||||
This can be adjusted by configuring a custom json array with patterns to exclude.
|
||||
|
||||
Defaults to ``[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]``.
|
||||
|
||||
Binaries
|
||||
########
|
||||
|
||||
There are a few external software packages that Paperless expects to find on
|
||||
your system when it starts up. Unless you've done something creative with
|
||||
their installation, you probably won't need to edit any of these. However,
|
||||
if you've installed these programs somewhere where simply typing the name of
|
||||
the program doesn't automatically execute it (i.e. the program isn't in your
|
||||
$PATH), then you'll need to specify the literal path for that program.
|
||||
|
||||
PAPERLESS_CONVERT_BINARY=<path>
|
||||
Defaults to "convert".
|
||||
|
||||
PAPERLESS_GS_BINARY=<path>
|
||||
Defaults to "gs".
|
||||
|
||||
|
||||
.. _configuration-docker:
|
||||
|
||||
Docker-specific options
|
||||
#######################
|
||||
|
||||
These options don't have any effect in ``paperless.conf``. These options adjust
|
||||
the behavior of the docker container. Configure these in `docker-compose.env`.
|
||||
|
||||
PAPERLESS_WEBSERVER_WORKERS=<num>
|
||||
The number of worker processes the webserver should spawn. More worker processes
|
||||
usually result in the front end loading data much quicker. However, each worker process
|
||||
also loads the entire application into memory separately, so increasing this value
|
||||
will increase RAM usage.
|
||||
|
||||
Defaults to 1.
|
||||
|
||||
PAPERLESS_BIND_ADDR=<ip address>
|
||||
The IP address the webserver will listen on inside the container. There are
|
||||
special setups where you may need to configure this value to restrict the
|
||||
IP address or interface the webserver listens on.
|
||||
|
||||
Defaults to [::], meaning all interfaces, including IPv6.
|
||||
|
||||
PAPERLESS_PORT=<port>
|
||||
The port number the webserver will listen on inside the container. There are
|
||||
special setups where you may need this to avoid collisions with other
|
||||
services (like using podman with multiple containers in one pod).
|
||||
|
||||
Don't change this when using Docker. To change the port the webserver is
|
||||
reachable outside of the container, instead refer to the "ports" key in
|
||||
``docker-compose.yml``.
|
||||
|
||||
Defaults to 8000.
|
||||
|
||||
USERMAP_UID=<uid>
|
||||
The ID of the paperless user in the container. Set this to your actual user ID on the
|
||||
host system, which you can get by executing
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ id -u
|
||||
|
||||
Paperless will change ownership on its folders to this user, so you need to get this right
|
||||
in order to be able to write to the consumption directory.
|
||||
|
||||
Defaults to 1000.
|
||||
|
||||
USERMAP_GID=<gid>
|
||||
The ID of the paperless Group in the container. Set this to your actual group ID on the
|
||||
host system, which you can get by executing
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ id -g
|
||||
|
||||
Paperless will change ownership on its folders to this group, so you need to get this right
|
||||
in order to be able to write to the consumption directory.
|
||||
|
||||
Defaults to 1000.
|
||||
|
||||
PAPERLESS_OCR_LANGUAGES=<list>
|
||||
Additional OCR languages to install. By default, paperless comes with
|
||||
English, German, Italian, Spanish and French. If your language is not in this list, install
|
||||
additional languages with this configuration option:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
PAPERLESS_OCR_LANGUAGES=tur ces
|
||||
|
||||
To actually use these languages, also set the default OCR language of paperless:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
PAPERLESS_OCR_LANGUAGE=tur
|
||||
|
||||
Defaults to none, which does not install any additional languages.
|
||||
|
||||
PAPERLESS_ENABLE_FLOWER=<defined>
|
||||
If this environment variable is defined, the Celery monitoring tool
|
||||
`Flower <https://flower.readthedocs.io/en/latest/index.html>`_ will
|
||||
be started by the container.
|
||||
|
||||
You can read more about this in the :ref:`advanced setup <advanced-celery-monitoring>`
|
||||
documentation.
|
||||
|
||||
|
||||
.. _configuration-update-checking:
|
||||
|
||||
Update Checking
|
||||
###############
|
||||
|
||||
PAPERLESS_ENABLE_UPDATE_CHECK=<bool>
|
||||
|
||||
.. note::
|
||||
|
||||
This setting was deprecated in favor of a frontend setting after v1.9.2. A one-time
|
||||
migration is performed for users who have this setting set. This setting is always
|
||||
ignored if the corresponding frontend setting has been set.
|
464
docs/development.md
Normal file
@@ -0,0 +1,464 @@
|
||||
# Development
|
||||
|
||||
This section describes the steps you need to take to start development
|
||||
on Paperless-ngx.
|
||||
|
||||
Check out the source from GitHub. The repository is organized in the
|
||||
following way:
|
||||
|
||||
- `main` always represents the latest release and will only see
|
||||
changes when a new release is made.
|
||||
- `dev` contains the code that will be in the next release.
|
||||
- `feature-X` contain bigger changes that will be in some release, but
|
||||
not necessarily the next one.
|
||||
|
||||
When making functional changes to Paperless-ngx, _always_ make your changes
|
||||
on the `dev` branch.
|
||||
|
||||
Apart from that, the folder structure is as follows:
|
||||
|
||||
- `docs/` - Documentation.
|
||||
- `src-ui/` - Code of the front end.
|
||||
- `src/` - Code of the back end.
|
||||
- `scripts/` - Various scripts that help with different parts of
|
||||
development.
|
||||
- `docker/` - Files required to build the docker image.
|
||||
|
||||
## Contributing to Paperless-ngx
|
||||
|
||||
Maybe you've been using Paperless-ngx for a while and want to add a feature
|
||||
or two, or maybe you've come across a bug that you have some ideas how
|
||||
to solve. The beauty of open source software is that you can see what's
|
||||
wrong and help to get it fixed for everyone!
|
||||
|
||||
Before contributing please review our [code of
|
||||
conduct](https://github.com/paperless-ngx/paperless-ngx/blob/main/CODE_OF_CONDUCT.md)
|
||||
and other important information in the [contributing
|
||||
guidelines](https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md).
|
||||
|
||||
## Code formatting with pre-commit hooks
|
||||
|
||||
To ensure a consistent style and formatting across the project source,
|
||||
the project utilizes Git [`pre-commit`](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
|
||||
hooks to perform some formatting and linting before a commit is allowed.
|
||||
That way, everyone uses the same style and some common issues can be caught
|
||||
early on.
|
||||
|
||||
Once installed, hooks will run when you commit. If the formatting isn't
|
||||
quite right or a linter catches something, the commit will be rejected.
|
||||
You'll need to look at the output and fix the issue. Some hooks, such
|
||||
as the Python formatting tool `black`, will format failing
|
||||
files, so all you need to do is `git add` those files again
|
||||
and retry your commit.
|
||||
|
||||
## General setup
|
||||
|
||||
After you forked and cloned the code from GitHub you need to perform a
|
||||
first-time setup.
|
||||
|
||||
!!! note
|
||||
|
||||
Every command is executed directly from the root folder of the project unless specified otherwise.
|
||||
|
||||
1. Install prerequisites + pipenv as mentioned in
|
||||
[Bare metal route](/setup#bare_metal).
|
||||
|
||||
2. Copy `paperless.conf.example` to `paperless.conf` and enable debug
|
||||
mode within the file via `PAPERLESS_DEBUG=true`.
|
||||
|
||||
3. Create `consume` and `media` directories:
|
||||
|
||||
```bash
|
||||
$ mkdir -p consume media
|
||||
```
|
||||
|
||||
4. Install the Python dependencies:
|
||||
|
||||
```bash
|
||||
$ pipenv install --dev
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
Using a virtual environment is highly recommended. You can spawn one via `pipenv shell`.
|
||||
Make sure you're using Python 3.10.x or lower. Otherwise you might
|
||||
get issues with building dependencies. You can use
|
||||
[pyenv](https://github.com/pyenv/pyenv) to install a specific
|
||||
Python version.
|
||||
|
||||
5. Install pre-commit hooks:
|
||||
|
||||
```bash
|
||||
$ pre-commit install
|
||||
```
|
||||
|
||||
6. Apply migrations and create a superuser for your development instance:
|
||||
|
||||
```bash
|
||||
# src/
|
||||
|
||||
$ python3 manage.py migrate
|
||||
$ python3 manage.py createsuperuser
|
||||
```
|
||||
|
||||
7. You can now either ...
|
||||
|
||||
- install redis or
|
||||
|
||||
- use the included `scripts/start_services.sh` to use docker to fire
|
||||
up a redis instance (and some other services such as tika,
|
||||
gotenberg and a database server) or
|
||||
|
||||
- spin up a bare redis container
|
||||
|
||||
```
|
||||
$ docker run -d -p 6379:6379 --restart unless-stopped redis:latest
|
||||
```
|
||||
|
||||
8. Continue with either back-end or front-end development – or both :-).
|
||||
|
||||
## Back end development
|
||||
|
||||
The back end is a [Django](https://www.djangoproject.com/) application. [PyCharm](https://www.jetbrains.com/de-de/pycharm/) as well as [Visual Studio Code](https://code.visualstudio.com) work well for development, but you can use whatever you want.
|
||||
|
||||
Configure the IDE to use the `src/`-folder as the base source folder.
|
||||
Configure the following launch configurations in your IDE:
|
||||
|
||||
- `python3 manage.py runserver`
|
||||
- `python3 manage.py document_consumer`
|
||||
- `celery --app paperless worker -l DEBUG` (or any other log level)
|
||||
|
||||
To start them all:
|
||||
|
||||
```bash
|
||||
# src/
|
||||
|
||||
$ python3 manage.py runserver & \
|
||||
python3 manage.py document_consumer & \
|
||||
celery --app paperless worker -l DEBUG
|
||||
```
|
||||
|
||||
You might need the front end to test your back end code. This assumes that you have AngularJS installed on your system. Go to the [Front end development](#front-end-development) section for further details. To build the front end once use this command:
|
||||
|
||||
```bash
|
||||
# src-ui/
|
||||
|
||||
$ npm install
|
||||
$ ng build --configuration production
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
- Run `pytest` in the `src/` directory to execute all tests. This also
|
||||
generates an HTML coverage report. When running tests, `paperless.conf`
|
||||
is loaded as well. However, the tests rely on the default
|
||||
configuration. This is not ideal. But for now, make sure no settings
|
||||
except for DEBUG are overridden when testing.
|
||||
|
||||
!!! note
|
||||
|
||||
The line length rule E501 is generally useful for getting multiple
|
||||
source files next to each other on the screen. However, in some
|
||||
cases, it's just not possible to make some lines fit, especially
|
||||
complicated IF cases. Append `# noqa: E501` to disable this check
|
||||
for certain lines.
|
||||
|
||||
## Front end development
|
||||
|
||||
The front end is built using AngularJS. In order to get started, you need Node.js (version 14.15+) and
|
||||
`npm`.
|
||||
|
||||
!!! note
|
||||
|
||||
The following commands are all performed in the `src-ui`-directory. You will need a running back end (including an active session) to connect to the back end API. To spin it up refer to the commands under the section [above](#back-end-development).
|
||||
|
||||
1. Install the Angular CLI. You might need sudo privileges
|
||||
to perform this command:
|
||||
|
||||
```bash
|
||||
$ npm install -g @angular/cli
|
||||
```
|
||||
|
||||
2. Make sure that it's on your path.
|
||||
|
||||
3. Install all necessary modules:
|
||||
|
||||
```bash
|
||||
$ npm install
|
||||
```
|
||||
|
||||
4. You can launch a development server by running:
|
||||
|
||||
```bash
|
||||
$ ng serve
|
||||
```
|
||||
|
||||
This will automatically update whenever you save. However, in-place
|
||||
compilation might fail on syntax errors, in which case you need to
|
||||
restart it.
|
||||
|
||||
By default, the development server is available on `http://localhost:4200/` and is configured to access the API at
|
||||
`http://localhost:8000/api/`, which is the default of the backend. If you enabled `DEBUG` on the back end, several security overrides for allowed hosts, CORS and X-Frame-Options are in place so that the front end behaves exactly as in production.
|
||||
|
||||
### Testing and code style
|
||||
|
||||
- The front end code (.ts, .html, .scss) uses `prettier` for code
|
||||
formatting via the Git `pre-commit` hooks which run automatically on
|
||||
commit. See [above](#code-formatting-with-pre-commit-hooks) for installation instructions. You can also run this via the CLI with a
|
||||
command such as
|
||||
|
||||
```bash
|
||||
$ git ls-files -- '*.ts' | xargs pre-commit run prettier --files
|
||||
```
|
||||
|
||||
- Front end testing uses jest and cypress. There is currently a need
|
||||
for significantly more front end tests. Unit tests and e2e tests,
|
||||
respectively, can be run non-interactively with:
|
||||
|
||||
```bash
|
||||
$ ng test
|
||||
$ npm run e2e:ci
|
||||
```
|
||||
|
||||
- Cypress also includes a UI which can be run with:
|
||||
|
||||
```bash
|
||||
$ ./node_modules/.bin/cypress open
|
||||
```
|
||||
|
||||
- In order to build the front end and serve it as part of Django, execute:
|
||||
|
||||
```bash
|
||||
$ ng build --configuration production
|
||||
```
|
||||
|
||||
This will build the front end and put it in a location from which the
|
||||
Django server will serve it as static content. This way, you can verify
|
||||
that authentication is working.
|
||||
|
||||
## Localization
|
||||
|
||||
Paperless-ngx is available in many different languages. Since Paperless-ngx
|
||||
consists both of a Django application and an AngularJS front end, both
|
||||
these parts have to be translated separately.
|
||||
|
||||
### Front end localization
|
||||
|
||||
- The AngularJS front end does localization according to the [Angular
|
||||
documentation](https://angular.io/guide/i18n).
|
||||
- The source language of the project is "en_US".
|
||||
- The source strings end up in the file `src-ui/messages.xlf`.
|
||||
- The translated strings need to be placed in the
|
||||
`src-ui/src/locale/` folder.
|
||||
- In order to extract added or changed strings from the source files,
|
||||
call `ng xi18n --ivy`.
|
||||
|
||||
Adding new languages requires adding the translated files in the
|
||||
`src-ui/src/locale/` folder and adjusting a couple files.
|
||||
|
||||
1. Adjust `src-ui/angular.json`:
|
||||
|
||||
```json
|
||||
"i18n": {
|
||||
"sourceLocale": "en-US",
|
||||
"locales": {
|
||||
"de": "src/locale/messages.de.xlf",
|
||||
"nl-NL": "src/locale/messages.nl_NL.xlf",
|
||||
"fr": "src/locale/messages.fr.xlf",
|
||||
"en-GB": "src/locale/messages.en_GB.xlf",
|
||||
"pt-BR": "src/locale/messages.pt_BR.xlf",
|
||||
"language-code": "language-file"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. Add the language to the available options in
|
||||
`src-ui/src/app/services/settings.service.ts`:
|
||||
|
||||
```typescript
|
||||
getLanguageOptions(): LanguageOption[] {
|
||||
return [
|
||||
{code: "en-us", name: $localize`English (US)`, englishName: "English (US)", dateInputFormat: "mm/dd/yyyy"},
|
||||
{code: "en-gb", name: $localize`English (GB)`, englishName: "English (GB)", dateInputFormat: "dd/mm/yyyy"},
|
||||
{code: "de", name: $localize`German`, englishName: "German", dateInputFormat: "dd.mm.yyyy"},
|
||||
{code: "nl", name: $localize`Dutch`, englishName: "Dutch", dateInputFormat: "dd-mm-yyyy"},
|
||||
{code: "fr", name: $localize`French`, englishName: "French", dateInputFormat: "dd/mm/yyyy"},
|
||||
{code: "pt-br", name: $localize`Portuguese (Brazil)`, englishName: "Portuguese (Brazil)", dateInputFormat: "dd/mm/yyyy"}
|
||||
// Add your new language here
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
`dateInputFormat` is a special string that defines the behavior of
|
||||
the date input fields and absolutely needs to contain "dd", "mm"
|
||||
and "yyyy".
|
||||
|
||||
3. Import and register the Angular data for this locale in
|
||||
`src-ui/src/app/app.module.ts`:
|
||||
|
||||
```typescript
|
||||
import localeDe from '@angular/common/locales/de'
|
||||
registerLocaleData(localeDe)
|
||||
```
|
||||
|
||||
### Back end localization
|
||||
|
||||
A majority of the strings that appear in the back end appear only when
|
||||
the admin is used. However, some of these are still shown on the front
|
||||
end (such as error messages).
|
||||
|
||||
- The django application does localization according to the [Django
|
||||
documentation](https://docs.djangoproject.com/en/3.1/topics/i18n/translation/).
|
||||
- The source language of the project is "en_US".
|
||||
- Localization files end up in the folder `src/locale/`.
|
||||
- In order to extract strings from the application, call
|
||||
`python3 manage.py makemessages -l en_US`. This is important after
|
||||
making changes to translatable strings.
|
||||
- The message files need to be compiled for them to show up in the
|
||||
application. Call `python3 manage.py compilemessages` to do this.
|
||||
The generated files don't get committed into git, since these are
|
||||
derived artifacts. The build pipeline takes care of executing this
|
||||
command.
|
||||
|
||||
Adding new languages requires adding the translated files in the
|
||||
`src/locale/`-folder and adjusting the file
|
||||
`src/paperless/settings.py` to include the new language:
|
||||
|
||||
```python
|
||||
LANGUAGES = [
|
||||
("en-us", _("English (US)")),
|
||||
("en-gb", _("English (GB)")),
|
||||
("de", _("German")),
|
||||
("nl-nl", _("Dutch")),
|
||||
("fr", _("French")),
|
||||
("pt-br", _("Portuguese (Brazil)")),
|
||||
# Add language here.
|
||||
]
|
||||
```
|
||||
|
||||
## Building the documentation
|
||||
|
||||
The documentation is built using material-mkdocs, see their [documentation](https://squidfunk.github.io/mkdocs-material/reference/).
|
||||
If you want to build the documentation locally, this is how you do it:
|
||||
|
||||
1. Have an active pipenv shell (`pipenv shell`) and install Python dependencies:
|
||||
|
||||
```bash
|
||||
$ pipenv install --dev
|
||||
```
|
||||
|
||||
2. Build the documentation
|
||||
|
||||
```bash
|
||||
$ mkdocs build --config-file mkdocs.yml
|
||||
```
|
||||
|
||||
_alternatively..._
|
||||
|
||||
3. Serve the documentation. This will spin up a
|
||||
copy of the documentation at http://127.0.0.1:8000
|
||||
that will automatically refresh every time you change
|
||||
something.
|
||||
|
||||
```bash
|
||||
$ mkdocs serve
|
||||
```
|
||||
|
||||
## Building the Docker image
|
||||
|
||||
The docker image is primarily built by the GitHub actions workflow, but
|
||||
it can be faster when developing to build and tag an image locally.
|
||||
|
||||
To provide the build arguments automatically, build the image using the
|
||||
helper script `build-docker-image.sh`.
|
||||
|
||||
Building the docker image from source:
|
||||
|
||||
```bash
|
||||
./build-docker-image.sh Dockerfile -t <your-tag>
|
||||
```
|
||||
|
||||
## Extending Paperless-ngx
|
||||
|
||||
Paperless-ngx does not have any fancy plugin systems and will probably never
|
||||
have. However, some parts of the application have been designed to allow
|
||||
easy integration of additional features without any modification to the
|
||||
base code.
|
||||
|
||||
### Making custom parsers
|
||||
|
||||
Paperless-ngx uses parsers to add documents. A parser is
|
||||
responsible for:
|
||||
|
||||
- Retrieving the content from the original
|
||||
- Creating a thumbnail
|
||||
- _optional:_ Retrieving a created date from the original
|
||||
- _optional:_ Creating an archived document from the original
|
||||
|
||||
Custom parsers can be added to Paperless-ngx to support more file types. In
|
||||
order to do that, you need to write the parser itself and announce its
|
||||
existence to Paperless-ngx.
|
||||
|
||||
The parser itself must extend `documents.parsers.DocumentParser` and
|
||||
must implement the methods `parse` and `get_thumbnail`. You can provide
|
||||
your own implementation to `get_date` if you don't want to rely on
|
||||
Paperless-ngx' default date guessing mechanisms.
|
||||
|
||||
```python
|
||||
class MyCustomParser(DocumentParser):
|
||||
|
||||
def parse(self, document_path, mime_type):
|
||||
# This method does not return anything. Rather, you should assign
|
||||
# whatever you got from the document to the following fields:
|
||||
|
||||
# The content of the document.
|
||||
self.text = "content"
|
||||
|
||||
# Optional: path to a PDF document that you created from the original.
|
||||
self.archive_path = os.path.join(self.tempdir, "archived.pdf")
|
||||
|
||||
# Optional: "created" date of the document.
|
||||
self.date = get_created_from_metadata(document_path)
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
# This should return the path to a thumbnail you created for this
|
||||
# document.
|
||||
return os.path.join(self.tempdir, "thumb.webp")
|
||||
```
|
||||
|
||||
If you encounter any issues during parsing, raise a
|
||||
`documents.parsers.ParseError`.
|
||||
|
||||
The `self.tempdir` directory is a temporary directory that is guaranteed
|
||||
to be empty and removed after consumption finished. You can use that
|
||||
directory to store any intermediate files and also use it to store the
|
||||
thumbnail / archived document.
|
||||
|
||||
After that, you need to announce your parser to Paperless-ngx. You need to
|
||||
connect a handler to the `document_consumer_declaration` signal. Have a
|
||||
look in the file `src/paperless_tesseract/apps.py` on how that's done.
|
||||
The handler is a method that returns information about your parser:
|
||||
|
||||
```python
|
||||
def myparser_consumer_declaration(sender, **kwargs):
|
||||
return {
|
||||
"parser": MyCustomParser,
|
||||
"weight": 0,
|
||||
"mime_types": {
|
||||
"application/pdf": ".pdf",
|
||||
"image/jpeg": ".jpg",
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `parser` is a reference to a class that extends `DocumentParser`.
|
||||
- `weight` is used whenever two or more parsers are able to parse a
|
||||
file: The parser with the higher weight wins. This can be used to
|
||||
override the parsers provided by Paperless-ngx.
|
||||
- `mime_types` is a dictionary. The keys are the mime types your
|
||||
parser supports and the value is the default file extension that
|
||||
Paperless-ngx should use when storing files and serving them for
|
||||
download. We could guess that from the file extensions, but some
|
||||
mime types have many extensions associated with them and the Python
|
||||
methods responsible for guessing the extension do not always return
|
||||
the same value.
|
@@ -1,431 +0,0 @@
|
||||
.. _extending:
|
||||
|
||||
Paperless-ngx Development
|
||||
#########################
|
||||
|
||||
This section describes the steps you need to take to start development on paperless-ngx.
|
||||
|
||||
Check out the source from github. The repository is organized in the following way:
|
||||
|
||||
* ``main`` always represents the latest release and will only see changes
|
||||
when a new release is made.
|
||||
* ``dev`` contains the code that will be in the next release.
|
||||
* ``feature-X`` contain bigger changes that will be in some release, but not
|
||||
necessarily the next one.
|
||||
|
||||
When making functional changes to paperless, *always* make your changes on the ``dev`` branch.
|
||||
|
||||
Apart from that, the folder structure is as follows:
|
||||
|
||||
* ``docs/`` - Documentation.
|
||||
* ``src-ui/`` - Code of the front end.
|
||||
* ``src/`` - Code of the back end.
|
||||
* ``scripts/`` - Various scripts that help with different parts of development.
|
||||
* ``docker/`` - Files required to build the docker image.
|
||||
|
||||
Contributing to Paperless
|
||||
=========================
|
||||
|
||||
Maybe you've been using Paperless for a while and want to add a feature or two,
|
||||
or maybe you've come across a bug that you have some ideas how to solve. The
|
||||
beauty of open source software is that you can see what's wrong and help to get
|
||||
it fixed for everyone!
|
||||
|
||||
Before contributing please review our `code of conduct`_ and other important
|
||||
information in the `contributing guidelines`_.
|
||||
|
||||
.. _code-formatting-with-pre-commit-hooks:
|
||||
|
||||
Code formatting with pre-commit Hooks
|
||||
=====================================
|
||||
|
||||
To ensure a consistent style and formatting across the project source, the project
|
||||
utilizes a Git `pre-commit` hook to perform some formatting and linting before a
|
||||
commit is allowed. That way, everyone uses the same style and some common issues
|
||||
can be caught early on. See below for installation instructions.
|
||||
|
||||
Once installed, hooks will run when you commit. If the formatting isn't quite right
|
||||
or a linter catches something, the commit will be rejected. You'll need to look at the
|
||||
output and fix the issue. Some hooks, such as the Python formatting tool `black`,
|
||||
will format failing files, so all you need to do is `git add` those files again and
|
||||
retry your commit.
|
||||
|
||||
Initial setup and first start
|
||||
=============================
|
||||
|
||||
After you forked and cloned the code from github you need to perform a first-time setup.
|
||||
To do the setup you need to perform the steps from the following chapters in a certain order:
|
||||
|
||||
1. Install prerequisites + pipenv as mentioned in :ref:`Bare metal route <setup-bare_metal>`
|
||||
2. Copy ``paperless.conf.example`` to ``paperless.conf`` and enable debug mode.
|
||||
3. Install the Angular CLI interface:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ npm install -g @angular/cli
|
||||
|
||||
4. Install pre-commit
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
pre-commit install
|
||||
|
||||
5. Create ``consume`` and ``media`` folders in the cloned root folder.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
mkdir -p consume media
|
||||
|
||||
6. You can now either ...
|
||||
|
||||
* install redis or
|
||||
* use the included scripts/start-services.sh to use docker to fire up a redis instance (and some other services such as tika, gotenberg and a database server) or
|
||||
* spin up a bare redis container
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
docker run -d -p 6379:6379 --restart unless-stopped redis:latest
|
||||
|
||||
7. Install the python dependencies by performing the following in the src/ directory.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
pipenv install --dev
|
||||
|
||||
* Make sure you're using python 3.9.x or lower. Otherwise you might get issues with building dependencies. You can use `pyenv <https://github.com/pyenv/pyenv>`_ to install a specific python version.
|
||||
|
||||
8. Generate the static UI so you can perform a login to get a session that is required for frontend development (this needs to be done one time only). From the src-ui directory:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
npm install .
|
||||
./node_modules/.bin/ng build --configuration production
|
||||
|
||||
9. Apply migrations and create a superuser for your dev instance:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
python3 manage.py migrate
|
||||
python3 manage.py createsuperuser
|
||||
|
||||
10. Now spin up the dev backend. Depending on which part of paperless you're developing for, you need to have some or all of them running.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
|
||||
|
||||
11. Login with the superuser credentials provided in step 9 at ``http://localhost:8000`` to create a session that enables you to use the backend.
|
||||
|
||||
Backend development environment is now ready, to start Frontend development go to ``/src-ui`` and run ``ng serve``. From there you can use ``http://localhost:4200`` for a preview.
|
||||
|
||||
Back end development
|
||||
====================
|
||||
|
||||
The backend is a django application. PyCharm works well for development, but you can use whatever
|
||||
you want.
|
||||
|
||||
Configure the IDE to use the src/ folder as the base source folder. Configure the following
|
||||
launch configurations in your IDE:
|
||||
|
||||
* python3 manage.py runserver
|
||||
* celery --app paperless worker
|
||||
* python3 manage.py document_consumer
|
||||
|
||||
To start them all:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
python3 manage.py runserver & python3 manage.py document_consumer & celery --app paperless worker
|
||||
|
||||
Testing and code style:
|
||||
|
||||
* Run ``pytest`` in the src/ directory to execute all tests. This also generates a HTML coverage
|
||||
report. When running tests, paperless.conf is loaded as well. However: the tests rely on the default
|
||||
configuration. This is not ideal. But for now, make sure no settings except for DEBUG are overridden when testing.
|
||||
* Coding style is enforced by the Git pre-commit hooks. These will ensure your code is formatted and do some
|
||||
linting when you do a `git commit`.
|
||||
* You can also run ``black`` manually to format your code
|
||||
|
||||
.. note::
|
||||
|
||||
The line length rule E501 is generally useful for getting multiple source files
|
||||
next to each other on the screen. However, in some cases, it's just not possible
|
||||
to make some lines fit, especially complicated IF cases. Append ``# NOQA: E501``
|
||||
to disable this check for certain lines.
|
||||
|
||||
Front end development
|
||||
=====================
|
||||
|
||||
The front end is built using Angular. In order to get started, you need ``npm``.
|
||||
Install the Angular CLI interface with
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ npm install -g @angular/cli
|
||||
|
||||
and make sure that it's on your path. Next, in the src-ui/ directory, install the
|
||||
required dependencies of the project.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ npm install
|
||||
|
||||
You can launch a development server by running
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ ng serve
|
||||
|
||||
This will automatically update whenever you save. However, in-place compilation might fail
|
||||
on syntax errors, in which case you need to restart it.
|
||||
|
||||
By default, the development server is available on ``http://localhost:4200/`` and is configured
|
||||
to access the API at ``http://localhost:8000/api/``, which is the default of the backend.
|
||||
If you enabled DEBUG on the back end, several security overrides for allowed hosts, CORS and
|
||||
X-Frame-Options are in place so that the front end behaves exactly as in production. This also
|
||||
relies on you being logged into the back end. Without a valid session, the front end will simply
|
||||
not work.
|
||||
|
||||
Testing and code style:
|
||||
|
||||
* The frontend code (.ts, .html, .scss) uses ``prettier`` for code formatting via the Git
|
||||
``pre-commit`` hooks which run automatically on commit. See
|
||||
:ref:`above <code-formatting-with-pre-commit-hooks>` for installation. You can also run this
|
||||
via cli with a command such as
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ git ls-files -- '*.ts' | xargs pre-commit run prettier --files
|
||||
|
||||
* Frontend testing uses jest and cypress. There is currently a need for significantly more
|
||||
frontend tests. Unit tests and e2e tests, respectively, can be run non-interactively with:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ ng test
|
||||
$ npm run e2e:ci
|
||||
|
||||
Cypress also includes a UI which can be run from within the ``src-ui`` directory with
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ ./node_modules/.bin/cypress open
|
||||
|
||||
In order to build the front end and serve it as part of django, execute
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ ng build --prod
|
||||
|
||||
This will build the front end and put it in a location from which the Django server will serve
|
||||
it as static content. This way, you can verify that authentication is working.
|
||||
|
||||
|
||||
Localization
|
||||
============
|
||||
|
||||
Paperless is available in many different languages. Since paperless consists both of a django
|
||||
application and an Angular front end, both these parts have to be translated separately.
|
||||
|
||||
Front end localization
|
||||
----------------------
|
||||
|
||||
* The Angular front end does localization according to the `Angular documentation <https://angular.io/guide/i18n>`_.
|
||||
* The source language of the project is "en_US".
|
||||
* The source strings end up in the file "src-ui/messages.xlf".
|
||||
* The translated strings need to be placed in the "src-ui/src/locale/" folder.
|
||||
* In order to extract added or changed strings from the source files, call ``ng xi18n --ivy``.
|
||||
|
||||
Adding new languages requires adding the translated files in the "src-ui/src/locale/" folder and adjusting a couple files.
|
||||
|
||||
1. Adjust "src-ui/angular.json":
|
||||
|
||||
.. code:: json
|
||||
|
||||
"i18n": {
|
||||
"sourceLocale": "en-US",
|
||||
"locales": {
|
||||
"de": "src/locale/messages.de.xlf",
|
||||
"nl-NL": "src/locale/messages.nl_NL.xlf",
|
||||
"fr": "src/locale/messages.fr.xlf",
|
||||
"en-GB": "src/locale/messages.en_GB.xlf",
|
||||
"pt-BR": "src/locale/messages.pt_BR.xlf",
|
||||
"language-code": "language-file"
|
||||
}
|
||||
}
|
||||
|
||||
2. Add the language to the available options in "src-ui/src/app/services/settings.service.ts":
|
||||
|
||||
.. code:: typescript
|
||||
|
||||
getLanguageOptions(): LanguageOption[] {
|
||||
return [
|
||||
{code: "en-us", name: $localize`English (US)`, englishName: "English (US)", dateInputFormat: "mm/dd/yyyy"},
|
||||
{code: "en-gb", name: $localize`English (GB)`, englishName: "English (GB)", dateInputFormat: "dd/mm/yyyy"},
|
||||
{code: "de", name: $localize`German`, englishName: "German", dateInputFormat: "dd.mm.yyyy"},
|
||||
{code: "nl", name: $localize`Dutch`, englishName: "Dutch", dateInputFormat: "dd-mm-yyyy"},
|
||||
{code: "fr", name: $localize`French`, englishName: "French", dateInputFormat: "dd/mm/yyyy"},
|
||||
{code: "pt-br", name: $localize`Portuguese (Brazil)`, englishName: "Portuguese (Brazil)", dateInputFormat: "dd/mm/yyyy"}
|
||||
// Add your new language here
|
||||
]
|
||||
}
|
||||
|
||||
``dateInputFormat`` is a special string that defines the behavior of the date input fields and absolutely needs to contain "dd", "mm" and "yyyy".
|
||||
|
||||
3. Import and register the Angular data for this locale in "src-ui/src/app/app.module.ts":
|
||||
|
||||
.. code:: typescript
|
||||
|
||||
import localeDe from '@angular/common/locales/de';
|
||||
registerLocaleData(localeDe)
|
||||
|
||||
Back end localization
|
||||
---------------------
|
||||
|
||||
A majority of the strings that appear in the back end appear only when the admin is used. However,
|
||||
some of these are still shown on the front end (such as error messages).
|
||||
|
||||
* The django application does localization according to the `django documentation <https://docs.djangoproject.com/en/3.1/topics/i18n/translation/>`_.
|
||||
* The source language of the project is "en_US".
|
||||
* Localization files end up in the folder "src/locale/".
|
||||
* In order to extract strings from the application, call ``python3 manage.py makemessages -l en_US``. This is important after making changes to translatable strings.
|
||||
* The message files need to be compiled for them to show up in the application. Call ``python3 manage.py compilemessages`` to do this. The generated files don't get
|
||||
committed into git, since these are derived artifacts. The build pipeline takes care of executing this command.
|
||||
|
||||
Adding new languages requires adding the translated files in the "src/locale/" folder and adjusting the file "src/paperless/settings.py" to include the new language:
|
||||
|
||||
.. code:: python
|
||||
|
||||
LANGUAGES = [
|
||||
("en-us", _("English (US)")),
|
||||
("en-gb", _("English (GB)")),
|
||||
("de", _("German")),
|
||||
("nl-nl", _("Dutch")),
|
||||
("fr", _("French")),
|
||||
("pt-br", _("Portuguese (Brazil)")),
|
||||
# Add language here.
|
||||
]
|
||||
|
||||
|
||||
Building the documentation
|
||||
==========================
|
||||
|
||||
The documentation is built using sphinx. I've configured ReadTheDocs to automatically build
|
||||
the documentation when changes are pushed. If you want to build the documentation locally,
|
||||
this is how you do it:
|
||||
|
||||
1. Install python dependencies.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ pipenv install --dev
|
||||
|
||||
2. Build the documentation
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/docs
|
||||
$ pipenv run make clean html
|
||||
|
||||
This will build the HTML documentation, and put the resulting files in the ``_build/html``
|
||||
directory.
|
||||
|
||||
Building the Docker image
|
||||
=========================
|
||||
|
||||
The docker image is primarily built by the GitHub actions workflow, but it can be
|
||||
faster when developing to build and tag an image locally.
|
||||
|
||||
To provide the build arguments automatically, build the image using the helper
|
||||
script ``build-docker-image.sh``.
|
||||
|
||||
Building the docker image from source:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
./build-docker-image.sh Dockerfile -t <your-tag>
|
||||
|
||||
Extending Paperless
|
||||
===================
|
||||
|
||||
Paperless does not have any fancy plugin systems and will probably never have. However,
|
||||
some parts of the application have been designed to allow easy integration of additional
|
||||
features without any modification to the base code.
|
||||
|
||||
Making custom parsers
|
||||
---------------------
|
||||
|
||||
Paperless uses parsers to add documents to paperless. A parser is responsible for:
|
||||
|
||||
* Retrieve the content from the original
|
||||
* Create a thumbnail
|
||||
* Optional: Retrieve a created date from the original
|
||||
* Optional: Create an archived document from the original
|
||||
|
||||
Custom parsers can be added to paperless to support more file types. In order to do that,
|
||||
you need to write the parser itself and announce its existence to paperless.
|
||||
|
||||
The parser itself must extend ``documents.parsers.DocumentParser`` and must implement the
|
||||
methods ``parse`` and ``get_thumbnail``. You can provide your own implementation to
|
||||
``get_date`` if you don't want to rely on paperless' default date guessing mechanisms.
|
||||
|
||||
.. code:: python
|
||||
|
||||
class MyCustomParser(DocumentParser):
|
||||
|
||||
def parse(self, document_path, mime_type):
|
||||
# This method does not return anything. Rather, you should assign
|
||||
# whatever you got from the document to the following fields:
|
||||
|
||||
# The content of the document.
|
||||
self.text = "content"
|
||||
|
||||
# Optional: path to a PDF document that you created from the original.
|
||||
self.archive_path = os.path.join(self.tempdir, "archived.pdf")
|
||||
|
||||
# Optional: "created" date of the document.
|
||||
self.date = get_created_from_metadata(document_path)
|
||||
|
||||
def get_thumbnail(self, document_path, mime_type):
|
||||
# This should return the path to a thumbnail you created for this
|
||||
# document.
|
||||
return os.path.join(self.tempdir, "thumb.png")
|
||||
|
||||
If you encounter any issues during parsing, raise a ``documents.parsers.ParseError``.
|
||||
|
||||
The ``self.tempdir`` directory is a temporary directory that is guaranteed to be empty
|
||||
and removed after consumption finished. You can use that directory to store any
|
||||
intermediate files and also use it to store the thumbnail / archived document.
|
||||
|
||||
After that, you need to announce your parser to paperless. You need to connect a
|
||||
handler to the ``document_consumer_declaration`` signal. Have a look in the file
|
||||
``src/paperless_tesseract/apps.py`` on how that's done. The handler is a method
|
||||
that returns information about your parser:
|
||||
|
||||
.. code:: python
|
||||
|
||||
def myparser_consumer_declaration(sender, **kwargs):
|
||||
return {
|
||||
"parser": MyCustomParser,
|
||||
"weight": 0,
|
||||
"mime_types": {
|
||||
"application/pdf": ".pdf",
|
||||
"image/jpeg": ".jpg",
|
||||
}
|
||||
}
|
||||
|
||||
* ``parser`` is a reference to a class that extends ``DocumentParser``.
|
||||
|
||||
* ``weight`` is used whenever two or more parsers are able to parse a file: The parser with
|
||||
the higher weight wins. This can be used to override the parsers provided by
|
||||
paperless.
|
||||
|
||||
* ``mime_types`` is a dictionary. The keys are the mime types your parser supports and the value
|
||||
is the default file extension that paperless should use when storing files and serving them for
|
||||
download. We could guess that from the file extensions, but some mime types have many extensions
|
||||
associated with them and the python methods responsible for guessing the extension do not always
|
||||
return the same value.
|
||||
|
||||
.. _code of conduct: https://github.com/paperless-ngx/paperless-ngx/blob/main/CODE_OF_CONDUCT.md
|
||||
.. _contributing guidelines: https://github.com/paperless-ngx/paperless-ngx/blob/main/CONTRIBUTING.md
|
123
docs/faq.md
Normal file
@@ -0,0 +1,123 @@
|
||||
# Frequently Asked Questions
|
||||
|
||||
## _What's the general plan for Paperless-ngx?_
|
||||
|
||||
**A:** While Paperless-ngx is already considered largely
|
||||
"feature-complete" it is a community-driven project and development
|
||||
will be guided in this way. New features can be submitted via GitHub
|
||||
discussions and "up-voted" by the community but this is not a
|
||||
guarantee the feature will be implemented. This project will always be
|
||||
open to collaboration in the form of PRs, ideas etc.
|
||||
|
||||
## _I'm using docker. Where are my documents?_
|
||||
|
||||
**A:** Your documents are stored inside the docker volume
|
||||
`paperless_media`. Docker manages this volume automatically for you. It
|
||||
is a persistent storage and will persist as long as you don't
|
||||
explicitly delete it. The actual location depends on your host operating
|
||||
system. On Linux, chances are high that this location is
|
||||
|
||||
```
|
||||
/var/lib/docker/volumes/paperless_media/_data
|
||||
```
|
||||
|
||||
!!! warning
|
||||
|
||||
Do not mess with this folder. Don't change permissions and don't move
|
||||
files around manually. This folder is meant to be entirely managed by
|
||||
docker and paperless.
|
||||
|
||||
## _Let's say I want to switch tools in a year. Can I easily move to other systems?_
|
||||
|
||||
**A:** Your documents are stored as plain files inside the media folder.
|
||||
You can always drag those files out of that folder to use them
|
||||
elsewhere. Here are a couple notes about that.
|
||||
|
||||
- Paperless-ngx never modifies your original documents. It keeps
|
||||
checksums of all documents and uses a scheduled sanity checker to
|
||||
check that they remain the same.
|
||||
- By default, paperless uses the internal ID of each document as its
|
||||
filename. This might not be very convenient for export. However, you
|
||||
can adjust the way files are stored in paperless by
|
||||
[configuring the filename format](/advanced_usage#file-name-handling).
|
||||
- [The exporter](/administration#exporter) is
|
||||
another easy way to get your files out of paperless with reasonable
|
||||
file names.
|
||||
|
||||
## _What file types does paperless-ngx support?_
|
||||
|
||||
**A:** Currently, the following files are supported:
|
||||
|
||||
- PDF documents, PNG images, JPEG images, TIFF images, GIF images and
|
||||
WebP images are processed with OCR and converted into PDF documents.
|
||||
- Plain text documents are supported as well and are added verbatim to
|
||||
paperless.
|
||||
- With the optional Tika integration enabled (see [Tika configuration](/configuration#tika)),
|
||||
Paperless also supports various Office documents (.docx, .doc, .odt,
|
||||
.ppt, .pptx, .odp, .xls, .xlsx, .ods).
|
||||
|
||||
Paperless-ngx determines the type of a file by inspecting its content.
|
||||
The file extensions do not matter.
|
||||
|
||||
## _Will paperless-ngx run on Raspberry Pi?_
|
||||
|
||||
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
|
||||
The long answer is that certain parts of Paperless will run very slow,
|
||||
such as the OCR. On Raspberry Pi, try to OCR documents before feeding
|
||||
them into paperless so that paperless can reuse the text. The web
|
||||
interface is a lot snappier, since it runs in your browser and paperless
|
||||
has to do much less work to serve the data.
|
||||
|
||||
!!! note
|
||||
|
||||
You can adjust some of the settings so that paperless uses less
|
||||
processing power. See [setup](/setup#less-powerful-devices) for details.
|
||||
|
||||
## _How do I install paperless-ngx on Raspberry Pi?_
|
||||
|
||||
**A:** Docker images are available for armv7 and arm64 hardware, so just
|
||||
follow the docker-compose instructions. Apart from more required disk
|
||||
space compared to a bare metal installation, docker comes with close to
|
||||
zero overhead, even on Raspberry Pi.
|
||||
|
||||
If you decide to go with the bare metal route, be aware that some of
|
||||
the python requirements do not have precompiled packages for ARM /
|
||||
ARM64. Installation of these will require additional development
|
||||
libraries and compilation will take a long time.
|
||||
|
||||
## _How do I run this on Unraid?_
|
||||
|
||||
**A:** Paperless-ngx is available as a [community
|
||||
app](https://unraid.net/community/apps?q=paperless-ngx) in Unraid. [Uli
|
||||
Fahrer](https://github.com/Tooa) created a container template for that.
|
||||
|
||||
## _How do I run this on my toaster?_
|
||||
|
||||
**A:** I honestly don't know! As for all other devices that might be
|
||||
able to run paperless, you're a bit on your own. If you can't run the
|
||||
docker image, the documentation has instructions for bare metal
|
||||
installs. I'm running paperless on an i3 processor from 2015 or so.
|
||||
This is also what I use to test new releases with. Apart from that, I
|
||||
also have a Raspberry Pi, which I occasionally build the image on and
|
||||
see if it works.
|
||||
|
||||
## _How do I proxy this with NGINX?_
|
||||
|
||||
**A:** See [here](/setup#nginx).
|
||||
|
||||
## _How do I get WebSocket support with Apache mod_wsgi?_
|
||||
|
||||
**A:** `mod_wsgi` by itself does not support ASGI. Paperless will
|
||||
continue to work with WSGI, but certain features such as status
|
||||
notifications about document consumption won't be available.
|
||||
|
||||
If you want to continue using `mod_wsgi`, you will have to run an
|
||||
ASGI-enabled web server as well that processes WebSocket connections,
|
||||
and configure Apache to redirect WebSocket connections to this server.
|
||||
Multiple options for ASGI servers exist:
|
||||
|
||||
- `gunicorn` with `uvicorn` as the worker implementation (the default
|
||||
of paperless)
|
||||
- `daphne` as a standalone server, which is the reference
|
||||
implementation for ASGI.
|
||||
- `uvicorn` as a standalone server
|
117
docs/faq.rst
@@ -1,117 +0,0 @@
|
||||
|
||||
**************************
|
||||
Frequently asked questions
|
||||
**************************
|
||||
|
||||
**Q:** *What's the general plan for Paperless-ngx?*
|
||||
|
||||
**A:** While Paperless-ngx is already considered largely "feature-complete" it is a community-driven
|
||||
project and development will be guided in this way. New features can be submitted via
|
||||
GitHub discussions and "up-voted" by the community but this is not a guarantee the feature
|
||||
will be implemented. This project will always be open to collaboration in the form of PRs,
|
||||
ideas etc.
|
||||
|
||||
**Q:** *I'm using docker. Where are my documents?*
|
||||
|
||||
**A:** Your documents are stored inside the docker volume ``paperless_media``.
|
||||
Docker manages this volume automatically for you. It is a persistent storage
|
||||
and will persist as long as you don't explicitly delete it. The actual location
|
||||
depends on your host operating system. On Linux, chances are high that this location
|
||||
is
|
||||
|
||||
.. code::
|
||||
|
||||
/var/lib/docker/volumes/paperless_media/_data
|
||||
|
||||
.. caution::
|
||||
|
||||
Do not mess with this folder. Don't change permissions and don't move
|
||||
files around manually. This folder is meant to be entirely managed by docker
|
||||
and paperless.
|
||||
|
||||
**Q:** *Let's say I want to switch tools in a year. Can I easily move to other systems?*
|
||||
|
||||
**A:** Your documents are stored as plain files inside the media folder. You can always drag those files
|
||||
out of that folder to use them elsewhere. Here are a couple notes about that.
|
||||
|
||||
* Paperless-ngx never modifies your original documents. It keeps checksums of all documents and uses a
|
||||
scheduled sanity checker to check that they remain the same.
|
||||
* By default, paperless uses the internal ID of each document as its filename. This might not be very
|
||||
convenient for export. However, you can adjust the way files are stored in paperless by
|
||||
:ref:`configuring the filename format <advanced-file_name_handling>`.
|
||||
* :ref:`The exporter <utilities-exporter>` is another easy way to get your files out of paperless with reasonable file names.
|
||||
|
||||
**Q:** *What file types does paperless-ngx support?*
|
||||
|
||||
**A:** Currently, the following files are supported:
|
||||
|
||||
* PDF documents, PNG images, JPEG images, TIFF images and GIF images are processed with OCR and converted into PDF documents.
|
||||
* Plain text documents are supported as well and are added verbatim
|
||||
to paperless.
|
||||
* With the optional Tika integration enabled (see :ref:`Configuration <configuration-tika>`), Paperless also supports various
|
||||
Office documents (.docx, .doc, .odt, .ppt, .pptx, .odp, .xls, .xlsx, .ods).
|
||||
|
||||
Paperless-ngx determines the type of a file by inspecting its content. The
|
||||
file extensions do not matter.
|
||||
|
||||
**Q:** *Will paperless-ngx run on Raspberry Pi?*
|
||||
|
||||
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
|
||||
The long answer is that certain parts of
|
||||
Paperless will run very slowly, such as the OCR. On Raspberry Pi,
|
||||
try to OCR documents before feeding them into paperless so that paperless can
|
||||
reuse the text. The web interface is a lot snappier, since it runs
|
||||
in your browser and paperless has to do much less work to serve the data.
|
||||
|
||||
.. note::
|
||||
|
||||
You can adjust some of the settings so that paperless uses less processing
|
||||
power. See :ref:`setup-less_powerful_devices` for details.
|
||||
|
||||
|
||||
**Q:** *How do I install paperless-ngx on Raspberry Pi?*
|
||||
|
||||
**A:** Docker images are available for arm and arm64 hardware, so just follow
|
||||
the docker-compose instructions. Apart from more required disk space compared to
|
||||
a bare metal installation, docker comes with close to zero overhead, even on
|
||||
Raspberry Pi.
|
||||
|
||||
If you decide to go with the bare metal route, be aware that some of the
|
||||
python requirements do not have precompiled packages for ARM / ARM64. Installation
|
||||
of these will require additional development libraries and compilation will take
|
||||
a long time.
|
||||
|
||||
**Q:** *How do I run this on Unraid?*
|
||||
|
||||
**A:** Paperless-ngx is available as `community app <https://unraid.net/community/apps?q=paperless-ngx>`_
|
||||
in Unraid. `Uli Fahrer <https://github.com/Tooa>`_ created a container template for that.
|
||||
|
||||
**Q:** *How do I run this on my toaster?*
|
||||
|
||||
**A:** I honestly don't know! As for all other devices that might be able
|
||||
to run paperless, you're a bit on your own. If you can't run the docker image,
|
||||
the documentation has instructions for bare metal installs. I'm running
|
||||
paperless on an i3 processor from 2015 or so. This is also what I use to test
|
||||
new releases with. Apart from that, I also have a Raspberry Pi, which I
|
||||
occasionally build the image on and see if it works.
|
||||
|
||||
**Q:** *How do I proxy this with NGINX?*
|
||||
|
||||
**A:** See :ref:`here <setup-nginx>`.
|
||||
|
||||
.. _faq-mod_wsgi:
|
||||
|
||||
**Q:** *How do I get WebSocket support with Apache mod_wsgi?*
|
||||
|
||||
**A:** ``mod_wsgi`` by itself does not support ASGI. Paperless will continue
|
||||
to work with WSGI, but certain features such as status notifications about
|
||||
document consumption won't be available.
|
||||
|
||||
If you want to continue using ``mod_wsgi``, you will have to run an ASGI-enabled
|
||||
web server as well that processes WebSocket connections, and configure Apache to
|
||||
redirect WebSocket connections to this server. Multiple options for ASGI servers
|
||||
exist:
|
||||
|
||||
* ``gunicorn`` with ``uvicorn`` as the worker implementation (the default of paperless)
|
||||
* ``daphne`` as a standalone server, which is the reference implementation for ASGI.
|
||||
* ``uvicorn`` as a standalone server
|
138
docs/index.md
Normal file
@@ -0,0 +1,138 @@
|
||||
<div class="grid-left" markdown>
|
||||
{.index-logo}
|
||||
{.index-logo}
|
||||
|
||||
**Paperless-ngx** is a _community-supported_ open-source document management system that transforms your
|
||||
physical documents into a searchable online archive so you can keep, well, _less paper_.
|
||||
|
||||
[Get started](/setup){ .md-button .md-button--primary .index-callout }
|
||||
[Demo](https://demo.paperless-ngx.com){ .md-button .md-button--secondary target=\_blank }
|
||||
|
||||
</div>
|
||||
<div class="grid-right" markdown>
|
||||
{.index-screenshot}
|
||||
{.index-screenshot}
|
||||
</div>
|
||||
<div class="clear"></div>
|
||||
|
||||
## Why This Exists
|
||||
|
||||
Paper is a nightmare. Environmental issues aside, there's no excuse for
|
||||
it in the 21st century. It takes up space, collects dust, doesn't
|
||||
support any form of a search feature, indexing is tedious, it's heavy
|
||||
and prone to damage & loss.
|
||||
|
||||
This software is designed to make "going paperless" easier. No more worrying
|
||||
about finding stuff again, feed documents right from the post box into
|
||||
the scanner and then shred them. Perhaps you might find it useful too.
|
||||
|
||||
## Paperless, a history
|
||||
|
||||
Paperless is a simple Django application running in two parts: a
|
||||
_Consumer_ (the thing that does the indexing) and the _Web server_ (the
|
||||
part that lets you search & download already-indexed documents). If you
|
||||
want to learn more about its functions keep on reading after the
|
||||
installation section.
|
||||
|
||||
Paperless-ngx is a document management system that transforms your
|
||||
physical documents into a searchable online archive so you can keep,
|
||||
well, _less paper_.
|
||||
|
||||
Paperless-ngx forked from paperless-ng to continue the great work and
|
||||
distribute responsibility of supporting and advancing the project among
|
||||
a team of people.
|
||||
|
||||
NG stands for both Angular (the framework used for the Frontend) and
|
||||
next-gen. Publishing this project under a different name also avoids
|
||||
confusion between paperless and paperless-ngx.
|
||||
|
||||
If you want to learn about what's different in paperless-ngx from
|
||||
Paperless, check out these resources in the documentation:
|
||||
|
||||
- [Some screenshots](#screenshots) of the new UI are available.
|
||||
- Read [this section](/advanced_usage#automatic-matching) if you want to learn about how paperless automates all
|
||||
tagging using machine learning.
|
||||
- Paperless now comes with a [proper email consumer](/usage#usage-email) that's fully tested and production ready.
|
||||
- Paperless creates searchable PDF/A documents from whatever you put into the consumption directory. This means
|
||||
that you can select text in image-only documents coming from your scanner.
|
||||
- See [this note](/administration#encryption) about GnuPG encryption in paperless-ngx.
|
||||
- Paperless is now integrated with a
|
||||
[task processing queue](/setup#task_processor) that tells you at a glance when and why something is not working.
|
||||
- The [changelog](/changelog) contains a detailed list of all changes in paperless-ngx.
|
||||
|
||||
## Screenshots
|
||||
|
||||
This is what Paperless-ngx looks like.
|
||||
|
||||
The dashboard shows customizable views on your documents and allows
|
||||
document uploads:
|
||||
|
||||
[](assets/screenshots/dashboard.png)
|
||||
|
||||
The document list provides three different styles to scroll through your
|
||||
documents:
|
||||
|
||||
[](assets/screenshots/documents-table.png)
|
||||
|
||||
[](assets/screenshots/documents-smallcards.png)
|
||||
|
||||
[](assets/screenshots/documents-largecards.png)
|
||||
|
||||
Paperless-ngx also supports dark mode:
|
||||
|
||||
[](assets/screenshots/documents-smallcards-dark.png)
|
||||
|
||||
Extensive filtering mechanisms:
|
||||
|
||||
[](assets/screenshots/documents-filter.png)
|
||||
|
||||
Bulk editing of document tags, correspondents, etc.:
|
||||
|
||||
[](assets/screenshots/bulk-edit.png)
|
||||
|
||||
Side-by-side editing of documents:
|
||||
|
||||
[](assets/screenshots/editing.png)
|
||||
|
||||
Tag editing. This looks about the same for correspondents and document
|
||||
types.
|
||||
|
||||
[](assets/screenshots/new-tag.png)
|
||||
|
||||
Searching provides auto complete and highlights the results.
|
||||
|
||||
[](assets/screenshots/search-preview.png)
|
||||
|
||||
[](assets/screenshots/search-results.png)
|
||||
|
||||
Fancy mail filters!
|
||||
|
||||
[](assets/screenshots/mail-rules-edited.png)
|
||||
|
||||
Mobile devices are supported.
|
||||
|
||||
[](assets/screenshots/mobile.png)
|
||||
|
||||
## Support
|
||||
|
||||
Community support is available via [GitHub Discussions](https://github.com/paperless-ngx/paperless-ngx/discussions/) and [the Matrix chat room](https://matrix.to/#/#paperless:matrix.org).
|
||||
|
||||
### Feature Requests
|
||||
|
||||
Feature requests can be submitted via [GitHub Discussions](https://github.com/paperless-ngx/paperless-ngx/discussions/categories/feature-requests) where you can search for existing ideas, add your own and vote for the ones you care about.
|
||||
|
||||
### Bugs
|
||||
|
||||
For bugs please [open an issue](https://github.com/paperless-ngx/paperless-ngx/issues) or [start a discussion](https://github.com/paperless-ngx/paperless-ngx/discussions/categories/support) if you have questions.
|
||||
|
||||
## Contributing
|
||||
|
||||
People interested in continuing the work on paperless-ngx are encouraged to reach out on [GitHub](https://github.com/paperless-ngx/paperless-ngx) or [the Matrix chat room](https://matrix.to/#/#paperless:matrix.org). If you would like to contribute to the project on an ongoing basis there are multiple teams (frontend, ci/cd, etc) that could use your help so please reach out!
|
||||
|
||||
### Translation
|
||||
|
||||
Paperless-ngx is available in many languages that are coordinated on [Crowdin](https://crwd.in/paperless-ngx). If you want to help out by translating paperless-ngx into your language, please head over to https://crwd.in/paperless-ngx, and thank you!
|
||||
|
||||
## Scanners & Software
|
||||
|
||||
Paperless-ngx is compatible with many different scanners and scanning tools. A user-maintained list of scanners and other software is available on [the wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Scanner-&-Software-Recommendations).
|
@@ -1,75 +0,0 @@
|
||||
*********
|
||||
Paperless
|
||||
*********
|
||||
|
||||
Paperless is a simple Django application running in two parts:
|
||||
a *Consumer* (the thing that does the indexing) and
|
||||
the *Web server* (the part that lets you search &
|
||||
download already-indexed documents). If you want to learn more about its
|
||||
functions keep on reading after the installation section.
|
||||
|
||||
|
||||
Why This Exists
|
||||
===============
|
||||
|
||||
Paper is a nightmare. Environmental issues aside, there's no excuse for it in
|
||||
the 21st century. It takes up space, collects dust, doesn't support any form
|
||||
of a search feature, indexing is tedious, it's heavy and prone to damage &
|
||||
loss.
|
||||
|
||||
I wrote this to make "going paperless" easier. I do not have to worry about
|
||||
finding stuff again. I feed documents right from the post box into the scanner
|
||||
and then shred them. Perhaps you might find it useful too.
|
||||
|
||||
|
||||
Paperless-ngx
|
||||
=============
|
||||
|
||||
Paperless-ngx is a document management system that transforms your physical
|
||||
documents into a searchable online archive so you can keep, well, *less paper*.
|
||||
|
||||
Paperless-ngx forked from paperless-ng to continue the great work and
|
||||
distribute responsibility of supporting and advancing the project among a team
|
||||
of people.
|
||||
|
||||
NG stands for both Angular (the framework used for the
|
||||
Frontend) and next-gen. Publishing this project under a different name also
|
||||
avoids confusion between paperless and paperless-ngx.
|
||||
|
||||
If you want to learn about what's different in paperless-ngx from Paperless, check out these
|
||||
resources in the documentation:
|
||||
|
||||
* :ref:`Some screenshots <screenshots>` of the new UI are available.
|
||||
* Read :ref:`this section <advanced-automatic_matching>` if you want to
|
||||
learn about how paperless automates all tagging using machine learning.
|
||||
* Paperless now comes with a :ref:`proper email consumer <usage-email>`
|
||||
that's fully tested and production ready.
|
||||
* Paperless creates searchable PDF/A documents from whatever you put into
|
||||
the consumption directory. This means that you can select text in
|
||||
image-only documents coming from your scanner.
|
||||
* See :ref:`this note <utilities-encyption>` about GnuPG encryption in
|
||||
paperless-ngx.
|
||||
* Paperless is now integrated with a
|
||||
:ref:`task processing queue <setup-task_processor>` that tells you
|
||||
at a glance when and why something is not working.
|
||||
* The :doc:`changelog </changelog>` contains a detailed list of all changes
|
||||
in paperless-ngx.
|
||||
|
||||
Contents
|
||||
========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
setup
|
||||
usage_overview
|
||||
advanced_usage
|
||||
administration
|
||||
configuration
|
||||
api
|
||||
faq
|
||||
troubleshooting
|
||||
extending
|
||||
scanners
|
||||
screenshots
|
||||
changelog
|
@@ -1 +0,0 @@
|
||||
myst-parser==0.18.1
|
@@ -1,8 +0,0 @@
|
||||
|
||||
.. _scanners:
|
||||
|
||||
*******************
|
||||
Scanners & Software
|
||||
*******************
|
||||
|
||||
Paperless-ngx is compatible with many different scanners and scanning tools. A user-maintained list of scanners and other software is available on `the wiki <https://github.com/paperless-ngx/paperless-ngx/wiki/Scanner-&-Software-Recommendations>`_.
|
@@ -1,63 +0,0 @@
|
||||
.. _screenshots:
|
||||
|
||||
***********
|
||||
Screenshots
|
||||
***********
|
||||
|
||||
This is what Paperless-ngx looks like.
|
||||
|
||||
The dashboard shows customizable views on your documents and allows document uploads:
|
||||
|
||||
.. image:: _static/screenshots/dashboard.png
|
||||
:target: _static/screenshots/dashboard.png
|
||||
|
||||
The document list provides three different styles to scroll through your documents:
|
||||
|
||||
.. image:: _static/screenshots/documents-table.png
|
||||
:target: _static/screenshots/documents-table.png
|
||||
.. image:: _static/screenshots/documents-smallcards.png
|
||||
:target: _static/screenshots/documents-smallcards.png
|
||||
.. image:: _static/screenshots/documents-largecards.png
|
||||
:target: _static/screenshots/documents-largecards.png
|
||||
|
||||
Paperless-ngx also supports "dark mode":
|
||||
|
||||
.. image:: _static/screenshots/documents-smallcards-dark.png
|
||||
:target: _static/screenshots/documents-smallcards-dark.png
|
||||
|
||||
Extensive filtering mechanisms:
|
||||
|
||||
.. image:: _static/screenshots/documents-filter.png
|
||||
:target: _static/screenshots/documents-filter.png
|
||||
|
||||
Bulk editing of document tags, correspondents, etc.:
|
||||
|
||||
.. image:: _static/screenshots/bulk-edit.png
|
||||
:target: _static/screenshots/bulk-edit.png
|
||||
|
||||
Side-by-side editing of documents:
|
||||
|
||||
.. image:: _static/screenshots/editing.png
|
||||
:target: _static/screenshots/editing.png
|
||||
|
||||
Tag editing. This looks about the same for correspondents and document types.
|
||||
|
||||
.. image:: _static/screenshots/new-tag.png
|
||||
:target: _static/screenshots/new-tag.png
|
||||
|
||||
Searching provides auto complete and highlights the results.
|
||||
|
||||
.. image:: _static/screenshots/search-preview.png
|
||||
:target: _static/screenshots/search-preview.png
|
||||
.. image:: _static/screenshots/search-results.png
|
||||
:target: _static/screenshots/search-results.png
|
||||
|
||||
Fancy mail filters!
|
||||
|
||||
.. image:: _static/screenshots/mail-rules-edited.png
|
||||
:target: _static/screenshots/mail-rules-edited.png
|
||||
|
||||
Mobile devices are supported.
|
||||
|
||||
.. image:: _static/screenshots/mobile.png
|
||||
:target: _static/screenshots/mobile.png
|
884
docs/setup.md
Normal file
@@ -0,0 +1,884 @@
|
||||
## Installation
|
||||
|
||||
You can go multiple routes to set up and run Paperless:
|
||||
|
||||
- [Use the easy install docker script](#docker_script)
|
||||
- [Pull the image from Docker Hub](#docker_hub)
|
||||
- [Build the Docker image yourself](#docker_build)
|
||||
- [Install Paperless directly on your system manually (bare metal)](#bare_metal)
|
||||
|
||||
The Docker routes are quick & easy. These are the recommended routes.
|
||||
This configures all the stuff from the above automatically so that it
|
||||
just works and uses sensible defaults for all configuration options.
|
||||
Here you find a cheat-sheet for docker beginners: [CLI
|
||||
Basics](https://www.sehn.tech/refs/devops-with-docker/)
|
||||
|
||||
The bare metal route is complicated to set up but makes it easier should
|
||||
you want to contribute some code back. You need to configure and run the
|
||||
above mentioned components yourself.
|
||||
|
||||
### Docker using the Installation Script {#docker_script}
|
||||
|
||||
Paperless provides an interactive installation script. This script will
|
||||
ask you for a couple configuration options, download and create the
|
||||
necessary configuration files, pull the docker image, start paperless
|
||||
and create your user account. This script essentially performs all the
|
||||
steps described in [Docker setup](#docker_hub) automatically.
|
||||
|
||||
1. Make sure that docker and docker-compose are installed.
|
||||
|
||||
2. Download and run the installation script:
|
||||
|
||||
```shell-session
|
||||
$ bash -c "$(curl -L https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/install-paperless-ngx.sh)"
|
||||
```
|
||||
|
||||
### From GHCR / Docker Hub {#docker_hub}
|
||||
|
||||
1. Login with your user and create a folder in your home-directory to have a place for your
|
||||
configuration files and consumption directory.
|
||||
|
||||
```shell-session
|
||||
$ mkdir -v ~/paperless-ngx
|
||||
```
|
||||
|
||||
2. Go to the [/docker/compose directory on the project
|
||||
page](https://github.com/paperless-ngx/paperless-ngx/tree/master/docker/compose)
|
||||
and download one of the `docker-compose.*.yml` files,
|
||||
depending on which database backend you want to use. Rename this
|
||||
file to `docker-compose.yml`. If you want to enable
|
||||
optional support for Office documents, download a file with
|
||||
`-tika` in the file name. Download the
|
||||
`docker-compose.env` file and the `.env` file as well and store them
|
||||
in the same directory.
|
||||
|
||||
!!! tip
|
||||
|
||||
For new installations, it is recommended to use PostgreSQL as the
|
||||
database backend.
|
||||
|
||||
3. Install [Docker](https://www.docker.com/) and
|
||||
[docker-compose](https://docs.docker.com/compose/install/).
|
||||
|
||||
!!! warning
|
||||
|
||||
If you want to use the included `docker-compose.*.yml` file, you
|
||||
need to have at least Docker version **17.09.0** and docker-compose
|
||||
version **1.17.0**. To check do: `docker-compose -v` or `docker -v`
|
||||
|
||||
See the [Docker installation guide](https://docs.docker.com/engine/install/) on how to install the current
|
||||
version of Docker for your operating system or Linux distribution of
|
||||
choice. To get the latest version of docker-compose, follow the
|
||||
[docker-compose installation guide](https://docs.docker.com/compose/install/linux/) if your package repository
|
||||
doesn't include it.
|
||||
|
||||
4. Modify `docker-compose.yml` to your preferences. You may want to
|
||||
change the path to the consumption directory. Find the line that
|
||||
specifies where to mount the consumption directory:
|
||||
|
||||
```yaml
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
```
|
||||
|
||||
Replace the part BEFORE the colon with a local directory of your
|
||||
choice:
|
||||
|
||||
```yaml
|
||||
- /home/jonaswinkler/paperless-inbox:/usr/src/paperless/consume
|
||||
```
|
||||
|
||||
Don't change the part after the colon or paperless won't find your
|
||||
documents.
|
||||
|
||||
You may also need to change the default port that the webserver will
|
||||
use from the default (8000):
|
||||
|
||||
```yaml
|
||||
ports:
|
||||
- 8000:8000
|
||||
```
|
||||
|
||||
Replace the part BEFORE the colon with a port of your choice:
|
||||
|
||||
```yaml
|
||||
ports:
|
||||
- 8010:8000
|
||||
```
|
||||
|
||||
Don't change the part after the colon or edit other lines that
|
||||
refer to port 8000. Modifying the part before the colon will map
|
||||
requests on another port to the webserver running on the default
|
||||
port.
|
||||
|
||||
**Rootless**
|
||||
|
||||
If you want to run Paperless as a rootless container, you will need
|
||||
to do the following in your `docker-compose.yml`:
|
||||
|
||||
- set the `user` running the container to map to the `paperless`
|
||||
user in the container. This value (`user_id` below), should be
|
||||
the same id that `USERMAP_UID` and `USERMAP_GID` are set to in
|
||||
the next step. See `USERMAP_UID` and `USERMAP_GID`
|
||||
[here](/configuration#docker).
|
||||
|
||||
Your entry for Paperless should contain something like:
|
||||
|
||||
> ```
|
||||
> webserver:
|
||||
> image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
> user: <user_id>
|
||||
> ```
|
||||
|
||||
5. Modify `docker-compose.env`, following the comments in the file. The
|
||||
most important change is to set `USERMAP_UID` and `USERMAP_GID` to
|
||||
the uid and gid of your user on the host system. Use `id -u` and
|
||||
`id -g` to get these.
|
||||
|
||||
This ensures that both the docker container and you on the host
|
||||
machine have write access to the consumption directory. If your UID
|
||||
and GID on the host system are 1000 (the default for the first normal
|
||||
user on most systems), it will work out of the box without any
|
||||
modifications. Run `id "username"` to check.
|
||||
|
||||
!!! note
|
||||
|
||||
You can copy any setting from the file `paperless.conf.example` and
|
||||
paste it here. Have a look at [configuration](/configuration) to see what's available.
|
||||
|
||||
!!! note
|
||||
|
||||
You can utilize Docker secrets for configuration settings by
|
||||
appending `_FILE` to configuration values. For example `PAPERLESS_DBUSER`
|
||||
can be set using `PAPERLESS_DBUSER_FILE=/var/run/secrets/password.txt`.
|
||||
|
||||
!!! warning
|
||||
|
||||
Some file systems such as NFS network shares don't support file
|
||||
system notifications with `inotify`. When storing the consumption
|
||||
directory on such a file system, paperless will not pick up new
|
||||
files with the default configuration. You will need to use
|
||||
`PAPERLESS_CONSUMER_POLLING`, which will disable inotify. See
|
||||
[here](/configuration#polling).
|
||||
|
||||
6. Run `docker-compose pull`, followed by `docker-compose up -d`. This
|
||||
will pull the image, create and start the necessary containers.
|
||||
|
||||
7. To be able to login, you will need a super user. To create it,
|
||||
execute the following command:
|
||||
|
||||
```shell-session
|
||||
$ docker-compose run --rm webserver createsuperuser
|
||||
```
|
||||
|
||||
This will prompt you to set a username, an optional e-mail address
|
||||
and finally a password (at least 8 characters).
|
||||
|
||||
8. The default `docker-compose.yml` exports the webserver on your local
|
||||
port 8000. If you did not change this, you should now be able to visit
|
||||
your Paperless instance at `http://127.0.0.1:8000` or your server's
|
||||
IP-Address:8000. Use the login credentials you have created with the
|
||||
previous step.
|
||||
|
||||
### Build the Docker image yourself {#docker_build}
|
||||
|
||||
1. Clone the entire repository of paperless:
|
||||
|
||||
```shell-session
|
||||
git clone https://github.com/paperless-ngx/paperless-ngx
|
||||
```
|
||||
|
||||
The master branch always reflects the latest stable version.
|
||||
|
||||
2. Copy one of the `docker/compose/docker-compose.*.yml` to
|
||||
`docker-compose.yml` in the root folder, depending on which database
|
||||
backend you want to use. Copy `docker-compose.env` into the project
|
||||
root as well.
|
||||
|
||||
3. In the `docker-compose.yml` file, find the line that instructs
|
||||
docker-compose to pull the paperless image from Docker Hub:
|
||||
|
||||
```yaml
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
```
|
||||
|
||||
and replace it with a line that instructs docker-compose to build
|
||||
the image from the current working directory instead:
|
||||
|
||||
```yaml
|
||||
webserver:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
QPDF_VERSION: x.y.z
|
||||
PIKEPDF_VERSION: x.y.z
|
||||
PSYCOPG2_VERSION: x.y.z
|
||||
JBIG2ENC_VERSION: 0.29
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
You should match the build argument versions to the version for the
|
||||
release you have checked out. These are pre-built images with
|
||||
certain, more updated software. If you want to build these images
|
||||
yourself, that is possible, but beyond the scope of these steps.
|
||||
|
||||
4. Follow steps 3 to 8 of [Docker Setup](#docker_hub). When asked to run
|
||||
`docker-compose pull` to pull the image, do
|
||||
|
||||
```shell-session
|
||||
$ docker-compose build
|
||||
```
|
||||
|
||||
instead to build the image.
|
||||
|
||||
### Bare Metal Route {#bare_metal}
|
||||
|
||||
Paperless runs on linux only. The following procedure has been tested on
|
||||
a minimal installation of Debian/Buster, which is the current stable
|
||||
release at the time of writing. Windows is not and will never be
|
||||
supported.
|
||||
|
||||
1. Install dependencies. Paperless requires the following packages.
|
||||
|
||||
- `python3` 3.8, 3.9
|
||||
- `python3-pip`
|
||||
- `python3-dev`
|
||||
- `default-libmysqlclient-dev` for MariaDB
|
||||
- `fonts-liberation` for generating thumbnails for plain text
|
||||
files
|
||||
- `imagemagick` >= 6 for PDF conversion
|
||||
- `gnupg` for handling encrypted documents
|
||||
- `libpq-dev` for PostgreSQL
|
||||
- `libmagic-dev` for mime type detection
|
||||
- `mariadb-client` for MariaDB compile time
|
||||
- `mime-support` for mime type detection
|
||||
- `libzbar0` for barcode detection
|
||||
- `poppler-utils` for barcode detection
|
||||
|
||||
Use this list for your preferred package management:
|
||||
|
||||
```
|
||||
python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev default-libmysqlclient-dev libmagic-dev mime-support libzbar0 poppler-utils
|
||||
```
|
||||
|
||||
These dependencies are required for OCRmyPDF, which is used for text
|
||||
recognition.
|
||||
|
||||
- `unpaper`
|
||||
- `ghostscript`
|
||||
- `icc-profiles-free`
|
||||
- `qpdf`
|
||||
- `liblept5`
|
||||
- `libxml2`
|
||||
- `pngquant` (suggested for certain PDF image optimizations)
|
||||
- `zlib1g`
|
||||
- `tesseract-ocr` >= 4.0.0 for OCR
|
||||
- `tesseract-ocr` language packs (`tesseract-ocr-eng`,
|
||||
`tesseract-ocr-deu`, etc)
|
||||
|
||||
Use this list for your preferred package management:
|
||||
|
||||
```
|
||||
unpaper ghostscript icc-profiles-free qpdf liblept5 libxml2 pngquant zlib1g tesseract-ocr
|
||||
```
|
||||
|
||||
On Raspberry Pi, these libraries are required as well:
|
||||
|
||||
- `libatlas-base-dev`
|
||||
- `libxslt1-dev`
|
||||
|
||||
You will also need `build-essential`, `python3-setuptools` and
|
||||
`python3-wheel` for installing some of the python dependencies.
|
||||
|
||||
2. Install `redis` >= 6.0 and configure it to start automatically.
|
||||
|
||||
3. Optional. Install `postgresql` and configure a database, user and
|
||||
password for paperless. If you do not wish to use PostgreSQL,
|
||||
MariaDB and SQLite are available as well.
|
||||
|
||||
!!! note
|
||||
|
||||
On bare-metal installations using SQLite, ensure the [JSON1
|
||||
extension](https://code.djangoproject.com/wiki/JSON1Extension) is
|
||||
enabled. This is usually the case, but not always.
|
||||
|
||||
4. Create a system user with a new home folder under which you wish
|
||||
to run paperless.
|
||||
|
||||
```shell-session
|
||||
adduser paperless --system --home /opt/paperless --group
|
||||
```
|
||||
|
||||
5. Get the release archive from
|
||||
<https://github.com/paperless-ngx/paperless-ngx/releases> for example with
|
||||
|
||||
```shell-session
|
||||
curl -O -L https://github.com/paperless-ngx/paperless-ngx/releases/download/v1.10.2/paperless-ngx-v1.10.2.tar.xz
|
||||
```
|
||||
|
||||
Extract the archive with
|
||||
|
||||
```shell-session
|
||||
tar -xf paperless-ngx-v1.10.2.tar.xz
|
||||
```
|
||||
|
||||
and copy the contents to the
|
||||
home folder of the user you created before (`/opt/paperless`).
|
||||
|
||||
Optional: If you cloned the git repo, you will have to
|
||||
compile the frontend yourself, see [here](/development#front-end-development)
|
||||
and use the `build` step, not `serve`.
|
||||
|
||||
6. Configure paperless. See [configuration](/configuration) for details.
|
||||
Edit the included `paperless.conf` and adjust the settings to your
|
||||
needs. Required settings for getting
|
||||
paperless running are:
|
||||
|
||||
- `PAPERLESS_REDIS` should point to your redis server, such as
|
||||
<redis://localhost:6379>.
|
||||
- `PAPERLESS_DBENGINE` optional, and should be one of `postgres`,
|
||||
`mariadb`, or `sqlite`
|
||||
- `PAPERLESS_DBHOST` should be the hostname on which your
|
||||
PostgreSQL server is running. Leave this unset if you want to use
|
||||
SQLite instead. Also configure port, database name, user and
|
||||
password as necessary.
|
||||
- `PAPERLESS_CONSUMPTION_DIR` should point to a folder which
|
||||
paperless should watch for documents. You might want to have
|
||||
this somewhere else. Likewise, `PAPERLESS_DATA_DIR` and
|
||||
`PAPERLESS_MEDIA_ROOT` define where paperless stores its data.
|
||||
If you like, you can point both to the same directory.
|
||||
- `PAPERLESS_SECRET_KEY` should be a random sequence of
|
||||
characters. It's used for authentication. Failure to do so
|
||||
allows third parties to forge authentication credentials.
|
||||
- `PAPERLESS_URL` if you are behind a reverse proxy. This should
|
||||
point to your domain. Please see
|
||||
[configuration](/configuration) for more
|
||||
information.
|
||||
|
||||
Many more adjustments can be made to paperless, especially the OCR
|
||||
part. The following options are recommended for everyone:
|
||||
|
||||
- Set `PAPERLESS_OCR_LANGUAGE` to the language most of your
|
||||
documents are written in.
|
||||
- Set `PAPERLESS_TIME_ZONE` to your local time zone.
|
||||
|
||||
7. Create the following directories if they are missing:
|
||||
|
||||
- `/opt/paperless/media`
|
||||
- `/opt/paperless/data`
|
||||
- `/opt/paperless/consume`
|
||||
|
||||
Adjust as necessary if you configured different folders.
|
||||
Ensure that the paperless user has write permissions for every one
|
||||
of these folders with
|
||||
|
||||
```shell-session
|
||||
ls -l -d /opt/paperless/media
|
||||
```
|
||||
|
||||
If needed, change the owner with
|
||||
|
||||
```shell-session
|
||||
sudo chown paperless:paperless /opt/paperless/media
|
||||
sudo chown paperless:paperless /opt/paperless/data
|
||||
sudo chown paperless:paperless /opt/paperless/consume
|
||||
```
|
||||
|
||||
8. Install python requirements from the `requirements.txt` file. It is
|
||||
up to you if you wish to use a virtual environment or not. First you should update your pip, so it gets the latest packages.
|
||||
|
||||
```shell-session
|
||||
sudo -Hu paperless pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
This will install all python dependencies in the home directory of
|
||||
the new paperless user.
|
||||
|
||||
9. Go to `/opt/paperless/src`, and execute the following commands:
|
||||
|
||||
```bash
|
||||
# This creates the database schema.
|
||||
sudo -Hu paperless python3 manage.py migrate
|
||||
|
||||
# This creates your first paperless user
|
||||
sudo -Hu paperless python3 manage.py createsuperuser
|
||||
```
|
||||
|
||||
10. Optional: Test that paperless is working by executing
|
||||
|
||||
```bash
|
||||
# Manually starts the webserver
|
||||
sudo -Hu paperless python3 manage.py runserver
|
||||
```
|
||||
|
||||
and pointing your browser to http://localhost:8000 if
|
||||
accessing from the same device on which paperless is installed.
|
||||
If accessing from another machine, set up systemd services. You may need
|
||||
to set `PAPERLESS_DEBUG=true` in order for the development server to work
|
||||
normally in your browser.
|
||||
|
||||
!!! warning
|
||||
|
||||
This is a development server which should not be used in production.
|
||||
It is not audited for security and performance is inferior to
|
||||
production ready web servers.
|
||||
|
||||
!!! tip
|
||||
|
||||
This will not start the consumer. Paperless does this in a separate
|
||||
process.
|
||||
|
||||
11. Set up systemd services to run paperless automatically. You may use
|
||||
the service definition files included in the `scripts` folder as a
|
||||
starting point.
|
||||
|
||||
Paperless needs the `webserver` script to run the webserver, the
|
||||
`consumer` script to watch the input folder, `taskqueue` for the
|
||||
background workers used to handle things like document consumption
|
||||
and the `scheduler` script to run tasks such as email checking at
|
||||
certain times.
|
||||
|
||||
!!! note
|
||||
|
||||
The `socket` script enables `gunicorn` to run on port 80 without
|
||||
root privileges. For this you need to uncomment the
|
||||
`Requires=paperless-webserver.socket` in the `webserver` script
|
||||
and configure `gunicorn` to listen on port 80 (see
|
||||
`paperless/gunicorn.conf.py`).
|
||||
|
||||
You may need to adjust the path to the `gunicorn` executable. This
|
||||
will be installed as part of the python dependencies, and is either
|
||||
located in the `bin` folder of your virtual environment, or in
|
||||
`~/.local/bin/` if no virtual environment is used.
|
||||
|
||||
These services rely on redis and optionally the database server, but
|
||||
don't need to be started in any particular order. The example files
|
||||
depend on redis being started. If you use a database server, you
|
||||
should add additional dependencies.
|
||||
|
||||
!!! warning
|
||||
|
||||
The included scripts run a `gunicorn` standalone server, which is
|
||||
fine for running paperless. It does support SSL, however, the
|
||||
documentation of GUnicorn states that you should use a proxy server
|
||||
in front of gunicorn instead.
|
||||
|
||||
For instructions on how to use nginx for that,
|
||||
[see the instructions below](/setup#nginx).
|
||||
|
||||
!!! warning
|
||||
|
||||
If celery won't start (check with
|
||||
`sudo systemctl status paperless-task-queue.service` for
|
||||
paperless-task-queue.service and paperless-scheduler.service
|
||||
) you need to change the path in the files. Example:
|
||||
`ExecStart=/opt/paperless/.local/bin/celery --app paperless worker --loglevel INFO`
|
||||
|
||||
12. Optional: Install a samba server and make the consumption folder
|
||||
available as a network share.
|
||||
|
||||
13. Configure ImageMagick to allow processing of PDF documents. Most
|
||||
distributions have this disabled by default, since PDF documents can
|
||||
contain malware. If you don't do this, paperless will fall back to
|
||||
ghostscript for certain steps such as thumbnail generation.
|
||||
|
||||
Edit `/etc/ImageMagick-6/policy.xml` and adjust
|
||||
|
||||
```
|
||||
<policy domain="coder" rights="none" pattern="PDF" />
|
||||
```
|
||||
|
||||
to
|
||||
|
||||
```
|
||||
<policy domain="coder" rights="read|write" pattern="PDF" />
|
||||
```
|
||||
|
||||
14. Optional: Install the
|
||||
[jbig2enc](https://ocrmypdf.readthedocs.io/en/latest/jbig2.html)
|
||||
encoder. This will reduce the size of generated PDF documents.
|
||||
You'll most likely need to compile this by yourself, because this
|
||||
software was patented until around 2017 and binary packages are
|
||||
not available for most distributions.
|
||||
|
||||
15. Optional: If using the NLTK machine learning processing (see
|
||||
`PAPERLESS_ENABLE_NLTK` in [configuration](/configuration#software_tweaks) for details),
|
||||
download the NLTK data for the Snowball
|
||||
Stemmer, Stopwords and Punkt tokenizer to your
|
||||
`PAPERLESS_DATA_DIR/nltk`. Refer to the [NLTK
|
||||
instructions](https://www.nltk.org/data.html) for details on how to
|
||||
download the data.
|
||||
|
||||
# Migrating to Paperless-ngx
|
||||
|
||||
Migration is possible both from Paperless-ng or directly from the
|
||||
'original' Paperless.
|
||||
|
||||
## Migrating from Paperless-ng
|
||||
|
||||
Paperless-ngx is meant to be a drop-in replacement for Paperless-ng and
|
||||
thus upgrading should be trivial for most users, especially when using
|
||||
docker. However, as with any major change, it is recommended to take a
|
||||
full backup first. Once you are ready, simply change the docker image to
|
||||
point to the new source. E.g. if using Docker Compose, edit
|
||||
`docker-compose.yml` and change:
|
||||
|
||||
```
|
||||
image: jonaswinkler/paperless-ng:latest
|
||||
```
|
||||
|
||||
to
|
||||
|
||||
```
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
```
|
||||
|
||||
and then run `docker-compose up -d` which will pull the new image
|
||||
and recreate the container. That's it!
|
||||
|
||||
Users who installed with the bare-metal route should also update their
|
||||
Git clone to point to `https://github.com/paperless-ngx/paperless-ngx`,
|
||||
e.g. using the command
|
||||
`git remote set-url origin https://github.com/paperless-ngx/paperless-ngx`
|
||||
and then pull the latest version.
|
||||
|
||||
## Migrating from Paperless
|
||||
|
||||
At its core, paperless-ngx is still paperless and fully compatible.
|
||||
However, some things have changed under the hood, so you need to adapt
|
||||
your setup depending on how you installed paperless.
|
||||
|
||||
This setup describes how to update an existing paperless Docker
|
||||
installation. The important things to keep in mind are as follows:
|
||||
|
||||
- Read the [changelog](/changelog) and
|
||||
take note of breaking changes.
|
||||
- You should decide if you want to stick with SQLite or want to
|
||||
migrate your database to PostgreSQL. See [documentation](#sqlite_to_psql)
|
||||
for details on
|
||||
how to move your data from SQLite to PostgreSQL. Both work fine with
|
||||
paperless. However, if you already have a database server running
|
||||
for other services, you might as well use it for paperless as well.
|
||||
- The task scheduler of paperless, which is used to execute periodic
|
||||
tasks such as email checking and maintenance, requires a
|
||||
[redis](https://redis.io/) message broker instance. The
|
||||
docker-compose route takes care of that.
|
||||
- The layout of the folder structure for your documents and data
|
||||
remains the same, so you can just plug your old docker volumes into
|
||||
paperless-ngx and expect it to find everything where it should be.
|
||||
|
||||
Migration to paperless-ngx is then performed in a few simple steps:
|
||||
|
||||
1. Stop paperless.
|
||||
|
||||
```bash
|
||||
$ cd /path/to/current/paperless
|
||||
$ docker-compose down
|
||||
```
|
||||
|
||||
2. Do a backup for two purposes: If something goes wrong, you still
|
||||
have your data. Second, if you don't like paperless-ngx, you can
|
||||
switch back to paperless.
|
||||
|
||||
3. Download the latest release of paperless-ngx. You can either go with
|
||||
the docker-compose files from
|
||||
[here](https://github.com/paperless-ngx/paperless-ngx/tree/master/docker/compose)
|
||||
or clone the repository to build the image yourself (see
|
||||
[above](#docker_build)). You can
|
||||
either replace your current paperless folder or put paperless-ngx in
|
||||
a different location.
|
||||
|
||||
!!! warning
|
||||
|
||||
Paperless-ngx includes a `.env` file. This will set the project name
|
||||
for docker compose to `paperless`, which will also define the name
|
||||
of the volumes by paperless-ngx. However, if you experience that
|
||||
paperless-ngx is not using your old paperless volumes, verify the
|
||||
names of your volumes with
|
||||
|
||||
``` shell-session
|
||||
$ docker volume ls | grep _data
|
||||
```
|
||||
|
||||
and adjust the project name in the `.env` file so that it matches
|
||||
the name of the volumes before the `_data` part.
|
||||
|
||||
4. Download the `docker-compose.sqlite.yml` file to
|
||||
`docker-compose.yml`. If you want to switch to PostgreSQL, do that
|
||||
after you migrated your existing SQLite database.
|
||||
|
||||
5. Adjust `docker-compose.yml` and `docker-compose.env` to your needs.
|
||||
See [Docker setup](#docker_hub) for details on
|
||||
which edits are advised.
|
||||
|
||||
6. [Update paperless.](/administration#updating)
|
||||
|
||||
7. In order to find your existing documents with the new search
|
||||
feature, you need to invoke a one-time operation that will create
|
||||
the search index:
|
||||
|
||||
```shell-session
|
||||
$ docker-compose run --rm webserver document_index reindex
|
||||
```
|
||||
|
||||
This will migrate your database and create the search index. After
|
||||
that, paperless will take care of maintaining the index by itself.
|
||||
|
||||
8. Start paperless-ngx.
|
||||
|
||||
```bash
|
||||
$ docker-compose up -d
|
||||
```
|
||||
|
||||
This will run paperless in the background and automatically start it
|
||||
on system boot.
|
||||
|
||||
9. Paperless installed a permanent redirect to `admin/` in your
|
||||
browser. This redirect is still in place and prevents access to the
|
||||
new UI. Clear your browsing cache in order to fix this.
|
||||
|
||||
10. Optionally, follow the instructions below to migrate your existing
|
||||
data to PostgreSQL.
|
||||
|
||||
## Migrating from LinuxServer.io Docker Image
|
||||
|
||||
As with any upgrades and large changes, it is highly recommended to
|
||||
create a backup before starting. This assumes the image was running
|
||||
using Docker Compose, but the instructions are translatable to Docker
|
||||
commands as well.
|
||||
|
||||
1. Stop and remove the paperless container
|
||||
2. If using an external database, stop the container
|
||||
3. Update Redis configuration
|
||||
a) If `REDIS_URL` is already set, change it to `PAPERLESS_REDIS`
|
||||
and continue to step 4.
|
||||
b) Otherwise, in the `docker-compose.yml` add a new service for
|
||||
Redis, following [the example compose
|
||||
files](https://github.com/paperless-ngx/paperless-ngx/tree/main/docker/compose)
|
||||
c) Set the environment variable `PAPERLESS_REDIS` so it points to
|
||||
the new Redis container
|
||||
4. Update user mapping
|
||||
a) If set, change the environment variable `PUID` to `USERMAP_UID`
|
||||
b) If set, change the environment variable `PGID` to `USERMAP_GID`
|
||||
5. Update configuration paths
|
||||
a) Set the environment variable `PAPERLESS_DATA_DIR` to `/config`
|
||||
6. Update media paths
|
||||
a) Set the environment variable `PAPERLESS_MEDIA_ROOT` to
|
||||
`/data/media`
|
||||
7. Update timezone
|
||||
a) Set the environment variable `PAPERLESS_TIME_ZONE` to the same
|
||||
value as `TZ`
|
||||
8. Modify the `image:` to point to
|
||||
`ghcr.io/paperless-ngx/paperless-ngx:latest` or a specific version
|
||||
if preferred.
|
||||
9. Start the containers as before, using `docker-compose`.
|
||||
|
||||
## Moving data from SQLite to PostgreSQL or MySQL/MariaDB {#sqlite_to_psql}
|
||||
|
||||
Moving your data from SQLite to PostgreSQL or MySQL/MariaDB is done via
|
||||
executing a series of django management commands as below. The commands
|
||||
below use PostgreSQL, but are applicable to MySQL/MariaDB with the appropriate adjustments.
|
||||
|
||||
!!! warning
|
||||
|
||||
Make sure that your SQLite database is migrated to the latest version.
|
||||
Starting paperless will make sure that this is the case. If you try to
|
||||
load data from an old database schema in SQLite into a newer database
|
||||
schema in PostgreSQL, you will run into trouble.
|
||||
|
||||
!!! warning
|
||||
|
||||
On some database fields, PostgreSQL enforces predefined limits on
|
||||
maximum length, whereas SQLite does not. The fields in question are the
|
||||
title of documents (128 characters), names of document types, tags and
|
||||
correspondents (128 characters), and filenames (1024 characters). If you
|
||||
have data in these fields that surpasses these limits, migration to
|
||||
PostgreSQL is not possible and will fail with an error.
|
||||
|
||||
!!! warning
|
||||
|
||||
MySQL is case insensitive by default, treating values like "Name" and
|
||||
"NAME" as identical. See [MySQL caveats](/advanced_usage#mysql-caveats) for details.
|
||||
|
||||
!!! warning
|
||||
|
||||
MySQL also enforces limits on maximum lengths, but does so differently than
|
||||
PostgreSQL. It may not be possible to migrate to MySQL due to this.
|
||||
|
||||
1. Stop paperless, if it is running.
|
||||
|
||||
2. Tell paperless to use PostgreSQL:
|
||||
|
||||
a) With docker, copy the provided `docker-compose.postgres.yml`
|
||||
file to `docker-compose.yml`. Remember to adjust the consumption
|
||||
directory, if necessary.
|
||||
b) Without docker, configure the database in your `paperless.conf`
|
||||
file. See [configuration](/configuration) for
|
||||
details.
|
||||
|
||||
3. Open a shell and initialize the database:
|
||||
|
||||
a) With docker, run the following command to open a shell within
|
||||
the paperless container:
|
||||
|
||||
``` shell-session
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver /bin/bash
|
||||
```
|
||||
|
||||
This will launch the container and initialize the PostgreSQL
|
||||
database.
|
||||
|
||||
b) Without docker, remember to activate any virtual environment,
|
||||
switch to the `src` directory and create the database schema:
|
||||
|
||||
``` shell-session
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py migrate
|
||||
```
|
||||
|
||||
This will not copy any data yet.
|
||||
|
||||
4. Dump your data from SQLite:
|
||||
|
||||
```shell-session
|
||||
$ python3 manage.py dumpdata --database=sqlite --exclude=contenttypes --exclude=auth.Permission > data.json
|
||||
```
|
||||
|
||||
5. Load your data into PostgreSQL:
|
||||
|
||||
```shell-session
|
||||
$ python3 manage.py loaddata data.json
|
||||
```
|
||||
|
||||
6. If operating inside Docker, you may exit the shell now.
|
||||
|
||||
```shell-session
|
||||
$ exit
|
||||
```
|
||||
|
||||
7. Start paperless.
|
||||
|
||||
## Moving back to Paperless
|
||||
|
||||
Let's say you migrated to Paperless-ngx and used it for a while, but
|
||||
decided that you don't like it and want to move back (If you do, send
|
||||
me a mail about what part you didn't like!), you can totally do that
|
||||
with a few simple steps.
|
||||
|
||||
Paperless-ngx modified the database schema slightly, however, these
|
||||
changes can be reverted while keeping your current data, so that your
|
||||
current data will be compatible with original Paperless. Thumbnails
|
||||
were also changed from PNG to WEBP format and will need to be
|
||||
re-generated.
|
||||
|
||||
Execute this:
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver migrate documents 0023
|
||||
```
|
||||
|
||||
Or without docker:
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py migrate documents 0023
|
||||
```
|
||||
|
||||
After regenerating thumbnails, you'll need to clear your cookies
|
||||
(Paperless-ngx comes with updated dependencies that do cookie-processing
|
||||
differently) and probably your cache as well.
|
||||
|
||||
# Considerations for less powerful devices {#less-powerful-devices}
|
||||
|
||||
Paperless runs on Raspberry Pi. However, some things are rather slow on
|
||||
the Pi and configuring some options in paperless can help improve
|
||||
performance immensely:
|
||||
|
||||
- Stick with SQLite to save some resources.
|
||||
- Consider setting `PAPERLESS_OCR_PAGES` to 1, so that paperless will
|
||||
only OCR the first page of your documents. In most cases, this page
|
||||
contains enough information to be able to find it.
|
||||
- `PAPERLESS_TASK_WORKERS` and `PAPERLESS_THREADS_PER_WORKER` are
|
||||
configured to use all cores. The Raspberry Pi models 3 and up have 4
|
||||
cores, meaning that paperless will use 2 workers and 2 threads per
|
||||
worker. This may result in sluggish response times during
|
||||
consumption, so you might want to lower these settings (example: 2
|
||||
workers and 1 thread to always have some computing power left for
|
||||
other tasks).
|
||||
- Keep `PAPERLESS_OCR_MODE` at its default value `skip` and consider
|
||||
OCR'ing your documents before feeding them into paperless. Some
|
||||
scanners are able to do this! You might want to even specify
|
||||
`skip_noarchive` to skip archive file generation for already ocr'ed
|
||||
documents entirely.
|
||||
- If you want to perform OCR on the device, consider using
|
||||
`PAPERLESS_OCR_CLEAN=none`. This will speed up OCR times and use
|
||||
less memory at the expense of slightly worse OCR results.
|
||||
- If using docker, consider setting `PAPERLESS_WEBSERVER_WORKERS` to
|
||||
1. This will save some memory.
|
||||
- Consider setting `PAPERLESS_ENABLE_NLTK` to false, to disable the
|
||||
more advanced language processing, which can take more memory and
|
||||
processing time.
|
||||
|
||||
For details, refer to [configuration](/configuration).
|
||||
|
||||
!!! note
|
||||
|
||||
Updating the
|
||||
[automatic matching algorithm](/advanced_usage#automatic-matching) takes quite a bit of time. However, the update mechanism
|
||||
checks if your data has changed before doing the heavy lifting. If you
|
||||
experience the algorithm taking too much cpu time, consider changing the
|
||||
schedule in the admin interface to daily. You can also manually invoke
|
||||
the task by changing the date and time of the next run to today/now.
|
||||
|
||||
The actual matching of the algorithm is fast and works on Raspberry Pi
|
||||
as well as on any other device.
|
||||
|
||||
# Using nginx as a reverse proxy {#nginx}
|
||||
|
||||
If you want to expose paperless to the internet, you should hide it
|
||||
behind a reverse proxy with SSL enabled.
|
||||
|
||||
In addition to the usual configuration for SSL, the following
|
||||
configuration is required for paperless to operate:
|
||||
|
||||
```nginx
|
||||
http {
|
||||
|
||||
# Adjust as required. This is the maximum size for file uploads.
|
||||
# The default value 1M might be a little too small.
|
||||
client_max_body_size 10M;
|
||||
|
||||
server {
|
||||
|
||||
location / {
|
||||
|
||||
# Adjust host and port as required.
|
||||
proxy_pass http://localhost:8000/;
|
||||
|
||||
# These configuration options are required for WebSockets to work.
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
proxy_redirect off;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Host $server_name;
|
||||
add_header P3P 'CP=""'; # may not be required in all setups
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `PAPERLESS_URL` configuration variable is also required when using a
|
||||
reverse proxy. Please refer to the [hosting and security](/configuration#hosting-and-security) docs.
|
||||
|
||||
Also read
|
||||
[this](https://channels.readthedocs.io/en/stable/deploying.html#nginx-supervisor-ubuntu),
|
||||
towards the end of the section.
|
894
docs/setup.rst
@@ -1,894 +0,0 @@
|
||||
|
||||
*****
|
||||
Setup
|
||||
*****
|
||||
|
||||
Overview of Paperless-ngx
|
||||
#########################
|
||||
|
||||
Compared to paperless, paperless-ngx works a little differently under the hood and has
|
||||
more moving parts that work together. While this increases the complexity of
|
||||
the system, it also brings many benefits.
|
||||
|
||||
Paperless consists of the following components:
|
||||
|
||||
* **The webserver:** This is pretty much the same as in paperless. It serves
|
||||
the administration pages, the API, and the new frontend. This is the main
|
||||
tool you'll be using to interact with paperless. You may start the webserver
|
||||
with
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src/
|
||||
$ gunicorn -c ../gunicorn.conf.py paperless.wsgi
|
||||
|
||||
or by any other means such as Apache ``mod_wsgi``.
|
||||
|
||||
* **The consumer:** This is what watches your consumption folder for documents.
|
||||
However, the consumer itself does not really consume your documents.
|
||||
Now it notifies a task processor that a new file is ready for consumption.
|
||||
I suppose it should be named differently.
|
||||
This was also used to check your emails, but that's now done elsewhere as well.
|
||||
|
||||
Start the consumer with the management command ``document_consumer``:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src/
|
||||
$ python3 manage.py document_consumer
|
||||
|
||||
.. _setup-task_processor:
|
||||
|
||||
* **The task processor:** Paperless relies on `Celery - Distributed Task Queue <https://docs.celeryq.dev/en/stable/index.html>`_
|
||||
for doing most of the heavy lifting. This is a task queue that accepts tasks from
|
||||
multiple sources and processes these in parallel. It also comes with a scheduler that executes
|
||||
certain commands periodically.
|
||||
|
||||
This task processor is responsible for:
|
||||
|
||||
* Consuming documents. When the consumer finds new documents, it notifies the task processor to
|
||||
start a consumption task.
|
||||
* The task processor also performs the consumption of any documents you upload through
|
||||
the web interface.
|
||||
* Consuming emails. It periodically checks your configured accounts for new emails and
|
||||
notifies the task processor to consume the attachment of an email.
|
||||
* Maintaining the search index and the automatic matching algorithm. These are things that paperless
|
||||
needs to do from time to time in order to operate properly.
|
||||
|
||||
This allows paperless to process multiple documents from your consumption folder in parallel! On
|
||||
a modern multi core system, this makes the consumption process with full OCR blazingly fast.
|
||||
|
||||
The task processor comes with a built-in admin interface that you can use to check whenever any of the
|
||||
tasks fail and inspect the errors (e.g., wrong email credentials, errors during consuming a specific
|
||||
file, etc).
|
||||
|
||||
* A `redis <https://redis.io/>`_ message broker: This is a really lightweight service that is responsible
|
||||
for getting the tasks from the webserver and the consumer to the task scheduler. These run in a different
|
||||
process (maybe even on different machines!), and therefore, this is necessary.
|
||||
|
||||
* Optional: A database server. Paperless supports PostgreSQL, MariaDB and SQLite for storing its data.
|
||||
|
||||
|
||||
Installation
|
||||
############
|
||||
|
||||
You can go multiple routes to set up and run Paperless:
|
||||
|
||||
* :ref:`Use the easy install docker script <setup-docker_script>`
|
||||
* :ref:`Pull the image from Docker Hub <setup-docker_hub>`
|
||||
* :ref:`Build the Docker image yourself <setup-docker_build>`
|
||||
* :ref:`Install Paperless directly on your system manually (bare metal) <setup-bare_metal>`
|
||||
|
||||
The Docker routes are quick & easy. These are the recommended routes. This configures all the stuff
|
||||
from the above automatically so that it just works and uses sensible defaults for all configuration options.
|
||||
Here you find a cheat-sheet for docker beginners: `CLI Basics <https://www.sehn.tech/refs/devops-with-docker/>`_
|
||||
|
||||
The bare metal route is complicated to set up but makes it easier
|
||||
should you want to contribute some code back. You need to configure and
|
||||
run the above mentioned components yourself.
|
||||
|
||||
.. _CLI Basics: https://www.sehn.tech/refs/devops-with-docker/
|
||||
|
||||
.. _setup-docker_script:
|
||||
|
||||
Install Paperless from Docker Hub using the installation script
|
||||
===============================================================
|
||||
|
||||
Paperless provides an interactive installation script. This script will ask you
|
||||
for a couple configuration options, download and create the necessary configuration files, pull the docker image, start paperless and create your user account. This script essentially
|
||||
performs all the steps described in :ref:`setup-docker_hub` automatically.
|
||||
|
||||
1. Make sure that docker and docker-compose are installed.
|
||||
2. Download and run the installation script:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ bash -c "$(curl -L https://raw.githubusercontent.com/paperless-ngx/paperless-ngx/main/install-paperless-ngx.sh)"
|
||||
|
||||
.. _setup-docker_hub:
|
||||
|
||||
Install Paperless from Docker Hub
|
||||
=================================
|
||||
|
||||
1. Login with your user and create a folder in your home-directory `mkdir -v ~/paperless-ngx` to have a place for your configuration files and consumption directory.
|
||||
|
||||
2. Go to the `/docker/compose directory on the project page <https://github.com/paperless-ngx/paperless-ngx/tree/master/docker/compose>`_
|
||||
and download one of the `docker-compose.*.yml` files, depending on which database backend you
|
||||
want to use. Rename this file to `docker-compose.yml`.
|
||||
If you want to enable optional support for Office documents, download a file with `-tika` in the file name.
|
||||
Download the ``docker-compose.env`` file and the ``.env`` file as well and store them
|
||||
in the same directory.
|
||||
|
||||
.. hint::
|
||||
|
||||
For new installations, it is recommended to use PostgreSQL as the database
|
||||
backend.
|
||||
|
||||
3. Install `Docker`_ and `docker-compose`_.
|
||||
|
||||
.. caution::
|
||||
|
||||
If you want to use the included ``docker-compose.*.yml`` file, you
|
||||
need to have at least Docker version **17.09.0** and docker-compose
|
||||
version **1.17.0**.
|
||||
To check do: `docker-compose -v` or `docker -v`
|
||||
|
||||
See the `Docker installation guide`_ on how to install the current
|
||||
version of Docker for your operating system or Linux distribution of
|
||||
choice. To get the latest version of docker-compose, follow the
|
||||
`docker-compose installation guide`_ if your package repository doesn't
|
||||
include it.
|
||||
|
||||
.. _Docker installation guide: https://docs.docker.com/engine/installation/
|
||||
.. _docker-compose installation guide: https://docs.docker.com/compose/install/
|
||||
|
||||
4. Modify ``docker-compose.yml`` to your preferences. You may want to change the path
|
||||
to the consumption directory. Find the line that specifies where
|
||||
to mount the consumption directory:
|
||||
|
||||
.. code::
|
||||
|
||||
- ./consume:/usr/src/paperless/consume
|
||||
|
||||
Replace the part BEFORE the colon with a local directory of your choice:
|
||||
|
||||
.. code::
|
||||
|
||||
- /home/jonaswinkler/paperless-inbox:/usr/src/paperless/consume
|
||||
|
||||
Don't change the part after the colon or paperless won't find your documents.
|
||||
|
||||
You may also need to change the default port that the webserver will use
|
||||
from the default (8000):
|
||||
|
||||
.. code::
|
||||
|
||||
ports:
|
||||
- 8000:8000
|
||||
|
||||
Replace the part BEFORE the colon with a port of your choice:
|
||||
|
||||
.. code::
|
||||
|
||||
ports:
|
||||
- 8010:8000
|
||||
|
||||
Don't change the part after the colon or edit other lines that refer to
|
||||
port 8000. Modifying the part before the colon will map requests on another
|
||||
port to the webserver running on the default port.
|
||||
|
||||
**Rootless**
|
||||
|
||||
If you want to run Paperless as a rootless container, you will need to do the
|
||||
following in your ``docker-compose.yml``:
|
||||
|
||||
- set the ``user`` running the container to map to the ``paperless`` user in the
|
||||
container.
|
||||
This value (``user_id`` below) should be the same ID that ``USERMAP_UID`` and
|
||||
``USERMAP_GID`` are set to in the next step.
|
||||
See ``USERMAP_UID`` and ``USERMAP_GID`` :ref:`here <configuration-docker>`.
|
||||
|
||||
Your entry for Paperless should contain something like:
|
||||
|
||||
.. code::
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
user: <user_id>
|
||||
|
||||
5. Modify ``docker-compose.env``, following the comments in the file. The
|
||||
most important change is to set ``USERMAP_UID`` and ``USERMAP_GID``
|
||||
to the uid and gid of your user on the host system. Use ``id -u`` and
|
||||
``id -g`` to get these.
|
||||
|
||||
This ensures that
|
||||
both the docker container and you on the host machine have write access
|
||||
to the consumption directory. If your UID and GID on the host system are
|
||||
1000 (the default for the first normal user on most systems), it will
|
||||
work out of the box without any modifications. Run ``id "username"`` to check.
|
||||
|
||||
.. note::
|
||||
|
||||
You can copy any setting from the file ``paperless.conf.example`` and paste it here.
|
||||
Have a look at :ref:`configuration` to see what's available.
|
||||
|
||||
.. note::
|
||||
|
||||
You can utilize Docker secrets for some configuration settings by
|
||||
appending `_FILE` to some configuration values. This is supported currently
|
||||
only by:
|
||||
|
||||
* PAPERLESS_DBUSER
|
||||
* PAPERLESS_DBPASS
|
||||
* PAPERLESS_SECRET_KEY
|
||||
* PAPERLESS_AUTO_LOGIN_USERNAME
|
||||
* PAPERLESS_ADMIN_USER
|
||||
* PAPERLESS_ADMIN_MAIL
|
||||
* PAPERLESS_ADMIN_PASSWORD
|
||||
|
||||
.. caution::
|
||||
|
||||
Some file systems such as NFS network shares don't support file system
|
||||
notifications with ``inotify``. When storing the consumption directory
|
||||
on such a file system, paperless will not pick up new files
|
||||
with the default configuration. You will need to use ``PAPERLESS_CONSUMER_POLLING``,
|
||||
which will disable inotify. See :ref:`here <configuration-polling>`.
|
||||
|
||||
6. Run ``docker-compose pull``, followed by ``docker-compose up -d``.
|
||||
This will pull the image, create and start the necessary containers.
|
||||
|
||||
7. To be able to login, you will need a super user. To create it, execute the
|
||||
following command:
|
||||
|
||||
.. code-block:: shell-session
|
||||
|
||||
$ docker-compose run --rm webserver createsuperuser
|
||||
|
||||
This will prompt you to set a username, an optional e-mail address and
|
||||
finally a password (at least 8 characters).
|
||||
|
||||
8. The default ``docker-compose.yml`` exports the webserver on your local port
|
||||
8000. If you did not change this, you should now be able to visit your
|
||||
Paperless instance at ``http://127.0.0.1:8000`` or at your server's IP address on port 8000.
|
||||
Use the login credentials you have created with the previous step.
|
||||
|
||||
.. _Docker: https://www.docker.com/
|
||||
.. _docker-compose: https://docs.docker.com/compose/install/
|
||||
|
||||
.. _setup-docker_build:
|
||||
|
||||
Build the Docker image yourself
|
||||
===============================
|
||||
|
||||
1. Clone the entire repository of paperless:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
git clone https://github.com/paperless-ngx/paperless-ngx
|
||||
|
||||
The master branch always reflects the latest stable version.
|
||||
|
||||
2. Copy one of the ``docker/compose/docker-compose.*.yml`` to ``docker-compose.yml`` in the root folder,
|
||||
depending on which database backend you want to use. Copy
|
||||
``docker-compose.env`` into the project root as well.
|
||||
|
||||
3. In the ``docker-compose.yml`` file, find the line that instructs docker-compose to pull the paperless image from the container registry:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
webserver:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
|
||||
and replace it with a line that instructs docker-compose to build the image from the current working directory instead:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
webserver:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
QPDF_VERSION: x.y.z
|
||||
PIKEPDF_VERSION: x.y.z
|
||||
PSYCOPG2_VERSION: x.y.z
|
||||
JBIG2ENC_VERSION: 0.29
|
||||
|
||||
.. note::
|
||||
|
||||
You should match the build argument versions to the version for the release you have
|
||||
checked out. These are pre-built images with certain, more updated software.
|
||||
If you want to build these images yourself, that is possible, but beyond
|
||||
the scope of these steps.
|
||||
|
||||
4. Follow steps 3 to 8 of :ref:`setup-docker_hub`. When asked to run
|
||||
``docker-compose pull`` to pull the image, do
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker-compose build
|
||||
|
||||
instead to build the image.
|
||||
|
||||
.. _setup-bare_metal:
|
||||
|
||||
Bare Metal Route
|
||||
================
|
||||
|
||||
Paperless runs on linux only. The following procedure has been tested on a minimal
|
||||
installation of Debian/Buster, which is the current stable release at the time of
|
||||
writing. Windows is not and will never be supported.
|
||||
|
||||
1. Install dependencies. Paperless requires the following packages.
|
||||
|
||||
* ``python3`` 3.8, 3.9
|
||||
* ``python3-pip``
|
||||
* ``python3-dev``
|
||||
|
||||
* ``default-libmysqlclient-dev`` for MariaDB
|
||||
* ``fonts-liberation`` for generating thumbnails for plain text files
|
||||
* ``imagemagick`` >= 6 for PDF conversion
|
||||
* ``gnupg`` for handling encrypted documents
|
||||
* ``libpq-dev`` for PostgreSQL
|
||||
* ``libmagic-dev`` for mime type detection
|
||||
* ``mariadb-client`` for MariaDB compile time
|
||||
* ``mime-support`` for mime type detection
|
||||
* ``libzbar0`` for barcode detection
|
||||
* ``poppler-utils`` for barcode detection
|
||||
|
||||
Use this list for your preferred package management:
|
||||
|
||||
.. code::
|
||||
|
||||
python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev default-libmysqlclient-dev libmagic-dev mime-support libzbar0 poppler-utils
|
||||
|
||||
These dependencies are required for OCRmyPDF, which is used for text recognition.
|
||||
|
||||
* ``unpaper``
|
||||
* ``ghostscript``
|
||||
* ``icc-profiles-free``
|
||||
* ``qpdf``
|
||||
* ``liblept5``
|
||||
* ``libxml2``
|
||||
* ``pngquant`` (suggested for certain PDF image optimizations)
|
||||
* ``zlib1g``
|
||||
* ``tesseract-ocr`` >= 4.0.0 for OCR
|
||||
* ``tesseract-ocr`` language packs (``tesseract-ocr-eng``, ``tesseract-ocr-deu``, etc)
|
||||
|
||||
Use this list for your preferred package management:
|
||||
|
||||
.. code::
|
||||
|
||||
unpaper ghostscript icc-profiles-free qpdf liblept5 libxml2 pngquant zlib1g tesseract-ocr
|
||||
|
||||
On Raspberry Pi, these libraries are required as well:
|
||||
|
||||
* ``libatlas-base-dev``
|
||||
* ``libxslt1-dev``
|
||||
|
||||
You will also need ``build-essential``, ``python3-setuptools`` and ``python3-wheel``
|
||||
for installing some of the python dependencies.
|
||||
|
||||
2. Install ``redis`` >= 6.0 and configure it to start automatically.
|
||||
|
||||
3. Optional. Install ``postgresql`` and configure a database, user and password for paperless. If you do not wish
|
||||
to use PostgreSQL, MariaDB and SQLite are available as well.
|
||||
|
||||
.. note::
|
||||
|
||||
On bare-metal installations using SQLite, ensure the
|
||||
`JSON1 extension <https://code.djangoproject.com/wiki/JSON1Extension>`_ is enabled. This is
|
||||
usually the case, but not always.
|
||||
|
||||
4. Get the release archive from `<https://github.com/paperless-ngx/paperless-ngx/releases>`_.
|
||||
If you clone the git repo as it is, you also have to compile the front end by yourself.
|
||||
Extract the archive to a place from where you wish to execute it, such as ``/opt/paperless``.
|
||||
|
||||
5. Configure paperless. See :ref:`configuration` for details. Edit the included ``paperless.conf`` and adjust the
|
||||
settings to your needs. Required settings for getting paperless running are:
|
||||
|
||||
* ``PAPERLESS_REDIS`` should point to your redis server, such as redis://localhost:6379.
|
||||
* ``PAPERLESS_DBENGINE`` is optional and should be one of ``postgres``, ``mariadb``, or ``sqlite``.
|
||||
* ``PAPERLESS_DBHOST`` should be the hostname on which your PostgreSQL server is running. Leave this unset
|
||||
to use SQLite instead. Also configure port, database name, user and password as necessary.
|
||||
* ``PAPERLESS_CONSUMPTION_DIR`` should point to a folder which paperless should watch for documents. You might
|
||||
want to have this somewhere else. Likewise, ``PAPERLESS_DATA_DIR`` and ``PAPERLESS_MEDIA_ROOT`` define where
|
||||
paperless stores its data. If you like, you can point both to the same directory.
|
||||
* ``PAPERLESS_SECRET_KEY`` should be a random sequence of characters. It's used for authentication. Failure
|
||||
to do so allows third parties to forge authentication credentials.
|
||||
* ``PAPERLESS_URL`` if you are behind a reverse proxy. This should point to your domain. Please see
|
||||
:ref:`configuration` for more information.
|
||||
|
||||
Many more adjustments can be made to paperless, especially the OCR part. The following options are recommended
|
||||
for everyone:
|
||||
|
||||
* Set ``PAPERLESS_OCR_LANGUAGE`` to the language most of your documents are written in.
|
||||
* Set ``PAPERLESS_TIME_ZONE`` to your local time zone.
|
||||
|
||||
6. Create a system user under which you wish to run paperless.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
adduser paperless --system --home /opt/paperless --group
|
||||
|
||||
7. Ensure that these directories exist
|
||||
and that the paperless user has write permissions to the following directories:
|
||||
|
||||
* ``/opt/paperless/media``
|
||||
* ``/opt/paperless/data``
|
||||
* ``/opt/paperless/consume``
|
||||
|
||||
Adjust as necessary if you configured different folders.
|
||||
|
||||
8. Install python requirements from the ``requirements.txt`` file.
|
||||
It is up to you if you wish to use a virtual environment or not. First you should update pip, so that it installs the current packages.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
sudo -Hu paperless pip3 install --upgrade pip
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
sudo -Hu paperless pip3 install -r requirements.txt
|
||||
|
||||
This will install all python dependencies in the home directory of
|
||||
the new paperless user.
|
||||
|
||||
9. Go to ``/opt/paperless/src``, and execute the following commands:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
# This creates the database schema.
|
||||
sudo -Hu paperless python3 manage.py migrate
|
||||
|
||||
# This creates your first paperless user
|
||||
sudo -Hu paperless python3 manage.py createsuperuser
|
||||
|
||||
10. Optional: Test that paperless is working by executing
|
||||
|
||||
.. code:: bash
|
||||
|
||||
# This starts the development webserver.
|
||||
sudo -Hu paperless python3 manage.py runserver
|
||||
|
||||
and pointing your browser to http://localhost:8000/.
|
||||
|
||||
.. warning::
|
||||
|
||||
This is a development server which should not be used in
|
||||
production. It is not audited for security and performance
|
||||
is inferior to production ready web servers.
|
||||
|
||||
.. hint::
|
||||
|
||||
This will not start the consumer. Paperless does this in a
|
||||
separate process.
|
||||
|
||||
11. Setup systemd services to run paperless automatically. You may
|
||||
use the service definition files included in the ``scripts`` folder
|
||||
as a starting point.
|
||||
|
||||
Paperless needs the ``webserver`` script to run the webserver, the
|
||||
``consumer`` script to watch the input folder, ``taskqueue`` for the background workers
|
||||
used to handle things like document consumption and the ``scheduler`` script to run tasks such as
|
||||
email checking at certain times.
|
||||
|
||||
The ``socket`` script enables ``gunicorn`` to run on port 80 without
|
||||
root privileges. For this you need to uncomment the ``Requires=paperless-webserver.socket`` line
|
||||
in the ``webserver`` script and configure ``gunicorn`` to listen on port 80 (see ``paperless/gunicorn.conf.py``).
|
||||
|
||||
You may need to adjust the path to the ``gunicorn`` executable. This
|
||||
will be installed as part of the python dependencies, and is either located
|
||||
in the ``bin`` folder of your virtual environment, or in ``~/.local/bin/`` if
|
||||
no virtual environment is used.
|
||||
|
||||
These services rely on redis and optionally the database server, but
|
||||
don't need to be started in any particular order. The example files
|
||||
depend on redis being started. If you use a database server, you should
|
||||
add additional dependencies.
|
||||
|
||||
.. caution::
|
||||
|
||||
The included scripts run a ``gunicorn`` standalone server,
|
||||
which is fine for running paperless. It does support SSL,
|
||||
however, the documentation of GUnicorn states that you should
|
||||
use a proxy server in front of gunicorn instead.
|
||||
|
||||
For instructions on how to use nginx for that,
|
||||
:ref:`see the instructions below <setup-nginx>`.
|
||||
|
||||
12. Optional: Install a samba server and make the consumption folder
|
||||
available as a network share.
|
||||
|
||||
13. Configure ImageMagick to allow processing of PDF documents. Most distributions have
|
||||
this disabled by default, since PDF documents can contain malware. If
|
||||
you don't do this, paperless will fall back to ghostscript for certain steps
|
||||
such as thumbnail generation.
|
||||
|
||||
Edit ``/etc/ImageMagick-6/policy.xml`` and adjust
|
||||
|
||||
.. code::
|
||||
|
||||
<policy domain="coder" rights="none" pattern="PDF" />
|
||||
|
||||
to
|
||||
|
||||
.. code::
|
||||
|
||||
<policy domain="coder" rights="read|write" pattern="PDF" />
|
||||
|
||||
14. Optional: Install the `jbig2enc <https://ocrmypdf.readthedocs.io/en/latest/jbig2.html>`_
|
||||
encoder. This will reduce the size of generated PDF documents. You'll most likely need
|
||||
to compile this by yourself, because this software was patented until around 2017 and
|
||||
binary packages are not available for most distributions.
|
||||
|
||||
15. Optional: If using the NLTK machine learning processing (see ``PAPERLESS_ENABLE_NLTK`` in
|
||||
:ref:`configuration` for details), download the NLTK data for the Snowball Stemmer, Stopwords
|
||||
and Punkt tokenizer to your ``PAPERLESS_DATA_DIR/nltk``. Refer to
|
||||
the `NLTK instructions <https://www.nltk.org/data.html>`_ for details on how to
|
||||
download the data.
|
||||
|
||||
|
||||
Migrating to Paperless-ngx
|
||||
##########################
|
||||
|
||||
Migration is possible both from Paperless-ng or directly from the 'original' Paperless.
|
||||
|
||||
Migrating from Paperless-ng
|
||||
===========================
|
||||
|
||||
Paperless-ngx is meant to be a drop-in replacement for Paperless-ng and thus upgrading should be
|
||||
trivial for most users, especially when using docker. However, as with any major change, it is
|
||||
recommended to take a full backup first. Once you are ready, simply change the docker image to
|
||||
point to the new source. E.g. if using Docker Compose, edit ``docker-compose.yml`` and change:
|
||||
|
||||
.. code::
|
||||
|
||||
image: jonaswinkler/paperless-ng:latest
|
||||
|
||||
to
|
||||
|
||||
.. code::
|
||||
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
|
||||
and then run ``docker-compose up -d`` which will pull the new image and recreate the container.
|
||||
That's it!
|
||||
|
||||
Users who installed with the bare-metal route should also update their Git clone to point to
|
||||
``https://github.com/paperless-ngx/paperless-ngx``, e.g. using the command
|
||||
``git remote set-url origin https://github.com/paperless-ngx/paperless-ngx`` and then pull the
|
||||
latest version.
|
||||
|
||||
Migrating from Paperless
|
||||
========================
|
||||
|
||||
At its core, paperless-ngx is still paperless and fully compatible. However, some
|
||||
things have changed under the hood, so you need to adapt your setup depending on
|
||||
how you installed paperless.
|
||||
|
||||
This setup describes how to update an existing paperless Docker installation.
|
||||
The important things to keep in mind are as follows:
|
||||
|
||||
* Read the :doc:`changelog </changelog>` and take note of breaking changes.
|
||||
* You should decide if you want to stick with SQLite or want to migrate your database
|
||||
to PostgreSQL. See :ref:`setup-sqlite_to_psql` for details on how to move your data from
|
||||
SQLite to PostgreSQL. Both work fine with paperless. However, if you already have a
|
||||
database server running for other services, you might as well use it for paperless as well.
|
||||
* The task scheduler of paperless, which is used to execute periodic tasks
|
||||
such as email checking and maintenance, requires a `redis`_ message broker
|
||||
instance. The docker-compose route takes care of that.
|
||||
* The layout of the folder structure for your documents and data remains the
|
||||
same, so you can just plug your old docker volumes into paperless-ngx and
|
||||
expect it to find everything where it should be.
|
||||
|
||||
Migration to paperless-ngx is then performed in a few simple steps:
|
||||
|
||||
1. Stop paperless.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ cd /path/to/current/paperless
|
||||
$ docker-compose down
|
||||
|
||||
2. Do a backup for two purposes: If something goes wrong, you still have your
|
||||
data. Second, if you don't like paperless-ngx, you can switch back to
|
||||
paperless.
|
||||
|
||||
3. Download the latest release of paperless-ngx. You can either go with the
|
||||
docker-compose files from `here <https://github.com/paperless-ngx/paperless-ngx/tree/master/docker/compose>`__
|
||||
or clone the repository to build the image yourself (see :ref:`above <setup-docker_build>`).
|
||||
You can either replace your current paperless folder or put paperless-ngx
|
||||
in a different location.
|
||||
|
||||
.. caution::
|
||||
|
||||
Paperless-ngx includes a ``.env`` file. This will set the
|
||||
project name for docker compose to ``paperless``, which will also define the name
|
||||
of the volumes by paperless-ngx. However, if you experience that paperless-ngx
|
||||
is not using your old paperless volumes, verify the names of your volumes with
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker volume ls | grep _data
|
||||
|
||||
and adjust the project name in the ``.env`` file so that it matches the name
|
||||
of the volumes before the ``_data`` part.
|
||||
|
||||
|
||||
4. Download the ``docker-compose.sqlite.yml`` file to ``docker-compose.yml``.
|
||||
If you want to switch to PostgreSQL, do that after you migrated your existing
|
||||
SQLite database.
|
||||
|
||||
5. Adjust ``docker-compose.yml`` and ``docker-compose.env`` to your needs.
|
||||
See :ref:`setup-docker_hub` for details on which edits are advised.
|
||||
|
||||
6. :ref:`Update paperless. <administration-updating>`
|
||||
|
||||
7. In order to find your existing documents with the new search feature, you need
|
||||
to invoke a one-time operation that will create the search index:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker-compose run --rm webserver document_index reindex
|
||||
|
||||
This will migrate your database and create the search index. After that,
|
||||
paperless will take care of maintaining the index by itself.
|
||||
|
||||
8. Start paperless-ngx.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ docker-compose up -d
|
||||
|
||||
This will run paperless in the background and automatically start it on system boot.
|
||||
|
||||
9. Paperless installed a permanent redirect to ``admin/`` in your browser. This
|
||||
redirect is still in place and prevents access to the new UI. Clear your
|
||||
browsing cache in order to fix this.
|
||||
|
||||
10. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
|
||||
|
||||
|
||||
Migrating from LinuxServer.io Docker Image
|
||||
==========================================
|
||||
|
||||
As with any upgrades and large changes, it is highly recommended to create a backup before
|
||||
starting. This assumes the image was running using Docker Compose, but the instructions
|
||||
are translatable to Docker commands as well.
|
||||
|
||||
1. Stop and remove the paperless container
|
||||
2. If using an external database, stop the container
|
||||
3. Update Redis configuration
|
||||
|
||||
a) If ``REDIS_URL`` is already set, change it to ``PAPERLESS_REDIS`` and continue
|
||||
to step 4.
|
||||
b) Otherwise, in the ``docker-compose.yml`` add a new service for Redis,
|
||||
following `the example compose files <https://github.com/paperless-ngx/paperless-ngx/tree/main/docker/compose>`_
|
||||
c) Set the environment variable ``PAPERLESS_REDIS`` so it points to the new Redis container
|
||||
|
||||
4. Update user mapping
|
||||
|
||||
a) If set, change the environment variable ``PUID`` to ``USERMAP_UID``
|
||||
b) If set, change the environment variable ``PGID`` to ``USERMAP_GID``
|
||||
|
||||
5. Update configuration paths
|
||||
|
||||
a) Set the environment variable ``PAPERLESS_DATA_DIR``
|
||||
to ``/config``
|
||||
|
||||
6. Update media paths
|
||||
|
||||
a) Set the environment variable ``PAPERLESS_MEDIA_ROOT``
|
||||
to ``/data/media``
|
||||
|
||||
7. Update timezone
|
||||
|
||||
a) Set the environment variable ``PAPERLESS_TIME_ZONE``
|
||||
to the same value as ``TZ``
|
||||
|
||||
8. Modify the ``image:`` to point to ``ghcr.io/paperless-ngx/paperless-ngx:latest`` or
|
||||
a specific version if preferred.
|
||||
|
||||
9. Start the containers as before, using ``docker-compose``.
|
||||
|
||||
.. _setup-sqlite_to_psql:
|
||||
|
||||
Moving data from SQLite to PostgreSQL or MySQL/MariaDB
|
||||
======================================================
|
||||
|
||||
Moving your data from SQLite to PostgreSQL or MySQL/MariaDB is done via executing a series of django
|
||||
management commands as below. The commands below use PostgreSQL, but are applicable to MySQL/MariaDB
|
||||
with the appropriate database settings.
|
||||
|
||||
.. caution::
|
||||
|
||||
Make sure that your SQLite database is migrated to the latest version.
|
||||
Starting paperless will make sure that this is the case. If you try to
|
||||
load data from an old database schema in SQLite into a newer database
|
||||
schema in PostgreSQL, you will run into trouble.
|
||||
|
||||
.. warning::
|
||||
|
||||
On some database fields, PostgreSQL enforces predefined limits on maximum
|
||||
length, whereas SQLite does not. The fields in question are the title of documents
|
||||
(128 characters), names of document types, tags and correspondents (128 characters),
|
||||
and filenames (1024 characters). If you have data in these fields that surpasses these
|
||||
limits, migration to PostgreSQL is not possible and will fail with an error.
|
||||
|
||||
.. warning::
|
||||
|
||||
MySQL is case insensitive by default, treating values like "Name" and "NAME" as identical.
|
||||
See :ref:`advanced-mysql-caveats` for details.
|
||||
|
||||
|
||||
1. Stop paperless, if it is running.
|
||||
2. Tell paperless to use PostgreSQL:
|
||||
|
||||
a) With docker, copy the provided ``docker-compose.postgres.yml`` file to
|
||||
``docker-compose.yml``. Remember to adjust the consumption directory,
|
||||
if necessary.
|
||||
b) Without docker, configure the database in your ``paperless.conf`` file.
|
||||
See :ref:`configuration` for details.
|
||||
|
||||
3. Open a shell and initialize the database:
|
||||
|
||||
a) With docker, run the following command to open a shell within the paperless
|
||||
container:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver /bin/bash
|
||||
|
||||
This will launch the container and initialize the PostgreSQL database.
|
||||
|
||||
b) Without docker, remember to activate any virtual environment, switch to
|
||||
the ``src`` directory and create the database schema:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py migrate
|
||||
|
||||
This will not copy any data yet.
|
||||
|
||||
4. Dump your data from SQLite:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ python3 manage.py dumpdata --database=sqlite --exclude=contenttypes --exclude=auth.Permission > data.json
|
||||
|
||||
5. Load your data into PostgreSQL:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ python3 manage.py loaddata data.json
|
||||
|
||||
6. If operating inside Docker, you may exit the shell now.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ exit
|
||||
|
||||
7. Start paperless.
|
||||
|
||||
|
||||
Moving back to Paperless
|
||||
========================
|
||||
|
||||
Let's say you migrated to Paperless-ngx and used it for a while, but decided that
|
||||
you don't like it and want to move back (If you do, send me a mail about what
|
||||
part you didn't like!), you can totally do that with a few simple steps.
|
||||
|
||||
Paperless-ngx modified the database schema slightly, however, these changes can
|
||||
be reverted while keeping your current data, so that your current data will
|
||||
be compatible with original Paperless.
|
||||
|
||||
Execute this:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver migrate documents 0023
|
||||
|
||||
Or without docker:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py migrate documents 0023
|
||||
|
||||
After that, you need to clear your cookies (Paperless-ngx comes with updated
|
||||
dependencies that do cookie-processing differently) and probably your cache
|
||||
as well.
|
||||
|
||||
.. _setup-less_powerful_devices:
|
||||
|
||||
|
||||
Considerations for less powerful devices
|
||||
########################################
|
||||
|
||||
Paperless runs on Raspberry Pi. However, some things are rather slow on the Pi and
|
||||
configuring some options in paperless can help improve performance immensely:
|
||||
|
||||
* Stick with SQLite to save some resources.
|
||||
* Consider setting ``PAPERLESS_OCR_PAGES`` to 1, so that paperless will only OCR
|
||||
the first page of your documents. In most cases, this page contains enough
|
||||
information to be able to find it.
|
||||
* ``PAPERLESS_TASK_WORKERS`` and ``PAPERLESS_THREADS_PER_WORKER`` are configured
|
||||
to use all cores. The Raspberry Pi models 3 and up have 4 cores, meaning that
|
||||
paperless will use 2 workers and 2 threads per worker. This may result in
|
||||
sluggish response times during consumption, so you might want to lower these
|
||||
settings (example: 2 workers and 1 thread to always have some computing power
|
||||
left for other tasks).
|
||||
* Keep ``PAPERLESS_OCR_MODE`` at its default value ``skip`` and consider OCR'ing
|
||||
your documents before feeding them into paperless. Some scanners are able to
|
||||
do this! You might want to even specify ``skip_noarchive`` to skip archive
|
||||
file generation for already ocr'ed documents entirely.
|
||||
* If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
|
||||
This will speed up OCR times and use less memory at the expense of slightly worse
|
||||
OCR results.
|
||||
* If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
|
||||
1. This will save some memory.
|
||||
* Consider setting ``PAPERLESS_ENABLE_NLTK`` to false, to disable the more
|
||||
advanced language processing, which can take more memory and processing time.
|
||||
|
||||
For details, refer to :ref:`configuration`.
|
||||
|
||||
.. note::
|
||||
|
||||
Updating the :ref:`automatic matching algorithm <advanced-automatic_matching>`
|
||||
takes quite a bit of time. However, the update mechanism checks if your
|
||||
data has changed before doing the heavy lifting. If you experience the
|
||||
algorithm taking too much cpu time, consider changing the schedule in the
|
||||
admin interface to daily. You can also manually invoke the task
|
||||
by changing the date and time of the next run to today/now.
|
||||
|
||||
The actual matching of the algorithm is fast and works on Raspberry Pi as
|
||||
well as on any other device.
|
||||
|
||||
.. _redis: https://redis.io/
|
||||
|
||||
|
||||
.. _setup-nginx:
|
||||
|
||||
Using nginx as a reverse proxy
|
||||
##############################
|
||||
|
||||
If you want to expose paperless to the internet, you should hide it behind a
|
||||
reverse proxy with SSL enabled.
|
||||
|
||||
In addition to the usual configuration for SSL,
|
||||
the following configuration is required for paperless to operate:
|
||||
|
||||
.. code:: nginx
|
||||
|
||||
http {
|
||||
|
||||
# Adjust as required. This is the maximum size for file uploads.
|
||||
# The default value 1M might be a little too small.
|
||||
client_max_body_size 10M;
|
||||
|
||||
server {
|
||||
|
||||
location / {
|
||||
|
||||
# Adjust host and port as required.
|
||||
proxy_pass http://localhost:8000/;
|
||||
|
||||
# These configuration options are required for WebSockets to work.
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
proxy_redirect off;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Host $server_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
The ``PAPERLESS_URL`` configuration variable is also required when using a reverse proxy. Please refer to the :ref:`hosting-and-security` docs.
|
||||
|
||||
Also read `this <https://channels.readthedocs.io/en/stable/deploying.html#nginx-supervisor-ubuntu>`__, towards the end of the section.
|
334
docs/troubleshooting.md
Normal file
@@ -0,0 +1,334 @@
|
||||
# Troubleshooting
|
||||
|
||||
## No files are added by the consumer
|
||||
|
||||
Check for the following issues:
|
||||
|
||||
- Ensure that the directory you're putting your documents in is the
|
||||
folder paperless is watching. With docker, this setting is performed
|
||||
in the `docker-compose.yml` file. Without docker, look at the
|
||||
`PAPERLESS_CONSUMPTION_DIR` setting. Don't adjust this setting if you're
|
||||
using docker.
|
||||
|
||||
- Ensure that redis is up and running. Paperless does its task
|
||||
processing asynchronously, and for documents to arrive at the task
|
||||
processor, it needs redis to run.
|
||||
|
||||
- Ensure that the task processor is running. Docker does this
|
||||
automatically. Manually invoke the task processor by executing
|
||||
|
||||
```shell-session
|
||||
$ celery --app paperless worker
|
||||
```
|
||||
|
||||
- Look at the output of paperless and inspect it for any errors.
|
||||
|
||||
- Go to the admin interface, and check if there are failed tasks. If
|
||||
so, the tasks will contain an error message.
|
||||
|
||||
## Consumer warns `OCR for XX failed`
|
||||
|
||||
If you find the OCR accuracy to be too low, and/or the document consumer
|
||||
warns that
|
||||
`OCR for XX failed, but we're going to stick with what we've got since FORGIVING_OCR is enabled`,
|
||||
then you might need to install the [Tesseract language
|
||||
files](https://packages.ubuntu.com/search?keywords=tesseract-ocr)
|
||||
matching your document's languages.
|
||||
|
||||
As an example, if you are running Paperless-ngx from any Ubuntu or
|
||||
Debian box, and your documents are written in Spanish you may need to
|
||||
run:
|
||||
|
||||
apt-get install -y tesseract-ocr-spa
|
||||
|
||||
## Consumer fails to pickup any new files
|
||||
|
||||
If you notice that the consumer will only pick up files in the
|
||||
consumption directory at startup, but won't find any other files added
|
||||
later, you will need to enable filesystem polling with the configuration
|
||||
option `PAPERLESS_CONSUMER_POLLING`, see
|
||||
[here](/configuration#polling).
|
||||
|
||||
This will disable listening to filesystem changes with inotify and
|
||||
paperless will manually check the consumption directory for changes
|
||||
instead.
|
||||
|
||||
## Paperless always redirects to /admin
|
||||
|
||||
You probably had the old paperless installed at some point. Paperless
|
||||
installed a permanent redirect to /admin in your browser, and you need
|
||||
to clear your browsing data / cache to fix that.
|
||||
|
||||
## Operation not permitted
|
||||
|
||||
You might see errors such as:
|
||||
|
||||
```shell-session
|
||||
chown: changing ownership of '../export': Operation not permitted
|
||||
```
|
||||
|
||||
The container tries to set file ownership on the listed directories.
|
||||
This is required so that the user running paperless inside docker has
|
||||
write permissions to these folders. This error happens when pointing these
|
||||
directories to NFS shares, for example.
|
||||
|
||||
Ensure that `chown` is possible on these directories.
|
||||
|
||||
## Classifier error: No training data available
|
||||
|
||||
This indicates that the Auto matching algorithm found no documents to
|
||||
learn from. This may have two reasons:
|
||||
|
||||
- You don't use the Auto matching algorithm: The error can be safely
|
||||
ignored in this case.
|
||||
- You are using the Auto matching algorithm: The classifier explicitly
|
||||
excludes documents with Inbox tags. Verify that there are documents
|
||||
in your archive without inbox tags. The algorithm will only learn
|
||||
from documents not in your inbox.
|
||||
|
||||
## UserWarning in sklearn on every single document
|
||||
|
||||
You may encounter warnings like this:
|
||||
|
||||
```
|
||||
/usr/local/lib/python3.7/site-packages/sklearn/base.py:315:
|
||||
UserWarning: Trying to unpickle estimator CountVectorizer from version 0.23.2 when using version 0.24.0.
|
||||
This might lead to breaking code or invalid results. Use at your own risk.
|
||||
```
|
||||
|
||||
This happens when certain dependencies of paperless that are responsible
|
||||
for the auto matching algorithm are updated. After updating these, your
|
||||
current training data _might_ not be compatible anymore. This can be
|
||||
ignored in most cases. This warning will disappear automatically when
|
||||
paperless updates the training data.
|
||||
|
||||
If you want to get rid of the warning or actually experience issues with
|
||||
automatic matching, delete the file `classification_model.pickle` in the
|
||||
data directory and let paperless recreate it.
|
||||
|
||||
## 504 Server Error: Gateway Timeout when adding Office documents
|
||||
|
||||
You may experience these errors when using the optional TIKA
|
||||
integration:
|
||||
|
||||
```
|
||||
requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/forms/libreoffice/convert
|
||||
```
|
||||
|
||||
Gotenberg is a server that converts Office documents into PDF documents
|
||||
and has a default timeout of 30 seconds. When conversion takes longer,
|
||||
Gotenberg raises this error.
|
||||
|
||||
You can increase the timeout by configuring a command flag for Gotenberg
|
||||
(see also [here](https://gotenberg.dev/docs/modules/api#properties)). If
|
||||
using docker-compose, this is achieved by the following configuration
|
||||
change in the `docker-compose.yml` file:
|
||||
|
||||
```yaml
|
||||
# The gotenberg chromium route is used to convert .eml files. We do not
|
||||
# want to allow external content like tracking pixels or even javascript.
|
||||
command:
|
||||
- 'gotenberg'
|
||||
- '--chromium-disable-javascript=true'
|
||||
- '--chromium-allow-list=file:///tmp/.*'
|
||||
- '--api-timeout=60'
|
||||
```
|
||||
|
||||
## Permission denied errors in the consumption directory
|
||||
|
||||
You might encounter errors such as:
|
||||
|
||||
```shell-session
|
||||
The following error occured while consuming document.pdf: [Errno 13] Permission denied: '/usr/src/paperless/src/../consume/document.pdf'
|
||||
```
|
||||
|
||||
This happens when paperless does not have permission to delete files
|
||||
inside the consumption directory. Ensure that `USERMAP_UID` and
|
||||
`USERMAP_GID` are set to the user id and group id you use on the host
|
||||
operating system, if these are different from `1000`. See [Docker setup](/setup#docker_hub).
|
||||
|
||||
Also ensure that you are able to read and write to the consumption
|
||||
directory on the host.
|
||||
|
||||
## OSError: \[Errno 19\] No such device when consuming files
|
||||
|
||||
If you experience errors such as:
|
||||
|
||||
```shell-session
|
||||
File "/usr/local/lib/python3.7/site-packages/whoosh/codec/base.py", line 570, in open_compound_file
|
||||
return CompoundStorage(dbfile, use_mmap=storage.supports_mmap)
|
||||
File "/usr/local/lib/python3.7/site-packages/whoosh/filedb/compound.py", line 75, in __init__
|
||||
self._source = mmap.mmap(fileno, 0, access=mmap.ACCESS_READ)
|
||||
OSError: [Errno 19] No such device
|
||||
|
||||
During handling of the above exception, another exception occurred:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "/usr/local/lib/python3.7/site-packages/django_q/cluster.py", line 436, in worker
|
||||
res = f(*task["args"], **task["kwargs"])
|
||||
File "/usr/src/paperless/src/documents/tasks.py", line 73, in consume_file
|
||||
override_tag_ids=override_tag_ids)
|
||||
File "/usr/src/paperless/src/documents/consumer.py", line 271, in try_consume_file
|
||||
raise ConsumerError(e)
|
||||
```
|
||||
|
||||
Paperless uses a search index to provide better and faster full text
|
||||
searching. This search index is stored inside the `data` folder. The
|
||||
search index uses memory-mapped files (mmap). The above error indicates
|
||||
that paperless was unable to create and open these files.
|
||||
|
||||
This happens when you're trying to store the data directory on certain
|
||||
file systems (mostly network shares) that don't support memory-mapped
|
||||
files.
|
||||
|
||||
## Web-UI stuck at "Loading\..."
|
||||
|
||||
This might have multiple reasons.
|
||||
|
||||
1. If you built the docker image yourself or deployed using the bare
|
||||
metal route, make sure that there are files in
|
||||
`<paperless-root>/static/frontend/<lang-code>/`. If there are no
|
||||
files, make sure that you executed `collectstatic` successfully,
|
||||
either manually or as part of the docker image build.
|
||||
|
||||
If the front end is still missing, make sure that the front end is
|
||||
compiled (files present in `src/documents/static/frontend`). If it
|
||||
is not, you need to compile the front end yourself or download the
|
||||
release archive instead of cloning the repository.
|
||||
|
||||
2. Check the output of the web server. You might see errors like this:
|
||||
|
||||
```
|
||||
[2021-01-25 10:08:04 +0000] [40] [ERROR] Socket error processing request.
|
||||
Traceback (most recent call last):
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 134, in handle
|
||||
self.handle_request(listener, req, client, addr)
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 190, in handle_request
|
||||
util.reraise(*sys.exc_info())
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 625, in reraise
|
||||
raise value
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 178, in handle_request
|
||||
resp.write_file(respiter)
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 396, in write_file
|
||||
if not self.sendfile(respiter):
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 386, in sendfile
|
||||
sent += os.sendfile(sockno, fileno, offset + sent, count)
|
||||
OSError: [Errno 22] Invalid argument
|
||||
```
|
||||
|
||||
To fix this issue, add
|
||||
|
||||
```
|
||||
SENDFILE=0
|
||||
```
|
||||
|
||||
to your `docker-compose.env` file.
|
||||
|
||||
## Error while reading metadata
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
```
|
||||
[WARNING] [paperless.parsing.tesseract] Error while reading metadata
|
||||
```
|
||||
|
||||
This indicates that paperless failed to read PDF metadata from one of
|
||||
your documents. This happens when you open the affected documents in
|
||||
paperless for editing. Paperless will continue to work, and will simply
|
||||
not show the invalid metadata.
|
||||
|
||||
## Consumer fails with a FileNotFoundError
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
```
|
||||
[ERROR] [paperless.consumer] Error while consuming document SCN_0001.pdf: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||
Traceback (most recent call last):
|
||||
File "/app/paperless/src/paperless_tesseract/parsers.py", line 261, in parse
|
||||
ocrmypdf.ocr(**args)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/api.py", line 337, in ocr
|
||||
return run_pipeline(options=options, plugin_manager=plugin_manager, api=True)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 385, in run_pipeline
|
||||
exec_concurrent(context, executor)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 302, in exec_concurrent
|
||||
pdf = post_process(pdf, context, executor)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 235, in post_process
|
||||
pdf_out = metadata_fixup(pdf_out, context)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_pipeline.py", line 798, in metadata_fixup
|
||||
with pikepdf.open(context.origin) as original, pikepdf.open(working_file) as pdf:
|
||||
File "/usr/local/lib/python3.8/dist-packages/pikepdf/_methods.py", line 923, in open
|
||||
pdf = Pdf._open(
|
||||
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||
```
|
||||
|
||||
This probably indicates paperless tried to consume the same file twice.
|
||||
This can happen for a number of reasons, depending on how documents are
|
||||
placed into the consume folder. If paperless is using inotify (the
|
||||
default) to check for documents, try adjusting the
|
||||
[inotify configuration](/configuration#inotify). If polling is enabled, try adjusting the
|
||||
[polling configuration](/configuration#polling).
|
||||
|
||||
## Consumer fails waiting for file to remain unmodified.
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
```
|
||||
[ERROR] [paperless.management.consumer] Timeout while waiting on file /usr/src/paperless/src/../consume/SCN_0001.pdf to remain unmodified.
|
||||
```
|
||||
|
||||
This indicates paperless timed out while waiting for the file to be
|
||||
completely written to the consume folder. Adjusting
|
||||
[polling configuration](/configuration#polling) values should resolve the issue.
|
||||
|
||||
!!! note
|
||||
|
||||
The user will need to manually move the file out of the consume folder
|
||||
and back in, for the initial failing file to be consumed.
|
||||
|
||||
## Consumer fails reporting "OS reports file as busy still".
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
```
|
||||
[WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still
|
||||
```
|
||||
|
||||
This indicates paperless was unable to open the file, as the OS reported
|
||||
the file as still being in use. To prevent a crash, paperless did not
|
||||
try to consume the file. If paperless is using inotify (the default) to
|
||||
check for documents, try adjusting the
|
||||
[inotify configuration](/configuration#inotify). If polling is enabled, try adjusting the
|
||||
[polling configuration](/configuration#polling).
|
||||
|
||||
!!! note
|
||||
|
||||
The user will need to manually move the file out of the consume folder
|
||||
and back in, for the initial failing file to be consumed.
|
||||
|
||||
## Log reports "Creating PaperlessTask failed".
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
```
|
||||
[ERROR] [paperless.management.consumer] Creating PaperlessTask failed: db locked
|
||||
```
|
||||
|
||||
You are likely using an sqlite based installation, with an increased
|
||||
number of workers and are running into sqlite's concurrency
|
||||
limitations. Uploading or consuming multiple files at once results in
|
||||
many workers attempting to access the database simultaneously.
|
||||
|
||||
Consider changing to the PostgreSQL database if you will often be processing
|
||||
many documents at once. Otherwise, try tweaking the
|
||||
`PAPERLESS_DB_TIMEOUT` setting to allow more time for the database to
|
||||
unlock. This may have minor performance implications.
|
||||
|
||||
## gunicorn fails to start with "is not a valid port number"
|
||||
|
||||
You are likely running Paperless on Kubernetes, which automatically creates an
|
||||
environment variable named `${serviceName}_PORT`. This is
|
||||
the same environment variable which is used by Paperless to optionally
|
||||
change the port gunicorn listens on.
|
||||
|
||||
To fix this, set `PAPERLESS_PORT` again to your desired port, or the
|
||||
default of 8000.
|
@@ -1,328 +0,0 @@
|
||||
***************
|
||||
Troubleshooting
|
||||
***************
|
||||
|
||||
No files are added by the consumer
|
||||
##################################
|
||||
|
||||
Check for the following issues:
|
||||
|
||||
* Ensure that the directory you're putting your documents in is the folder
|
||||
paperless is watching. With docker, this setting is performed in the
|
||||
``docker-compose.yml`` file. Without docker, look at the ``CONSUMPTION_DIR``
|
||||
setting. Don't adjust this setting if you're using docker.
|
||||
* Ensure that redis is up and running. Paperless does its task processing
|
||||
asynchronously, and for documents to arrive at the task processor, it needs
|
||||
redis to run.
|
||||
* Ensure that the task processor is running. Docker does this automatically.
|
||||
Manually invoke the task processor by executing
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ celery --app paperless worker
|
||||
|
||||
* Look at the output of paperless and inspect it for any errors.
|
||||
* Go to the admin interface, and check if there are failed tasks. If so, the
|
||||
tasks will contain an error message.
|
||||
|
||||
Consumer warns ``OCR for XX failed``
|
||||
####################################
|
||||
|
||||
If you find the OCR accuracy to be too low, and/or the document consumer warns
|
||||
that ``OCR for XX failed, but we're going to stick with what we've got since
|
||||
FORGIVING_OCR is enabled``, then you might need to install the
|
||||
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
||||
matching your document's languages.
|
||||
|
||||
As an example, if you are running Paperless-ngx from any Ubuntu or Debian
|
||||
box, and your documents are written in Spanish you may need to run::
|
||||
|
||||
apt-get install -y tesseract-ocr-spa
|
||||
|
||||
Consumer fails to pickup any new files
|
||||
######################################
|
||||
|
||||
If you notice that the consumer will only pickup files in the consumption
|
||||
directory at startup, but won't find any other files added later, you will need to
|
||||
enable filesystem polling with the configuration option
|
||||
``PAPERLESS_CONSUMER_POLLING``, see :ref:`here <configuration-polling>`.
|
||||
|
||||
This will disable listening to filesystem changes with inotify and paperless will
|
||||
manually check the consumption directory for changes instead.
|
||||
|
||||
|
||||
Paperless always redirects to /admin
|
||||
####################################
|
||||
|
||||
You probably had the old paperless installed at some point. Paperless installed
|
||||
a permanent redirect to /admin in your browser, and you need to clear your
|
||||
browsing data / cache to fix that.
|
||||
|
||||
|
||||
Operation not permitted
|
||||
#######################
|
||||
|
||||
You might see errors such as:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
chown: changing ownership of '../export': Operation not permitted
|
||||
|
||||
The container tries to set file ownership on the listed directories. This is
|
||||
required so that the user running paperless inside docker has write permissions
|
||||
to these folders. This happens when pointing these directories to NFS shares,
|
||||
for example.
|
||||
|
||||
Ensure that ``chown`` is possible on these directories.
|
||||
|
||||
|
||||
Classifier error: No training data available
|
||||
############################################
|
||||
|
||||
This indicates that the Auto matching algorithm found no documents to learn from.
|
||||
This may have two reasons:
|
||||
|
||||
* You don't use the Auto matching algorithm: The error can be safely ignored in this case.
|
||||
* You are using the Auto matching algorithm: The classifier explicitly excludes documents
|
||||
with Inbox tags. Verify that there are documents in your archive without inbox tags.
|
||||
The algorithm will only learn from documents not in your inbox.
|
||||
|
||||
|
||||
UserWarning in sklearn on every single document
|
||||
###############################################
|
||||
|
||||
You may encounter warnings like this:
|
||||
|
||||
.. code::
|
||||
|
||||
/usr/local/lib/python3.7/site-packages/sklearn/base.py:315:
|
||||
UserWarning: Trying to unpickle estimator CountVectorizer from version 0.23.2 when using version 0.24.0.
|
||||
This might lead to breaking code or invalid results. Use at your own risk.
|
||||
|
||||
This happens when certain dependencies of paperless that are responsible for the auto matching algorithm are
|
||||
updated. After updating these, your current training data *might* not be compatible anymore. This can be ignored
|
||||
in most cases. This warning will disappear automatically when paperless updates the training data.
|
||||
|
||||
If you want to get rid of the warning or actually experience issues with automatic matching, delete
|
||||
the file ``classification_model.pickle`` in the data directory and let paperless recreate it.
|
||||
|
||||
|
||||
504 Server Error: Gateway Timeout when adding Office documents
|
||||
##############################################################
|
||||
|
||||
You may experience these errors when using the optional TIKA integration:
|
||||
|
||||
.. code::
|
||||
|
||||
requests.exceptions.HTTPError: 504 Server Error: Gateway Timeout for url: http://gotenberg:3000/forms/libreoffice/convert
|
||||
|
||||
Gotenberg is a server that converts Office documents into PDF documents and has a default timeout of 30 seconds.
|
||||
When conversion takes longer, Gotenberg raises this error.
|
||||
|
||||
You can increase the timeout by configuring a command flag for Gotenberg (see also `here <https://gotenberg.dev/docs/modules/api#properties>`__).
|
||||
If using docker-compose, this is achieved by the following configuration change in the ``docker-compose.yml`` file:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:7.6
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- "gotenberg"
|
||||
- "--chromium-disable-routes=true"
|
||||
- "--api-timeout=60"
|
||||
|
||||
Permission denied errors in the consumption directory
|
||||
#####################################################
|
||||
|
||||
You might encounter errors such as:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
The following error occured while consuming document.pdf: [Errno 13] Permission denied: '/usr/src/paperless/src/../consume/document.pdf'
|
||||
|
||||
This happens when paperless does not have permission to delete files inside the consumption directory.
|
||||
Ensure that ``USERMAP_UID`` and ``USERMAP_GID`` are set to the user id and group id you use on the host operating system, if these are
|
||||
different from ``1000``. See :ref:`setup-docker_hub`.
|
||||
|
||||
Also ensure that you are able to read and write to the consumption directory on the host.
|
||||
|
||||
|
||||
OSError: [Errno 19] No such device when consuming files
|
||||
#######################################################
|
||||
|
||||
If you experience errors such as:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
File "/usr/local/lib/python3.7/site-packages/whoosh/codec/base.py", line 570, in open_compound_file
|
||||
return CompoundStorage(dbfile, use_mmap=storage.supports_mmap)
|
||||
File "/usr/local/lib/python3.7/site-packages/whoosh/filedb/compound.py", line 75, in __init__
|
||||
self._source = mmap.mmap(fileno, 0, access=mmap.ACCESS_READ)
|
||||
OSError: [Errno 19] No such device
|
||||
|
||||
During handling of the above exception, another exception occurred:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "/usr/local/lib/python3.7/site-packages/django_q/cluster.py", line 436, in worker
|
||||
res = f(*task["args"], **task["kwargs"])
|
||||
File "/usr/src/paperless/src/documents/tasks.py", line 73, in consume_file
|
||||
override_tag_ids=override_tag_ids)
|
||||
File "/usr/src/paperless/src/documents/consumer.py", line 271, in try_consume_file
|
||||
raise ConsumerError(e)
|
||||
|
||||
Paperless uses a search index to provide better and faster full text searching. This search index is stored inside
|
||||
the ``data`` folder. The search index uses memory-mapped files (mmap). The above error indicates that paperless
|
||||
was unable to create and open these files.
|
||||
|
||||
This happens when you're trying to store the data directory on certain file systems (mostly network shares)
|
||||
that don't support memory-mapped files.
|
||||
|
||||
|
||||
Web-UI stuck at "Loading..."
|
||||
############################
|
||||
|
||||
This might have multiple reasons.
|
||||
|
||||
|
||||
1. If you built the docker image yourself or deployed using the bare metal route,
|
||||
make sure that there are files in ``<paperless-root>/static/frontend/<lang-code>/``.
|
||||
If there are no files, make sure that you executed ``collectstatic`` successfully, either
|
||||
manually or as part of the docker image build.
|
||||
|
||||
If the front end is still missing, make sure that the front end is compiled (files present in
|
||||
``src/documents/static/frontend``). If it is not, you need to compile the front end yourself
|
||||
or download the release archive instead of cloning the repository.
|
||||
|
||||
2. Check the output of the web server. You might see errors like this:
|
||||
|
||||
|
||||
.. code::
|
||||
|
||||
[2021-01-25 10:08:04 +0000] [40] [ERROR] Socket error processing request.
|
||||
Traceback (most recent call last):
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 134, in handle
|
||||
self.handle_request(listener, req, client, addr)
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 190, in handle_request
|
||||
util.reraise(*sys.exc_info())
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 625, in reraise
|
||||
raise value
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/sync.py", line 178, in handle_request
|
||||
resp.write_file(respiter)
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 396, in write_file
|
||||
if not self.sendfile(respiter):
|
||||
File "/usr/local/lib/python3.7/site-packages/gunicorn/http/wsgi.py", line 386, in sendfile
|
||||
sent += os.sendfile(sockno, fileno, offset + sent, count)
|
||||
OSError: [Errno 22] Invalid argument
|
||||
|
||||
To fix this issue, add
|
||||
|
||||
.. code::
|
||||
|
||||
SENDFILE=0
|
||||
|
||||
to your `docker-compose.env` file.
|
||||
|
||||
Error while reading metadata
|
||||
############################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[WARNING] [paperless.parsing.tesseract] Error while reading metadata
|
||||
|
||||
This indicates that paperless failed to read PDF metadata from one of your documents. This happens when you
|
||||
open the affected documents in paperless for editing. Paperless will continue to work, and will simply not
|
||||
show the invalid metadata.
|
||||
|
||||
Consumer fails with a FileNotFoundError
|
||||
#######################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[ERROR] [paperless.consumer] Error while consuming document SCN_0001.pdf: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||
Traceback (most recent call last):
|
||||
File "/app/paperless/src/paperless_tesseract/parsers.py", line 261, in parse
|
||||
ocrmypdf.ocr(**args)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/api.py", line 337, in ocr
|
||||
return run_pipeline(options=options, plugin_manager=plugin_manager, api=True)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 385, in run_pipeline
|
||||
exec_concurrent(context, executor)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 302, in exec_concurrent
|
||||
pdf = post_process(pdf, context, executor)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 235, in post_process
|
||||
pdf_out = metadata_fixup(pdf_out, context)
|
||||
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_pipeline.py", line 798, in metadata_fixup
|
||||
with pikepdf.open(context.origin) as original, pikepdf.open(working_file) as pdf:
|
||||
File "/usr/local/lib/python3.8/dist-packages/pikepdf/_methods.py", line 923, in open
|
||||
pdf = Pdf._open(
|
||||
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||
|
||||
This probably indicates paperless tried to consume the same file twice. This can happen for a number of reasons,
|
||||
depending on how documents are placed into the consume folder. If paperless is using inotify (the default) to
|
||||
check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`. If polling is enabled,
|
||||
try adjusting the :ref:`polling configuration <configuration-polling>`.
|
||||
|
||||
Consumer fails waiting for file to remain unmodified.
|
||||
#####################################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[ERROR] [paperless.management.consumer] Timeout while waiting on file /usr/src/paperless/src/../consume/SCN_0001.pdf to remain unmodified.
|
||||
|
||||
This indicates paperless timed out while waiting for the file to be completely written to the consume folder.
|
||||
Adjusting :ref:`polling configuration <configuration-polling>` values should resolve the issue.
|
||||
|
||||
.. note::
|
||||
|
||||
The user will need to manually move the file out of the consume folder and
|
||||
back in, for the initial failing file to be consumed.
|
||||
|
||||
Consumer fails reporting "OS reports file as busy still".
|
||||
#########################################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still
|
||||
|
||||
This indicates paperless was unable to open the file, as the OS reported the file as still being in use. To prevent a
|
||||
crash, paperless did not try to consume the file. If paperless is using inotify (the default) to
|
||||
check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`. If polling is enabled,
|
||||
try adjusting the :ref:`polling configuration <configuration-polling>`.
|
||||
|
||||
.. note::
|
||||
|
||||
The user will need to manually move the file out of the consume folder and
|
||||
back in, for the initial failing file to be consumed.
|
||||
|
||||
Log reports "Creating PaperlessTask failed".
|
||||
#########################################################
|
||||
|
||||
You might find messages like these in your log files:
|
||||
|
||||
.. code::
|
||||
|
||||
[ERROR] [paperless.management.consumer] Creating PaperlessTask failed: db locked
|
||||
|
||||
You are likely using an sqlite based installation, with an increased number of workers and are running into sqlite's concurrency limitations.
|
||||
Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously.
|
||||
|
||||
Consider changing to the PostgreSQL database if you will be processing many documents at once often. Otherwise,
|
||||
try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock. This may have
|
||||
minor performance implications.
|
||||
|
||||
|
||||
gunicorn fails to start with "is not a valid port number"
|
||||
#########################################################
|
||||
|
||||
You are likely running using Kubernetes, which automatically creates an environment variable named `${serviceName}_PORT`.
|
||||
This is the same environment variable which is used by Paperless to optionally change the port gunicorn listens on.
|
||||
|
||||
To fix this, set `PAPERLESS_PORT` again to your desired port, or the default of 8000.
|
495
docs/usage.md
Normal file
@@ -0,0 +1,495 @@
|
||||
# Usage Overview
|
||||
|
||||
Paperless is an application that manages your personal documents. With
|
||||
the help of a document scanner (see [the scanners wiki](https://github.com/paperless-ngx/paperless-ngx/wiki/Scanner-&-Software-Recommendations)),
|
||||
paperless transforms your unwieldy physical document binders into a searchable archive
|
||||
and provides many utilities for finding and managing your documents.
|
||||
|
||||
## Terms and definitions
|
||||
|
||||
Paperless essentially consists of two different parts for managing your
|
||||
documents:
|
||||
|
||||
- The _consumer_ watches a specified folder and adds all documents in
|
||||
that folder to paperless.
|
||||
- The _web server_ provides a UI that you use to manage and search for
|
||||
your scanned documents.
|
||||
|
||||
Each document has a couple of fields that you can assign to them:
|
||||
|
||||
- A _Document_ is a piece of paper that sometimes contains valuable
|
||||
information.
|
||||
- The _correspondent_ of a document is the person, institution or
|
||||
company that a document either originates from, or is sent to.
|
||||
- A _tag_ is a label that you can assign to documents. Think of labels
|
||||
as more powerful folders: Multiple documents can be grouped together
|
||||
with a single tag, however, a single document can also have multiple
|
||||
tags. This is not possible with folders. The reason folders are not
|
||||
implemented in paperless is simply that tags are much more versatile
|
||||
than folders.
|
||||
- A _document type_ is used to demarcate the type of a document such
|
||||
as letter, bank statement, invoice, contract, etc. It is used to
|
||||
identify what a document is about.
|
||||
- The _date added_ of a document is the date the document was scanned
|
||||
into paperless. You cannot and should not change this date.
|
||||
- The _date created_ of a document is the date the document was
|
||||
initially issued. This can be the date you bought a product, the
|
||||
date you signed a contract, or the date a letter was sent to you.
|
||||
- The _archive serial number_ (short: ASN) of a document is the
|
||||
identifier of the document in your physical document binders. See
|
||||
[recommended workflow](#usage-recommended-workflow) below.
|
||||
- The _content_ of a document is the text that was OCR'ed from the
|
||||
document. This text is fed into the search engine and is used for
|
||||
matching tags, correspondents and document types.
|
||||
|
||||
## Adding documents to paperless
|
||||
|
||||
Once you've got Paperless set up, you need to start feeding documents
|
||||
into it. When adding documents to paperless, it will perform the
|
||||
following operations on your documents:
|
||||
|
||||
1. OCR the document, if it has no text. Digital documents usually have
|
||||
text, and this step will be skipped for those documents.
|
||||
2. Paperless will create an archivable PDF/A document from your
|
||||
document. If this document is coming from your scanner, it will have
|
||||
embedded selectable text.
|
||||
3. Paperless performs automatic matching of tags, correspondents and
|
||||
types on the document before storing it in the database.
|
||||
|
||||
!!! tip
|
||||
|
||||
This process can be configured to fit your needs. If you don't want
|
||||
paperless to create archived versions for digital documents, you can
|
||||
configure that by configuring `PAPERLESS_OCR_MODE=skip_noarchive`.
|
||||
Please read the
|
||||
[relevant section in the documentation](/configuration#ocr).
|
||||
|
||||
!!! note
|
||||
|
||||
No matter which options you choose, Paperless will always store the
|
||||
original document that it found in the consumption directory or in the
|
||||
mail and will never overwrite that document. Archived versions are
|
||||
stored alongside the original versions.
|
||||
|
||||
### The consumption directory
|
||||
|
||||
The primary method of getting documents into your database is by putting
|
||||
them in the consumption directory. The consumer waits patiently, looking
|
||||
for new additions to this directory. When it finds them,
|
||||
the consumer goes about the process of parsing them with the OCR,
|
||||
indexing what it finds, and storing it in the media directory.
|
||||
|
||||
Getting stuff into this directory is up to you. If you're running
|
||||
Paperless on your local computer, you might just want to drag and drop
|
||||
files there, but if you're running this on a server and want your
|
||||
scanner to automatically push files to this directory, you'll need to
|
||||
set up some sort of service to accept the files from the scanner.
|
||||
Typically, you're looking at an FTP server like
|
||||
[Proftpd](http://www.proftpd.org/) or a Windows folder share with
|
||||
[Samba](https://www.samba.org/).
|
||||
|
||||
### Web UI Upload
|
||||
|
||||
The dashboard has a file drop field to upload documents to paperless.
|
||||
Simply drag a file onto this field or select a file with the file
|
||||
dialog. Multiple files are supported.
|
||||
|
||||
You can also upload documents on any other page of the web UI by
|
||||
dragging-and-dropping files into your browser window.
|
||||
|
||||
### Mobile upload {#usage-mobile_upload}
|
||||
|
||||
The mobile app over at [https://github.com/qcasey/paperless_share](https://github.com/qcasey/paperless_share)
|
||||
allows Android users to share any documents with paperless. This can be
|
||||
combined with any of the mobile scanning apps out there, such as Office
|
||||
Lens.
|
||||
|
||||
Furthermore, there is the [Paperless
|
||||
App](https://github.com/bauerj/paperless_app) as well, which not only
|
||||
has document upload, but also document browsing and download features.
|
||||
|
||||
Another option is [Paperless Mobile](https://github.com/astubenbord/paperless-mobile), an Android app that supports document upload, scanning, management of labels and more.
|
||||
|
||||
### IMAP (Email) {#usage-email}
|
||||
|
||||
You can tell paperless-ngx to consume documents from your email
|
||||
accounts. This is a very flexible and powerful feature, if you regularly
|
||||
receive documents via mail that you need to archive. The mail consumer
|
||||
can be configured via the frontend settings (/settings/mail) in the following
|
||||
manner:
|
||||
|
||||
1. Define e-mail accounts.
|
||||
2. Define mail rules for your account.
|
||||
|
||||
These rules perform the following:
|
||||
|
||||
1. Connect to the mail server.
|
||||
2. Fetch all matching mails (as defined by folder, maximum age and the
|
||||
filters)
|
||||
3. Check if there are any consumable attachments.
|
||||
4. If so, instruct paperless to consume the attachments and optionally
|
||||
use the metadata provided in the rule for the new document.
|
||||
5. If documents were consumed from a mail, the rule action is performed
|
||||
on that mail.
|
||||
|
||||
Paperless will completely ignore mails that do not match your filters.
|
||||
It will also only perform the action on mails that it has consumed
|
||||
documents from.
|
||||
|
||||
The actions all ensure that the same mail is not consumed twice by
|
||||
different means. These are as follows:
|
||||
|
||||
- **Delete:** Immediately deletes mail that paperless has consumed
|
||||
documents from. Use with caution.
|
||||
- **Mark as read:** Mark consumed mail as read. Paperless will not
|
||||
consume documents from already read mails. If you read a mail before
|
||||
paperless sees it, it will be ignored.
|
||||
- **Flag:** Sets the 'important' flag on mails with consumed
|
||||
documents. Paperless will not consume flagged mails.
|
||||
- **Move to folder:** Moves consumed mails out of the way so that
|
||||
paperless won't consume them again.
|
||||
- **Add custom Tag:** Adds a custom tag to mails with consumed
|
||||
documents (the IMAP standard calls these "keywords"). Paperless
|
||||
will not consume mails already tagged. Not all mail servers support
|
||||
this feature!
|
||||
|
||||
- **Apple Mail support:** Apple Mail clients allow differently colored tags. For this to work use `apple:<color>` (e.g. _apple:green_) as a custom tag. Available colors are _red_, _orange_, _yellow_, _blue_, _green_, _violet_ and _grey_.
|
||||
|
||||
!!! warning
|
||||
|
||||
The mail consumer will perform these actions on all mails it has
|
||||
consumed documents from. Keep in mind that the actual consumption
|
||||
process may fail for some reason, leaving you with missing documents in
|
||||
paperless.
|
||||
|
||||
!!! note
|
||||
|
||||
With the correct set of rules, you can completely automate your email
|
||||
documents. Create rules for every correspondent you receive digital
|
||||
documents from and paperless will read them automatically. The default
|
||||
action "mark as read" is pretty tame and will not cause any damage or
|
||||
data loss whatsoever.
|
||||
|
||||
You can also set up a special folder in your mail account for paperless
|
||||
and use your favorite mail client to move to-be-consumed mails into that
|
||||
folder automatically or manually and tell paperless to move them to yet
|
||||
another folder after consumption. It's up to you.
|
||||
|
||||
!!! note
|
||||
|
||||
When defining a mail rule with a folder, you may need to try different
|
||||
characters to define how the sub-folders are separated. Common values
|
||||
include ".", "/" or "\|", but this varies by the mail server.
|
||||
Check the documentation for your mail server. In the event of an error
|
||||
fetching mail from a certain folder, check the Paperless logs. When a
|
||||
folder is not located, Paperless will attempt to list all folders found
|
||||
in the account to the Paperless logs.
|
||||
|
||||
!!! note
|
||||
|
||||
Paperless will process the rules in the order defined in the admin page.
|
||||
|
||||
You can define catch-all rules and have them executed last to consume
|
||||
any documents not matched by previous rules. Such a rule may assign an
|
||||
"Unknown mail document" tag to consumed documents so you can inspect
|
||||
them further.
|
||||
|
||||
Paperless is set up to check your mails every 10 minutes. This can be
|
||||
configured via `PAPERLESS_EMAIL_TASK_CRON` (see [software tweaks](/configuration#software_tweaks))
|
||||
|
||||
### REST API
|
||||
|
||||
You can also submit a document using the REST API, see [POSTing documents](/api#file-uploads)
|
||||
for details.
|
||||
|
||||
## Best practices {#basic-searching}
|
||||
|
||||
Paperless offers a couple tools that help you organize your document
|
||||
collection. However, it is up to you to use them in a way that helps you
|
||||
organize documents and find specific documents when you need them. This
|
||||
section offers a couple ideas for managing your collection.
|
||||
|
||||
Document types allow you to classify documents according to what they
|
||||
are. You can define types such as "Receipt", "Invoice", or
|
||||
"Contract". If you used to collect all your receipts in a single
|
||||
binder, you can recreate that system in paperless by defining a document
|
||||
type, assigning documents to that type and then filtering by that type
|
||||
to only see all receipts.
|
||||
|
||||
Not all documents need document types. Sometimes it's hard to determine
|
||||
what the type of a document is or it is hard to justify creating a
|
||||
document type that you only need once or twice. This is okay. As long as
|
||||
the types you define help you organize your collection in the way you
|
||||
want, paperless is doing its job.
|
||||
|
||||
Tags can be used in many different ways. Think of tags as more
|
||||
versatile folders or binders. If you have a binder for documents related
|
||||
to university / your car or health care, you can create these binders in
|
||||
paperless by creating tags and assigning them to relevant documents.
|
||||
Just as with documents, you can filter the document list by tags and
|
||||
only see documents of a certain topic.
|
||||
|
||||
With physical documents, you'll often need to decide which folder the
|
||||
document belongs to. The advantage of tags over folders and binders is
|
||||
that a single document can have multiple tags. A physical document
|
||||
cannot magically appear in two different folders, but with tags, this is
|
||||
entirely possible.
|
||||
|
||||
!!! tip
|
||||
|
||||
This can be used in many different ways. One example: Imagine you're
|
||||
working on a particular task, such as signing up for university. Usually
|
||||
you'll need to collect a bunch of different documents that are already
|
||||
sorted into various folders. With the tag system of paperless, you can
|
||||
create a new group of documents that are relevant to this task without
|
||||
destroying the already existing organization. When you're done with the
|
||||
task, you could delete the tag again, which would be equal to sorting
|
||||
documents back into the folder they belong into. Or keep the tag, up to
|
||||
you.
|
||||
|
||||
All of the logic above applies to correspondents as well. Attach them to
|
||||
documents if you feel that they help you organize your collection.
|
||||
|
||||
When you've started organizing your documents, create a couple saved
|
||||
views for document collections you regularly access. This is equal to
|
||||
having labeled physical binders on your desk, except that these saved
|
||||
views are dynamic and simply update themselves as you add documents to
|
||||
the system.
|
||||
|
||||
Here are a couple examples of tags and types that you could use in your
|
||||
collection.
|
||||
|
||||
- An `inbox` tag for newly added documents that you haven't manually
|
||||
edited yet.
|
||||
- A tag `car` for everything car related (repairs, registration,
|
||||
insurance, etc)
|
||||
- A tag `todo` for documents that you still need to do something with,
|
||||
such as reply, or perform some task online.
|
||||
- A tag `bank account x` for all bank statements related to that
|
||||
account.
|
||||
- A tag `mail` for anything that you added to paperless via its mail
|
||||
processing capabilities.
|
||||
- A tag `missing_metadata` when you still need to add some metadata to
|
||||
a document, but can't or don't want to do this right now.
|
||||
|
||||
## Searching {#basic-usage_searching}
|
||||
|
||||
Paperless offers an extensive searching mechanism that is designed to
|
||||
allow you to quickly find a document you're looking for (for example,
|
||||
that thing that just broke and you bought a couple months ago, that
|
||||
contract you signed 8 years ago).
|
||||
|
||||
When you search paperless for a document, it tries to match this query
|
||||
against your documents. Paperless will look for matching documents by
|
||||
inspecting their content, title, correspondent, type and tags. Paperless
|
||||
returns a scored list of results, so that documents matching your query
|
||||
better will appear further up in the search results.
|
||||
|
||||
By default, paperless returns only documents which contain all words
|
||||
typed in the search bar. However, paperless also offers advanced search
|
||||
syntax if you want to drill down the results further.
|
||||
|
||||
Matching documents with logical expressions:
|
||||
|
||||
```
|
||||
shopname AND (product1 OR product2)
|
||||
```
|
||||
|
||||
Matching specific tags, correspondents or types:
|
||||
|
||||
```
|
||||
type:invoice tag:unpaid
|
||||
correspondent:university certificate
|
||||
```
|
||||
|
||||
Matching dates:
|
||||
|
||||
```
|
||||
created:[2005 to 2009]
|
||||
added:yesterday
|
||||
modified:today
|
||||
```
|
||||
|
||||
Matching inexact words:
|
||||
|
||||
```
|
||||
produ*name
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
Inexact terms are hard for search indexes. These queries might take a
|
||||
while to execute. That's why paperless offers auto complete and query
|
||||
correction.
|
||||
|
||||
All of these constructs can be combined as you see fit. If you want to
|
||||
learn more about the query language used by paperless, paperless uses
|
||||
Whoosh's default query language. Head over to [Whoosh query
|
||||
language](https://whoosh.readthedocs.io/en/latest/querylang.html). For
|
||||
details on what date parsing utilities are available, see [Date
|
||||
parsing](https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries).
|
||||
|
||||
## The recommended workflow {#usage-recommended-workflow}
|
||||
|
||||
Once you have familiarized yourself with paperless and are ready to use
|
||||
it for all your documents, the recommended workflow for managing your
|
||||
documents is as follows. This workflow also takes into account that some
|
||||
documents have to be kept in physical form, but still ensures that you
|
||||
get all the advantages for these documents as well.
|
||||
|
||||
The following diagram shows how easy it is to manage your documents.
|
||||
|
||||
![image](assets/recommended_workflow.png){width=400}
|
||||
|
||||
### Preparations in paperless
|
||||
|
||||
- Create an inbox tag that gets assigned to all new documents.
|
||||
- Create a TODO tag.
|
||||
|
||||
### Processing of the physical documents
|
||||
|
||||
Keep a physical inbox. Whenever you receive a document that you need to
|
||||
archive, put it into your inbox. Regularly, do the following for all
|
||||
documents in your inbox:
|
||||
|
||||
1. For each document, decide if you need to keep the document in
|
||||
physical form. This applies to certain important documents, such as
|
||||
contracts and certificates.
|
||||
2. If you need to keep the document, write a running number on the
|
||||
document before scanning, starting at one and counting upwards. This
|
||||
is the archive serial number, or ASN in short.
|
||||
3. Scan the document.
|
||||
4. If the document has an ASN assigned, store it in a _single_ binder,
|
||||
sorted by ASN. Don't order this binder in any other way.
|
||||
5. If the document has no ASN, throw it away. Yay!
|
||||
|
||||
!!! tip
|
||||
|
||||
Instead of writing a number on the document by hand, you may also prepare
|
||||
a spool of labels with barcodes with an ascending serial number, that are
|
||||
formatted like `ASN00001`.
|
||||
This also enables Paperless to automatically parse and process the ASN
|
||||
(if enabled in the config), so that you don't need to manually assign it.
|
||||
|
||||
Over time, you will notice that your physical binder will fill up. If it
|
||||
is full, label the binder with the range of ASNs in this binder (e.g.,
|
||||
"Documents 1 to 343"), store the binder in your cellar or elsewhere,
|
||||
and start a new binder.
|
||||
|
||||
The idea behind this process is that you will never have to use the
|
||||
physical binders to find a document. If you need a specific physical
|
||||
document, you may find this document by:
|
||||
|
||||
1. Searching in paperless for the document.
|
||||
2. Identify the ASN of the document, since it appears on the scan.
|
||||
3. Grab the relevant document binder and get the document. This is easy
|
||||
since they are sorted by ASN.
|
||||
|
||||
### Processing of documents in paperless
|
||||
|
||||
Once you have scanned in a document, proceed in paperless as follows.
|
||||
|
||||
1. If the document has an ASN, assign the ASN to the document.
|
||||
2. Assign a correspondent to the document (e.g., your employer, bank,
|
||||
etc.). This isn't strictly necessary but helps in finding a document
|
||||
when you need it.
|
||||
3. Assign a document type (e.g., invoice, bank statement, etc.) to the
|
||||
document. This isn't strictly necessary but helps in finding a
|
||||
document when you need it.
|
||||
4. Assign a proper title to the document (the name of an item you
|
||||
bought, the subject of the letter, etc)
|
||||
5. Check that the date of the document is correct. Paperless tries to
|
||||
read the date from the content of the document, but this fails
|
||||
sometimes if the OCR is bad or multiple dates appear on the
|
||||
document.
|
||||
6. Remove inbox tags from the documents.
|
||||
|
||||
!!! tip
|
||||
|
||||
You can set up manual matching rules for your correspondents and tags and
|
||||
paperless will assign them automatically. After consuming a couple
|
||||
documents, you can even ask paperless to *learn* when to assign tags and
|
||||
correspondents by itself. For details on this feature, see
|
||||
[advanced matching](/advanced_usage#matching).
|
||||
|
||||
### Task management
|
||||
|
||||
Some documents require attention and require you to act on the document.
|
||||
You may take two different approaches to handle these documents based on
|
||||
how regularly you intend to scan documents and use paperless.
|
||||
|
||||
- If you scan and process your documents in paperless regularly,
|
||||
assign a TODO tag to all scanned documents that you need to process.
|
||||
Create a saved view on the dashboard that shows all documents with
|
||||
this tag.
|
||||
- If you do not scan documents regularly and use paperless solely for
|
||||
archiving, create a physical todo box next to your physical inbox
|
||||
and put documents you need to process in the TODO box. When you
|
||||
have performed the task associated with the document, move it to the
|
||||
inbox.
|
||||
|
||||
## Architecture
|
||||
|
||||
Paperless-ngx consists of the following components:
|
||||
|
||||
- **The webserver:** This serves the administration pages, the API,
|
||||
and the new frontend. This is the main tool you'll be using to interact
|
||||
with paperless. You may start the webserver directly with
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless/src/
|
||||
$ gunicorn -c ../gunicorn.conf.py paperless.wsgi
|
||||
```
|
||||
|
||||
or by any other means such as Apache `mod_wsgi`.
|
||||
|
||||
- **The consumer:** This is what watches your consumption folder for
|
||||
documents. However, the consumer itself does not really consume your
|
||||
documents. Now it notifies a task processor that a new file is ready
|
||||
for consumption. I suppose it should be named differently. This was
|
||||
also used to check your emails, but that's now done elsewhere as
|
||||
well.
|
||||
|
||||
Start the consumer with the management command `document_consumer`:
|
||||
|
||||
```shell-session
|
||||
$ cd /path/to/paperless/src/
|
||||
$ python3 manage.py document_consumer
|
||||
```
|
||||
|
||||
- **The task processor:** Paperless relies on [Celery - Distributed
|
||||
Task Queue](https://docs.celeryq.dev/en/stable/index.html) for doing
|
||||
most of the heavy lifting. This is a task queue that accepts tasks
|
||||
from multiple sources and processes these in parallel. It also comes
|
||||
with a scheduler that executes certain commands periodically.
|
||||
|
||||
This task processor is responsible for:
|
||||
|
||||
- Consuming documents. When the consumer finds new documents, it
|
||||
notifies the task processor to start a consumption task.
|
||||
- The task processor also performs the consumption of any
|
||||
documents you upload through the web interface.
|
||||
- Consuming emails. It periodically checks your configured
|
||||
accounts for new emails and notifies the task processor to
|
||||
consume the attachment of an email.
|
||||
- Maintaining the search index and the automatic matching
|
||||
algorithm. These are things that paperless needs to do from time
|
||||
to time in order to operate properly.
|
||||
|
||||
This allows paperless to process multiple documents from your
|
||||
consumption folder in parallel! On a modern multi core system, this
|
||||
makes the consumption process with full OCR blazingly fast.
|
||||
|
||||
The task processor comes with a built-in admin interface that you
|
||||
can use to check whether any of the tasks fail and inspect the
|
||||
errors (e.g., wrong email credentials, errors while consuming a
|
||||
specific file, etc).
|
||||
|
||||
- A [redis](https://redis.io/) message broker: This is a really
|
||||
lightweight service that is responsible for getting the tasks from
|
||||
the webserver and the consumer to the task scheduler. These run in a
|
||||
different process (maybe even on different machines!), and
|
||||
therefore, this is necessary.
|
||||
|
||||
- Optional: A database server. Paperless supports PostgreSQL, MariaDB
|
||||
and SQLite for storing its data.
|
@@ -1,420 +0,0 @@
|
||||
**************
|
||||
Usage Overview
|
||||
**************
|
||||
|
||||
Paperless is an application that manages your personal documents. With
|
||||
the help of a document scanner (see :ref:`scanners`), paperless transforms
|
||||
your wieldy physical document binders into a searchable archive and
|
||||
provides many utilities for finding and managing your documents.
|
||||
|
||||
|
||||
Terms and definitions
|
||||
#####################
|
||||
|
||||
Paperless essentially consists of two different parts for managing your
|
||||
documents:
|
||||
|
||||
* The *consumer* watches a specified folder and adds all documents in that
|
||||
folder to paperless.
|
||||
* The *web server* provides a UI that you use to manage and search for your
|
||||
scanned documents.
|
||||
|
||||
Each document has a couple of fields that you can assign to them:
|
||||
|
||||
* A *Document* is a piece of paper that sometimes contains valuable
|
||||
information.
|
||||
* The *correspondent* of a document is the person, institution or company that
|
||||
a document either originates from, or is sent to.
|
||||
* A *tag* is a label that you can assign to documents. Think of labels as more
|
||||
powerful folders: Multiple documents can be grouped together with a single
|
||||
tag, however, a single document can also have multiple tags. This is not
|
||||
possible with folders. The reason folders are not implemented in paperless
|
||||
is simply that tags are much more versatile than folders.
|
||||
* A *document type* is used to demarcate the type of a document such as letter,
|
||||
bank statement, invoice, contract, etc. It is used to identify what a document
|
||||
is about.
|
||||
* The *date added* of a document is the date the document was scanned into
|
||||
paperless. You cannot and should not change this date.
|
||||
* The *date created* of a document is the date the document was initially issued.
|
||||
This can be the date you bought a product, the date you signed a contract, or
|
||||
the date a letter was sent to you.
|
||||
* The *archive serial number* (short: ASN) of a document is the identifier of
|
||||
the document in your physical document binders. See
|
||||
:ref:`usage-recommended_workflow` below.
|
||||
* The *content* of a document is the text that was OCR'ed from the document.
|
||||
This text is fed into the search engine and is used for matching tags,
|
||||
correspondents and document types.
|
||||
|
||||
|
||||
Frontend overview
|
||||
#################
|
||||
|
||||
.. warning::
|
||||
|
||||
TBD. Add some fancy screenshots!
|
||||
|
||||
Adding documents to paperless
|
||||
#############################
|
||||
|
||||
Once you've got Paperless setup, you need to start feeding documents into it.
|
||||
When adding documents to paperless, it will perform the following operations on
|
||||
your documents:
|
||||
|
||||
1. OCR the document, if it has no text. Digital documents usually have text,
|
||||
and this step will be skipped for those documents.
|
||||
2. Paperless will create an archivable PDF/A document from your document.
|
||||
If this document is coming from your scanner, it will have embedded selectable text.
|
||||
3. Paperless performs automatic matching of tags, correspondents and types on the
|
||||
document before storing it in the database.
|
||||
|
||||
.. hint::
|
||||
|
||||
This process can be configured to fit your needs. If you don't want paperless
|
||||
to create archived versions for digital documents, you can configure that by
|
||||
configuring ``PAPERLESS_OCR_MODE=skip_noarchive``. Please read the
|
||||
:ref:`relevant section in the documentation <configuration-ocr>`.
|
||||
|
||||
.. note::
|
||||
|
||||
No matter which options you choose, Paperless will always store the original
|
||||
document that it found in the consumption directory or in the mail and
|
||||
will never overwrite that document. Archived versions are stored alongside the
|
||||
original versions.
|
||||
|
||||
|
||||
The consumption directory
|
||||
=========================
|
||||
|
||||
The primary method of getting documents into your database is by putting them in
|
||||
the consumption directory. The consumer runs in an infinite loop, looking for new
|
||||
additions to this directory. When it finds them, the consumer goes about the process
|
||||
of parsing them with the OCR, indexing what it finds, and storing it in the media directory.
|
||||
|
||||
Getting stuff into this directory is up to you. If you're running Paperless
|
||||
on your local computer, you might just want to drag and drop files there, but if
|
||||
you're running this on a server and want your scanner to automatically push
|
||||
files to this directory, you'll need to setup some sort of service to accept the
|
||||
files from the scanner. Typically, you're looking at an FTP server like
|
||||
`Proftpd`_ or a Windows folder share with `Samba`_.
|
||||
|
||||
.. _Proftpd: http://www.proftpd.org/
|
||||
.. _Samba: http://www.samba.org/
|
||||
|
||||
.. TODO: hyperref to configuration of the location of this magic folder.
|
||||
|
||||
Web UI Upload
|
||||
=============
|
||||
|
||||
The dashboard has a file drop field to upload documents to paperless. Simply drag a file
|
||||
onto this field or select a file with the file dialog. Multiple files are supported.
|
||||
|
||||
You can also upload documents on any other page of the web UI by dragging-and-dropping
|
||||
files into your browser window.
|
||||
|
||||
.. _usage-mobile_upload:
|
||||
|
||||
Mobile upload
|
||||
=============
|
||||
|
||||
The mobile app over at `<https://github.com/qcasey/paperless_share>`_ allows Android users
|
||||
to share any documents with paperless. This can be combined with any of the mobile
|
||||
scanning apps out there, such as Office Lens.
|
||||
|
||||
Furthermore, there is the `Paperless App <https://github.com/bauerj/paperless_app>`_ as well,
|
||||
which not only has document upload, but also document browsing and download features.
|
||||
|
||||
.. _usage-email:
|
||||
|
||||
IMAP (Email)
|
||||
============
|
||||
|
||||
You can tell paperless-ngx to consume documents from your email accounts.
|
||||
This is a very flexible and powerful feature, if you regularly received documents
|
||||
via mail that you need to archive. The mail consumer can be configured by using the
|
||||
admin interface in the following manner:
|
||||
|
||||
1. Define e-mail accounts.
|
||||
2. Define mail rules for your account.
|
||||
|
||||
These rules perform the following:
|
||||
|
||||
1. Connect to the mail server.
|
||||
2. Fetch all matching mails (as defined by folder, maximum age and the filters)
|
||||
3. Check if there are any consumable attachments.
|
||||
4. If so, instruct paperless to consume the attachments and optionally
|
||||
use the metadata provided in the rule for the new document.
|
||||
5. If documents were consumed from a mail, the rule action is performed
|
||||
on that mail.
|
||||
|
||||
Paperless will completely ignore mails that do not match your filters. It will also
|
||||
only perform the action on mails that it has consumed documents from.
|
||||
|
||||
The actions all ensure that the same mail is not consumed twice by different means.
|
||||
These are as follows:
|
||||
|
||||
* **Delete:** Immediately deletes mail that paperless has consumed documents from.
|
||||
Use with caution.
|
||||
* **Mark as read:** Mark consumed mail as read. Paperless will not consume documents
|
||||
from already read mails. If you read a mail before paperless sees it, it will be
|
||||
ignored.
|
||||
* **Flag:** Sets the 'important' flag on mails with consumed documents. Paperless
|
||||
will not consume flagged mails.
|
||||
* **Move to folder:** Moves consumed mails out of the way so that paperless won't
|
||||
consume them again.
|
||||
* **Add custom Tag:** Adds a custom tag to mails with consumed documents (the IMAP
|
||||
standard calls these "keywords"). Paperless will not consume mails already tagged.
|
||||
Not all mail servers support this feature!
|
||||
|
||||
.. caution::
|
||||
|
||||
The mail consumer will perform these actions on all mails it has consumed
|
||||
documents from. Keep in mind that the actual consumption process may fail
|
||||
for some reason, leaving you with missing documents in paperless.
|
||||
|
||||
.. note::
|
||||
|
||||
With the correct set of rules, you can completely automate your email documents.
|
||||
Create rules for every correspondent you receive digital documents from and
|
||||
paperless will read them automatically. The default action "mark as read" is
|
||||
pretty tame and will not cause any damage or data loss whatsoever.
|
||||
|
||||
You can also set up a special folder in your mail account for paperless and use
|
||||
your favorite mail client to move to-be-consumed mails into that folder
|
||||
automatically or manually and tell paperless to move them to yet another folder
|
||||
after consumption. It's up to you.
|
||||
|
||||
.. note::
|
||||
|
||||
When defining a mail rule with a folder, you may need to try different characters to
|
||||
define how the sub-folders are separated. Common values include ".", "/" or "|", but
|
||||
this varies by the mail server. Check the documentation for your mail server. In the
|
||||
event of an error fetching mail from a certain folder, check the Paperless logs. When
|
||||
a folder is not located, Paperless will attempt to list all folders found in the account
|
||||
to the Paperless logs.
|
||||
|
||||
.. note::
|
||||
|
||||
Paperless will process the rules in the order defined in the admin page.
|
||||
|
||||
You can define catch-all rules and have them executed last to consume
|
||||
any documents not matched by previous rules. Such a rule may assign an "Unknown
|
||||
mail document" tag to consumed documents so you can inspect them further.
|
||||
|
||||
Paperless is set up to check your mails every 10 minutes. This can be configured on the
|
||||
'Scheduled tasks' page in the admin.
|
||||
|
||||
|
||||
REST API
|
||||
========
|
||||
|
||||
You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.
|
||||
|
||||
.. _basic-searching:
|
||||
|
||||
|
||||
Best practices
|
||||
##############
|
||||
|
||||
Paperless offers a couple tools that help you organize your document collection. However,
|
||||
it is up to you to use them in a way that helps you organize documents and find specific
|
||||
documents when you need them. This section offers a couple ideas for managing your collection.
|
||||
|
||||
Document types allow you to classify documents according to what they are. You can define
|
||||
types such as "Receipt", "Invoice", or "Contract". If you used to collect all your receipts
|
||||
in a single binder, you can recreate that system in paperless by defining a document type,
|
||||
assigning documents to that type and then filtering by that type to only see all receipts.
|
||||
|
||||
Not all documents need document types. Sometimes it's hard to determine what the type of a
|
||||
document is or it is hard to justify creating a document type that you only need once or twice.
|
||||
This is okay. As long as the types you define help you organize your collection in the way
|
||||
you want, paperless is doing its job.
|
||||
|
||||
Tags can be used in many different ways. Think of tags as more versatile folders or binders.
|
||||
If you have a binder for documents related to university / your car or health care, you can
|
||||
create these binders in paperless by creating tags and assigning them to relevant documents.
|
||||
Just as with documents, you can filter the document list by tags and only see documents of
|
||||
a certain topic.
|
||||
|
||||
With physical documents, you'll often need to decide which folder the document belongs to.
|
||||
The advantage of tags over folders and binders is that a single document can have multiple
|
||||
tags. A physical document cannot magically appear in two different folders, but with tags,
|
||||
this is entirely possible.
|
||||
|
||||
.. hint::
|
||||
|
||||
This can be used in many different ways. One example: Imagine you're working on a particular
|
||||
task, such as signing up for university. Usually you'll need to collect a bunch of different
|
||||
documents that are already sorted into various folders. With the tag system of paperless,
|
||||
you can create a new group of documents that are relevant to this task without destroying
|
||||
the already existing organization. When you're done with the task, you could delete the
|
||||
tag again, which would be equal to sorting documents back into the folder they belong into.
|
||||
Or keep the tag, up to you.
|
||||
|
||||
All of the logic above applies to correspondents as well. Attach them to documents if you
|
||||
feel that they help you organize your collection.
|
||||
|
||||
When you've started organizing your documents, create a couple saved views for document collections
|
||||
you regularly access. This is equal to having labeled physical binders on your desk, except
|
||||
that these saved views are dynamic and simply update themselves as you add documents to the system.
|
||||
|
||||
Here are a couple examples of tags and types that you could use in your collection.
|
||||
|
||||
* An ``inbox`` tag for newly added documents that you haven't manually edited yet.
|
||||
* A tag ``car`` for everything car related (repairs, registration, insurance, etc)
|
||||
* A tag ``todo`` for documents that you still need to do something with, such as reply, or
|
||||
perform some task online.
|
||||
* A tag ``bank account x`` for all bank statements related to that account.
|
||||
* A tag ``mail`` for anything that you added to paperless via its mail processing capabilities.
|
||||
* A tag ``missing_metadata`` when you still need to add some metadata to a document, but can't
|
||||
or don't want to do this right now.
|
||||
|
||||
.. _basic-usage_searching:
|
||||
|
||||
Searching
|
||||
#########
|
||||
|
||||
Paperless offers an extensive searching mechanism that is designed to allow you to quickly
|
||||
find a document you're looking for (for example, that thing that just broke and you bought
|
||||
a couple months ago, that contract you signed 8 years ago).
|
||||
|
||||
When you search paperless for a document, it tries to match this query against your documents.
|
||||
Paperless will look for matching documents by inspecting their content, title, correspondent,
|
||||
type and tags. Paperless returns a scored list of results, so that documents matching your query
|
||||
better will appear further up in the search results.
|
||||
|
||||
By default, paperless returns only documents which contain all words typed in the search bar.
|
||||
However, paperless also offers advanced search syntax if you want to drill down the results
|
||||
further.
|
||||
|
||||
Matching documents with logical expressions:
|
||||
|
||||
.. code::
|
||||
|
||||
shopname AND (product1 OR product2)
|
||||
|
||||
Matching specific tags, correspondents or types:
|
||||
|
||||
.. code::
|
||||
|
||||
type:invoice tag:unpaid
|
||||
correspondent:university certificate
|
||||
|
||||
Matching dates:
|
||||
|
||||
.. code::
|
||||
|
||||
created:[2005 to 2009]
|
||||
added:yesterday
|
||||
modified:today
|
||||
|
||||
Matching inexact words:
|
||||
|
||||
.. code::
|
||||
|
||||
produ*name
|
||||
|
||||
.. note::
|
||||
|
||||
Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers
|
||||
auto complete and query correction.
|
||||
|
||||
All of these constructs can be combined as you see fit.
|
||||
If you want to learn more about the query language used by paperless, paperless uses Whoosh's default query language.
|
||||
Head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
|
||||
For details on what date parsing utilities are available, see
|
||||
`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
|
||||
|
||||
|
||||
.. _usage-recommended_workflow:
|
||||
|
||||
The recommended workflow
|
||||
########################
|
||||
|
||||
Once you have familiarized yourself with paperless and are ready to use it
|
||||
for all your documents, the recommended workflow for managing your documents
|
||||
is as follows. This workflow also takes into account that some documents
|
||||
have to be kept in physical form, but still ensures that you get all the
|
||||
advantages for these documents as well.
|
||||
|
||||
The following diagram shows how easy it is to manage your documents.
|
||||
|
||||
.. image:: _static/recommended_workflow.png
|
||||
|
||||
Preparations in paperless
|
||||
=========================
|
||||
|
||||
* Create an inbox tag that gets assigned to all new documents.
|
||||
* Create a TODO tag.
|
||||
|
||||
Processing of the physical documents
|
||||
====================================
|
||||
|
||||
Keep a physical inbox. Whenever you receive a document that you need to
|
||||
archive, put it into your inbox. Regularly, do the following for all documents
|
||||
in your inbox:
|
||||
|
||||
1. For each document, decide if you need to keep the document in physical
|
||||
form. This applies to certain important documents, such as contracts and
|
||||
certificates.
|
||||
2. If you need to keep the document, write a running number on the document
|
||||
before scanning, starting at one and counting upwards. This is the archive
|
||||
serial number, or ASN in short.
|
||||
3. Scan the document.
|
||||
4. If the document has an ASN assigned, store it in a *single* binder, sorted
|
||||
by ASN. Don't order this binder in any other way.
|
||||
5. If the document has no ASN, throw it away. Yay!
|
||||
|
||||
Over time, you will notice that your physical binder will fill up. If it is
|
||||
full, label the binder with the range of ASNs in this binder (e.g., "Documents
|
||||
1 to 343"), store the binder in your cellar or elsewhere, and start a new
|
||||
binder.
|
||||
|
||||
The idea behind this process is that you will never have to use the physical
|
||||
binders to find a document. If you need a specific physical document, you
|
||||
may find this document by:
|
||||
|
||||
1. Searching in paperless for the document.
|
||||
2. Identify the ASN of the document, since it appears on the scan.
|
||||
3. Grab the relevant document binder and get the document. This is easy since
|
||||
they are sorted by ASN.
|
||||
|
||||
Processing of documents in paperless
|
||||
====================================
|
||||
|
||||
Once you have scanned in a document, proceed in paperless as follows.
|
||||
|
||||
1. If the document has an ASN, assign the ASN to the document.
|
||||
2. Assign a correspondent to the document (e.g., your employer, bank, etc)
|
||||
This isn't strictly necessary but helps in finding a document when you need
|
||||
it.
|
||||
3. Assign a document type (e.g., invoice, bank statement, etc) to the document
|
||||
This isn't strictly necessary but helps in finding a document when you need
|
||||
it.
|
||||
4. Assign a proper title to the document (the name of an item you bought, the
|
||||
subject of the letter, etc)
|
||||
5. Check that the date of the document is correct. Paperless tries to read
|
||||
the date from the content of the document, but this fails sometimes if the
|
||||
OCR is bad or multiple dates appear on the document.
|
||||
6. Remove inbox tags from the documents.
|
||||
|
||||
.. hint::
|
||||
|
||||
You can set up manual matching rules for your correspondents and tags and
|
||||
paperless will assign them automatically. After consuming a couple documents,
|
||||
you can even ask paperless to *learn* when to assign tags and correspondents
|
||||
by itself. For details on this feature, see :ref:`advanced-matching`.
|
||||
|
||||
Task management
|
||||
===============
|
||||
|
||||
Some documents require attention and require you to act on the document. You
|
||||
may take two different approaches to handle these documents based on how
|
||||
regularly you intend to scan documents and use paperless.
|
||||
|
||||
* If you scan and process your documents in paperless regularly, assign a
|
||||
TODO tag to all scanned documents that you need to process. Create a saved
|
||||
view on the dashboard that shows all documents with this tag.
|
||||
* If you do not scan documents regularly and use paperless solely for archiving,
|
||||
create a physical todo box next to your physical inbox and put documents you
|
||||
need to process in the TODO box. When you have performed the task associated with
|
||||
the document, move it to the inbox.
|
@@ -95,6 +95,7 @@ echo "============================"
|
||||
echo ""
|
||||
echo "The URL paperless will be available at. This is required if the"
|
||||
echo "installation will be accessible via the web, otherwise can be left blank."
|
||||
echo "Example: https://paperless.example.com"
|
||||
echo ""
|
||||
|
||||
ask "URL" ""
|
||||
@@ -112,6 +113,8 @@ echo ""
|
||||
echo "Paperless requires you to configure the current time zone correctly."
|
||||
echo "Otherwise, the dates of your documents may appear off by one day,"
|
||||
echo "depending on where you are on earth."
|
||||
echo "Example: Europe/Berlin"
|
||||
echo "See here for a list: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
|
||||
echo ""
|
||||
|
||||
ask "Current time zone" "$default_time_zone"
|
||||
|
66
mkdocs.yml
Normal file
@@ -0,0 +1,66 @@
|
||||
site_name: Paperless-ngx
|
||||
theme:
|
||||
name: material
|
||||
logo: assets/logo.svg
|
||||
font:
|
||||
text: Roboto
|
||||
code: Roboto Mono
|
||||
palette:
|
||||
# Palette toggle for light mode
|
||||
- media: "(prefers-color-scheme: light)"
|
||||
scheme: default
|
||||
toggle:
|
||||
icon: material/brightness-7
|
||||
name: Switch to dark mode
|
||||
|
||||
# Palette toggle for dark mode
|
||||
- media: "(prefers-color-scheme: dark)"
|
||||
scheme: slate
|
||||
toggle:
|
||||
icon: material/brightness-4
|
||||
name: Switch to light mode
|
||||
features:
|
||||
- navigation.tabs
|
||||
- navigation.top
|
||||
- toc.integrate
|
||||
- content.code.annotate
|
||||
icon:
|
||||
repo: fontawesome/brands/github
|
||||
favicon: assets/favicon.png
|
||||
repo_url: https://github.com/paperless-ngx/paperless-ngx
|
||||
edit_uri: blob/main/docs/
|
||||
extra_css:
|
||||
- assets/extra.css
|
||||
markdown_extensions:
|
||||
- attr_list
|
||||
- md_in_html
|
||||
- def_list
|
||||
- admonition
|
||||
- tables
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
- pymdownx.superfences
|
||||
- pymdownx.inlinehilite
|
||||
- pymdownx.snippets
|
||||
strict: true
|
||||
nav:
|
||||
- index.md
|
||||
- setup.md
|
||||
- 'Basic Usage': usage.md
|
||||
- configuration.md
|
||||
- administration.md
|
||||
- advanced_usage.md
|
||||
- 'REST API': api.md
|
||||
- development.md
|
||||
- 'FAQs': faq.md
|
||||
- troubleshooting.md
|
||||
- changelog.md
|
||||
copyright: Copyright © 2016 - 2023 Daniel Quinn, Jonas Winkler, and the Paperless-ngx team
|
||||
extra:
|
||||
social:
|
||||
- icon: fontawesome/brands/github
|
||||
link: https://github.com/paperless-ngx/paperless-ngx
|
||||
- icon: fontawesome/brands/docker
|
||||
link: https://hub.docker.com/r/paperlessngx/paperless-ngx
|
||||
- icon: material/chat
|
||||
link: https://matrix.to/#/#paperless:matrix.org
|
@@ -1,5 +1,5 @@
|
||||
# Have a look at the docs for documentation.
|
||||
# https://paperless-ngx.readthedocs.io/en/latest/configuration.html
|
||||
# https://docs.paperless-ngx.com/configuration/
|
||||
|
||||
# Debug. Only enable this for development.
|
||||
|
||||
|
@@ -2,5 +2,5 @@
|
||||
|
||||
docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13
|
||||
docker run -d -p 6379:6379 redis:latest
|
||||
docker run -p 3000:3000 -d gotenberg/gotenberg:7.6
|
||||
docker run -p 3000:3000 -d gotenberg/gotenberg:7.6 gotenberg --chromium-disable-javascript=true --chromium-allow-list="file:///tmp/.*"
|
||||
docker run -p 9998:9998 -d ghcr.io/paperless-ngx/tika:latest
|
||||
|
@@ -1,18 +0,0 @@
|
||||
# This file is used by the build system to adjust CSS and JS output to support the specified browsers below.
|
||||
# For additional information regarding the format and rule options, please see:
|
||||
# https://github.com/browserslist/browserslist#queries
|
||||
|
||||
# For the full list of supported browsers by the Angular framework, please see:
|
||||
# https://angular.io/guide/browser-support
|
||||
|
||||
# You can see what browsers were selected by your queries by running:
|
||||
# npx browserslist
|
||||
|
||||
last 1 Chrome version
|
||||
last 1 Firefox version
|
||||
last 2 Edge major versions
|
||||
last 2 Safari major versions
|
||||
last 2 iOS major versions
|
||||
Firefox ESR
|
||||
not IE 9-10 # Angular support for IE 9-10 has been deprecated and will be removed as of Angular v11. To opt-in, remove the 'not' prefix on this line.
|
||||
not IE 11 # Angular supports IE 11 only as an opt-in. To opt-in, remove the 'not' prefix on this line.
|
51
src-ui/.eslintrc.json
Normal file
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"root": true,
|
||||
"ignorePatterns": [
|
||||
"projects/**/*"
|
||||
],
|
||||
"overrides": [
|
||||
{
|
||||
"files": [
|
||||
"*.ts"
|
||||
],
|
||||
"parserOptions": {
|
||||
"project": [
|
||||
"tsconfig.json",
|
||||
"e2e/tsconfig.json"
|
||||
],
|
||||
"createDefaultProgram": true
|
||||
},
|
||||
"extends": [
|
||||
"plugin:@angular-eslint/recommended",
|
||||
"plugin:@angular-eslint/template/process-inline-templates"
|
||||
],
|
||||
"rules": {
|
||||
"@angular-eslint/directive-selector": [
|
||||
"error",
|
||||
{
|
||||
"type": "attribute",
|
||||
"prefix": "app",
|
||||
"style": "camelCase"
|
||||
}
|
||||
],
|
||||
"@angular-eslint/component-selector": [
|
||||
"error",
|
||||
{
|
||||
"type": "element",
|
||||
"prefix": "app",
|
||||
"style": "kebab-case"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"files": [
|
||||
"*.html"
|
||||
],
|
||||
"extends": [
|
||||
"plugin:@angular-eslint/template/recommended"
|
||||
],
|
||||
"rules": {}
|
||||
}
|
||||
]
|
||||
}
|
@@ -12,7 +12,7 @@ Run `ng generate component component-name` to generate a new component. You can
|
||||
|
||||
## Build
|
||||
|
||||
Run `ng build` to build the project. The build artifacts will be stored in the `dist/` directory. Use the `--prod` flag for a production build.
|
||||
Run `ng build` to build the project. The build artifacts will be stored in the `dist/` directory. Use the `--configuration production` flag for a production build.
|
||||
|
||||
## Running unit tests
|
||||
|
||||
|
@@ -16,6 +16,7 @@
|
||||
"i18n": {
|
||||
"sourceLocale": "en-US",
|
||||
"locales": {
|
||||
"ar-AR": "src/locale/messages.ar_AR.xlf",
|
||||
"be-BY": "src/locale/messages.be_BY.xlf",
|
||||
"cs-CZ": "src/locale/messages.cs_CZ.xlf",
|
||||
"da-DK": "src/locale/messages.da_DK.xlf",
|
||||
@@ -53,7 +54,8 @@
|
||||
"src/favicon.ico",
|
||||
"src/apple-touch-icon.png",
|
||||
"src/assets",
|
||||
"src/manifest.webmanifest", {
|
||||
"src/manifest.webmanifest",
|
||||
{
|
||||
"glob": "pdf.worker.min.js",
|
||||
"input": "node_modules/pdfjs-dist/build/",
|
||||
"output": "/assets/js/"
|
||||
@@ -103,7 +105,9 @@
|
||||
]
|
||||
},
|
||||
"en-US": {
|
||||
"localize": ["en-US"]
|
||||
"localize": [
|
||||
"en-US"
|
||||
]
|
||||
}
|
||||
},
|
||||
"defaultConfiguration": ""
|
||||
@@ -171,9 +175,31 @@
|
||||
"watch": true,
|
||||
"headless": false
|
||||
}
|
||||
},
|
||||
"lint": {
|
||||
"builder": "@angular-eslint/builder:lint",
|
||||
"options": {
|
||||
"lintFilePatterns": [
|
||||
"src/**/*.ts",
|
||||
"src/**/*.html"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"defaultProject": "paperless-ui"
|
||||
"defaultProject": "paperless-ui",
|
||||
"cli": {
|
||||
"schematicCollections": [
|
||||
"@angular-eslint/schematics"
|
||||
]
|
||||
},
|
||||
"schematics": {
|
||||
"@angular-eslint/schematics:application": {
|
||||
"setParserOptionsProject": true
|
||||
},
|
||||
"@angular-eslint/schematics:library": {
|
||||
"setParserOptionsProject": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -44,7 +44,7 @@ describe('document-detail', () => {
|
||||
})
|
||||
|
||||
cy.viewport(1024, 1024)
|
||||
cy.visit('/documents/1/')
|
||||
cy.visit('/documents/1/').wait('@ui-settings')
|
||||
})
|
||||
|
||||
it('should activate / deactivate save button when changes are saved', () => {
|
||||
@@ -66,8 +66,21 @@ describe('document-detail', () => {
|
||||
cy.contains('You have unsaved changes').should('not.exist')
|
||||
})
|
||||
|
||||
it('should show a mobile preview', () => {
|
||||
cy.viewport(440, 1000)
|
||||
cy.get('a')
|
||||
.contains('Preview')
|
||||
.scrollIntoView({ offset: { top: 150, left: 0 } })
|
||||
.click()
|
||||
cy.get('pdf-viewer').should('be.visible')
|
||||
})
|
||||
|
||||
it('should show a list of comments', () => {
|
||||
cy.wait(1000).get('a').contains('Comments').click().wait(1000)
|
||||
cy.wait(1000)
|
||||
.get('a')
|
||||
.contains('Comments')
|
||||
.click({ force: true })
|
||||
.wait(1000)
|
||||
cy.get('app-document-comments').find('.card').its('length').should('eq', 3)
|
||||
})
|
||||
|
||||
|
@@ -52,6 +52,10 @@ describe('documents-list', () => {
|
||||
|
||||
req.reply(response)
|
||||
})
|
||||
|
||||
cy.intercept('http://localhost:8000/api/documents/selection_data/', {
|
||||
fixture: 'documents/selection_data.json',
|
||||
}).as('selection-data')
|
||||
})
|
||||
|
||||
cy.viewport(1280, 1024)
|
||||
@@ -76,6 +80,28 @@ describe('documents-list', () => {
|
||||
cy.get('app-document-card-large')
|
||||
})
|
||||
|
||||
it('should show partial tag selection', () => {
|
||||
cy.get('app-document-card-small:nth-child(1)').click()
|
||||
cy.get('app-document-card-small:nth-child(4)').click()
|
||||
cy.get('app-bulk-editor button')
|
||||
.contains('Tags')
|
||||
.click()
|
||||
.wait('@selection-data')
|
||||
cy.get('svg.bi-dash').should('be.visible')
|
||||
cy.get('svg.bi-check').should('be.visible')
|
||||
})
|
||||
|
||||
it('should allow bulk removal', () => {
|
||||
cy.get('app-document-card-small:nth-child(1)').click()
|
||||
cy.get('app-document-card-small:nth-child(4)').click()
|
||||
cy.get('app-bulk-editor').within(() => {
|
||||
cy.get('button').contains('Tags').click().wait('@selection-data')
|
||||
cy.get('button').contains('Another Sample Tag').click()
|
||||
cy.get('button').contains('Apply').click()
|
||||
})
|
||||
cy.contains('operation will remove the tag')
|
||||
})
|
||||
|
||||
it('should filter tags', () => {
|
||||
cy.get('app-filter-editor app-filterable-dropdown[title="Tags"]').within(
|
||||
() => {
|
||||
|