mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Fixes the download and usage of the downloaded data
This commit is contained in:

committed by
Trenton H

parent
1262c121f0
commit
6523cf0c4b
@@ -306,6 +306,12 @@ class DocumentClassifier:
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

import nltk

# Not really hacky, since it isn't private and is documented, but
# set the search path for NLTK data to the single location it should be in
nltk.data.path = [settings.NLTK_DIR]

if self.stemmer is None:
    self.stemmer = SnowballStemmer("english")

@@ -84,6 +84,8 @@ THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))

NLTK_DIR = os.path.join(DATA_DIR, "nltk")

TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")

# Lock file for synchronizing changes to the MEDIA directory across multiple
Reference in New Issue
Block a user