Fixes the download of the NLTK data and the usage of the downloaded data

This commit is contained in:
Trenton Holmes
2022-09-16 06:55:42 -07:00
committed by Trenton H
parent 1262c121f0
commit 6523cf0c4b
4 changed files with 25 additions and 20 deletions

View File

@@ -306,6 +306,12 @@ class DocumentClassifier:
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
import nltk
# Not really hacky, since nltk.data.path is public and documented:
# restrict the NLTK data search path to the single location the data should be in
nltk.data.path = [settings.NLTK_DIR]
if self.stemmer is None:
self.stemmer = SnowballStemmer("english")

View File

@@ -84,6 +84,8 @@ THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", os.path.join(BASE_DIR, "..", "data"))
NLTK_DIR = os.path.join(DATA_DIR, "nltk")
TRASH_DIR = os.getenv("PAPERLESS_TRASH_DIR")
# Lock file for synchronizing changes to the MEDIA directory across multiple