mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Adds skipping of NLTK data download if the feature appears disabled
This commit is contained in:
parent
d1a17480ea
commit
d08eb0c66b
@ -56,15 +56,24 @@ map_folders() {
|
|||||||
#######################################
# Download or update the NLTK data sets (snowball stemmer, stopwords
# corpus, punkt tokenizer) unless the feature has been disabled.
# Globals:
#   DATA_DIR (read)              - base directory; data is stored in
#                                  "${DATA_DIR}/nltk"
#   PAPERLESS_ENABLE_NLTK (read) - optional switch. Unset/empty or one of
#                                  yes, y, 1, t, true (any letter case)
#                                  enables the download; any other value
#                                  skips it.
# Arguments:
#   None
# Outputs:
#   Whatever the NLTK downloader prints, or a notice on stdout when the
#   download is skipped.
# Returns:
#   Exit status of the last command executed.
#######################################
nltk_data () {
	# Store the NLTK data outside the Docker container
	local -r nltk_data_dir="${DATA_DIR}/nltk"

	# Lower-case the WHOLE value (",," rather than ","), so that e.g. "TRUE"
	# or "Yes" are recognised. Locals (instead of a global readonly) keep
	# the function safe to call more than once.
	local enable_nltk="${PAPERLESS_ENABLE_NLTK:-}"
	enable_nltk="${enable_nltk,,}"

	local corpus

	# If not set, or it looks truthy. Exact matching is deliberate: a regex
	# or substring test would also accept fragments such as "e" or "ru".
	case "${enable_nltk}" in
		""|yes|y|1|t|true)
			# Download or update, in order: the snowball stemmer data,
			# the stopwords corpus and the punkt tokenizer data
			for corpus in snowball_data stopwords punkt; do
				python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" "${corpus}"
			done
			;;
		*)
			echo "Skipping NLTK data download"
			;;
	esac
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user