From d08eb0c66bebbd9839b09d2c6689e68553d1ca43 Mon Sep 17 00:00:00 2001
From: Trenton H
Date: Wed, 5 Oct 2022 11:01:45 -0700
Subject: [PATCH] Adds skipping of NLTK data download if the feature appears disabled

---
 docker/docker-entrypoint.sh | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh
index 81536f5ce..2a0269e73 100755
--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -56,15 +56,32 @@ map_folders() {
 nltk_data () {
+	# Downloads (or updates) the NLTK data sets into "${DATA_DIR}/nltk",
+	# unless PAPERLESS_ENABLE_NLTK is set to a value that is not truthy.
+	# Globals: DATA_DIR (read), PAPERLESS_ENABLE_NLTK (read, may be unset)
 	# Store the NLTK data outside the Docker container
 	local nltk_data_dir="${DATA_DIR}/nltk"
+	# Anchored so only a whole-value match counts, never a substring
+	local -r truthy_re='^(yes|y|1|t|true)$'
+	# Lower-case the entire value so that e.g. "TRUE" and "Yes" are accepted
+	local enable_nltk="${PAPERLESS_ENABLE_NLTK:-}"
+	enable_nltk="${enable_nltk,,}"
 
-	# Download or update the snowball stemmer data
-	python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" snowball_data
+	# Download when the setting is unset/empty or explicitly truthy; any
+	# other value skips the download
+	if [[ -z "${enable_nltk}" || "${enable_nltk}" =~ ${truthy_re} ]]; then
 
-	# Download or update the stopwords corpus
-	python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" stopwords
+		# Download or update the snowball stemmer data
+		python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" snowball_data
 
-	# Download or update the punkt tokenizer data
-	python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" punkt
+		# Download or update the stopwords corpus
+		python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" stopwords
+
+		# Download or update the punkt tokenizer data
+		python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" punkt
+
+	else
+		echo "Skipping NLTK data download"
+
+	fi
 }