mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Fixes the download of the NLTK data and the usage of the downloaded data
This commit is contained in:

committed by
Trenton H

parent
3c12f13df2
commit
70b1988a55
@@ -53,6 +53,21 @@ map_folders() {
|
||||
export CONSUME_DIR="${PAPERLESS_CONSUMPTION_DIR:-/usr/src/paperless/consume}"
|
||||
}
|
||||
|
||||
#######################################
# Download (or refresh) the NLTK data sets into "${DATA_DIR}/nltk".
# Globals:   DATA_DIR (read)
# Arguments: none
# Outputs:   whatever python3 / nltk.downloader print; an error message on
#            stderr when DATA_DIR is missing
# Returns:   1 if DATA_DIR is unset or empty, otherwise the exit status of
#            the nltk.downloader invocation
#######################################
nltk_data () {
	# Without DATA_DIR the target below would expand to "/nltk" at the
	# filesystem root, so refuse to continue instead of downloading there.
	if [[ -z "${DATA_DIR:-}" ]]; then
		echo "DATA_DIR is not set, cannot download NLTK data" >&2
		return 1
	fi

	# Store the NLTK data outside the Docker container
	local nltk_data_dir="${DATA_DIR}/nltk"

	# Download or update all required packages with one interpreter start
	# (nltk.downloader accepts several package ids on its command line):
	#   snowball_data - snowball stemmer data
	#   stopwords     - stopwords corpus
	#   punkt         - punkt tokenizer data
	python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" \
		snowball_data stopwords punkt
}
|
||||
|
||||
initialize() {
|
||||
|
||||
# Setup environment from secrets before anything else
|
||||
@@ -93,6 +108,8 @@ initialize() {
|
||||
echo "Creating directory ${tmp_dir}"
|
||||
mkdir -p "${tmp_dir}"
|
||||
|
||||
nltk_data
|
||||
|
||||
set +e
|
||||
echo "Adjusting permissions of paperless files. This may take a while."
|
||||
chown -R paperless:paperless ${tmp_dir}
|
||||
|
@@ -89,24 +89,6 @@ superuser() {
|
||||
fi
|
||||
}
|
||||
|
||||
#######################################
# Download (or refresh) the NLTK data sets into "${DATA_DIR}/nltk" and
# export NLTK_DATA pointing at that directory.
# Globals:   DATA_DIR (read), NLTK_DATA (written, exported)
# Arguments: none
# Outputs:   whatever python3 / nltk.downloader print; an error message on
#            stderr when DATA_DIR is missing
# Returns:   1 if DATA_DIR is unset or empty (NLTK_DATA is left untouched),
#            otherwise the status of the final export
#######################################
nltk_data () {
	# Without DATA_DIR the target below would expand to "/nltk" at the
	# filesystem root, so refuse to continue instead of downloading there.
	if [[ -z "${DATA_DIR:-}" ]]; then
		echo "DATA_DIR is not set, cannot download NLTK data" >&2
		return 1
	fi

	# Store the NLTK data outside the Docker container
	local nltk_data_dir="${DATA_DIR}/nltk"

	# Download or update all required packages with one interpreter start
	# (nltk.downloader accepts several package ids on its command line):
	#   snowball_data - snowball stemmer data
	#   stopwords     - stopwords corpus
	#   punkt         - punkt tokenizer data
	# RuntimeWarning is silenced to keep the start-up log free of Python
	# warnings emitted while launching the module with -m.
	python3 -W ignore::RuntimeWarning -m nltk.downloader -d "${nltk_data_dir}" \
		snowball_data stopwords punkt

	# Set env so nltk can find the downloaded data
	# NOTE(review): an export only reaches this shell and the processes it
	# starts afterwards - confirm that every consumer of the NLTK data is
	# launched from this script.
	export NLTK_DATA="${nltk_data_dir}"
}
|
||||
|
||||
do_work() {
|
||||
if [[ "${PAPERLESS_DBENGINE}" == "mariadb" ]]; then
|
||||
wait_for_mariadb
|
||||
@@ -118,8 +100,6 @@ do_work() {
|
||||
|
||||
migrations
|
||||
|
||||
nltk_data
|
||||
|
||||
search_index
|
||||
|
||||
superuser
|
||||
|
Reference in New Issue
Block a user