From 30ebfaf8e601b9162d4000341eaea1848c763005 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Wed, 23 Nov 2022 09:07:54 -0800
Subject: [PATCH] Created Pre-Consume Script Examples (markdown)

---
 Pre-Consume-Script-Examples.md | 58 ++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 Pre-Consume-Script-Examples.md

diff --git a/Pre-Consume-Script-Examples.md b/Pre-Consume-Script-Examples.md
new file mode 100644
index 0000000..e760594
--- /dev/null
+++ b/Pre-Consume-Script-Examples.md
@@ -0,0 +1,58 @@
+This wiki page is a repository of example [pre-consume scripts](https://paperless-ngx.readthedocs.io/en/latest/advanced_usage.html#pre-consumption-script) contributed by the community. As always, you should exercise caution when using a script and make sure you understand the code before using a script from the internet.
+
+
+## Removing Blank Pages
+
+- :warning: **This script modifies the original file**
+- Original source: https://github.com/paperless-ngx/paperless-ngx/discussions/668#discussioncomment-3936343
+
+```bash
+#!/usr/bin/env bash
+# Pre-consume script: deletes blank pages from the incoming PDF, in place.
+# Needs pdfinfo, gs (Ghostscript), awk and qpdf on the PATH.
+set -e -o pipefail
+# Fixed locale so tool output and number formatting do not vary by system.
+export LC_ALL=C
+
+#IN="$1"
+# Stop with a clear message if the document path was not provided.
+IN="${DOCUMENT_SOURCE_PATH:?DOCUMENT_SOURCE_PATH is not set}"
+
+# Page count: the digits of the "Pages:" line printed by pdfinfo.
+PAGES=$(pdfinfo "$IN" | grep '^Pages:' | tr -dc '0-9')
+# A page is kept only if its summed ink coverage is above this value.
+THRESHOLD=0.002
+
+# Prints the numbers of the non-blank pages of $IN, one per line.
+# Each page that will be dropped is reported on stderr.
+# Returns non-zero if the ink coverage of a page cannot be measured.
+non_blank() {
+  local i PERCENT
+  for ((i = 1; i <= PAGES; i++)); do
+    # bash switches off "set -e" inside $(...), so the failure is checked here:
+    # otherwise a failing gs would yield a sum of 0 and the page would be dropped.
+    PERCENT=$(gs -o - -dFirstPage="$i" -dLastPage="$i" -sDEVICE=inkcov "$IN" \
+      | grep CMYK \
+      | awk '{ sum += $1 + $2 + $3 + $4 } END { printf "%.5f\n", sum }') || {
+      >&2 echo "Could not measure the ink coverage of page $i of $IN"
+      return 1
+    }
+    # Pass the numbers with -v rather than splicing them into the awk program.
+    if awk -v percent="$PERCENT" -v threshold="$THRESHOLD" \
+      'BEGIN { exit !(percent > threshold) }'; then
+      echo "$i"
+    else
+      >&2 echo "Color-sum is $PERCENT: will remove blank page $i of $IN"
+    fi
+  done
+}
+
+NON_BLANK=$(non_blank)
+
+# If every page is blank, NON_BLANK is empty and the file is left untouched.
+if [ -n "$NON_BLANK" ]; then
+  # Join the page numbers with commas to form the page range given to qpdf.
+  NON_BLANK=${NON_BLANK//$'\n'/,}
+  qpdf "$IN" --replace-input --pages . "$NON_BLANK" --
+fi
+```
\ No newline at end of file