mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Updated Pre Consume Script Examples (markdown)
parent
d1f7627d5f
commit
a6ca23919e
@ -4,39 +4,59 @@ This wiki page is a repository of example [pre-consume scripts](https://docs.pap
|
||||
## Removing Blank Pages
|
||||
|
||||
- :warning: **This script modifies the original file**
|
||||
- Original source: https://github.com/paperless-ngx/paperless-ngx/discussions/668#discussioncomment-3936343
|
||||
- Original source: https://github.com/paperless-ngx/paperless-ngx/discussions/668#discussioncomment-3936343, with a slight update (suppresses warnings for Apple PDFs)
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
#!/bin/bash
|
||||
#set -x -e -o pipefail
|
||||
set -e -o pipefail
|
||||
export LC_ALL=C
|
||||
|
||||
THRESHOLD=0.002
|
||||
|
||||
#IN="$1"
|
||||
IN="${DOCUMENT_WORKING_PATH}"
|
||||
IN="$DOCUMENT_WORKING_PATH"
|
||||
|
||||
# Check for PDF format
|
||||
TYPE=$(file -b "$IN")
|
||||
|
||||
if [ "${TYPE%%,*}" != "PDF document" ]; then
|
||||
>&2 echo "Skipping $IN - non PDF [$TYPE]."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# PDF file - proceed
|
||||
|
||||
#PAGES=$(pdfinfo "$IN" | grep ^Pages: | tr -dc '0-9')
|
||||
PAGES=$(pdfinfo "$IN" | awk '/Pages:/ {print $2}')
|
||||
|
||||
>&2 echo Total pages $PAGES
|
||||
|
||||
|
||||
# Threshold for HP scanners
|
||||
# THRESHOLD=1
|
||||
# Threshold for Lexmark MC2425
|
||||
THRESHOLD=0.8
|
||||
|
||||
PAGES=$(pdfinfo "${IN}" | grep -a "^Pages:" | tr -dc '0-9')
|
||||
|
||||
non_blank() {
|
||||
for (( i="1"; i<="${PAGES}"; i++ )); do
|
||||
PERCENT=$(gs -o - -dFirstPage="${i}" -dLastPage="${i}" -sDEVICE=inkcov "${IN}" | grep CMYK | nawk 'BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END { printf "%.5f\n", sum } ')
|
||||
if awk "BEGIN { exit !(${PERCENT} > ${THRESHOLD}) }"; then
|
||||
echo "${i}"
|
||||
for i in $(seq 1 $PAGES) ; do
|
||||
PERCENT=$(gs -o - -dFirstPage=${i} -dLastPage=${i} -sDEVICE=ink_cov "${IN}" | grep CMYK | nawk 'BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END { printf "%.5f\n", sum } ')
|
||||
>&2 echo -n "Color-sum in page $i is $PERCENT: "
|
||||
if awk "BEGIN { exit !($PERCENT > $THRESHOLD) }"; then
|
||||
echo $i
|
||||
>&2 echo "Page added to document"
|
||||
else
|
||||
>&2 echo "Color-sum is ${PERCENT}: will remove blank page ${i} of ${IN}"
|
||||
>&2 echo "Page removed from document"
|
||||
fi
|
||||
done
|
||||
done
|
||||
}
|
||||
|
||||
NON_BLANK="$(non_blank)"
|
||||
NON_BLANK="$(tr '\n' ' ' <<<"${NON_BLANK}")"
|
||||
NON_BLANK="${NON_BLANK% }"
|
||||
NON_BLANK=$(non_blank)
|
||||
|
||||
if [ -n "${NON_BLANK}" ]; then
|
||||
NON_BLANK=$(echo "${NON_BLANK}" | tr ' ' ",")
|
||||
qpdf "${IN}" --replace-input --pages . "${NON_BLANK}" --
|
||||
if [ -n "$NON_BLANK" ]; then
|
||||
NON_BLANK=$(echo $NON_BLANK | tr ' ' ",")
|
||||
qpdf "$IN" --warning-exit-0 --replace-input --pages . $NON_BLANK --
|
||||
fi
|
||||
|
||||
```
|
||||
|
||||
## Cleaning with `qpdf`
|
||||
|
Loading…
x
Reference in New Issue
Block a user