mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Updated Pre Consume Script Examples (markdown)
| @@ -4,39 +4,59 @@ This wiki page is a repository of example [pre-consume scripts](https://docs.pap | ||||
| ## Removing Blank Pages | ||||
|  | ||||
| - :warning: **This script modifies the original file** | ||||
| - Original source: https://github.com/paperless-ngx/paperless-ngx/discussions/668#discussioncomment-3936343 | ||||
| - Original source: https://github.com/paperless-ngx/paperless-ngx/discussions/668#discussioncomment-3936343 with a slight update (suppress warnings for Apple PDFs) | ||||
|  | ||||
| ```bash | ||||
| #!/usr/bin/env bash | ||||
| #!/bin/bash | ||||
| #set -x -e -o pipefail | ||||
| set -e -o pipefail | ||||
| export LC_ALL=C | ||||
|  | ||||
| THRESHOLD=0.002 | ||||
|  | ||||
| #IN="$1" | ||||
| IN="${DOCUMENT_WORKING_PATH}" | ||||
| IN="$DOCUMENT_WORKING_PATH" | ||||
|  | ||||
| # Check for PDF format | ||||
| TYPE=$(file -b "$IN") | ||||
|  | ||||
| if [ "${TYPE%%,*}" != "PDF document" ]; then | ||||
|   >&2 echo "Skipping $IN - non PDF [$TYPE]." | ||||
|   exit 0 | ||||
| fi | ||||
|  | ||||
| # PDF file - proceed | ||||
|  | ||||
| #PAGES=$(pdfinfo "$IN" | grep ^Pages: | tr -dc '0-9') | ||||
| PAGES=$(pdfinfo "$IN" | awk '/Pages:/ {print $2}') | ||||
|  | ||||
| >&2 echo Total pages $PAGES | ||||
|  | ||||
|  | ||||
| # Threshold for HP scanners | ||||
| # THRESHOLD=1 | ||||
| # Threshold for Lexmark MC2425 | ||||
| THRESHOLD=0.8 | ||||
|  | ||||
| PAGES=$(pdfinfo "${IN}" | grep -a "^Pages:" | tr -dc '0-9') | ||||
|  | ||||
| non_blank() { | ||||
| for (( i="1"; i<="${PAGES}"; i++ )); do | ||||
| PERCENT=$(gs -o -  -dFirstPage="${i}" -dLastPage="${i}" -sDEVICE=inkcov "${IN}" | grep CMYK | nawk 'BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END { printf "%.5f\n", sum } ') | ||||
|     if awk "BEGIN { exit !(${PERCENT} > ${THRESHOLD}) }"; then | ||||
|         echo "${i}" | ||||
|   for i in $(seq 1 $PAGES) ; do | ||||
|     PERCENT=$(gs -o -  -dFirstPage=${i} -dLastPage=${i} -sDEVICE=ink_cov "${IN}" | grep CMYK | nawk 'BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END {  printf "%.5f\n", sum } ') | ||||
|     >&2 echo -n "Color-sum in page $i is $PERCENT: " | ||||
|     if awk "BEGIN { exit !($PERCENT > $THRESHOLD) }"; then | ||||
|       echo $i | ||||
|       >&2 echo "Page added to document" | ||||
|     else | ||||
|         >&2 echo "Color-sum is ${PERCENT}: will remove blank page ${i} of ${IN}" | ||||
|       >&2 echo "Page removed from document" | ||||
|     fi | ||||
| done | ||||
|   done | ||||
| } | ||||
|  | ||||
| NON_BLANK="$(non_blank)" | ||||
| NON_BLANK="$(tr '\n' ' ' <<<"${NON_BLANK}")" | ||||
| NON_BLANK="${NON_BLANK% }" | ||||
| NON_BLANK=$(non_blank) | ||||
|  | ||||
| if [ -n "${NON_BLANK}" ]; then | ||||
|     NON_BLANK=$(echo "${NON_BLANK}" | tr ' ' ",") | ||||
|     qpdf "${IN}" --replace-input --pages . "${NON_BLANK}" -- | ||||
| if [ -n "$NON_BLANK" ]; then | ||||
|   NON_BLANK=$(echo $NON_BLANK  | tr ' ' ",") | ||||
|   qpdf "$IN" --warning-exit-0 --replace-input --pages . $NON_BLANK -- | ||||
| fi | ||||
|  | ||||
| ``` | ||||
|  | ||||
| ## Cleaning with `qpdf` | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jörg Menke
					Jörg Menke