#!/bin/bash
# OCR smoke test, part 1: run ocrmypdf on the sample PDF located next to this
# script and capture the text layer of the result in $TEXT.
# Requires ocrmypdf and pdftotext to be available on PATH.
#
# -e: abort as soon as any command (mktemp, ocrmypdf, pdftotext) fails.
# -u: treat a reference to an unset variable as an error.
set -eu

# Scratch directory that holds the OCR output; removed on every exit path.
WORKDIR=$(mktemp -d)
cleanup() {
    # $WORKDIR is expanded when the trap fires and is quoted, so a temp path
    # containing whitespace or glob characters is passed to rm as exactly one
    # argument. "--" keeps the path from ever being parsed as an option.
    rm -rf -- "$WORKDIR"
}
trap cleanup EXIT

INPUT="$(dirname "$0")/ocr-test2.pdf"
OUTPUT="$WORKDIR/output.pdf"

# Fail early with a clear message instead of relying on ocrmypdf's own error.
if [[ ! -f "$INPUT" ]]; then
    echo "ERROR: input PDF not found: $INPUT" >&2
    exit 1
fi

echo "Running ocrmypdf on $INPUT ..."
# --force-ocr: OCR every page even if it already carries a text layer.
# -l eng: recognize English text.
ocrmypdf --force-ocr -l eng "$INPUT" "$OUTPUT"

# "-" as the output file makes pdftotext write the extracted text to stdout.
TEXT=$(pdftotext "$OUTPUT" -)

# Expected OCR output of ocr-test2.pdf:
#   This is OCR test.
EXPECTED_WORDS=(This is OCR test)

# Verify that every expected word occurs in the extracted text ($TEXT).
# All words are checked (no early exit) so a single run reports every miss;
# the script exits with status 1 if at least one word was not found.
FAILED=0
for WORD in "${EXPECTED_WORDS[@]}"; do
    # -i: OCR may change letter case, so compare case-insensitively.
    # -w: match whole words only; otherwise "is" is already satisfied by the
    #     "is" inside "This" and that check could never fail on its own.
    # -F: treat the word as a literal string, not as a regular expression.
    # --: keep a word starting with "-" from being parsed as an option.
    if grep -qiwF -- "$WORD" <<<"$TEXT"; then
        echo "PASS: '$WORD' found in OCR output"
    else
        echo "FAIL: '$WORD' not found in OCR output"
        echo "Extracted text: $TEXT"
        FAILED=1
    fi
done

if [[ "$FAILED" -eq 1 ]]; then
    exit 1
fi
