-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscan.sh
62 lines (50 loc) · 1.18 KB
/
scan.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
# scan.sh - batch-scan pages from the document feeder, move blank pages out of
# the way, OCR the remaining pages into one PDF each and merge all PDFs into
# gesamt.pdf.
#
# Run from the directory containing tests.pl; page images (*.jpg), marker
# files and PDFs are all read from and written to the current directory.
# External tools: perl, scanimage, tesseract (with "deu" language data), pdftk.
#
# tests.pl is called with these subcommands (its implementation is not part
# of this script, so the descriptions follow from how the results are used):
#   get_start_pdf               prints the value passed to --batch-start
#   is_blank FILE               exit 0 -> FILE is moved to blanks/
#   not_ready_for_new_forks     exit 0 -> wait before starting another OCR job
#   every_jpg_has_nonempty_pdf  exit 0 -> the processing loop finishes

STARTPDF=$(perl tests.pl get_start_pdf)
echo "STARTING AT $STARTPDF"

# Scan in the background so pages can be processed while scanning continues.
scanimage --batch --batch-start="$STARTPDF" --source="ADF Duplex" --resolution 300 --format=jpeg --mode Color &

# Head start for the scanner before polling for page images begins.
sleep 20

# Processing loop, run as a background job: repeatedly walks all *.jpg files
# until tests.pl reports that every image has a non-empty PDF.
{
  COUNT=0

  # Drop marker files left over from an earlier run; -f keeps a clean first
  # run (nothing to delete) quiet.
  rm -f -- .out*.not_blank
  mkdir -p "blanks"

  while true; do
    for FILENAME in *.jpg; do
      # Hidden marker file recording that this image was already classified
      # as not blank, so it is not examined again on later passes.
      NOTBLANK=".$FILENAME.not_blank"

      # The -f test also filters out the literal "*.jpg" that the glob
      # yields while no image exists yet.
      if [ -f "$FILENAME" ] && [ ! -f "$NOTBLANK" ]; then
        if perl tests.pl is_blank "$FILENAME"; then
          mv -- "$FILENAME" blanks/
        else
          BASENAME=${FILENAME%.jpg}
          PDFFILE=$BASENAME.pdf
          if [ ! -f "$PDFFILE" ]; then
            echo "Trying to OCR $PDFFILE..."
            # Throttle: hold off while tests.pl says too many jobs are running.
            while perl tests.pl not_ready_for_new_forks; do
              echo "Too many open forks, waiting for some of them to finish"
              sleep 1
            done
            # tesseract appends ".pdf" to the output base name itself.
            tesseract -l deu "$FILENAME" "$BASENAME" pdf &
            COUNT=0
          fi
          echo "NOT BLANK" > "$NOTBLANK"
        fi
      fi
      # Number of loop iterations since the last OCR job was started;
      # printed after each pass as a progress indicator.
      COUNT=$((COUNT + 1))
    done

    sleep 1
    if perl tests.pl every_jpg_has_nonempty_pdf; then
      # The OCR jobs are children of this background group, not of the main
      # shell, so they have to be reaped here; otherwise the merge below
      # could start while a PDF is still being written.
      wait
      echo "FINISHED OCR!!!"
      # Ends only this background group, not the whole script.
      exit
    fi
    echo "$COUNT"
  done
} &

# Wait for the scanner and the processing loop, counting failed jobs.
FAIL=0
for job in $(jobs -p); do
  echo "$job"
  wait "$job" || FAIL=$((FAIL + 1))
done
if [ "$FAIL" -gt 0 ]; then
  echo "warning: $FAIL background job(s) exited with a non-zero status" >&2
fi

# Rebuild the merged document from scratch so that a stale gesamt.pdf is not
# picked up by the *.pdf glob as an input.
rm -f -- gesamt.pdf
# NOTE(review): the glob expands in lexicographic order, so a page numbered 10
# sorts before one numbered 2 unless the numbering chosen via get_start_pdf
# keeps all numbers at the same width - confirm against tests.pl.
pdftk *.pdf cat output gesamt.pdf