-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathfoolfuuka-search
executable file
·36 lines (33 loc) · 1.27 KB
/
foolfuuka-search
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Search 4chan archives based on FoolFuuka
#
# Usage: foolfuuka-search DOMAIN QUERY
#   DOMAIN  host name of the archive; requests go to https://DOMAIN/_/search/...
#   QUERY   search text; it is inserted into the URL path verbatim, so it must
#           already be URL-encoded if it contains special characters
#
# Output: one post per line in HTML, prefixed with the post ID
# Note that posts can appear multiple times in the output in some cases. You're encouraged to filter based on the post ID.
# Progress and error messages are written to stderr.
# Exit status: 0 when the archive reports "No results found" (search exhausted),
# 1 when the search cannot make progress, 2 on a usage error.

if [[ $# -lt 2 || -z "$1" ]]; then
  printf 'Usage: %s DOMAIN QUERY\n' "${0##*/}" >&2
  exit 2
fi

domain="$1"
q="$2"

# Pages requested per end date before the end date is moved back.
# NOTE(review): presumably the archive caps search pagination at this depth - confirm.
max_page=200

declare -i page
end=
# Start from a far-future end date (the last day representable in a signed
# 32-bit time_t) so that the first pass is not limited by the end date.
nextend=2038-01-19

while :; do
  # Each pass must have moved the end date; otherwise the next pass would
  # request exactly the same pages again, forever.
  if [[ "${nextend}" == "${end}" ]]; then
    echo "Error: no progress with end date ${end}, giving up" >&2
    exit 1
  fi
  end="${nextend}"
  page=1

  while ((page <= max_page)); do
    url="https://${domain}/_/search/text/${q}/end/${end}/page/${page}/"
    echo "Grabbing ${url}" >&2
    if ! content=$(curl -s "${url}"); then
      echo "Error: download of ${url} failed" >&2
      break
    fi

    # Newlines are removed first so that each <article> element can be matched
    # on a single line; the result holds one post per line.
    articles=$(tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>')

    if [[ -n "${articles}" ]]; then
      # Prefix every post with the first id="<digits>" value found in it.
      perl -pe 's,^(.*?id="(\d+)".*$),\2 \1,' <<<"${articles}"

      # Get last date seen to update end date; subtract one because the search appears to be a bit unreliable
      # The end date is only touched when a timestamp was actually found and
      # parsed, so pages without posts (errors, empty pages) cannot reset it.
      last_time=$(tail -n 1 <<<"${articles}" | grep -Po '<time datetime="\K[^"]+' | head -n 1)
      if [[ -n "${last_time}" ]] && last_epoch=$(date --date="${last_time}" '+%s'); then
        nextend=$(date --date="@$((last_epoch - 86400))" '+%Y-%m-%d')
      fi
    fi

    if grep -qF '<div class="alert"' <<<"${content}"; then
      if tr -d '\n' <<<"${content}" | grep -Po '<div class="alert".*?</div>' | grep -q 'No results found'; then
        # The archive has nothing more for this query: leave both loops.
        break 2
      fi
      # Any other alert: abandon this pass and restart from the last date seen.
      echo "Error" >&2
      break
    fi

    page+=1
  done
done