Skip to content

Commit

Permalink
Add a cache warmer script
Browse files Browse the repository at this point in the history
  • Loading branch information
joecorall committed May 7, 2024
1 parent e4b0c3d commit 8352833
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
scyllaridae
scyllaridae.yml
!examples/*/scyllaridae.yml
9 changes: 9 additions & 0 deletions examples/cache-warmer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Cache-warmer image: the scyllaridae base image plus jq and the warmer
# script/config copied into /app.
ARG TAG=main
ARG DOCKER_REPOSITORY=local
FROM ${DOCKER_REPOSITORY}/scyllaridae:${TAG}

# jq is used by cmd.sh to read the paged-content JSON.
# `--no-cache` already fetches a fresh package index and keeps nothing in
# /var/cache/apk, so a separate `apk update` only adds a stale index to the
# layer.
RUN apk add --no-cache jq==1.7.1-r0

COPY cmd.sh /app/
COPY scyllaridae.yml /app/scyllaridae.yml
65 changes: 65 additions & 0 deletions examples/cache-warmer/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bash

# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

#######################################
# Run grep with the given arguments, preferring `ggrep` when it is on PATH
# (macOS needs ggrep for the -P flag).
# Arguments: passed through unchanged to the chosen grep binary
# Returns:   exit status of the grep invocation
#######################################
mygrep() {
  local grep_bin=grep
  if command -v ggrep >/dev/null 2>&1; then
    grep_bin=ggrep
  fi
  "${grep_bin}" "$@"
}

# Warm the site's caches in two passes:
#   1. request every <loc> URL found in the paginated sitemap.xml
#   2. request the book-manifest endpoint for every value returned by
#      /api/v1/paged-content (used below as node IDs)
# Requires the DOMAIN environment variable (host name without scheme).
: "${DOMAIN:?DOMAIN must be set to the host name of the site to warm}"

# how many cURL commands to run in parallel
PARALLEL_EXECUTIONS=3

# Scratch files live in a private temp directory that is removed on every
# exit path, including failures under `set -e`.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf -- "${WORK_DIR}"' EXIT
LINKS_FILE="${WORK_DIR}/links.xml"
PAGED_CONTENT_FILE="${WORK_DIR}/pc.json"

# Base URL of the sitemap.xml file
BASE_URL="https://${DOMAIN}/sitemap.xml"
PAGE=1

while true; do
  NEXT_PAGE_URL="${BASE_URL}?page=${PAGE}"
  STATUS=$(curl -w '%{http_code}' \
    --silent \
    -o "${LINKS_FILE}" \
    "${NEXT_PAGE_URL}")

  # The first non-200 response marks the end of the sitemap pages.
  if [[ "${STATUS}" -ne 200 ]]; then
    break
  fi

  mapfile -t URLS < <(mygrep -oP '<loc>\K[^<]+' "${LINKS_FILE}")
  while (( ${#URLS[@]} > 0 )); do
    # Start every batch with an empty PID list; waiting again on a PID that
    # was already reaped fails and would report a bogus job failure.
    job_ids=()
    for ((i = 0; i < PARALLEL_EXECUTIONS; i++)); do
      remaining=${#URLS[@]}
      if (( remaining == 0 )); then
        break
      fi
      # Pop the last URL off the list.
      URL="${URLS[remaining - 1]}"
      unset "URLS[$((remaining - 1))]"

      echo "Crawling: $URL"
      curl --silent -o /dev/null "${URL}" &
      job_ids+=("$!")
    done

    # Barrier: let the whole batch finish before starting the next one.
    for job_id in "${job_ids[@]}"; do
      wait "$job_id" || echo "One job failed, but continuing anyway"
    done
  done

  PAGE=$((PAGE + 1))
done

curl -v "https://${DOMAIN}/api/v1/paged-content" > "${PAGED_CONTENT_FILE}"

# One entry per line from the top-level JSON value.
mapfile -t NIDS < <(jq -r '.[]' "${PAGED_CONTENT_FILE}")
for NID in "${NIDS[@]}"; do
  echo "Processing: $NID"
  curl -s -o /dev/null "https://${DOMAIN}/node/${NID}/book-manifest?cache-warmer=1"
done
6 changes: 6 additions & 0 deletions examples/cache-warmer/scyllaridae.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# NOTE(review): "*" presumably means every source MIME type is accepted;
# confirm against the scyllaridae config documentation.
allowedMimeTypes:
  - "*"
# Command to execute; `default` is the only entry, so no MIME-specific
# override is configured here.
cmdByMimeType:
  default:
    cmd: "/app/cmd.sh"
1 change: 1 addition & 0 deletions internal/config/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ func BuildExecCommand(sourceMimeType, destinationMimeType, addtlArgs string, c *
}

cmd := exec.Command(cmdConfig.Cmd, args...)
cmd.Env = os.Environ()

return cmd, nil
}
Expand Down

0 comments on commit 8352833

Please sign in to comment.