-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.sh
executable file
·56 lines (49 loc) · 1.23 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash
# Check script dependencies.
# Each required program is looked up in turn; the first one the shell cannot
# resolve is reported on stderr and the script stops with exit status 1.
for required_tool in wget gunzip sqlite3; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo >&2 "Cannot locate $required_tool. Is it installed? Aborting."
    exit 1
  fi
done
# Do not leave the loop variable behind for the rest of the script.
unset required_tool
# Make sure pwd is script path, so that ./data, imdb.db and import.sql are all
# resolved next to the script no matter where it was invoked from.
cd -- "$(dirname -- "$0")" || exit

# Print the given message on stderr and abort with a failure status.
die() {
  echo >&2 "$*"
  exit 1
}

# ./data is scratch space. Remove it on every exit path (success or failure)
# so an aborted run does not leave stale files behind for the next one.
trap 'rm -rf ./data' EXIT

# Create data and download files into it.
# -p: do not fail when ./data is left over from an earlier, interrupted run.
mkdir -p ./data || die "Cannot create ./data. Aborting."
echo "----------------------------------"
echo "Downloading data."
sleep 3
# NOTE(review): a recursive wget may exit non-zero even though every wanted
# file arrived (e.g. when an ancillary request gets an error response), so a
# non-zero status is only reported here; the check below decides whether the
# download is usable. Truncated archives are caught by gunzip afterwards.
wget -r -np -l 1 -A gz -nH --cut-dirs=1 --directory-prefix=data https://datasets.imdbws.com/ ||
  echo >&2 "Warning: wget exited with status $?. Checking downloaded files."
compgen -G 'data/*.gz' >/dev/null || die "No .gz files were downloaded. Aborting."

# Unpack data
echo "----------------------------------"
echo "Download complete. Unzipping data."
sleep 3
# -f: overwrite an already-unpacked file from a previous run instead of
# refusing or prompting. "--" keeps odd file names from being read as options.
gunzip -f -- data/*.gz || die "Unpacking the downloaded archives failed. Aborting."

# Convert quotes
echo "----------------------------------"
echo "Escaping quotes for sqlite import."
sleep 3
# Rewrites every " as \" in place in each unpacked .tsv file.
sed -i 's/\"/\\\"/g' -- data/*.tsv || die "Escaping quotes failed. Aborting."

# Create database and import
echo "----------------------------------"
echo "Creating database."
sleep 3
[[ -r import.sql ]] || die "Cannot read import.sql. Aborting."
# The previous database is discarded only now, after download and
# preprocessing have succeeded. -f: a missing imdb.db (first run) is fine.
rm -f imdb.db || die "Cannot remove the existing imdb.db. Aborting."
sqlite3 imdb.db <import.sql || die "sqlite3 import failed. Aborting."

# Cleanup of ./data is performed by the EXIT trap installed above.
echo "----------------------------------"
echo "Process completed. imdb.db created."
echo "----------------------------------"