forked from vfonov/re_mtl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_scrapper.sh
executable file
·33 lines (22 loc) · 1.04 KB
/
run_scrapper.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#! /bin/bash
# A crawler that is run every day to update information.
#
# Configuration (fill in the defaults below, or export the variables before
# running the script):
#   IN     - where to keep a backup of data from centris
#   OUT    - where to keep the final sqlite database
#   PREFIX - scripts location
#
# The individual steps are best-effort: a failing step does not prevent the
# following ones from running. Only a missing configuration value or an
# unreachable PREFIX directory aborts the run.

# setup environment
# conda activate <your conda environment>

# setup parameters
IN="${IN:-}"         # < where to keep a backup of data from centris>
OUT="${OUT:-}"       # < where to keep final sqlite database>
PREFIX="${PREFIX:-}" # <scripts location>

# Fail fast with a clear message instead of running "/scrapper.py" or writing
# to "/property.sqlite3" when the placeholders were never filled in.
: "${IN:?IN is not set (where to keep a backup of data from centris)}"
: "${OUT:?OUT is not set (where to keep final sqlite database)}"
: "${PREFIX:?PREFIX is not set (scripts location)}"

# Single definition of the database path shared by the steps below.
db="$OUT/property.sqlite3"

# scrape realtor, dump data in .json.gz format
python "$PREFIX/scrapper.py" "$IN"

# scrape kijiji for Montreal
python "$PREFIX/kijiji_scraper.py" \
  'https://www.kijiji.ca/b-appartement-condo/ville-de-montreal/c37l1700281?ad=offering' \
  --db "$db"

# scrape for south shore
python "$PREFIX/kijiji_scraper.py" \
  'https://www.kijiji.ca/b-appartement-condo/longueuil-rive-sud/c37l1700279?ad=offering' \
  --db "$db"

# summarize from realtor inside sqlite3 database
python "$PREFIX/summarize.py" "$IN" "$db"

# preprocess data for further statistical analysis
# The R steps use paths relative to PREFIX, so do not continue from the wrong
# directory if the cd fails.
cd "$PREFIX" || {
  printf 'cannot cd to %s\n' "$PREFIX" >&2
  exit 1
}
Rscript preprocess_data.R

# regenerate website - this will update files in docs
Rscript -e "rmarkdown::render_site('index.Rmd')"
Rscript -e "rmarkdown::render_site('stats_habr.Rmd')"