diff --git a/README.md b/README.md index 2ad2344..d5d9762 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ Airbnb claims to be part of the "sharing economy" and disrupting the hotel indus The dataset contains +12,000 property listings from Bali, Indonesia collected on April, 2018. This is not a complete collection of all the listings available but hopefully a good representation. +![Heatmap](assets/heatmap.png) + The data has been scraped from airbnb.com with a custom web scraper built with [Scrapy][ref:scrapy]. The crawler iterates on the Bali neighborhoods as provided by Airbnb and on the price range. Note that the scraper is generic enough to work on other cities, but this hasn't been tested. diff --git a/assets/heatmap.html b/assets/heatmap.html new file mode 100644 index 0000000..01eb006 --- /dev/null +++ b/assets/heatmap.html @@ -0,0 +1,12265 @@ + + + + +Google Maps - pygmaps + + + + +
+
+
diff --git a/assets/heatmap.png b/assets/heatmap.png
new file mode 100644
index 0000000..7649f91
Binary files /dev/null and b/assets/heatmap.png differ
diff --git a/assets/map.png b/assets/map.png
new file mode 100644
index 0000000..641c966
Binary files /dev/null and b/assets/map.png differ
diff --git a/scripts/create_heatmap.py b/scripts/create_heatmap.py
new file mode 100644
index 0000000..2541d9c
--- /dev/null
+++ b/scripts/create_heatmap.py
@@ -0,0 +1,34 @@
+"""Render a Google Maps heatmap of the Bali listings and screenshot it.
+
+Reads data/listings_bali_201804.csv, writes assets/heatmap.html with gmplot,
+then captures assets/heatmap.png with headless Firefox. The data and
+screenshot paths are relative to the current working directory.
+"""
+import pandas as pd
+from gmplot import gmplot
+import os
+import time
+from selenium import webdriver
+
+filepath = os.path.abspath('assets/heatmap.html')
+
+# Load dataset
+df = pd.read_csv("data/listings_bali_201804.csv")
+# Heatmap on Google Maps
+gmap = gmplot.GoogleMapPlotter(-8.41, 115.07, 10)
+gmap.heatmap(df.latitude, df.longitude)
+gmap.draw(filepath)
+
+# Screenshot the generated page with headless Firefox
+options = webdriver.FirefoxOptions()
+options.add_argument('--headless')
+# `firefox_options=` is the deprecated spelling of this keyword
+browser = webdriver.Firefox(options=options)
+try:
+    browser.get(f"file://{filepath}")
+    # Fixed pause, presumably so the map finishes rendering before capture
+    time.sleep(3)
+    browser.save_screenshot("assets/heatmap.png")
+finally:
+    # Always shut the browser down, even if loading or capturing fails
+    browser.quit()
diff --git a/scripts/create_map.py b/scripts/create_map.py
new file mode 100644
index 0000000..cabf707
--- /dev/null
+++ b/scripts/create_map.py
@@ -0,0 +1,31 @@
+"""Plot every Bali listing as a dot on a coastline map of the island.
+
+Reads data/listings_bali_201804.csv and writes assets/map.png. Both paths
+are relative to the current working directory.
+"""
+import matplotlib
+# Select the non-interactive Agg backend before pyplot is imported
+matplotlib.use('Agg')
+
+import matplotlib.pyplot as plt
+from mpl_toolkits.basemap import Basemap
+import pandas as pd
+
+# Load dataset
+df = pd.read_csv("data/listings_bali_201804.csv")
+# Draw map of Bali
+fig, ax = plt.subplots(figsize=(20, 10))
+m = Basemap(resolution='f', projection='lcc',
+            lon_0=115.1889, lat_0=-8.4095,
+            llcrnrlon=114.3531, llcrnrlat=-8.8980,
+            urcrnrlon=115.8595, urcrnrlat=-8.0159)
+m.drawmapboundary(fill_color='#46bcec')
+m.fillcontinents(color='#f2f2f2', lake_color='#46bcec')
+m.drawcoastlines()
+# Project all coordinates at once and draw them in a single plot call
+# instead of creating one artist per listing
+x, y = m(df.longitude.values, df.latitude.values)
+m.plot(x, y, 'o',
+       markersize=2, color='#f70000', alpha=.6)
+# The keyword is `pad_inches`; `pad_index` is not a savefig argument
+fig.savefig("assets/map.png", bbox_inches='tight', pad_inches=0)