Skip to content

Commit

Permalink
chore: start adding markdown to new notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
RaczeQ committed Jan 24, 2025
1 parent fa9bb62 commit f11e025
Showing 1 changed file with 24 additions and 30 deletions.
54 changes: 24 additions & 30 deletions examples/use_cases/simple_ml_with_overture_maps_data.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simple ML model using Overture Maps data\n",
"\n",
"Prerequisites:\n",
"- 12 GB of RAM\n",
"- Installed libraries: `srai[osm,overturemaps,plotting]`, `contextily`, `seaborn`, `xgboost`, `pypalettes`"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -18,15 +29,13 @@
"from h3 import int_to_str, str_to_int\n",
"from h3ronpy import grid_disk_aggregate_k\n",
"from pypalettes import load_cmap\n",
"from pytorch_lightning import seed_everything\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"from srai.embedders import ContextualCountEmbedder\n",
"\n",
"# from srai.embedders import CountEmbedder\n",
"from srai.h3 import h3_to_shapely_geometry\n",
"from srai.joiners import IntersectionJoiner\n",
"from srai.loaders import OSMOnlineLoader\n",
"from srai.loaders.overturemaps_loader import OvertureMapsLoader\n",
"from srai.neighbourhoods import H3Neighbourhood\n",
"from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf"
]
Expand All @@ -37,8 +46,7 @@
"metadata": {},
"outputs": [],
"source": [
"SEED = 71\n",
"seed_everything(SEED)"
"SEED = 71"
]
},
{
Expand Down Expand Up @@ -163,7 +171,7 @@
")\n",
"h3_regions[\n",
" (h3_regions.city == \"Madrid\") & (h3_regions.distance_to_station > H3_PREDICTION_RANGE)\n",
"].plot(ax=ax, color=\"gray\", alpha=0.2)\n",
"].plot(ax=ax, color=\"gray\", alpha=0.3)\n",
"bicycle_stations_per_city[\"Madrid\"].representative_point().plot(ax=ax, color=\"black\", markersize=1)\n",
"\n",
"cx.add_basemap(ax, crs=h3_regions.crs, source=cx.providers.CartoDB.PositronNoLabels, zoom=13)\n",
Expand All @@ -178,12 +186,9 @@
"metadata": {},
"outputs": [],
"source": [
"from srai.loaders.overturemaps_loader import OvertureMapsLoader\n",
"\n",
"OVERTURE_MAPS_HIERARCHY_DEPTH_VALUES = {\n",
" (\"base\", \"infrastructure\"): 1,\n",
" (\"base\", \"land\"): 1,\n",
" # (\"base\", \"land_cover\"): 1,\n",
" (\"base\", \"land_use\"): 1,\n",
" (\"base\", \"water\"): 1,\n",
" (\"transportation\", \"segment\"): 2,\n",
Expand All @@ -197,20 +202,16 @@
" hierarchy_depth=list(OVERTURE_MAPS_HIERARCHY_DEPTH_VALUES.values()),\n",
" include_all_possible_columns=False,\n",
").load(area=h3_regions)\n",
"features # 637785 rows × 111 columns\n",
"features\n",
"\n",
"# If you want to use OpenStreetMap data instead, you can use `OSMPbfLoader`\n",
"# with the `GEOFABRIK_LAYERS` filter:\n",
"\n",
"# from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS\n",
"# from srai.loaders.osm_loaders import OSMPbfLoader\n",
"# features = OSMPbfLoader().load(area=h3_regions, tags=GEOFABRIK_LAYERS)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"features.columns.to_list()"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -233,16 +234,10 @@
" concatenate_vectors=False,\n",
" count_subcategories=False,\n",
").transform(regions_gdf=h3_regions, features_gdf=features, joint_gdf=joint)\n",
"embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings = embeddings.drop(columns=[c for c in embeddings.columns if \"bicycle_rental\" in c])\n",
"\n",
"# If you are using OpenStreetMap data, remember to remove bicycle_sharing stations from the dataset\n",
"# embeddings = embeddings.drop(columns=[c for c in embeddings.columns if \"bicycle_rental\" in c])\n",
"\n",
"embeddings"
]
},
Expand Down Expand Up @@ -462,7 +457,6 @@
" \"pad\": -0.05,\n",
" },\n",
" vmin=max(0, city_data[\"predicted_distance_to_station\"].min()),\n",
" # vmax=max(H3_PREDICTION_MAX_DISTANCE, city_data[\"predicted_distance_to_station\"].max()),\n",
" vmax=city_data[\"predicted_distance_to_station\"].max(),\n",
" )\n",
" bicycle_stations_per_city[city_name].representative_point().plot(\n",
Expand Down

0 comments on commit f11e025

Please sign in to comment.