Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intersect #386

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 149 additions & 83 deletions wntr/gis/geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,28 @@ def snap(A, B, tolerance):

def _backgound(A, B):

"""
hull_geom = A.unary_union.convex_hull

hull_data = gpd.GeoDataFrame(pd.DataFrame([{'geometry': hull_geom}]), crs=A.crs)

background_geom = hull_data.overlay(B, how='difference').unary_union

background = gpd.GeoDataFrame(pd.DataFrame([{'geometry': background_geom}]), crs=A.crs)
background.index = ['BACKGROUND']
"""
Ai_hull = gpd.GeoSeries([A.unary_union.envelope], crs=A.crs)
Bi = gpd.GeoSeries([B.unary_union], crs=B.crs)
background = Ai_hull.difference(Bi)
background = background.to_frame('geometry')
background.index = ['BACKGROUND']

# background.plot()

return background


def intersect(A, B, B_value=None, include_background=False, background_value=0):
def intersect(A, B, attributes=None, include_background=False):
"""
Intersect Points, Lines or Polygons in A with Points, Lines, or Polygons in B.
Return statistics on the intersection.
Expand All @@ -164,36 +174,23 @@ def intersect(A, B, B_value=None, include_background=False, background_value=0):
GeoDataFrame containing Point, LineString, or Polygon geometries
B : geopandas GeoDataFrame
GeoDataFrame containing Point, LineString, or Polygon geometries
B_value : str or None (optional)
Column name in B used to assign a value to each geometry.
attributes : list or None (optional)
List of column names in B used to assign a value to each geometry.
Default is None.
include_background : bool (optional)
Include background, defined as space covered by A that is not covered by B
(overlay difference between A and B). The background geometry is added
to B and is given the name 'BACKGROUND'. Default is False.
background_value : int or float (optional)
The value given to background space. This value is used in the intersection
statistics if a B_value column name is provided. Default is 0.

Include background, defined as space covered by A that is not covered by B
(overlay difference between A and B). The background geometry is added
to B and is given the name 'BACKGROUND'. Default is False.

Returns
-------
intersects : DataFrame
pandas DataFrame
Intersection statistics (index = A.index, columns = defined below)

Columns include:
- n: number of intersecting B geometries
- intersections: list of intersecting B indices

If B_value is given:
- values: list of intersecting B values
- sum: sum of the intersecting B values
- min: minimum of the intersecting B values
- max: maximum of the intersecting B values
- mean: mean of the intersecting B values

If A contains Lines and B contains Polygons:
- weighted_mean: weighted mean of intersecting B values

- fraction: list with fraction of A that intersects B
- n: number of intersecting B geometries
- for each attribute: list of attribute values
"""
if not has_shapely or not has_geopandas:
raise ModuleNotFoundError('shapley and geopandas are required')
Expand All @@ -206,79 +203,148 @@ def intersect(A, B, B_value=None, include_background=False, background_value=0):
assert (B['geometry'].geom_type).isin(['Point', 'LineString',
'MultiLineString', 'Polygon',
'MultiPolygon']).all()
if isinstance(B_value, str):
assert B_value in B.columns
if isinstance(attributes, list):
for B_value in attributes:
assert B_value in B.columns
isinstance(include_background, bool)
isinstance(background_value, (int, float))
assert A.crs == B.crs, "A and B must have the same crs."
assert A.crs == B.crs

if include_background:
background = _backgound(A, B)
if B_value is not None:
background[B_value] = background_value
B = pd.concat([B, background])

original_A_index = A.index
A = A.reset_index(drop=True)

original_B_index = B.index
B = B.reset_index(drop=True)

intersects = gpd.sjoin(A, B, predicate='intersects')

Ai = intersects.loc[:,'geometry'].reset_index()
Bi = B.loc[intersects['index_right'], 'geometry'].reset_index()
intersect_geom = Ai.intersection(Bi)
if (A['geometry'].geom_type).isin(['LineString', 'MultiLineString']).all():
fraction = intersect_geom.length/Ai.length
elif (A['geometry'].geom_type).isin(['Polygon', 'MultiPolygon']).all():
fraction = intersect_geom.area/Ai.area
else: # Point geometry, no fraction
fraction = pd.Series(None, index=Ai.index)
intersects['intersection_geometry'] = intersect_geom.values
intersects['intersection_fraction'] = fraction.round(3).values

intersects.index.name = '_tmp_index_name' # set a temp index name for grouping

# Sort values by index and intersecting object
intersects['sort_order'] = 1 # make sure 'BACKGROUND' is listed first
intersects.loc[intersects['index_right'] == 'BACKGROUND', 'sort_order'] = 0
intersects.sort_values(['_tmp_index_name', 'sort_order', 'index_right'], inplace=True)
intersects.sort_values(['_tmp_index_name', 'index_right'], inplace=True)

n = intersects.groupby('_tmp_index_name')['geometry'].count()
# bring original B indices back for B_indices list
# intersects.index_right = intersects.index_right.apply(lambda x: "BACKGROUND" if x == "BACKGROUND" else original_A_index[x])
intersects.index_right = intersects.index_right.apply(lambda x: original_B_index[x])
B_indices = intersects.groupby('_tmp_index_name')['index_right'].apply(list)
stats = pd.DataFrame(index=A.index, data={'intersections': B_indices,
'n': n,})
stats['n'] = stats['n'].fillna(0)
stats['n'] = stats['n'].apply(int)
stats.loc[stats['intersections'].isnull(), 'intersections'] = stats.loc[stats['intersections'].isnull(), 'intersections'] .apply(lambda x: [])
B_fraction = intersects.groupby('_tmp_index_name')['intersection_fraction'].apply(list)
B_n = B_indices.apply(len)

if B_value is not None:
stats['values'] = intersects.groupby('_tmp_index_name')[B_value].apply(list)
stats['sum'] = intersects.groupby('_tmp_index_name')[B_value].sum()
stats['min'] = intersects.groupby('_tmp_index_name')[B_value].min()
stats['max'] = intersects.groupby('_tmp_index_name')[B_value].max()
stats['mean'] = intersects.groupby('_tmp_index_name')[B_value].mean()

stats = stats.reindex(['intersections', 'values', 'n', 'sum', 'min', 'max', 'mean'], axis=1)
stats.loc[stats['values'].isnull(), 'values'] = stats.loc[stats['values'].isnull(), 'values'] .apply(lambda x: [])

weighted_mean = False
if (A['geometry'].geom_type).isin(['LineString', 'MultiLineString']).all():
if (B['geometry'].geom_type).isin(['Polygon', 'MultiPolygon']).all():
weighted_mean = True

if weighted_mean and B_value is not None:
stats['weighted_mean'] = 0
A_length = A.length
covered_length = pd.Series(0, index = A.index)

for i in B.index:
B_geom = gpd.GeoDataFrame(B.loc[[i],:], crs=B.crs)
val = float(B_geom[B_value])
A_subset = A.loc[stats['intersections'].apply(lambda x: i in x),:]
#print(i, lines_subset)
A_clip = gpd.clip(A_subset, B_geom)
A_clip_length = A_clip.length
A_clip_index = A_clip.index
A_index = intersects.index.unique().sort_values()
results = pd.DataFrame(index=A_index,
data={'intersections': B_indices,
'fraction': B_fraction,
'n': B_n})

if isinstance(attributes, list):
for B_value in attributes:
results[B_value] = intersects.groupby('_tmp_index_name')[B_value].apply(list)

# replace original indices and remove tmp_index_name
A.index = original_A_index
B.index = original_B_index
results.index = original_A_index[results.index]

return results


def intersect_filter_rows(intersect, entry, operation, fraction_threshold):
"""
Keep row if entry in `intersections` meets `fraction` threshold criteria

Parameters
----------
intersect: pd.DataFrame
output from intersect
entry: str, int, or float
name of entry in `intersections` column
operation: numpy operator
Numpy operator, options include
np.greater,
np.greater_equal,
np.less,
np.less_equal,
np.equal,
np.not_equal
fraction_threshold: float
Filter value
"""
assert all([i in intersect.columns for i in ['intersections', 'fraction']])
assert isinstance(intersect['intersections'].iloc[0], list)
assert isinstance(intersect['fraction'].iloc[0], list)

mask = []
for i, row in intersect.iterrows():
key_i = np.where(np.array(row['intersections']) == entry)[0]
if len(key_i) > 0:
keep = operation(row['fraction'][key_i[0]], fraction_threshold)
mask.append(keep)
else:
mask.append(True)

if A_clip_length.shape[0] > 0:
fraction_length = A_clip_length/A_length[A_clip_index]
covered_length[A_clip_index] = covered_length[A_clip_index] + fraction_length
weighed_val = fraction_length*val
stats.loc[A_clip_index, 'weighted_mean'] = stats.loc[A_clip_index, 'weighted_mean'] + weighed_val

# Normalize weighted mean by covered length (can be over 1 if polygons overlap)
# Can be less than 1 if there are gaps (when background is not used)
stats['weighted_mean'] = stats['weighted_mean']/covered_length

# Covered_length is NaN if length A is 0, set weighted mean to mean
stats.loc[covered_length.isna(), 'weighted_mean'] = stats.loc[covered_length.isna(), 'mean']
return intersect.loc[mask,:]


def intersect_stats(intersect, attribute, fraction_threshold=None, nan_value=None):
"""
Compute stats on an attribute of intersections.
Apply a fraction threshold and fill nan values if needed.
"""
assert all([i in intersect.columns for i in ['intersections', 'fraction', attribute]])
assert isinstance(intersect['intersections'].iloc[0], list)
assert isinstance(intersect['fraction'].iloc[0], list)
assert isinstance(intersect[attribute].iloc[0], list)

stats = {'intersections': [], 'fraction': [], 'n': [], attribute: [],
'sum': [], 'min': [], 'max': [], 'mean': [], 'weighted_mean': []}

for i in intersect.index:
val = np.array(intersect[attribute][i])
frac = np.array(intersect['fraction'][i])
inter = np.array(intersect['intersections'][i])

# No intersection, set weighted mean to NaN
stats.loc[stats['n']==0, 'weighted_mean'] = np.NaN
if isinstance(fraction_threshold, (int, float)):
mask = (frac >= fraction_threshold)
val = val[mask]
frac = frac[mask]
inter = inter[mask]

stats.index.name = None
stats['intersections'].append(inter)
stats['fraction'].append(frac)
stats['n'].append(len(inter))
stats[attribute].append(val)

if nan_value is not None:
np.nan_to_num(val, copy=False, nan=nan_value)

if len(val) > 0:
stats['sum'].append(np.nansum(val))
stats['min'].append(np.nanmin(val))
stats['max'].append(np.nanmax(val))
stats['mean'].append(np.nanmean(val))
stats['weighted_mean'].append(np.nansum(val*frac)/np.nansum(frac[~np.isnan(val)]))
else:
stats['sum'].append(None)
stats['min'].append(None)
stats['max'].append(None)
stats['mean'].append(None)
stats['weighted_mean'].append(None)

return stats
stats = pd.DataFrame(stats, index=intersect.index)

return stats
Loading
Loading