Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change behaviour of dtype on equivalent sources #516

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
22 changes: 6 additions & 16 deletions harmonica/_equivalent_sources/cartesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

from .._forward.utils import distance_cartesian
from .utils import (
cast_fit_input,
jacobian_numba_parallel,
jacobian_numba_serial,
pop_extra_coords,
Expand Down Expand Up @@ -129,8 +128,8 @@ class EquivalentSources(vdb.BaseGridder):
If True any predictions and Jacobian building is carried out in
parallel through Numba's ``jit.prange``, reducing the computation time.
If False, these tasks will be run on a single CPU. Default to True.
dtype : data-type
The desired data-type for the predictions and the Jacobian matrix.
dtype : dtype, optional
The desired data-type for the Jacobian matrix.
Default to ``"float64"``.

Attributes
Expand Down Expand Up @@ -215,21 +214,14 @@ def fit(self, coordinates, data, weights=None):
Returns this estimator instance for chaining operations.
"""
coordinates, data, weights = vdb.check_fit_input(coordinates, data, weights)
coordinates, data, weights = cast_fit_input(
coordinates, data, weights, self.dtype
)
# Capture the data region to use as a default when gridding.
self.region_ = vd.get_region(coordinates[:2])
coordinates = vdb.n_1d_arrays(coordinates, 3)
if self.points is None:
self.points_ = tuple(
p.astype(self.dtype) for p in self._build_points(coordinates)
)
self.points_ = self._build_points(coordinates)
else:
self.depth_ = None # set depth_ to None so we don't leave it unset
self.points_ = tuple(
p.astype(self.dtype) for p in vdb.n_1d_arrays(self.points, 3)
)
self.points_ = vdb.n_1d_arrays(self.points, 3)
jacobian = self.jacobian(coordinates, self.points_)
self.coefs_ = vdb.least_squares(jacobian, data, weights, self.damping)
return self
Expand Down Expand Up @@ -326,10 +318,8 @@ def predict(self, coordinates):
check_is_fitted(self, ["coefs_"])
shape = np.broadcast(*coordinates[:3]).shape
size = np.broadcast(*coordinates[:3]).size
coordinates = tuple(
np.atleast_1d(i.astype(self.dtype)).ravel() for i in coordinates[:3]
)
data = np.zeros(size, dtype=self.dtype)
coordinates = tuple(np.atleast_1d(i).ravel() for i in coordinates[:3])
data = np.zeros(size)
santisoler marked this conversation as resolved.
Show resolved Hide resolved
self._predict_kernel[self.parallel](
coordinates, self.points_, self.coefs_, data, self.greens_function
)
Expand Down
17 changes: 5 additions & 12 deletions harmonica/_equivalent_sources/gradient_boosted.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from verde import get_region, rolling_window

from .cartesian import EquivalentSources
from .utils import cast_fit_input, predict_numba_parallel
from .utils import predict_numba_parallel


class EquivalentSourcesGB(EquivalentSources):
Expand Down Expand Up @@ -78,8 +78,8 @@ class EquivalentSourcesGB(EquivalentSources):
If True any predictions and Jacobian building is carried out in
parallel through Numba's ``jit.prange``, reducing the computation time.
If False, these tasks will be run on a single CPU. Default to True.
dtype : data-type
The desired data-type for the predictions and the Jacobian matrix.
dtype : dtype, optional
The desired data-type for the Jacobian matrix.
Default to ``"float64"``.

Attributes
Expand Down Expand Up @@ -214,9 +214,6 @@ def fit(self, coordinates, data, weights=None):
Returns this estimator instance for chaining operations.
"""
coordinates, data, weights = vdb.check_fit_input(coordinates, data, weights)
coordinates, data, weights = cast_fit_input(
coordinates, data, weights, self.dtype
)
# Capture the data region to use as a default when gridding.
self.region_ = get_region(coordinates[:2])
# Ravel coordinates, data and weights to 1d-arrays
Expand All @@ -226,14 +223,10 @@ def fit(self, coordinates, data, weights=None):
weights = weights.ravel()
# Build point sources
if self.points is None:
self.points_ = tuple(
p.astype(self.dtype) for p in self._build_points(coordinates)
)
self.points_ = self._build_points(coordinates)
else:
self.depth_ = None # set depth_ to None so we don't leave it unset
self.points_ = tuple(
p.astype(self.dtype) for p in vdb.n_1d_arrays(self.points, 3)
)
self.points_ = vdb.n_1d_arrays(self.points, 3)
# Initialize coefficients
self.coefs_ = np.zeros_like(self.points_[0])
# Fit coefficients through gradient boosting
Expand Down
29 changes: 14 additions & 15 deletions harmonica/tests/test_eq_sources_cartesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,28 +343,29 @@ def test_dtype(
dtype,
):
"""
Test dtype argument on EquivalentSources
Test if predictions have the default dtype.
"""
# Define the points argument for EquivalentSources
points = None
if custom_points:
points = vd.grid_coordinates(region, spacing=300, extra_coords=-2e3)
# Define the points argument for EquivalentSources.fit()
if weights_none:
weights = None
points = (
vd.grid_coordinates(region, spacing=300, extra_coords=-2e3)
if custom_points
else None
)
# Define the weights argument for EquivalentSources.fit()
weights = weights if not weights_none else None
# Initialize and fit the equivalent sources
eqs = EquivalentSources(
damping=damping, points=points, block_size=block_size, dtype=dtype
)
eqs.fit(coordinates, data, weights)
# Make some predictions
# Ensure predictions have the expected dtype
prediction = eqs.predict(coordinates)
# Check data type of created objects
assert prediction.dtype == np.float64
# Locations of sources should be the same dtype as the coordinates
for coord in eqs.points_:
assert coord.dtype == np.dtype(dtype)
assert prediction.dtype == np.dtype(dtype)
# Check the data type of the source coefficients
# assert eqs.coefs_.dtype == np.dtype(dtype)
assert coord.dtype == coordinates[0].dtype
# Sources' coefficients should be the same dtype as the coordinates
assert eqs.coefs_.dtype == coordinates[0].dtype


@pytest.mark.use_numba
Expand All @@ -381,8 +382,6 @@ def test_jacobian_dtype(region, dtype):
points = tuple(
p.ravel() for p in vd.grid_coordinates(region, shape=(6, 6), extra_coords=-2e3)
)
# Ravel the coordinates
coordinates = tuple(c.ravel() for c in coordinates)
# Initialize and fit the equivalent sources
eqs = EquivalentSources(points=points, dtype=dtype)
# Build jacobian matrix
Expand Down
12 changes: 7 additions & 5 deletions harmonica/tests/test_gradient_boosted_eqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def test_dtype(
dtype,
):
"""
Test dtype argument on EquivalentSources
Test dtype argument on EquivalentSourcesGB
santisoler marked this conversation as resolved.
Show resolved Hide resolved
"""
# Define the points argument for EquivalentSources
points = None
Expand All @@ -314,12 +314,14 @@ def test_dtype(
dtype=dtype,
)
eqs.fit(coordinates, data, weights)
# Make some predictions
# Ensure predictions have the expected dtype
prediction = eqs.predict(coordinates)
# Check data type of created objects
assert prediction.dtype == np.float64
# Locations of sources should be the same dtype as the coordinates
for coord in eqs.points_:
assert coord.dtype == np.dtype(dtype)
assert prediction.dtype == np.dtype(dtype)
assert coord.dtype == coordinates[0].dtype
# Sources' coefficients should be the same dtype as the coordinates
assert eqs.coefs_.dtype == coordinates[0].dtype


@run_only_with_numba
Expand Down