From b2c6a1793afb241984e6a63b679d10018cce9571 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 18 Jun 2024 13:41:11 -0700 Subject: [PATCH 1/6] Change behaviour of dtype on equivalent sources Apply `dtype` argument only to the jacobian matrix and the predictions for equivalent sources. Don't use the `dtype` for casting coordinates and location of the sources. --- harmonica/_equivalent_sources/cartesian.py | 17 ++++------------- .../_equivalent_sources/gradient_boosted.py | 13 +++---------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/harmonica/_equivalent_sources/cartesian.py b/harmonica/_equivalent_sources/cartesian.py index ee63e5135..1b47a10ec 100644 --- a/harmonica/_equivalent_sources/cartesian.py +++ b/harmonica/_equivalent_sources/cartesian.py @@ -129,7 +129,7 @@ class EquivalentSources(vdb.BaseGridder): If True any predictions and Jacobian building is carried out in parallel through Numba's ``jit.prange``, reducing the computation time. If False, these tasks will be run on a single CPU. Default to True. - dtype : data-type + dtype : dtype, optional The desired data-type for the predictions and the Jacobian matrix. Default to ``"float64"``. @@ -215,21 +215,14 @@ def fit(self, coordinates, data, weights=None): Returns this estimator instance for chaining operations. """ coordinates, data, weights = vdb.check_fit_input(coordinates, data, weights) - coordinates, data, weights = cast_fit_input( - coordinates, data, weights, self.dtype - ) # Capture the data region to use as a default when gridding. self.region_ = vd.get_region(coordinates[:2]) coordinates = vdb.n_1d_arrays(coordinates, 3) if self.points is None: - self.points_ = tuple( - p.astype(self.dtype) for p in self._build_points(coordinates) - ) + self.points_ = self._build_points(coordinates) else: self.depth_ = None # set depth_ to None so we don't leave it unset - self.points_ = tuple( - p.astype(self.dtype) for p in vdb.n_1d_arrays(self.points, 3) - ) + self.points_ = vdb.n_1d_arrays(self.points, 3) jacobian = self.jacobian(coordinates, self.points_) self.coefs_ = vdb.least_squares(jacobian, data, weights, self.damping) return self @@ -326,9 +319,7 @@ def predict(self, coordinates): check_is_fitted(self, ["coefs_"]) shape = np.broadcast(*coordinates[:3]).shape size = np.broadcast(*coordinates[:3]).size - coordinates = tuple( - np.atleast_1d(i.astype(self.dtype)).ravel() for i in coordinates[:3] - ) + coordinates = tuple(np.atleast_1d(i).ravel() for i in coordinates[:3]) data = np.zeros(size, dtype=self.dtype) self._predict_kernel[self.parallel]( coordinates, self.points_, self.coefs_, data, self.greens_function diff --git a/harmonica/_equivalent_sources/gradient_boosted.py b/harmonica/_equivalent_sources/gradient_boosted.py index 85be86cb2..7fc3f8450 100644 --- a/harmonica/_equivalent_sources/gradient_boosted.py +++ b/harmonica/_equivalent_sources/gradient_boosted.py @@ -73,7 +73,7 @@ class EquivalentSourcesGB(EquivalentSources): If True any predictions and Jacobian building is carried out in parallel through Numba's ``jit.prange``, reducing the computation time. If False, these tasks will be run on a single CPU. Default to True. - dtype : data-type + dtype : dtype, optional The desired data-type for the predictions and the Jacobian matrix. Default to ``"float64"``. @@ -205,9 +205,6 @@ def fit(self, coordinates, data, weights=None): Returns this estimator instance for chaining operations. """ coordinates, data, weights = vdb.check_fit_input(coordinates, data, weights) - coordinates, data, weights = cast_fit_input( - coordinates, data, weights, self.dtype - ) # Capture the data region to use as a default when gridding. self.region_ = get_region(coordinates[:2]) # Ravel coordinates, data and weights to 1d-arrays @@ -217,13 +214,9 @@ def fit(self, coordinates, data, weights=None): weights = weights.ravel() # Build point sources if self.points is None: - self.points_ = tuple( - p.astype(self.dtype) for p in self._build_points(coordinates) - ) + self.points_ = self._build_points(coordinates) else: - self.points_ = tuple( - p.astype(self.dtype) for p in vdb.n_1d_arrays(self.points, 3) - ) + self.points_ = vdb.n_1d_arrays(self.points, 3) # Initialize coefficients self.coefs_ = np.zeros_like(self.points_[0]) # Fit coefficients through gradient boosting From 39ba5c4ddbc886dee53c5136a03991ab63199497 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 18 Jun 2024 13:50:25 -0700 Subject: [PATCH 2/6] Remove unused imports --- harmonica/_equivalent_sources/cartesian.py | 1 - harmonica/_equivalent_sources/gradient_boosted.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/harmonica/_equivalent_sources/cartesian.py b/harmonica/_equivalent_sources/cartesian.py index 1b47a10ec..99de70793 100644 --- a/harmonica/_equivalent_sources/cartesian.py +++ b/harmonica/_equivalent_sources/cartesian.py @@ -19,7 +19,6 @@ from .._forward.utils import distance_cartesian from .utils import ( - cast_fit_input, jacobian_numba_parallel, jacobian_numba_serial, pop_extra_coords, diff --git a/harmonica/_equivalent_sources/gradient_boosted.py b/harmonica/_equivalent_sources/gradient_boosted.py index 7fc3f8450..3d349f185 100644 --- a/harmonica/_equivalent_sources/gradient_boosted.py +++ b/harmonica/_equivalent_sources/gradient_boosted.py @@ -15,7 +15,7 @@ from verde import get_region, rolling_window from .cartesian import EquivalentSources -from .utils import cast_fit_input, predict_numba_parallel +from .utils import predict_numba_parallel class EquivalentSourcesGB(EquivalentSources): From ebc2eb88a65102c205a324006555c71c000fb04c Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Wed, 19 Jun 2024 10:17:24 -0700 Subject: [PATCH 3/6] Update tests for dtype in eqs --- harmonica/tests/test_eq_sources_cartesian.py | 29 ++++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/harmonica/tests/test_eq_sources_cartesian.py b/harmonica/tests/test_eq_sources_cartesian.py index 33f008d46..4570ac846 100644 --- a/harmonica/tests/test_eq_sources_cartesian.py +++ b/harmonica/tests/test_eq_sources_cartesian.py @@ -343,28 +343,29 @@ def test_dtype( dtype, ): """ - Test dtype argument on EquivalentSources + Test if predictions have the dtype passed as argument. """ # Define the points argument for EquivalentSources - points = None - if custom_points: - points = vd.grid_coordinates(region, spacing=300, extra_coords=-2e3) - # Define the points argument for EquivalentSources.fit() - if weights_none: - weights = None + points = ( + vd.grid_coordinates(region, spacing=300, extra_coords=-2e3) + if custom_points + else None + ) + # Define the weights argument for EquivalentSources.fit() + weights = weights if not weights_none else None # Initialize and fit the equivalent sources eqs = EquivalentSources( damping=damping, points=points, block_size=block_size, dtype=dtype ) eqs.fit(coordinates, data, weights) - # Make some predictions + # Ensure predictions have the expected dtype prediction = eqs.predict(coordinates) - # Check data type of created objects - for coord in eqs.points_: - assert coord.dtype == np.dtype(dtype) assert prediction.dtype == np.dtype(dtype) - # Check the data type of the source coefficients - # assert eqs.coefs_.dtype == np.dtype(dtype) + # Locations of sources should be the same dtype as the coordinates + for coord in eqs.points_: + assert coord.dtype == coordinates[0].dtype + # Sources' coefficients should be the same dtype as the coordinates + assert eqs.coefs_.dtype == coordinates[0].dtype @pytest.mark.use_numba @@ -381,8 +382,6 @@ def test_jacobian_dtype(region, dtype): points = tuple( p.ravel() for p in vd.grid_coordinates(region, shape=(6, 6), extra_coords=-2e3) ) - # Ravel the coordinates - coordinates = tuple(c.ravel() for c in coordinates) # Initialize and fit the equivalent sources eqs = EquivalentSources(points=points, dtype=dtype) # Build jacobian matrix From e5170ebf15c76865d9486fb6c6bbb6fd26f94ad2 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 25 Jun 2024 12:18:01 -0700 Subject: [PATCH 4/6] Use dtype only to build the jacobian matrix Update code, docstrings and tests. --- harmonica/_equivalent_sources/cartesian.py | 4 ++-- harmonica/_equivalent_sources/gradient_boosted.py | 2 +- harmonica/tests/test_eq_sources_cartesian.py | 4 ++-- harmonica/tests/test_gradient_boosted_eqs.py | 12 +++++++----- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/harmonica/_equivalent_sources/cartesian.py b/harmonica/_equivalent_sources/cartesian.py index 99de70793..724123fbb 100644 --- a/harmonica/_equivalent_sources/cartesian.py +++ b/harmonica/_equivalent_sources/cartesian.py @@ -129,7 +129,7 @@ class EquivalentSources(vdb.BaseGridder): parallel through Numba's ``jit.prange``, reducing the computation time. If False, these tasks will be run on a single CPU. Default to True. dtype : dtype, optional - The desired data-type for the predictions and the Jacobian matrix. + The desired data-type for the Jacobian matrix. Default to ``"float64"``. Attributes @@ -319,7 +319,7 @@ def predict(self, coordinates): shape = np.broadcast(*coordinates[:3]).shape size = np.broadcast(*coordinates[:3]).size coordinates = tuple(np.atleast_1d(i).ravel() for i in coordinates[:3]) - data = np.zeros(size, dtype=self.dtype) + data = np.zeros(size) self._predict_kernel[self.parallel]( coordinates, self.points_, self.coefs_, data, self.greens_function ) diff --git a/harmonica/_equivalent_sources/gradient_boosted.py b/harmonica/_equivalent_sources/gradient_boosted.py index 3d349f185..e1669964e 100644 --- a/harmonica/_equivalent_sources/gradient_boosted.py +++ b/harmonica/_equivalent_sources/gradient_boosted.py @@ -74,7 +74,7 @@ class EquivalentSourcesGB(EquivalentSources): parallel through Numba's ``jit.prange``, reducing the computation time. If False, these tasks will be run on a single CPU. Default to True. dtype : dtype, optional - The desired data-type for the predictions and the Jacobian matrix. + The desired data-type for the Jacobian matrix. Default to ``"float64"``. Attributes diff --git a/harmonica/tests/test_eq_sources_cartesian.py b/harmonica/tests/test_eq_sources_cartesian.py index 4570ac846..e061366b6 100644 --- a/harmonica/tests/test_eq_sources_cartesian.py +++ b/harmonica/tests/test_eq_sources_cartesian.py @@ -343,7 +343,7 @@ def test_dtype( dtype, ): """ - Test if predictions have the dtype passed as argument. + Test if predictions have the default dtype. """ # Define the points argument for EquivalentSources points = ( @@ -360,7 +360,7 @@ def test_dtype( eqs.fit(coordinates, data, weights) # Ensure predictions have the expected dtype prediction = eqs.predict(coordinates) - assert prediction.dtype == np.dtype(dtype) + assert prediction.dtype == np.float64 # Locations of sources should be the same dtype as the coordinates for coord in eqs.points_: assert coord.dtype == coordinates[0].dtype diff --git a/harmonica/tests/test_gradient_boosted_eqs.py b/harmonica/tests/test_gradient_boosted_eqs.py index 9492b1f1a..0450199b2 100644 --- a/harmonica/tests/test_gradient_boosted_eqs.py +++ b/harmonica/tests/test_gradient_boosted_eqs.py @@ -296,7 +296,7 @@ def test_dtype( dtype, ): """ - Test dtype argument on EquivalentSources + Test dtype argument on EquivalentSourcesGB """ # Define the points argument for EquivalentSources points = None @@ -314,12 +314,14 @@ def test_dtype( dtype=dtype, ) eqs.fit(coordinates, data, weights) - # Make some predictions + # Ensure predictions have the expected dtype prediction = eqs.predict(coordinates) - # Check data type of created objects + assert prediction.dtype == np.float64 + # Locations of sources should be the same dtype as the coordinates for coord in eqs.points_: - assert coord.dtype == np.dtype(dtype) - assert prediction.dtype == np.dtype(dtype) + assert coord.dtype == coordinates[0].dtype + # Sources' coefficients should be the same dtype as the coordinates + assert eqs.coefs_.dtype == coordinates[0].dtype @run_only_with_numba From 993d3e317426a01542cb678f4a6179e6130a8d34 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Fri, 28 Jun 2024 08:16:25 -0700 Subject: [PATCH 5/6] Explicit dtype of `data` array when predicting --- harmonica/_equivalent_sources/cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonica/_equivalent_sources/cartesian.py b/harmonica/_equivalent_sources/cartesian.py index 724123fbb..d160d2a03 100644 --- a/harmonica/_equivalent_sources/cartesian.py +++ b/harmonica/_equivalent_sources/cartesian.py @@ -319,7 +319,7 @@ def predict(self, coordinates): shape = np.broadcast(*coordinates[:3]).shape size = np.broadcast(*coordinates[:3]).size coordinates = tuple(np.atleast_1d(i).ravel() for i in coordinates[:3]) - data = np.zeros(size) + data = np.zeros(size, dtype=np.float64) self._predict_kernel[self.parallel]( coordinates, self.points_, self.coefs_, data, self.greens_function ) From 91b3f231e027ca66ce47551e5d56ad280bff7287 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Fri, 28 Jun 2024 10:11:19 -0700 Subject: [PATCH 6/6] Update docstring for test function --- harmonica/tests/test_gradient_boosted_eqs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonica/tests/test_gradient_boosted_eqs.py b/harmonica/tests/test_gradient_boosted_eqs.py index 883659fc8..157d9b036 100644 --- a/harmonica/tests/test_gradient_boosted_eqs.py +++ b/harmonica/tests/test_gradient_boosted_eqs.py @@ -296,7 +296,7 @@ def test_dtype( dtype, ): """ - Test dtype argument on EquivalentSourcesGB + Test if predictions have the default dtype. """ # Define the points argument for EquivalentSources points = None