tests

online-ml · Dec 1, 2024 · 1f2e5f2 · 1f2e5f2
1 parent c1d0893
commit 1f2e5f2
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 52 deletions.
diff --git a/river/linear_model/AUC_SGD.py b/river/linear_model/AUC_SGD.py
@@ -5,54 +5,54 @@
 
 class AUC_SGD:
     """
-        AUC Stochastic Gradient Descent (SGD)
-
-        This class implements an SGD-based optimization method for maximizing the AUC (Area Under the Curve)
-        of a binary classifier assuming a linear regression model.
-
-        Attributes
-        ----------
-        epochs : int
-            Number of training epochs.
-        lr : float
-            Initial learning rate for gradient descent updates.
-        n_mc : int
-            Number of Monte Carlo samples used for estimating gradients.
-        gamma : float
-            Learning rate decay parameter.
-        eps : float
-            Smoothing parameter for numerical stability.
-
-        Methods
-        -------
-
-        getTrain(X_train, y_train):
-            Returns the Prediction to maximize training AUC score.
-        getTest(X_test, y_test):
-            Returns the Prediction to maximize testing AUC score.
-
-        Examples
-        --------
-        >>> from river import linear_model
-        >>> from sklearn.metrics import roc_auc_score
-        >>> from sklearn.datasets import make_classification
-        >>> from sklearn.model_selection import train_test_split
-        >>> from sklearn.linear_model import LogisticRegression
-        >>> X, y = make_classification(n_samples=2000, n_informative=9, n_redundant=0, n_repeated=0, random_state=2)
-        >>> X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=4)
-        >>> base = LogisticRegression().fit(X_train, y_train)
-        >>> X1 = X_train[y_train==1]
-        >>> X0 = X_train[y_train==0]
-        >>> model = linear_model.AUC_SGD()
-        >>> np.random.seed(123)
-        >>> theta = np.random.randn(X_train[0].shape[0])
-        >>> test_auc = model.getTest(X_train, X_test, y_train)
-        >>> train_auc = model.getTrain(X_train, y_train)
-        >>> print(roc_auc_score(y_train, train_auc))
-        0.8899135830932864
-        >>> print(roc_auc_score(y_test, test_auc))
-        0.8849634963496349
-        """
+    AUC Stochastic Gradient Descent (SGD)
+
+    This class implements an SGD-based optimization method for maximizing the AUC (Area Under the Curve)
+    of a binary classifier, assuming a linear scoring model (score = theta @ x).
+
+    Attributes
+    ----------
+    epochs : int
+        Number of training epochs.
+    lr : float
+        Initial learning rate for gradient descent updates.
+    n_mc : int
+        Number of Monte Carlo samples used for estimating gradients.
+    gamma : float
+        Learning rate decay parameter.
+    eps : float
+        Smoothing parameter for numerical stability.
+
+    Methods
+    -------
+
+    getTrain(X_train, y_train):
+        Returns the predictions that maximize the training AUC score.
+    getTest(X_train, X_test, y_train):
+        Returns the predictions for X_test that maximize the testing AUC score.
+
+    Examples
+    --------
+    >>> from river import linear_model
+    >>> from sklearn.metrics import roc_auc_score
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> X, y = make_classification(n_samples=2000, n_informative=9, n_redundant=0, n_repeated=0, random_state=2)
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=4)
+    >>> base = LogisticRegression().fit(X_train, y_train)
+    >>> X1 = X_train[y_train==1]
+    >>> X0 = X_train[y_train==0]
+    >>> model = linear_model.AUC_SGD()
+    >>> np.random.seed(123)
+    >>> theta = np.random.randn(X_train[0].shape[0])
+    >>> test_auc = model.getTest(X_train, X_test, y_train)
+    >>> train_auc = model.getTrain(X_train, y_train)
+    >>> print(f"{round(roc_auc_score(y_train, train_auc) * 100)}%")
+    89%
+    >>> print(f"{round(roc_auc_score(y_test, test_auc) * 100)}%")
+    88%
+    """
 
     def __init__(self, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
         super().__init__()
@@ -63,15 +63,14 @@ def __init__(self, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
         self.eps = eps
 
     def sigma_eps(self, z, eps):
-        q = - z / eps
+        q = -z / eps
         if abs(q) < 35:
             return 1 / (1 + np.exp(q))
         elif q > 0:
             return 0
         else:
             return 1
 
-
     def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1):
         """
         Computes the stochastic gradient of the AUC objective.
@@ -123,7 +122,9 @@ def compute(self, X_train, X_test, y_train):
             # learning rate scheduler
             self.lr = self.lr / (1 + self.gamma)
 
-            theta = theta - self.lr * self.stochastic_gradient(theta, X1, X0, N=self.n_mc, random_state=seed)
+            theta = theta - self.lr * self.stochastic_gradient(
+                theta, X1, X0, N=self.n_mc, random_state=seed
+            )
 
         if X_test is not None:
             return theta @ X_test.T

diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py
@@ -4,13 +4,13 @@
 
 from . import base
 from .alma import ALMAClassifier
+from .AUC_SGD import AUC_SGD
 from .bayesian_lin_reg import BayesianLinearRegression
 from .lin_reg import LinearRegression
 from .log_reg import LogisticRegression
 from .pa import PAClassifier, PARegressor
 from .perceptron import Perceptron
 from .softmax import SoftmaxRegression
-from .AUC_SGD import AUC_SGD
 
 __all__ = [
     "base",