Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
W0lfgunbl00d committed Dec 1, 2024
1 parent c1d0893 commit 1f2e5f2
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 52 deletions.
103 changes: 52 additions & 51 deletions river/linear_model/AUC_SGD.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,54 @@

class AUC_SGD:
"""
AUC Stochastic Gradient Descent (SGD)
This class implements an SGD-based optimization method for maximizing the AUC (Area Under the Curve)
of a binary classifier assuming a linear regression model.
Attributes
----------
epochs : int
Number of training epochs.
lr : float
Initial learning rate for gradient descent updates.
n_mc : int
Number of Monte Carlo samples used for estimating gradients.
gamma : float
Learning rate decay parameter.
eps : float
Smoothing parameter for numerical stability.
Methods
-------
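getTrain(X_train, y_train):
Returns the predicted scores for the training data, obtained by maximizing the training AUC.
getTest(X_train, X_test, y_train):
Returns the predicted scores for the test data, using parameters fitted on the training data to maximize AUC.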
Examples
--------
>>> from river import linear_model
>>> from sklearn.metrics import roc_auc_score
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.linear_model import LogisticRegression
>>> X, y = make_classification(n_samples=2000, n_informative=9, n_redundant=0, n_repeated=0, random_state=2)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=4)
>>> base = LogisticRegression().fit(X_train, y_train)
>>> X1 = X_train[y_train==1]
>>> X0 = X_train[y_train==0]
>>> model = linear_model.AUC_SGD()
>>> np.random.seed(123)
>>> theta = np.random.randn(X_train[0].shape[0])
>>> test_auc = model.getTest(X_train, X_test, y_train)
>>> train_auc = model.getTrain(X_train, y_train)
>>> print(roc_auc_score(y_train, train_auc))
0.8899135830932864
>>> print(roc_auc_score(y_test, test_auc))
0.8849634963496349
"""
AUC Stochastic Gradient Descent (SGD)
This class implements an SGD-based optimization method for maximizing the AUC (Area Under the Curve)
of a binary classifier assuming a linear regression model.
Attributes
----------
epochs : int
Number of training epochs.
lr : float
Initial learning rate for gradient descent updates.
n_mc : int
Number of Monte Carlo samples used for estimating gradients.
gamma : float
Learning rate decay parameter.
eps : float
Smoothing parameter for numerical stability.
Methods
-------
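getTrain(X_train, y_train):
Returns the predicted scores for the training data, obtained by maximizing the training AUC.
getTest(X_train, X_test, y_train):
Returns the predicted scores for the test data, using parameters fitted on the training data to maximize AUC.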
Examples
--------
>>> from river import linear_model
>>> from sklearn.metrics import roc_auc_score
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.linear_model import LogisticRegression
>>> X, y = make_classification(n_samples=2000, n_informative=9, n_redundant=0, n_repeated=0, random_state=2)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=4)
>>> base = LogisticRegression().fit(X_train, y_train)
>>> X1 = X_train[y_train==1]
>>> X0 = X_train[y_train==0]
>>> model = linear_model.AUC_SGD()
>>> np.random.seed(123)
>>> theta = np.random.randn(X_train[0].shape[0])
>>> test_auc = model.getTest(X_train, X_test, y_train)
>>> train_auc = model.getTrain(X_train, y_train)
>>> print(f"{round(roc_auc_score(y_train, train_auc) * 100)}" + "%")
89%
>>> print(f"{round(roc_auc_score(y_test, test_auc) * 100)}" + "%")
88%
"""

def __init__(self, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
super().__init__()
Expand All @@ -63,15 +63,14 @@ def __init__(self, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
self.eps = eps

def sigma_eps(self, z, eps):
    """
    Evaluate the smoothed sigmoid ``1 / (1 + exp(-z / eps))`` for a scalar ``z``.

    Parameters
    ----------
    z : float
        Scalar argument of the sigmoid.
    eps : float
        Smoothing scale; ``z`` is divided by it before the sigmoid is applied.
        Must be non-zero.

    Returns
    -------
    float
        The sigmoid value. When ``|z / eps| >= 35`` the saturated limit is
        returned directly: ``0.0`` if ``-z / eps`` is positive, ``1.0`` otherwise.
    """
    q = -z / eps
    # For |q| >= 35, exp(q) is ~1e15 or ~1e-15, so the sigmoid is effectively
    # 0 or 1; returning the limit skips the exponential and cannot overflow.
    if abs(q) < 35:
        return 1 / (1 + np.exp(q))
    elif q > 0:
        return 0.0
    else:
        return 1.0


def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1):
"""
Computes the stochastic gradient of the AUC objective.
Expand Down Expand Up @@ -123,7 +122,9 @@ def compute(self, X_train, X_test, y_train):
# learning rate scheduler
self.lr = self.lr / (1 + self.gamma)

theta = theta - self.lr * self.stochastic_gradient(theta, X1, X0, N=self.n_mc, random_state=seed)
theta = theta - self.lr * self.stochastic_gradient(
theta, X1, X0, N=self.n_mc, random_state=seed
)

if X_test is not None:
return theta @ X_test.T
Expand Down
2 changes: 1 addition & 1 deletion river/linear_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

from . import base
from .alma import ALMAClassifier
from .AUC_SGD import AUC_SGD
from .bayesian_lin_reg import BayesianLinearRegression
from .lin_reg import LinearRegression
from .log_reg import LogisticRegression
from .pa import PAClassifier, PARegressor
from .perceptron import Perceptron
from .softmax import SoftmaxRegression
from .AUC_SGD import AUC_SGD

__all__ = [
"base",
Expand Down

0 comments on commit 1f2e5f2

Please sign in to comment.