Skip to content

glassbox.models.linear_model.logistic

LogisticRegression model.


LogisticRegression

LogisticRegression(
    learning_rate=0.01,
    max_epochs=1000,
    tol=1e-06,
    schedule=CONSTANT,
)

Bases: BaseLinearModel

Logistic regression model for binary classification.

Source code in glassbox/models/linear_model/_base.py
def __init__(
    self,
    learning_rate: float = 0.01,
    max_epochs: int = 1000,
    tol: float = 1e-6,
    schedule: LearningSchedule = LearningSchedule.CONSTANT,
) -> None:
    """
    Store optimizer hyperparameters shared by linear models and reset coefficients.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Initial step size for the optimizer; must be strictly positive.
    max_epochs : int, default=1000
        Hard cap on the number of optimization epochs; must be strictly positive.
    tol : float, default=1e-6
        Tolerance used by the convergence test; must be non-negative.
    schedule : LearningSchedule, default=LearningSchedule.CONSTANT
        Policy that adjusts the learning rate from epoch to epoch.

    Raises
    ------
    ValueError
        If learning_rate, max_epochs, or tol falls outside its valid range.
    """
    # Fail fast on bad hyperparameters so misconfiguration surfaces at
    # construction time rather than mid-training.
    checks = (
        (learning_rate <= 0, "learning_rate must be strictly positive"),
        (max_epochs <= 0, "max_epochs must be strictly positive"),
        (tol < 0, "tol must be non-negative"),
    )
    for failed, message in checks:
        if failed:
            raise ValueError(message)

    self.learning_rate = learning_rate
    self.max_epochs = max_epochs
    self.tol = tol
    self.schedule = schedule
    # Coefficients start empty/zero; fitting populates them.
    self.weights: np.ndarray = np.array([])
    self.bias: float = 0.0

fit

fit(X, y)

Fit the logistic regression model to training data.

Parameters:

Name Type Description Default
X ndarray

Training feature matrix of shape (n_samples, n_features).

required
y ndarray

Training target vector of shape (n_samples,).

required

Returns:

Type Description
Self

The fitted model instance.

Source code in glassbox/models/linear_model/logistic.py
def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
    """
    Fit the logistic regression model to training data.

    Parameters
    ----------
    X : np.ndarray
        Training feature matrix of shape (n_samples, n_features).
    y : np.ndarray
        Training target vector of shape (n_samples,).

    Returns
    -------
    Self
        The fitted model instance.
    """
    X_arr = np.asarray(X, dtype=float)
    y_arr = np.asarray(y)

    if X_arr.ndim != 2:
        raise ValueError("X must be a 2D array")
    if y_arr.ndim != 1:
        raise ValueError("y must be a 1D array")
    if X_arr.shape[0] != y_arr.shape[0]:
        raise ValueError("X and y must contain the same number of samples")
    if X_arr.shape[0] == 0:
        raise ValueError("X and y cannot be empty")

    classes = np.unique(y_arr)
    if not np.all(np.isin(classes, np.array([0, 1]))):
        raise ValueError(f"y must contain binary labels encoded as 0 and 1, but found: {classes.tolist()}")

    y_bin = y_arr.astype(float)

    n_samples, n_features = X_arr.shape
    self.weights = np.zeros(n_features, dtype=float)
    self.bias = 0.0

    previous_loss = np.inf
    for epoch in range(self.max_epochs):
        learning_rate = self._update_learning_rate(epoch)

        logits = X_arr @ self.weights + self.bias
        probabilities = self._sigmoid(logits)
        errors = probabilities - y_bin

        gradient_w = (X_arr.T @ errors) / n_samples
        gradient_b = float(np.mean(errors))

        self.weights -= learning_rate * gradient_w
        self.bias -= learning_rate * gradient_b

        probabilities_clipped = np.clip(probabilities, 1e-15, 1.0 - 1e-15)
        current_loss = float(
            -np.mean(
                y_bin * np.log(probabilities_clipped)
                + (1.0 - y_bin) * np.log(1.0 - probabilities_clipped)
            )
        )
        if abs(previous_loss - current_loss) <= self.tol:
            break
        previous_loss = current_loss

    return self

predict

predict(X, **kwargs)

Predict class labels for input samples.

Parameters:

Name Type Description Default
X ndarray

Input feature matrix of shape (n_samples, n_features).

required
**kwargs Any

Additional keyword arguments for prediction.

{}

Returns:

Type Description
ndarray

Predicted class labels of shape (n_samples,).

Source code in glassbox/models/linear_model/logistic.py
def predict(self, X: np.ndarray, **kwargs: Any) -> np.ndarray:
    """
    Predict 0/1 class labels by thresholding predicted probabilities.

    Parameters
    ----------
    X : np.ndarray
        Input feature matrix of shape (n_samples, n_features).
    **kwargs : Any
        Optional ``threshold`` (numeric, in [0.0, 1.0], default 0.5) used as
        the decision boundary on the positive-class probability.

    Returns
    -------
    np.ndarray
        Predicted class labels of shape (n_samples,).

    Raises
    ------
    ValueError
        If ``threshold`` is not numeric or lies outside [0.0, 1.0].
    """
    threshold = kwargs.get("threshold", 0.5)
    if not isinstance(threshold, (int, float)):
        raise ValueError("threshold must be a numeric value")
    if threshold < 0.0 or threshold > 1.0:
        raise ValueError("threshold must be in the [0.0, 1.0] interval")

    # Probabilities at or above the threshold map to class 1.
    scores = self.predict_proba(X)
    decisions = scores >= float(threshold)
    return decisions.astype(int)

predict_proba

predict_proba(X)

Predict class probabilities for input samples.

Parameters:

Name Type Description Default
X ndarray

Input feature matrix of shape (n_samples, n_features).

required

Returns:

Type Description
ndarray

Predicted probabilities of the positive class (label 1), of shape (n_samples,).

Source code in glassbox/models/linear_model/logistic.py
def predict_proba(self, X: np.ndarray) -> np.ndarray:
    """
    Predict positive-class probabilities for input samples.

    Parameters
    ----------
    X : np.ndarray
        Input feature matrix of shape (n_samples, n_features).

    Returns
    -------
    np.ndarray
        Predicted probabilities of shape (n_samples,).

    Raises
    ------
    RuntimeError
        If the model has not been fitted yet.
    ValueError
        If ``X`` is not 2D or its feature count differs from the fitted model.
    """
    # An empty weight vector means ``fit`` has never run.
    if self.weights.size == 0:
        raise RuntimeError("Model is not fitted yet.")

    matrix = np.asarray(X, dtype=float)
    if matrix.ndim != 2:
        raise ValueError("X must be a 2D array")
    if matrix.shape[1] != self.weights.shape[0]:
        raise ValueError("X must have the same number of features used during fit")

    # Squash the linear scores into (0, 1) probabilities.
    linear_scores = matrix @ self.weights + self.bias
    return self._sigmoid(linear_scores)