Skip to content

glassbox.models.gaussian_nb

Gaussian Naive Bayes models.


GaussianNB

GaussianNB(epsilon=1e-09)

Bases: BaseModel

Gaussian Naive Bayes classifier.

A probabilistic classifier based on Bayes' theorem with the assumption that features follow a Gaussian (normal) distribution within each class.

Parameters:

Name Type Description Default
epsilon float

Small constant to avoid division by zero in variance calculations.

1e-9

Attributes:

Name Type Description
epsilon float

Small constant to avoid division by zero.

classes ndarray

Unique class labels, shape (n_classes,).

class_priors dict

Prior probability for each class.

class_means dict

Mean of each feature per class.

class_variances dict

Variance of each feature per class.

Initialize the Gaussian Naive Bayes classifier.

Parameters:

Name Type Description Default
epsilon float

Small constant to avoid division by zero in variance calculations.

1e-9
Source code in glassbox/models/gaussian_nb/gaussian_nb.py
def __init__(self, epsilon: float = 1e-9) -> None:
    """
    Create an unfitted Gaussian Naive Bayes classifier.

    Parameters
    ----------
    epsilon : float, default=1e-9
        Small constant to avoid division by zero in variance calculations.
        It is only stored here.
    """
    self.epsilon: float = epsilon

    # An empty label array marks the model as "not fitted": the prediction
    # methods check ``len(self.classes) == 0`` before doing any work.
    self.classes: np.ndarray = np.array([])

    # Per-class statistics, keyed by class label and populated by ``fit``.
    self.class_priors: dict = dict()
    self.class_means: dict = dict()
    self.class_variances: dict = dict()

fit

fit(X, y)

Fit the Gaussian Naive Bayes model to training data.

Calculates the per-feature mean and variance within each class, and the prior probability of each class.

Parameters:

Name Type Description Default
X ndarray

Training data of shape (n_samples, n_features).

required
y ndarray

Target values of shape (n_samples,).

required

Returns:

Type Description
Self

The fitted model.

Raises:

Type Description
ValueError

If X and y do not contain the same number of samples.

Source code in glassbox/models/gaussian_nb/gaussian_nb.py
def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
    """
    Fit the Gaussian Naive Bayes model to training data.

    Calculates the mean, variance, and prior probability for each feature
    in each class.

    Parameters
    ----------
    X : np.ndarray
        Training data of shape (n_samples, n_features).
    y : np.ndarray
        Target values of shape (n_samples,).

    Returns
    -------
    Self
        The fitted model.

    Raises
    ------
    ValueError
        If X and y have incompatible dimensions.
    """
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {X.shape[0]} and {y.shape[0]}"
        )

    self.classes = np.unique(y)

    for cls in self.classes:
        X_cls = X[y == cls]
        self.class_means[cls] = np.mean(X_cls, axis=0)
        self.class_variances[cls] = np.var(X_cls, axis=0)
        self.class_priors[cls] = X_cls.shape[0] / X.shape[0]

    return self

predict

predict(X, **kwargs)

Predict class labels for samples in X.

Parameters:

Name Type Description Default
X ndarray

Data to predict on, of shape (n_samples, n_features).

required
**kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
ndarray

Predicted class labels of shape (n_samples,).

Raises:

Type Description
ValueError

If model has not been fitted yet.

Source code in glassbox/models/gaussian_nb/gaussian_nb.py
def predict(self, X: np.ndarray, **kwargs: Any) -> np.ndarray:
    """
    Return the most probable class label for every sample in X.

    The label chosen for a sample is the entry of ``self.classes`` whose
    column holds the largest value in that sample's row of
    ``self.predict_proba(X)``.

    Parameters
    ----------
    X : np.ndarray
        Data to predict on, of shape (n_samples, n_features).
    **kwargs : Any
        Additional keyword arguments (unused).

    Returns
    -------
    np.ndarray
        Predicted class labels of shape (n_samples,).

    Raises
    ------
    ValueError
        If model has not been fitted yet.
    """
    # An empty ``classes`` array means ``fit`` has not stored any labels.
    if not len(self.classes):
        raise ValueError("Model has not been fitted yet")

    winners = np.argmax(self.predict_proba(X), axis=1)
    return self.classes[winners]

predict_proba

predict_proba(X)

Predict class probabilities for samples in X.

Parameters:

Name Type Description Default
X ndarray

Data to predict on, of shape (n_samples, n_features).

required

Returns:

Type Description
ndarray

Predicted class probabilities of shape (n_samples, n_classes). Each row sums to 1.0.

Raises:

Type Description
ValueError

If model has not been fitted yet.

Source code in glassbox/models/gaussian_nb/gaussian_nb.py
def predict_proba(self, X: np.ndarray) -> np.ndarray:
    """
    Predict class probabilities for samples in X.

    For each class the log-posterior is the log prior plus the sum over
    features of the log density returned by ``self._calculate_pdf``. The
    log-posteriors are then normalised per sample with a max-shifted
    softmax.

    Densities are floored at the smallest positive normal float64 before
    the logarithm is taken. Without the floor, a sample whose density
    underflows to 0 for every class would give ``-inf`` for all
    log-posteriors and the row would come out as NaN instead of a valid
    distribution.

    Parameters
    ----------
    X : np.ndarray
        Data to predict on, of shape (n_samples, n_features).

    Returns
    -------
    np.ndarray
        Predicted class probabilities of shape (n_samples, n_classes).
        Each row sums to 1.0.

    Raises
    ------
    ValueError
        If model has not been fitted yet.
    """
    if len(self.classes) == 0:
        raise ValueError("Model has not been fitted yet")

    n_samples = X.shape[0]
    n_classes = len(self.classes)
    log_posteriors = np.zeros((n_samples, n_classes))

    # Floor for densities so that log() never sees an exact zero.
    min_density = np.finfo(np.float64).tiny

    for class_idx, cls in enumerate(self.classes):
        log_prior = np.log(self.class_priors[cls])
        # _calculate_pdf is defined outside this block; it is summed over
        # axis 1 below, so it is expected to yield one density per
        # (sample, feature) pair.
        pdf = np.maximum(self._calculate_pdf(class_idx, X), min_density)
        log_likelihood = np.sum(np.log(pdf), axis=1)
        log_posteriors[:, class_idx] = log_prior + log_likelihood

    # Convert from log probabilities to probabilities using softmax.
    # Subtracting the row maximum keeps exp() from overflowing and leaves
    # the normalised result unchanged.
    max_log_posteriors = np.max(log_posteriors, axis=1, keepdims=True)
    log_posteriors_stable = log_posteriors - max_log_posteriors
    probabilities = np.exp(log_posteriors_stable)
    probabilities = probabilities / np.sum(probabilities, axis=1, keepdims=True)

    return probabilities