Skip to content

glassbox.models.ensemble._base

Abstract BaseRandomForest with bootstrap sampling and feature subsets.


BaseRandomForest

BaseRandomForest(
    n_estimators=100, max_depth=100, min_samples_split=2
)

Bases: BaseModel

Abstract base class for all random forest models.

Initialize the random forest model.

Parameters:

Name Type Description Default
n_estimators int

The number of trees in the forest.

100
max_depth int

Maximum depth of individual trees.

100
min_samples_split int

Minimum number of samples required to split an internal node.

2
Source code in glassbox/models/ensemble/_base.py
def __init__(
    self, n_estimators: int = 100, max_depth: int = 100, min_samples_split: int = 2
) -> None:
    """
    Set up an unfitted forest with the given hyperparameters.

    The arguments are stored on the instance unchanged, and the list of
    trees starts out empty.

    Parameters
    ----------
    n_estimators : int, default=100
        The number of trees in the forest.
    max_depth : int, default=100
        Maximum depth of individual trees.
    min_samples_split : int, default=2
        Minimum number of samples required to split an internal node.
    """
    # Hyperparameters, kept exactly as passed in.
    (
        self.n_estimators,
        self.max_depth,
        self.min_samples_split,
    ) = (n_estimators, max_depth, min_samples_split)

    # Empty until the forest has been fitted.
    self.trees: List[BaseTree] = []

fit

fit(X, y)

Fits the ensemble model to the training data.

Parameters:

Name Type Description Default
X ndarray

Training data of shape (n_samples, n_features).

required
y ndarray

Target values of shape (n_samples,).

required

Returns:

Type Description
Self

The fitted model.

Source code in glassbox/models/ensemble/_base.py
def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
    """
    Fits the ensemble model to the training data.

    Parameters
    ----------
    X : np.ndarray
        Training data of shape (n_samples, n_features).
    y : np.ndarray
        Target values of shape (n_samples,).

    Returns
    -------
    Self
        The fitted model.
    """
    self.trees = []
    n_samples, n_features = X.shape

    for _ in range(self.n_estimators):
        tree = self._create_tree()
        tree.max_depth = self.max_depth
        tree.min_samples_split = self.min_samples_split

        X_sample, y_sample = self._bootstrap_sample(X, y)
        feature_idx = self._get_feature_subset(n_features)

        # Keep track of which features this specific tree was trained on
        tree._feature_idx = feature_idx

        X_subset = X_sample[:, feature_idx]
        tree.fit(X_subset, y_sample)

        self.trees.append(tree)

    return self

predict

predict(X, **kwargs)

Predicts target values for the given data using the ensemble.

Parameters:

Name Type Description Default
X ndarray

Data to predict on, of shape (n_samples, n_features).

required
**kwargs Any

Additional keyword arguments.

{}

Returns:

Type Description
ndarray

Predicted target values.

Source code in glassbox/models/ensemble/_base.py
def predict(self, X: np.ndarray, **kwargs: Any) -> np.ndarray:
    """
    Predicts target values for the given data using the ensemble.

    Parameters
    ----------
    X : np.ndarray
        Data to predict on, of shape (n_samples, n_features).
    **kwargs : Any
        Additional keyword arguments.

    Returns
    -------
    np.ndarray
        Predicted target values.
    """
    if not self.trees:
        raise RuntimeError("Model is not fitted yet.")

    tree_preds = []
    for tree in self.trees:
        X_subset = X[:, tree._feature_idx]
        tree_preds.append(tree.predict(X_subset))

    # Shape of tree_preds: (n_estimators, n_samples)
    # Transpose to (n_samples, n_estimators) for sample-wise aggregation
    tree_preds = np.array(tree_preds).T

    return self._aggregate(tree_preds)