Skip to content

glassbox.models.neighbors._knn

k-nearest-neighbors estimators: KNeighborsClassifier and KNeighborsRegressor, both built on the shared BaseKNN base class.


BaseKNN

BaseKNN(k=5, metric=EUCLIDEAN, algorithm=BRUTE_FORCE)

Bases: BaseModel

Initialize the BaseKNN estimator.

Parameters:

Name Type Description Default
k int

Number of neighbors to use.

5
metric DistanceMetric

Distance metric to compute distances.

DistanceMetric.EUCLIDEAN
algorithm SearchAlgorithm

Algorithm used to compute the nearest neighbors.

SearchAlgorithm.BRUTE_FORCE
Source code in glassbox/models/neighbors/_knn.py
def __init__(
    self,
    k: int = 5,
    metric: DistanceMetric = DistanceMetric.EUCLIDEAN,
    algorithm: SearchAlgorithm = SearchAlgorithm.BRUTE_FORCE,
) -> None:
    """
    Store the hyperparameters and mark the estimator as not yet fitted.

    Parameters
    ----------
    k : int, default=5
        How many neighbors are looked up per query.
    metric : DistanceMetric, default=DistanceMetric.EUCLIDEAN
        Distance metric handed to the neighbor index.
    algorithm : SearchAlgorithm, default=SearchAlgorithm.BRUTE_FORCE
        Which neighbor-search algorithm the index should use.
    """
    # Fitted state: both stay ``None`` until they are populated by fitting.
    self.index: BaseIndex | None = None
    self.y_train: np.ndarray | None = None

    # Hyperparameters, stored exactly as passed in (no validation here).
    self.algorithm: SearchAlgorithm = algorithm
    self.metric: DistanceMetric = metric
    self.k: int = k

fit

fit(X, y)

Fits the model to the training data.

Parameters:

Name Type Description Default
X ndarray

Training data of shape (n_samples, n_features).

required
y ndarray

Target values of shape (n_samples,).

required

Returns:

Type Description
Self

The fitted model.

Source code in glassbox/models/neighbors/_knn.py
def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
    """
    Fits the model to the training data.

    Parameters
    ----------
    X : np.ndarray
        Training data of shape (n_samples, n_features).
    y : np.ndarray
        Target values of shape (n_samples,).

    Returns
    -------
    Self
        The fitted model.
    """
    X_arr = np.asarray(X)
    self.y_train = np.asarray(y)

    if self.algorithm == SearchAlgorithm.BRUTE_FORCE:
        self.index = BruteForceIndex(metric=self.metric)
    elif self.algorithm == SearchAlgorithm.KD_TREE:
        self.index = KDTreeIndex(metric=self.metric)
    else:
        raise ValueError(f"Unsupported algorithm: {self.algorithm}")

    self.index.build(X_arr)
    return self

predict

predict(X, **kwargs)

Predicts target values for the given data.

Parameters:

Name Type Description Default
X ndarray

Data to predict on, of shape (n_samples, n_features) or (n_features,).

required
**kwargs Any

Additional keyword arguments. They are accepted but not read by this implementation.

{}

Returns:

Type Description
ndarray

Predicted target values.

Source code in glassbox/models/neighbors/_knn.py
def predict(self, X: np.ndarray, **kwargs: Any) -> np.ndarray:
    """
    Predict target values from the nearest training neighbors.

    Parameters
    ----------
    X : np.ndarray
        Query data, either a batch of shape (n_samples, n_features) or a
        single sample of shape (n_features,).
    **kwargs : Any
        Additional keyword arguments; not read by this method.

    Returns
    -------
    np.ndarray
        Whatever ``self._aggregate`` produces for the neighbors' targets.
        For a 1-D query only the first element of that result is returned.

    Raises
    ------
    ValueError
        If the model has not been fitted yet.
    """
    index, targets = self.index, self.y_train
    if index is None or targets is None:
        raise ValueError("Model must be fitted before calling predict.")

    queries = np.asarray(X)
    # Look up the k nearest training rows and fetch their stored targets.
    neighbor_targets = targets[index.query(queries, self.k)]

    if queries.ndim != 1:
        return self._aggregate(neighbor_targets)

    # A lone sample: present its neighbors as a single row to the
    # aggregator, then unwrap the one resulting prediction.
    return self._aggregate(neighbor_targets.reshape(1, -1))[0]

KNeighborsClassifier

KNeighborsClassifier(
    k=5, metric=EUCLIDEAN, algorithm=BRUTE_FORCE
)

Bases: BaseKNN

Source code in glassbox/models/neighbors/_knn.py
def __init__(
    self,
    k: int = 5,
    metric: DistanceMetric = DistanceMetric.EUCLIDEAN,
    algorithm: SearchAlgorithm = SearchAlgorithm.BRUTE_FORCE,
) -> None:
    """
    Record the k-nearest-neighbors settings; nothing is fitted yet.

    Parameters
    ----------
    k : int, default=5
        Neighbor count used for each query.
    metric : DistanceMetric, default=DistanceMetric.EUCLIDEAN
        Metric used when measuring distances between samples.
    algorithm : SearchAlgorithm, default=SearchAlgorithm.BRUTE_FORCE
        Search algorithm used to find the nearest neighbors.
    """
    # Configuration supplied by the caller, kept as given.
    self.k: int = k
    self.algorithm: SearchAlgorithm = algorithm
    self.metric: DistanceMetric = metric

    # Placeholders for fitted state; ``None`` means "not fitted".
    self.y_train: np.ndarray | None = None
    self.index: BaseIndex | None = None

KNeighborsRegressor

KNeighborsRegressor(
    k=5, metric=EUCLIDEAN, algorithm=BRUTE_FORCE
)

Bases: BaseKNN

Source code in glassbox/models/neighbors/_knn.py
def __init__(
    self,
    k: int = 5,
    metric: DistanceMetric = DistanceMetric.EUCLIDEAN,
    algorithm: SearchAlgorithm = SearchAlgorithm.BRUTE_FORCE,
) -> None:
    """
    Create an unfitted k-nearest-neighbors estimator.

    Parameters
    ----------
    k : int, default=5
        Number of nearest neighbors consulted per query.
    metric : DistanceMetric, default=DistanceMetric.EUCLIDEAN
        Distance metric used to compare samples.
    algorithm : SearchAlgorithm, default=SearchAlgorithm.BRUTE_FORCE
        Strategy used to search for the nearest neighbors.
    """
    # No training data seen yet: index and targets start out empty.
    self.index: BaseIndex | None = None
    self.y_train: np.ndarray | None = None

    # Keep the caller's settings verbatim.
    self.metric: DistanceMetric = metric
    self.algorithm: SearchAlgorithm = algorithm
    self.k: int = k