Skip to content

glassbox.frame.io

Functions for reading and writing CSV files.


read_csv

read_csv(filepath)

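Load a CSV file into a Dataset. Blank lines are skipped, the first non-empty row is used as the header, empty cells and `NA` (in any letter case) are read as NaN, and every column whose values all parse as numbers is converted to float.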

Parameters:

Name Type Description Default
filepath str

Path to the CSV file.

required

Returns:

Type Description
Dataset

Loaded dataset object.

Source code in glassbox/frame/io.py
def read_csv(filepath: str) -> Dataset:
    """
    Load a CSV file into a Dataset.

    Blank lines are skipped and the first non-empty row is used as the
    header. Header names and cells are stripped of surrounding whitespace.
    Empty cells and the literal ``NA`` (any letter case) are read as NaN,
    and a row with fewer cells than the header is padded with NaN. Columns
    whose values all convert to float are stored as floats; the remaining
    columns keep their string values.

    Parameters
    ----------
    filepath : str
        Path to the CSV file.

    Returns
    -------
    Dataset
        Loaded dataset object.

    Raises
    ------
    ValueError
        If the file contains no non-empty rows, or if a data row has more
        cells than the header has columns.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise become part of the first column name.
    with open(filepath, newline="", encoding="utf-8-sig") as data_source:
        reader = csv.reader(data_source)
        # csv.reader yields [] for blank lines; drop them
        entries = [row for row in reader if row]

    if not entries:
        raise ValueError(f"CSV file is empty: {filepath}")

    # Parse header
    columns = [col.strip() for col in entries[0]]

    # Parse rows
    rows = [[cell.strip() for cell in row] for row in entries[1:]]

    if not rows:
        # Header-only file: return an empty (0, n_cols) object-dtype dataset
        data = np.empty((0, len(columns)), dtype=object)
        return Dataset(data, columns)

    n_rows = len(rows)
    n_cols = len(columns)

    # Start with every cell marked missing so that short rows end up with
    # NaN in their trailing cells instead of an unset None.
    data = np.full((n_rows, n_cols), np.nan, dtype=object)

    for r, row in enumerate(rows):
        if len(row) > n_cols:
            # Fail with a clear message rather than an IndexError from the
            # array assignment below.
            raise ValueError(
                f"Data row {r + 1} in {filepath} has {len(row)} cells "
                f"but the header has {n_cols} columns"
            )
        for c, cell in enumerate(row):
            if cell != "" and cell.upper() != "NA":
                data[r, c] = cell

    # Try to cast each column to float; leave as object if it fails
    float_data = np.empty((n_rows, n_cols), dtype=float)
    col_is_float = np.ones(n_cols, dtype=bool)

    for c in range(n_cols):
        try:
            float_data[:, c] = data[:, c].astype(float)
        except (ValueError, TypeError):
            col_is_float[c] = False

    if col_is_float.all():
        # Purely numeric file: hand back the float array directly
        return Dataset(float_data, columns)

    # Mixed types: keep the object array, but store the numeric columns as
    # floats rather than as their original strings.
    for c in range(n_cols):
        if col_is_float[c]:
            data[:, c] = float_data[:, c]

    return Dataset(data, columns)

to_csv

to_csv(data, columns, filepath)

Save a Dataset to a CSV file on disk.

Parameters:

Name Type Description Default
data Dataset

The dataset to save.

required
columns List[str]

List of column names to save.

required
filepath str

Destination path for the CSV file.

required

Returns:

Type Description
None
Source code in glassbox/frame/io.py
def to_csv(data: Dataset, columns: List[str], filepath: str) -> None:
    """
    Save a Dataset to a CSV file on disk.

    The file is written as UTF-8 with ``\\n`` line endings. Column names and
    cell values that contain a comma, a double quote or a line break are
    quoted, so the output can be parsed back with ``csv.reader``.

    Parameters
    ----------
    data : Dataset
        The dataset to save.
    columns : List[str]
        List of column names to save.
    filepath : str
        Destination path for the CSV file.

    Returns
    -------
    None
    """
    subset = data.get_columns(columns)  # shape (n_rows, len(columns))

    with open(filepath, "w", encoding="utf-8", newline="") as fh:
        # csv.writer quotes only the fields that need it, so output is
        # unchanged for plain values. lineterminator keeps the "\n" line
        # endings (the csv default is "\r\n").
        # A lone empty field is written as "" rather than as a blank line,
        # which a reader that skips blank lines would otherwise drop.
        writer = csv.writer(fh, lineterminator="\n")

        # Header
        writer.writerow(columns)

        # Rows
        for row in subset.data:
            writer.writerow([_format_cell(v) for v in row])