Source code for causal_falsify.utils.cond_indep

from typing import Optional
import numpy as np
from causallearn.utils.cit import CIT

"""
Multiple implementations of conditional independence tests for the null hypothesis:

    H0: X ⟂ Y | Z

"""


def _validate_inputs(x: np.ndarray, y: np.ndarray, z: np.ndarray) -> None:
    """
    Check that x, y and z have the shapes expected by the CI tests.

    The checks run in a fixed order (x, then y, then z, then row counts), and
    the first failing check determines which error is raised.

    Parameters
    ----------
    x : np.ndarray
        Variable X as a single-column 2D array, shape (n_samples, 1).
    y : np.ndarray
        Variable Y as a single-column 2D array, shape (n_samples, 1).
    z : np.ndarray
        Conditioning variables Z as a 2D array, shape (n_samples, n_features).

    Raises
    ------
    ValueError
        If x or y is not a single-column 2D array, if z is not 2D, or if the
        three arrays do not share the same number of rows.
    """
    # x and y must each be exactly one column wide.
    if x.ndim != 2 or x.shape[1] != 1:
        raise ValueError(
            f"Input x must be a 2D array with a single column (shape: (n_samples, 1)), but has shape {x.shape}."
        )
    if y.ndim != 2 or y.shape[1] != 1:
        raise ValueError(
            f"Input y must be a 2D array with a single column (shape: (n_samples, 1)), but has shape {y.shape}."
        )

    # z may have any number of columns, but must be two-dimensional.
    if z.ndim != 2:
        raise ValueError(
            f"Input z must be a 2D array with shape (n_samples, n_features), but has shape {z.shape}."
        )

    # Row counts are compared against x, the reference array.
    expected_rows = x.shape[0]
    if y.shape[0] != expected_rows or z.shape[0] != expected_rows:
        raise ValueError("All inputs must have the same number of rows.")



[docs]
def kcit_rbf(x: np.ndarray, y: np.ndarray, z: np.ndarray) -> Optional[float]:
    """
    Kernel-based Conditional Independence Test (KCIT) with RBF kernels.

    Stacks x, y and z column-wise and runs causal-learn's ``CIT`` with
    ``method="kci"`` and Gaussian kernels, testing column 0 (x) against
    column 1 (y) given all remaining columns (z).

    Parameters
    ----------
    x : np.ndarray
        Input array representing variable X. Must be a 2D array with shape (n_samples, 1).
    y : np.ndarray
        Input array representing variable Y. Must be a 2D array with shape (n_samples, 1).
    z : np.ndarray
        Input array representing conditioning variables Z. Must be a 2D array with shape (n_samples, n_covariates).

    Returns
    -------
    Optional[float]
        p-value of the test if successful; None if the underlying test raised
        an AssertionError or produced a p-value outside [0, 1] (NaN included).
        In both failure cases a diagnostic message is printed.

    Raises
    ------
    ValueError
        If the inputs fail shape validation.
    """
    _validate_inputs(x, y, z)

    data = np.hstack([x, y, z])

    try:
        cit_obj = CIT(
            data,
            method="kci",
            kernelX="Gaussian",
            kernelY="Gaussian",
            kernelZ="Gaussian",
            approx=False,
            use_gp=True,
            est_width="median",
        )
        # Columns 0 and 1 hold x and y; every later column belongs to z.
        conditioning_set = list(range(2, data.shape[1]))
        pval = cit_obj(0, 1, conditioning_set)
    except AssertionError as e:
        # Best-effort: assertion failures inside the test are reported, not raised.
        print(f"Assertion error: {e}")
        return None

    # Explicit range check instead of `assert`, so the validation still runs
    # when Python is started with -O. NaN fails both comparisons and is rejected.
    if not (0 <= pval <= 1):
        print(f"Assertion error: Invalid p-value computed: {pval}")
        return None
    return pval




[docs]
def fisherz(x: np.ndarray, y: np.ndarray, z: np.ndarray) -> Optional[float]:
    """
    Fisher's Z Conditional Independence Test.

    Stacks x, y and z column-wise and runs causal-learn's ``CIT`` with
    ``method="fisherz"``, testing column 0 (x) against column 1 (y) given all
    remaining columns (z).

    Parameters
    ----------
    x : np.ndarray
        Input array representing variable X. Must be a 2D array with shape (n_samples, 1).
    y : np.ndarray
        Input array representing variable Y. Must be a 2D array with shape (n_samples, 1).
    z : np.ndarray
        Input array representing conditioning variables Z. Must be a 2D array with shape (n_samples, n_covariates).

    Returns
    -------
    Optional[float]
        p-value of the test if successful; None if the underlying test raised
        an AssertionError or produced a p-value outside [0, 1] (NaN included).
        In both failure cases a diagnostic message is printed.

    Raises
    ------
    ValueError
        If the inputs fail shape validation.
    """
    _validate_inputs(x, y, z)

    data = np.hstack([x, y, z])

    try:
        cit_obj = CIT(data, method="fisherz")
        # Columns 0 and 1 hold x and y; every later column belongs to z.
        conditioning_set = list(range(2, data.shape[1]))
        pval = cit_obj(0, 1, conditioning_set)
    except AssertionError as e:
        # Best-effort: assertion failures inside the test are reported, not raised.
        print(f"Assertion error: {e}")
        return None

    # Explicit range check instead of `assert`, so the validation still runs
    # when Python is started with -O. NaN fails both comparisons and is rejected.
    if not (0 <= pval <= 1):
        print(f"Assertion error: Invalid p-value computed: {pval}")
        return None
    return pval