# Source code for causal_falsify.utils.cond_indep

from typing import Optional
import numpy as np
from causallearn.utils.cit import CIT

"""
Multiple implementations of conditional independence tests for testing the null hypothesis:

    H0: X ⟂ Y | Z

"""


def _validate_inputs(x: np.ndarray, y: np.ndarray, z: np.ndarray) -> None:
    """
    Check that x, y and z have the shapes expected by the CI tests in this module.

    Parameters
    ----------
    x : np.ndarray
        Variable X; must be 2D with exactly one column, i.e. shape (n_samples, 1).
    y : np.ndarray
        Variable Y; must be 2D with exactly one column, i.e. shape (n_samples, 1).
    z : np.ndarray
        Conditioning variables Z; must be 2D, i.e. shape (n_samples, n_features).

    Raises
    ------
    ValueError
        If x or y is not a single-column 2D array, if z is not 2D, or if the
        three arrays do not all have the same number of rows.
    """
    # x and y share the same single-column requirement; check them in order
    # so that a problem with x is reported before a problem with y.
    for label, column in (("x", x), ("y", y)):
        if column.ndim != 2 or column.shape[1] != 1:
            raise ValueError(
                f"Input {label} must be a 2D array with a single column (shape: (n_samples, 1)), but has shape {column.shape}."
            )

    if z.ndim != 2:
        raise ValueError(
            f"Input z must be a 2D array with shape (n_samples, n_features), but has shape {z.shape}."
        )

    # Row counts are compared only after every array is known to be 2D.
    expected_rows = x.shape[0]
    if y.shape[0] != expected_rows or z.shape[0] != expected_rows:
        raise ValueError("All inputs must have the same number of rows.")


def kcit_rbf(x: np.ndarray, y: np.ndarray, z: np.ndarray) -> Optional[float]:
    """
    Kernel-based Conditional Independence Test (KCIT) with RBF kernels.

    Tests the null hypothesis H0: X ⟂ Y | Z by delegating to causal-learn's
    ``CIT`` with ``method="kci"`` and Gaussian kernels on X, Y and Z.

    Parameters
    ----------
    x : np.ndarray
        Input array representing variable X. Must be a 2D array with shape (n_samples, 1).
    y : np.ndarray
        Input array representing variable Y. Must be a 2D array with shape (n_samples, 1).
    z : np.ndarray
        Input array representing conditioning variables Z. Must be a 2D array
        with shape (n_samples, n_covariates).

    Returns
    -------
    Optional[float]
        p-value of the test if successful; None if the underlying test raised
        an ``AssertionError`` or produced a value outside [0, 1]. In both
        failure cases a message is printed to stdout.

    Raises
    ------
    ValueError
        If the inputs fail the shape checks in ``_validate_inputs``.
    """
    _validate_inputs(x, y, z)

    # Column 0 holds X, column 1 holds Y, and all remaining columns hold Z.
    data = np.hstack([x, y, z])
    conditioning_set = list(range(2, data.shape[1]))

    try:
        cit_obj = CIT(
            data,
            method="kci",
            kernelX="Gaussian",
            kernelY="Gaussian",
            kernelZ="Gaussian",
            approx=False,
            use_gp=True,
            est_width="median",
        )
        pval = cit_obj(0, 1, conditioning_set)
    except AssertionError as e:
        # Assertion failures raised inside the test are reported, not propagated.
        print(f"Assertion error: {e}")
        return None

    # Explicit range check instead of `assert`, so it still runs under
    # `python -O`. NaN fails both comparisons and is therefore rejected too.
    if not (0 <= pval <= 1):
        print(f"Assertion error: Invalid p-value computed: {pval}")
        return None
    return pval
def fisherz(x: np.ndarray, y: np.ndarray, z: np.ndarray) -> Optional[float]:
    """
    Fisher's Z Conditional Independence Test.

    Tests the null hypothesis H0: X ⟂ Y | Z by delegating to causal-learn's
    ``CIT`` with ``method="fisherz"``.

    Parameters
    ----------
    x : np.ndarray
        Input array representing variable X. Must be a 2D array with shape (n_samples, 1).
    y : np.ndarray
        Input array representing variable Y. Must be a 2D array with shape (n_samples, 1).
    z : np.ndarray
        Input array representing conditioning variables Z. Must be a 2D array
        with shape (n_samples, n_covariates).

    Returns
    -------
    Optional[float]
        p-value of the test if successful; None if the underlying test raised
        an ``AssertionError`` or produced a value outside [0, 1]. In both
        failure cases a message is printed to stdout.

    Raises
    ------
    ValueError
        If the inputs fail the shape checks in ``_validate_inputs``.
    """
    _validate_inputs(x, y, z)

    # Column 0 holds X, column 1 holds Y, and all remaining columns hold Z.
    data = np.hstack([x, y, z])
    conditioning_set = list(range(2, data.shape[1]))

    try:
        cit_obj = CIT(data, method="fisherz")
        pval = cit_obj(0, 1, conditioning_set)
    except AssertionError as e:
        # Assertion failures raised inside the test are reported, not propagated.
        print(f"Assertion error: {e}")
        return None

    # Explicit range check instead of `assert`, so it still runs under
    # `python -O`. NaN fails both comparisons and is therefore rejected too.
    if not (0 <= pval <= 1):
        print(f"Assertion error: Invalid p-value computed: {pval}")
        return None
    return pval