Source code for bandhic.utils.SCC

import numpy as np

__all__ = ["scc"]


[docs]def scc( A, B, max_k=None, min_n=10, method="pearson", ): """ Stratified correlation coefficient (SCC) for Hi-C using BandHiC. Parameters ---------- A, B : band_hic_matrix Two BandHiC matrices (same shape, same diag_num) max_k : int Max diagonal (distance bin) to consider min_n : int Minimum number of valid pixels per stratum method : {"pearson", "spearman"} Returns ------- scc : float per_diag : dict {k: rho_k} Examples -------- >>> import bandhic as bh >>> A = bh.straw_chr("./data/GSE130275_mESC_WT_combined_1.3B_microc.hic", "chr1", 100_000, diag_num=20) >>> B = A.copy() >>> scc, per_diag = scc(A, B) >>> scc 1.0 """ if A.shape != B.shape: raise ValueError("A and B must have the same shape") if A.diag_num != B.diag_num: raise ValueError("A and B must have the same diag_num") if max_k is None: max_k = A.diag_num - 1 if max_k > A.diag_num - 1: raise ValueError("max_k must be less than diag_num") num = 0.0 den = 0.0 per_diag = {} for k in range(1, max_k + 1): diag_len = A.shape[0] - k x = A.data[:diag_len, k] y = B.data[:diag_len, k] # mask invalid valid = np.isfinite(x) & np.isfinite(y) if A.mask is not None: valid &= ~A.mask[:diag_len, k] if B.mask is not None: valid &= ~B.mask[:diag_len, k] if valid.sum() < min_n: continue xk = x[valid] yk = y[valid] if method == "pearson": rho = np.corrcoef(xk, yk)[0, 1] if not np.isfinite(rho): continue elif method == "spearman": from scipy.stats import rankdata rx = rankdata(xk) ry = rankdata(yk) rho = np.corrcoef(rx, ry)[0, 1] if not np.isfinite(rho): continue else: raise ValueError("Unknown method") w = valid.sum() - 1 num += w * rho den += w per_diag[k] = rho return num / den if den > 0 else np.nan, per_diag