Source code for spider.preprocess

import pandas as pd
import numpy as np
import scanpy as sc
import magic
import scprep
import anndata 
from importlib import resources
from .ot import cot_combine_sparse
from scipy.spatial.distance import pdist
from scipy.spatial import ConvexHull
import itertools
from sklearn.mixture import GaussianMixture
    
# imputation
def impute_MAGIC(adata):
    magic_op = magic.MAGIC(n_jobs=5, random_state=0)
    inp = adata.to_df()
    inp = scprep.normalize.library_size_normalize(inp)
    inp = scprep.transform.sqrt(inp)
    outp = magic_op.fit_transform(inp)
    adata.X = outp

# idata
[docs]def idata_construct(score, direction, pairs_meta, lr_df, lr_raw, pathway_df, adata, drop=True, normalize_total=False): """ Constructs an AnnData object for interface profiles from input data. This function takes scores and metadata to create an AnnData object, setting various attributes and performing quality checks on the data. Parameters ---------- score : np.ndarray A 2D array of scores representing the interaction strengths between ligand-receptor pairs. direction : np.ndarray A 2D array indicating the direction of interactions for the respective scores. pairs_meta : pd.DataFrame A DataFrame containing metadata for ligand-receptor pairs, which should include relevant identifiers. lr_df : pd.DataFrame A DataFrame containing ligand-receptor information, with indices corresponding to ligand-receptor pairs. lr_raw : pd.DataFrame Raw metadata for ligand-receptor pairs, used for further analysis and storage in the output AnnData object. pathway_df : pd.DataFrame A DataFrame containing pathway information that describes interactions between transcription factors and genes. adata : AnnData The original AnnData object containing the input data for the analysis. drop : bool, optional If True, drops cells that do not have any expressed genes (default is True). normalize_total : bool, optional If True, normalizes the total counts of interactions in the AnnData object (default is False). Returns ------- idata : AnnData The constructed AnnData object containing the following: - Scores and directions as layers. - Metadata and quality metrics for interfaces. - Spatial coordinates if available. Notes ----- - The function performs quality checks on the data, filtering genes and cells based on specified criteria. - It normalizes total interaction strengths per interface if the `normalize_total` flag is set to True. - The function constructs the output AnnData object (`idata`) with appropriate metadata for downstream analysis. Examples -------- >>> idata = idata_construct(score_array, direction_array, pairs_meta_df, lr_df, lr_raw_df, pathway_df, adata) """ idata = anndata.AnnData(score) idata.layers['direction'] = direction idata.obs_names = pairs_meta.index idata.var_names = lr_df.index idata.var = lr_df idata.uns['lr_meta'] = lr_raw idata.uns['pathway_meta'] = pathway_df idata.obs = pairs_meta unique_cells = np.unique(idata.obs[['A', 'B']].to_numpy().flatten()) cell_meta = adata.obs.loc[unique_cells] idata.uns['cell_meta'] = cell_meta idata.uns['tf_count'] = adata.to_df()[np.unique(pathway_df[['src', 'dest']])].to_numpy() idata.uns['tf_header'] = np.unique(pathway_df[['src', 'dest']]) # quality check sc.pp.calculate_qc_metrics(idata, inplace=True, percent_top=None) sc.pp.filter_genes(idata, min_cells=5) if drop: sc.pp.filter_cells(idata, min_genes=1) if normalize_total: sc.pp.normalize_total(adata) print('Normalizing total counts per cell to the median of total counts for cells before normalization') print(f'Construct idata with {idata.shape[0]} interfaces and {idata.shape[1]} LR pairs.') return idata
def subset_lr(is_human): lr_raw = load_lr_df(is_human).drop_duplicates(subset=['ligand', 'receptor'], keep="last") lr_raw['score'] = 1 return lr_raw def build_neighbors(row, edges): source_node = row.A target_node = row.B return pd.Series({'A': [n for n in edges.loc[source_node]['neighbor'] if n != target_node], 'B': [n for n in edges.loc[target_node]['neighbor'] if n != source_node]}) def get_interface_neighbors(adata, interface_meta): neighborA = interface_meta.groupby('A')['B'].apply(list) neighborB = interface_meta.groupby('B')['A'].apply(list) neighbor = pd.concat([neighborA, neighborB], axis=0).reset_index(drop=False) neighbor.columns = ['A', 'B'] neighbor = pd.Series(neighbor.groupby('A')['B'].sum()) df = pd.DataFrame(index=adata.obs_names, columns=['neighbor']) df['neighbor'] = [[]] * len(df) df.loc[neighbor.index, 'neighbor'] = neighbor.to_numpy() # Function to get neighbors for the source node of an edge interface_neighbor = interface_meta.apply(build_neighbors, args=(df,), axis=1) return interface_neighbor def algebraic_mean_v1(row, df, is_source, alpha=0.3): # alpha is the portion for max if is_source: related_samples = row.A else: related_samples = row.B values = df.loc[related_samples] mean = values.mean() * (1-alpha) + values.max() * alpha return mean def algebraic_mean(related_samples, df, alpha=0.3): # alpha is the portion for max values = df.loc[related_samples] mean = values.mean() * (1-alpha) + values.max() * alpha return mean def score(adata, lr_df, pairs, interface_meta): interface_neighbor = get_interface_neighbors(adata, interface_meta) exp_ref = adata.to_df() exp_ref = exp_ref.loc[:,~exp_ref.columns.duplicated()] l = lr_df['ligand'].to_numpy().flatten() r = lr_df['receptor'].to_numpy().flatten() sub_exp = exp_ref[np.concatenate((l, r))] sub_exp_rev = exp_ref[np.concatenate((r, l))] # Compute algebraic mean for each sample neighbor_exp_A = interface_neighbor.apply(algebraic_mean_v1, args=(sub_exp, True), axis=1) neighbor_exp_B = interface_neighbor.apply(algebraic_mean_v1, args=(sub_exp_rev, False), axis=1) mask_A = neighbor_exp_A.isna().any(axis=1) mask_B = neighbor_exp_B.isna().any(axis=1) neighbor_exp_A[mask_A] = sub_exp.loc[interface_meta.A[mask_A]].to_numpy() neighbor_exp_B[mask_B] = sub_exp_rev.loc[interface_meta.B[mask_B]].to_numpy() neighbor_exp_A = neighbor_exp_A.to_numpy() neighbor_exp_B = neighbor_exp_B.to_numpy() sub_exp = sub_exp.to_numpy() sub_exp_rev = sub_exp_rev.to_numpy() # multiply lr exp edge_exp_both = np.multiply(sub_exp[pairs[0]] + neighbor_exp_A, sub_exp_rev[pairs[1]] + neighbor_exp_B)/4 print('scoring') print('using neighbor+sqrt+max') score = np.sqrt(np.maximum(edge_exp_both[:, :int(len(l))], edge_exp_both[:, int(len(l)):])) direction = np.argmax((edge_exp_both[:, :int(len(l))], edge_exp_both[:, int(len(l)):]), 0) return score, direction
[docs]def score_ot(adata, lr_df, interface_meta, interface_cell_pair, weight=(0.25, 0.25, 0.25, 0.25), itermax = 1000): """ Score interactions and directions between ligands and receptors based on spatial data. This function evaluates the interactions between ligands and receptors in the provided AnnData object, using a COT scoring mechanism that incorporates spatial information and co-expression data. Parameters ---------- adata : AnnData Input AnnData object containing expression data and spatial coordinates. lr_df : DataFrame DataFrame containing ligand-receptor pairs to be analyzed. interface_meta : DataFrame Metadata associated with the interface analysis (not directly used in scoring). interface_cell_pair : DataFrame A DataFrame containing pairs of cell indices for which interactions are to be scored. weight : tuple of float, optional Weights for the scoring components (default is (0.25, 0.25, 0.25, 0.25)). itermax : int, optional Maximum number of iterations for the scoring algorithm (default is 1000). Returns ------- idata_score : ndarray A 2D numpy array containing the interaction scores for each ligand-receptor pair. idata_direction : ndarray A 2D numpy array indicating the direction (e.g., ligand or receptor dominance) of each interaction. comm_network : dict A dictionary representing the combined communication network, mapping ligand-receptor pairs to their scores. S : ndarray A 2D numpy array of gene expression values for ligands. D : ndarray A 2D numpy array of gene expression values for receptors. Notes ----- - The scoring mechanism relies on the spatial proximity of cells, calculating distances between pairs of cells to inform the scoring. - Ligand-receptor interactions are evaluated based on COT and their co-expression in the spatial context of the cells. - The output includes normalized scores for interactions and a directionality indicator. Examples -------- >>> interaction_scores, interaction_directions, network, S, D = score_ot(adata, lr_df, interface_meta, interface_cell_pair) """ data_genes = set(adata.var_names) ligs = list(set(lr_df.iloc[:,0]).intersection(data_genes)) recs = list(set(lr_df.iloc[:,1]).intersection(data_genes)) A = np.inf * np.ones([len(ligs), len(recs)], float) for i in range(len(lr_df)): tmp_lig = lr_df.iloc[i][0] tmp_rec = lr_df.iloc[i][1] if tmp_lig in ligs and tmp_rec in recs: A[ligs.index(tmp_lig), recs.index(tmp_rec)] = 1.0 A = A S = adata[:,ligs].to_df().to_numpy() D = adata[:,recs].to_df().to_numpy() M = np.zeros((adata.shape[0], adata.shape[0])) rows, cols = zip(*interface_cell_pair.T) M[rows, cols] = [np.finfo(np.float32).eps+np.linalg.norm(adata.obsm['spatial'][p[0]] - adata.obsm['spatial'][p[1]]) for p in interface_cell_pair.T] M += M.T cutoff = float( np.max(np.max(M))* 1.1) * np.ones_like(A) M[M==0] = np.max(np.max(M)) * 10 cot_eps = 0.11 cot_rho = 1e1 cot_weights = weight cot_nitermax = itermax comm_network = cot_combine_sparse(S, D, A, M, cutoff, \ eps_p=cot_eps, eps_mu=cot_eps, eps_nu=cot_eps, rho=cot_rho, weights=cot_weights, nitermax=cot_nitermax) idata_score = pd.DataFrame(np.zeros((len(interface_cell_pair[0]), len(comm_network))), columns = lr_df.index) idata_direction = pd.DataFrame(np.ones((len(interface_cell_pair[0]), len(comm_network))), columns = lr_df.index) * -1 for key, val in comm_network.items(): l, r = ligs[key[0]], recs[key[1]] pair_name = f'{l}_{r}' vals = [] dirs = [] lsum = S[:, key[0]] rsum = D[:, key[1]] vals_coexp = [] for pair in interface_cell_pair.T: coexp = np.sqrt(np.max([lsum[pair[0]]*rsum[pair[1]], lsum[pair[1]]*rsum[pair[0]]])) # coexp = np.sqrt(np.max([lsum[pair[0]]*rsum[pair[1]], lsum[pair[1]]*rsum[pair[0]]])) val_two = [val[pair[0], pair[1]], val[pair[1], pair[0]]] vals.append(np.max(val_two)) vals_coexp.append(coexp) dirs.append(np.argmax(val_two)) max_scale = np.max([np.max(vals), np.max(vals_coexp)]) vals_coexp = (vals_coexp - np.min(vals_coexp)) / (np.max(vals_coexp) - np.min(vals_coexp)) vals = (vals - np.min(vals)) / (np.max(vals) - np.min(vals)) idata_score[pair_name] = np.maximum(vals_coexp, vals) * max_scale idata_direction[pair_name] = dirs return idata_score.to_numpy(), idata_direction.to_numpy(), comm_network, S, D
def score_ot_entropy(adata, lr_df, interface_meta, interface_cell_pair, weight=(0.25, 0.25, 0.25, 0.25), itermax = 1000): ot_score, direction, comm_network, S_smth, D_smth = score_ot(adata, lr_df, interface_meta, interface_cell_pair, weight, itermax) coexp_score, _ = score(adata, lr_df, interface_cell_pair, interface_meta) log_Aa = np.log1p((coexp_score + 0.00001)/(ot_score+0.00001)) nonzero_mean = np.mean(ot_score, axis=0) alogAa = (ot_score + nonzero_mean)*log_Aa / 2 return alogAa, direction, comm_network, S_smth, D_smth def score_ot_weighted(adata, lr_df, interface_meta, interface_cell_pair, weight=(0.25, 0.25, 0.25, 0.25), itermax = 1000, alpha=0.01): ot_score, direction, comm_network, S_smth, D_smth = score_ot(adata, lr_df, interface_meta, interface_cell_pair, weight, itermax) coexp_score, _ = score(adata, lr_df, interface_cell_pair, interface_meta) return alpha*coexp_score + (1-alpha)*ot_score, direction, comm_network, S_smth, D_smth def kernel_function(x, eta, nu, kernel, normalization=None): if kernel == "exp": phi = np.exp(-np.power(x/eta, nu)) elif kernel == "lorentz": phi = 1 / ( 1 + np.power(x/eta, nu) ) else: phi = x if normalization == "unit_row_sum": phi = (phi.T / np.sum(phi.T, axis=0)).T elif normalization == "unit_col_sum": phi = phi / np.sum(phi, axis=0) return phi def score_v1(adata, lr_df, pairs): exp_ref = adata.to_df() exp_ref = exp_ref.loc[:,~exp_ref.columns.duplicated()] l = lr_df['ligand'].to_numpy().flatten() r = lr_df['receptor'].to_numpy().flatten() sub_exp = exp_ref[np.concatenate((l, r))].to_numpy() sub_exp_rev = exp_ref[np.concatenate((r, l))].to_numpy() edge_exp_both = np.multiply(sub_exp[pairs[0]], sub_exp_rev[pairs[1]]) # equation 2 in the manuscript print('scoring') print('using sqrt+max') score = np.sqrt(np.maximum(edge_exp_both[:, :int(len(l))], edge_exp_both[:, int(len(l)):])) return score
[docs]def find_interfaces(adata, cluster_key, lr_df, cutoff=None, is_sc=False): """ Identify interfaces between cells based on interaction capacity and spatial distance. This function locates interfaces between cells by analyzing interaction capacity and their spatial distances. It applies filtering based on a specified cutoff distance and can optionally include self-interaction pairs for bulk ST data. Parameters ---------- adata : AnnData Input AnnData object containing spatial and expression data. cluster_key : str Key in `adata.obs` that specifies the clustering information for the cells. lr_df : DataFrame DataFrame containing ligand-receptor pairs to be analyzed. cutoff : float, optional Distance cutoff for filtering interfaces (default is None). If not specified, the cutoff is determined based on the distribution of distances. is_sc : bool, optional If True, self-interaction pairs are not included (default is False). Returns ------- pairs : ndarray Array of identified cell pairs representing interfaces. pairs_meta : DataFrame DataFrame containing metadata for the identified interfaces, including distances. Notes ----- - The function calculates a median distance and uses it to filter out low-capacity cells and their interfaces. - Gaussian Mixture Models (GMM) are used to estimate the ideal number of interfaces based on the capacity of the cells. - If `is_sc` is False, self-interaction pairs are added to the results. Examples -------- >>> pairs, pairs_meta = find_interfaces(adata, 'cluster_key', lr_df) """ pairs = power_tri_init(adata, lr_df) pairs_meta = meta(adata, cluster_key, pairs) # filter by distance if not cutoff: median = np.median(pairs_meta['dist']) min_dist = np.min(pairs_meta['dist']) if median == min_dist: print('using 0.75') cutoff = np.quantile(pairs_meta['dist'], 0.75) else: print('using 0.99') cutoff = np.quantile(pairs_meta['dist'], 0.99) org_number = len(pairs_meta) pairs = pairs[:, pairs_meta['dist'] <= cutoff] pairs_meta = pairs_meta[pairs_meta['dist'] <= cutoff] print(f'Located {len(pairs_meta)} interfaces on {org_number} power cell boundaries with distance cutoff {cutoff}.') capacity = (adata.obs['capacity'] / max(adata.obs['capacity'])).to_numpy().reshape(-1,1) val, count = np.unique(pairs_meta[['A', 'B']].to_numpy().flatten(), return_counts=True) gmm = GaussianMixture(n_components=np.max(count), random_state=0).fit(capacity) gmm_labels = gmm.predict(X=capacity / max(capacity)) gmm_labels = np.argsort(gmm.means_.flatten())[gmm_labels] adata.obs['n_interface'] = 0 adata.obs.loc[val, 'n_interface'] = count adata.obs['ideal_n_interface'] = gmm_labels + 1 median = np.median(pairs_meta['dist']) to_remove = [] for to_prune_class in range(5): to_prune = adata.obs_names[np.argwhere(gmm_labels == to_prune_class)].flatten() for i in to_prune: interfaces = pairs_meta[(pairs_meta['A'] == i) | (pairs_meta['B'] == i)].sort_values(by='dist') interfaces = interfaces[interfaces['dist'] > median] to_remove += list(interfaces.index[1:]) to_remove = np.unique(to_remove) print(f'Dropped {len(to_remove)} out of {len(pairs_meta)} interfaces for low capacity cells.') pairs = pairs[:, pairs_meta.index.isin(to_remove)==False] pairs_meta = pairs_meta.drop(to_remove) if not is_sc: # add i-i pairs self_pairs = [range(adata.shape[0]), range(adata.shape[0])] self_pairs_meta = meta(adata, cluster_key, self_pairs) pairs = np.concatenate((pairs, self_pairs), axis=1) pairs_meta = pd.concat([pairs_meta, self_pairs_meta], axis=0) return pairs, pairs_meta
def subset_adata(adata, lr_df, pathway_df, imputation, normalize_total): if imputation: print('Running imputation with MAGIC') impute_MAGIC(adata) genes = adata.var_names.tolist() # get lr genes lr_df = lr_df[lr_df['ligand'].isin(genes) & lr_df['receptor'].isin(genes)] lr_df.index = lr_df['ligand'] + "_" + lr_df['receptor'] l = lr_df['ligand'].to_numpy().flatten() r = lr_df['receptor'].to_numpy().flatten() unique_lr = np.unique(np.concatenate((l, r))) # get pathway tf genes pathway_df = subset_pathway_df(pathway_df, lr_df, adata) unique_tf = np.unique(pathway_df[['src', 'dest']]) # subset adata adata = adata[:, adata.var_names.isin(np.concatenate((unique_lr, unique_tf)))] sc.pp.filter_genes(adata, min_cells=5) sc.pp.filter_cells(adata, min_genes=1) # normalize total will tenper the expression built by simulation if normalize_total: sc.pp.normalize_total(adata) print('Normalizing total counts per cell to the median of total counts for cells before normalization') # sc.pp.normalize_total(adata, target_sum=1e4) genes = adata.var_names.tolist() lr_df = lr_df[lr_df['ligand'].isin(genes) & lr_df['receptor'].isin(genes)] pathway_df = pathway_df[pathway_df['src'].isin(genes) & pathway_df['dest'].isin(genes)] return lr_df, pathway_df, adata def find_pairs_v1(adata, coord_type='generic', n_neighs=6): from squidpy.gr import spatial_neighbors from scipy.sparse import triu if coord_type == 'grid': spatial_neighbors(adata, coord_type=coord_type, n_neighs=n_neighs) else: spatial_neighbors(adata, coord_type=coord_type, delaunay=True, n_neighs=n_neighs) return np.transpose(triu(adata.obsp['spatial_connectivities']).nonzero()).T
[docs]def power_tri_init(adata, lr_df): """ Initialize power triangulation based on interaction capacity and spatial data. This function computes the power triangulation for a set of cells based on their spatial coordinates and ligand-receptor (LR) capacities. It normalizes the capacities, computes lifted weighted points, and determines the convex hull and Delaunay triangulation to identify interactions. Parameters ---------- adata : AnnData Input AnnData object containing spatial coordinates and expression data. lr_df : DataFrame DataFrame containing ligand-receptor pairs to be analyzed. Returns ------- pairs : ndarray Array of unique pairs of cell indices representing power triangulation edges. Notes ----- - The function normalizes the ligand-receptor capacity per cell and computes a minimum distance based on spatial coordinates. - The lifted weighted points are calculated from the spatial data and capacities. - The convex hull is computed to extract the Delaunay triangulation, which is then used to generate unique cell pairs representing interfaces. Examples -------- >>> pairs = power_tri_init(adata, lr_df) """ # normalize lr capacity per cell unique_lr = np.unique(lr_df[['ligand', 'receptor']].to_numpy().flatten()) adata_exp = adata.to_df() adata_exp = adata_exp / adata_exp.sum(axis=0) capacity = adata_exp[unique_lr].sum(axis=1).values min_dist = np.min(pdist(adata.obsm['spatial'][:, ::-1])) adata.obs['capacity'] = capacity capacity = (capacity - np.min(capacity)) / (np.max(capacity) - np.min(capacity)) + 0.01 capacity = capacity * min_dist # Compute the lifted weighted points s_norm = np.sum(adata.obsm['spatial'] ** 2, axis = 1) - capacity ** 2 s_lifted = np.concatenate([adata.obsm['spatial'], s_norm[:,None]], axis = 1) # Compute the convex hull of the lifted weighted points hull = ConvexHull(s_lifted) # Extract the Delaunay triangulation from the lower hull tri_list = tuple([a, b, c] if is_ccw_triangle(adata.obsm['spatial'][a], adata.obsm['spatial'][b], adata.obsm['spatial'][c]) else [a, c, b] for (a, b, c), eq in zip(hull.simplices, hull.equations) if eq[2] <= 0) # Compute the Voronoi points return np.unique(list(list(sorted(edge)) for tri in tri_list for edge in itertools.combinations(tri, 2)), axis=0).T
def meta(adata, cluster_key, pairs): # get label pairs_meta = pd.DataFrame() pairs_meta['A'] = adata.obs_names[pairs[0]] pairs_meta['B'] = adata.obs_names[pairs[1]] pairs_meta[['A_row', 'A_col']] = adata.obsm['spatial'][pairs[0]] pairs_meta[['B_row', 'B_col']] = adata.obsm['spatial'][pairs[1]] if cluster_key != '' and cluster_key in adata.obs.keys(): node_labels_text = adata.obs[cluster_key].to_numpy() pairs_meta['A_label'] = node_labels_text[pairs[0]].astype(str) pairs_meta['B_label'] = node_labels_text[pairs[1]].astype(str) node_labels = adata.obs[cluster_key].astype('category').cat.codes.to_numpy() + 1 pairs_meta['A_label_int'] = node_labels[pairs[0]] pairs_meta['B_label_int'] = node_labels[pairs[1]] pairs_meta['label_1'] = pairs_meta["A_label_int"].astype(str) + pairs_meta["B_label_int"].astype(str) pairs_meta['label_2'] = pairs_meta["B_label_int"].astype(str) + pairs_meta["A_label_int"].astype(str) pairs_meta['label_int'] = pairs_meta[['label_1', 'label_2']].astype(int).max(axis=1).astype(str).astype('category') label_1 = pairs_meta['A_label'].astype(str) + '_' + pairs_meta['B_label'].astype(str).to_numpy() label_2 = pairs_meta['B_label'].astype(str) + '_' + pairs_meta['A_label'].astype(str).to_numpy() pick = pairs_meta[['label_1', 'label_2']].astype(int).idxmax(axis=1).to_numpy() text_label = [label_1[i] if x=='label_1' else label_2[i] for i,x in enumerate(pick)] pairs_meta['label'] = text_label pairs_meta['label'] = pairs_meta['label'].astype('category') pairs_meta.index = pairs_meta['A'] + "_" + pairs_meta['B'] # get position A_pos = pairs_meta[['A_row', 'A_col']].to_numpy(dtype=float) B_pos = pairs_meta[['B_row', 'B_col']].to_numpy(dtype=float) avg_pair_pos = (A_pos + B_pos) / 2 pairs_meta[['row', 'col']] = avg_pair_pos pairs_meta['dist'] = np.linalg.norm(A_pos-B_pos, axis=1) return pairs_meta def load_lr_df(is_human): from importlib import resources with resources.path("spider.lrdb", "lrpairs.tsv") as pw_fn: lr_list = pd.read_csv(pw_fn, sep='\t', index_col=0) if is_human: print('Using human LR pair dataset.') lr_list = lr_list[lr_list.species=='Human'] else: print('Using mouse LR pair dataset.') lr_list = lr_list[lr_list.species=='Mouse'] return lr_list def load_pathway_df(is_human): from importlib import resources with resources.path("spider.lrdb", "pathways.tsv") as pw_fn: pw_list = pd.read_csv(pw_fn, sep='\t', index_col=0) if is_human: print('Using human pathway dataset.') pw_list = pw_list[pw_list.species=='Human'] else: print('Using mouse pathway dataset.') pw_list = pw_list[pw_list.species=='Mouse'] return pw_list def subset_pathway_df(pathways, lr_df, adata): pathways = pathways[['src', 'dest']] pathways.index = pathways['src'] + '_' + pathways['dest'] # remove pathways that are in the lrdb by index as we only want the downstream targets pathways = pathways[~pathways.index.isin(lr_df.index)] pathways = pathways[pathways['src'].isin(adata.var_names) | pathways['dest'].isin(adata.var_names)] # to tolerate missing points in pathwaay pathways = pathways.drop_duplicates() return pathways def norm2(X): return np.sqrt(np.sum(X ** 2)) def normalized(X): return X / norm2(X) def get_triangle_normal(A, B, C): return normalized(np.cross(A, B) + np.cross(B, C) + np.cross(C, A)) def get_power_circumcenter(A, B, C): N = get_triangle_normal(A, B, C) return (-.5 / N[2]) * N[:2] def is_ccw_triangle(A, B, C): M = np.concatenate([np.stack([A, B, C]), np.ones((3, 1))], axis = 1) return np.linalg.det(M) > 0