import pandas as pd
import numpy as np
from . import svi
from . import preprocess
from . import enrichment
from . import visualization
from . import util
from . import ot
[docs]class SPIDER():
"""
The `SPIDER` class encapsulates methods for processing spatial transcriptomics data.
The class provides functionality for constructing an interface data object from spatial transcriptomics (ST) data.
It supports various preprocessing steps, such as filtering, normalization, and imputation, to prepare the data for
downstream analysis of cell-cell interactions.
"""
def __init__(self):
self.svi = svi
self.pp = preprocess
self.er = enrichment
self.vis = visualization
self.util = util
self.ot = ot
pass
[docs] def prep(self,
adata_input,
cluster_key='type',
is_human=True,
cutoff=None,
imputation=False,
itermax = 1000,
lr_raw = None,
pathway_raw = None,
is_sc=False,
normalize_total = False,
):
"""
Prepares the data for interface analysis by processing input AnnData.
This method subsets the input AnnData object to include only ligand-receptor
genes and transcription factor genes, constructs interfaces from the data,
and computes interface profiles.
Parameters
----------
adata_input : AnnData
Input AnnData object containing spatial transcriptomics data.
cluster_key : str, optional
Key in `adata` that denotes the cluster or cell type for analysis (default is 'type').
is_human : bool, optional
Indicates whether the input data is from human samples (default is True).
cutoff : float, optional
Threshold for filtering long-distance interactions (default is None, meaning no filtering).
imputation : bool, optional
If True, performs imputation on the input data (default is False).
itermax : int, optional
Maximum number of iterations for optimization algorithms (default is 1000).
lr_raw : DataFrame, optional
Predefined ligand-receptor pairs. If None, defaults to loading based on `is_human`.
pathway_raw : DataFrame, optional
Predefined pathways. If None, defaults to loading based on `is_human`.
is_sc : bool, optional
Indicates whether the input data is single-cell data (default is False).
normalize_total : bool, optional
If True, normalizes total counts in the `adata` object (default is False).
Returns
-------
idata : AnnData
An AnnData object constructed from the processed interface profiles and metadata.
Notes
-----
- The method first creates a copy of the input AnnData object and releases the original.
- It checks if ligand-receptor pairs and pathway data are provided; if not, it loads them based on the `is_human` flag.
- Interfaces are constructed from the data, followed by scoring based on the provided parameters.
- The resulting `idata` object contains computed scores and metadata for further analysis.
Examples
--------
>>> idata = self.prep(adata_input, cluster_key='cell_type', is_human=True)
"""
adata = adata_input.copy()
del adata_input
# Prep: find lr pairs and subset adata to have only lr genes and tf genes
if lr_raw is None:
lr_raw = preprocess.subset_lr(is_human)
if pathway_raw is None:
pathway_raw = preprocess.load_pathway_df(is_human)
lr_df, pathway_df, adata = preprocess.subset_adata(adata, lr_raw, pathway_raw, imputation, normalize_total)
# Step: construct interface
interface_cell_pair, interface_meta = preprocess.find_interfaces(adata, cluster_key, lr_df, cutoff=cutoff, is_sc=is_sc)
# Step: compute interface profile
score, direction, _, _, _ = preprocess.score_ot(adata, lr_df, interface_meta, interface_cell_pair, itermax=itermax)
# idata object construction
idata = preprocess.idata_construct(score, direction, interface_meta, lr_df, lr_raw, pathway_df, adata, drop=False, normalize_total=normalize_total)
return idata
[docs] def find_svi(self, idata, out_f, R_path, abstract=True, overwrite=False, n_neighbors=5, alpha=0.3, threshold=0.01, pattern_prune_threshold=1e-20, predefined_pattern_number=-1, svi_number=10, n_jobs=10):
"""
Find SVI and SVI patterns from interface idata.
This method processes the provided interface data to compute SVI and related patterns.
It can optionally perform abstraction based on the number of interfaces present.
Parameters
----------
idata : AnnData
Input AnnData object containing interface data.
out_f : str
Output directory path where results will be saved.
R_path : str
Path to the R script or binary for external processing.
abstract : bool, optional
If True, performs abstraction on the data if the number of interfaces is sufficient (default is True).
overwrite : bool, optional
If True, overwrites existing output files (default is False).
n_neighbors : int, optional
Number of neighbors to use for the abstraction process (default is 5).
alpha : float, optional
Alpha parameter for the abstraction algorithm (default is 0.3).
threshold : float, optional
Threshold for filtering SVI candidates (default is 0.01).
pattern_prune_threshold : float, optional
Threshold for pruning patterns during SVI pattern generation (default is 1e-20).
predefined_pattern_number : int, optional
If set to a positive integer, limits the number of predefined patterns (default is -1, which means no limit).
svi_number : int, optional
The number of SVI patterns to return (default is 10).
n_jobs : int, optional
Number of parallel jobs to run during processing (default is 10).
Returns
-------
idata : AnnData
The updated AnnData object containing results of SVI and SVI patterns.
meta_idata : AnnData or None
The meta AnnData object containing abstraction results, or None if abstraction was not performed.
Notes
-----
- The method checks if the output directory exists and creates it if it doesn't.
- If the number of interfaces in `idata` is less than 1000, abstraction is skipped.
- Various intermediate results are saved as CSV files in the specified output directory.
- The method utilizes external R processing for some computations, requiring R to be installed and accessible. Otherwise, only the three python methods are used.
Examples
--------
>>> result_idata, result_meta_idata = op.find_svi(idata, 'output_directory', '/path/to/R', abstract=True)
"""
from os.path import exists
from os import mkdir
if not exists(out_f):
print(f'Creating folder {out_f}')
mkdir(out_f)
if len(idata) < 1000:
print('number of interface is less than 2000, skipping abstraction')
abstract=False
if abstract:
som, idata, meta_idata = svi.abstract(idata, n_neighbors, alpha)
svi.find_svi(meta_idata, out_f, overwrite, R_path, som=som, n_jobs=n_jobs) #generating results
print('finished running all SVI tests')
if 'tf_support_count' in idata.var.columns:
meta_idata.var['tf_support_count'] = idata.var['tf_support_count']
svi_df, svi_df_strict = svi.combine_SVI(meta_idata, threshold=threshold, svi_number=svi_number)
if (overwrite) | (not exists(f'{out_f}pattern.csv')):
svi.SVI_patterns(meta_idata, svi_df_strict, pattern_prune_threshold=pattern_prune_threshold, predefined_pattern_number=predefined_pattern_number)
pd.DataFrame(meta_idata.obsm['pattern_score']).to_csv(f'{out_f}pattern.csv')
meta_idata.var.to_csv(f'{out_f}membership.csv')
else:
meta_idata.obsm['pattern_score'] = pd.read_csv(f'{out_f}pattern.csv', index_col=0).to_numpy()
meta_idata.var = pd.read_csv(f'{out_f}membership.csv', index_col=0)
svi.meta_pattern_to_idata(idata, meta_idata)
pd.DataFrame(meta_idata.obsm['pattern_score']).to_csv(f'{out_f}full_pattern.csv')
else:
svi.find_svi(idata, out_f, overwrite, R_path, n_jobs=n_jobs) #generating results
svi_df, svi_df_strict = svi.combine_SVI(idata, threshold=threshold, svi_number=svi_number)
if (overwrite) | (not exists(f'{out_f}pattern.csv')):
svi.SVI_patterns(idata, svi_df_strict, pattern_prune_threshold=pattern_prune_threshold, predefined_pattern_number=predefined_pattern_number)
pd.DataFrame(idata.obsm['pattern_score']).to_csv(f'{out_f}pattern.csv')
idata.var.to_csv(f'{out_f}membership.csv')
else:
idata.obsm['pattern_score'] = pd.read_csv(f'{out_f}pattern.csv', index_col=0).to_numpy()
idata.var = pd.read_csv(f'{out_f}membership.csv', index_col=0)
meta_idata = None
idata.var[[f'pattern_correlation_{x}' for x in range(idata.obsm['pattern_score'].shape[1])]] = 0
corr_df=pd.concat([idata[:,idata.var['is_svi']==1].to_df(),pd.DataFrame(idata.obsm['pattern_score'],index=idata.obs_names)],axis=1).corr().loc[idata[:,idata.var['is_svi']==1].var_names, range(idata.obsm['pattern_score'].shape[1])]
idata.var.loc[idata[:,idata.var['is_svi']==1].var_names, [f'pattern_correlation_{x}' for x in range(idata.obsm['pattern_score'].shape[1])]] = corr_df.to_numpy()
if 'tf_corr' in idata.uns.keys():
idata.uns['tf_corr'].replace(np.nan, 0, inplace=True)
# svi.tf_pattern_to_idata(idata, idata_tf)
return idata, meta_idata
def find_svi_without_pattern(self, idata, out_f, R_path, abstract=True, overwrite=False, n_neighbors=5, alpha=0.3, threshold=0.01, pattern_prune_threshold=1e-6, predefined_pattern_number=-1, svi_number=10, n_jobs=10):
from os.path import exists
from os import mkdir
if 'tf_support_count' not in idata.var.columns:
print('not using tf info')
idata.var['tf_support_count'] = 1
idata.uns['tf_corr'] = pd.DataFrame()
if not exists(out_f):
print(f'Creating folder {out_f}')
mkdir(out_f)
if len(idata) < 1000:
print('number of interface is less than 1000, skipping abstraction')
abstract=False
if abstract:
som, idata, meta_idata = svi.abstract(idata, n_neighbors, alpha)
svi.find_svi(meta_idata, out_f, overwrite, R_path, som=som, n_jobs=n_jobs, skip_metric=True) #generating results
if 'tf_support_count' in idata.var.columns:
meta_idata.var['tf_support_count'] = idata.var['tf_support_count']
svi_df, svi_df_strict = svi.combine_SVI(meta_idata, threshold=threshold, svi_number=svi_number)
else:
svi.find_svi(idata, out_f, overwrite, R_path, n_jobs=n_jobs, skip_metric=True) #generating results
svi_df, svi_df_strict = svi.combine_SVI(idata, threshold=threshold, svi_number=svi_number)
meta_idata = None
idata.uns['tf_corr'].replace(np.nan, 0, inplace=True)
return idata, meta_idata
# DEVELOPTAL FUNCTION
def prep_exp(self,
adata_input,
cluster_key='type',
is_human=True,
cutoff=None,
imputation=False,
lr_raw = None,
is_sc=True,
normalize_total = False,
):
adata = adata_input.copy()
del adata_input
# Prep: find lr pairs and subset adata to have only lr genes
lr_raw = preprocess.subset_lr(is_human)
lr_df, adata = preprocess.subset_adata(adata, lr_raw, imputation)
# Step: construct interface
interface_cell_pair, interface_meta = preprocess.find_interfaces(adata, cluster_key, lr_df, cutoff=cutoff, is_sc=is_sc)
# Step: compute interface profile
score, direction = preprocess.score(adata, lr_df, interface_cell_pair, interface_meta)
# idata object construction
idata = preprocess.idata_construct(score, direction, interface_meta, lr_df, lr_raw, adata, drop=False, normalize_total=normalize_total)
return idata
def prep_compare(self,
adata_input,
cluster_key='type',
is_human=True,
cutoff=None,
imputation=False,
itermax = 1000,
lr_raw = None,
is_sc=True,
normalize_total = False,
):
adata = adata_input.copy()
del adata_input
# Prep: find lr pairs and subset adata to have only lr genes
if lr_raw is None:
lr_raw = preprocess.subset_lr(is_human)
lr_df, adata = preprocess.subset_adata(adata, lr_raw, imputation, normalize_total)
# Step: construct interface
interface_cell_pair, interface_meta = preprocess.find_interfaces(adata, cluster_key, lr_df, cutoff=cutoff, is_sc=is_sc)
# Step: compute interface profile
# score, direction = preprocess.score(adata, lr_df, interface_cell_pair, interface_meta)
score, direction, _, _, _ = preprocess.score_ot(adata, lr_df, interface_meta, interface_cell_pair, itermax=itermax)
# score, direction, _, _, _ = preprocess.score_ot_entropy(adata, lr_df, interface_meta, interface_cell_pair, itermax=itermax)
# score, direction, _, _, _ = preprocess.score_ot_weighted(adata, lr_df, interface_meta, interface_cell_pair, itermax=itermax, alpha=alpha)
# idata object construction
idata = preprocess.idata_construct(score, direction, interface_meta, lr_df, lr_raw, adata, drop=False, normalize_total=normalize_total)
score_old, direction_old = preprocess.score(adata, lr_df, interface_cell_pair, interface_meta)
idata_old = preprocess.idata_construct(score_old, direction_old, interface_meta, lr_df, lr_raw, adata)
return idata, idata_old