[ PROMPT_NODE_26210 ]

manipulation

[ SKILL_DOCUMENTATION ]

# 数据操作

用于转换、提取子集和操作 AnnData 对象的各种方法。

## 提取子集

### 按索引

```python
import anndata as ad
import numpy as np

adata = ad.AnnData(X=np.random.rand(1000, 2000))

# 整数索引
subset = adata[0:100, 0:500]  # 前 100 个观测值，前 500 个变量

# 索引列表
obs_indices = [0, 10, 20, 30, 40]
var_indices = [0, 1, 2, 3, 4]
subset = adata[obs_indices, var_indices]

# 单个观测值或变量
single_obs = adata[0, :]
single_var = adata[:, 0]
```

### 按名称

```python
import pandas as pd

# 创建带有命名索引的对象
obs_names = [f'cell_{i}' for i in range(1000)]
var_names = [f'gene_{i}' for i in range(2000)]
adata = ad.AnnData(
    X=np.random.rand(1000, 2000),
    obs=pd.DataFrame(index=obs_names),
    var=pd.DataFrame(index=var_names)
)

# 按观测值名称提取子集
subset = adata[['cell_0', 'cell_1', 'cell_2'], :]

# 按变量名称提取子集
subset = adata[:, ['gene_0', 'gene_10', 'gene_20']]

# 同时操作两个轴
subset = adata[['cell_0', 'cell_1'], ['gene_0', 'gene_1']]
```

### 按布尔掩码

```python
# 创建布尔掩码
high_count_obs = np.random.rand(1000) > 0.5
high_var_genes = np.random.rand(2000) > 0.7

# 使用掩码提取子集
subset = adata[high_count_obs, :]
subset = adata[:, high_var_genes]
subset = adata[high_count_obs, high_var_genes]
```

### 按元数据条件

```python
# 添加元数据
adata.obs['cell_type'] = np.random.choice(['A', 'B', 'C'], 1000)
adata.obs['quality_score'] = np.random.rand(1000)
adata.var['highly_variable'] = np.random.rand(2000) > 0.8

# 按细胞类型过滤
t_cells = adata[adata.obs['cell_type'] == 'A']

# 按多个条件过滤
high_quality_a_cells = adata[
    (adata.obs['cell_type'] == 'A') &
    (adata.obs['quality_score'] > 0.7)
]

# 按变量元数据过滤
hv_genes = adata[:, adata.var['highly_variable']]

# 复杂条件
filtered = adata[
    (adata.obs['quality_score'] > 0.5) &
    (adata.obs['cell_type'].isin(['A', 'B'])),
    adata.var['highly_variable']
]
```

## 转置

```python
# 转置 AnnData 对象（交换观测值和变量）
adata_T = adata.T

# 形状改变
print(adata.shape)    # (1000, 2000)
print(adata_T.shape)  # (2000, 1000)

# obs 和 var 被交换
print(adata.obs.head())    # 观测值元数据
print(adata_T.var.head())  # 相同数据，现在作为变量元数据

# 当数据方向相反时非常有用
# 常见于某些以基因为行的文件格式
```

## 复制

### 完全复制

数据来源：claude-code-templates（MIT），中文翻译由 AI 生成。详见关于我们。

BAGUA AI