{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "source": [
    "# ArchR cell-by-peak matrix to Scanpy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook shows how to read the matrix, peak, and barcode files exported from ArchR and assemble them into a cell-by-peak matrix in Scanpy's AnnData format."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate the cell-by-peak matrix in Scanpy AnnData format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# - load the ArchR output\n",
    "data_dir = '../../data/iPSC_example/ArchR_files'\n",
    "\n",
    "mtx_file = os.path.join(data_dir, 'pm_matrix.mtx')\n",
    "peak_file = os.path.join(data_dir, 'pm_peak.csv')\n",
    "barcode_file = os.path.join(data_dir, 'pm_barcode.csv')\n",
    "\n",
    "adata = sc.read_mtx(mtx_file)\n",
    "peak = pd.read_csv(peak_file, sep=',', header=0, index_col=0)\n",
    "barcode = pd.read_csv(barcode_file, sep=',', header=0, index_col=0)\n",
    "\n",
    "# - the peak feature should be in format of 'chrxxx_xxx_xxx'\n",
    "#   (built column-wise; astype(str) also covers non-string seqnames)\n",
    "peak_names = (\n",
    "    peak['seqnames'].astype(str)\n",
    "    + '_' + peak['start'].astype(str)\n",
    "    + '_' + peak['end'].astype(str)\n",
    ")\n",
    "peak.index = peak_names.to_numpy()\n",
    "\n",
    "# - transpose so that rows line up with the barcode table (obs)\n",
    "#   and columns line up with the peak table (var)\n",
    "adata_atac = adata.T\n",
    "\n",
    "# - fail early, with a readable message, if the tables do not match the matrix\n",
    "assert adata_atac.shape == (len(barcode), len(peak)), (\n",
    "    f'matrix shape {adata_atac.shape} does not match '\n",
    "    f'{len(barcode)} barcodes x {len(peak)} peaks'\n",
    ")\n",
    "adata_atac.obs = barcode\n",
    "adata_atac.var = peak"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 13387 × 246132\n",
       " obs: 'BlacklistRatio', 'DoubletEnrichment', 'DoubletScore', 'nDiFrags', 'nFrags', 'nMonoFrags', 'nMultiFrags', 'NucleosomeRatio', 'PassQC', 'PromoterRatio', 'ReadsInBlacklist', 'ReadsInPromoter', 'ReadsInTSS', 'Sample', 'TSSEnrichment', 'Clusters', 'ReadsInPeaks', 'FRIP'\n",
       " var: 'seqnames', 'start', 'end', 'width', 'strand', 'score', 'replicateScoreQuantile', 'groupScoreQuantile', 'Reproducibility', 'GroupReplicate', 'distToGeneStart', 'nearestGene', 'peakType', 'distToTSS', 'nearestTSS', 'GC', 'idx', 'N'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_atac"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# - save the result\n",
    "save_dir = '../../data/iPSC_example/ATAC_data'\n",
    "os.makedirs(save_dir, exist_ok=True)\n",
    "adata_atac.write(os.path.join(save_dir, 'adata_atac_raw.h5ad'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pytorch2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}