diff --git a/docs/tutorials/cell_level_ldsc_analysis.ipynb b/docs/tutorials/cell_level_ldsc_analysis.ipynb new file mode 100644 index 0000000..ec5a3f6 --- /dev/null +++ b/docs/tutorials/cell_level_ldsc_analysis.ipynb @@ -0,0 +1,7306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Cell-Level LDSC analysis\n", + "\n", + "This tutorial demonstrates how to perform cell-type-specific LD Score regression (LDSC) analysis through the `cellink` package. The `cellink` package provides a unified interface to LDSC and its preparation scripts, making it easier to perform comprehensive genetic analyses that identify which cell types are most relevant to complex traits and diseases.\n", + "\n", + "This notebook assumes familiarity with single-cell data processing and basic statistical genetics concepts. The `cellink` package provides convenient wrapper functions that handle data preparation and formatting for LDSC. For LDSC installation, please follow instructions [here](https://github.com/bulik/ldsc). We recommend utilizing LDSC via a Docker image like [this one](https://hub.docker.com/r/zijingliu/ldsc). For usage on HPCs, please consider using singularity or enroot." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "We begin by importing necessary libraries and defining key parameters for our analysis. The `cellink` package provides wrapper functions for LDSC that automatically handle preprocessing, data formatting and preparation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import os\n", + "\n", + "from cellink._core import DAnn, GAnn\n", + "from cellink.resources import get_onek1k\n", + "from cellink.tl.external import (\n", + " preprocess_for_sldsc,\n", + " generate_sldsc_genesets,\n", + " generate_gene_coord_file,\n", + " configure_ldsc_runner,\n", + " make_annot_from_donor_data,\n", + " munge_sumstats,\n", + " estimate_ld_scores_from_donor_data,\n", + " estimate_heritability,\n", + " estimate_genetic_correlation,\n", + " compute_ld_scores_with_annotations_from_donor_data,\n", + " estimate_celltype_specific_heritability,\n", + ")\n", + "from cellink.resources import get_1000genomes_ld_scores, get_1000genomes_ld_weights\n", + "from cellink.resources import get_gwas_catalog_study_summary_stats\n", + "\n", + "# Analysis parameters\n", + "chrom = 22\n", + "cell_type = \"CD8 Naive\"\n", + "celltype_key = \"predicted.celltype.l2\"\n", + "original_donor_col = \"donor_id\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/project/genomics/ayshan\n" + ] + } + ], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load and Prepare Data\n", + "\n", + "We load the OneK1K dataset, which contains both genotype and single-cell expression data. We also add gene annotations from Ensembl for our analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pybiomart\n", + " Using cached pybiomart-0.2.0-py3-none-any.whl.metadata (1.1 kB)\n", + "Collecting future (from pybiomart)\n", + " Using cached future-1.0.0-py3-none-any.whl.metadata (4.0 kB)\n", + "Requirement already satisfied: pandas in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (2.3.3)\n", + "Requirement already satisfied: requests in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (2.32.5)\n", + "Collecting requests-cache (from pybiomart)\n", + " Using cached requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)\n", + "Requirement already satisfied: numpy>=1.23.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->pybiomart) (1.17.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in 
/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (2025.11.12)\n", + "Requirement already satisfied: attrs>=21.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (25.4.0)\n", + "Collecting cattrs>=22.2 (from requests-cache->pybiomart)\n", + " Using cached cattrs-25.3.0-py3-none-any.whl.metadata (8.4 kB)\n", + "Requirement already satisfied: platformdirs>=2.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (4.5.0)\n", + "Collecting url-normalize>=1.4 (from requests-cache->pybiomart)\n", + " Using cached url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Requirement already satisfied: typing-extensions>=4.14.0 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from cattrs>=22.2->requests-cache->pybiomart) (4.15.0)\n", + "Using cached pybiomart-0.2.0-py3-none-any.whl (10 kB)\n", + "Using cached future-1.0.0-py3-none-any.whl (491 kB)\n", + "Using cached requests_cache-1.2.1-py3-none-any.whl (61 kB)\n", + "Using cached cattrs-25.3.0-py3-none-any.whl (70 kB)\n", + "Using cached url_normalize-2.2.1-py3-none-any.whl (14 kB)\n", + "Installing collected packages: url-normalize, future, cattrs, requests-cache, pybiomart\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5/5\u001b[0m [pybiomart]━\u001b[0m \u001b[32m4/5\u001b[0m [pybiomart]ache]\n", + "\u001b[1A\u001b[2KSuccessfully installed cattrs-25.3.0 future-1.0.0 pybiomart-0.2.0 
requests-cache-1.2.1 url-normalize-2.2.1\n" + ] + } + ], + "source": [ + "# pybiomart is needed for the Ensembl BioMart query below; pinned to the version this tutorial was run with\n", + "%pip install -q pybiomart==0.2.0\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/onek1k_cellxgene.h5ad already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/OneK1K.noGP.vcf.gz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/OneK1K.noGP.vcf.gz.csi already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/gene_counts_Ensembl_105_phenotype_metadata.tsv.gz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas/core/internals/blocks.py:2661: RuntimeWarning: invalid value encountered in cast\n", + " return self.values.astype(_dtype_obj)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset shape: (981, 10595884, 1248980, 36469)\n" + ] + } + ], + "source": [
 + "# Load the dataset. The paths default to the locations used when this tutorial was written;\n", + "# set CELLINK_ONEK1K_CONFIG and/or CELLINK_DATA_HOME to run the notebook elsewhere.\n", + "onek1k_config_path = os.environ.get(\"CELLINK_ONEK1K_CONFIG\", \"cellink/src/cellink/resources/config/onek1k.yaml\")\n", + "onek1k_data_home = os.environ.get(\"CELLINK_DATA_HOME\", \"/project/genomics/ayshan/1k1k_dataset\")\n", + "dd = get_onek1k(config_path=onek1k_config_path, data_home=onek1k_data_home, verify_checksum=False)\n", + "print(f\"Dataset shape: {dd.shape}\")\n", + "\n", + "\n", + "# Add gene annotations from Ensembl\n", + "def _get_ensembl_gene_id_start_end_chr():\n", + "    \"\"\"Query Ensembl BioMart (hsapiens_gene_ensembl) for gene coordinates.\n", + "\n", + "    Returns a DataFrame indexed by the \"Gene stable ID\" column, with the start, end and\n", + "    chromosome columns renamed to GAnn.start, GAnn.end and GAnn.chrom.\n", + "    \"\"\"\n", + "    # pybiomart is installed by the preceding cell, so it is imported here\n", + "    # rather than in the import cell at the top of the notebook.\n", + "    from pybiomart import Server\n", + "\n", + "    server = Server(host=\"http://www.ensembl.org\")\n", + "    dataset = server.marts[\"ENSEMBL_MART_ENSEMBL\"].datasets[\"hsapiens_gene_ensembl\"]\n", + "    gene_coords = dataset.query(\n", + "        attributes=[\"ensembl_gene_id\", \"start_position\", \"end_position\", \"chromosome_name\"]\n", + "    )\n", + "    # BioMart returns display names as column headers; map them onto cellink's GAnn names.\n", + "    return gene_coords.set_index(\"Gene stable ID\").rename(\n", + "        columns={\n", + "            \"Gene start (bp)\": GAnn.start,\n", + "            \"Gene end (bp)\": GAnn.end,\n", + "            \"Chromosome/scaffold name\": GAnn.chrom,\n", + "        }\n", + "    )\n", + "\n", + "\n", + "ensembl_gene_id_start_end_chr = _get_ensembl_gene_id_start_end_chr()\n", + "dd.C.var = dd.C.var.join(ensembl_gene_id_start_end_chr)\n", + "\n", + "# Set up donor information\n", + "dd.C.obs[DAnn.donor] = dd.C.obs[original_donor_col]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "dd.G.obs[\"donor_id\"] = dd.G.obs.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cell-Type-Specific LDSC analysis\n", + "\n", + "Cell-type-specific LDSC analysis helps identify which cell types are most relevant to complex traits by testing whether genetic variants associated with a trait are enriched in genes specifically expressed in certain cell types. This analysis follows the method described in [Duncan et al. 2025](https://www.nature.com/articles/s41593-024-01834-w)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 1: Preprocessing and Gene Set Generation\n", + "First, we preprocess the single-cell data to compute cell-type-specific gene expression and identify genes that are specifically expressed in each cell type." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Applying log1p transformation\n", + "INFO:cellink.tl.external._sldsc_utils:Log1p applied.\n", + "INFO:cellink.tl.external._sldsc_utils:Querying Ensembl BioMart (GRCh38)...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetched annotations for 86371 genes from GRCh38\n", + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from Gene IDs\n", + "INFO:cellink.tl.external._sldsc_utils:Dropping conflicting columns from adata.var before merge: ['chrom', 'start', 'end']\n", + "INFO:cellink.tl.external._sldsc_utils:Annotated 35522 / 36469 genes.\n", + "INFO:cellink.tl.external._sldsc_utils:Using annotation columns: gene=gene, biotype=gene_biotype, chr=chrom, start=start, end=end\n", + "INFO:cellink.tl.external._sldsc_utils:Applying gene filters\n", + "INFO:cellink.tl.external._sldsc_utils:Protein-coding genes: 19273\n", + "INFO:cellink.tl.external._sldsc_utils:Expressed genes: 31285\n", + "INFO:cellink.tl.external._sldsc_utils:Unique gene names: 36469\n", + "INFO:cellink.tl.external._sldsc_utils:Non-MHC genes: 36469\n", + "INFO:cellink.tl.external._sldsc_utils:Keeping 18068 / 36469 genes after filtering\n", + "INFO:cellink.tl.external._sldsc_utils:Computing mean expression for predicted.celltype.l2\n", + "INFO:cellink.tl.external._sldsc_utils:Computing specificity scores\n", + "INFO:cellink.tl.external._sldsc_utils:Final data shape: (1248980, 18068)\n", + "INFO:cellink.tl.external._sldsc_utils:Mean expression shape: (18068, 31)\n", + "INFO:cellink.tl.external._sldsc_utils:Specificity shape: (18068, 31)\n" + ] + } + ], + "source": [ + "dd.C.var[\"gene\"] = dd.C.var_names\n", + "adata = dd.C.copy()\n", + "adata_filtered, mean_expr, specificity = preprocess_for_sldsc(\n", + " adata,\n", 
+ " celltype_col=celltype_key,\n", + " gene_col=\"gene\",\n", + " gene_identifier_mode=\"ensembl\",\n", + " genome_build=\"GRCh38\",\n", + " inplace=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we generate gene sets for each cell type containing the top 10% most specifically expressed genes. These gene sets will be used to create genomic annotations." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from Gene IDs\n", + "INFO:cellink.tl.external._sldsc_utils:Writing gene sets to ldsc_genesets\n", + "INFO:cellink.tl.external._sldsc_utils:Overlapping genes: 18068/18068\n", + "INFO:cellink.tl.external._sldsc_utils:Selecting top 1807 genes (10.0%) per cell type\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Wrote control gene set with 18068 genes\n", + "INFO:cellink.tl.external._sldsc_utils:Generated 31 cell-type-specific gene sets\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cell_typen_genesoutput_path
0ASDC1807ldsc_genesets/ASDC.GeneSet
1B intermediate1807ldsc_genesets/B_intermediate.GeneSet
2B memory1807ldsc_genesets/B_memory.GeneSet
3B naive1807ldsc_genesets/B_naive.GeneSet
4CD4 CTL1807ldsc_genesets/CD4_CTL.GeneSet
5CD4 Naive1807ldsc_genesets/CD4_Naive.GeneSet
6CD4 Proliferating1807ldsc_genesets/CD4_Proliferating.GeneSet
7CD4 TCM1807ldsc_genesets/CD4_TCM.GeneSet
8CD4 TEM1807ldsc_genesets/CD4_TEM.GeneSet
9CD8 Naive1807ldsc_genesets/CD8_Naive.GeneSet
10CD8 Proliferating1807ldsc_genesets/CD8_Proliferating.GeneSet
11CD8 TCM1807ldsc_genesets/CD8_TCM.GeneSet
12CD8 TEM1807ldsc_genesets/CD8_TEM.GeneSet
13CD14 Mono1807ldsc_genesets/CD14_Mono.GeneSet
14CD16 Mono1807ldsc_genesets/CD16_Mono.GeneSet
15Doublet1807ldsc_genesets/Doublet.GeneSet
16Eryth1807ldsc_genesets/Eryth.GeneSet
17HSPC1807ldsc_genesets/HSPC.GeneSet
18ILC1807ldsc_genesets/ILC.GeneSet
19MAIT1807ldsc_genesets/MAIT.GeneSet
20NK1807ldsc_genesets/NK.GeneSet
21NK Proliferating1807ldsc_genesets/NK_Proliferating.GeneSet
22NK_CD56bright1807ldsc_genesets/NK_CD56bright.GeneSet
23Plasmablast1807ldsc_genesets/Plasmablast.GeneSet
24Platelet1807ldsc_genesets/Platelet.GeneSet
25Treg1807ldsc_genesets/Treg.GeneSet
26cDC11807ldsc_genesets/cDC1.GeneSet
27cDC21807ldsc_genesets/cDC2.GeneSet
28dnT1807ldsc_genesets/dnT.GeneSet
29gdT1807ldsc_genesets/gdT.GeneSet
30pDC1807ldsc_genesets/pDC.GeneSet
\n", + "
" + ], + "text/plain": [ + " cell_type n_genes output_path\n", + "0 ASDC 1807 ldsc_genesets/ASDC.GeneSet\n", + "1 B intermediate 1807 ldsc_genesets/B_intermediate.GeneSet\n", + "2 B memory 1807 ldsc_genesets/B_memory.GeneSet\n", + "3 B naive 1807 ldsc_genesets/B_naive.GeneSet\n", + "4 CD4 CTL 1807 ldsc_genesets/CD4_CTL.GeneSet\n", + "5 CD4 Naive 1807 ldsc_genesets/CD4_Naive.GeneSet\n", + "6 CD4 Proliferating 1807 ldsc_genesets/CD4_Proliferating.GeneSet\n", + "7 CD4 TCM 1807 ldsc_genesets/CD4_TCM.GeneSet\n", + "8 CD4 TEM 1807 ldsc_genesets/CD4_TEM.GeneSet\n", + "9 CD8 Naive 1807 ldsc_genesets/CD8_Naive.GeneSet\n", + "10 CD8 Proliferating 1807 ldsc_genesets/CD8_Proliferating.GeneSet\n", + "11 CD8 TCM 1807 ldsc_genesets/CD8_TCM.GeneSet\n", + "12 CD8 TEM 1807 ldsc_genesets/CD8_TEM.GeneSet\n", + "13 CD14 Mono 1807 ldsc_genesets/CD14_Mono.GeneSet\n", + "14 CD16 Mono 1807 ldsc_genesets/CD16_Mono.GeneSet\n", + "15 Doublet 1807 ldsc_genesets/Doublet.GeneSet\n", + "16 Eryth 1807 ldsc_genesets/Eryth.GeneSet\n", + "17 HSPC 1807 ldsc_genesets/HSPC.GeneSet\n", + "18 ILC 1807 ldsc_genesets/ILC.GeneSet\n", + "19 MAIT 1807 ldsc_genesets/MAIT.GeneSet\n", + "20 NK 1807 ldsc_genesets/NK.GeneSet\n", + "21 NK Proliferating 1807 ldsc_genesets/NK_Proliferating.GeneSet\n", + "22 NK_CD56bright 1807 ldsc_genesets/NK_CD56bright.GeneSet\n", + "23 Plasmablast 1807 ldsc_genesets/Plasmablast.GeneSet\n", + "24 Platelet 1807 ldsc_genesets/Platelet.GeneSet\n", + "25 Treg 1807 ldsc_genesets/Treg.GeneSet\n", + "26 cDC1 1807 ldsc_genesets/cDC1.GeneSet\n", + "27 cDC2 1807 ldsc_genesets/cDC2.GeneSet\n", + "28 dnT 1807 ldsc_genesets/dnT.GeneSet\n", + "29 gdT 1807 ldsc_genesets/gdT.GeneSet\n", + "30 pDC 1807 ldsc_genesets/pDC.GeneSet" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary = generate_sldsc_genesets(specificity, dd.C, out_dir=\"ldsc_genesets\", top_frac=0.10, overwrite=True)\n", + "summary" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "We also need to generate a gene coordinate file that maps genes to their genomic positions:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from Ensembl GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Querying Ensembl BioMart (GRCh38)...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetched annotations for 86371 genes from GRCh38\n", + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from gene identifiers\n", + "WARNING:cellink.tl.external._sldsc_utils:Removed 2 duplicate gene entries\n", + "INFO:cellink.tl.external._sldsc_utils:Writing 86369 gene coordinates to gene_coords.txt\n", + "INFO:cellink.tl.external._sldsc_utils:Successfully created gene coordinate file: gene_coords.txt\n" + ] + } + ], + "source": [ + "generate_gene_coord_file(\"gene_coords.txt\", gene_identifier_mode=\"ensembl\", genome_build=\"GRCh38\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 2: Configure LDSC Runner\n", + "Before running LDSC commands, we need to configure the runner. LDSC can be executed via a local installation or through container solutions like Docker or Singularity. Sample configuration files are provided in `./src/cellink/tl/external/config/` for local execution, Docker, and Singularity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "runner = configure_ldsc_runner(config_path=\"cellink/src/cellink/tl/external/config/ldsc_singularity.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 3: Prepare Data for Analysis\n", + "To speed up computation in this tutorial, we keep only a random 0.1% subset of the SNPs on each autosome (chromosomes 1-22). Note: In a real analysis, you would use all SNPs on all chromosomes without subsetting." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═ DonorData(n_donors=981, n_cells_per_donor=[333-3,511], donor_id='donor_id') ═══════════════════════════════╗\n",
+       "║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║\n",
+       "║ ┃ G (donors)                                          C (cells)                                          ┃ ║\n",
+       "║ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ ║\n",
+       "║ │ AnnData object with n_obs × n_vars = 981 × 10,299  │ AnnData object with n_obs × n_vars = 1,248,980 ×   │ ║\n",
+       "║ │                                                    │ 36,469                                             │ ║\n",
+       "║ │     obs: 'donor_id'                                │     obs: 'orig.ident', 'nCount_RNA',               │ ║\n",
+       "║ │                                                    │ 'nFeature_RNA', 'percent.mt', 'donor_id',          │ ║\n",
+       "║ │                                                    │ 'pool_number', 'predicted.celltype.l2',            │ ║\n",
+       "║ │                                                    │ 'predicted.celltype.l2.score', 'age',              │ ║\n",
+       "║ │                                                    │ 'organism_ontology_term_id',                       │ ║\n",
+       "║ │                                                    │ 'tissue_ontology_term_id',                         │ ║\n",
+       "║ │                                                    │ 'assay_ontology_term_id',                          │ ║\n",
+       "║ │                                                    │ 'disease_ontology_term_id',                        │ ║\n",
+       "║ │                                                    │ 'cell_type_ontology_term_id',                      │ ║\n",
+       "║ │                                                    │ 'self_reported_ethnicity_ontology_term_id',        │ ║\n",
+       "║ │                                                    │ 'development_stage_ontology_term_id',              │ ║\n",
+       "║ │                                                    │ 'sex_ontology_term_id', 'is_primary_data',         │ ║\n",
+       "║ │                                                    │ 'suspension_type', 'tissue_type', 'cell_type',     │ ║\n",
+       "║ │                                                    │ 'assay', 'disease', 'organism', 'sex', 'tissue',   │ ║\n",
+       "║ │                                                    │ 'self_reported_ethnicity', 'development_stage',    │ ║\n",
+       "║ │                                                    │ 'observation_joinid'                               │ ║\n",
+       "║ │     var: 'chrom', 'pos', 'a0', 'a1', 'AF', 'AN',   │     var: 'vst.mean', 'vst.variance',               │ ║\n",
+       "║ │ 'ER2', 'IMPUTED', 'maf', 'NS', 'R2', 'TYPED',      │ 'vst.variance.expected',                           │ ║\n",
+       "║ │ 'TYPED_ONLY', 'id', 'id_mask', 'length',           │ 'vst.variance.standardized', 'vst.variable',       │ ║\n",
+       "║ │ 'quality', 'pos_hg19', 'id_hg19'                   │ 'feature_is_filtered', 'feature_name',             │ ║\n",
+       "║ │                                                    │ 'feature_reference', 'feature_biotype',            │ ║\n",
+       "║ │                                                    │ 'feature_length', 'feature_type', 'start', 'end',  │ ║\n",
+       "║ │                                                    │ 'chrom', 'gene', 'gene_upper'                      │ ║\n",
+       "║ │     uns: 'kinship'                                 │     uns: 'cell_type_ontology_term_id_colors',      │ ║\n",
+       "║ │                                                    │ 'citation', 'default_embedding',                   │ ║\n",
+       "║ │                                                    │ 'schema_reference', 'schema_version', 'title'      │ ║\n",
+       "║ │     obsm: 'gPCs'                                   │     obsm: 'X_azimuth_spca', 'X_azimuth_umap',      │ ║\n",
+       "║ │                                                    │ 'X_harmony', 'X_pca', 'X_umap'                     │ ║\n",
+       "║ │     varm: 'filter'                                 │     varm: 'PCs'                                    │ ║\n",
+       "║ └────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘ ║\n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "╔═\u001b[1;38;5;197m DonorData(n_donors=981, n_cells_per_donor=[333-3,511], donor_id='donor_id') \u001b[0m═══════════════════════════════╗\n", + "║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║\n", + "║ ┃\u001b[1;38;5;197m \u001b[0m\u001b[1;38;5;197mG (donors) \u001b[0m\u001b[1;38;5;197m \u001b[0m┃\u001b[1;38;5;197m \u001b[0m\u001b[1;38;5;197mC (cells) \u001b[0m\u001b[1;38;5;197m \u001b[0m┃ ║\n", + "║ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ ║\n", + "║ │ AnnData object with n_obs × n_vars = 981 × 10,299 │ AnnData object with n_obs × n_vars = 1,248,980 × │ ║\n", + "║ │ │ 36,469 │ ║\n", + "║ │ obs: 'donor_id' │ obs: 'orig.ident', 'nCount_RNA', │ ║\n", + "║ │ │ 'nFeature_RNA', 'percent.mt', \u001b[1;38;5;197m'donor_id', \u001b[0m │ ║\n", + "║ │ │ 'pool_number', 'predicted.celltype.l2', │ ║\n", + "║ │ │ 'predicted.celltype.l2.score', 'age', │ ║\n", + "║ │ │ 'organism_ontology_term_id', │ ║\n", + "║ │ │ 'tissue_ontology_term_id', │ ║\n", + "║ │ │ 'assay_ontology_term_id', │ ║\n", + "║ │ │ 'disease_ontology_term_id', │ ║\n", + "║ │ │ 'cell_type_ontology_term_id', │ ║\n", + "║ │ │ 'self_reported_ethnicity_ontology_term_id', │ ║\n", + "║ │ │ 'development_stage_ontology_term_id', │ ║\n", + "║ │ │ 'sex_ontology_term_id', 'is_primary_data', │ ║\n", + "║ │ │ 'suspension_type', 'tissue_type', 'cell_type', │ ║\n", + "║ │ │ 'assay', 'disease', 'organism', 'sex', 'tissue', │ ║\n", + "║ │ │ 'self_reported_ethnicity', 'development_stage', │ ║\n", + "║ │ │ 'observation_joinid' │ ║\n", + "║ │ var: 'chrom', 'pos', 'a0', 'a1', 'AF', 'AN', │ var: 'vst.mean', 'vst.variance', │ ║\n", + "║ │ 'ER2', 'IMPUTED', 'maf', 'NS', 'R2', 'TYPED', │ 'vst.variance.expected', │ ║\n", + "║ │ 'TYPED_ONLY', 'id', 'id_mask', 'length', │ 'vst.variance.standardized', 'vst.variable', │ ║\n", + "║ │ 'quality', 'pos_hg19', 'id_hg19' │ 
'feature_is_filtered', 'feature_name', │ ║\n", + "║ │ │ 'feature_reference', 'feature_biotype', │ ║\n", + "║ │ │ 'feature_length', 'feature_type', 'start', 'end', │ ║\n", + "║ │ │ 'chrom', 'gene', 'gene_upper' │ ║\n", + "║ │ uns: 'kinship' │ uns: 'cell_type_ontology_term_id_colors', │ ║\n", + "║ │ │ 'citation', 'default_embedding', │ ║\n", + "║ │ │ 'schema_reference', 'schema_version', 'title' │ ║\n", + "║ │ obsm: 'gPCs' │ obsm: 'X_azimuth_spca', 'X_azimuth_umap', │ ║\n", + "║ │ │ 'X_harmony', 'X_pca', 'X_umap' │ ║\n", + "║ │ varm: 'filter' │ varm: 'PCs' │ ║\n", + "║ └────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘ ║\n", + "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Keep a random fraction of the SNPs on each autosome (at least one SNP per chromosome).\n", + "SNP_FRACTION = 0.001\n", + "np.random.seed(42)\n", + "selected_idx_list = []\n", + "# The loop variable is deliberately not called `chrom`, so the `chrom` analysis\n", + "# parameter defined at the top of the notebook is not overwritten.\n", + "for autosome in range(1, 23):\n", + "    chrom_idx = np.where(dd.G.var.chrom == str(autosome))[0]\n", + "    if len(chrom_idx) == 0:\n", + "        # np.random.choice raises on an empty population; skip chromosomes without SNPs.\n", + "        continue\n", + "    n_snps = max(1, int(len(chrom_idx) * SNP_FRACTION))\n", + "    selected_idx_list.extend(np.random.choice(chrom_idx, n_snps, replace=False))\n", + "all_selected_idx = np.sort(np.asarray(selected_idx_list, dtype=int))\n", + "\n", + "# NOTE: this rebinds `dd` to the subset, so re-running this cell would subsample the\n", + "# already-subsampled data; re-run the data-loading cell first to start from the full dataset.\n", + "dd = dd[:, all_selected_idx, :, :].copy()\n", + "dd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 4: Create Cell-Type Annotations\n", + "Now we create binary annotation files that indicate which SNPs are near cell-type-specific genes. This is done using LDSC's `make_annot` functionality, wrapped by `cellink`. We process two cell types (CD8 Naive and CD4 Naive) across all chromosomes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:34<00:00, 34.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n" + ] + }, + { + "ename": "CalledProcessError", + "evalue": "Command 'singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000' returned non-zero exit status 1.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mCalledProcessError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 2\u001b[39m 
\u001b[38;5;28;01mfor\u001b[39;00m cell_type \u001b[38;5;129;01min\u001b[39;00m [\u001b[33m\"\u001b[39m\u001b[33mCD8 Naive\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mCD4 Naive\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 3\u001b[39m dd_chrom = dd.sel(G_var=dd.G.var.chrom == \u001b[38;5;28mstr\u001b[39m(chrom), C_var=dd.C.var.chrom == \u001b[38;5;28mstr\u001b[39m(chrom)).copy()\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m result = \u001b[43mmake_annot_from_donor_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mdd\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdd_chrom\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mannot_file\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcell_type\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m_\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mchrom\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m.annot.gz\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_set_file\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m./ldsc_genesets/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcell_type\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m 
\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m_\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m.GeneSet\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_coord_file\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgene_coords.txt\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[43mwindowsize\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m100000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/tl/external/_ldsc.py:1405\u001b[39m, in \u001b[36mmake_annot_from_donor_data\u001b[39m\u001b[34m(dd, annot_file, gene_set_file, gene_coord_file, windowsize, bed_file, nomerge, out_prefix, run, cleanup_files, plink_export_kwargs, runner, **kwargs)\u001b[39m\n\u001b[32m 1402\u001b[39m to_plink(dd.G, out_prefix, **plink_export_kwargs)\n\u001b[32m 1403\u001b[39m bimfile = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mout_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.bim\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1405\u001b[39m results = \u001b[43m_run_ldsc_make_annot\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1406\u001b[39m \u001b[43m \u001b[49m\u001b[43mbimfile\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbimfile\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1407\u001b[39m \u001b[43m \u001b[49m\u001b[43mannot_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mannot_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1408\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mgene_set_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgene_set_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1409\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_coord_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgene_coord_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1410\u001b[39m \u001b[43m \u001b[49m\u001b[43mwindowsize\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwindowsize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1411\u001b[39m \u001b[43m \u001b[49m\u001b[43mbed_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbed_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1412\u001b[39m \u001b[43m \u001b[49m\u001b[43mnomerge\u001b[49m\u001b[43m=\u001b[49m\u001b[43mnomerge\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1413\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1414\u001b[39m \u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1415\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1416\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1418\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m cleanup_files \u001b[38;5;129;01mand\u001b[39;00m run:\n\u001b[32m 1419\u001b[39m extensions = [\u001b[33m\"\u001b[39m\u001b[33m.bim\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m.fam\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m.bed\u001b[39m\u001b[33m\"\u001b[39m]\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/tl/external/_ldsc.py:1125\u001b[39m, in \u001b[36m_run_ldsc_make_annot\u001b[39m\u001b[34m(bimfile, annot_file, gene_set_file, gene_coord_file, windowsize, bed_file, nomerge, run, runner, **kwargs)\u001b[39m\n\u001b[32m 1122\u001b[39m file_paths.append(bed_file)\n\u001b[32m 1124\u001b[39m 
logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCreating annotation file: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcmd\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1125\u001b[39m \u001b[43mrunner\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun_command\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcmd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_paths\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfile_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 1126\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m annot_file\n\u001b[32m 1127\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/tl/external/_ldsc.py:166\u001b[39m, in \u001b[36mLDSCRunner.run_command\u001b[39m\u001b[34m(self, base_command, file_paths, check)\u001b[39m\n\u001b[32m 163\u001b[39m full_command = \u001b[38;5;28mself\u001b[39m._build_container_command(container_command, volumes)\n\u001b[32m 165\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mExecuting: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfull_command\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m166\u001b[39m result = \u001b[43msubprocess\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfull_command\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshell\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcheck\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcapture_output\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 167\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m result.stdout:\n\u001b[32m 168\u001b[39m logger.info(result.stdout)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/subprocess.py:571\u001b[39m, in \u001b[36mrun\u001b[39m\u001b[34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[39m\n\u001b[32m 569\u001b[39m retcode = process.poll()\n\u001b[32m 570\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m check \u001b[38;5;129;01mand\u001b[39;00m retcode:\n\u001b[32m--> \u001b[39m\u001b[32m571\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m CalledProcessError(retcode, process.args,\n\u001b[32m 572\u001b[39m output=stdout, stderr=stderr)\n\u001b[32m 573\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m CompletedProcess(process.args, retcode, stdout, stderr)\n", + "\u001b[31mCalledProcessError\u001b[39m: Command 'singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000' returned non-zero exit status 1." 
+ ] + } + ], + "source": [ + "for chrom in range(1, 23):  # one annotation file per autosome (chr1-chr22) and cell type\n", + "    dd_chrom = dd.sel(G_var=dd.G.var.chrom == str(chrom), C_var=dd.C.var.chrom == str(chrom)).copy()  # chromosome subset does not depend on the cell type, so build it once per chromosome\n", + "    for cell_type in [\"CD8 Naive\", \"CD4 Naive\"]:\n", + "        result = make_annot_from_donor_data(\n", + "            dd=dd_chrom,\n", + "            annot_file=f\"{cell_type.replace(' ', '_')}_{chrom}.annot.gz\",\n", + "            gene_set_file=f\"./ldsc_genesets/{cell_type.replace(' ', '_')}.GeneSet\",\n", + "            gene_coord_file=\"gene_coords.txt\",\n", + "            windowsize=100000,  # forwarded to make_annot.py as --windowsize\n", + "            runner=runner,\n", + "        )" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/project/genomics/ayshan\n", + "ls: cannot access 'ldsc_annot.bim': No such file or directory\n", + "ls: cannot access 'gene_coords.txt': No such file or directory\n", + "ls: cannot access 'ldsc_genesets/CD8_Naive.GeneSet': No such file or directory\n" + ] + } + ], + "source": [ + "# Debug helper: `!cd` runs in a throwaway subshell and does not affect later commands, so the container is started in the bind-mounted /data directory via --pwd instead.\n", + "\n", + "!singularity exec --pwd /data \\\n", + " -B /ictstr01/project_copy/genomics/ayshan:/data \\\n", + " -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data \\\n", + " /project/genomics/ayshan/containers/ldsc.sif \\\n", + " bash -lc 'pwd; ls; ls -l ldsc_annot.bim gene_coords.txt ldsc_genesets/CD8_Naive.GeneSet'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 5: Compute Cell-Type-Specific LD Scores\n", + "With annotations created, we now compute LD scores that incorporate cell-type-specific information. These LD scores quantify how much genetic variation near cell-type-specific genes contributes to linkage disequilibrium patterns. We here use the function `compute_ld_scores_with_annotations_from_donor_data`. 
Alternatively, this step can be run on 1000 Genomes PLINK data via `compute_ld_scores_with_annotations_from_bimfile` (the PLINK files can be downloaded via `cellink.resources.get_1000genomes_plink_files`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.1 --annot CD8_Naive_1.annot.gz --out cts_ldscores_CD8_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.1 --annot CD8_Naive_1.annot.gz --out cts_ldscores_CD8_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.1 \\\n", + "--bfile cts_ldscores_CD8_Naive.1 \\\n", +
"--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:51:21 2025\n", + "Read list of 796 SNPs from cts_ldscores_CD8_Naive.1.bim\n", + "Read 1 annotations for 796 SNPs from CD8_Naive_1.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.1.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to cts_ldscores_CD8_Naive.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 0.1780\n", + "std 0.1526 0.3931\n", + "min 0.0092 -0.0415\n", + "25% 0.0432 -0.0082\n", + "50% 0.1300 0.0062\n", + "75% 0.3086 0.0275\n", + "max 0.4995 1.8826\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0341\n", + "L2 0.0341 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 125\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 796.0000\n", + "mean 0.1570\n", + "std 0.3641\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:51:21 2025\n", + "Total time elapsed: 0.32s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", 
+ "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.1 --annot CD4_Naive_1.annot.gz --out cts_ldscores_CD4_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.1 --annot CD4_Naive_1.annot.gz --out cts_ldscores_CD4_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.1 \\\n", + "--bfile cts_ldscores_CD4_Naive.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:51:44 2025\n", + "Read list of 796 SNPs from cts_ldscores_CD4_Naive.1.bim\n", + "Read 1 annotations for 796 SNPs from CD4_Naive_1.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.1.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating 
LD Score.\n", + "Writing LD Scores for 796 SNPs to cts_ldscores_CD4_Naive.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 0.2007\n", + "std 0.1526 0.4154\n", + "min 0.0092 -0.0448\n", + "25% 0.0432 -0.0082\n", + "50% 0.1300 0.0066\n", + "75% 0.3086 0.0327\n", + "max 0.4995 1.8620\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0207\n", + "L2 0.0207 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 143\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 796.0000\n", + "mean 0.1796\n", + "std 0.3841\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:51:45 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:04<00:00, 4.99s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.2 --annot CD8_Naive_2.annot.gz --out cts_ldscores_CD8_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.2 --annot CD8_Naive_2.annot.gz --out cts_ldscores_CD8_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.2 \\\n", + "--bfile cts_ldscores_CD8_Naive.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:02 2025\n", + "Read list of 864 SNPs from cts_ldscores_CD8_Naive.2.bim\n", + "Read 1 annotations for 864 SNPs from CD8_Naive_2.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.2.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to cts_ldscores_CD8_Naive.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 0.1578\n", + "std 0.1408 0.4100\n", + "min 0.0087 -0.0418\n", + "25% 0.0381 -0.0092\n", + "50% 
0.1131 0.0037\n", + "75% 0.2638 0.0212\n", + "max 0.5000 2.7574\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0279\n", + "L2 0.0279 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 115\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 864.0000\n", + "mean 0.1331\n", + "std 0.3399\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:02 2025\n", + "Total time elapsed: 0.33s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.05s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.2 --annot CD4_Naive_2.annot.gz --out cts_ldscores_CD4_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.2 --annot CD4_Naive_2.annot.gz --out cts_ldscores_CD4_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.2 \\\n", + "--bfile cts_ldscores_CD4_Naive.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:22 2025\n", + "Read list of 864 SNPs from cts_ldscores_CD4_Naive.2.bim\n", + "Read 1 annotations for 864 SNPs from CD4_Naive_2.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.2.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to cts_ldscores_CD4_Naive.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 0.1530\n", + "std 0.1408 0.4118\n", + "min 0.0087 -0.0371\n", + "25% 0.0381 -0.0081\n", + "50% 
0.1131 0.0050\n", + "75% 0.2638 0.0223\n", + "max 0.5000 2.7688\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0305\n", + "L2 0.0305 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 111\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 864.0000\n", + "mean 0.1285\n", + "std 0.3348\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:22 2025\n", + "Total time elapsed: 0.41s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.3 --annot CD8_Naive_3.annot.gz --out cts_ldscores_CD8_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.3 --annot CD8_Naive_3.annot.gz --out cts_ldscores_CD8_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.3 \\\n", + "--bfile cts_ldscores_CD8_Naive.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:49 2025\n", + "Read list of 737 SNPs from cts_ldscores_CD8_Naive.3.bim\n", + "Read 1 annotations for 737 SNPs from CD8_Naive_3.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.3.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 737 SNPs to cts_ldscores_CD8_Naive.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 0.1528\n", + "std 0.1499 0.3819\n", + "min 0.0082 -0.0427\n", + "25% 0.0392 -0.0069\n", + "50% 
0.1142 0.0056\n", + "75% 0.2920 0.0225\n", + "max 0.4995 1.9927\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0303\n", + "L2 0.0303 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 100\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 737.0000\n", + "mean 0.1357\n", + "std 0.3427\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:50 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.3 --annot CD4_Naive_3.annot.gz --out cts_ldscores_CD4_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.3 --annot CD4_Naive_3.annot.gz --out cts_ldscores_CD4_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.3 \\\n", + "--bfile cts_ldscores_CD4_Naive.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:13 2025\n", + "Read list of 737 SNPs from cts_ldscores_CD4_Naive.3.bim\n", + "Read 1 annotations for 737 SNPs from CD4_Naive_3.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.3.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 737 SNPs to cts_ldscores_CD4_Naive.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 0.1601\n", + "std 0.1499 0.3874\n", + "min 0.0082 -0.0366\n", + "25% 0.0392 -0.0069\n", + "50% 
0.1142 0.0065\n", + "75% 0.2920 0.0256\n", + "max 0.4995 2.0018\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0091\n", + "L2 0.0091 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 103\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 737.0000\n", + "mean 0.1398\n", + "std 0.3470\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:13 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.08s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.4 --annot CD8_Naive_4.annot.gz --out cts_ldscores_CD8_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.4 --annot CD8_Naive_4.annot.gz --out cts_ldscores_CD8_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.4 \\\n", + "--bfile cts_ldscores_CD8_Naive.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:36 2025\n", + "Read list of 754 SNPs from cts_ldscores_CD8_Naive.4.bim\n", + "Read 1 annotations for 754 SNPs from CD8_Naive_4.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.4.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to cts_ldscores_CD8_Naive.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 0.1023\n", + "std 0.1460 0.3066\n", + "min 0.0087 -0.0272\n", + "25% 0.0394 -0.0059\n", + "50% 
0.1295 0.0027\n", + "75% 0.2783 0.0144\n", + "max 0.5000 1.3741\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 69\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 754.0000\n", + "mean 0.0915\n", + "std 0.2885\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:36 2025\n", + "Total time elapsed: 0.29s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.4 --annot CD4_Naive_4.annot.gz --out cts_ldscores_CD4_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.4 --annot CD4_Naive_4.annot.gz --out cts_ldscores_CD4_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.4 \\\n", + "--bfile cts_ldscores_CD4_Naive.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:58 2025\n", + "Read list of 754 SNPs from cts_ldscores_CD4_Naive.4.bim\n", + "Read 1 annotations for 754 SNPs from CD4_Naive_4.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.4.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to cts_ldscores_CD4_Naive.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 0.0894\n", + "std 0.1460 0.2923\n", + "min 0.0087 -0.0256\n", + "25% 0.0394 -0.0052\n", + "50% 
0.1295 0.0029\n", + "75% 0.2783 0.0130\n", + "max 0.5000 1.5382\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0013\n", + "L2 0.0013 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 59\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 754.0000\n", + "mean 0.0782\n", + "std 0.2687\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:58 2025\n", + "Total time elapsed: 0.28s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.60s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.5 --annot CD8_Naive_5.annot.gz --out cts_ldscores_CD8_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.5 --annot CD8_Naive_5.annot.gz --out cts_ldscores_CD8_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.5 \\\n", + "--bfile cts_ldscores_CD8_Naive.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:16 2025\n", + "Read list of 671 SNPs from cts_ldscores_CD8_Naive.5.bim\n", + "Read 1 annotations for 671 SNPs from CD8_Naive_5.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.5.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 671 SNPs to cts_ldscores_CD8_Naive.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 0.1430\n", + "std 0.1514 0.3666\n", + "min 0.0087 -0.0319\n", + "25% 0.0395 -0.0067\n", + "50% 
0.1239 0.0033\n", + "75% 0.3017 0.0209\n", + "max 0.4980 1.9526\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0064\n", + "L2 0.0064 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 86\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 671.0000\n", + "mean 0.1282\n", + "std 0.3345\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:17 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.02s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.5 --annot CD4_Naive_5.annot.gz --out cts_ldscores_CD4_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.5 --annot CD4_Naive_5.annot.gz --out cts_ldscores_CD4_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.5 \\\n", + "--bfile cts_ldscores_CD4_Naive.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:34 2025\n", + "Read list of 671 SNPs from cts_ldscores_CD4_Naive.5.bim\n", + "Read 1 annotations for 671 SNPs from CD4_Naive_5.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.5.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 671 SNPs to cts_ldscores_CD4_Naive.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 0.1453\n", + "std 0.1514 0.3693\n", + "min 0.0087 -0.0328\n", + "25% 0.0395 -0.0076\n", + "50% 
0.1239 0.0039\n", + "75% 0.3017 0.0208\n", + "max 0.4980 1.9556\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0023\n", + "L2 -0.0023 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 87\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 671.0000\n", + "mean 0.1297\n", + "std 0.3362\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:34 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.98s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.6 --annot CD8_Naive_6.annot.gz --out cts_ldscores_CD8_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.6 --annot CD8_Naive_6.annot.gz --out cts_ldscores_CD8_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.6 \\\n", + "--bfile cts_ldscores_CD8_Naive.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:58 2025\n", + "Read list of 701 SNPs from cts_ldscores_CD8_Naive.6.bim\n", + "Read 1 annotations for 701 SNPs from CD8_Naive_6.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.6.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to cts_ldscores_CD8_Naive.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 0.2136\n", + "std 0.1473 0.4680\n", + "min 0.0087 -0.0376\n", + "25% 0.0418 -0.0047\n", + "50% 
0.1310 0.0083\n", + "75% 0.2880 0.0470\n", + "max 0.4995 2.9027\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0757\n", + "L2 0.0757 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 113\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 701.0000\n", + "mean 0.1612\n", + "std 0.3680\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:58 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.89s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.6 --annot CD4_Naive_6.annot.gz --out cts_ldscores_CD4_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.6 --annot CD4_Naive_6.annot.gz --out cts_ldscores_CD4_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.6 \\\n", + "--bfile cts_ldscores_CD4_Naive.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:55:21 2025\n", + "Read list of 701 SNPs from cts_ldscores_CD4_Naive.6.bim\n", + "Read 1 annotations for 701 SNPs from CD4_Naive_6.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.6.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to cts_ldscores_CD4_Naive.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 0.2152\n", + "std 0.1473 0.4682\n", + "min 0.0087 -0.0372\n", + "25% 0.0418 -0.0057\n", + "50% 
0.1310 0.0086\n", + "75% 0.2880 0.0521\n", + "max 0.4995 2.9119\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0722\n", + "L2 0.0722 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 112\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 701.0000\n", + "mean 0.1598\n", + "std 0.3667\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:55:22 2025\n", + "Total time elapsed: 0.32s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.70s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.7 --annot CD8_Naive_7.annot.gz --out cts_ldscores_CD8_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.7 --annot CD8_Naive_7.annot.gz --out cts_ldscores_CD8_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.7 \\\n", + "--bfile cts_ldscores_CD8_Naive.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:55:44 2025\n", + "Read list of 611 SNPs from cts_ldscores_CD8_Naive.7.bim\n", + "Read 1 annotations for 611 SNPs from CD8_Naive_7.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.7.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 611 SNPs to cts_ldscores_CD8_Naive.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 0.1731\n", + "std 0.1567 0.4343\n", + "min 0.0092 -0.0336\n", + "25% 0.0367 -0.0076\n", + "50% 
0.1295 0.0034\n", + "75% 0.3084 0.0217\n", + "max 0.5000 2.9954\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0353\n", + "L2 0.0353 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 91\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 611.0000\n", + "mean 0.1489\n", + "std 0.3563\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:55:45 2025\n", + "Total time elapsed: 0.26s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.7 --annot CD4_Naive_7.annot.gz --out cts_ldscores_CD4_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.7 --annot CD4_Naive_7.annot.gz --out cts_ldscores_CD4_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.7 \\\n", + "--bfile cts_ldscores_CD4_Naive.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:08 2025\n", + "Read list of 611 SNPs from cts_ldscores_CD4_Naive.7.bim\n", + "Read 1 annotations for 611 SNPs from CD4_Naive_7.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.7.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 611 SNPs to cts_ldscores_CD4_Naive.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 0.1483\n", + "std 0.1567 0.4024\n", + "min 0.0092 -0.0304\n", + "25% 0.0367 -0.0061\n", + "50% 
0.1295 0.0039\n", + "75% 0.3084 0.0166\n", + "max 0.5000 2.9600\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0053\n", + "L2 -0.0053 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 78\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 611.0000\n", + "mean 0.1277\n", + "std 0.3340\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.23s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.8 --annot CD8_Naive_8.annot.gz --out cts_ldscores_CD8_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.8 --annot CD8_Naive_8.annot.gz --out cts_ldscores_CD8_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.8 \\\n", + "--bfile cts_ldscores_CD8_Naive.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:27 2025\n", + "Read list of 562 SNPs from cts_ldscores_CD8_Naive.8.bim\n", + "Read 1 annotations for 562 SNPs from CD8_Naive_8.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.8.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to cts_ldscores_CD8_Naive.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 0.1353\n", + "std 0.1475 0.3897\n", + "min 0.0087 -0.0349\n", + "25% 0.0449 -0.0058\n", + "50% 
0.1376 0.0034\n", + "75% 0.3012 0.0151\n", + "max 0.4995 2.9989\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0589\n", + "L2 0.0589 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 65\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 562.0000\n", + "mean 0.1157\n", + "std 0.3201\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:27 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.14s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.8 --annot CD4_Naive_8.annot.gz --out cts_ldscores_CD4_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.8 --annot CD4_Naive_8.annot.gz --out cts_ldscores_CD4_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.8 \\\n", + "--bfile cts_ldscores_CD4_Naive.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:46 2025\n", + "Read list of 562 SNPs from cts_ldscores_CD4_Naive.8.bim\n", + "Read 1 annotations for 562 SNPs from CD4_Naive_8.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.8.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to cts_ldscores_CD4_Naive.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 0.1623\n", + "std 0.1475 0.4241\n", + "min 0.0087 -0.0339\n", + "25% 0.0449 -0.0055\n", + "50% 
0.1376 0.0049\n", + "75% 0.3012 0.0210\n", + "max 0.4995 2.9978\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.081\n", + "L2 0.081 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 75\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 562.0000\n", + "mean 0.1335\n", + "std 0.3404\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:46 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.85s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.9 --annot CD8_Naive_9.annot.gz --out cts_ldscores_CD8_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.9 --annot CD8_Naive_9.annot.gz --out cts_ldscores_CD8_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.9 \\\n", + "--bfile cts_ldscores_CD8_Naive.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:09 2025\n", + "Read list of 440 SNPs from cts_ldscores_CD8_Naive.9.bim\n", + "Read 1 annotations for 440 SNPs from CD8_Naive_9.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.9.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 440 SNPs to cts_ldscores_CD8_Naive.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 0.1553\n", + "std 0.1510 0.3640\n", + "min 0.0092 -0.0335\n", + "25% 0.0401 -0.0058\n", + "50% 
0.1412 0.0041\n", + "75% 0.2987 0.0192\n", + "max 0.4985 1.2104\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0037\n", + "L2 0.0037 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 65\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 440.0000\n", + "mean 0.1477\n", + "std 0.3552\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.04s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.9 --annot CD4_Naive_9.annot.gz --out cts_ldscores_CD4_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.9 --annot CD4_Naive_9.annot.gz --out cts_ldscores_CD4_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.9 \\\n", + "--bfile cts_ldscores_CD4_Naive.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:33 2025\n", + "Read list of 440 SNPs from cts_ldscores_CD4_Naive.9.bim\n", + "Read 1 annotations for 440 SNPs from CD4_Naive_9.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.9.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 440 SNPs to cts_ldscores_CD4_Naive.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 0.1710\n", + "std 0.1510 0.4031\n", + "min 0.0092 -0.0251\n", + "25% 0.0401 -0.0055\n", + "50% 
0.1412 0.0049\n", + "75% 0.2987 0.0183\n", + "max 0.4985 2.0366\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0575\n", + "L2 0.0575 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 67\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 440.0000\n", + "mean 0.1523\n", + "std 0.3597\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:33 2025\n", + "Total time elapsed: 0.2s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.10 --annot CD8_Naive_10.annot.gz --out cts_ldscores_CD8_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.10 --annot CD8_Naive_10.annot.gz --out cts_ldscores_CD8_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.10 \\\n", + "--bfile cts_ldscores_CD8_Naive.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:51 2025\n", + "Read list of 523 SNPs from cts_ldscores_CD8_Naive.10.bim\n", + "Read 1 annotations for 523 SNPs from CD8_Naive_10.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.10.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to cts_ldscores_CD8_Naive.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 0.1529\n", + "std 0.1466 0.4107\n", + "min 0.0082 -0.0333\n", + "25% 0.0403 
-0.0082\n", + "50% 0.1300 0.0027\n", + "75% 0.2752 0.0176\n", + "max 0.4959 2.8415\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 69\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 523.0000\n", + "mean 0.1319\n", + "std 0.3387\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:52 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.10 --annot CD4_Naive_10.annot.gz --out cts_ldscores_CD4_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.10 --annot CD4_Naive_10.annot.gz --out cts_ldscores_CD4_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.10 \\\n", + "--bfile cts_ldscores_CD4_Naive.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:09 2025\n", + "Read list of 523 SNPs from cts_ldscores_CD4_Naive.10.bim\n", + "Read 1 annotations for 523 SNPs from CD4_Naive_10.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.10.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to cts_ldscores_CD4_Naive.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 0.1588\n", + "std 0.1466 0.4224\n", + "min 0.0082 -0.0287\n", + "25% 0.0403 
-0.0065\n", + "50% 0.1300 0.0032\n", + "75% 0.2752 0.0173\n", + "max 0.4959 2.8300\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0075\n", + "L2 -0.0075 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 70\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 523.0000\n", + "mean 0.1338\n", + "std 0.3408\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.13s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.11 --annot CD8_Naive_11.annot.gz --out cts_ldscores_CD8_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.11 --annot CD8_Naive_11.annot.gz --out cts_ldscores_CD8_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.11 \\\n", + "--bfile cts_ldscores_CD8_Naive.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:32 2025\n", + "Read list of 506 SNPs from cts_ldscores_CD8_Naive.11.bim\n", + "Read 1 annotations for 506 SNPs from CD8_Naive_11.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.11.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.11.bed\n", + "After filtering, 506 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to cts_ldscores_CD8_Naive.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 0.2665\n", + "std 0.1520 0.4811\n", + "min 0.0102 -0.0352\n", + "25% 0.0413 
-0.0063\n", + "50% 0.1208 0.0099\n", + "75% 0.2985 0.1766\n", + "max 0.4995 2.5324\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0443\n", + "L2 0.0443 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 506.0000\n", + "mean 0.2134\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:32 2025\n", + "Total time elapsed: 0.23s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.10s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.11 --annot CD4_Naive_11.annot.gz --out cts_ldscores_CD4_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.11 --annot CD4_Naive_11.annot.gz --out cts_ldscores_CD4_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.11 \\\n", + "--bfile cts_ldscores_CD4_Naive.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:56 2025\n", + "Read list of 506 SNPs from cts_ldscores_CD4_Naive.11.bim\n", + "Read 1 annotations for 506 SNPs from CD4_Naive_11.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.11.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.11.bed\n", + "After filtering, 506 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to cts_ldscores_CD4_Naive.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 0.2682\n", + "std 0.1520 0.4870\n", + "min 0.0102 -0.0380\n", + "25% 0.0413 
-0.0056\n", + "50% 0.1208 0.0112\n", + "75% 0.2985 0.1107\n", + "max 0.4995 2.5043\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0298\n", + "L2 0.0298 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 506.0000\n", + "mean 0.2134\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:56 2025\n", + "Total time elapsed: 0.28s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.12 --annot CD8_Naive_12.annot.gz --out cts_ldscores_CD8_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.12 --annot CD8_Naive_12.annot.gz --out cts_ldscores_CD8_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.12 \\\n", + "--bfile cts_ldscores_CD8_Naive.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:16 2025\n", + "Read list of 507 SNPs from cts_ldscores_CD8_Naive.12.bim\n", + "Read 1 annotations for 507 SNPs from CD8_Naive_12.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.12.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to cts_ldscores_CD8_Naive.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 0.2046\n", + "std 0.1498 0.4351\n", + "min 0.0092 -0.0357\n", + "25% 0.0372 
-0.0054\n", + "50% 0.1254 0.0068\n", + "75% 0.2918 0.0314\n", + "max 0.5000 2.4147\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0307\n", + "L2 -0.0307 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 92\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 507.0000\n", + "mean 0.1815\n", + "std 0.3858\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:16 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.93s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.12 --annot CD4_Naive_12.annot.gz --out cts_ldscores_CD4_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.12 --annot CD4_Naive_12.annot.gz --out cts_ldscores_CD4_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.12 \\\n", + "--bfile cts_ldscores_CD4_Naive.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:35 2025\n", + "Read list of 507 SNPs from cts_ldscores_CD4_Naive.12.bim\n", + "Read 1 annotations for 507 SNPs from CD4_Naive_12.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.12.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to cts_ldscores_CD4_Naive.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 0.2027\n", + "std 0.1498 0.4416\n", + "min 0.0092 -0.0303\n", + "25% 0.0372 
-0.0056\n", + "50% 0.1254 0.0065\n", + "75% 0.2918 0.0288\n", + "max 0.5000 2.4133\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0081\n", + "L2 -0.0081 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 90\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 507.0000\n", + "mean 0.1775\n", + "std 0.3825\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:35 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.49s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.13 --annot CD8_Naive_13.annot.gz --out cts_ldscores_CD8_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.13 --annot CD8_Naive_13.annot.gz --out cts_ldscores_CD8_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.13 \\\n", + "--bfile cts_ldscores_CD8_Naive.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:59 2025\n", + "Read list of 383 SNPs from cts_ldscores_CD8_Naive.13.bim\n", + "Read 1 annotations for 383 SNPs from CD8_Naive_13.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.13.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.13.bed\n", + "After filtering, 383 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to cts_ldscores_CD8_Naive.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 0.1073\n", + "std 0.1509 0.3126\n", + "min 0.0097 -0.0222\n", + "25% 0.0413 
-0.0052\n", + "50% 0.1356 0.0010\n", + "75% 0.2854 0.0093\n", + "max 0.4959 1.2337\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0339\n", + "L2 -0.0339 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 39\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 383.0000\n", + "mean 0.1018\n", + "std 0.3028\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:59 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.48s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.13 --annot CD4_Naive_13.annot.gz --out cts_ldscores_CD4_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.13 --annot CD4_Naive_13.annot.gz --out cts_ldscores_CD4_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.13 \\\n", + "--bfile cts_ldscores_CD4_Naive.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:00:23 2025\n", + "Read list of 383 SNPs from cts_ldscores_CD4_Naive.13.bim\n", + "Read 1 annotations for 383 SNPs from CD4_Naive_13.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.13.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.13.bed\n", + "After filtering, 383 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to cts_ldscores_CD4_Naive.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 0.1057\n", + "std 0.1509 0.3105\n", + "min 0.0097 -0.0210\n", + "25% 0.0413 
-0.0052\n", + "50% 0.1356 0.0023\n", + "75% 0.2854 0.0110\n", + "max 0.4959 1.3078\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0205\n", + "L2 0.0205 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 38\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 383.0000\n", + "mean 0.0992\n", + "std 0.2993\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:00:24 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.14 --annot CD8_Naive_14.annot.gz --out cts_ldscores_CD8_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.14 --annot CD8_Naive_14.annot.gz --out cts_ldscores_CD8_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.14 \\\n", + "--bfile cts_ldscores_CD8_Naive.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:00:43 2025\n", + "Read list of 339 SNPs from cts_ldscores_CD8_Naive.14.bim\n", + "Read 1 annotations for 339 SNPs from CD8_Naive_14.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.14.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to cts_ldscores_CD8_Naive.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 0.1102\n", + "std 0.1444 0.3275\n", + "min 0.0087 -0.0172\n", + "25% 0.0370 
-0.0059\n", + "50% 0.1096 0.0007\n", + "75% 0.2808 0.0105\n", + "max 0.4929 1.9204\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0839\n", + "L2 -0.0839 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 34\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 339.0000\n", + "mean 0.1003\n", + "std 0.3008\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:00:43 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.14 --annot CD4_Naive_14.annot.gz --out cts_ldscores_CD4_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.14 --annot CD4_Naive_14.annot.gz --out cts_ldscores_CD4_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.14 \\\n", + "--bfile cts_ldscores_CD4_Naive.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:00 2025\n", + "Read list of 339 SNPs from cts_ldscores_CD4_Naive.14.bim\n", + "Read 1 annotations for 339 SNPs from CD4_Naive_14.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.14.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to cts_ldscores_CD4_Naive.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 0.0935\n", + "std 0.1444 0.3057\n", + "min 0.0087 -0.0180\n", + "25% 0.0370 
-0.0045\n", + "50% 0.1096 0.0010\n", + "75% 0.2808 0.0092\n", + "max 0.4929 1.9183\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0672\n", + "L2 -0.0672 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 339.0000\n", + "mean 0.0826\n", + "std 0.2757\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:01 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.30s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.15 --annot CD8_Naive_15.annot.gz --out cts_ldscores_CD8_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.15 --annot CD8_Naive_15.annot.gz --out cts_ldscores_CD8_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.15 \\\n", + "--bfile cts_ldscores_CD8_Naive.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:24 2025\n", + "Read list of 290 SNPs from cts_ldscores_CD8_Naive.15.bim\n", + "Read 1 annotations for 290 SNPs from CD8_Naive_15.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.15.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.15.bed\n", + "After filtering, 290 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to cts_ldscores_CD8_Naive.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 0.1941\n", + "std 0.1504 0.4318\n", + "min 0.0092 -0.0209\n", + "25% 0.0477 
-0.0045\n", + "50% 0.1430 0.0036\n", + "75% 0.3072 0.0215\n", + "max 0.4964 1.9951\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0693\n", + "L2 -0.0693 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 49\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 290.0000\n", + "mean 0.1690\n", + "std 0.3754\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:24 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.90s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.15 --annot CD4_Naive_15.annot.gz --out cts_ldscores_CD4_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.15 --annot CD4_Naive_15.annot.gz --out cts_ldscores_CD4_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.15 \\\n", + "--bfile cts_ldscores_CD4_Naive.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:49 2025\n", + "Read list of 290 SNPs from cts_ldscores_CD4_Naive.15.bim\n", + "Read 1 annotations for 290 SNPs from CD4_Naive_15.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.15.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.15.bed\n", + "After filtering, 290 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to cts_ldscores_CD4_Naive.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 0.2427\n", + "std 0.1504 0.4641\n", + "min 0.0092 -0.0211\n", + "25% 0.0477 
-0.0041\n", + "50% 0.1430 0.0055\n", + "75% 0.3072 0.0437\n", + "max 0.4964 1.9847\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0453\n", + "L2 -0.0453 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 62\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 290.0000\n", + "mean 0.2138\n", + "std 0.4107\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:49 2025\n", + "Total time elapsed: 0.2s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.38s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.16 --annot CD8_Naive_16.annot.gz --out cts_ldscores_CD8_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.16 --annot CD8_Naive_16.annot.gz --out cts_ldscores_CD8_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.16 \\\n", + "--bfile cts_ldscores_CD8_Naive.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:07 2025\n", + "Read list of 312 SNPs from cts_ldscores_CD8_Naive.16.bim\n", + "Read 1 annotations for 312 SNPs from CD8_Naive_16.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.16.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to cts_ldscores_CD8_Naive.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 0.1561\n", + "std 0.1502 0.3667\n", + "min 0.0097 -0.0232\n", + "25% 0.0391 
-0.0038\n", + "50% 0.1381 0.0028\n", + "75% 0.2792 0.0163\n", + "max 0.4990 1.4741\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0282\n", + "L2 -0.0282 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 45\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 312.0000\n", + "mean 0.1442\n", + "std 0.3519\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:08 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.83s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.16 --annot CD4_Naive_16.annot.gz --out cts_ldscores_CD4_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.16 --annot CD4_Naive_16.annot.gz --out cts_ldscores_CD4_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.16 \\\n", + "--bfile cts_ldscores_CD4_Naive.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:26 2025\n", + "Read list of 312 SNPs from cts_ldscores_CD4_Naive.16.bim\n", + "Read 1 annotations for 312 SNPs from CD4_Naive_16.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.16.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to cts_ldscores_CD4_Naive.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 0.1616\n", + "std 0.1502 0.3680\n", + "min 0.0097 -0.0214\n", + "25% 0.0391 
-0.0037\n", + "50% 0.1381 0.0043\n", + "75% 0.2792 0.0144\n", + "max 0.4990 1.1301\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.1229\n", + "L2 -0.1229 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 47\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 312.0000\n", + "mean 0.1506\n", + "std 0.3583\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:26 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.84s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.17 --annot CD8_Naive_17.annot.gz --out cts_ldscores_CD8_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.17 --annot CD8_Naive_17.annot.gz --out cts_ldscores_CD8_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.17 \\\n", + "--bfile cts_ldscores_CD8_Naive.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:44 2025\n", + "Read list of 272 SNPs from cts_ldscores_CD8_Naive.17.bim\n", + "Read 1 annotations for 272 SNPs from CD8_Naive_17.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.17.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.17.bed\n", + "After filtering, 272 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to cts_ldscores_CD8_Naive.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 0.2967\n", + "std 0.1557 0.4779\n", + "min 0.0082 -0.0303\n", + "25% 0.0401 
-0.0016\n", + "50% 0.1656 0.0116\n", + "75% 0.3336 0.9886\n", + "max 0.4954 1.7618\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0708\n", + "L2 0.0708 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 72\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 272.0000\n", + "mean 0.2647\n", + "std 0.4420\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:44 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.19s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.17 --annot CD4_Naive_17.annot.gz --out cts_ldscores_CD4_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.17 --annot CD4_Naive_17.annot.gz --out cts_ldscores_CD4_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.17 \\\n", + "--bfile cts_ldscores_CD4_Naive.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:03 2025\n", + "Read list of 272 SNPs from cts_ldscores_CD4_Naive.17.bim\n", + "Read 1 annotations for 272 SNPs from CD4_Naive_17.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.17.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.17.bed\n", + "After filtering, 272 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to cts_ldscores_CD4_Naive.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 0.2763\n", + "std 0.1557 0.5245\n", + "min 0.0082 -0.0315\n", + "25% 0.0401 
-0.0046\n", + "50% 0.1656 0.0067\n", + "75% 0.3336 0.2849\n", + "max 0.4954 2.8720\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0414\n", + "L2 0.0414 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 60\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 272.0000\n", + "mean 0.2206\n", + "std 0.4154\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:03 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.18 --annot CD8_Naive_18.annot.gz --out cts_ldscores_CD8_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.18 --annot CD8_Naive_18.annot.gz --out cts_ldscores_CD8_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.18 \\\n", + "--bfile cts_ldscores_CD8_Naive.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:20 2025\n", + "Read list of 294 SNPs from cts_ldscores_CD8_Naive.18.bim\n", + "Read 1 annotations for 294 SNPs from CD8_Naive_18.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.18.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to cts_ldscores_CD8_Naive.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 0.0927\n", + "std 0.1518 0.3541\n", + "min 0.0097 -0.0158\n", + "25% 0.0515 
-0.0052\n", + "50% 0.1590 -0.0011\n", + "75% 0.3068 0.0060\n", + "max 0.4939 2.2866\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0585\n", + "L2 0.0585 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 294.0000\n", + "mean 0.0714\n", + "std 0.2580\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:21 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.18 --annot CD4_Naive_18.annot.gz --out cts_ldscores_CD4_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.18 --annot CD4_Naive_18.annot.gz --out cts_ldscores_CD4_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.18 \\\n", + "--bfile cts_ldscores_CD4_Naive.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:38 2025\n", + "Read list of 294 SNPs from cts_ldscores_CD4_Naive.18.bim\n", + "Read 1 annotations for 294 SNPs from CD4_Naive_18.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.18.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to cts_ldscores_CD4_Naive.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 0.0869\n", + "std 0.1518 0.3529\n", + "min 0.0097 -0.0148\n", + "25% 0.0515 
-0.0060\n", + "50% 0.1590 -0.0006\n", + "75% 0.3068 0.0051\n", + "max 0.4939 2.2790\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1123\n", + "L2 0.1123 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 19\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 294.0000\n", + "mean 0.0646\n", + "std 0.2463\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:38 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.19 --annot CD8_Naive_19.annot.gz --out cts_ldscores_CD8_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.19 --annot CD8_Naive_19.annot.gz --out cts_ldscores_CD8_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.19 \\\n", + "--bfile cts_ldscores_CD8_Naive.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:58 2025\n", + "Read list of 239 SNPs from cts_ldscores_CD8_Naive.19.bim\n", + "Read 1 annotations for 239 SNPs from CD8_Naive_19.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.19.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.19.bed\n", + "After filtering, 239 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to cts_ldscores_CD8_Naive.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 0.4960\n", + "std 0.1483 0.5393\n", + "min 0.0102 -0.0273\n", + "25% 0.0418 
0.0026\n", + "50% 0.1448 0.0425\n", + "75% 0.3007 1.0072\n", + "max 0.4969 2.0049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0458\n", + "L2 0.0458 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 239.0000\n", + "mean 0.4519\n", + "std 0.4987\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:58 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:09<00:00, 9.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.19 --annot CD4_Naive_19.annot.gz --out cts_ldscores_CD4_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.19 --annot CD4_Naive_19.annot.gz --out cts_ldscores_CD4_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.19 \\\n", + "--bfile cts_ldscores_CD4_Naive.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:04:20 2025\n", + "Read list of 239 SNPs from cts_ldscores_CD4_Naive.19.bim\n", + "Read 1 annotations for 239 SNPs from CD4_Naive_19.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.19.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.19.bed\n", + "After filtering, 239 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to cts_ldscores_CD4_Naive.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 0.3652\n", + "std 0.1483 0.5128\n", + "min 0.0102 -0.0269\n", + "25% 0.0418 
-0.0020\n", + "50% 0.1448 0.0140\n", + "75% 0.3007 0.9950\n", + "max 0.4969 2.0049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.085\n", + "L2 0.085 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 79\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 239.0000\n", + "mean 0.3305\n", + "std 0.4714\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:04:20 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.31s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.20 --annot CD8_Naive_20.annot.gz --out cts_ldscores_CD8_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.20 --annot CD8_Naive_20.annot.gz --out cts_ldscores_CD8_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.20 \\\n", + "--bfile cts_ldscores_CD8_Naive.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:04:56 2025\n", + "Read list of 227 SNPs from cts_ldscores_CD8_Naive.20.bim\n", + "Read 1 annotations for 227 SNPs from CD8_Naive_20.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.20.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to cts_ldscores_CD8_Naive.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 0.0991\n", + "std 0.1404 0.3103\n", + "min 0.0102 -0.0135\n", + "25% 0.0538 
-0.0043\n", + "50% 0.1386 -0.0004\n", + "75% 0.2796 0.0066\n", + "max 0.4837 1.4853\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0314\n", + "L2 -0.0314 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 227.0000\n", + "mean 0.0925\n", + "std 0.2904\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:04:56 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.20 --annot CD4_Naive_20.annot.gz --out cts_ldscores_CD4_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.20 --annot CD4_Naive_20.annot.gz --out cts_ldscores_CD4_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.20 \\\n", + "--bfile cts_ldscores_CD4_Naive.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:05:35 2025\n", + "Read list of 227 SNPs from cts_ldscores_CD4_Naive.20.bim\n", + "Read 1 annotations for 227 SNPs from CD4_Naive_20.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.20.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to cts_ldscores_CD4_Naive.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 0.1306\n", + "std 0.1404 0.3364\n", + "min 0.0102 -0.0149\n", + "25% 0.0538 
-0.0047\n", + "50% 0.1386 0.0015\n", + "75% 0.2796 0.0112\n", + "max 0.4837 1.0987\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.1106\n", + "L2 -0.1106 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 227.0000\n", + "mean 0.1233\n", + "std 0.3296\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:05:35 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.21 --annot CD8_Naive_21.annot.gz --out cts_ldscores_CD8_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.21 --annot CD8_Naive_21.annot.gz --out cts_ldscores_CD8_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.21 \\\n", + "--bfile cts_ldscores_CD8_Naive.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:04 2025\n", + "Read list of 135 SNPs from cts_ldscores_CD8_Naive.21.bim\n", + "Read 1 annotations for 135 SNPs from CD8_Naive_21.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.21.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.21.bed\n", + "After filtering, 135 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to cts_ldscores_CD8_Naive.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 0.1890\n", + "std 0.1475 0.3923\n", + "min 0.0087 -0.0117\n", + "25% 0.0581 
-0.0030\n", + "50% 0.1590 0.0026\n", + "75% 0.3129 0.0169\n", + "max 0.4893 1.2037\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0929\n", + "L2 -0.0929 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 135.0000\n", + "mean 0.1778\n", + "std 0.3837\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:04 2025\n", + "Total time elapsed: 0.21s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.82s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.21 --annot CD4_Naive_21.annot.gz --out cts_ldscores_CD4_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.21 --annot CD4_Naive_21.annot.gz --out cts_ldscores_CD4_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.21 \\\n", + "--bfile cts_ldscores_CD4_Naive.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:26 2025\n", + "Read list of 135 SNPs from cts_ldscores_CD4_Naive.21.bim\n", + "Read 1 annotations for 135 SNPs from CD4_Naive_21.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.21.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.21.bed\n", + "After filtering, 135 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to cts_ldscores_CD4_Naive.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 0.1647\n", + "std 0.1475 0.3729\n", + "min 0.0087 -0.0129\n", + "25% 0.0581 
-0.0026\n", + "50% 0.1590 0.0018\n", + "75% 0.3129 0.0085\n", + "max 0.4893 1.2044\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0325\n", + "L2 -0.0325 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 135.0000\n", + "mean 0.1556\n", + "std 0.3638\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:26 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.24s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.22 --annot CD8_Naive_22.annot.gz --out cts_ldscores_CD8_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.22 --annot CD8_Naive_22.annot.gz --out cts_ldscores_CD8_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.22 \\\n", + "--bfile cts_ldscores_CD8_Naive.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:48 2025\n", + "Read list of 136 SNPs from cts_ldscores_CD8_Naive.22.bim\n", + "Read 1 annotations for 136 SNPs from CD8_Naive_22.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.22.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to cts_ldscores_CD8_Naive.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 0.2025\n", + "std 0.1358 0.4513\n", + "min 0.0102 -0.0143\n", + "25% 0.0405 
-0.0031\n", + "50% 0.1241 0.0030\n", + "75% 0.2683 0.0165\n", + "max 0.4944 2.1050\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0641\n", + "L2 -0.0641 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 136.0000\n", + "mean 0.1765\n", + "std 0.3826\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:48 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.22 --annot CD4_Naive_22.annot.gz --out cts_ldscores_CD4_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.22 --annot CD4_Naive_22.annot.gz --out cts_ldscores_CD4_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.22 \\\n", + "--bfile cts_ldscores_CD4_Naive.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:07:10 2025\n", + "Read list of 136 SNPs from cts_ldscores_CD4_Naive.22.bim\n", + "Read 1 annotations for 136 SNPs from CD4_Naive_22.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.22.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to cts_ldscores_CD4_Naive.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 0.2171\n", + "std 0.1358 0.4606\n", + "min 0.0102 -0.0137\n", + "25% 0.0405 
-0.0025\n", + "50% 0.1241 0.0033\n", + "75% 0.2683 0.0197\n", + "max 0.4944 2.1059\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.017\n", + "L2 -0.017 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 26\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 136.0000\n", + "mean 0.1912\n", + "std 0.3947\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:07:11 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.bed\n" + ] + } + ], + "source": [ + "for chrom in range(1, 23):\n", + " for cell_type in [\"CD8 Naive\", \"CD4 Naive\"]:\n", + " print(f\"Processing cell type: {cell_type}\")\n", + "\n", + " dd_chrom = dd.sel(G_var=dd.G.var.chrom == str(chrom), C_var=dd.C.var.chrom == str(chrom)).copy()\n", + " results = compute_ld_scores_with_annotations_from_donor_data(\n", + " dd=dd_chrom,\n", + " annot_file=f\"{cell_type.replace(' ', '_')}_{chrom}.annot.gz\",\n", + " out_prefix=f\"cts_ldscores_{cell_type.replace(' ', '_')}.{chrom}\",\n", + " run=True,\n", + " runner=runner,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 6: Prepare Reference LD Scores and Weights\n", + "For the final analysis, we need baseline LD scores and regression weights. These control for genomic confounders and ensure proper statistical inference. 
# Cell: fetch the 1000 Genomes (EUR) reference LD scores and regression weights.
# Both helpers are pointed at the same resource config; with return_path=True
# their results are unpacked into a path and a prefix.
_config_1000g = "../../src/cellink/resources/config/1000genomes.yaml"
ldscores_path, ldscores_prefix = get_1000genomes_ld_scores(
    config_path=_config_1000g,
    population="EUR",
    return_path=True,
)
ldweights_path, ldweights_prefix = get_1000genomes_ld_weights(
    config_path=_config_1000g,
    population="EUR",
    return_path=True,
)

# Cell: write the control file, one "<label>\t<LD score prefix>" line per cell type.
control_lines = [
    "CD8_Naive\tcts_ldscores_CD8_Naive.\n",
    "CD4_Naive\tcts_ldscores_CD4_Naive.\n",
]
with open("celltype_ldscores.txt", "w") as f:
    f.writelines(control_lines)
Here we compute them from our donor data for demonstration purposes (in a real analysis, you would instead use pre-computed baseline LD scores, such as the 1000 Genomes baseline-LD files downloaded above):
+ "\n", + "Summary of LD Scores in BaselineLD.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 1.1546\n", + "std 0.1526 0.2973\n", + "min 0.0092 0.8847\n", + "25% 0.0432 1.0024\n", + "50% 0.1300 1.0518\n", + "75% 0.3086 1.1259\n", + "max 0.4995 2.8918\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.129\n", + "L2 0.129 1.000\n", + "Analysis finished at Thu Nov 6 21:09:35 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:08<00:00, 8.55s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.2 --l2 --out BaselineLD.2 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.2 --l2 --out BaselineLD.2 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.2 \\\n", + "--bfile BaselineLD.2 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:10:01 2025\n", + "Read list of 864 SNPs from BaselineLD.2.bim\n", + "Read list of 981 individuals from BaselineLD.2.fam\n", + "Reading genotypes from BaselineLD.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to BaselineLD.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 1.1604\n", + "std 0.1408 0.3164\n", + "min 0.0087 0.8871\n", + "25% 0.0381 1.0030\n", + "50% 0.1131 1.0526\n", + "75% 0.2638 1.1510\n", + "max 0.5000 2.9248\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1575\n", + "L2 0.1575 1.0000\n", + "Analysis finished at Thu Nov 6 21:10:02 2025\n", + "Total time elapsed: 0.34s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:15<00:00, 15.01s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.3 --l2 --out BaselineLD.3 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.3 --l2 --out BaselineLD.3 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.3 \\\n", + "--bfile BaselineLD.3 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:10:33 2025\n", + "Read list of 737 SNPs from BaselineLD.3.bim\n", + "Read list of 981 individuals from BaselineLD.3.fam\n", + "Reading genotypes from BaselineLD.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD 
Score.\n", + "Writing LD Scores for 737 SNPs to BaselineLD.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 1.1388\n", + "std 0.1499 0.2501\n", + "min 0.0082 0.8972\n", + "25% 0.0392 1.0100\n", + "50% 0.1142 1.0548\n", + "75% 0.2920 1.1376\n", + "max 0.4995 2.6729\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2076\n", + "L2 0.2076 1.0000\n", + "Analysis finished at Thu Nov 6 21:10:33 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.4 --l2 --out BaselineLD.4 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.4 --l2 --out BaselineLD.4 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.4 \\\n", + "--bfile BaselineLD.4 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:02 2025\n", + "Read list of 754 SNPs from BaselineLD.4.bim\n", + "Read list of 981 individuals from BaselineLD.4.fam\n", + "Reading genotypes from BaselineLD.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to BaselineLD.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 1.1587\n", + "std 0.1460 0.3023\n", + "min 0.0087 0.8834\n", + "25% 0.0394 1.0077\n", + "50% 0.1295 1.0548\n", + "75% 0.2783 1.1547\n", + "max 0.5000 3.5811\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0516\n", + "L2 0.0516 1.0000\n", + "Analysis finished at Thu Nov 6 21:11:02 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.21s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.5 --l2 --out BaselineLD.5 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.5 --l2 --out BaselineLD.5 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.5 \\\n", + "--bfile BaselineLD.5 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:21 2025\n", + "Read list of 671 SNPs from BaselineLD.5.bim\n", + "Read list of 981 individuals from BaselineLD.5.fam\n", + "Reading genotypes from BaselineLD.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing 
LD Scores for 671 SNPs to BaselineLD.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 1.1557\n", + "std 0.1514 0.3052\n", + "min 0.0087 0.8851\n", + "25% 0.0395 1.0035\n", + "50% 0.1239 1.0486\n", + "75% 0.3017 1.1545\n", + "max 0.4980 3.3168\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.268\n", + "L2 0.268 1.000\n", + "Analysis finished at Thu Nov 6 21:11:22 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.6 --l2 --out BaselineLD.6 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.6 --l2 --out BaselineLD.6 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.6 \\\n", + "--bfile BaselineLD.6 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:48 2025\n", + "Read list of 701 SNPs from BaselineLD.6.bim\n", + "Read list of 981 individuals from BaselineLD.6.fam\n", + "Reading genotypes from BaselineLD.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to BaselineLD.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 1.2584\n", + "std 0.1473 0.4762\n", + "min 0.0087 0.9079\n", + "25% 0.0418 1.0161\n", + "50% 0.1310 1.0778\n", + "75% 0.2880 1.2383\n", + "max 0.4995 4.5224\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1685\n", + "L2 0.1685 1.0000\n", + "Analysis finished at Thu Nov 6 21:11:48 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.7 --l2 --out BaselineLD.7 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.7 --l2 --out BaselineLD.7 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.7 \\\n", + "--bfile BaselineLD.7 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:12:14 2025\n", + "Read list of 611 SNPs from BaselineLD.7.bim\n", + "Read list of 981 individuals from BaselineLD.7.fam\n", + "Reading genotypes from BaselineLD.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + 
"Writing LD Scores for 611 SNPs to BaselineLD.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 1.1485\n", + "std 0.1567 0.3127\n", + "min 0.0092 0.9191\n", + "25% 0.0367 1.0038\n", + "50% 0.1295 1.0474\n", + "75% 0.3084 1.1255\n", + "max 0.5000 3.0536\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1759\n", + "L2 0.1759 1.0000\n", + "Analysis finished at Thu Nov 6 21:12:14 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.25s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.8 --l2 --out BaselineLD.8 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.8 --l2 --out BaselineLD.8 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.8 \\\n", + "--bfile BaselineLD.8 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:12:35 2025\n", + "Read list of 562 SNPs from BaselineLD.8.bim\n", + "Read list of 981 individuals from BaselineLD.8.fam\n", + "Reading genotypes from BaselineLD.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to BaselineLD.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 1.1633\n", + "std 0.1475 0.3349\n", + "min 0.0087 0.9183\n", + "25% 0.0449 1.0160\n", + "50% 0.1376 1.0559\n", + "75% 0.3012 1.1600\n", + "max 0.4995 3.2220\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.177\n", + "L2 0.177 1.000\n", + "Analysis finished at Thu Nov 6 21:12:35 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.52s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.9 --l2 --out BaselineLD.9 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.9 --l2 --out BaselineLD.9 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.9 \\\n", + "--bfile BaselineLD.9 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:01 2025\n", + "Read list of 440 SNPs from BaselineLD.9.bim\n", + "Read list of 981 individuals from BaselineLD.9.fam\n", + "Reading genotypes from BaselineLD.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + 
"Writing LD Scores for 440 SNPs to BaselineLD.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 1.1344\n", + "std 0.1510 0.2982\n", + "min 0.0092 0.9294\n", + "25% 0.0401 1.0108\n", + "50% 0.1412 1.0428\n", + "75% 0.2987 1.1075\n", + "max 0.4985 3.0546\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2587\n", + "L2 0.2587 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:01 2025\n", + "Total time elapsed: 0.23s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.10 --l2 --out BaselineLD.10 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.10 --l2 --out BaselineLD.10 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.10 \\\n", + "--bfile BaselineLD.10 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:21 2025\n", + "Read list of 523 SNPs from BaselineLD.10.bim\n", + "Read list of 981 individuals from BaselineLD.10.fam\n", + "Reading genotypes from BaselineLD.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to BaselineLD.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 1.1418\n", + "std 0.1466 0.2758\n", + "min 0.0082 0.9121\n", + "25% 0.0403 1.0123\n", + "50% 0.1300 1.0485\n", + "75% 0.2752 1.1195\n", + "max 0.4959 2.8504\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1955\n", + "L2 0.1955 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:21 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.11 --l2 --out BaselineLD.11 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.11 --l2 --out BaselineLD.11 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.11 \\\n", + "--bfile BaselineLD.11 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:46 2025\n", + "Read list of 506 SNPs from BaselineLD.11.bim\n", + "Read list of 981 individuals from BaselineLD.11.fam\n", + "Reading genotypes from BaselineLD.11.bed\n", + "After filtering, 506 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to BaselineLD.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 1.2376\n", + "std 0.1520 0.5903\n", + "min 0.0102 0.9036\n", + "25% 0.0413 1.0065\n", + "50% 0.1208 1.0547\n", + "75% 0.2985 1.1684\n", + "max 0.4995 5.9738\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0226\n", + "L2 0.0226 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:46 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.03s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.12 --l2 --out BaselineLD.12 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.12 --l2 --out BaselineLD.12 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.12 \\\n", + "--bfile BaselineLD.12 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:05 2025\n", + "Read list of 507 SNPs from BaselineLD.12.bim\n", + "Read list of 981 individuals from BaselineLD.12.fam\n", + "Reading genotypes from BaselineLD.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to BaselineLD.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 1.1261\n", + "std 0.1498 0.2455\n", + "min 0.0092 0.9320\n", + "25% 0.0372 1.0033\n", + "50% 0.1254 1.0379\n", + "75% 0.2918 1.1104\n", + "max 0.5000 2.4573\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1584\n", + "L2 0.1584 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:05 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.13 --l2 --out BaselineLD.13 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.13 --l2 --out BaselineLD.13 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.13 \\\n", + "--bfile BaselineLD.13 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:29 2025\n", + "Read list of 383 SNPs from BaselineLD.13.bim\n", + "Read list of 981 individuals from BaselineLD.13.fam\n", + "Reading genotypes from BaselineLD.13.bed\n", + "After filtering, 383 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to BaselineLD.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 1.1527\n", + "std 0.1509 0.2924\n", + "min 0.0097 0.9433\n", + "25% 0.0413 1.0108\n", + "50% 0.1356 1.0419\n", + "75% 0.2854 1.1182\n", + "max 0.4959 2.4980\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1471\n", + "L2 0.1471 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:29 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.37s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.14 --l2 --out BaselineLD.14 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.14 --l2 --out BaselineLD.14 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.14 \\\n", + "--bfile BaselineLD.14 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:48 2025\n", + "Read list of 339 SNPs from BaselineLD.14.bim\n", + "Read list of 981 individuals from BaselineLD.14.fam\n", + "Reading genotypes from BaselineLD.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to BaselineLD.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 1.1955\n", + "std 0.1444 0.3391\n", + "min 0.0087 0.9398\n", + "25% 0.0370 1.0106\n", + "50% 0.1096 1.0488\n", + "75% 0.2808 1.1684\n", + "max 0.4929 2.3924\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1236\n", + "L2 0.1236 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:48 2025\n", + "Total time elapsed: 0.17s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.15 --l2 --out BaselineLD.15 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.15 --l2 --out BaselineLD.15 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.15 \\\n", + "--bfile BaselineLD.15 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:13 2025\n", + "Read list of 290 SNPs from BaselineLD.15.bim\n", + "Read list of 981 individuals from BaselineLD.15.fam\n", + "Reading genotypes from BaselineLD.15.bed\n", + "After filtering, 290 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to BaselineLD.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 1.1574\n", + "std 0.1504 0.3490\n", + "min 0.0092 0.9385\n", + "25% 0.0477 1.0066\n", + "50% 0.1430 1.0358\n", + "75% 0.3072 1.1152\n", + "max 0.4964 2.9996\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1579\n", + "L2 0.1579 1.0000\n", + "Analysis finished at Thu Nov 6 21:15:13 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.77s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.16 --l2 --out BaselineLD.16 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.16 --l2 --out BaselineLD.16 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.16 \\\n", + "--bfile BaselineLD.16 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:31 2025\n", + "Read list of 312 SNPs from BaselineLD.16.bim\n", + "Read list of 981 individuals from BaselineLD.16.fam\n", + "Reading genotypes from BaselineLD.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to BaselineLD.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 1.1088\n", + "std 0.1502 0.2288\n", + "min 0.0097 0.9313\n", + "25% 0.0391 1.0001\n", + "50% 0.1381 1.0333\n", + "75% 0.2792 1.0959\n", + "max 0.4990 2.3852\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0326\n", + "L2 0.0326 1.0000\n", + "Analysis finished at Thu Nov 6 21:15:31 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.92s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.17 --l2 --out BaselineLD.17 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.17 --l2 --out BaselineLD.17 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.17 \\\n", + "--bfile BaselineLD.17 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:50 2025\n", + "Read list of 272 SNPs from BaselineLD.17.bim\n", + "Read list of 981 individuals from BaselineLD.17.fam\n", + "Reading genotypes from BaselineLD.17.bed\n", + "After filtering, 272 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to BaselineLD.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 1.1639\n", + "std 0.1557 0.3838\n", + "min 0.0082 0.9350\n", + "25% 0.0401 1.0009\n", + "50% 0.1656 1.0279\n", + "75% 0.3336 1.0893\n", + "max 0.4954 3.4348\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.074\n", + "L2 0.074 1.000\n", + "Analysis finished at Thu Nov 6 21:15:50 2025\n", + "Total time elapsed: 0.17s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.57s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.18 --l2 --out BaselineLD.18 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.18 --l2 --out BaselineLD.18 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.18 \\\n", + "--bfile BaselineLD.18 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:08 2025\n", + "Read list of 294 SNPs from BaselineLD.18.bim\n", + "Read list of 981 individuals from BaselineLD.18.fam\n", + "Reading genotypes from BaselineLD.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to BaselineLD.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 1.1399\n", + "std 0.1518 0.2752\n", + "min 0.0097 0.9468\n", + "25% 0.0515 1.0082\n", + "50% 0.1590 1.0393\n", + "75% 0.3068 1.1086\n", + "max 0.4939 2.6077\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1146\n", + "L2 0.1146 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:08 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.76s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.19 --l2 --out BaselineLD.19 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.19 --l2 --out BaselineLD.19 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.19 \\\n", + "--bfile BaselineLD.19 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:29 2025\n", + "Read list of 239 SNPs from BaselineLD.19.bim\n", + "Read list of 981 individuals from BaselineLD.19.fam\n", + "Reading genotypes from BaselineLD.19.bed\n", + "After filtering, 239 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to BaselineLD.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 1.1145\n", + "std 0.1483 0.2572\n", + "min 0.0102 0.9521\n", + "25% 0.0418 1.0033\n", + "50% 0.1448 1.0256\n", + "75% 0.3007 1.0950\n", + "max 0.4969 2.5550\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1708\n", + "L2 0.1708 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:29 2025\n", + "Total time elapsed: 0.16s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.31s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.20 --l2 --out BaselineLD.20 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.20 --l2 --out BaselineLD.20 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.20 \\\n", + "--bfile BaselineLD.20 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:48 2025\n", + "Read list of 227 SNPs from BaselineLD.20.bim\n", + "Read list of 981 individuals from BaselineLD.20.fam\n", + "Reading genotypes from BaselineLD.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to BaselineLD.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 1.0926\n", + "std 0.1404 0.2111\n", + "min 0.0102 0.9478\n", + "25% 0.0538 0.9939\n", + "50% 0.1386 1.0248\n", + "75% 0.2796 1.0713\n", + "max 0.4837 2.2165\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2494\n", + "L2 0.2494 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:48 2025\n", + "Total time elapsed: 0.15s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.27s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.21 --l2 --out BaselineLD.21 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.21 --l2 --out BaselineLD.21 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.21 \\\n", + "--bfile BaselineLD.21 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:17:04 2025\n", + "Read list of 135 SNPs from BaselineLD.21.bim\n", + "Read list of 981 individuals from BaselineLD.21.fam\n", + "Reading genotypes from BaselineLD.21.bed\n", + "After filtering, 135 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to BaselineLD.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 1.1085\n", + "std 0.1475 0.2201\n", + "min 0.0087 0.9721\n", + "25% 0.0581 1.0043\n", + "50% 0.1590 1.0215\n", + "75% 0.3129 1.0810\n", + "max 0.4893 2.1313\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.301\n", + "L2 0.301 1.000\n", + "Analysis finished at Thu Nov 6 21:17:04 2025\n", + "Total time elapsed: 0.16s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.15s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.22 --l2 --out BaselineLD.22 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.22 --l2 --out BaselineLD.22 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.22 \\\n", + "--bfile BaselineLD.22 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:17:19 2025\n", + "Read list of 136 SNPs from BaselineLD.22.bim\n", + "Read list of 981 individuals from BaselineLD.22.fam\n", + "Reading genotypes from BaselineLD.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to BaselineLD.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 1.0711\n", + "std 0.1358 0.1791\n", + "min 0.0102 0.9639\n", + "25% 0.0405 1.0011\n", + "50% 0.1241 1.0171\n", + "75% 0.2683 1.0539\n", + "max 0.4944 2.1255\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0206\n", + "L2 -0.0206 1.0000\n", + "Analysis finished at Thu Nov 6 21:17:19 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
def generate_fake_sumstats(
    dd,
    output_file="fake_munged.sumstats.gz",
    subset_frac=0.8,
    n_extra_snps=1000,
    seed=42,
    sample_size=336924.0,
    large_effect_frac=0.01,
    large_effect_scale=3.0,
):
    """Write synthetic, LDSC-style munged summary statistics for a demo run.

    A random subset of the variants listed in ``dd.G.var`` is combined with
    ``n_extra_snps`` made-up variants, every variant gets a standard-normal
    Z score, and a small fraction of them gets an inflated Z score. The
    table is written as a gzip-compressed, tab-separated file with the
    columns ``SNP``, ``A1``, ``A2``, ``Z`` and ``N``.

    Parameters
    ----------
    dd
        Object exposing ``dd.G.var``, a DataFrame indexed by variant ID with
        ``"a0"`` and ``"a1"`` allele columns.
        NOTE(review): presumably a cellink ``DonorData`` - confirm.
    output_file
        Path of the gzip-compressed TSV to write.
    subset_frac
        Fraction (0..1) of the variants in ``dd.G.var`` to keep.
    n_extra_snps
        Number of made-up variants (IDs of the form ``chrom_pos_ref_alt``)
        appended after the real ones.
    seed
        Seed of the private random generator used for every draw.
    sample_size
        Value written to the ``N`` column of every row.
    large_effect_frac
        Fraction of all rows whose Z score is redrawn with a larger spread.
    large_effect_scale
        Multiplier applied to the redrawn Z scores.

    Returns
    -------
    The ``output_file`` argument, unchanged.

    Raises
    ------
    ValueError
        If ``subset_frac`` is outside ``[0, 1]``.
    """
    if not 0.0 <= subset_frac <= 1.0:
        raise ValueError(f"subset_frac must be between 0 and 1, got {subset_frac!r}")

    # A private legacy RandomState yields the same draw sequence as
    # `np.random.seed(seed)` followed by the module-level functions, but it
    # does not reseed NumPy's global generator behind the caller's back.
    rng = np.random.RandomState(seed)

    variants = dd.G.var
    n_variants = len(variants)
    n_real = int(n_variants * subset_frac)
    real_idx = rng.choice(n_variants, n_real, replace=False)

    snp_ids = variants.index[real_idx].tolist()
    allele_0 = variants["a0"].iloc[real_idx].tolist()
    allele_1 = variants["a1"].iloc[real_idx].tolist()

    # Made-up variants are drawn one at a time so the order of random draws
    # (chrom, pos, ref, alt) stays stable for a given seed.
    bases = ["A", "C", "G", "T"]
    for _ in range(n_extra_snps):
        chrom = rng.randint(1, 23)
        pos = rng.randint(1000000, 50000000)
        ref = rng.choice(bases)
        alt = rng.choice([base for base in bases if base != ref])
        snp_ids.append(f"{chrom}_{pos}_{ref}_{alt}")
        allele_0.append(ref)
        allele_1.append(alt)

    n_total = len(snp_ids)
    z_scores = rng.randn(n_total)
    # Redraw a small share of the Z scores with a wider spread.
    n_large = int(n_total * large_effect_frac)
    large_effect_idx = rng.choice(n_total, n_large, replace=False)
    z_scores[large_effect_idx] = rng.randn(n_large) * large_effect_scale

    # A1 is taken from the "a1" column and A2 from the "a0" column.
    fake_sumstats = pd.DataFrame(
        {"SNP": snp_ids, "A1": allele_1, "A2": allele_0, "Z": z_scores, "N": sample_size}
    )

    fake_sumstats.to_csv(output_file, sep="\t", index=False, compression="gzip", float_format="%.3f")
    print(f"Generated {len(fake_sumstats)} SNPs ({n_real} real, {n_extra_snps} fake) -> {output_file}")
    return output_file
--ref-ld-chr-cts celltype_ldscores.txt --out CHD_CD8_Naive_h2\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2-cts fake_munged.sumstats.gz \\\n", + "--ref-ld-chr BaselineLD. \\\n", + "--out CHD_CD8_Naive_h2 \\\n", + "--ref-ld-chr-cts celltype_ldscores.txt \\\n", + "--w-ld-chr BaselineLD. \n", + "\n", + "Beginning analysis at Thu Nov 6 22:23:46 2025\n", + "Reading summary statistics from fake_munged.sumstats.gz ...\n", + "Read summary statistics for 9239 SNPs.\n", + "Reading reference panel LD Score from BaselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 10299 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from BaselineLD.[1-22] ...\n", + "Read regression weight LD Scores for 10299 SNPs.\n", + "After merging with reference panel LD, 8239 SNPs remain.\n", + "After merging with regression SNP LD, 8239 SNPs remain.\n", + "WARNING: number of SNPs less than 200k; this is almost always bad.\n", + "Removed 0 SNPs with chi^2 > 336.924 (8239 SNPs remain)\n", + "Reading cts reference panel LD Score from cts_ldscores_CD8_Naive.[1-22] ...\n", + "Performing regression.\n", + "Reading cts reference panel LD Score from cts_ldscores_CD4_Naive.[1-22] ...\n", + "Performing regression.\n", + "Results printed to CHD_CD8_Naive_h2.cell_type_results.txt\n", + "Analysis finished at Thu Nov 6 22:23:47 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no 
specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "result = estimate_celltype_specific_heritability(\n", + " sumstats_file=\"fake_munged.sumstats.gz\",\n", + " ref_ld_chr=\"BaselineLD.\",\n", + " w_ld_chr=\"BaselineLD.\",\n", + " ref_ld_chr_cts=\"celltype_ldscores.txt\",\n", + " out_prefix=\"CHD_CD8_Naive_h2\",\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard LDSC Analyses\n", + "Beyond cell-type-specific analysis, LDSC can also be used for standard heritability estimation and genetic correlation analysis. Here we demonstrate these capabilities.\n", + "#### SNP Heritability Estimation\n", + "First, we download real GWAS summary statistics from the GWAS Catalog and prepare them for LDSC analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runner = configure_ldsc_runner(config_path=\"../../src/cellink/tl/external/config/ldsc_docker.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Fetching https://www.ebi.ac.uk/gwas/rest/api/v2/studies/GCST004787\n", + "INFO:root:Found harmonised file: 28714975-GCST004787-EFO_0001645.h.tsv.gz\n", + "INFO:root:Downloading http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST004001-GCST005000/GCST004787/harmonised/28714975-GCST004787-EFO_0001645.h.tsv.gz to /Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz')" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gwas_summary_statistic_path_1 = get_gwas_catalog_study_summary_stats(\"GCST004787\", return_path=True)\n", + "gwas_summary_statistic_path_1" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "LDSC requires \"munged\" (cleaned and standardized) summary statistics. The `munge_sumstats` function performs quality control, standardizes column names, and prepares the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Running munge_sumstats: /ldsc/munge_sumstats.py --sumstats /Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz --out GCST004787_summary_stats_munged --N 336924 --signed-sumstats logor,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/munge_sumstats.py --sumstats /cellink_data/GCST004787_summary_stats.tsv.gz --out GCST004787_summary_stats_munged --N 336924 --signed-sumstats logor,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats logor,0 \\\n", + "--out GCST004787_summary_stats_munged \\\n", + "--N 336924.0 \\\n", + "--a1 effect_allele \\\n", + "--a2 other_allele \\\n", + "--snp variant_id \\\n", + "--sumstats /cellink_data/GCST004787_summary_stats.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "logor:\tDirectional summary 
statistic as specified by --signed-sumstats.\n", + "other_allele:\tAllele 2, interpreted as non-ref allele for signed sumstat.\n", + "variant_id:\tVariant ID (e.g., rs number)\n", + "p_value:\tp-Value\n", + "\n", + "Reading sumstats from /cellink_data/GCST004787_summary_stats.tsv.gz into memory 5000000 SNPs at a time.\n", + ".. done\n", + "Read 9020474 SNPs from --sumstats file.\n", + "Removed 0 SNPs with missing values.\n", + "Removed 0 SNPs with INFO <= 0.9.\n", + "Removed 0 SNPs with MAF <= 0.01.\n", + "Removed 0 SNPs with out-of-bounds p-values.\n", + "Removed 1855539 variants that were not SNPs or were strand-ambiguous.\n", + "7164935 SNPs remain.\n", + "Removed 9 SNPs with duplicated rs numbers (7164926 SNPs remain).\n", + "Using N = 336924.0\n", + "Median value of SIGNED_SUMSTATS was 0.00966, which seems sensible.\n", + "Writing summary statistics for 7164926 SNPs (7164926 with nonmissing beta) to GCST004787_summary_stats_munged.sumstats.gz.\n", + "\n", + "Metadata:\n", + "Mean chi^2 = 1.078\n", + "Lambda GC = 0.999\n", + "Max chi^2 = 458.046\n", + "2243 Genome-wide significant SNPs (some may have been removed by filtering).\n", + "\n", + "Conversion finished at Thu Nov 6 10:22:30 2025\n", + "Total time elapsed: 1.0m:24.42s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "munged_file_1 = munge_sumstats(\n", + " sumstats_file=gwas_summary_statistic_path_1,\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged\"),\n", + " info_min=0.9,\n", + " maf_min=0.01,\n", + " signed_sumstats=(\"logor\", 0),\n", + " run=True,\n", + " p_col=\"p_value\",\n", + " snp_col=\"variant_id\",\n", + " a1_col=\"effect_allele\",\n", + " a2_col=\"other_allele\",\n", + " n_samples=336924,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Download reference LD scores and weights for heritability estimation:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/1000G_Phase3_baselineLD_v2.2_ldscores.tgz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Downloading https://zenodo.org/records/10515792/files/1000G_Phase3_weights_hm3_no_MHC.tgz?download=1 to /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/1000G_Phase3_weights_hm3_no_MHC.tgz\n" + ] + } + ], + "source": [ + "ldscores_path, ldscores_prefix = get_1000genomes_ld_scores(\n", + " config_path=\"../../src/cellink/resources/config/1000genomes.yaml\", population=\"EUR\", return_path=True\n", + ")\n", + "ldweights_path, ldweights_prefix = get_1000genomes_ld_weights(\n", + " config_path=\"../../src/cellink/resources/config/1000genomes.yaml\", population=\"EUR\", return_path=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now estimate SNP heritability:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Estimating heritability: /ldsc/ldsc.py --h2 GCST004787_summary_stats_munged.sumstats.gz --ref-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
--out GCST004787_summary_stats_h2\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --h2 GCST004787_summary_stats_munged.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out GCST004787_summary_stats_h2\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 GCST004787_summary_stats_munged.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. \\\n", + "--out GCST004787_summary_stats_h2 \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\n", + "\n", + "Beginning analysis at Thu Nov 6 10:37:03 2025\n", + "Reading summary statistics from GCST004787_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 7164926 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177210 SNPs remain.\n", + "After merging with regression SNP LD, 1174301 SNPs remain.\n", + "Removed 3 SNPs with chi^2 > 336.924 (1174298 SNPs remain)\n", + "Total Observed scale h2: 0.0482 (0.0031)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 
Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: -1.2111e-02 9.6569e-04 -1.5190e-03 -1.4271e-04 -1.4653e-04 6.8730e-05 1.8687e-03 -9.9421e-03 1.8025e-02 1.1469e-02 -1.0165e-03 -9.5619e-03 1.6673e-04 1.0070e-03 2.8008e-03 2.5649e-04 3.5439e-04 -1.1089e-03 4.3438e-03 4.2590e-03 -3.0551e-03 -5.3830e-04 -7.5871e-04 -1.1994e-03 -7.7712e-04 -2.9209e-03 -1.9523e-03 1.0687e-02 2.2717e-03 1.2578e-02 5.5138e-03 -2.1547e-03 -1.8587e-03 -2.6358e-03 -8.1469e-04 -1.7456e-03 -2.3468e-03 -1.4169e-03 -1.2684e-03 2.7806e-03 -3.6505e-04 -2.0104e-03 -4.8563e-03 1.8942e-04 2.2270e-03 
-3.0406e-03 -1.3599e-03 8.8194e-04 3.1442e-03 -1.1567e-04 -4.2058e-04 4.6045e-03 -7.1973e-03 -1.5086e-02 1.2136e-03 8.1949e-04 3.0214e-03 4.8475e-03 3.3215e-03 2.1972e-03 5.2089e-03 3.9394e-03 4.3883e-03 6.0610e-03 4.0806e-03 -8.3636e-09 -1.8387e-05 -5.2094e-03 -6.6823e-03 -3.7387e-04 -6.2807e-03 1.1181e-16 5.0034e-03 3.4716e-03 -7.7899e-04 -5.3968e-04\n", + " 8.2235e-04 3.9258e-05 -2.0397e-03 -7.0521e-03 5.7726e-03 1.7156e-02 2.6581e-03 -8.3309e-04 4.1682e-03 -3.2282e-03 1.1065e-03 -1.0149e-03 -3.9125e-03 -5.0393e-05 1.9047e-03 2.8446e-03 4.1525e-03 -2.0550e-04 2.5138e-03 2.6276e-04 4.5676e-04\n", + "Observed scale h2 SE: 3.6876e-02 2.8241e-03 3.4639e-03 4.1116e-03 8.9579e-03 4.0255e-03 4.9871e-03 7.6983e-03 1.0069e-02 1.4079e-02 2.4109e-02 1.2775e-02 1.1922e-03 1.9142e-03 3.3326e-03 3.4785e-03 8.1126e-03 7.6878e-03 5.5114e-03 2.1808e-03 6.6970e-03 3.7155e-03 9.1362e-03 1.0647e-02 5.1892e-03 5.8420e-03 7.1083e-03 4.7557e-03 5.6549e-03 5.1946e-03 4.8613e-03 2.5779e-03 1.9454e-03 1.4419e-03 2.2687e-03 3.2906e-03 1.8913e-03 2.0055e-02 6.1729e-03 2.3233e-03 1.0197e-03 6.0144e-03 8.4002e-03 1.1188e-02 1.0522e-02 2.0156e-03 2.3500e-03 1.8394e-03 3.3389e-03 7.7766e-04 1.4525e-03 2.5864e-03 4.0733e-03 1.1147e-02 1.8261e-03 9.4725e-04 1.2719e-03 1.2614e-03 1.0969e-03 8.3697e-04 1.0659e-03 1.2968e-03 1.0163e-03 1.5718e-03 2.0401e-03 1.8306e-08 9.6177e-06 2.8681e-03 7.6236e-03 3.0607e-03 8.4407e-03 1.0669e-16 1.8470e-03 2.3519e-03 2.1682e-03 3.0374e-03\n", + " 1.3747e-03 8.5399e-04 3.5229e-03 1.8007e-02 5.5563e-03 1.4238e-02 3.2248e-03 5.6425e-03 2.2698e-03 2.4325e-03 1.9759e-03 9.2022e-04 2.3846e-03 1.4766e-03 1.5829e-03 2.2442e-03 1.6180e-03 1.7903e-03 1.3109e-03 6.2655e-04 4.3958e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 
1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: -2.5111e-01 2.0022e-02 -3.1493e-02 -2.9588e-03 -3.0380e-03 1.4250e-03 3.8744e-02 -2.0613e-01 3.7372e-01 2.3779e-01 -2.1075e-02 -1.9825e-01 3.4570e-03 2.0879e-02 5.8070e-02 5.3178e-03 7.3477e-03 -2.2992e-02 9.0061e-02 8.8303e-02 -6.3341e-02 -1.1161e-02 -1.5730e-02 -2.4867e-02 -1.6112e-02 -6.0560e-02 -4.0478e-02 2.2157e-01 4.7100e-02 2.6079e-01 1.1432e-01 -4.4673e-02 -3.8536e-02 -5.4650e-02 -1.6891e-02 -3.6192e-02 -4.8657e-02 -2.9378e-02 -2.6298e-02 5.7650e-02 -7.5687e-03 -4.1682e-02 -1.0069e-01 3.9273e-03 4.6174e-02 -6.3042e-02 -2.8196e-02 1.8285e-02 6.5190e-02 -2.3983e-03 -8.7199e-03 9.5466e-02 -1.4922e-01 -3.1278e-01 2.5162e-02 1.6991e-02 6.2644e-02 1.0050e-01 6.8866e-02 4.5555e-02 1.0800e-01 8.1676e-02 9.0984e-02 1.2566e-01 8.4605e-02 -1.7341e-07 -3.8123e-04 -1.0801e-01 -1.3855e-01 -7.7515e-03 -1.3022e-01 2.3181e-15 1.0374e-01 7.1978e-02 -1.6151e-02 -1.1189e-02\n", + " 1.7050e-02 8.1395e-04 -4.2290e-02 -1.4621e-01 1.1968e-01 3.5570e-01 5.5111e-02 -1.7273e-02 8.6421e-02 -6.6931e-02 2.2941e-02 -2.1042e-02 -8.1119e-02 -1.0448e-03 3.9491e-02 5.8977e-02 8.6096e-02 -4.2608e-03 5.2118e-02 5.4478e-03 9.4701e-03\n", + "Enrichment: -4.5303e+00 
2.5332e+01 -1.1510e+01 -2.1637e+00 -1.7939e-01 1.0795e+00 1.4878e+01 -2.7473e+01 1.6736e+01 3.9015e+01 -2.2940e+00 -1.0836e+01 1.4458e+01 2.5609e+01 2.4990e+01 2.0037e+00 1.5841e+00 -2.0790e+00 4.1764e+00 5.0711e+01 -4.2557e+00 -3.0223e+00 -1.6806e+00 -1.0595e+00 -1.5950e+00 -2.6411e+01 -5.4955e+00 3.2725e+01 2.2252e+01 3.7549e+01 1.9742e+01 -2.0802e+00 -7.4435e+01 -1.1912e+02 -1.2245e+01 -1.4092e+01 -8.2893e+01 -1.1522e+00 -1.8359e+00 6.2203e+00 -4.3029e+01 -5.7417e+00 -8.6471e+00 2.0526e-01 1.9991e+00 -6.3872e+01 -3.0771e+01 2.9539e+01 7.6995e+01 -7.9242e+00 -7.3557e+00 8.2385e+01 -3.9697e+01 -3.2332e+00 5.5701e+01 2.9947e+00 1.1318e+01 1.8204e+01 1.2338e+01 8.3594e+00 1.9554e+01 1.4775e+01 1.6377e+01 2.2425e+01 1.5502e+01 -9.2218e-01 -2.4595e+00 -1.2552e+00 -5.4235e-01 -7.8690e-01 -2.3372e+02 -1.7808e+00 1.8099e+02 7.8485e+01 -2.1790e+01 -6.3619e+00\n", + " 9.8590e+01 5.4079e+00 -2.5919e+01 -6.9871e+00 1.0071e+02 2.0194e+01 5.1599e+01 -1.9905e+00 1.1507e+02 -6.8492e+01 2.7199e+01 -1.0578e+02 -4.4045e+01 -1.9239e+00 1.7083e+02 1.9999e+02 3.0230e+02 -8.5157e+00 1.4186e+01 3.9367e+01 2.9607e+02\n", + "Coefficients: -2.0317e-09 1.1361e-08 -5.1620e-09 -9.7038e-10 -8.0450e-11 4.8414e-10 6.6726e-09 -1.2321e-08 7.5057e-09 1.7497e-08 -1.0288e-09 -4.8598e-09 6.4839e-09 1.1485e-08 1.1207e-08 8.9859e-10 7.1042e-10 -9.3238e-10 1.8730e-09 2.2742e-08 -1.9086e-09 -1.3554e-09 -7.5369e-10 -4.7514e-10 -7.1532e-10 -1.1845e-08 -2.4646e-09 1.4676e-08 9.9796e-09 1.6840e-08 8.8538e-09 -9.3290e-10 -3.3382e-08 -5.3422e-08 -5.4914e-09 -6.3198e-09 -3.7175e-08 -5.1671e-10 -8.2333e-10 2.7896e-09 -1.9297e-08 -2.5750e-09 -3.8780e-09 9.2052e-11 8.9653e-10 -2.8645e-08 -1.3800e-08 1.3247e-08 3.4530e-08 -3.5538e-09 -3.2988e-09 3.6947e-08 -1.7803e-08 -1.4500e-09 2.4980e-08 1.3431e-09 5.0759e-09 8.1639e-09 5.5335e-09 3.7490e-09 8.7694e-09 6.6261e-09 7.3447e-09 1.0057e-08 6.9523e-09 -4.1357e-10 -1.1030e-09 -5.6292e-10 -2.4323e-10 -3.5290e-10 -1.0482e-07 -7.9863e-10 8.1170e-08 3.5198e-08 -9.7721e-09 
-2.8531e-09\n", + " 4.4215e-08 2.4253e-09 -1.1624e-08 -3.1335e-09 4.5168e-08 9.0565e-09 2.3141e-08 -8.9270e-10 5.1606e-08 -3.0717e-08 1.2198e-08 -4.7438e-08 -1.9753e-08 -8.6281e-10 7.6613e-08 8.9689e-08 1.3558e-07 -3.8191e-09 6.3620e-09 1.7655e-08 1.3278e-07\n", + "Coefficient SE: 6.1861e-09 3.3224e-08 1.1772e-08 2.7958e-08 4.9183e-09 2.8356e-08 1.7807e-08 9.5404e-09 4.1928e-09 2.1478e-08 2.4402e-08 6.4928e-09 4.6362e-08 2.1831e-08 1.3335e-08 1.2187e-08 1.6263e-08 6.4638e-09 2.3765e-09 1.1645e-08 4.1838e-09 9.3553e-09 9.0757e-09 4.2179e-09 4.7765e-09 2.3690e-08 8.9733e-09 6.5309e-09 2.4842e-08 6.9545e-09 7.8060e-09 1.1161e-09 3.4939e-08 2.9223e-08 1.5292e-08 1.1913e-08 2.9960e-08 7.3135e-09 4.0069e-09 2.3308e-09 5.3903e-08 7.7035e-09 6.7080e-09 5.4371e-09 4.2360e-09 1.8988e-08 2.3846e-08 2.7630e-08 3.6668e-08 2.3892e-08 1.1393e-08 2.0754e-08 1.0075e-08 1.0714e-09 3.7586e-08 1.5524e-09 2.1367e-09 2.1244e-09 1.8274e-09 1.4281e-09 1.7946e-09 2.1813e-09 1.7010e-09 2.6081e-09 3.4758e-09 9.0521e-10 5.7694e-10 3.0992e-10 2.7749e-10 2.8891e-09 1.4086e-07 7.6207e-10 2.9964e-08 2.3846e-08 2.7199e-08 1.6058e-08\n", + " 7.3915e-08 5.2758e-08 2.0077e-08 8.0013e-09 4.3475e-08 7.5160e-09 2.8074e-08 6.0463e-09 2.8102e-08 2.3146e-08 2.1782e-08 4.3013e-08 1.2039e-08 2.5282e-08 6.3669e-08 7.0758e-08 5.2827e-08 3.3272e-08 3.3177e-09 4.2098e-08 1.2779e-07\n", + "Lambda GC: 1.0466\n", + "Mean Chi^2: 1.139\n", + "Intercept: 0.8491 (0.0094)\n", + "Ratio < 0 (usually indicates GC correction).\n", + "Analysis finished at Thu Nov 6 10:37:31 2025\n", + "Total time elapsed: 27.21s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "h2_results = estimate_heritability(\n", + " sumstats_file=str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged.sumstats.gz\"),\n", + " 
ref_ld_chr=os.path.join(ldscores_path, ldscores_prefix),\n", + " w_ld_chr=os.path.join(ldweights_path, ldweights_prefix),\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_h2\"),\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Genetic Correlation Analysis\n", + "Genetic correlation analysis quantifies the shared genetic basis between two traits. We download a second GWAS for coronary artery disease (GWAS Catalog accession `GCST90043957`), munge it, and compute its genetic correlation with the first study (`GCST004787`)." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Fetching https://www.ebi.ac.uk/gwas/rest/api/v2/studies/GCST90043957\n", + "INFO:root:Found harmonised file: 34737426-GCST90043957-MONDO_0021661.h.tsv.gz\n", + "INFO:root:Downloading http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90043001-GCST90044000/GCST90043957/harmonised/34737426-GCST90043957-MONDO_0021661.h.tsv.gz to /Users/larnoldt/cellink_data/GCST90043957_summary_stats.tsv.gz\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/Users/larnoldt/cellink_data/GCST90043957_summary_stats.tsv.gz')" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gwas_summary_statistic_path_2 = get_gwas_catalog_study_summary_stats(\"GCST90043957\", return_path=True)\n", + "gwas_summary_statistic_path_2" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Running munge_sumstats: /ldsc/munge_sumstats.py --sumstats /Users/larnoldt/cellink_data/GCST90043957_summary_stats.tsv.gz --out GCST90043957_summary_stats_munged --N 456348 --signed-sumstats beta,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + 
"INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/munge_sumstats.py --sumstats /cellink_data/GCST90043957_summary_stats.tsv.gz --out GCST90043957_summary_stats_munged --N 456348 --signed-sumstats beta,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats beta,0 \\\n", + "--out GCST90043957_summary_stats_munged \\\n", + "--N 456348.0 \\\n", + "--a1 effect_allele \\\n", + "--a2 other_allele \\\n", + "--snp variant_id \\\n", + "--sumstats /cellink_data/GCST90043957_summary_stats.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "p_value:\tp-Value\n", + "other_allele:\tAllele 2, interpreted as non-ref allele for signed sumstat.\n", + "n:\tSample size\n", + "beta:\tDirectional summary statistic as specified by --signed-sumstats.\n", + "variant_id:\tVariant ID (e.g., rs number)\n", + "effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "\n", + "Reading sumstats from /cellink_data/GCST90043957_summary_stats.tsv.gz into memory 5000000 SNPs at a time.\n", + "... 
done\n", + "Read 11831294 SNPs from --sumstats file.\n", + "Removed 0 SNPs with missing values.\n", + "Removed 0 SNPs with INFO <= 0.9.\n", + "Removed 0 SNPs with MAF <= 0.01.\n", + "Removed 0 SNPs with out-of-bounds p-values.\n", + "Removed 1811396 variants that were not SNPs or were strand-ambiguous.\n", + "10019898 SNPs remain.\n", + "Removed 0 SNPs with duplicated rs numbers (10019898 SNPs remain).\n", + "Removed 0 SNPs with N < 304180.0 (10019898 SNPs remain).\n", + "Median value of SIGNED_SUMSTATS was -0.0001342485, which seems sensible.\n", + "Writing summary statistics for 10019898 SNPs (10019898 with nonmissing beta) to GCST90043957_summary_stats_munged.sumstats.gz.\n", + "\n", + "Metadata:\n", + "Mean chi^2 = 1.156\n", + "Lambda GC = 1.112\n", + "Max chi^2 = 378.956\n", + "2388 Genome-wide significant SNPs (some may have been removed by filtering).\n", + "\n", + "Conversion finished at Thu Nov 6 22:30:15 2025\n", + "Total time elapsed: 1.0m:54.97s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "munged_file_2 = munge_sumstats(\n", + " sumstats_file=gwas_summary_statistic_path_2,\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_2).stem).stem + \"_munged\"),\n", + " info_min=0.9,\n", + " maf_min=0.01,\n", + " signed_sumstats=(\"beta\", 0),\n", + " run=True,\n", + " p_col=\"p_value\",\n", + " snp_col=\"variant_id\",\n", + " a1_col=\"effect_allele\",\n", + " a2_col=\"other_allele\",\n", + " n_samples=456348,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, estimate the genetic correlation between the two traits:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"INFO:cellink.tl.external._ldsc:Estimating genetic correlation: /ldsc/ldsc.py --rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz --ref-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out CHD_rg\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out CHD_rg\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. \\\n", + "--out CHD_rg \\\n", + "--rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\n", + "\n", + "Beginning analysis at Thu Nov 6 22:30:48 2025\n", + "Reading summary statistics from GCST004787_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 7164926 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177210 SNPs remain.\n", + "After merging with regression SNP LD, 1174301 SNPs remain.\n", + "Computing rg for phenotype 2/2\n", + "Reading summary statistics from GCST90043957_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 10019898 SNPs.\n", + "After merging with summary statistics, 1157270 SNPs remain.\n", + "1157270 SNPs with valid alleles.\n", + "\n", + "Heritability of phenotype 1\n", + "---------------------------\n", + "Total Observed scale h2: 0.0491 (0.0038)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 
PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: -1.0015e-02 1.0538e-03 -2.4187e-03 -8.5473e-04 -2.3173e-03 -7.0296e-04 3.3579e-03 -1.5240e-02 1.6266e-02 1.1006e-02 2.9414e-03 -1.3212e-02 -9.9847e-05 1.4003e-03 3.1795e-03 -2.5359e-04 4.9086e-04 1.4679e-03 5.3681e-03 3.7131e-03 -4.7677e-03 7.1913e-04 
2.2203e-03 -3.9735e-03 -4.0377e-04 -3.1958e-03 4.9703e-04 9.7101e-03 1.9432e-03 1.2466e-02 6.1202e-03 -2.6276e-03 -1.9326e-03 -3.1809e-03 -9.6151e-04 -6.3418e-04 -2.9363e-03 -4.9169e-03 -1.3964e-03 2.6407e-03 -4.3309e-04 -3.2521e-04 -8.7740e-03 -5.4192e-05 5.4624e-03 -3.0598e-03 -1.4553e-03 9.4411e-04 3.3972e-03 -1.8424e-04 -5.0624e-04 4.0449e-03 -5.9383e-03 -1.3807e-02 1.6324e-03 2.2984e-03 4.9572e-03 5.2428e-03 3.9244e-03 3.0600e-03 3.8396e-03 4.6460e-03 5.1612e-03 5.8238e-03 4.4137e-03 -1.7508e-08 -3.0546e-05 -6.6402e-03 -5.6393e-03 3.4633e-03 -7.1287e-03 1.6247e-17 4.7571e-03 3.7948e-03 -1.2112e-03 -6.3892e-05\n", + " 6.4765e-04 -2.5847e-05 -1.9629e-03 -1.8892e-03 4.7842e-03 1.2490e-02 2.5051e-03 -1.0641e-03 3.6368e-03 -3.1866e-03 1.7460e-03 -1.0900e-03 -4.7720e-03 -3.4389e-04 1.1120e-03 2.4022e-03 4.8712e-03 -2.9330e-04 2.5735e-03 5.4408e-04 3.0727e-04\n", + "Observed scale h2 SE: 3.3113e-02 3.0412e-03 3.7903e-03 4.0891e-03 9.6479e-03 4.4886e-03 6.0158e-03 8.4307e-03 1.0836e-02 1.5152e-02 2.6129e-02 1.4501e-02 1.3498e-03 2.1675e-03 3.4629e-03 3.8180e-03 8.7109e-03 7.7280e-03 5.8321e-03 2.4646e-03 7.1905e-03 4.0641e-03 9.6940e-03 1.2233e-02 6.3812e-03 6.6464e-03 7.4992e-03 4.8999e-03 6.0768e-03 5.5779e-03 5.4241e-03 2.4000e-03 2.0993e-03 1.5682e-03 2.4484e-03 3.9649e-03 2.2279e-03 1.9340e-02 6.7085e-03 2.3778e-03 1.1476e-03 6.5687e-03 8.8161e-03 1.0342e-02 1.1430e-02 2.2670e-03 2.5392e-03 2.0371e-03 3.6266e-03 8.5538e-04 1.5581e-03 2.5421e-03 4.3575e-03 1.2080e-02 1.9372e-03 1.9705e-03 2.0474e-03 1.3799e-03 1.1893e-03 9.8355e-04 1.3933e-03 1.5192e-03 1.0290e-03 1.5327e-03 2.1380e-03 2.1371e-08 1.2226e-05 3.0942e-03 7.3861e-03 4.8725e-03 8.1873e-03 1.3327e-16 1.9197e-03 2.5245e-03 2.1664e-03 3.2087e-03\n", + " 1.4117e-03 9.2452e-04 3.4364e-03 1.8878e-02 6.3746e-03 1.5571e-02 3.4471e-03 6.2454e-03 2.2727e-03 2.6651e-03 2.0385e-03 9.7837e-04 2.3306e-03 1.5901e-03 1.5159e-03 2.2262e-03 1.8028e-03 1.8342e-03 1.2877e-03 6.7567e-04 4.3004e-04\n", + "Proportion of 
SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: -2.0388e-01 2.1451e-02 -4.9237e-02 -1.7400e-02 -4.7173e-02 -1.4310e-02 6.8356e-02 -3.1025e-01 3.3112e-01 2.2404e-01 5.9878e-02 -2.6896e-01 -2.0326e-03 2.8506e-02 6.4726e-02 -5.1624e-03 9.9925e-03 2.9882e-02 1.0928e-01 7.5588e-02 -9.7055e-02 1.4639e-02 4.5199e-02 -8.0888e-02 -8.2195e-03 -6.5057e-02 1.0118e-02 1.9767e-01 3.9557e-02 2.5376e-01 1.2459e-01 -5.3490e-02 -3.9343e-02 -6.4753e-02 -1.9574e-02 -1.2910e-02 -5.9775e-02 -1.0009e-01 -2.8427e-02 5.3757e-02 -8.8164e-03 -6.6202e-03 -1.7861e-01 -1.1032e-03 1.1120e-01 -6.2289e-02 -2.9625e-02 1.9219e-02 6.9157e-02 -3.7505e-03 -1.0306e-02 8.2343e-02 -1.2089e-01 -2.8107e-01 3.3232e-02 4.6788e-02 1.0091e-01 1.0673e-01 7.9890e-02 6.2292e-02 7.8162e-02 9.4579e-02 1.0507e-01 1.1856e-01 8.9849e-02 -3.5640e-07 -6.2183e-04 -1.3518e-01 -1.1480e-01 7.0503e-02 -1.4512e-01 3.3073e-16 9.6841e-02 7.7252e-02 -2.4656e-02 -1.3007e-03\n", + " 
1.3184e-02 -5.2616e-04 -3.9960e-02 -3.8459e-02 9.7392e-02 2.5427e-01 5.0996e-02 -2.1661e-02 7.4035e-02 -6.4869e-02 3.5543e-02 -2.2189e-02 -9.7145e-02 -7.0006e-03 2.2637e-02 4.8901e-02 9.9163e-02 -5.9708e-03 5.2390e-02 1.1076e-02 6.2552e-03\n", + "Enrichment: -3.6782e+00 2.7141e+01 -1.7995e+01 -1.2724e+01 -2.7855e+00 -1.0841e+01 2.6250e+01 -4.1350e+01 1.4828e+01 3.6759e+01 6.5178e+00 -1.4701e+01 -8.5008e+00 3.4965e+01 2.7854e+01 -1.9451e+00 2.1543e+00 2.7021e+00 5.0676e+00 4.3409e+01 -6.5209e+00 3.9643e+00 4.8288e+00 -3.4462e+00 -8.1368e-01 -2.8372e+01 1.3737e+00 2.9194e+01 1.8689e+01 3.6537e+01 2.1516e+01 -2.4907e+00 -7.5992e+01 -1.4114e+02 -1.4189e+01 -5.0267e+00 -1.0183e+02 -3.9255e+00 -1.9844e+00 5.8003e+00 -5.0123e+01 -9.1193e-01 -1.5340e+01 -5.7657e-02 4.8142e+00 -6.3109e+01 -3.2330e+01 3.1047e+01 8.1680e+01 -1.2392e+01 -8.6933e+00 7.1060e+01 -3.2158e+01 -2.9054e+00 7.3564e+01 8.2467e+00 1.8232e+01 1.9331e+01 1.4314e+01 1.1431e+01 1.4152e+01 1.7109e+01 1.8912e+01 2.1156e+01 1.6463e+01 -1.8954e+00 -4.0117e+00 -1.5709e+00 -4.4940e-01 7.1571e+00 -2.6046e+02 -2.5407e-01 1.6896e+02 8.4235e+01 -3.3263e+01 -7.3951e-01\n", + " 7.6237e+01 -3.4958e+00 -2.4491e+01 -1.8378e+00 8.1955e+01 1.4435e+01 4.7746e+01 -2.4963e+00 9.8579e+01 -6.6382e+01 4.2139e+01 -1.1154e+02 -5.2746e+01 -1.2891e+01 9.7922e+01 1.6582e+02 3.4819e+02 -1.1933e+01 1.4260e+01 8.0036e+01 1.9556e+02\n", + "Coefficients: -1.6801e-09 1.2397e-08 -8.2195e-09 -5.8119e-09 -1.2723e-09 -4.9517e-09 1.1990e-08 -1.8887e-08 6.7730e-09 1.6790e-08 2.9771e-09 -6.7151e-09 -3.8828e-09 1.5970e-08 1.2723e-08 -8.8846e-10 9.8399e-10 1.2342e-09 2.3147e-09 1.9827e-08 -2.9785e-09 1.8107e-09 2.2056e-09 -1.5741e-09 -3.7166e-10 -1.2959e-08 6.2744e-10 1.3335e-08 8.5363e-09 1.6689e-08 9.8275e-09 -1.1377e-09 -3.4710e-08 -6.4468e-08 -6.4811e-09 -2.2960e-09 -4.6514e-08 -1.7930e-09 -9.0642e-10 2.6494e-09 -2.2894e-08 -4.1654e-10 -7.0065e-09 -2.6336e-11 2.1990e-09 -2.8826e-08 -1.4767e-08 1.4181e-08 3.7308e-08 -5.6602e-09 -3.9708e-09 
3.2458e-08 -1.4689e-08 -1.3271e-09 3.3601e-08 3.7668e-09 8.3278e-09 8.8297e-09 6.5379e-09 5.2210e-09 6.4641e-09 7.8146e-09 8.6383e-09 9.6633e-09 7.5197e-09 -8.6573e-10 -1.8324e-09 -7.1753e-10 -2.0527e-10 3.2691e-09 -1.1897e-07 -1.1605e-10 7.7175e-08 3.8475e-08 -1.5193e-08 -3.3778e-10\n", + " 3.4822e-08 -1.5967e-09 -1.1187e-08 -8.3945e-10 3.7434e-08 6.5935e-09 2.1809e-08 -1.1402e-09 4.5027e-08 -3.0321e-08 1.9247e-08 -5.0948e-08 -2.4092e-08 -5.8879e-09 4.4727e-08 7.5740e-08 1.5904e-07 -5.4507e-09 6.5133e-09 3.6557e-08 8.9323e-08\n", + "Coefficient SE: 5.5549e-09 3.5779e-08 1.2881e-08 2.7805e-08 5.2972e-09 3.1618e-08 2.1481e-08 1.0448e-08 4.5121e-09 2.3115e-08 2.6446e-08 7.3700e-09 5.2489e-08 2.4720e-08 1.3856e-08 1.3376e-08 1.7462e-08 6.4977e-09 2.5148e-09 1.3160e-08 4.4921e-09 1.0233e-08 9.6299e-09 4.8462e-09 5.8738e-09 2.6952e-08 9.4668e-09 6.7290e-09 2.6695e-08 7.4676e-09 8.7098e-09 1.0391e-09 3.7704e-08 3.1785e-08 1.6503e-08 1.4355e-08 3.5291e-08 7.0525e-09 4.3545e-09 2.3856e-09 6.0666e-08 8.4134e-09 7.0401e-09 5.0257e-09 4.6013e-09 2.1357e-08 2.5766e-08 3.0599e-08 3.9828e-08 2.6280e-08 1.2221e-08 2.0399e-08 1.0778e-08 1.1611e-09 3.9874e-08 3.2294e-09 3.4395e-09 2.3239e-09 1.9813e-09 1.6782e-09 2.3457e-09 2.5554e-09 1.7222e-09 2.5432e-09 3.6425e-09 1.0568e-09 7.3341e-10 3.3436e-10 2.6885e-10 4.5993e-09 1.3663e-07 9.5193e-10 3.1143e-08 2.5595e-08 2.7176e-08 1.6964e-08\n", + " 7.5904e-08 5.7115e-08 1.9584e-08 8.3880e-09 4.9878e-08 8.2198e-09 3.0010e-08 6.6923e-09 2.8138e-08 2.5359e-08 2.2473e-08 4.5731e-08 1.1766e-08 2.7226e-08 6.0971e-08 7.0190e-08 5.8859e-08 3.4086e-08 3.2591e-09 4.5399e-08 1.2501e-07\n", + "Lambda GC: 1.0466\n", + "Mean Chi^2: 1.142\n", + "Intercept: 0.8533 (0.0121)\n", + "Ratio < 0 (usually indicates GC correction).\n", + "\n", + "Heritability of phenotype 2/2\n", + "-----------------------------\n", + "Total Observed scale h2: 0.0297 (0.0032)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 
Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 
BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: 8.8839e-04 -9.3805e-04 2.5936e-04 -1.0410e-03 -8.5140e-03 -2.9552e-03 2.5074e-03 -4.2855e-03 1.9348e-02 -1.6913e-02 2.8922e-02 -1.6714e-02 8.3074e-04 -1.5534e-04 2.2472e-03 3.2617e-03 -7.8316e-03 9.2509e-03 4.9165e-03 6.0522e-04 -3.5348e-03 3.7162e-03 -4.8263e-03 -2.1888e-03 -1.0812e-03 -2.6360e-03 1.7446e-03 7.5089e-03 3.0158e-03 4.8341e-03 8.7325e-03 2.0837e-03 -2.4400e-03 -2.2472e-03 1.3217e-03 2.7870e-03 -3.2071e-03 -6.9366e-03 8.3931e-04 -1.2416e-03 -7.6842e-05 -2.0082e-03 -1.1581e-02 -4.2742e-03 2.5723e-03 -3.2393e-03 -1.6740e-03 9.2816e-04 3.2906e-03 -1.5070e-03 -2.2401e-05 9.4201e-04 -2.9080e-03 -2.6246e-03 2.6168e-03 1.1527e-03 2.0262e-03 2.4177e-03 2.8496e-03 1.4368e-03 1.6993e-03 3.2907e-03 2.8629e-03 4.0534e-03 3.6632e-03 -1.2251e-08 -2.1761e-05 -5.9192e-03 -5.1659e-03 -4.6335e-04 -9.8274e-03 -6.0863e-17 1.0092e-03 1.8435e-03 6.1848e-06 1.0243e-03\n", + " 1.4544e-03 7.6913e-04 -1.5412e-03 1.3678e-02 1.3692e-03 4.1833e-04 2.0872e-04 -1.1583e-03 3.5822e-03 -2.8837e-03 1.5556e-03 -1.5021e-03 -1.1339e-03 -8.1364e-05 1.7519e-03 1.8571e-03 2.7442e-03 -3.5932e-04 3.8293e-04 -8.6324e-05 3.1916e-04\n", + "Observed scale h2 SE: 2.9650e-02 2.0989e-03 3.5977e-03 3.5999e-03 8.8784e-03 4.5947e-03 5.6360e-03 6.5695e-03 1.0205e-02 1.4792e-02 2.5825e-02 1.3656e-02 9.0434e-04 1.7933e-03 2.7224e-03 2.9873e-03 6.8048e-03 6.1199e-03 4.9819e-03 2.0055e-03 6.0473e-03 3.2420e-03 7.0628e-03 8.9577e-03 5.3871e-03 5.7062e-03 5.2790e-03 3.8313e-03 5.5824e-03 4.8573e-03 3.9929e-03 
2.1181e-03 1.9731e-03 1.2732e-03 2.1772e-03 3.9637e-03 2.0869e-03 1.8437e-02 5.0498e-03 1.8207e-03 1.0379e-03 4.6858e-03 6.8138e-03 9.6916e-03 9.2174e-03 1.7571e-03 2.0083e-03 1.3270e-03 3.8174e-03 6.9828e-04 1.2639e-03 1.8807e-03 4.0674e-03 9.6844e-03 1.5332e-03 1.6927e-03 1.2729e-03 9.6167e-04 8.5544e-04 8.5599e-04 9.6612e-04 1.1168e-03 1.0875e-03 1.3383e-03 2.1800e-03 1.5217e-08 1.1085e-05 2.7410e-03 5.4126e-03 3.3349e-03 5.5275e-03 8.8838e-17 1.4751e-03 2.0632e-03 1.7612e-03 2.7837e-03\n", + " 1.0161e-03 8.5864e-04 2.6315e-03 1.4837e-02 6.1369e-03 1.2928e-02 2.8169e-03 5.1240e-03 2.1247e-03 2.2462e-03 1.7575e-03 8.4163e-04 1.8861e-03 1.3695e-03 1.1031e-03 1.5723e-03 1.5232e-03 1.6419e-03 9.7241e-04 4.6907e-04 3.5868e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: 2.9961e-02 -3.1636e-02 8.7468e-03 -3.5110e-02 -2.8714e-01 -9.9667e-02 8.4564e-02 -1.4453e-01 6.5253e-01 -5.7040e-01 9.7542e-01 
-5.6368e-01 2.8017e-02 -5.2389e-03 7.5787e-02 1.1000e-01 -2.6412e-01 3.1199e-01 1.6581e-01 2.0411e-02 -1.1921e-01 1.2533e-01 -1.6277e-01 -7.3818e-02 -3.6465e-02 -8.8899e-02 5.8836e-02 2.5324e-01 1.0171e-01 1.6303e-01 2.9451e-01 7.0273e-02 -8.2290e-02 -7.5789e-02 4.4576e-02 9.3992e-02 -1.0816e-01 -2.3394e-01 2.8306e-02 -4.1874e-02 -2.5915e-03 -6.7726e-02 -3.9059e-01 -1.4415e-01 8.6752e-02 -1.0925e-01 -5.6455e-02 3.1302e-02 1.1098e-01 -5.0824e-02 -7.5547e-04 3.1770e-02 -9.8074e-02 -8.8516e-02 8.8252e-02 3.8876e-02 6.8333e-02 8.1537e-02 9.6103e-02 4.8456e-02 5.7311e-02 1.1098e-01 9.6552e-02 1.3670e-01 1.2354e-01 -4.1316e-07 -7.3390e-04 -1.9963e-01 -1.7422e-01 -1.5627e-02 -3.3143e-01 -2.0526e-15 3.4034e-02 6.2174e-02 2.0858e-04 3.4544e-02\n", + " 4.9051e-02 2.5939e-02 -5.1976e-02 4.6130e-01 4.6175e-02 1.4108e-02 7.0393e-03 -3.9064e-02 1.2081e-01 -9.7252e-02 5.2464e-02 -5.0659e-02 -3.8242e-02 -2.7440e-03 5.9083e-02 6.2631e-02 9.2550e-02 -1.2118e-02 1.2914e-02 -2.9113e-03 1.0764e-02\n", + "Enrichment: 5.4053e-01 -4.0027e+01 3.1968e+00 -2.5675e+01 -1.6955e+01 -7.5504e+01 3.2474e+01 -1.9263e+01 2.9222e+01 -9.3586e+01 1.0617e+02 -3.0811e+01 1.1717e+02 -6.4258e+00 3.2614e+01 4.1447e+01 -5.6943e+01 2.8211e+01 7.6892e+00 1.1722e+01 -8.0096e+00 3.3938e+01 -1.7389e+01 -3.1450e+00 -3.6098e+00 -3.8770e+01 7.9878e+00 3.7401e+01 4.8053e+01 2.3474e+01 5.0859e+01 3.2722e+00 -1.5895e+02 -1.6520e+02 3.2314e+01 3.6597e+01 -1.8426e+02 -9.1747e+00 1.9760e+00 -4.5181e+00 -1.4733e+01 -9.3292e+00 -3.3544e+01 -7.5338e+00 3.7559e+00 -1.1068e+02 -6.1610e+01 5.0566e+01 1.3107e+02 -1.6793e+02 -6.3728e-01 2.7417e+01 -2.6090e+01 -9.1499e-01 1.9536e+02 6.8521e+00 1.2346e+01 1.4768e+01 1.7218e+01 8.8917e+00 1.0377e+01 2.0076e+01 1.7379e+01 2.4394e+01 2.2637e+01 -2.1972e+00 -4.7347e+00 -2.3199e+00 -6.8201e-01 -1.5863e+00 -5.9486e+02 1.5768e+00 5.9381e+01 6.7794e+01 2.8141e-01 1.9641e+01\n", + " 2.8363e+02 1.7234e+02 -3.1856e+01 2.2044e+01 3.8856e+01 8.0096e-01 6.5907e+00 -4.5018e+00 1.6086e+02 
-9.9521e+01 6.2201e+01 -2.5466e+02 -2.0764e+01 -5.0528e+00 2.5558e+02 2.1238e+02 3.2497e+02 -2.4220e+01 3.5151e+00 -2.1038e+01 3.3651e+02\n", + "Coefficients: 1.4903e-10 -1.1036e-08 8.8138e-10 -7.0788e-09 -4.6747e-09 -2.0817e-08 8.9534e-09 -5.3109e-09 8.0567e-09 -2.5802e-08 2.9273e-08 -8.4948e-09 3.2306e-08 -1.7716e-09 8.9920e-09 1.1427e-08 -1.5699e-08 7.7780e-09 2.1200e-09 3.2317e-09 -2.2083e-09 9.3570e-09 -4.7943e-09 -8.6710e-10 -9.9525e-10 -1.0689e-08 2.2023e-09 1.0312e-08 1.3248e-08 6.4718e-09 1.4022e-08 9.0217e-10 -4.3823e-08 -4.5546e-08 8.9093e-09 1.0090e-08 -5.0802e-08 -2.5295e-09 5.4480e-10 -1.2457e-09 -4.0620e-09 -2.5721e-09 -9.2484e-09 -2.0771e-09 1.0355e-09 -3.0516e-08 -1.6986e-08 1.3942e-08 3.6137e-08 -4.6299e-08 -1.7570e-10 7.5590e-09 -7.1931e-09 -2.5227e-10 5.3862e-08 1.8892e-09 3.4039e-09 4.0718e-09 4.7472e-09 2.4515e-09 2.8609e-09 5.5350e-09 4.7916e-09 6.7256e-09 6.2411e-09 -6.0579e-10 -1.3054e-09 -6.3962e-10 -1.8804e-10 -4.3736e-10 -1.6401e-07 4.3474e-10 1.6372e-08 1.8691e-08 7.7585e-11 5.4150e-09\n", + " 7.8200e-08 4.7515e-08 -8.7829e-09 6.0777e-09 1.0713e-08 2.2083e-10 1.8171e-09 -1.2412e-09 4.4350e-08 -2.7439e-08 1.7149e-08 -7.0211e-08 -5.7247e-09 -1.3931e-09 7.0465e-08 5.8553e-08 8.9596e-08 -6.6776e-09 9.6913e-10 -5.8002e-09 9.2778e-08\n", + "Coefficient SE: 4.9738e-09 2.4692e-08 1.2226e-08 2.4478e-08 4.8747e-09 3.2365e-08 2.0125e-08 8.1415e-09 4.2494e-09 2.2566e-08 2.6138e-08 6.9407e-09 3.5168e-08 2.0452e-08 1.0893e-08 1.0466e-08 1.3641e-08 5.1456e-09 2.1482e-09 1.0709e-08 3.7779e-09 8.1631e-09 7.0161e-09 3.5486e-09 4.9586e-09 2.3139e-08 6.6641e-09 5.2614e-09 2.4524e-08 6.5028e-09 6.4116e-09 9.1706e-10 3.5437e-08 2.5804e-08 1.4676e-08 1.4350e-08 3.3059e-08 6.7235e-09 3.2779e-09 1.8266e-09 5.4867e-08 6.0018e-09 5.4412e-09 4.7098e-09 3.7106e-09 1.6553e-08 2.0379e-08 1.9932e-08 4.1923e-08 2.1453e-08 9.9135e-09 1.5092e-08 1.0061e-08 9.3083e-10 3.1559e-08 2.7741e-09 2.1384e-09 1.6196e-09 1.4251e-09 1.4605e-09 1.6265e-09 1.8785e-09 1.8202e-09 
2.2207e-09 3.7141e-09 7.5249e-10 6.6493e-10 2.9619e-10 1.9701e-10 3.1478e-09 9.2247e-08 6.3456e-10 2.3931e-08 2.0918e-08 2.2094e-08 1.4716e-08\n", + " 5.4635e-08 5.3045e-08 1.4996e-08 6.5926e-09 4.8018e-08 6.8248e-09 2.4523e-08 5.4907e-09 2.6305e-08 2.1373e-08 1.9375e-08 3.9339e-08 9.5223e-09 2.3449e-08 4.4370e-08 4.9575e-08 4.9729e-08 3.0514e-08 2.4610e-09 3.1517e-08 1.0427e-07\n", + "Lambda GC: 1.1908\n", + "Mean Chi^2: 1.2634\n", + "Intercept: 1.0261 (0.0119)\n", + "Ratio: 0.0992 (0.0452)\n", + "\n", + "Genetic Covariance\n", + "------------------\n", + "Total Observed scale gencov: 0.042 (0.0028)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 
MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale gencov: 4.0572e-02 -1.6669e-03 3.1592e-03 4.1247e-03 3.9495e-03 -3.2056e-03 2.6912e-03 1.6858e-03 6.2850e-03 9.1468e-03 4.4117e-03 -4.9844e-03 4.7380e-04 1.4304e-04 1.4632e-03 3.1222e-03 -6.7569e-03 -1.8984e-04 2.6434e-03 8.5961e-04 3.6739e-03 4.9158e-03 3.7673e-03 -1.0337e-02 -6.6240e-03 -2.8831e-03 1.1834e-03 8.0947e-03 3.0511e-03 5.5734e-03 3.0505e-03 1.1373e-03 -1.3913e-03 -1.9972e-03 3.5823e-04 -1.0687e-03 -2.9276e-03 -9.8307e-03 -5.5376e-03 -3.6785e-04 -7.0872e-04 -9.8307e-03 -3.0002e-03 -8.6648e-03 1.8813e-03 -1.4124e-03 -6.5603e-04 1.9911e-03 2.1165e-03 -1.2635e-03 -1.7451e-03 1.9816e-04 -5.0904e-03 -1.4118e-02 4.0085e-04 7.9698e-04 2.0265e-03 2.9401e-03 2.9314e-03 7.6991e-04 3.9815e-03 1.2263e-03 3.8370e-03 4.5887e-03 6.7958e-03 -2.4995e-08 -3.1508e-05 -1.2315e-03 
-1.6629e-02 -5.4121e-03 -1.1852e-02 3.8827e-17 3.7332e-03 3.2676e-03 -2.3151e-04 -3.8085e-05\n", + " 7.6824e-04 1.8288e-03 -1.2957e-04 -5.2788e-03 -3.7168e-03 4.8980e-03 4.6803e-03 6.5474e-03 3.3841e-03 -7.2881e-04 -8.2463e-04 -1.0779e-03 2.3076e-03 -1.0272e-03 3.3716e-03 4.1005e-03 1.9414e-03 -7.2782e-04 7.9713e-04 -8.8215e-04 4.2808e-04\n", + "Observed scale gencov SE: 5.2173e-02 2.6063e-03 3.7021e-03 2.8447e-03 6.0285e-03 2.9222e-03 3.3289e-03 5.7966e-03 9.1277e-03 8.9377e-03 1.6397e-02 1.0584e-02 7.7051e-04 1.4768e-03 2.7325e-03 2.4247e-03 6.4332e-03 6.3571e-03 4.6036e-03 1.5999e-03 6.0999e-03 2.8057e-03 6.6400e-03 8.8631e-03 4.5303e-03 4.1952e-03 5.0407e-03 4.4812e-03 3.8085e-03 4.7169e-03 3.6894e-03 2.4768e-03 1.9606e-03 1.2380e-03 1.9378e-03 2.4272e-03 1.4339e-03 2.4136e-02 5.1363e-03 2.0651e-03 7.4823e-04 5.2539e-03 5.5072e-03 1.4024e-02 7.4177e-03 1.7716e-03 1.7839e-03 1.7261e-03 3.5178e-03 6.7507e-04 1.7741e-03 1.9158e-03 3.5763e-03 1.0298e-02 1.3807e-03 8.6784e-04 9.9155e-04 8.8151e-04 1.0061e-03 9.7862e-04 1.1429e-03 1.1571e-03 1.6367e-03 1.3505e-03 3.7714e-03 1.7863e-08 1.0453e-05 2.3375e-03 8.4771e-03 4.3907e-03 6.7778e-03 9.4404e-17 1.5976e-03 1.6119e-03 1.6703e-03 2.7225e-03\n", + " 1.0998e-03 1.0836e-03 3.4137e-03 1.5120e-02 3.6770e-03 1.4093e-02 2.0971e-03 4.8909e-03 1.8984e-03 1.6996e-03 1.9937e-03 8.6094e-04 2.6317e-03 1.4989e-03 1.8308e-03 2.2868e-03 1.4014e-03 1.7966e-03 1.0874e-03 4.8382e-04 4.4978e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 
6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of gencov: 9.6611e-01 -3.9694e-02 7.5229e-02 9.8219e-02 9.4048e-02 -7.6334e-02 6.4086e-02 4.0144e-02 1.4966e-01 2.1781e-01 1.0505e-01 -1.1869e-01 1.1282e-02 3.4062e-03 3.4842e-02 7.4348e-02 -1.6090e-01 -4.5205e-03 6.2946e-02 2.0470e-02 8.7484e-02 1.1706e-01 8.9708e-02 -2.4615e-01 -1.5773e-01 -6.8654e-02 2.8179e-02 1.9276e-01 7.2655e-02 1.3272e-01 7.2641e-02 2.7083e-02 -3.3131e-02 -4.7557e-02 8.5303e-03 -2.5448e-02 -6.9714e-02 -2.3410e-01 -1.3187e-01 -8.7594e-03 -1.6876e-02 -2.3409e-01 -7.1443e-02 -2.0633e-01 4.4799e-02 -3.3633e-02 -1.5622e-02 4.7412e-02 5.0399e-02 -3.0088e-02 -4.1554e-02 4.7187e-03 -1.2122e-01 -3.3618e-01 9.5452e-03 1.8978e-02 4.8256e-02 7.0011e-02 6.9805e-02 1.8334e-02 9.4811e-02 2.9201e-02 9.1368e-02 1.0927e-01 1.6183e-01 -5.9521e-07 -7.5030e-04 -2.9325e-02 -3.9597e-01 -1.2888e-01 -2.8224e-01 9.2456e-16 8.8898e-02 7.7811e-02 -5.5130e-03 -9.0691e-04\n", + " 1.8294e-02 4.3548e-02 -3.0855e-03 -1.2570e-01 -8.8506e-02 1.1663e-01 1.1145e-01 1.5591e-01 8.0585e-02 -1.7355e-02 -1.9637e-02 -2.5667e-02 5.4949e-02 -2.4461e-02 8.0287e-02 9.7643e-02 4.6229e-02 -1.7331e-02 1.8982e-02 -2.1006e-02 1.0194e-02\n", + "Enrichment: 1.7430e+01 -5.0222e+01 2.7495e+01 7.1826e+01 5.5534e+00 -5.7828e+01 2.4610e+01 5.3504e+00 6.7023e+00 3.5736e+01 1.1435e+01 -6.4877e+00 4.7186e+01 4.1779e+00 1.4994e+01 2.8013e+01 -3.4688e+01 -4.0877e-01 2.9190e+00 1.1755e+01 5.8778e+00 3.1698e+01 9.5840e+00 -1.0487e+01 
-1.5615e+01 -2.9941e+01 3.8257e+00 2.8469e+01 3.4326e+01 1.9109e+01 1.2545e+01 1.2611e+00 -6.3993e+01 -1.0366e+02 6.1838e+00 -9.9085e+00 -1.1877e+02 -9.1809e+00 -9.2054e+00 -9.4512e-01 -9.5946e+01 -3.2246e+01 -6.1357e+00 -1.0784e+01 1.9396e+00 -3.4076e+01 -1.7048e+01 7.6591e+01 5.9525e+01 -9.9414e+01 -3.5053e+01 4.0721e+00 -3.2246e+01 -3.4751e+00 2.1130e+01 3.3450e+00 8.7186e+00 1.2681e+01 1.2507e+01 3.3642e+00 1.7166e+01 5.2822e+00 1.6446e+01 1.9499e+01 2.9651e+01 -3.1653e+00 -4.8405e+00 -3.4080e-01 -1.5501e+00 -1.3083e+01 -5.0656e+02 -7.1024e-01 1.5510e+02 8.4845e+01 -7.4377e+00 -5.1564e-01\n", + " 1.0578e+02 2.8933e+02 -1.8911e+00 -6.0069e+00 -7.4477e+01 6.6217e+00 1.0435e+02 1.7967e+01 1.0730e+02 -1.7760e+01 -2.3281e+01 -1.2903e+02 2.9835e+01 -4.5041e+01 3.4730e+02 3.3110e+02 1.6232e+02 -3.4639e+01 5.1666e+00 -1.5179e+02 3.1869e+02\n", + "Mean z1*z2: 0.4205\n", + "Intercept: 0.1475 (0.0083)\n", + "\n", + "Genetic Correlation\n", + "-------------------\n", + "Genetic Correlation: 1.1003 (0.0858)\n", + "Z-score: 12.8278\n", + "P: 1.1459e-37\n", + "\n", + "\n", + "Summary of Genetic Correlation Results\n", + "p1 p2 rg se z p h2_obs h2_obs_se h2_int h2_int_se gcov_int gcov_int_se\n", + "GCST004787_summary_stats_munged.sumstats.gz GCST90043957_summary_stats_munged.sumstats.gz 1.1003 0.0858 12.8278 1.1459e-37 0.0297 0.0032 1.0261 0.0119 0.1475 0.0083\n", + "\n", + "Analysis finished at Thu Nov 6 22:31:52 2025\n", + "Total time elapsed: 1.0m:3.9s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "rg_results = estimate_genetic_correlation(\n", + " sumstats_files=[\n", + " str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged.sumstats.gz\"),\n", + " str(Path(Path(gwas_summary_statistic_path_2).stem).stem + \"_munged.sumstats.gz\"),\n", + " ],\n", + " 
ref_ld_chr=os.path.join(ldscores_path, ldscores_prefix),\n", + " w_ld_chr=os.path.join(ldweights_path, ldweights_prefix),\n", + " out_prefix=\"CHD_rg\",\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "This tutorial demonstrated how to perform comprehensive LDSC analyses using the `cellink` package, including:\n", + "\n", + "1. Cell-type-specific heritability analysis: Identifying which cell types are most relevant to complex traits\n", + "2. SNP heritability estimation: Quantifying the proportion of trait variance explained by common genetic variants\n", + "3. Genetic correlation analysis: Measuring shared genetic architecture between traits\n", + "\n", + "The `cellink` package simplifies these analyses by providing unified wrapper functions that handle data formatting, file management, and command execution for LDSC and its auxiliary tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/tutorials/cell_level_ldsc_analysis_updates.ipynb b/docs/tutorials/cell_level_ldsc_analysis_updates.ipynb new file mode 100644 index 0000000..f47c3c7 --- /dev/null +++ b/docs/tutorials/cell_level_ldsc_analysis_updates.ipynb @@ -0,0 +1,8150 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Cell-Level LDSC analysis\n", + "\n", + "This tutorial demonstrates how to perform cell-type-specific LD Score regression (LDSC) analysis through the `cellink` package. 
The `cellink` package provides a unified interface to LDSC and its preparation scripts, making it easier to perform comprehensive genetic analyses that identify which cell types are most relevant to complex traits and diseases.\n", + "\n", + "This notebook assumes familiarity with single-cell data processing and basic statistical genetics concepts. The `cellink` package provides convenient wrapper functions that handle data preparation and formatting for LDSC. To install LDSC, please follow the instructions in the [LDSC repository](https://github.com/bulik/ldsc). We recommend running LDSC via a Docker image such as [this one](https://hub.docker.com/r/zijingliu/ldsc). For usage on HPC systems, please consider using Singularity or Enroot." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "We begin by importing necessary libraries and defining key parameters for our analysis. The `cellink` package provides wrapper functions for LDSC that automatically handle preprocessing, data formatting and preparation." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/sgkit/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " from pkg_resources import DistributionNotFound, get_distribution # type: ignore[import]\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import os\n", + "\n", + "from cellink._core import DAnn, GAnn\n", + "from cellink.resources import get_onek1k\n", + "from cellink.tl.external import (\n", + " preprocess_for_sldsc,\n", + " generate_sldsc_genesets,\n", + " generate_gene_coord_file,\n", + " configure_ldsc_runner,\n", + " make_annot_from_donor_data,\n", + " munge_sumstats,\n", + " estimate_ld_scores_from_donor_data,\n", + " estimate_heritability,\n", + " estimate_genetic_correlation,\n", + " compute_ld_scores_with_annotations_from_donor_data,\n", + " estimate_celltype_specific_heritability,\n", + ")\n", + "from cellink.resources import get_1000genomes_ld_scores, get_1000genomes_ld_weights\n", + "from cellink.resources import get_gwas_catalog_study_summary_stats\n", + "\n", + "# Analysis parameters\n", + "chrom = 22\n", + "cell_type = \"CD8 Naive\"\n", + "celltype_key = \"predicted.celltype.l2\"\n", + "original_donor_col = \"donor_id\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/project/genomics/ayshan\n" + ] + } + ], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load and Prepare Data\n", + "\n", + "We load the OneK1K dataset, which contains both genotype and single-cell expression data. We also add gene annotations from Ensembl for our analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pybiomart in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (0.2.0)\n", + "Requirement already satisfied: future in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (1.0.0)\n", + "Requirement already satisfied: pandas in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (2.3.3)\n", + "Requirement already satisfied: requests in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (2.32.5)\n", + "Requirement already satisfied: requests-cache in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (1.2.1)\n", + "Requirement already satisfied: numpy>=1.23.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->pybiomart) (1.17.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (3.4.4)\n", 
+ "Requirement already satisfied: idna<4,>=2.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (2025.11.12)\n", + "Requirement already satisfied: attrs>=21.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (25.4.0)\n", + "Requirement already satisfied: cattrs>=22.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (25.3.0)\n", + "Requirement already satisfied: platformdirs>=2.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (4.5.0)\n", + "Requirement already satisfied: url-normalize>=1.4 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.14.0 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from cattrs>=22.2->requests-cache->pybiomart) (4.15.0)\n" + ] + } + ], + "source": [ + "!pip install pybiomart" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/onek1k_cellxgene.h5ad already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/OneK1K.noGP.vcf.gz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + 
"INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/OneK1K.noGP.vcf.gz.csi already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/gene_counts_Ensembl_105_phenotype_metadata.tsv.gz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas/core/internals/blocks.py:2661: RuntimeWarning: invalid value encountered in cast\n", + " return self.values.astype(_dtype_obj)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset shape: (981, 10595884, 1248980, 36469)\n" + ] + } + ], + "source": [ + "# Load the dataset\n", + "dd = get_onek1k(config_path=\"cellink/src/cellink/resources/config/onek1k.yaml\", data_home=\"/project/genomics/ayshan/1k1k_dataset\", verify_checksum=False)\n", + "print(f\"Dataset shape: {dd.shape}\")\n", + "\n", + "\n", + "# Add gene annotations from Ensembl\n", + "def _get_ensembl_gene_id_start_end_chr():\n", + " from pybiomart import Server\n", + "\n", + " server = Server(host=\"http://www.ensembl.org\")\n", + " dataset = server.marts[\"ENSEMBL_MART_ENSEMBL\"].datasets[\"hsapiens_gene_ensembl\"]\n", + " ensembl_gene_id_start_end_chr = dataset.query(\n", + " attributes=[\"ensembl_gene_id\", \"start_position\", \"end_position\", \"chromosome_name\"]\n", + " )\n", + " ensembl_gene_id_start_end_chr = ensembl_gene_id_start_end_chr.set_index(\"Gene stable ID\")\n", + " ensembl_gene_id_start_end_chr = ensembl_gene_id_start_end_chr.rename(\n", + " columns={\n", + " \"Gene start (bp)\": GAnn.start,\n", + " \"Gene end (bp)\": GAnn.end,\n", + " \"Chromosome/scaffold name\": GAnn.chrom,\n", + " }\n", + " )\n", + " return ensembl_gene_id_start_end_chr\n", + "\n", + "\n", + "ensembl_gene_id_start_end_chr = _get_ensembl_gene_id_start_end_chr()\n", + "dd.C.var = dd.C.var.join(ensembl_gene_id_start_end_chr)\n", + "\n", + "# Set up 
donor information\n", + "dd.C.obs[DAnn.donor] = dd.C.obs[original_donor_col]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dd.G.obs[\"donor_id\"] = dd.G.obs.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cell-Type-Specific LDSC analysis\n", + "\n", + "Cell-type-specific LDSC analysis helps identify which cell types are most relevant to complex traits by testing whether genetic variants associated with a trait are enriched in genes specifically expressed in certain cell types. This analysis follows the method described in [Duncan et al. 2025](https://www.nature.com/articles/s41593-024-01834-w)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 1: Preprocessing and Gene Set Generation\n", + "First, we preprocess the single-cell data to compute cell-type-specific gene expression and identify genes that are specifically expressed in each cell type." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Querying Ensembl BioMart (GRCh38)...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetched annotations for 86371 genes from GRCh38\n", + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from Gene IDs\n", + "INFO:cellink.tl.external._sldsc_utils:Dropping conflicting columns from adata.var before merge: ['chrom', 'start', 'end']\n", + "INFO:cellink.tl.external._sldsc_utils:Annotated 35522 / 36469 genes.\n", + "INFO:cellink.tl.external._sldsc_utils:Using annotation columns: gene=gene, biotype=gene_biotype, chr=chrom, start=start, end=end\n", + "INFO:cellink.tl.external._sldsc_utils:Applying gene filters\n", + "INFO:cellink.tl.external._sldsc_utils:Protein-coding genes: 19273\n", + 
"INFO:cellink.tl.external._sldsc_utils:Expressed genes: 31285\n", + "INFO:cellink.tl.external._sldsc_utils:Non-MHC genes: 36469\n", + "INFO:cellink.tl.external._sldsc_utils:Keeping 18068 / 36469 genes after filtering\n", + "INFO:cellink.tl.external._sldsc_utils:n_cells = 1248980, n_genes = 18068, n_clusters = 31\n", + "INFO:cellink.tl.external._sldsc_utils:Applying log1p transformation\n", + "Aggregating clusters: 100%|██████████| 31/31 [02:27<00:00, 4.75s/it]\n", + "INFO:cellink.tl.external._sldsc_utils:Log1p applied.\n", + "INFO:cellink.tl.external._sldsc_utils:Computing mean expression for predicted.celltype.l2\n", + "INFO:cellink.tl.external._sldsc_utils:Computing specificity scores\n", + "INFO:cellink.tl.external._sldsc_utils:Final data shape: (1248980, 18068)\n", + "INFO:cellink.tl.external._sldsc_utils:Mean expression shape: (18068, 31)\n", + "INFO:cellink.tl.external._sldsc_utils:Specificity shape: (18068, 31)\n" + ] + } + ], + "source": [ + "dd.C.var[\"gene\"] = dd.C.var_names\n", + "adata = dd.C.copy()\n", + "adata_filtered, mean_expr, specificity = preprocess_for_sldsc(\n", + " adata,\n", + " celltype_col=celltype_key,\n", + " gene_identifier_mode=\"ensembl\",\n", + " gene_col=\"gene\",\n", + " genome_build=\"GRCh38\",\n", + " inplace=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we generate gene sets for each cell type containing the top 10% most specifically expressed genes. These gene sets will be used to create genomic annotations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Writing gene sets to ldsc_genesets\n", + "INFO:cellink.tl.external._sldsc_utils:specificity_df index looks like Ensembl IDs; using them directly.\n", + "INFO:cellink.tl.external._sldsc_utils:Selecting top 1807 genes (10.0%) per cell type\n", + "INFO:cellink.tl.external._sldsc_utils:Wrote control gene set with 18068 genes\n", + "INFO:cellink.tl.external._sldsc_utils:Generated 31 cell-type-specific gene sets\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cell_typen_genesoutput_path
0ASDC1807ldsc_genesets/ASDC.GeneSet
1B intermediate1807ldsc_genesets/B_intermediate.GeneSet
2B memory1807ldsc_genesets/B_memory.GeneSet
3B naive1807ldsc_genesets/B_naive.GeneSet
4CD14 Mono1807ldsc_genesets/CD14_Mono.GeneSet
5CD16 Mono1807ldsc_genesets/CD16_Mono.GeneSet
6CD4 CTL1807ldsc_genesets/CD4_CTL.GeneSet
7CD4 Naive1807ldsc_genesets/CD4_Naive.GeneSet
8CD4 Proliferating1807ldsc_genesets/CD4_Proliferating.GeneSet
9CD4 TCM1807ldsc_genesets/CD4_TCM.GeneSet
10CD4 TEM1807ldsc_genesets/CD4_TEM.GeneSet
11CD8 Naive1807ldsc_genesets/CD8_Naive.GeneSet
12CD8 Proliferating1807ldsc_genesets/CD8_Proliferating.GeneSet
13CD8 TCM1807ldsc_genesets/CD8_TCM.GeneSet
14CD8 TEM1807ldsc_genesets/CD8_TEM.GeneSet
15Doublet1807ldsc_genesets/Doublet.GeneSet
16Eryth1807ldsc_genesets/Eryth.GeneSet
17HSPC1807ldsc_genesets/HSPC.GeneSet
18ILC1807ldsc_genesets/ILC.GeneSet
19MAIT1807ldsc_genesets/MAIT.GeneSet
20NK1807ldsc_genesets/NK.GeneSet
21NK Proliferating1807ldsc_genesets/NK_Proliferating.GeneSet
22NK_CD56bright1807ldsc_genesets/NK_CD56bright.GeneSet
23Plasmablast1807ldsc_genesets/Plasmablast.GeneSet
24Platelet1807ldsc_genesets/Platelet.GeneSet
25Treg1807ldsc_genesets/Treg.GeneSet
26cDC11807ldsc_genesets/cDC1.GeneSet
27cDC21807ldsc_genesets/cDC2.GeneSet
28dnT1807ldsc_genesets/dnT.GeneSet
29gdT1807ldsc_genesets/gdT.GeneSet
30pDC1807ldsc_genesets/pDC.GeneSet
\n", + "
" + ], + "text/plain": [ + " cell_type n_genes output_path\n", + "0 ASDC 1807 ldsc_genesets/ASDC.GeneSet\n", + "1 B intermediate 1807 ldsc_genesets/B_intermediate.GeneSet\n", + "2 B memory 1807 ldsc_genesets/B_memory.GeneSet\n", + "3 B naive 1807 ldsc_genesets/B_naive.GeneSet\n", + "4 CD14 Mono 1807 ldsc_genesets/CD14_Mono.GeneSet\n", + "5 CD16 Mono 1807 ldsc_genesets/CD16_Mono.GeneSet\n", + "6 CD4 CTL 1807 ldsc_genesets/CD4_CTL.GeneSet\n", + "7 CD4 Naive 1807 ldsc_genesets/CD4_Naive.GeneSet\n", + "8 CD4 Proliferating 1807 ldsc_genesets/CD4_Proliferating.GeneSet\n", + "9 CD4 TCM 1807 ldsc_genesets/CD4_TCM.GeneSet\n", + "10 CD4 TEM 1807 ldsc_genesets/CD4_TEM.GeneSet\n", + "11 CD8 Naive 1807 ldsc_genesets/CD8_Naive.GeneSet\n", + "12 CD8 Proliferating 1807 ldsc_genesets/CD8_Proliferating.GeneSet\n", + "13 CD8 TCM 1807 ldsc_genesets/CD8_TCM.GeneSet\n", + "14 CD8 TEM 1807 ldsc_genesets/CD8_TEM.GeneSet\n", + "15 Doublet 1807 ldsc_genesets/Doublet.GeneSet\n", + "16 Eryth 1807 ldsc_genesets/Eryth.GeneSet\n", + "17 HSPC 1807 ldsc_genesets/HSPC.GeneSet\n", + "18 ILC 1807 ldsc_genesets/ILC.GeneSet\n", + "19 MAIT 1807 ldsc_genesets/MAIT.GeneSet\n", + "20 NK 1807 ldsc_genesets/NK.GeneSet\n", + "21 NK Proliferating 1807 ldsc_genesets/NK_Proliferating.GeneSet\n", + "22 NK_CD56bright 1807 ldsc_genesets/NK_CD56bright.GeneSet\n", + "23 Plasmablast 1807 ldsc_genesets/Plasmablast.GeneSet\n", + "24 Platelet 1807 ldsc_genesets/Platelet.GeneSet\n", + "25 Treg 1807 ldsc_genesets/Treg.GeneSet\n", + "26 cDC1 1807 ldsc_genesets/cDC1.GeneSet\n", + "27 cDC2 1807 ldsc_genesets/cDC2.GeneSet\n", + "28 dnT 1807 ldsc_genesets/dnT.GeneSet\n", + "29 gdT 1807 ldsc_genesets/gdT.GeneSet\n", + "30 pDC 1807 ldsc_genesets/pDC.GeneSet" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary = generate_sldsc_genesets(specificity, dd.C, out_dir=\"ldsc_genesets\", top_frac=0.10, overwrite=True)\n", + "summary" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "We also need to generate a gene coordinate file that maps genes to their genomic positions:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from Ensembl GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Querying Ensembl BioMart (GRCh38)...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetched annotations for 86371 genes from GRCh38\n", + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from gene identifiers\n", + "WARNING:cellink.tl.external._sldsc_utils:Removed 2 duplicate gene entries\n", + "INFO:cellink.tl.external._sldsc_utils:Writing 86369 gene coordinates to gene_coords.txt\n", + "INFO:cellink.tl.external._sldsc_utils:Successfully created gene coordinate file: gene_coords.txt\n" + ] + } + ], + "source": [ + "generate_gene_coord_file(\"gene_coords.txt\", gene_identifier_mode=\"ensembl\", genome_build=\"GRCh38\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Optional: Generate MAGMA cell specificity file\n", + "\n", + "The `.GeneSet` files generated for LDSC can also be used for MAGMA analysis by converting them into `.gmt` format.\n", + "\n", + "**Running the GeneSet → MAGMA conversion without a mapping TSV**\n", + "\n", + "This pipeline converts `.GeneSet` files (with Ensembl gene IDs) into a MAGMA-compatible `.gmt` file (which requires Entrez gene IDs).\n", + "Normally, this conversion needs a mapping file (Ensembl → Entrez).\n", + "However, there are situations where you may not have this TSV file.\n", + "\n", + "Below are your options and when to use each of them.\n", + "\n", + "\n", + "**Recommended: create and reuse a mapping TSV**\n", + "\n", + "For real analyses (especially MAGMA), this is the best practice.\n",
+ "\n", + "**Mapping file format**\n", + "\n", + "A simple tab-separated file:\n", + "\n", + "```\n", + "ensembl_gene_id\tentrez_id\n", + "ENSG00000141510\t7157\n", + "ENSG00000171862\t5728\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting mygene\n", + " Using cached mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)\n", + "Collecting biothings-client>=0.2.6 (from mygene)\n", + " Using cached biothings_client-0.4.1-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: httpx>=0.22.0 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from biothings-client>=0.2.6->mygene) (0.28.1)\n", + "Requirement already satisfied: anyio in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (4.11.0)\n", + "Requirement already satisfied: certifi in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (2025.11.12)\n", + "Requirement already satisfied: httpcore==1.* in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (1.0.9)\n", + "Requirement already satisfied: idna in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.22.0->biothings-client>=0.2.6->mygene) (0.16.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from anyio->httpx>=0.22.0->biothings-client>=0.2.6->mygene) (1.3.1)\n", + "Requirement already satisfied: typing_extensions>=4.5 in
/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from anyio->httpx>=0.22.0->biothings-client>=0.2.6->mygene) (4.15.0)\n", + "Using cached mygene-3.2.2-py2.py3-none-any.whl (5.4 kB)\n", + "Using cached biothings_client-0.4.1-py3-none-any.whl (46 kB)\n", + "Installing collected packages: biothings-client, mygene\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [mygene]━━━━\u001b[0m \u001b[32m1/2\u001b[0m [mygene]\n", + "\u001b[1A\u001b[2KSuccessfully installed biothings-client-0.4.1 mygene-3.2.2\n" + ] + } + ], + "source": [ + "!pip install mygene" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/sgkit/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " from pkg_resources import DistributionNotFound, get_distribution # type: ignore[import]\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000189144']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000189144']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000189144']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "INFO:cellink.tl.external._ldsc2magma:Wrote 31 gene sets to /ictstr01/project_copy/genomics/ayshan/magma_genesets/genesets.gmt (skipped 0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/ictstr01/project_copy/genomics/ayshan/magma_genesets/genesets.gmt\n" + ] + } + ], + "source": [ + "from cellink.tl.external import genesets_dir_to_entrez_gmt\n", + "\n", + "\n", + "out_gmt = genesets_dir_to_entrez_gmt(\n", + " geneset_dir=\"ldsc_genesets\",\n", + " ensembl_to_entrez_tsv=None,\n", + " allow_mygene_fallback=True,\n", + " include_control=False\n", + ")\n", + "\n", + "print(out_gmt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 2: Configure LDSC Runner\n", + "Before running LDSC commands, we need to configure the runner. LDSC can be executed via a local installation or through container solutions like Docker or Singularity. Sample configuration files are provided in `./src/cellink/tl/external/config/` for local execution, Docker, and Singularity." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "runner = configure_ldsc_runner(config_path=\"cellink/src/cellink/tl/external/config/ldsc_singularity.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 3: Prepare Data for Analysis\n", + "To speed up computation in this tutorial, we keep only a small random subset of SNPs (0.1%, drawn with a fixed random seed) from each autosome (chromosomes 1-22). Note: In a real analysis, you would use all SNPs on all chromosomes without subsetting."
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═ DonorData(n_donors=981, n_cells_per_donor=[333-3,511], donor_id='donor_id') ═══════════════════════════════╗\n",
+       "║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║\n",
+       "║ ┃ G (donors)                                          C (cells)                                          ┃ ║\n",
+       "║ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ ║\n",
+       "║ │ AnnData object with n_obs × n_vars = 981 × 10,299  │ AnnData object with n_obs × n_vars = 1,248,980 ×   │ ║\n",
+       "║ │                                                    │ 36,469                                             │ ║\n",
+       "║ │     obs: 'donor_id'                                │     obs: 'orig.ident', 'nCount_RNA',               │ ║\n",
+       "║ │                                                    │ 'nFeature_RNA', 'percent.mt', 'donor_id',          │ ║\n",
+       "║ │                                                    │ 'pool_number', 'predicted.celltype.l2',            │ ║\n",
+       "║ │                                                    │ 'predicted.celltype.l2.score', 'age',              │ ║\n",
+       "║ │                                                    │ 'organism_ontology_term_id',                       │ ║\n",
+       "║ │                                                    │ 'tissue_ontology_term_id',                         │ ║\n",
+       "║ │                                                    │ 'assay_ontology_term_id',                          │ ║\n",
+       "║ │                                                    │ 'disease_ontology_term_id',                        │ ║\n",
+       "║ │                                                    │ 'cell_type_ontology_term_id',                      │ ║\n",
+       "║ │                                                    │ 'self_reported_ethnicity_ontology_term_id',        │ ║\n",
+       "║ │                                                    │ 'development_stage_ontology_term_id',              │ ║\n",
+       "║ │                                                    │ 'sex_ontology_term_id', 'is_primary_data',         │ ║\n",
+       "║ │                                                    │ 'suspension_type', 'tissue_type', 'cell_type',     │ ║\n",
+       "║ │                                                    │ 'assay', 'disease', 'organism', 'sex', 'tissue',   │ ║\n",
+       "║ │                                                    │ 'self_reported_ethnicity', 'development_stage',    │ ║\n",
+       "║ │                                                    │ 'observation_joinid'                               │ ║\n",
+       "║ │     var: 'chrom', 'pos', 'a0', 'a1', 'AF', 'AN',   │     var: 'vst.mean', 'vst.variance',               │ ║\n",
+       "║ │ 'ER2', 'IMPUTED', 'maf', 'NS', 'R2', 'TYPED',      │ 'vst.variance.expected',                           │ ║\n",
+       "║ │ 'TYPED_ONLY', 'id', 'id_mask', 'length',           │ 'vst.variance.standardized', 'vst.variable',       │ ║\n",
+       "║ │ 'quality', 'pos_hg19', 'id_hg19'                   │ 'feature_is_filtered', 'feature_name',             │ ║\n",
+       "║ │                                                    │ 'feature_reference', 'feature_biotype',            │ ║\n",
+       "║ │                                                    │ 'feature_length', 'feature_type', 'start', 'end',  │ ║\n",
+       "║ │                                                    │ 'chrom', 'gene'                                    │ ║\n",
+       "║ │     uns: 'kinship'                                 │     uns: 'cell_type_ontology_term_id_colors',      │ ║\n",
+       "║ │                                                    │ 'citation', 'default_embedding',                   │ ║\n",
+       "║ │                                                    │ 'schema_reference', 'schema_version', 'title'      │ ║\n",
+       "║ │     obsm: 'gPCs'                                   │     obsm: 'X_azimuth_spca', 'X_azimuth_umap',      │ ║\n",
+       "║ │                                                    │ 'X_harmony', 'X_pca', 'X_umap'                     │ ║\n",
+       "║ │     varm: 'filter'                                 │     varm: 'PCs'                                    │ ║\n",
+       "║ └────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘ ║\n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "╔═\u001b[1;38;5;197m DonorData(n_donors=981, n_cells_per_donor=[333-3,511], donor_id='donor_id') \u001b[0m═══════════════════════════════╗\n", + "║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║\n", + "║ ┃\u001b[1;38;5;197m \u001b[0m\u001b[1;38;5;197mG (donors) \u001b[0m\u001b[1;38;5;197m \u001b[0m┃\u001b[1;38;5;197m \u001b[0m\u001b[1;38;5;197mC (cells) \u001b[0m\u001b[1;38;5;197m \u001b[0m┃ ║\n", + "║ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ ║\n", + "║ │ AnnData object with n_obs × n_vars = 981 × 10,299 │ AnnData object with n_obs × n_vars = 1,248,980 × │ ║\n", + "║ │ │ 36,469 │ ║\n", + "║ │ obs: 'donor_id' │ obs: 'orig.ident', 'nCount_RNA', │ ║\n", + "║ │ │ 'nFeature_RNA', 'percent.mt', \u001b[1;38;5;197m'donor_id', \u001b[0m │ ║\n", + "║ │ │ 'pool_number', 'predicted.celltype.l2', │ ║\n", + "║ │ │ 'predicted.celltype.l2.score', 'age', │ ║\n", + "║ │ │ 'organism_ontology_term_id', │ ║\n", + "║ │ │ 'tissue_ontology_term_id', │ ║\n", + "║ │ │ 'assay_ontology_term_id', │ ║\n", + "║ │ │ 'disease_ontology_term_id', │ ║\n", + "║ │ │ 'cell_type_ontology_term_id', │ ║\n", + "║ │ │ 'self_reported_ethnicity_ontology_term_id', │ ║\n", + "║ │ │ 'development_stage_ontology_term_id', │ ║\n", + "║ │ │ 'sex_ontology_term_id', 'is_primary_data', │ ║\n", + "║ │ │ 'suspension_type', 'tissue_type', 'cell_type', │ ║\n", + "║ │ │ 'assay', 'disease', 'organism', 'sex', 'tissue', │ ║\n", + "║ │ │ 'self_reported_ethnicity', 'development_stage', │ ║\n", + "║ │ │ 'observation_joinid' │ ║\n", + "║ │ var: 'chrom', 'pos', 'a0', 'a1', 'AF', 'AN', │ var: 'vst.mean', 'vst.variance', │ ║\n", + "║ │ 'ER2', 'IMPUTED', 'maf', 'NS', 'R2', 'TYPED', │ 'vst.variance.expected', │ ║\n", + "║ │ 'TYPED_ONLY', 'id', 'id_mask', 'length', │ 'vst.variance.standardized', 'vst.variable', │ ║\n", + "║ │ 'quality', 'pos_hg19', 'id_hg19' │ 
'feature_is_filtered', 'feature_name', │ ║\n", + "║ │ │ 'feature_reference', 'feature_biotype', │ ║\n", + "║ │ │ 'feature_length', 'feature_type', 'start', 'end', │ ║\n", + "║ │ │ 'chrom', 'gene' │ ║\n", + "║ │ uns: 'kinship' │ uns: 'cell_type_ontology_term_id_colors', │ ║\n", + "║ │ │ 'citation', 'default_embedding', │ ║\n", + "║ │ │ 'schema_reference', 'schema_version', 'title' │ ║\n", + "║ │ obsm: 'gPCs' │ obsm: 'X_azimuth_spca', 'X_azimuth_umap', │ ║\n", + "║ │ │ 'X_harmony', 'X_pca', 'X_umap' │ ║\n", + "║ │ varm: 'filter' │ varm: 'PCs' │ ║\n", + "║ └────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘ ║\n", + "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.seed(42)\n", + "all_selected_idx = []\n", + "for chrom in range(1, 23):\n", + " chrom_idx = np.where(dd.G.var.chrom == str(chrom))[0]\n", + " n_snps = max(1, int(len(chrom_idx) * 0.001))\n", + " selected_idx = np.random.choice(chrom_idx, n_snps, replace=False)\n", + " all_selected_idx.extend(selected_idx)\n", + "all_selected_idx = np.sort(all_selected_idx)\n", + "\n", + "dd = dd[:, all_selected_idx, :, :].copy()\n", + "dd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 4: Create Cell-Type Annotations\n", + "Now we create binary annotation files that indicate which SNPs are near cell-type-specific genes. This is done using LDSC's `make_annot` functionality, wrapped by `cellink`. We process two cell types (CD8 Naive and CD4 Naive) across all chromosomes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:55<00:00, 55.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_1.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:25<00:00, 25.84s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_1.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.39s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_2.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_2.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.57s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_2.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_2.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:47<00:00, 47.75s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_3.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_3.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 26.14s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_3.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_3.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 26.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_4.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_4.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:25<00:00, 25.97s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_4.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_4.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.15s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_5.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_5.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.27s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_5.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_5.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:37<00:00, 37.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_6.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_6.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:25<00:00, 25.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_6.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_6.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:37<00:00, 37.38s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_7.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_7.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 27.00s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_7.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_7.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.51s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_8.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_8.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.54s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_8.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_8.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:36<00:00, 36.10s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_9.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_9.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 26.34s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_9.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_9.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_10.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_10.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_10.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_10.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:37<00:00, 37.41s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_11.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_11.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 0%| | 0/1 [00:14 \u001b[39m\u001b[32m4\u001b[39m result = \u001b[43mmake_annot_from_donor_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mdd\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdd_chrom\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mannot_file\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcell_type\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m 
\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m_\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mchrom\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m.annot.gz\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_set_file\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m./ldsc_genesets/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcell_type\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m_\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m.GeneSet\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_coord_file\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgene_coords.txt\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[43mwindowsize\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m100000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/tl/external/_ldsc.py:1424\u001b[39m, in \u001b[36mmake_annot_from_donor_data\u001b[39m\u001b[34m(dd, annot_file, gene_set_file, gene_coord_file, windowsize, bed_file, nomerge, out_prefix, run, cleanup_files, 
plink_export_kwargs, runner, **kwargs)\u001b[39m\n\u001b[32m 1421\u001b[39m plink_export_kwargs = {}\n\u001b[32m 1423\u001b[39m logger.info(\u001b[33m\"\u001b[39m\u001b[33mExporting genotype data to PLINK format for annotation creation\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1424\u001b[39m \u001b[43mto_plink\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_prefix\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mplink_export_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1425\u001b[39m bimfile = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mout_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.bim\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1427\u001b[39m results = _run_ldsc_make_annot(\n\u001b[32m 1428\u001b[39m bimfile=bimfile,\n\u001b[32m 1429\u001b[39m annot_file=annot_file,\n\u001b[32m (...)\u001b[39m\u001b[32m 1437\u001b[39m **kwargs,\n\u001b[32m 1438\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/io/_export.py:148\u001b[39m, in \u001b[36mto_plink\u001b[39m\u001b[34m(gdata, output_prefix, donor_id, donor_family_id, donor_paternal_id, donor_maternal_id, donor_sex, chrom, pos, a0, a1)\u001b[39m\n\u001b[32m 73\u001b[39m output_prefix += \u001b[33m\"\u001b[39m\u001b[33m.bed\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 75\u001b[39m xarr = xr.DataArray(\n\u001b[32m 76\u001b[39m gdata.X.astype(\u001b[33m\"\u001b[39m\u001b[33mfloat32\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m 77\u001b[39m dims=(\u001b[33m\"\u001b[39m\u001b[33msample\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mvariant\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m (...)\u001b[39m\u001b[32m 146\u001b[39m name=\u001b[33m\"\u001b[39m\u001b[33mgenotypes\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 
147\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m148\u001b[39m \u001b[43mwrite_plink1_bin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_prefix\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas_plink/_write.py:180\u001b[39m, in \u001b[36mwrite_plink1_bin\u001b[39m\u001b[34m(G, bed, bim, fam, major, verbose)\u001b[39m\n\u001b[32m 178\u001b[39m G = _fill_sample(G)\n\u001b[32m 179\u001b[39m G = _fill_variant(G)\n\u001b[32m--> \u001b[39m\u001b[32m180\u001b[39m \u001b[43mwrite_bed\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmajor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 182\u001b[39m _echo(\u001b[33m\"\u001b[39m\u001b[33mWriting FAM... 
\u001b[39m\u001b[33m\"\u001b[39m, end=\u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m, disable=\u001b[38;5;129;01mnot\u001b[39;00m verbose)\n\u001b[32m 183\u001b[39m _write_fam(fam, G)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas_plink/_bed_write.py:44\u001b[39m, in \u001b[36mwrite_bed\u001b[39m\u001b[34m(filepath, X, major, verbose)\u001b[39m\n\u001b[32m 42\u001b[39m row_start = \u001b[32m0\u001b[39m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m tqdm(G.chunks[\u001b[32m0\u001b[39m], \u001b[33m\"\u001b[39m\u001b[33mWriting BED\u001b[39m\u001b[33m\"\u001b[39m, disable=\u001b[38;5;129;01mnot\u001b[39;00m verbose):\n\u001b[32m---> \u001b[39m\u001b[32m44\u001b[39m data = \u001b[43mG\u001b[49m\u001b[43m[\u001b[49m\u001b[43mrow_start\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow_start\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompute\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 45\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m data.dtype \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [float32, float64]:\n\u001b[32m 46\u001b[39m msg = \u001b[33m\"\u001b[39m\u001b[33mUnsupported data type. 
\u001b[39m\u001b[33m\"\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/dask/base.py:373\u001b[39m, in \u001b[36mDaskMethodsMixin.compute\u001b[39m\u001b[34m(self, **kwargs)\u001b[39m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcompute\u001b[39m(\u001b[38;5;28mself\u001b[39m, **kwargs):\n\u001b[32m 350\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Compute this dask collection\u001b[39;00m\n\u001b[32m 351\u001b[39m \n\u001b[32m 352\u001b[39m \u001b[33;03m This turns a lazy Dask collection into its in-memory equivalent.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 371\u001b[39m \u001b[33;03m dask.compute\u001b[39;00m\n\u001b[32m 372\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m373\u001b[39m (result,) = \u001b[43mcompute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtraverse\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 374\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/dask/base.py:681\u001b[39m, in \u001b[36mcompute\u001b[39m\u001b[34m(traverse, optimize_graph, scheduler, get, *args, **kwargs)\u001b[39m\n\u001b[32m 678\u001b[39m expr = expr.optimize()\n\u001b[32m 679\u001b[39m keys = \u001b[38;5;28mlist\u001b[39m(flatten(expr.__dask_keys__()))\n\u001b[32m--> \u001b[39m\u001b[32m681\u001b[39m results = \u001b[43mschedule\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 683\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m repack(results)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/queue.py:171\u001b[39m, in \u001b[36mQueue.get\u001b[39m\u001b[34m(self, block, timeout)\u001b[39m\n\u001b[32m 169\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m._qsize():\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnot_empty\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout < \u001b[32m0\u001b[39m:\n\u001b[32m 173\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33m'\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m'\u001b[39m\u001b[33m must be a non-negative number\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/threading.py:327\u001b[39m, in \u001b[36mCondition.wait\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 325\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[32m 326\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m327\u001b[39m \u001b[43mwaiter\u001b[49m\u001b[43m.\u001b[49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 328\u001b[39m gotit = 
\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 329\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[31mKeyboardInterrupt\u001b[39m: " + ] + } + ], + "source": [ + "# Dedicated loop variables: reusing `chrom` / `cell_type` here would overwrite the\n", + "# analysis parameters defined in the setup cell for every later cell.\n", + "annot_cell_types = [\"CD8 Naive\", \"CD4 Naive\"]\n", + "annot_window_bp = 100_000  # forwarded to make_annot.py as --windowsize\n", + "\n", + "annot_results = {}\n", + "for annot_chrom in range(1, 23):\n", + "    # Subset once per chromosome; the selection does not depend on the cell type.\n", + "    dd_chrom = dd.sel(\n", + "        G_var=dd.G.var.chrom == str(annot_chrom),\n", + "        C_var=dd.C.var.chrom == str(annot_chrom),\n", + "    ).copy()\n", + "    for annot_cell_type in annot_cell_types:\n", + "        cell_type_tag = annot_cell_type.replace(\" \", \"_\")\n", + "        result = make_annot_from_donor_data(\n", + "            dd=dd_chrom,\n", + "            annot_file=f\"{cell_type_tag}_{annot_chrom}.annot.gz\",\n", + "            gene_set_file=f\"./ldsc_genesets/{cell_type_tag}.GeneSet\",\n", + "            gene_coord_file=\"gene_coords.txt\",\n", + "            windowsize=annot_window_bp,\n", + "            runner=runner,\n", + "        )\n", + "        # Keep every run's result, keyed by (cell type tag, chromosome).\n", + "        annot_results[(cell_type_tag, annot_chrom)] = result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Sanity check from inside the container: are the LDSC inputs visible under /data?\n", + "# A bare `!cd <dir>` line runs in its own subshell and does not affect later\n", + "# commands, so the directory change happens inside the container shell instead.\n", + "# (`ldsc_annot.bim` is a temporary export that the wrapper deletes after each run,\n", + "# so it is only present while an annotation run is in progress.)\n", + "!singularity exec \\\n", + "    -B /ictstr01/project_copy/genomics/ayshan:/data \\\n", + "    -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data \\\n", + "    /project/genomics/ayshan/containers/ldsc.sif \\\n", + "    bash -lc 'cd /data && pwd; ls; ls -l ldsc_annot.bim gene_coords.txt ldsc_genesets/CD8_Naive.GeneSet'\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 5: Compute Cell-Type-Specific LD Scores\n", + "With annotations created, we now compute LD scores that incorporate cell-type-specific information. These LD scores quantify how much genetic variation near cell-type-specific genes contributes to linkage disequilibrium patterns. 
Here we use the function `compute_ld_scores_with_annotations_from_donor_data`. Alternatively, this step can be performed with 1000 Genomes (1000G) PLINK data via `compute_ld_scores_with_annotations_from_bimfile` (the PLINK files can be downloaded via `cellink.resources.get_1000genomes_plink_files`). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.1 --annot CD8_Naive_1.annot.gz --out cts_ldscores_CD8_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.1 --annot CD8_Naive_1.annot.gz --out cts_ldscores_CD8_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
cts_ldscores_CD8_Naive.1 \\\n", + "--bfile cts_ldscores_CD8_Naive.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:51:21 2025\n", + "Read list of 796 SNPs from cts_ldscores_CD8_Naive.1.bim\n", + "Read 1 annotations for 796 SNPs from CD8_Naive_1.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.1.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to cts_ldscores_CD8_Naive.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 0.1780\n", + "std 0.1526 0.3931\n", + "min 0.0092 -0.0415\n", + "25% 0.0432 -0.0082\n", + "50% 0.1300 0.0062\n", + "75% 0.3086 0.0275\n", + "max 0.4995 1.8826\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0341\n", + "L2 0.0341 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 125\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 796.0000\n", + "mean 0.1570\n", + "std 0.3641\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:51:21 2025\n", + "Total time elapsed: 0.32s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell 
type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.1 --annot CD4_Naive_1.annot.gz --out cts_ldscores_CD4_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.1 --annot CD4_Naive_1.annot.gz --out cts_ldscores_CD4_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.1 \\\n", + "--bfile cts_ldscores_CD4_Naive.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:51:44 2025\n", + "Read list of 796 SNPs from cts_ldscores_CD4_Naive.1.bim\n", + "Read 1 annotations for 796 SNPs from CD4_Naive_1.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.1.fam\n", + "Reading genotypes from 
cts_ldscores_CD4_Naive.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to cts_ldscores_CD4_Naive.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 0.2007\n", + "std 0.1526 0.4154\n", + "min 0.0092 -0.0448\n", + "25% 0.0432 -0.0082\n", + "50% 0.1300 0.0066\n", + "75% 0.3086 0.0327\n", + "max 0.4995 1.8620\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0207\n", + "L2 0.0207 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 143\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 796.0000\n", + "mean 0.1796\n", + "std 0.3841\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:51:45 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:04<00:00, 4.99s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.2 --annot CD8_Naive_2.annot.gz --out cts_ldscores_CD8_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.2 --annot CD8_Naive_2.annot.gz --out cts_ldscores_CD8_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.2 \\\n", + "--bfile cts_ldscores_CD8_Naive.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:02 2025\n", + "Read list of 864 SNPs from cts_ldscores_CD8_Naive.2.bim\n", + "Read 1 annotations for 864 SNPs from CD8_Naive_2.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.2.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to cts_ldscores_CD8_Naive.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 0.1578\n", + "std 0.1408 0.4100\n", + "min 0.0087 -0.0418\n", + "25% 0.0381 -0.0092\n", + "50% 
0.1131 0.0037\n", + "75% 0.2638 0.0212\n", + "max 0.5000 2.7574\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0279\n", + "L2 0.0279 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 115\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 864.0000\n", + "mean 0.1331\n", + "std 0.3399\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:02 2025\n", + "Total time elapsed: 0.33s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.05s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.2 --annot CD4_Naive_2.annot.gz --out cts_ldscores_CD4_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.2 --annot CD4_Naive_2.annot.gz --out cts_ldscores_CD4_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.2 \\\n", + "--bfile cts_ldscores_CD4_Naive.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:22 2025\n", + "Read list of 864 SNPs from cts_ldscores_CD4_Naive.2.bim\n", + "Read 1 annotations for 864 SNPs from CD4_Naive_2.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.2.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to cts_ldscores_CD4_Naive.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 0.1530\n", + "std 0.1408 0.4118\n", + "min 0.0087 -0.0371\n", + "25% 0.0381 -0.0081\n", + "50% 
0.1131 0.0050\n", + "75% 0.2638 0.0223\n", + "max 0.5000 2.7688\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0305\n", + "L2 0.0305 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 111\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 864.0000\n", + "mean 0.1285\n", + "std 0.3348\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:22 2025\n", + "Total time elapsed: 0.41s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.3 --annot CD8_Naive_3.annot.gz --out cts_ldscores_CD8_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.3 --annot CD8_Naive_3.annot.gz --out cts_ldscores_CD8_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.3 \\\n", + "--bfile cts_ldscores_CD8_Naive.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:49 2025\n", + "Read list of 737 SNPs from cts_ldscores_CD8_Naive.3.bim\n", + "Read 1 annotations for 737 SNPs from CD8_Naive_3.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.3.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 737 SNPs to cts_ldscores_CD8_Naive.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 0.1528\n", + "std 0.1499 0.3819\n", + "min 0.0082 -0.0427\n", + "25% 0.0392 -0.0069\n", + "50% 
0.1142 0.0056\n", + "75% 0.2920 0.0225\n", + "max 0.4995 1.9927\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0303\n", + "L2 0.0303 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 100\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 737.0000\n", + "mean 0.1357\n", + "std 0.3427\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:50 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.3 --annot CD4_Naive_3.annot.gz --out cts_ldscores_CD4_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.3 --annot CD4_Naive_3.annot.gz --out cts_ldscores_CD4_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.3 \\\n", + "--bfile cts_ldscores_CD4_Naive.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:13 2025\n", + "Read list of 737 SNPs from cts_ldscores_CD4_Naive.3.bim\n", + "Read 1 annotations for 737 SNPs from CD4_Naive_3.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.3.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 737 SNPs to cts_ldscores_CD4_Naive.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 0.1601\n", + "std 0.1499 0.3874\n", + "min 0.0082 -0.0366\n", + "25% 0.0392 -0.0069\n", + "50% 
0.1142 0.0065\n", + "75% 0.2920 0.0256\n", + "max 0.4995 2.0018\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0091\n", + "L2 0.0091 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 103\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 737.0000\n", + "mean 0.1398\n", + "std 0.3470\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:13 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.08s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.4 --annot CD8_Naive_4.annot.gz --out cts_ldscores_CD8_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.4 --annot CD8_Naive_4.annot.gz --out cts_ldscores_CD8_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.4 \\\n", + "--bfile cts_ldscores_CD8_Naive.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:36 2025\n", + "Read list of 754 SNPs from cts_ldscores_CD8_Naive.4.bim\n", + "Read 1 annotations for 754 SNPs from CD8_Naive_4.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.4.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to cts_ldscores_CD8_Naive.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 0.1023\n", + "std 0.1460 0.3066\n", + "min 0.0087 -0.0272\n", + "25% 0.0394 -0.0059\n", + "50% 
0.1295 0.0027\n", + "75% 0.2783 0.0144\n", + "max 0.5000 1.3741\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 69\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 754.0000\n", + "mean 0.0915\n", + "std 0.2885\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:36 2025\n", + "Total time elapsed: 0.29s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.4 --annot CD4_Naive_4.annot.gz --out cts_ldscores_CD4_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.4 --annot CD4_Naive_4.annot.gz --out cts_ldscores_CD4_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.4 \\\n", + "--bfile cts_ldscores_CD4_Naive.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:58 2025\n", + "Read list of 754 SNPs from cts_ldscores_CD4_Naive.4.bim\n", + "Read 1 annotations for 754 SNPs from CD4_Naive_4.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.4.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to cts_ldscores_CD4_Naive.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 0.0894\n", + "std 0.1460 0.2923\n", + "min 0.0087 -0.0256\n", + "25% 0.0394 -0.0052\n", + "50% 
0.1295 0.0029\n", + "75% 0.2783 0.0130\n", + "max 0.5000 1.5382\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0013\n", + "L2 0.0013 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 59\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 754.0000\n", + "mean 0.0782\n", + "std 0.2687\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:58 2025\n", + "Total time elapsed: 0.28s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.60s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.5 --annot CD8_Naive_5.annot.gz --out cts_ldscores_CD8_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.5 --annot CD8_Naive_5.annot.gz --out cts_ldscores_CD8_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.5 \\\n", + "--bfile cts_ldscores_CD8_Naive.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:16 2025\n", + "Read list of 671 SNPs from cts_ldscores_CD8_Naive.5.bim\n", + "Read 1 annotations for 671 SNPs from CD8_Naive_5.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.5.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 671 SNPs to cts_ldscores_CD8_Naive.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 0.1430\n", + "std 0.1514 0.3666\n", + "min 0.0087 -0.0319\n", + "25% 0.0395 -0.0067\n", + "50% 
0.1239 0.0033\n", + "75% 0.3017 0.0209\n", + "max 0.4980 1.9526\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0064\n", + "L2 0.0064 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 86\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 671.0000\n", + "mean 0.1282\n", + "std 0.3345\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:17 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.02s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.5 --annot CD4_Naive_5.annot.gz --out cts_ldscores_CD4_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.5 --annot CD4_Naive_5.annot.gz --out cts_ldscores_CD4_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.5 \\\n", + "--bfile cts_ldscores_CD4_Naive.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:34 2025\n", + "Read list of 671 SNPs from cts_ldscores_CD4_Naive.5.bim\n", + "Read 1 annotations for 671 SNPs from CD4_Naive_5.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.5.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 671 SNPs to cts_ldscores_CD4_Naive.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 0.1453\n", + "std 0.1514 0.3693\n", + "min 0.0087 -0.0328\n", + "25% 0.0395 -0.0076\n", + "50% 
0.1239 0.0039\n", + "75% 0.3017 0.0208\n", + "max 0.4980 1.9556\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0023\n", + "L2 -0.0023 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 87\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 671.0000\n", + "mean 0.1297\n", + "std 0.3362\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:34 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.98s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.6 --annot CD8_Naive_6.annot.gz --out cts_ldscores_CD8_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.6 --annot CD8_Naive_6.annot.gz --out cts_ldscores_CD8_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.6 \\\n", + "--bfile cts_ldscores_CD8_Naive.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:58 2025\n", + "Read list of 701 SNPs from cts_ldscores_CD8_Naive.6.bim\n", + "Read 1 annotations for 701 SNPs from CD8_Naive_6.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.6.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to cts_ldscores_CD8_Naive.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 0.2136\n", + "std 0.1473 0.4680\n", + "min 0.0087 -0.0376\n", + "25% 0.0418 -0.0047\n", + "50% 
0.1310 0.0083\n", + "75% 0.2880 0.0470\n", + "max 0.4995 2.9027\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0757\n", + "L2 0.0757 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 113\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 701.0000\n", + "mean 0.1612\n", + "std 0.3680\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:58 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.89s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.6 --annot CD4_Naive_6.annot.gz --out cts_ldscores_CD4_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.6 --annot CD4_Naive_6.annot.gz --out cts_ldscores_CD4_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.6 \\\n", + "--bfile cts_ldscores_CD4_Naive.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:55:21 2025\n", + "Read list of 701 SNPs from cts_ldscores_CD4_Naive.6.bim\n", + "Read 1 annotations for 701 SNPs from CD4_Naive_6.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.6.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to cts_ldscores_CD4_Naive.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 0.2152\n", + "std 0.1473 0.4682\n", + "min 0.0087 -0.0372\n", + "25% 0.0418 -0.0057\n", + "50% 
0.1310 0.0086\n", + "75% 0.2880 0.0521\n", + "max 0.4995 2.9119\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0722\n", + "L2 0.0722 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 112\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 701.0000\n", + "mean 0.1598\n", + "std 0.3667\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:55:22 2025\n", + "Total time elapsed: 0.32s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.70s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.7 --annot CD8_Naive_7.annot.gz --out cts_ldscores_CD8_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.7 --annot CD8_Naive_7.annot.gz --out cts_ldscores_CD8_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.7 \\\n", + "--bfile cts_ldscores_CD8_Naive.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:55:44 2025\n", + "Read list of 611 SNPs from cts_ldscores_CD8_Naive.7.bim\n", + "Read 1 annotations for 611 SNPs from CD8_Naive_7.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.7.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 611 SNPs to cts_ldscores_CD8_Naive.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 0.1731\n", + "std 0.1567 0.4343\n", + "min 0.0092 -0.0336\n", + "25% 0.0367 -0.0076\n", + "50% 
0.1295 0.0034\n", + "75% 0.3084 0.0217\n", + "max 0.5000 2.9954\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0353\n", + "L2 0.0353 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 91\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 611.0000\n", + "mean 0.1489\n", + "std 0.3563\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:55:45 2025\n", + "Total time elapsed: 0.26s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.7 --annot CD4_Naive_7.annot.gz --out cts_ldscores_CD4_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.7 --annot CD4_Naive_7.annot.gz --out cts_ldscores_CD4_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.7 \\\n", + "--bfile cts_ldscores_CD4_Naive.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:08 2025\n", + "Read list of 611 SNPs from cts_ldscores_CD4_Naive.7.bim\n", + "Read 1 annotations for 611 SNPs from CD4_Naive_7.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.7.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 611 SNPs to cts_ldscores_CD4_Naive.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 0.1483\n", + "std 0.1567 0.4024\n", + "min 0.0092 -0.0304\n", + "25% 0.0367 -0.0061\n", + "50% 
0.1295 0.0039\n", + "75% 0.3084 0.0166\n", + "max 0.5000 2.9600\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0053\n", + "L2 -0.0053 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 78\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 611.0000\n", + "mean 0.1277\n", + "std 0.3340\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.23s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.8 --annot CD8_Naive_8.annot.gz --out cts_ldscores_CD8_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.8 --annot CD8_Naive_8.annot.gz --out cts_ldscores_CD8_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.8 \\\n", + "--bfile cts_ldscores_CD8_Naive.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:27 2025\n", + "Read list of 562 SNPs from cts_ldscores_CD8_Naive.8.bim\n", + "Read 1 annotations for 562 SNPs from CD8_Naive_8.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.8.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to cts_ldscores_CD8_Naive.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 0.1353\n", + "std 0.1475 0.3897\n", + "min 0.0087 -0.0349\n", + "25% 0.0449 -0.0058\n", + "50% 
0.1376 0.0034\n", + "75% 0.3012 0.0151\n", + "max 0.4995 2.9989\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0589\n", + "L2 0.0589 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 65\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 562.0000\n", + "mean 0.1157\n", + "std 0.3201\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:27 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.14s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.8 --annot CD4_Naive_8.annot.gz --out cts_ldscores_CD4_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.8 --annot CD4_Naive_8.annot.gz --out cts_ldscores_CD4_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.8 \\\n", + "--bfile cts_ldscores_CD4_Naive.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:46 2025\n", + "Read list of 562 SNPs from cts_ldscores_CD4_Naive.8.bim\n", + "Read 1 annotations for 562 SNPs from CD4_Naive_8.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.8.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to cts_ldscores_CD4_Naive.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 0.1623\n", + "std 0.1475 0.4241\n", + "min 0.0087 -0.0339\n", + "25% 0.0449 -0.0055\n", + "50% 
0.1376 0.0049\n", + "75% 0.3012 0.0210\n", + "max 0.4995 2.9978\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.081\n", + "L2 0.081 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 75\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 562.0000\n", + "mean 0.1335\n", + "std 0.3404\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:46 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.85s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.9 --annot CD8_Naive_9.annot.gz --out cts_ldscores_CD8_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.9 --annot CD8_Naive_9.annot.gz --out cts_ldscores_CD8_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.9 \\\n", + "--bfile cts_ldscores_CD8_Naive.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:09 2025\n", + "Read list of 440 SNPs from cts_ldscores_CD8_Naive.9.bim\n", + "Read 1 annotations for 440 SNPs from CD8_Naive_9.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.9.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 440 SNPs to cts_ldscores_CD8_Naive.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 0.1553\n", + "std 0.1510 0.3640\n", + "min 0.0092 -0.0335\n", + "25% 0.0401 -0.0058\n", + "50% 
0.1412 0.0041\n", + "75% 0.2987 0.0192\n", + "max 0.4985 1.2104\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0037\n", + "L2 0.0037 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 65\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 440.0000\n", + "mean 0.1477\n", + "std 0.3552\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.04s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.9 --annot CD4_Naive_9.annot.gz --out cts_ldscores_CD4_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.9 --annot CD4_Naive_9.annot.gz --out cts_ldscores_CD4_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.9 \\\n", + "--bfile cts_ldscores_CD4_Naive.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:33 2025\n", + "Read list of 440 SNPs from cts_ldscores_CD4_Naive.9.bim\n", + "Read 1 annotations for 440 SNPs from CD4_Naive_9.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.9.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 440 SNPs to cts_ldscores_CD4_Naive.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 0.1710\n", + "std 0.1510 0.4031\n", + "min 0.0092 -0.0251\n", + "25% 0.0401 -0.0055\n", + "50% 
0.1412 0.0049\n", + "75% 0.2987 0.0183\n", + "max 0.4985 2.0366\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0575\n", + "L2 0.0575 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 67\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 440.0000\n", + "mean 0.1523\n", + "std 0.3597\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:33 2025\n", + "Total time elapsed: 0.2s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.10 --annot CD8_Naive_10.annot.gz --out cts_ldscores_CD8_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.10 --annot CD8_Naive_10.annot.gz --out cts_ldscores_CD8_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.10 \\\n", + "--bfile cts_ldscores_CD8_Naive.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:51 2025\n", + "Read list of 523 SNPs from cts_ldscores_CD8_Naive.10.bim\n", + "Read 1 annotations for 523 SNPs from CD8_Naive_10.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.10.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to cts_ldscores_CD8_Naive.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 0.1529\n", + "std 0.1466 0.4107\n", + "min 0.0082 -0.0333\n", + "25% 0.0403 
-0.0082\n", + "50% 0.1300 0.0027\n", + "75% 0.2752 0.0176\n", + "max 0.4959 2.8415\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 69\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 523.0000\n", + "mean 0.1319\n", + "std 0.3387\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:52 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.10 --annot CD4_Naive_10.annot.gz --out cts_ldscores_CD4_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.10 --annot CD4_Naive_10.annot.gz --out cts_ldscores_CD4_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.10 \\\n", + "--bfile cts_ldscores_CD4_Naive.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:09 2025\n", + "Read list of 523 SNPs from cts_ldscores_CD4_Naive.10.bim\n", + "Read 1 annotations for 523 SNPs from CD4_Naive_10.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.10.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to cts_ldscores_CD4_Naive.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 0.1588\n", + "std 0.1466 0.4224\n", + "min 0.0082 -0.0287\n", + "25% 0.0403 
-0.0065\n", + "50% 0.1300 0.0032\n", + "75% 0.2752 0.0173\n", + "max 0.4959 2.8300\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0075\n", + "L2 -0.0075 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 70\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 523.0000\n", + "mean 0.1338\n", + "std 0.3408\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.13s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.11 --annot CD8_Naive_11.annot.gz --out cts_ldscores_CD8_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.11 --annot CD8_Naive_11.annot.gz --out cts_ldscores_CD8_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.11 \\\n", + "--bfile cts_ldscores_CD8_Naive.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:32 2025\n", + "Read list of 506 SNPs from cts_ldscores_CD8_Naive.11.bim\n", + "Read 1 annotations for 506 SNPs from CD8_Naive_11.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.11.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.11.bed\n", + "After filtering, 506 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to cts_ldscores_CD8_Naive.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 0.2665\n", + "std 0.1520 0.4811\n", + "min 0.0102 -0.0352\n", + "25% 0.0413 
-0.0063\n", + "50% 0.1208 0.0099\n", + "75% 0.2985 0.1766\n", + "max 0.4995 2.5324\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0443\n", + "L2 0.0443 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 506.0000\n", + "mean 0.2134\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:32 2025\n", + "Total time elapsed: 0.23s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.10s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.11 --annot CD4_Naive_11.annot.gz --out cts_ldscores_CD4_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.11 --annot CD4_Naive_11.annot.gz --out cts_ldscores_CD4_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.11 \\\n", + "--bfile cts_ldscores_CD4_Naive.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:56 2025\n", + "Read list of 506 SNPs from cts_ldscores_CD4_Naive.11.bim\n", + "Read 1 annotations for 506 SNPs from CD4_Naive_11.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.11.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.11.bed\n", + "After filtering, 506 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to cts_ldscores_CD4_Naive.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 0.2682\n", + "std 0.1520 0.4870\n", + "min 0.0102 -0.0380\n", + "25% 0.0413 
-0.0056\n", + "50% 0.1208 0.0112\n", + "75% 0.2985 0.1107\n", + "max 0.4995 2.5043\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0298\n", + "L2 0.0298 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 506.0000\n", + "mean 0.2134\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:56 2025\n", + "Total time elapsed: 0.28s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.12 --annot CD8_Naive_12.annot.gz --out cts_ldscores_CD8_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.12 --annot CD8_Naive_12.annot.gz --out cts_ldscores_CD8_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.12 \\\n", + "--bfile cts_ldscores_CD8_Naive.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:16 2025\n", + "Read list of 507 SNPs from cts_ldscores_CD8_Naive.12.bim\n", + "Read 1 annotations for 507 SNPs from CD8_Naive_12.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.12.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to cts_ldscores_CD8_Naive.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 0.2046\n", + "std 0.1498 0.4351\n", + "min 0.0092 -0.0357\n", + "25% 0.0372 
-0.0054\n", + "50% 0.1254 0.0068\n", + "75% 0.2918 0.0314\n", + "max 0.5000 2.4147\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0307\n", + "L2 -0.0307 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 92\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 507.0000\n", + "mean 0.1815\n", + "std 0.3858\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:16 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.93s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.12 --annot CD4_Naive_12.annot.gz --out cts_ldscores_CD4_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.12 --annot CD4_Naive_12.annot.gz --out cts_ldscores_CD4_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.12 \\\n", + "--bfile cts_ldscores_CD4_Naive.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:35 2025\n", + "Read list of 507 SNPs from cts_ldscores_CD4_Naive.12.bim\n", + "Read 1 annotations for 507 SNPs from CD4_Naive_12.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.12.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to cts_ldscores_CD4_Naive.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 0.2027\n", + "std 0.1498 0.4416\n", + "min 0.0092 -0.0303\n", + "25% 0.0372 
-0.0056\n", + "50% 0.1254 0.0065\n", + "75% 0.2918 0.0288\n", + "max 0.5000 2.4133\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0081\n", + "L2 -0.0081 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 90\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 507.0000\n", + "mean 0.1775\n", + "std 0.3825\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:35 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.49s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.13 --annot CD8_Naive_13.annot.gz --out cts_ldscores_CD8_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.13 --annot CD8_Naive_13.annot.gz --out cts_ldscores_CD8_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.13 \\\n", + "--bfile cts_ldscores_CD8_Naive.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:59 2025\n", + "Read list of 383 SNPs from cts_ldscores_CD8_Naive.13.bim\n", + "Read 1 annotations for 383 SNPs from CD8_Naive_13.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.13.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.13.bed\n", + "After filtering, 383 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to cts_ldscores_CD8_Naive.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 0.1073\n", + "std 0.1509 0.3126\n", + "min 0.0097 -0.0222\n", + "25% 0.0413 
-0.0052\n", + "50% 0.1356 0.0010\n", + "75% 0.2854 0.0093\n", + "max 0.4959 1.2337\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0339\n", + "L2 -0.0339 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 39\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 383.0000\n", + "mean 0.1018\n", + "std 0.3028\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:59 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.48s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.13 --annot CD4_Naive_13.annot.gz --out cts_ldscores_CD4_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.13 --annot CD4_Naive_13.annot.gz --out cts_ldscores_CD4_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.13 \\\n", + "--bfile cts_ldscores_CD4_Naive.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:00:23 2025\n", + "Read list of 383 SNPs from cts_ldscores_CD4_Naive.13.bim\n", + "Read 1 annotations for 383 SNPs from CD4_Naive_13.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.13.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.13.bed\n", + "After filtering, 383 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to cts_ldscores_CD4_Naive.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 0.1057\n", + "std 0.1509 0.3105\n", + "min 0.0097 -0.0210\n", + "25% 0.0413 
-0.0052\n", + "50% 0.1356 0.0023\n", + "75% 0.2854 0.0110\n", + "max 0.4959 1.3078\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0205\n", + "L2 0.0205 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 38\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 383.0000\n", + "mean 0.0992\n", + "std 0.2993\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:00:24 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.14 --annot CD8_Naive_14.annot.gz --out cts_ldscores_CD8_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.14 --annot CD8_Naive_14.annot.gz --out cts_ldscores_CD8_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.14 \\\n", + "--bfile cts_ldscores_CD8_Naive.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:00:43 2025\n", + "Read list of 339 SNPs from cts_ldscores_CD8_Naive.14.bim\n", + "Read 1 annotations for 339 SNPs from CD8_Naive_14.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.14.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to cts_ldscores_CD8_Naive.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 0.1102\n", + "std 0.1444 0.3275\n", + "min 0.0087 -0.0172\n", + "25% 0.0370 
-0.0059\n", + "50% 0.1096 0.0007\n", + "75% 0.2808 0.0105\n", + "max 0.4929 1.9204\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0839\n", + "L2 -0.0839 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 34\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 339.0000\n", + "mean 0.1003\n", + "std 0.3008\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:00:43 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.14 --annot CD4_Naive_14.annot.gz --out cts_ldscores_CD4_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.14 --annot CD4_Naive_14.annot.gz --out cts_ldscores_CD4_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.14 \\\n", + "--bfile cts_ldscores_CD4_Naive.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:00 2025\n", + "Read list of 339 SNPs from cts_ldscores_CD4_Naive.14.bim\n", + "Read 1 annotations for 339 SNPs from CD4_Naive_14.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.14.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to cts_ldscores_CD4_Naive.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 0.0935\n", + "std 0.1444 0.3057\n", + "min 0.0087 -0.0180\n", + "25% 0.0370 
-0.0045\n", + "50% 0.1096 0.0010\n", + "75% 0.2808 0.0092\n", + "max 0.4929 1.9183\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0672\n", + "L2 -0.0672 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 339.0000\n", + "mean 0.0826\n", + "std 0.2757\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:01 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.30s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.15 --annot CD8_Naive_15.annot.gz --out cts_ldscores_CD8_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.15 --annot CD8_Naive_15.annot.gz --out cts_ldscores_CD8_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.15 \\\n", + "--bfile cts_ldscores_CD8_Naive.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:24 2025\n", + "Read list of 290 SNPs from cts_ldscores_CD8_Naive.15.bim\n", + "Read 1 annotations for 290 SNPs from CD8_Naive_15.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.15.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.15.bed\n", + "After filtering, 290 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to cts_ldscores_CD8_Naive.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 0.1941\n", + "std 0.1504 0.4318\n", + "min 0.0092 -0.0209\n", + "25% 0.0477 
-0.0045\n", + "50% 0.1430 0.0036\n", + "75% 0.3072 0.0215\n", + "max 0.4964 1.9951\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0693\n", + "L2 -0.0693 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 49\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 290.0000\n", + "mean 0.1690\n", + "std 0.3754\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:24 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.90s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.15 --annot CD4_Naive_15.annot.gz --out cts_ldscores_CD4_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.15 --annot CD4_Naive_15.annot.gz --out cts_ldscores_CD4_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.15 \\\n", + "--bfile cts_ldscores_CD4_Naive.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:49 2025\n", + "Read list of 290 SNPs from cts_ldscores_CD4_Naive.15.bim\n", + "Read 1 annotations for 290 SNPs from CD4_Naive_15.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.15.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.15.bed\n", + "After filtering, 290 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to cts_ldscores_CD4_Naive.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 0.2427\n", + "std 0.1504 0.4641\n", + "min 0.0092 -0.0211\n", + "25% 0.0477 
-0.0041\n", + "50% 0.1430 0.0055\n", + "75% 0.3072 0.0437\n", + "max 0.4964 1.9847\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0453\n", + "L2 -0.0453 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 62\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 290.0000\n", + "mean 0.2138\n", + "std 0.4107\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:49 2025\n", + "Total time elapsed: 0.2s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.38s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.16 --annot CD8_Naive_16.annot.gz --out cts_ldscores_CD8_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.16 --annot CD8_Naive_16.annot.gz --out cts_ldscores_CD8_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.16 \\\n", + "--bfile cts_ldscores_CD8_Naive.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:07 2025\n", + "Read list of 312 SNPs from cts_ldscores_CD8_Naive.16.bim\n", + "Read 1 annotations for 312 SNPs from CD8_Naive_16.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.16.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to cts_ldscores_CD8_Naive.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 0.1561\n", + "std 0.1502 0.3667\n", + "min 0.0097 -0.0232\n", + "25% 0.0391 
-0.0038\n", + "50% 0.1381 0.0028\n", + "75% 0.2792 0.0163\n", + "max 0.4990 1.4741\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0282\n", + "L2 -0.0282 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 45\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 312.0000\n", + "mean 0.1442\n", + "std 0.3519\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:08 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.83s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.16 --annot CD4_Naive_16.annot.gz --out cts_ldscores_CD4_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.16 --annot CD4_Naive_16.annot.gz --out cts_ldscores_CD4_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.16 \\\n", + "--bfile cts_ldscores_CD4_Naive.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:26 2025\n", + "Read list of 312 SNPs from cts_ldscores_CD4_Naive.16.bim\n", + "Read 1 annotations for 312 SNPs from CD4_Naive_16.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.16.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to cts_ldscores_CD4_Naive.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 0.1616\n", + "std 0.1502 0.3680\n", + "min 0.0097 -0.0214\n", + "25% 0.0391 
-0.0037\n", + "50% 0.1381 0.0043\n", + "75% 0.2792 0.0144\n", + "max 0.4990 1.1301\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.1229\n", + "L2 -0.1229 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 47\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 312.0000\n", + "mean 0.1506\n", + "std 0.3583\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:26 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.84s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.17 --annot CD8_Naive_17.annot.gz --out cts_ldscores_CD8_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.17 --annot CD8_Naive_17.annot.gz --out cts_ldscores_CD8_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.17 \\\n", + "--bfile cts_ldscores_CD8_Naive.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:44 2025\n", + "Read list of 272 SNPs from cts_ldscores_CD8_Naive.17.bim\n", + "Read 1 annotations for 272 SNPs from CD8_Naive_17.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.17.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.17.bed\n", + "After filtering, 272 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to cts_ldscores_CD8_Naive.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 0.2967\n", + "std 0.1557 0.4779\n", + "min 0.0082 -0.0303\n", + "25% 0.0401 
-0.0016\n", + "50% 0.1656 0.0116\n", + "75% 0.3336 0.9886\n", + "max 0.4954 1.7618\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0708\n", + "L2 0.0708 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 72\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 272.0000\n", + "mean 0.2647\n", + "std 0.4420\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:44 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.19s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.17 --annot CD4_Naive_17.annot.gz --out cts_ldscores_CD4_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.17 --annot CD4_Naive_17.annot.gz --out cts_ldscores_CD4_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.17 \\\n", + "--bfile cts_ldscores_CD4_Naive.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:03 2025\n", + "Read list of 272 SNPs from cts_ldscores_CD4_Naive.17.bim\n", + "Read 1 annotations for 272 SNPs from CD4_Naive_17.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.17.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.17.bed\n", + "After filtering, 272 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to cts_ldscores_CD4_Naive.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 0.2763\n", + "std 0.1557 0.5245\n", + "min 0.0082 -0.0315\n", + "25% 0.0401 
-0.0046\n", + "50% 0.1656 0.0067\n", + "75% 0.3336 0.2849\n", + "max 0.4954 2.8720\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0414\n", + "L2 0.0414 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 60\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 272.0000\n", + "mean 0.2206\n", + "std 0.4154\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:03 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.18 --annot CD8_Naive_18.annot.gz --out cts_ldscores_CD8_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.18 --annot CD8_Naive_18.annot.gz --out cts_ldscores_CD8_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.18 \\\n", + "--bfile cts_ldscores_CD8_Naive.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:20 2025\n", + "Read list of 294 SNPs from cts_ldscores_CD8_Naive.18.bim\n", + "Read 1 annotations for 294 SNPs from CD8_Naive_18.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.18.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to cts_ldscores_CD8_Naive.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 0.0927\n", + "std 0.1518 0.3541\n", + "min 0.0097 -0.0158\n", + "25% 0.0515 
-0.0052\n", + "50% 0.1590 -0.0011\n", + "75% 0.3068 0.0060\n", + "max 0.4939 2.2866\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0585\n", + "L2 0.0585 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 294.0000\n", + "mean 0.0714\n", + "std 0.2580\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:21 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.18 --annot CD4_Naive_18.annot.gz --out cts_ldscores_CD4_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.18 --annot CD4_Naive_18.annot.gz --out cts_ldscores_CD4_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.18 \\\n", + "--bfile cts_ldscores_CD4_Naive.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:38 2025\n", + "Read list of 294 SNPs from cts_ldscores_CD4_Naive.18.bim\n", + "Read 1 annotations for 294 SNPs from CD4_Naive_18.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.18.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to cts_ldscores_CD4_Naive.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 0.0869\n", + "std 0.1518 0.3529\n", + "min 0.0097 -0.0148\n", + "25% 0.0515 
-0.0060\n", + "50% 0.1590 -0.0006\n", + "75% 0.3068 0.0051\n", + "max 0.4939 2.2790\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1123\n", + "L2 0.1123 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 19\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 294.0000\n", + "mean 0.0646\n", + "std 0.2463\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:38 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.19 --annot CD8_Naive_19.annot.gz --out cts_ldscores_CD8_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.19 --annot CD8_Naive_19.annot.gz --out cts_ldscores_CD8_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.19 \\\n", + "--bfile cts_ldscores_CD8_Naive.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:58 2025\n", + "Read list of 239 SNPs from cts_ldscores_CD8_Naive.19.bim\n", + "Read 1 annotations for 239 SNPs from CD8_Naive_19.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.19.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.19.bed\n", + "After filtering, 239 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to cts_ldscores_CD8_Naive.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 0.4960\n", + "std 0.1483 0.5393\n", + "min 0.0102 -0.0273\n", + "25% 0.0418 
0.0026\n", + "50% 0.1448 0.0425\n", + "75% 0.3007 1.0072\n", + "max 0.4969 2.0049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0458\n", + "L2 0.0458 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 239.0000\n", + "mean 0.4519\n", + "std 0.4987\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:58 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:09<00:00, 9.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.19 --annot CD4_Naive_19.annot.gz --out cts_ldscores_CD4_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.19 --annot CD4_Naive_19.annot.gz --out cts_ldscores_CD4_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.19 \\\n", + "--bfile cts_ldscores_CD4_Naive.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:04:20 2025\n", + "Read list of 239 SNPs from cts_ldscores_CD4_Naive.19.bim\n", + "Read 1 annotations for 239 SNPs from CD4_Naive_19.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.19.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.19.bed\n", + "After filtering, 239 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to cts_ldscores_CD4_Naive.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 0.3652\n", + "std 0.1483 0.5128\n", + "min 0.0102 -0.0269\n", + "25% 0.0418 
-0.0020\n", + "50% 0.1448 0.0140\n", + "75% 0.3007 0.9950\n", + "max 0.4969 2.0049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.085\n", + "L2 0.085 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 79\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 239.0000\n", + "mean 0.3305\n", + "std 0.4714\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:04:20 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.31s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.20 --annot CD8_Naive_20.annot.gz --out cts_ldscores_CD8_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.20 --annot CD8_Naive_20.annot.gz --out cts_ldscores_CD8_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.20 \\\n", + "--bfile cts_ldscores_CD8_Naive.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:04:56 2025\n", + "Read list of 227 SNPs from cts_ldscores_CD8_Naive.20.bim\n", + "Read 1 annotations for 227 SNPs from CD8_Naive_20.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.20.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to cts_ldscores_CD8_Naive.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 0.0991\n", + "std 0.1404 0.3103\n", + "min 0.0102 -0.0135\n", + "25% 0.0538 
-0.0043\n", + "50% 0.1386 -0.0004\n", + "75% 0.2796 0.0066\n", + "max 0.4837 1.4853\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0314\n", + "L2 -0.0314 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 227.0000\n", + "mean 0.0925\n", + "std 0.2904\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:04:56 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.20 --annot CD4_Naive_20.annot.gz --out cts_ldscores_CD4_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.20 --annot CD4_Naive_20.annot.gz --out cts_ldscores_CD4_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.20 \\\n", + "--bfile cts_ldscores_CD4_Naive.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:05:35 2025\n", + "Read list of 227 SNPs from cts_ldscores_CD4_Naive.20.bim\n", + "Read 1 annotations for 227 SNPs from CD4_Naive_20.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.20.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to cts_ldscores_CD4_Naive.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 0.1306\n", + "std 0.1404 0.3364\n", + "min 0.0102 -0.0149\n", + "25% 0.0538 
-0.0047\n", + "50% 0.1386 0.0015\n", + "75% 0.2796 0.0112\n", + "max 0.4837 1.0987\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.1106\n", + "L2 -0.1106 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 227.0000\n", + "mean 0.1233\n", + "std 0.3296\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:05:35 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.21 --annot CD8_Naive_21.annot.gz --out cts_ldscores_CD8_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.21 --annot CD8_Naive_21.annot.gz --out cts_ldscores_CD8_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.21 \\\n", + "--bfile cts_ldscores_CD8_Naive.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:04 2025\n", + "Read list of 135 SNPs from cts_ldscores_CD8_Naive.21.bim\n", + "Read 1 annotations for 135 SNPs from CD8_Naive_21.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.21.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.21.bed\n", + "After filtering, 135 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to cts_ldscores_CD8_Naive.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 0.1890\n", + "std 0.1475 0.3923\n", + "min 0.0087 -0.0117\n", + "25% 0.0581 
-0.0030\n", + "50% 0.1590 0.0026\n", + "75% 0.3129 0.0169\n", + "max 0.4893 1.2037\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0929\n", + "L2 -0.0929 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 135.0000\n", + "mean 0.1778\n", + "std 0.3837\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:04 2025\n", + "Total time elapsed: 0.21s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.82s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.21 --annot CD4_Naive_21.annot.gz --out cts_ldscores_CD4_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.21 --annot CD4_Naive_21.annot.gz --out cts_ldscores_CD4_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.21 \\\n", + "--bfile cts_ldscores_CD4_Naive.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:26 2025\n", + "Read list of 135 SNPs from cts_ldscores_CD4_Naive.21.bim\n", + "Read 1 annotations for 135 SNPs from CD4_Naive_21.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.21.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.21.bed\n", + "After filtering, 135 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to cts_ldscores_CD4_Naive.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 0.1647\n", + "std 0.1475 0.3729\n", + "min 0.0087 -0.0129\n", + "25% 0.0581 
-0.0026\n", + "50% 0.1590 0.0018\n", + "75% 0.3129 0.0085\n", + "max 0.4893 1.2044\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0325\n", + "L2 -0.0325 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 135.0000\n", + "mean 0.1556\n", + "std 0.3638\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:26 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.24s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.22 --annot CD8_Naive_22.annot.gz --out cts_ldscores_CD8_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.22 --annot CD8_Naive_22.annot.gz --out cts_ldscores_CD8_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.22 \\\n", + "--bfile cts_ldscores_CD8_Naive.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:48 2025\n", + "Read list of 136 SNPs from cts_ldscores_CD8_Naive.22.bim\n", + "Read 1 annotations for 136 SNPs from CD8_Naive_22.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.22.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to cts_ldscores_CD8_Naive.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 0.2025\n", + "std 0.1358 0.4513\n", + "min 0.0102 -0.0143\n", + "25% 0.0405 
-0.0031\n", + "50% 0.1241 0.0030\n", + "75% 0.2683 0.0165\n", + "max 0.4944 2.1050\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0641\n", + "L2 -0.0641 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 136.0000\n", + "mean 0.1765\n", + "std 0.3826\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:48 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.22 --annot CD4_Naive_22.annot.gz --out cts_ldscores_CD4_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.22 --annot CD4_Naive_22.annot.gz --out cts_ldscores_CD4_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.22 \\\n", + "--bfile cts_ldscores_CD4_Naive.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:07:10 2025\n", + "Read list of 136 SNPs from cts_ldscores_CD4_Naive.22.bim\n", + "Read 1 annotations for 136 SNPs from CD4_Naive_22.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.22.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to cts_ldscores_CD4_Naive.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 0.2171\n", + "std 0.1358 0.4606\n", + "min 0.0102 -0.0137\n", + "25% 0.0405 
-0.0025\n", + "50% 0.1241 0.0033\n", + "75% 0.2683 0.0197\n", + "max 0.4944 2.1059\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.017\n", + "L2 -0.017 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 26\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 136.0000\n", + "mean 0.1912\n", + "std 0.3947\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:07:11 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.bed\n" + ] + } + ], + "source": [ + "# Compute cell-type-specific LD scores for every autosome and both cell types.\n", + "# NOTE: this loop rebinds `chrom` and `cell_type` from the setup cell; once it\n", + "# finishes they hold 22 and 'CD4 Naive' rather than the values configured there.\n", + "for chrom in range(1, 23):\n", + "    chrom_id = str(chrom)\n", + "    # The per-chromosome selection does not depend on the cell type, so build it once per chromosome.\n", + "    dd_chrom_sel = dd.sel(G_var=dd.G.var.chrom == chrom_id, C_var=dd.C.var.chrom == chrom_id)\n", + "    for cell_type in [\"CD8 Naive\", \"CD4 Naive\"]:\n", + "        print(f\"Processing cell type: {cell_type}\")\n", + "\n", + "        ct_slug = cell_type.replace(' ', '_')  # file names use underscores instead of spaces\n", + "        dd_chrom = dd_chrom_sel.copy()  # every run still receives its own copy\n", + "        results = compute_ld_scores_with_annotations_from_donor_data(\n", + "            dd=dd_chrom,\n", + "            annot_file=f\"{ct_slug}_{chrom}.annot.gz\",\n", + "            out_prefix=f\"cts_ldscores_{ct_slug}.{chrom}\",\n", + "            run=True,\n", + "            runner=runner,\n", + "        )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 6: Prepare Reference LD Scores and Weights\n", + "For the final analysis, we need baseline LD scores and regression weights. These control for genomic confounders and ensure proper statistical inference.
We download these from the 1000 Genomes reference panel." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/1000G_Phase3_baselineLD_v2.2_ldscores.tgz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/1000G_Phase3_weights_hm3_no_MHC.tgz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n" + ] + } + ], + "source": [ + "# Both downloads read the same resource config, so the path is named once.\n", + "# It is relative to this notebook's directory (docs/tutorials) in a source checkout.\n", + "kg_config_path = \"../../src/cellink/resources/config/1000genomes.yaml\"\n", + "\n", + "ldscores_path, ldscores_prefix = get_1000genomes_ld_scores(\n", + "    config_path=kg_config_path, population=\"EUR\", return_path=True\n", + ")\n", + "ldweights_path, ldweights_prefix = get_1000genomes_ld_weights(\n", + "    config_path=kg_config_path, population=\"EUR\", return_path=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a control file listing all cell-type-specific LD score prefixes:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# One tab-separated line per cell type: label, then the LD score prefix. The prefix is the\n", + "# `out_prefix` used when computing the cell-type-specific LD scores, without the chromosome number.\n", + "# A control prefix could presumably be appended after a comma, e.g.\n", + "# os.path.join(ldscores_path, ldscores_prefix); it is left out here.\n", + "with open(\"celltype_ldscores.txt\", \"w\") as f:\n", + "    for ct_label in [\"CD8_Naive\", \"CD4_Naive\"]:\n", + "        f.write(f\"{ct_label}\tcts_ldscores_{ct_label}.\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need baseline LD scores.
Here we compute them from our donor data (in a real analysis, you would use pre-computed baseline LD scores):" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.1 --l2 --out BaselineLD.1 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.1 --l2 --out BaselineLD.1 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.1 \\\n", + "--bfile BaselineLD.1 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:09:35 2025\n", + "Read list of 796 SNPs from BaselineLD.1.bim\n", + "Read list of 981 individuals from BaselineLD.1.fam\n", + "Reading genotypes from BaselineLD.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to BaselineLD.1.l2.ldscore.gz\n", 
+ "\n", + "Summary of LD Scores in BaselineLD.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 1.1546\n", + "std 0.1526 0.2973\n", + "min 0.0092 0.8847\n", + "25% 0.0432 1.0024\n", + "50% 0.1300 1.0518\n", + "75% 0.3086 1.1259\n", + "max 0.4995 2.8918\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.129\n", + "L2 0.129 1.000\n", + "Analysis finished at Thu Nov 6 21:09:35 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:08<00:00, 8.55s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.2 --l2 --out BaselineLD.2 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.2 --l2 --out BaselineLD.2 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.2 \\\n", + "--bfile BaselineLD.2 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:10:01 2025\n", + "Read list of 864 SNPs from BaselineLD.2.bim\n", + "Read list of 981 individuals from BaselineLD.2.fam\n", + "Reading genotypes from BaselineLD.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to BaselineLD.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 1.1604\n", + "std 0.1408 0.3164\n", + "min 0.0087 0.8871\n", + "25% 0.0381 1.0030\n", + "50% 0.1131 1.0526\n", + "75% 0.2638 1.1510\n", + "max 0.5000 2.9248\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1575\n", + "L2 0.1575 1.0000\n", + "Analysis finished at Thu Nov 6 21:10:02 2025\n", + "Total time elapsed: 0.34s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:15<00:00, 15.01s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.3 --l2 --out BaselineLD.3 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.3 --l2 --out BaselineLD.3 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.3 \\\n", + "--bfile BaselineLD.3 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:10:33 2025\n", + "Read list of 737 SNPs from BaselineLD.3.bim\n", + "Read list of 981 individuals from BaselineLD.3.fam\n", + "Reading genotypes from BaselineLD.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD 
Score.\n", + "Writing LD Scores for 737 SNPs to BaselineLD.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 1.1388\n", + "std 0.1499 0.2501\n", + "min 0.0082 0.8972\n", + "25% 0.0392 1.0100\n", + "50% 0.1142 1.0548\n", + "75% 0.2920 1.1376\n", + "max 0.4995 2.6729\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2076\n", + "L2 0.2076 1.0000\n", + "Analysis finished at Thu Nov 6 21:10:33 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.4 --l2 --out BaselineLD.4 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.4 --l2 --out BaselineLD.4 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.4 \\\n", + "--bfile BaselineLD.4 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:02 2025\n", + "Read list of 754 SNPs from BaselineLD.4.bim\n", + "Read list of 981 individuals from BaselineLD.4.fam\n", + "Reading genotypes from BaselineLD.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to BaselineLD.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 1.1587\n", + "std 0.1460 0.3023\n", + "min 0.0087 0.8834\n", + "25% 0.0394 1.0077\n", + "50% 0.1295 1.0548\n", + "75% 0.2783 1.1547\n", + "max 0.5000 3.5811\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0516\n", + "L2 0.0516 1.0000\n", + "Analysis finished at Thu Nov 6 21:11:02 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.21s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.5 --l2 --out BaselineLD.5 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.5 --l2 --out BaselineLD.5 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.5 \\\n", + "--bfile BaselineLD.5 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:21 2025\n", + "Read list of 671 SNPs from BaselineLD.5.bim\n", + "Read list of 981 individuals from BaselineLD.5.fam\n", + "Reading genotypes from BaselineLD.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing 
LD Scores for 671 SNPs to BaselineLD.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 1.1557\n", + "std 0.1514 0.3052\n", + "min 0.0087 0.8851\n", + "25% 0.0395 1.0035\n", + "50% 0.1239 1.0486\n", + "75% 0.3017 1.1545\n", + "max 0.4980 3.3168\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.268\n", + "L2 0.268 1.000\n", + "Analysis finished at Thu Nov 6 21:11:22 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.6 --l2 --out BaselineLD.6 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.6 --l2 --out BaselineLD.6 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.6 \\\n", + "--bfile BaselineLD.6 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:48 2025\n", + "Read list of 701 SNPs from BaselineLD.6.bim\n", + "Read list of 981 individuals from BaselineLD.6.fam\n", + "Reading genotypes from BaselineLD.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to BaselineLD.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 1.2584\n", + "std 0.1473 0.4762\n", + "min 0.0087 0.9079\n", + "25% 0.0418 1.0161\n", + "50% 0.1310 1.0778\n", + "75% 0.2880 1.2383\n", + "max 0.4995 4.5224\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1685\n", + "L2 0.1685 1.0000\n", + "Analysis finished at Thu Nov 6 21:11:48 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.7 --l2 --out BaselineLD.7 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.7 --l2 --out BaselineLD.7 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.7 \\\n", + "--bfile BaselineLD.7 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:12:14 2025\n", + "Read list of 611 SNPs from BaselineLD.7.bim\n", + "Read list of 981 individuals from BaselineLD.7.fam\n", + "Reading genotypes from BaselineLD.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + 
"Writing LD Scores for 611 SNPs to BaselineLD.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 1.1485\n", + "std 0.1567 0.3127\n", + "min 0.0092 0.9191\n", + "25% 0.0367 1.0038\n", + "50% 0.1295 1.0474\n", + "75% 0.3084 1.1255\n", + "max 0.5000 3.0536\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1759\n", + "L2 0.1759 1.0000\n", + "Analysis finished at Thu Nov 6 21:12:14 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.25s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.8 --l2 --out BaselineLD.8 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.8 --l2 --out BaselineLD.8 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.8 \\\n", + "--bfile BaselineLD.8 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:12:35 2025\n", + "Read list of 562 SNPs from BaselineLD.8.bim\n", + "Read list of 981 individuals from BaselineLD.8.fam\n", + "Reading genotypes from BaselineLD.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to BaselineLD.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 1.1633\n", + "std 0.1475 0.3349\n", + "min 0.0087 0.9183\n", + "25% 0.0449 1.0160\n", + "50% 0.1376 1.0559\n", + "75% 0.3012 1.1600\n", + "max 0.4995 3.2220\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.177\n", + "L2 0.177 1.000\n", + "Analysis finished at Thu Nov 6 21:12:35 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.52s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.9 --l2 --out BaselineLD.9 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.9 --l2 --out BaselineLD.9 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.9 \\\n", + "--bfile BaselineLD.9 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:01 2025\n", + "Read list of 440 SNPs from BaselineLD.9.bim\n", + "Read list of 981 individuals from BaselineLD.9.fam\n", + "Reading genotypes from BaselineLD.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + 
"Writing LD Scores for 440 SNPs to BaselineLD.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 1.1344\n", + "std 0.1510 0.2982\n", + "min 0.0092 0.9294\n", + "25% 0.0401 1.0108\n", + "50% 0.1412 1.0428\n", + "75% 0.2987 1.1075\n", + "max 0.4985 3.0546\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2587\n", + "L2 0.2587 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:01 2025\n", + "Total time elapsed: 0.23s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.10 --l2 --out BaselineLD.10 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.10 --l2 --out BaselineLD.10 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.10 \\\n", + "--bfile BaselineLD.10 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:21 2025\n", + "Read list of 523 SNPs from BaselineLD.10.bim\n", + "Read list of 981 individuals from BaselineLD.10.fam\n", + "Reading genotypes from BaselineLD.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to BaselineLD.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 1.1418\n", + "std 0.1466 0.2758\n", + "min 0.0082 0.9121\n", + "25% 0.0403 1.0123\n", + "50% 0.1300 1.0485\n", + "75% 0.2752 1.1195\n", + "max 0.4959 2.8504\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1955\n", + "L2 0.1955 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:21 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.11 --l2 --out BaselineLD.11 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.11 --l2 --out BaselineLD.11 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.11 \\\n", + "--bfile BaselineLD.11 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:46 2025\n", + "Read list of 506 SNPs from BaselineLD.11.bim\n", + "Read list of 981 individuals from BaselineLD.11.fam\n", + "Reading genotypes from BaselineLD.11.bed\n", + "After filtering, 506 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to BaselineLD.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 1.2376\n", + "std 0.1520 0.5903\n", + "min 0.0102 0.9036\n", + "25% 0.0413 1.0065\n", + "50% 0.1208 1.0547\n", + "75% 0.2985 1.1684\n", + "max 0.4995 5.9738\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0226\n", + "L2 0.0226 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:46 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.03s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.12 --l2 --out BaselineLD.12 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.12 --l2 --out BaselineLD.12 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.12 \\\n", + "--bfile BaselineLD.12 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:05 2025\n", + "Read list of 507 SNPs from BaselineLD.12.bim\n", + "Read list of 981 individuals from BaselineLD.12.fam\n", + "Reading genotypes from BaselineLD.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to BaselineLD.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 1.1261\n", + "std 0.1498 0.2455\n", + "min 0.0092 0.9320\n", + "25% 0.0372 1.0033\n", + "50% 0.1254 1.0379\n", + "75% 0.2918 1.1104\n", + "max 0.5000 2.4573\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1584\n", + "L2 0.1584 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:05 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.13 --l2 --out BaselineLD.13 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.13 --l2 --out BaselineLD.13 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.13 \\\n", + "--bfile BaselineLD.13 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:29 2025\n", + "Read list of 383 SNPs from BaselineLD.13.bim\n", + "Read list of 981 individuals from BaselineLD.13.fam\n", + "Reading genotypes from BaselineLD.13.bed\n", + "After filtering, 383 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to BaselineLD.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 1.1527\n", + "std 0.1509 0.2924\n", + "min 0.0097 0.9433\n", + "25% 0.0413 1.0108\n", + "50% 0.1356 1.0419\n", + "75% 0.2854 1.1182\n", + "max 0.4959 2.4980\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1471\n", + "L2 0.1471 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:29 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.37s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.14 --l2 --out BaselineLD.14 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.14 --l2 --out BaselineLD.14 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.14 \\\n", + "--bfile BaselineLD.14 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:48 2025\n", + "Read list of 339 SNPs from BaselineLD.14.bim\n", + "Read list of 981 individuals from BaselineLD.14.fam\n", + "Reading genotypes from BaselineLD.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to BaselineLD.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 1.1955\n", + "std 0.1444 0.3391\n", + "min 0.0087 0.9398\n", + "25% 0.0370 1.0106\n", + "50% 0.1096 1.0488\n", + "75% 0.2808 1.1684\n", + "max 0.4929 2.3924\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1236\n", + "L2 0.1236 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:48 2025\n", + "Total time elapsed: 0.17s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.15 --l2 --out BaselineLD.15 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.15 --l2 --out BaselineLD.15 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.15 \\\n", + "--bfile BaselineLD.15 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:13 2025\n", + "Read list of 290 SNPs from BaselineLD.15.bim\n", + "Read list of 981 individuals from BaselineLD.15.fam\n", + "Reading genotypes from BaselineLD.15.bed\n", + "After filtering, 290 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to BaselineLD.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 1.1574\n", + "std 0.1504 0.3490\n", + "min 0.0092 0.9385\n", + "25% 0.0477 1.0066\n", + "50% 0.1430 1.0358\n", + "75% 0.3072 1.1152\n", + "max 0.4964 2.9996\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1579\n", + "L2 0.1579 1.0000\n", + "Analysis finished at Thu Nov 6 21:15:13 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.77s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.16 --l2 --out BaselineLD.16 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.16 --l2 --out BaselineLD.16 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.16 \\\n", + "--bfile BaselineLD.16 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:31 2025\n", + "Read list of 312 SNPs from BaselineLD.16.bim\n", + "Read list of 981 individuals from BaselineLD.16.fam\n", + "Reading genotypes from BaselineLD.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to BaselineLD.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 1.1088\n", + "std 0.1502 0.2288\n", + "min 0.0097 0.9313\n", + "25% 0.0391 1.0001\n", + "50% 0.1381 1.0333\n", + "75% 0.2792 1.0959\n", + "max 0.4990 2.3852\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0326\n", + "L2 0.0326 1.0000\n", + "Analysis finished at Thu Nov 6 21:15:31 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.92s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.17 --l2 --out BaselineLD.17 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.17 --l2 --out BaselineLD.17 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.17 \\\n", + "--bfile BaselineLD.17 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:50 2025\n", + "Read list of 272 SNPs from BaselineLD.17.bim\n", + "Read list of 981 individuals from BaselineLD.17.fam\n", + "Reading genotypes from BaselineLD.17.bed\n", + "After filtering, 272 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to BaselineLD.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 1.1639\n", + "std 0.1557 0.3838\n", + "min 0.0082 0.9350\n", + "25% 0.0401 1.0009\n", + "50% 0.1656 1.0279\n", + "75% 0.3336 1.0893\n", + "max 0.4954 3.4348\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.074\n", + "L2 0.074 1.000\n", + "Analysis finished at Thu Nov 6 21:15:50 2025\n", + "Total time elapsed: 0.17s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.57s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.18 --l2 --out BaselineLD.18 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.18 --l2 --out BaselineLD.18 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.18 \\\n", + "--bfile BaselineLD.18 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:08 2025\n", + "Read list of 294 SNPs from BaselineLD.18.bim\n", + "Read list of 981 individuals from BaselineLD.18.fam\n", + "Reading genotypes from BaselineLD.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to BaselineLD.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 1.1399\n", + "std 0.1518 0.2752\n", + "min 0.0097 0.9468\n", + "25% 0.0515 1.0082\n", + "50% 0.1590 1.0393\n", + "75% 0.3068 1.1086\n", + "max 0.4939 2.6077\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1146\n", + "L2 0.1146 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:08 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.76s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.19 --l2 --out BaselineLD.19 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.19 --l2 --out BaselineLD.19 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.19 \\\n", + "--bfile BaselineLD.19 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:29 2025\n", + "Read list of 239 SNPs from BaselineLD.19.bim\n", + "Read list of 981 individuals from BaselineLD.19.fam\n", + "Reading genotypes from BaselineLD.19.bed\n", + "After filtering, 239 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to BaselineLD.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 1.1145\n", + "std 0.1483 0.2572\n", + "min 0.0102 0.9521\n", + "25% 0.0418 1.0033\n", + "50% 0.1448 1.0256\n", + "75% 0.3007 1.0950\n", + "max 0.4969 2.5550\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1708\n", + "L2 0.1708 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:29 2025\n", + "Total time elapsed: 0.16s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.31s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.20 --l2 --out BaselineLD.20 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.20 --l2 --out BaselineLD.20 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.20 \\\n", + "--bfile BaselineLD.20 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:48 2025\n", + "Read list of 227 SNPs from BaselineLD.20.bim\n", + "Read list of 981 individuals from BaselineLD.20.fam\n", + "Reading genotypes from BaselineLD.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to BaselineLD.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 1.0926\n", + "std 0.1404 0.2111\n", + "min 0.0102 0.9478\n", + "25% 0.0538 0.9939\n", + "50% 0.1386 1.0248\n", + "75% 0.2796 1.0713\n", + "max 0.4837 2.2165\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2494\n", + "L2 0.2494 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:48 2025\n", + "Total time elapsed: 0.15s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.27s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.21 --l2 --out BaselineLD.21 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.21 --l2 --out BaselineLD.21 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.21 \\\n", + "--bfile BaselineLD.21 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:17:04 2025\n", + "Read list of 135 SNPs from BaselineLD.21.bim\n", + "Read list of 981 individuals from BaselineLD.21.fam\n", + "Reading genotypes from BaselineLD.21.bed\n", + "After filtering, 135 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to BaselineLD.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 1.1085\n", + "std 0.1475 0.2201\n", + "min 0.0087 0.9721\n", + "25% 0.0581 1.0043\n", + "50% 0.1590 1.0215\n", + "75% 0.3129 1.0810\n", + "max 0.4893 2.1313\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.301\n", + "L2 0.301 1.000\n", + "Analysis finished at Thu Nov 6 21:17:04 2025\n", + "Total time elapsed: 0.16s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.15s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.22 --l2 --out BaselineLD.22 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.22 --l2 --out BaselineLD.22 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.22 \\\n", + "--bfile BaselineLD.22 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:17:19 2025\n", + "Read list of 136 SNPs from BaselineLD.22.bim\n", + "Read list of 981 individuals from BaselineLD.22.fam\n", + "Reading genotypes from BaselineLD.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to BaselineLD.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 1.0711\n", + "std 0.1358 0.1791\n", + "min 0.0102 0.9639\n", + "25% 0.0405 1.0011\n", + "50% 0.1241 1.0171\n", + "75% 0.2683 1.0539\n", + "max 0.4944 2.1255\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0206\n", + "L2 -0.0206 1.0000\n", + "Analysis finished at Thu Nov 6 21:17:19 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
def generate_fake_sumstats(
    dd,
    output_file="fake_munged.sumstats.gz",
    subset_frac=0.8,
    n_extra_snps=1000,
    seed=42,
    n_samples=336924.0,
):
    """Write a synthetic summary-statistics table (SNP, A1, A2, Z, N) for demo purposes.

    A random fraction of the variants in ``dd.G.var`` is kept under their real
    IDs, ``n_extra_snps`` invented variants are appended, and every row gets a
    standard-normal Z-score. Roughly 1% of the rows are then redrawn with a
    three times larger standard deviation to mimic a few stronger signals.

    Parameters
    ----------
    dd
        Object whose ``dd.G.var`` is a DataFrame indexed by variant ID with
        allele columns ``"a0"`` and ``"a1"``.
    output_file
        Destination path; the table is written tab-separated and gzip-compressed.
    subset_frac
        Fraction (0..1) of the variants in ``dd.G.var`` to keep.
    n_extra_snps
        Number of invented variants (IDs of the form ``chrom_pos_ref_alt``) to add.
    seed
        Seed for the random draws.
    n_samples
        Value written to the ``N`` column of every row.

    Returns
    -------
    The ``output_file`` argument, unchanged.

    Raises
    ------
    ValueError
        If ``subset_frac`` is outside ``[0, 1]``.
    """
    if not 0 <= subset_frac <= 1:
        raise ValueError(f"subset_frac must be between 0 and 1, got {subset_frac}")

    # A private legacy generator yields the same draws as
    # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, but does not
    # reseed the process-wide RNG that other notebook cells may rely on.
    rng = np.random.RandomState(seed)

    variants = dd.G.var
    n_real = int(len(variants) * subset_frac)
    real_idx = rng.choice(len(variants), n_real, replace=False)

    snp_ids = variants.index[real_idx].tolist()
    allele_0 = variants["a0"].iloc[real_idx].tolist()
    allele_1 = variants["a1"].iloc[real_idx].tolist()

    bases = ["A", "C", "G", "T"]
    for _ in range(n_extra_snps):
        # Local name deliberately differs from the notebook-level `chrom` parameter.
        fake_chrom = rng.randint(1, 23)
        pos = rng.randint(1000000, 50000000)
        ref = rng.choice(bases)
        alt = rng.choice([a for a in bases if a != ref])
        snp_ids.append(f"{fake_chrom}_{pos}_{ref}_{alt}")
        allele_0.append(ref)
        allele_1.append(alt)

    z_scores = rng.randn(len(snp_ids))
    # Redraw ~1% of the rows with a larger spread.
    large_effect_idx = rng.choice(len(snp_ids), int(len(snp_ids) * 0.01), replace=False)
    z_scores[large_effect_idx] = rng.randn(len(large_effect_idx)) * 3

    fake_sumstats = pd.DataFrame({"SNP": snp_ids, "A1": allele_1, "A2": allele_0, "Z": z_scores, "N": n_samples})

    fake_sumstats.to_csv(output_file, sep="\t", index=False, compression="gzip", float_format="%.3f")
    print(f"Generated {len(fake_sumstats)} SNPs ({n_real} real, {n_extra_snps} fake) -> {output_file}")
    return output_file
--ref-ld-chr-cts celltype_ldscores.txt --out CHD_CD8_Naive_h2\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2-cts fake_munged.sumstats.gz \\\n", + "--ref-ld-chr BaselineLD. \\\n", + "--out CHD_CD8_Naive_h2 \\\n", + "--ref-ld-chr-cts celltype_ldscores.txt \\\n", + "--w-ld-chr BaselineLD. \n", + "\n", + "Beginning analysis at Thu Nov 6 22:23:46 2025\n", + "Reading summary statistics from fake_munged.sumstats.gz ...\n", + "Read summary statistics for 9239 SNPs.\n", + "Reading reference panel LD Score from BaselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 10299 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from BaselineLD.[1-22] ...\n", + "Read regression weight LD Scores for 10299 SNPs.\n", + "After merging with reference panel LD, 8239 SNPs remain.\n", + "After merging with regression SNP LD, 8239 SNPs remain.\n", + "WARNING: number of SNPs less than 200k; this is almost always bad.\n", + "Removed 0 SNPs with chi^2 > 336.924 (8239 SNPs remain)\n", + "Reading cts reference panel LD Score from cts_ldscores_CD8_Naive.[1-22] ...\n", + "Performing regression.\n", + "Reading cts reference panel LD Score from cts_ldscores_CD4_Naive.[1-22] ...\n", + "Performing regression.\n", + "Results printed to CHD_CD8_Naive_h2.cell_type_results.txt\n", + "Analysis finished at Thu Nov 6 22:23:47 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no 
# Cell-type-specific heritability on the synthetic summary statistics.
# The per-chromosome BaselineLD.* files produced earlier are passed both as the
# reference LD scores and as the regression weights; celltype_ldscores.txt is
# handed to LDSC's --ref-ld-chr-cts option.
result = estimate_celltype_specific_heritability(
    runner=runner,
    run=True,
    sumstats_file="fake_munged.sumstats.gz",
    ref_ld_chr_cts="celltype_ldscores.txt",
    ref_ld_chr="BaselineLD.",
    w_ld_chr="BaselineLD.",
    out_prefix="CHD_CD8_Naive_h2",
)
"markdown", + "metadata": {}, + "source": [ + "LDSC requires \"munged\" (cleaned and standardized) summary statistics. The `munge_sumstats` function performs quality control, standardizes column names, and prepares the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Running munge_sumstats: /ldsc/munge_sumstats.py --sumstats /Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz --out GCST004787_summary_stats_munged --N 336924 --signed-sumstats logor,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/munge_sumstats.py --sumstats /cellink_data/GCST004787_summary_stats.tsv.gz --out GCST004787_summary_stats_munged --N 336924 --signed-sumstats logor,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats logor,0 \\\n", + "--out GCST004787_summary_stats_munged \\\n", + "--N 336924.0 \\\n", + "--a1 effect_allele \\\n", + "--a2 other_allele \\\n", + "--snp variant_id \\\n", + "--sumstats /cellink_data/GCST004787_summary_stats.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "logor:\tDirectional summary 
# Munge the first GWAS download into LDSC's .sumstats.gz format.
munged_file_1 = munge_sumstats(
    sumstats_file=gwas_summary_statistic_path_1,
    # Two .stem calls peel ".gz" and then ".tsv" off the downloaded file name.
    out_prefix=f"{Path(Path(gwas_summary_statistic_path_1).stem).stem}_munged",
    # Column names as they appear in the harmonised GWAS Catalog file.
    snp_col="variant_id",
    a1_col="effect_allele",
    a2_col="other_allele",
    p_col="p_value",
    signed_sumstats=("logor", 0),
    n_samples=336924,
    # Quality-control thresholds.
    info_min=0.9,
    maf_min=0.01,
    run=True,
    runner=runner,
)
# Reference-panel LD scores (EUR) used as --ref-ld-chr below.
ldscores_path, ldscores_prefix = get_1000genomes_ld_scores(
    config_path="../../src/cellink/resources/config/1000genomes.yaml",
    population="EUR",
    return_path=True,
)

# Matching regression weights (EUR) used as --w-ld-chr below.
ldweights_path, ldweights_prefix = get_1000genomes_ld_weights(
    config_path="../../src/cellink/resources/config/1000genomes.yaml",
    population="EUR",
    return_path=True,
)
--out GCST004787_summary_stats_h2\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --h2 GCST004787_summary_stats_munged.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out GCST004787_summary_stats_h2\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 GCST004787_summary_stats_munged.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. \\\n", + "--out GCST004787_summary_stats_h2 \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\n", + "\n", + "Beginning analysis at Thu Nov 6 10:37:03 2025\n", + "Reading summary statistics from GCST004787_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 7164926 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177210 SNPs remain.\n", + "After merging with regression SNP LD, 1174301 SNPs remain.\n", + "Removed 3 SNPs with chi^2 > 336.924 (1174298 SNPs remain)\n", + "Total Observed scale h2: 0.0482 (0.0031)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 
Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: -1.2111e-02 9.6569e-04 -1.5190e-03 -1.4271e-04 -1.4653e-04 6.8730e-05 1.8687e-03 -9.9421e-03 1.8025e-02 1.1469e-02 -1.0165e-03 -9.5619e-03 1.6673e-04 1.0070e-03 2.8008e-03 2.5649e-04 3.5439e-04 -1.1089e-03 4.3438e-03 4.2590e-03 -3.0551e-03 -5.3830e-04 -7.5871e-04 -1.1994e-03 -7.7712e-04 -2.9209e-03 -1.9523e-03 1.0687e-02 2.2717e-03 1.2578e-02 5.5138e-03 -2.1547e-03 -1.8587e-03 -2.6358e-03 -8.1469e-04 -1.7456e-03 -2.3468e-03 -1.4169e-03 -1.2684e-03 2.7806e-03 -3.6505e-04 -2.0104e-03 -4.8563e-03 1.8942e-04 2.2270e-03 
-3.0406e-03 -1.3599e-03 8.8194e-04 3.1442e-03 -1.1567e-04 -4.2058e-04 4.6045e-03 -7.1973e-03 -1.5086e-02 1.2136e-03 8.1949e-04 3.0214e-03 4.8475e-03 3.3215e-03 2.1972e-03 5.2089e-03 3.9394e-03 4.3883e-03 6.0610e-03 4.0806e-03 -8.3636e-09 -1.8387e-05 -5.2094e-03 -6.6823e-03 -3.7387e-04 -6.2807e-03 1.1181e-16 5.0034e-03 3.4716e-03 -7.7899e-04 -5.3968e-04\n", + " 8.2235e-04 3.9258e-05 -2.0397e-03 -7.0521e-03 5.7726e-03 1.7156e-02 2.6581e-03 -8.3309e-04 4.1682e-03 -3.2282e-03 1.1065e-03 -1.0149e-03 -3.9125e-03 -5.0393e-05 1.9047e-03 2.8446e-03 4.1525e-03 -2.0550e-04 2.5138e-03 2.6276e-04 4.5676e-04\n", + "Observed scale h2 SE: 3.6876e-02 2.8241e-03 3.4639e-03 4.1116e-03 8.9579e-03 4.0255e-03 4.9871e-03 7.6983e-03 1.0069e-02 1.4079e-02 2.4109e-02 1.2775e-02 1.1922e-03 1.9142e-03 3.3326e-03 3.4785e-03 8.1126e-03 7.6878e-03 5.5114e-03 2.1808e-03 6.6970e-03 3.7155e-03 9.1362e-03 1.0647e-02 5.1892e-03 5.8420e-03 7.1083e-03 4.7557e-03 5.6549e-03 5.1946e-03 4.8613e-03 2.5779e-03 1.9454e-03 1.4419e-03 2.2687e-03 3.2906e-03 1.8913e-03 2.0055e-02 6.1729e-03 2.3233e-03 1.0197e-03 6.0144e-03 8.4002e-03 1.1188e-02 1.0522e-02 2.0156e-03 2.3500e-03 1.8394e-03 3.3389e-03 7.7766e-04 1.4525e-03 2.5864e-03 4.0733e-03 1.1147e-02 1.8261e-03 9.4725e-04 1.2719e-03 1.2614e-03 1.0969e-03 8.3697e-04 1.0659e-03 1.2968e-03 1.0163e-03 1.5718e-03 2.0401e-03 1.8306e-08 9.6177e-06 2.8681e-03 7.6236e-03 3.0607e-03 8.4407e-03 1.0669e-16 1.8470e-03 2.3519e-03 2.1682e-03 3.0374e-03\n", + " 1.3747e-03 8.5399e-04 3.5229e-03 1.8007e-02 5.5563e-03 1.4238e-02 3.2248e-03 5.6425e-03 2.2698e-03 2.4325e-03 1.9759e-03 9.2022e-04 2.3846e-03 1.4766e-03 1.5829e-03 2.2442e-03 1.6180e-03 1.7903e-03 1.3109e-03 6.2655e-04 4.3958e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 
1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: -2.5111e-01 2.0022e-02 -3.1493e-02 -2.9588e-03 -3.0380e-03 1.4250e-03 3.8744e-02 -2.0613e-01 3.7372e-01 2.3779e-01 -2.1075e-02 -1.9825e-01 3.4570e-03 2.0879e-02 5.8070e-02 5.3178e-03 7.3477e-03 -2.2992e-02 9.0061e-02 8.8303e-02 -6.3341e-02 -1.1161e-02 -1.5730e-02 -2.4867e-02 -1.6112e-02 -6.0560e-02 -4.0478e-02 2.2157e-01 4.7100e-02 2.6079e-01 1.1432e-01 -4.4673e-02 -3.8536e-02 -5.4650e-02 -1.6891e-02 -3.6192e-02 -4.8657e-02 -2.9378e-02 -2.6298e-02 5.7650e-02 -7.5687e-03 -4.1682e-02 -1.0069e-01 3.9273e-03 4.6174e-02 -6.3042e-02 -2.8196e-02 1.8285e-02 6.5190e-02 -2.3983e-03 -8.7199e-03 9.5466e-02 -1.4922e-01 -3.1278e-01 2.5162e-02 1.6991e-02 6.2644e-02 1.0050e-01 6.8866e-02 4.5555e-02 1.0800e-01 8.1676e-02 9.0984e-02 1.2566e-01 8.4605e-02 -1.7341e-07 -3.8123e-04 -1.0801e-01 -1.3855e-01 -7.7515e-03 -1.3022e-01 2.3181e-15 1.0374e-01 7.1978e-02 -1.6151e-02 -1.1189e-02\n", + " 1.7050e-02 8.1395e-04 -4.2290e-02 -1.4621e-01 1.1968e-01 3.5570e-01 5.5111e-02 -1.7273e-02 8.6421e-02 -6.6931e-02 2.2941e-02 -2.1042e-02 -8.1119e-02 -1.0448e-03 3.9491e-02 5.8977e-02 8.6096e-02 -4.2608e-03 5.2118e-02 5.4478e-03 9.4701e-03\n", + "Enrichment: -4.5303e+00 
2.5332e+01 -1.1510e+01 -2.1637e+00 -1.7939e-01 1.0795e+00 1.4878e+01 -2.7473e+01 1.6736e+01 3.9015e+01 -2.2940e+00 -1.0836e+01 1.4458e+01 2.5609e+01 2.4990e+01 2.0037e+00 1.5841e+00 -2.0790e+00 4.1764e+00 5.0711e+01 -4.2557e+00 -3.0223e+00 -1.6806e+00 -1.0595e+00 -1.5950e+00 -2.6411e+01 -5.4955e+00 3.2725e+01 2.2252e+01 3.7549e+01 1.9742e+01 -2.0802e+00 -7.4435e+01 -1.1912e+02 -1.2245e+01 -1.4092e+01 -8.2893e+01 -1.1522e+00 -1.8359e+00 6.2203e+00 -4.3029e+01 -5.7417e+00 -8.6471e+00 2.0526e-01 1.9991e+00 -6.3872e+01 -3.0771e+01 2.9539e+01 7.6995e+01 -7.9242e+00 -7.3557e+00 8.2385e+01 -3.9697e+01 -3.2332e+00 5.5701e+01 2.9947e+00 1.1318e+01 1.8204e+01 1.2338e+01 8.3594e+00 1.9554e+01 1.4775e+01 1.6377e+01 2.2425e+01 1.5502e+01 -9.2218e-01 -2.4595e+00 -1.2552e+00 -5.4235e-01 -7.8690e-01 -2.3372e+02 -1.7808e+00 1.8099e+02 7.8485e+01 -2.1790e+01 -6.3619e+00\n", + " 9.8590e+01 5.4079e+00 -2.5919e+01 -6.9871e+00 1.0071e+02 2.0194e+01 5.1599e+01 -1.9905e+00 1.1507e+02 -6.8492e+01 2.7199e+01 -1.0578e+02 -4.4045e+01 -1.9239e+00 1.7083e+02 1.9999e+02 3.0230e+02 -8.5157e+00 1.4186e+01 3.9367e+01 2.9607e+02\n", + "Coefficients: -2.0317e-09 1.1361e-08 -5.1620e-09 -9.7038e-10 -8.0450e-11 4.8414e-10 6.6726e-09 -1.2321e-08 7.5057e-09 1.7497e-08 -1.0288e-09 -4.8598e-09 6.4839e-09 1.1485e-08 1.1207e-08 8.9859e-10 7.1042e-10 -9.3238e-10 1.8730e-09 2.2742e-08 -1.9086e-09 -1.3554e-09 -7.5369e-10 -4.7514e-10 -7.1532e-10 -1.1845e-08 -2.4646e-09 1.4676e-08 9.9796e-09 1.6840e-08 8.8538e-09 -9.3290e-10 -3.3382e-08 -5.3422e-08 -5.4914e-09 -6.3198e-09 -3.7175e-08 -5.1671e-10 -8.2333e-10 2.7896e-09 -1.9297e-08 -2.5750e-09 -3.8780e-09 9.2052e-11 8.9653e-10 -2.8645e-08 -1.3800e-08 1.3247e-08 3.4530e-08 -3.5538e-09 -3.2988e-09 3.6947e-08 -1.7803e-08 -1.4500e-09 2.4980e-08 1.3431e-09 5.0759e-09 8.1639e-09 5.5335e-09 3.7490e-09 8.7694e-09 6.6261e-09 7.3447e-09 1.0057e-08 6.9523e-09 -4.1357e-10 -1.1030e-09 -5.6292e-10 -2.4323e-10 -3.5290e-10 -1.0482e-07 -7.9863e-10 8.1170e-08 3.5198e-08 -9.7721e-09 
# SNP heritability of the first trait against the downloaded reference panel.
h2_results = estimate_heritability(
    # Same "<study>_summary_stats" stem that the munging step used for its output prefix.
    sumstats_file=f"{Path(Path(gwas_summary_statistic_path_1).stem).stem}_munged.sumstats.gz",
    out_prefix=f"{Path(Path(gwas_summary_statistic_path_1).stem).stem}_h2",
    ref_ld_chr=os.path.join(ldscores_path, ldscores_prefix),
    w_ld_chr=os.path.join(ldweights_path, ldweights_prefix),
    run=True,
    runner=runner,
)
"INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/munge_sumstats.py --sumstats /cellink_data/GCST90043957_summary_stats.tsv.gz --out GCST90043957_summary_stats_munged --N 456348 --signed-sumstats beta,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats beta,0 \\\n", + "--out GCST90043957_summary_stats_munged \\\n", + "--N 456348.0 \\\n", + "--a1 effect_allele \\\n", + "--a2 other_allele \\\n", + "--snp variant_id \\\n", + "--sumstats /cellink_data/GCST90043957_summary_stats.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "p_value:\tp-Value\n", + "other_allele:\tAllele 2, interpreted as non-ref allele for signed sumstat.\n", + "n:\tSample size\n", + "beta:\tDirectional summary statistic as specified by --signed-sumstats.\n", + "variant_id:\tVariant ID (e.g., rs number)\n", + "effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "\n", + "Reading sumstats from /cellink_data/GCST90043957_summary_stats.tsv.gz into memory 5000000 SNPs at a time.\n", + "... 
done\n", + "Read 11831294 SNPs from --sumstats file.\n", + "Removed 0 SNPs with missing values.\n", + "Removed 0 SNPs with INFO <= 0.9.\n", + "Removed 0 SNPs with MAF <= 0.01.\n", + "Removed 0 SNPs with out-of-bounds p-values.\n", + "Removed 1811396 variants that were not SNPs or were strand-ambiguous.\n", + "10019898 SNPs remain.\n", + "Removed 0 SNPs with duplicated rs numbers (10019898 SNPs remain).\n", + "Removed 0 SNPs with N < 304180.0 (10019898 SNPs remain).\n", + "Median value of SIGNED_SUMSTATS was -0.0001342485, which seems sensible.\n", + "Writing summary statistics for 10019898 SNPs (10019898 with nonmissing beta) to GCST90043957_summary_stats_munged.sumstats.gz.\n", + "\n", + "Metadata:\n", + "Mean chi^2 = 1.156\n", + "Lambda GC = 1.112\n", + "Max chi^2 = 378.956\n", + "2388 Genome-wide significant SNPs (some may have been removed by filtering).\n", + "\n", + "Conversion finished at Thu Nov 6 22:30:15 2025\n", + "Total time elapsed: 1.0m:54.97s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "munged_file_2 = munge_sumstats(\n", + " sumstats_file=gwas_summary_statistic_path_2,\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_2).stem).stem + \"_munged\"),\n", + " info_min=0.9,\n", + " maf_min=0.01,\n", + " signed_sumstats=(\"beta\", 0),\n", + " run=True,\n", + " p_col=\"p_value\",\n", + " snp_col=\"variant_id\",\n", + " a1_col=\"effect_allele\",\n", + " a2_col=\"other_allele\",\n", + " n_samples=456348,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, estimate the genetic correlation between the two traits. LDSC does not constrain the estimate to the interval [-1, 1], so for two GWAS of closely related phenotypes, as here, sampling noise can push the reported `rg` slightly above 1:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"INFO:cellink.tl.external._ldsc:Estimating genetic correlation: /ldsc/ldsc.py --rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz --ref-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out CHD_rg\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out CHD_rg\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. \\\n", + "--out CHD_rg \\\n", + "--rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\n", + "\n", + "Beginning analysis at Thu Nov 6 22:30:48 2025\n", + "Reading summary statistics from GCST004787_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 7164926 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177210 SNPs remain.\n", + "After merging with regression SNP LD, 1174301 SNPs remain.\n", + "Computing rg for phenotype 2/2\n", + "Reading summary statistics from GCST90043957_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 10019898 SNPs.\n", + "After merging with summary statistics, 1157270 SNPs remain.\n", + "1157270 SNPs with valid alleles.\n", + "\n", + "Heritability of phenotype 1\n", + "---------------------------\n", + "Total Observed scale h2: 0.0491 (0.0038)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 
PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: -1.0015e-02 1.0538e-03 -2.4187e-03 -8.5473e-04 -2.3173e-03 -7.0296e-04 3.3579e-03 -1.5240e-02 1.6266e-02 1.1006e-02 2.9414e-03 -1.3212e-02 -9.9847e-05 1.4003e-03 3.1795e-03 -2.5359e-04 4.9086e-04 1.4679e-03 5.3681e-03 3.7131e-03 -4.7677e-03 7.1913e-04 
2.2203e-03 -3.9735e-03 -4.0377e-04 -3.1958e-03 4.9703e-04 9.7101e-03 1.9432e-03 1.2466e-02 6.1202e-03 -2.6276e-03 -1.9326e-03 -3.1809e-03 -9.6151e-04 -6.3418e-04 -2.9363e-03 -4.9169e-03 -1.3964e-03 2.6407e-03 -4.3309e-04 -3.2521e-04 -8.7740e-03 -5.4192e-05 5.4624e-03 -3.0598e-03 -1.4553e-03 9.4411e-04 3.3972e-03 -1.8424e-04 -5.0624e-04 4.0449e-03 -5.9383e-03 -1.3807e-02 1.6324e-03 2.2984e-03 4.9572e-03 5.2428e-03 3.9244e-03 3.0600e-03 3.8396e-03 4.6460e-03 5.1612e-03 5.8238e-03 4.4137e-03 -1.7508e-08 -3.0546e-05 -6.6402e-03 -5.6393e-03 3.4633e-03 -7.1287e-03 1.6247e-17 4.7571e-03 3.7948e-03 -1.2112e-03 -6.3892e-05\n", + " 6.4765e-04 -2.5847e-05 -1.9629e-03 -1.8892e-03 4.7842e-03 1.2490e-02 2.5051e-03 -1.0641e-03 3.6368e-03 -3.1866e-03 1.7460e-03 -1.0900e-03 -4.7720e-03 -3.4389e-04 1.1120e-03 2.4022e-03 4.8712e-03 -2.9330e-04 2.5735e-03 5.4408e-04 3.0727e-04\n", + "Observed scale h2 SE: 3.3113e-02 3.0412e-03 3.7903e-03 4.0891e-03 9.6479e-03 4.4886e-03 6.0158e-03 8.4307e-03 1.0836e-02 1.5152e-02 2.6129e-02 1.4501e-02 1.3498e-03 2.1675e-03 3.4629e-03 3.8180e-03 8.7109e-03 7.7280e-03 5.8321e-03 2.4646e-03 7.1905e-03 4.0641e-03 9.6940e-03 1.2233e-02 6.3812e-03 6.6464e-03 7.4992e-03 4.8999e-03 6.0768e-03 5.5779e-03 5.4241e-03 2.4000e-03 2.0993e-03 1.5682e-03 2.4484e-03 3.9649e-03 2.2279e-03 1.9340e-02 6.7085e-03 2.3778e-03 1.1476e-03 6.5687e-03 8.8161e-03 1.0342e-02 1.1430e-02 2.2670e-03 2.5392e-03 2.0371e-03 3.6266e-03 8.5538e-04 1.5581e-03 2.5421e-03 4.3575e-03 1.2080e-02 1.9372e-03 1.9705e-03 2.0474e-03 1.3799e-03 1.1893e-03 9.8355e-04 1.3933e-03 1.5192e-03 1.0290e-03 1.5327e-03 2.1380e-03 2.1371e-08 1.2226e-05 3.0942e-03 7.3861e-03 4.8725e-03 8.1873e-03 1.3327e-16 1.9197e-03 2.5245e-03 2.1664e-03 3.2087e-03\n", + " 1.4117e-03 9.2452e-04 3.4364e-03 1.8878e-02 6.3746e-03 1.5571e-02 3.4471e-03 6.2454e-03 2.2727e-03 2.6651e-03 2.0385e-03 9.7837e-04 2.3306e-03 1.5901e-03 1.5159e-03 2.2262e-03 1.8028e-03 1.8342e-03 1.2877e-03 6.7567e-04 4.3004e-04\n", + "Proportion of 
SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: -2.0388e-01 2.1451e-02 -4.9237e-02 -1.7400e-02 -4.7173e-02 -1.4310e-02 6.8356e-02 -3.1025e-01 3.3112e-01 2.2404e-01 5.9878e-02 -2.6896e-01 -2.0326e-03 2.8506e-02 6.4726e-02 -5.1624e-03 9.9925e-03 2.9882e-02 1.0928e-01 7.5588e-02 -9.7055e-02 1.4639e-02 4.5199e-02 -8.0888e-02 -8.2195e-03 -6.5057e-02 1.0118e-02 1.9767e-01 3.9557e-02 2.5376e-01 1.2459e-01 -5.3490e-02 -3.9343e-02 -6.4753e-02 -1.9574e-02 -1.2910e-02 -5.9775e-02 -1.0009e-01 -2.8427e-02 5.3757e-02 -8.8164e-03 -6.6202e-03 -1.7861e-01 -1.1032e-03 1.1120e-01 -6.2289e-02 -2.9625e-02 1.9219e-02 6.9157e-02 -3.7505e-03 -1.0306e-02 8.2343e-02 -1.2089e-01 -2.8107e-01 3.3232e-02 4.6788e-02 1.0091e-01 1.0673e-01 7.9890e-02 6.2292e-02 7.8162e-02 9.4579e-02 1.0507e-01 1.1856e-01 8.9849e-02 -3.5640e-07 -6.2183e-04 -1.3518e-01 -1.1480e-01 7.0503e-02 -1.4512e-01 3.3073e-16 9.6841e-02 7.7252e-02 -2.4656e-02 -1.3007e-03\n", + " 
1.3184e-02 -5.2616e-04 -3.9960e-02 -3.8459e-02 9.7392e-02 2.5427e-01 5.0996e-02 -2.1661e-02 7.4035e-02 -6.4869e-02 3.5543e-02 -2.2189e-02 -9.7145e-02 -7.0006e-03 2.2637e-02 4.8901e-02 9.9163e-02 -5.9708e-03 5.2390e-02 1.1076e-02 6.2552e-03\n", + "Enrichment: -3.6782e+00 2.7141e+01 -1.7995e+01 -1.2724e+01 -2.7855e+00 -1.0841e+01 2.6250e+01 -4.1350e+01 1.4828e+01 3.6759e+01 6.5178e+00 -1.4701e+01 -8.5008e+00 3.4965e+01 2.7854e+01 -1.9451e+00 2.1543e+00 2.7021e+00 5.0676e+00 4.3409e+01 -6.5209e+00 3.9643e+00 4.8288e+00 -3.4462e+00 -8.1368e-01 -2.8372e+01 1.3737e+00 2.9194e+01 1.8689e+01 3.6537e+01 2.1516e+01 -2.4907e+00 -7.5992e+01 -1.4114e+02 -1.4189e+01 -5.0267e+00 -1.0183e+02 -3.9255e+00 -1.9844e+00 5.8003e+00 -5.0123e+01 -9.1193e-01 -1.5340e+01 -5.7657e-02 4.8142e+00 -6.3109e+01 -3.2330e+01 3.1047e+01 8.1680e+01 -1.2392e+01 -8.6933e+00 7.1060e+01 -3.2158e+01 -2.9054e+00 7.3564e+01 8.2467e+00 1.8232e+01 1.9331e+01 1.4314e+01 1.1431e+01 1.4152e+01 1.7109e+01 1.8912e+01 2.1156e+01 1.6463e+01 -1.8954e+00 -4.0117e+00 -1.5709e+00 -4.4940e-01 7.1571e+00 -2.6046e+02 -2.5407e-01 1.6896e+02 8.4235e+01 -3.3263e+01 -7.3951e-01\n", + " 7.6237e+01 -3.4958e+00 -2.4491e+01 -1.8378e+00 8.1955e+01 1.4435e+01 4.7746e+01 -2.4963e+00 9.8579e+01 -6.6382e+01 4.2139e+01 -1.1154e+02 -5.2746e+01 -1.2891e+01 9.7922e+01 1.6582e+02 3.4819e+02 -1.1933e+01 1.4260e+01 8.0036e+01 1.9556e+02\n", + "Coefficients: -1.6801e-09 1.2397e-08 -8.2195e-09 -5.8119e-09 -1.2723e-09 -4.9517e-09 1.1990e-08 -1.8887e-08 6.7730e-09 1.6790e-08 2.9771e-09 -6.7151e-09 -3.8828e-09 1.5970e-08 1.2723e-08 -8.8846e-10 9.8399e-10 1.2342e-09 2.3147e-09 1.9827e-08 -2.9785e-09 1.8107e-09 2.2056e-09 -1.5741e-09 -3.7166e-10 -1.2959e-08 6.2744e-10 1.3335e-08 8.5363e-09 1.6689e-08 9.8275e-09 -1.1377e-09 -3.4710e-08 -6.4468e-08 -6.4811e-09 -2.2960e-09 -4.6514e-08 -1.7930e-09 -9.0642e-10 2.6494e-09 -2.2894e-08 -4.1654e-10 -7.0065e-09 -2.6336e-11 2.1990e-09 -2.8826e-08 -1.4767e-08 1.4181e-08 3.7308e-08 -5.6602e-09 -3.9708e-09 
3.2458e-08 -1.4689e-08 -1.3271e-09 3.3601e-08 3.7668e-09 8.3278e-09 8.8297e-09 6.5379e-09 5.2210e-09 6.4641e-09 7.8146e-09 8.6383e-09 9.6633e-09 7.5197e-09 -8.6573e-10 -1.8324e-09 -7.1753e-10 -2.0527e-10 3.2691e-09 -1.1897e-07 -1.1605e-10 7.7175e-08 3.8475e-08 -1.5193e-08 -3.3778e-10\n", + " 3.4822e-08 -1.5967e-09 -1.1187e-08 -8.3945e-10 3.7434e-08 6.5935e-09 2.1809e-08 -1.1402e-09 4.5027e-08 -3.0321e-08 1.9247e-08 -5.0948e-08 -2.4092e-08 -5.8879e-09 4.4727e-08 7.5740e-08 1.5904e-07 -5.4507e-09 6.5133e-09 3.6557e-08 8.9323e-08\n", + "Coefficient SE: 5.5549e-09 3.5779e-08 1.2881e-08 2.7805e-08 5.2972e-09 3.1618e-08 2.1481e-08 1.0448e-08 4.5121e-09 2.3115e-08 2.6446e-08 7.3700e-09 5.2489e-08 2.4720e-08 1.3856e-08 1.3376e-08 1.7462e-08 6.4977e-09 2.5148e-09 1.3160e-08 4.4921e-09 1.0233e-08 9.6299e-09 4.8462e-09 5.8738e-09 2.6952e-08 9.4668e-09 6.7290e-09 2.6695e-08 7.4676e-09 8.7098e-09 1.0391e-09 3.7704e-08 3.1785e-08 1.6503e-08 1.4355e-08 3.5291e-08 7.0525e-09 4.3545e-09 2.3856e-09 6.0666e-08 8.4134e-09 7.0401e-09 5.0257e-09 4.6013e-09 2.1357e-08 2.5766e-08 3.0599e-08 3.9828e-08 2.6280e-08 1.2221e-08 2.0399e-08 1.0778e-08 1.1611e-09 3.9874e-08 3.2294e-09 3.4395e-09 2.3239e-09 1.9813e-09 1.6782e-09 2.3457e-09 2.5554e-09 1.7222e-09 2.5432e-09 3.6425e-09 1.0568e-09 7.3341e-10 3.3436e-10 2.6885e-10 4.5993e-09 1.3663e-07 9.5193e-10 3.1143e-08 2.5595e-08 2.7176e-08 1.6964e-08\n", + " 7.5904e-08 5.7115e-08 1.9584e-08 8.3880e-09 4.9878e-08 8.2198e-09 3.0010e-08 6.6923e-09 2.8138e-08 2.5359e-08 2.2473e-08 4.5731e-08 1.1766e-08 2.7226e-08 6.0971e-08 7.0190e-08 5.8859e-08 3.4086e-08 3.2591e-09 4.5399e-08 1.2501e-07\n", + "Lambda GC: 1.0466\n", + "Mean Chi^2: 1.142\n", + "Intercept: 0.8533 (0.0121)\n", + "Ratio < 0 (usually indicates GC correction).\n", + "\n", + "Heritability of phenotype 2/2\n", + "-----------------------------\n", + "Total Observed scale h2: 0.0297 (0.0032)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 
Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 
BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: 8.8839e-04 -9.3805e-04 2.5936e-04 -1.0410e-03 -8.5140e-03 -2.9552e-03 2.5074e-03 -4.2855e-03 1.9348e-02 -1.6913e-02 2.8922e-02 -1.6714e-02 8.3074e-04 -1.5534e-04 2.2472e-03 3.2617e-03 -7.8316e-03 9.2509e-03 4.9165e-03 6.0522e-04 -3.5348e-03 3.7162e-03 -4.8263e-03 -2.1888e-03 -1.0812e-03 -2.6360e-03 1.7446e-03 7.5089e-03 3.0158e-03 4.8341e-03 8.7325e-03 2.0837e-03 -2.4400e-03 -2.2472e-03 1.3217e-03 2.7870e-03 -3.2071e-03 -6.9366e-03 8.3931e-04 -1.2416e-03 -7.6842e-05 -2.0082e-03 -1.1581e-02 -4.2742e-03 2.5723e-03 -3.2393e-03 -1.6740e-03 9.2816e-04 3.2906e-03 -1.5070e-03 -2.2401e-05 9.4201e-04 -2.9080e-03 -2.6246e-03 2.6168e-03 1.1527e-03 2.0262e-03 2.4177e-03 2.8496e-03 1.4368e-03 1.6993e-03 3.2907e-03 2.8629e-03 4.0534e-03 3.6632e-03 -1.2251e-08 -2.1761e-05 -5.9192e-03 -5.1659e-03 -4.6335e-04 -9.8274e-03 -6.0863e-17 1.0092e-03 1.8435e-03 6.1848e-06 1.0243e-03\n", + " 1.4544e-03 7.6913e-04 -1.5412e-03 1.3678e-02 1.3692e-03 4.1833e-04 2.0872e-04 -1.1583e-03 3.5822e-03 -2.8837e-03 1.5556e-03 -1.5021e-03 -1.1339e-03 -8.1364e-05 1.7519e-03 1.8571e-03 2.7442e-03 -3.5932e-04 3.8293e-04 -8.6324e-05 3.1916e-04\n", + "Observed scale h2 SE: 2.9650e-02 2.0989e-03 3.5977e-03 3.5999e-03 8.8784e-03 4.5947e-03 5.6360e-03 6.5695e-03 1.0205e-02 1.4792e-02 2.5825e-02 1.3656e-02 9.0434e-04 1.7933e-03 2.7224e-03 2.9873e-03 6.8048e-03 6.1199e-03 4.9819e-03 2.0055e-03 6.0473e-03 3.2420e-03 7.0628e-03 8.9577e-03 5.3871e-03 5.7062e-03 5.2790e-03 3.8313e-03 5.5824e-03 4.8573e-03 3.9929e-03 
2.1181e-03 1.9731e-03 1.2732e-03 2.1772e-03 3.9637e-03 2.0869e-03 1.8437e-02 5.0498e-03 1.8207e-03 1.0379e-03 4.6858e-03 6.8138e-03 9.6916e-03 9.2174e-03 1.7571e-03 2.0083e-03 1.3270e-03 3.8174e-03 6.9828e-04 1.2639e-03 1.8807e-03 4.0674e-03 9.6844e-03 1.5332e-03 1.6927e-03 1.2729e-03 9.6167e-04 8.5544e-04 8.5599e-04 9.6612e-04 1.1168e-03 1.0875e-03 1.3383e-03 2.1800e-03 1.5217e-08 1.1085e-05 2.7410e-03 5.4126e-03 3.3349e-03 5.5275e-03 8.8838e-17 1.4751e-03 2.0632e-03 1.7612e-03 2.7837e-03\n", + " 1.0161e-03 8.5864e-04 2.6315e-03 1.4837e-02 6.1369e-03 1.2928e-02 2.8169e-03 5.1240e-03 2.1247e-03 2.2462e-03 1.7575e-03 8.4163e-04 1.8861e-03 1.3695e-03 1.1031e-03 1.5723e-03 1.5232e-03 1.6419e-03 9.7241e-04 4.6907e-04 3.5868e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: 2.9961e-02 -3.1636e-02 8.7468e-03 -3.5110e-02 -2.8714e-01 -9.9667e-02 8.4564e-02 -1.4453e-01 6.5253e-01 -5.7040e-01 9.7542e-01 
-5.6368e-01 2.8017e-02 -5.2389e-03 7.5787e-02 1.1000e-01 -2.6412e-01 3.1199e-01 1.6581e-01 2.0411e-02 -1.1921e-01 1.2533e-01 -1.6277e-01 -7.3818e-02 -3.6465e-02 -8.8899e-02 5.8836e-02 2.5324e-01 1.0171e-01 1.6303e-01 2.9451e-01 7.0273e-02 -8.2290e-02 -7.5789e-02 4.4576e-02 9.3992e-02 -1.0816e-01 -2.3394e-01 2.8306e-02 -4.1874e-02 -2.5915e-03 -6.7726e-02 -3.9059e-01 -1.4415e-01 8.6752e-02 -1.0925e-01 -5.6455e-02 3.1302e-02 1.1098e-01 -5.0824e-02 -7.5547e-04 3.1770e-02 -9.8074e-02 -8.8516e-02 8.8252e-02 3.8876e-02 6.8333e-02 8.1537e-02 9.6103e-02 4.8456e-02 5.7311e-02 1.1098e-01 9.6552e-02 1.3670e-01 1.2354e-01 -4.1316e-07 -7.3390e-04 -1.9963e-01 -1.7422e-01 -1.5627e-02 -3.3143e-01 -2.0526e-15 3.4034e-02 6.2174e-02 2.0858e-04 3.4544e-02\n", + " 4.9051e-02 2.5939e-02 -5.1976e-02 4.6130e-01 4.6175e-02 1.4108e-02 7.0393e-03 -3.9064e-02 1.2081e-01 -9.7252e-02 5.2464e-02 -5.0659e-02 -3.8242e-02 -2.7440e-03 5.9083e-02 6.2631e-02 9.2550e-02 -1.2118e-02 1.2914e-02 -2.9113e-03 1.0764e-02\n", + "Enrichment: 5.4053e-01 -4.0027e+01 3.1968e+00 -2.5675e+01 -1.6955e+01 -7.5504e+01 3.2474e+01 -1.9263e+01 2.9222e+01 -9.3586e+01 1.0617e+02 -3.0811e+01 1.1717e+02 -6.4258e+00 3.2614e+01 4.1447e+01 -5.6943e+01 2.8211e+01 7.6892e+00 1.1722e+01 -8.0096e+00 3.3938e+01 -1.7389e+01 -3.1450e+00 -3.6098e+00 -3.8770e+01 7.9878e+00 3.7401e+01 4.8053e+01 2.3474e+01 5.0859e+01 3.2722e+00 -1.5895e+02 -1.6520e+02 3.2314e+01 3.6597e+01 -1.8426e+02 -9.1747e+00 1.9760e+00 -4.5181e+00 -1.4733e+01 -9.3292e+00 -3.3544e+01 -7.5338e+00 3.7559e+00 -1.1068e+02 -6.1610e+01 5.0566e+01 1.3107e+02 -1.6793e+02 -6.3728e-01 2.7417e+01 -2.6090e+01 -9.1499e-01 1.9536e+02 6.8521e+00 1.2346e+01 1.4768e+01 1.7218e+01 8.8917e+00 1.0377e+01 2.0076e+01 1.7379e+01 2.4394e+01 2.2637e+01 -2.1972e+00 -4.7347e+00 -2.3199e+00 -6.8201e-01 -1.5863e+00 -5.9486e+02 1.5768e+00 5.9381e+01 6.7794e+01 2.8141e-01 1.9641e+01\n", + " 2.8363e+02 1.7234e+02 -3.1856e+01 2.2044e+01 3.8856e+01 8.0096e-01 6.5907e+00 -4.5018e+00 1.6086e+02 
-9.9521e+01 6.2201e+01 -2.5466e+02 -2.0764e+01 -5.0528e+00 2.5558e+02 2.1238e+02 3.2497e+02 -2.4220e+01 3.5151e+00 -2.1038e+01 3.3651e+02\n", + "Coefficients: 1.4903e-10 -1.1036e-08 8.8138e-10 -7.0788e-09 -4.6747e-09 -2.0817e-08 8.9534e-09 -5.3109e-09 8.0567e-09 -2.5802e-08 2.9273e-08 -8.4948e-09 3.2306e-08 -1.7716e-09 8.9920e-09 1.1427e-08 -1.5699e-08 7.7780e-09 2.1200e-09 3.2317e-09 -2.2083e-09 9.3570e-09 -4.7943e-09 -8.6710e-10 -9.9525e-10 -1.0689e-08 2.2023e-09 1.0312e-08 1.3248e-08 6.4718e-09 1.4022e-08 9.0217e-10 -4.3823e-08 -4.5546e-08 8.9093e-09 1.0090e-08 -5.0802e-08 -2.5295e-09 5.4480e-10 -1.2457e-09 -4.0620e-09 -2.5721e-09 -9.2484e-09 -2.0771e-09 1.0355e-09 -3.0516e-08 -1.6986e-08 1.3942e-08 3.6137e-08 -4.6299e-08 -1.7570e-10 7.5590e-09 -7.1931e-09 -2.5227e-10 5.3862e-08 1.8892e-09 3.4039e-09 4.0718e-09 4.7472e-09 2.4515e-09 2.8609e-09 5.5350e-09 4.7916e-09 6.7256e-09 6.2411e-09 -6.0579e-10 -1.3054e-09 -6.3962e-10 -1.8804e-10 -4.3736e-10 -1.6401e-07 4.3474e-10 1.6372e-08 1.8691e-08 7.7585e-11 5.4150e-09\n", + " 7.8200e-08 4.7515e-08 -8.7829e-09 6.0777e-09 1.0713e-08 2.2083e-10 1.8171e-09 -1.2412e-09 4.4350e-08 -2.7439e-08 1.7149e-08 -7.0211e-08 -5.7247e-09 -1.3931e-09 7.0465e-08 5.8553e-08 8.9596e-08 -6.6776e-09 9.6913e-10 -5.8002e-09 9.2778e-08\n", + "Coefficient SE: 4.9738e-09 2.4692e-08 1.2226e-08 2.4478e-08 4.8747e-09 3.2365e-08 2.0125e-08 8.1415e-09 4.2494e-09 2.2566e-08 2.6138e-08 6.9407e-09 3.5168e-08 2.0452e-08 1.0893e-08 1.0466e-08 1.3641e-08 5.1456e-09 2.1482e-09 1.0709e-08 3.7779e-09 8.1631e-09 7.0161e-09 3.5486e-09 4.9586e-09 2.3139e-08 6.6641e-09 5.2614e-09 2.4524e-08 6.5028e-09 6.4116e-09 9.1706e-10 3.5437e-08 2.5804e-08 1.4676e-08 1.4350e-08 3.3059e-08 6.7235e-09 3.2779e-09 1.8266e-09 5.4867e-08 6.0018e-09 5.4412e-09 4.7098e-09 3.7106e-09 1.6553e-08 2.0379e-08 1.9932e-08 4.1923e-08 2.1453e-08 9.9135e-09 1.5092e-08 1.0061e-08 9.3083e-10 3.1559e-08 2.7741e-09 2.1384e-09 1.6196e-09 1.4251e-09 1.4605e-09 1.6265e-09 1.8785e-09 1.8202e-09 
2.2207e-09 3.7141e-09 7.5249e-10 6.6493e-10 2.9619e-10 1.9701e-10 3.1478e-09 9.2247e-08 6.3456e-10 2.3931e-08 2.0918e-08 2.2094e-08 1.4716e-08\n", + " 5.4635e-08 5.3045e-08 1.4996e-08 6.5926e-09 4.8018e-08 6.8248e-09 2.4523e-08 5.4907e-09 2.6305e-08 2.1373e-08 1.9375e-08 3.9339e-08 9.5223e-09 2.3449e-08 4.4370e-08 4.9575e-08 4.9729e-08 3.0514e-08 2.4610e-09 3.1517e-08 1.0427e-07\n", + "Lambda GC: 1.1908\n", + "Mean Chi^2: 1.2634\n", + "Intercept: 1.0261 (0.0119)\n", + "Ratio: 0.0992 (0.0452)\n", + "\n", + "Genetic Covariance\n", + "------------------\n", + "Total Observed scale gencov: 0.042 (0.0028)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 
MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale gencov: 4.0572e-02 -1.6669e-03 3.1592e-03 4.1247e-03 3.9495e-03 -3.2056e-03 2.6912e-03 1.6858e-03 6.2850e-03 9.1468e-03 4.4117e-03 -4.9844e-03 4.7380e-04 1.4304e-04 1.4632e-03 3.1222e-03 -6.7569e-03 -1.8984e-04 2.6434e-03 8.5961e-04 3.6739e-03 4.9158e-03 3.7673e-03 -1.0337e-02 -6.6240e-03 -2.8831e-03 1.1834e-03 8.0947e-03 3.0511e-03 5.5734e-03 3.0505e-03 1.1373e-03 -1.3913e-03 -1.9972e-03 3.5823e-04 -1.0687e-03 -2.9276e-03 -9.8307e-03 -5.5376e-03 -3.6785e-04 -7.0872e-04 -9.8307e-03 -3.0002e-03 -8.6648e-03 1.8813e-03 -1.4124e-03 -6.5603e-04 1.9911e-03 2.1165e-03 -1.2635e-03 -1.7451e-03 1.9816e-04 -5.0904e-03 -1.4118e-02 4.0085e-04 7.9698e-04 2.0265e-03 2.9401e-03 2.9314e-03 7.6991e-04 3.9815e-03 1.2263e-03 3.8370e-03 4.5887e-03 6.7958e-03 -2.4995e-08 -3.1508e-05 -1.2315e-03 
-1.6629e-02 -5.4121e-03 -1.1852e-02 3.8827e-17 3.7332e-03 3.2676e-03 -2.3151e-04 -3.8085e-05\n", + " 7.6824e-04 1.8288e-03 -1.2957e-04 -5.2788e-03 -3.7168e-03 4.8980e-03 4.6803e-03 6.5474e-03 3.3841e-03 -7.2881e-04 -8.2463e-04 -1.0779e-03 2.3076e-03 -1.0272e-03 3.3716e-03 4.1005e-03 1.9414e-03 -7.2782e-04 7.9713e-04 -8.8215e-04 4.2808e-04\n", + "Observed scale gencov SE: 5.2173e-02 2.6063e-03 3.7021e-03 2.8447e-03 6.0285e-03 2.9222e-03 3.3289e-03 5.7966e-03 9.1277e-03 8.9377e-03 1.6397e-02 1.0584e-02 7.7051e-04 1.4768e-03 2.7325e-03 2.4247e-03 6.4332e-03 6.3571e-03 4.6036e-03 1.5999e-03 6.0999e-03 2.8057e-03 6.6400e-03 8.8631e-03 4.5303e-03 4.1952e-03 5.0407e-03 4.4812e-03 3.8085e-03 4.7169e-03 3.6894e-03 2.4768e-03 1.9606e-03 1.2380e-03 1.9378e-03 2.4272e-03 1.4339e-03 2.4136e-02 5.1363e-03 2.0651e-03 7.4823e-04 5.2539e-03 5.5072e-03 1.4024e-02 7.4177e-03 1.7716e-03 1.7839e-03 1.7261e-03 3.5178e-03 6.7507e-04 1.7741e-03 1.9158e-03 3.5763e-03 1.0298e-02 1.3807e-03 8.6784e-04 9.9155e-04 8.8151e-04 1.0061e-03 9.7862e-04 1.1429e-03 1.1571e-03 1.6367e-03 1.3505e-03 3.7714e-03 1.7863e-08 1.0453e-05 2.3375e-03 8.4771e-03 4.3907e-03 6.7778e-03 9.4404e-17 1.5976e-03 1.6119e-03 1.6703e-03 2.7225e-03\n", + " 1.0998e-03 1.0836e-03 3.4137e-03 1.5120e-02 3.6770e-03 1.4093e-02 2.0971e-03 4.8909e-03 1.8984e-03 1.6996e-03 1.9937e-03 8.6094e-04 2.6317e-03 1.4989e-03 1.8308e-03 2.2868e-03 1.4014e-03 1.7966e-03 1.0874e-03 4.8382e-04 4.4978e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 
6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of gencov: 9.6611e-01 -3.9694e-02 7.5229e-02 9.8219e-02 9.4048e-02 -7.6334e-02 6.4086e-02 4.0144e-02 1.4966e-01 2.1781e-01 1.0505e-01 -1.1869e-01 1.1282e-02 3.4062e-03 3.4842e-02 7.4348e-02 -1.6090e-01 -4.5205e-03 6.2946e-02 2.0470e-02 8.7484e-02 1.1706e-01 8.9708e-02 -2.4615e-01 -1.5773e-01 -6.8654e-02 2.8179e-02 1.9276e-01 7.2655e-02 1.3272e-01 7.2641e-02 2.7083e-02 -3.3131e-02 -4.7557e-02 8.5303e-03 -2.5448e-02 -6.9714e-02 -2.3410e-01 -1.3187e-01 -8.7594e-03 -1.6876e-02 -2.3409e-01 -7.1443e-02 -2.0633e-01 4.4799e-02 -3.3633e-02 -1.5622e-02 4.7412e-02 5.0399e-02 -3.0088e-02 -4.1554e-02 4.7187e-03 -1.2122e-01 -3.3618e-01 9.5452e-03 1.8978e-02 4.8256e-02 7.0011e-02 6.9805e-02 1.8334e-02 9.4811e-02 2.9201e-02 9.1368e-02 1.0927e-01 1.6183e-01 -5.9521e-07 -7.5030e-04 -2.9325e-02 -3.9597e-01 -1.2888e-01 -2.8224e-01 9.2456e-16 8.8898e-02 7.7811e-02 -5.5130e-03 -9.0691e-04\n", + " 1.8294e-02 4.3548e-02 -3.0855e-03 -1.2570e-01 -8.8506e-02 1.1663e-01 1.1145e-01 1.5591e-01 8.0585e-02 -1.7355e-02 -1.9637e-02 -2.5667e-02 5.4949e-02 -2.4461e-02 8.0287e-02 9.7643e-02 4.6229e-02 -1.7331e-02 1.8982e-02 -2.1006e-02 1.0194e-02\n", + "Enrichment: 1.7430e+01 -5.0222e+01 2.7495e+01 7.1826e+01 5.5534e+00 -5.7828e+01 2.4610e+01 5.3504e+00 6.7023e+00 3.5736e+01 1.1435e+01 -6.4877e+00 4.7186e+01 4.1779e+00 1.4994e+01 2.8013e+01 -3.4688e+01 -4.0877e-01 2.9190e+00 1.1755e+01 5.8778e+00 3.1698e+01 9.5840e+00 -1.0487e+01 
-1.5615e+01 -2.9941e+01 3.8257e+00 2.8469e+01 3.4326e+01 1.9109e+01 1.2545e+01 1.2611e+00 -6.3993e+01 -1.0366e+02 6.1838e+00 -9.9085e+00 -1.1877e+02 -9.1809e+00 -9.2054e+00 -9.4512e-01 -9.5946e+01 -3.2246e+01 -6.1357e+00 -1.0784e+01 1.9396e+00 -3.4076e+01 -1.7048e+01 7.6591e+01 5.9525e+01 -9.9414e+01 -3.5053e+01 4.0721e+00 -3.2246e+01 -3.4751e+00 2.1130e+01 3.3450e+00 8.7186e+00 1.2681e+01 1.2507e+01 3.3642e+00 1.7166e+01 5.2822e+00 1.6446e+01 1.9499e+01 2.9651e+01 -3.1653e+00 -4.8405e+00 -3.4080e-01 -1.5501e+00 -1.3083e+01 -5.0656e+02 -7.1024e-01 1.5510e+02 8.4845e+01 -7.4377e+00 -5.1564e-01\n", + " 1.0578e+02 2.8933e+02 -1.8911e+00 -6.0069e+00 -7.4477e+01 6.6217e+00 1.0435e+02 1.7967e+01 1.0730e+02 -1.7760e+01 -2.3281e+01 -1.2903e+02 2.9835e+01 -4.5041e+01 3.4730e+02 3.3110e+02 1.6232e+02 -3.4639e+01 5.1666e+00 -1.5179e+02 3.1869e+02\n", + "Mean z1*z2: 0.4205\n", + "Intercept: 0.1475 (0.0083)\n", + "\n", + "Genetic Correlation\n", + "-------------------\n", + "Genetic Correlation: 1.1003 (0.0858)\n", + "Z-score: 12.8278\n", + "P: 1.1459e-37\n", + "\n", + "\n", + "Summary of Genetic Correlation Results\n", + "p1 p2 rg se z p h2_obs h2_obs_se h2_int h2_int_se gcov_int gcov_int_se\n", + "GCST004787_summary_stats_munged.sumstats.gz GCST90043957_summary_stats_munged.sumstats.gz 1.1003 0.0858 12.8278 1.1459e-37 0.0297 0.0032 1.0261 0.0119 0.1475 0.0083\n", + "\n", + "Analysis finished at Thu Nov 6 22:31:52 2025\n", + "Total time elapsed: 1.0m:3.9s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "rg_results = estimate_genetic_correlation(\n", + " sumstats_files=[\n", + " str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged.sumstats.gz\"),\n", + " str(Path(Path(gwas_summary_statistic_path_2).stem).stem + \"_munged.sumstats.gz\"),\n", + " ],\n", + " 
ref_ld_chr=os.path.join(ldscores_path, ldscores_prefix),\n", + " w_ld_chr=os.path.join(ldweights_path, ldweights_prefix),\n", + " out_prefix=\"CHD_rg\",\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Summary\n", + "This tutorial demonstrated how to perform comprehensive LDSC analyses using the `cellink` package, including:\n", + "\n", + "1. Cell-type-specific heritability analysis: Identifying which cell types are most relevant to complex traits\n", + "2. SNP heritability estimation: Quantifying the proportion of trait variance explained by common genetic variants\n", + "3. Genetic correlation analysis: Measuring shared genetic architecture between traits\n", + "\n", + "The `cellink` package simplifies these analyses by providing unified wrapper functions that handle data formatting, file management, and command execution for LDSC and its auxiliary tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cellink-env", + "language": "python", + "name": "cellink-env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/tutorials/ldsc_duncan_merge_anndatas.ipynb b/docs/tutorials/ldsc_duncan_merge_anndatas.ipynb deleted file mode 100644 index 6e8dc9d..0000000 --- a/docs/tutorials/ldsc_duncan_merge_anndatas.ipynb +++ /dev/null @@ -1,279 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "3fdcf27c", - "metadata": {}, - "outputs": [], - "source": [ - "import anndata as ad\n", - "from pathlib import Path\n", - "from scipy.sparse import csr_array\n", - "import numpy as np\n", - "from tqdm.auto import tqdm" - ] 
- }, - { - "cell_type": "code", - "execution_count": 2, - "id": "00283b59", - "metadata": {}, - "outputs": [], - "source": [ - "import gc" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "22285917", - "metadata": {}, - "outputs": [], - "source": [ - "obs_map = {\n", - " \"Age\": \"age\",\n", - " \"CellCycle\": \"cell_cycle\",\n", - " \"CellID\": \"cell_id\",\n", - " \"Chemistry\": \"chemistry\",\n", - " \"Clusters\": \"clusters\",\n", - " \"Donor\": \"donor\",\n", - " \"NGenes\": \"n_genes\",\n", - " \"ROIGroupCoarse\": \"roi_group_coarse\",\n", - " \"ROIGroupFine\": \"roi_group_fine\",\n", - " \"Roi\": \"roi\",\n", - " \"SampleID\": \"sample_id\",\n", - " \"Sex\": \"sex\",\n", - " \"Subclusters\": \"subclusters\",\n", - " \"Tissue\": \"tissue\",\n", - " \"TotalUMI\": \"total_umi\",\n", - "}\n", - "\n", - "var_map = {\n", - " \"Accession\": \"accession\",\n", - " \"Gene\": \"gene\",\n", - " \"Valid\": \"valid\",\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "5b4b8160", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AnnData object with n_obs × n_vars = 50000 × 59480\n", - " obs: 'age', 'cell_cycle', 'chemistry', 'clusters', 'donor', 'n_genes', 'roi_group_coarse', 'roi_group_fine', 'roi', 'sample_id', 'sex', 'subclusters', 'tissue', 'total_umi', 'chunk'\n", - " var: 'accession', 'gene', 'valid'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "adatas_paths = list(\n", - " Path(\"/project/genomics/ayshan/ldsc_analysis/data_2/Single_cell_Siletti_Duncan/h5ad_chunks_new\").glob(\"*.h5ad\")\n", - ")\n", - "\n", - "\n", - "def read_h5ad(apath):\n", - " adata = ad.read_h5ad(apath)\n", - " adata.X = csr_array(adata.X.astype(np.uint16, copy=False))\n", - " adata.obs = adata.obs.rename(columns=obs_map)\n", - " adata.var = adata.var.rename(columns=var_map)\n", - " adata.obs[\"chunk\"] = apath.stem\n", - " adata.obs = 
adata.obs.set_index(\"cell_id\")\n", - " return adata\n", - "\n", - "\n", - "adata = read_h5ad(adatas_paths[0])\n", - "adata.write_h5ad(\"/lustre/groups/shared/scgenetics/single_cell_siletti_duncan_single_chunk.h5ad\")\n", - "adata" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "e0b2b8dd", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "13f97890116b459394539a9ecec79ff1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/67 [00:00 AnnData: +def read_bgen( + path: str | Path = None, + metafile_path: str | Path = None, + sample_path: str | Path = None, + *, + var_rename=None, + obs_rename=None, + hard_call=True, + **kwargs, +) -> AnnData: """Read bgen Format Params @@ -184,6 +193,6 @@ def read_bgen(path: str | Path = None, *, var_rename=None, obs_rename=None, hard """ from sgkit.io import bgen as sg_bgen - sgkit_dataset = sg_bgen.read_bgen(path=path, **kwargs) + sgkit_dataset = sg_bgen.read_bgen(path=path, metafile_path=metafile_path, sample_path=sample_path, **kwargs) gdata = from_sgkit_dataset(sgkit_dataset, var_rename=var_rename, obs_rename=obs_rename, hard_call=hard_call) return gdata diff --git a/src/cellink/resources/__init__.py b/src/cellink/resources/__init__.py index 8829196..820340a 100644 --- a/src/cellink/resources/__init__.py +++ b/src/cellink/resources/__init__.py @@ -8,4 +8,4 @@ get_pgs_catalog_score, get_pgs_catalog_scores, ) -from ._ld import get_1000genomes_ld_scores, get_1000genomes_ld_weights +from ._ld import get_1000genomes_ld_scores, get_1000genomes_ld_weights, get_1000genomes_plink_files diff --git a/src/cellink/resources/_datasets.py b/src/cellink/resources/_datasets.py index 4ef67b8..0f0a2d5 100644 --- a/src/cellink/resources/_datasets.py +++ b/src/cellink/resources/_datasets.py @@ -1,27 +1,20 @@ -import hashlib import logging import os -import shutil -import subprocess -from os.path import expanduser, join -from pathlib import 
Path -from urllib.request import urlretrieve import anndata as ad import pandas as pd -import yaml import cellink as cl -from .._core import DonorData -from cellink.resources._utils import get_data_home, _download_file, _run, _load_config from cellink.resources._datasets_utils import plink_filter_prune, plink_kinship, preprocess_vcf_to_plink, try_liftover +from cellink.resources._utils import _download_file, _load_config, _run, get_data_home + +from .._core import DonorData logging.basicConfig(level=logging.INFO) + def get_1000genomes( - config_path: str = "./cellink/resources/config/1000genomes.yaml", - data_home: str | None = None, - verify_checksum=True + config_path: str = "./cellink/resources/config/1000genomes.yaml", data_home: str | None = None, verify_checksum=True ) -> ad.AnnData: """ Download and preprocess the 1000 Genomes Project genotype data. @@ -87,7 +80,7 @@ def get_1000genomes( def get_onek1k( config_path: str = "./cellink/resources/config/onek1k.yaml", data_home: str | None = None, - verify_checksum: bool = True + verify_checksum: bool = True, ) -> DonorData: """ Download and preprocess the OneK1K genotype and expression dataset. 
diff --git a/src/cellink/resources/_gwas_prs_qtl.py b/src/cellink/resources/_gwas_prs_qtl.py index 66d5395..190b3c5 100644 --- a/src/cellink/resources/_gwas_prs_qtl.py +++ b/src/cellink/resources/_gwas_prs_qtl.py @@ -1,10 +1,10 @@ import logging +from pathlib import Path +from typing import Any from urllib.request import urlretrieve import pandas as pd import requests -from typing import Any, Optional, Union -from pathlib import Path from cellink.resources._utils import _cache_df, _to_dataframe, get_data_home @@ -15,13 +15,9 @@ EQTL_API_BASE = "https://www.ebi.ac.uk/eqtl/api/v3" - def _fetch( - url: str, - params: Optional[dict[str, Any]] = None, - paginate: bool = True, - max_pages: Optional[int] = None -) -> Union[list, dict]: + url: str, params: dict[str, Any] | None = None, paginate: bool = True, max_pages: int | None = None +) -> list | dict: """ Fetch JSON data from a REST API, optionally handling pagination. @@ -73,10 +69,7 @@ def _fetch( def get_gwas_catalog_studies( - data_home: Optional[Union[str, Path]] = None, - max_pages: Optional[int] = None, - refresh: bool = False, - **params: Any + data_home: str | Path | None = None, max_pages: int | None = None, refresh: bool = False, **params: Any ) -> pd.DataFrame: """ Retrieve GWAS catalog studies and cache locally as a parquet file. @@ -126,11 +119,8 @@ def get_gwas_catalog_study(accession_id: str, **params: Any) -> dict: def get_gwas_catalog_study_summary_stats( - accession_id: str, - dest: Optional[Union[str, Path]] = None, - return_path: bool = False, - **params: Any -) -> Union[pd.DataFrame, Path]: + accession_id: str, dest: str | Path | None = None, return_path: bool = False, **params: Any +) -> pd.DataFrame | Path: """ Download full summary statistics for a GWAS study. @@ -150,16 +140,96 @@ def get_gwas_catalog_study_summary_stats( pd.DataFrame or Path DataFrame containing the summary statistics, or Path to the downloaded file if `return_path=True`. 
""" + study_meta = _fetch(f"{GWAS_API_BASE}/studies/{accession_id}", params=params, paginate=False) + + if "full_summary_stats" not in study_meta: + raise ValueError(f"Study {accession_id} does not have full summary statistics available") + + base_url = study_meta["full_summary_stats"] + harmonised_url = f"{base_url}/harmonised" + + import re + + try: + r = requests.get(harmonised_url) + r.raise_for_status() + + all_files = re.findall(r'href="([^"]*\.tsv\.gz)"', r.text) + + h_files = [f for f in all_files if f.endswith(".h.tsv.gz") and not f.endswith(".h.tsv.gz-meta.yaml")] + + if h_files: + + def build_priority(filename): + filename_lower = filename.lower() + if "build38" in filename_lower or "hg38" in filename_lower or "grch38" in filename_lower: + return 2 + elif "build37" in filename_lower or "hg19" in filename_lower or "grch37" in filename_lower: + return 1 + else: + return 0 + + h_files.sort(key=build_priority, reverse=True) + filename = h_files[0] + url = f"{harmonised_url}/{filename}" + logging.info(f"Found harmonised file: {filename}") + else: + raise ValueError("No harmonised .h.tsv.gz files found") + + except Exception as e: + logging.warning(f"Could not find harmonised files ({e}), trying base directory") + + try: + r = requests.get(base_url) + r.raise_for_status() + files = re.findall(r'href="([^"]*\.tsv\.gz)"', r.text) + + if files: + + def build_priority(filename): + filename_lower = filename.lower() + if "build38" in filename_lower or "hg38" in filename_lower or "grch38" in filename_lower: + return 2 + elif "build37" in filename_lower or "hg19" in filename_lower or "grch37" in filename_lower: + return 1 + else: + return 0 + + files.sort(key=build_priority, reverse=True) + filename = files[0] + url = f"{base_url}/{filename}" + else: + possible_files = [ + f"{accession_id}_buildGRCh38.tsv.gz", + f"{accession_id}_buildGRCh37.tsv.gz", + f"{accession_id}.tsv.gz", + ] + + for filename in possible_files: + test_url = f"{base_url}/{filename}" + try: + 
test_r = requests.head(test_url) + if test_r.status_code == 200: + url = test_url + break + except: + continue + else: + raise ValueError(f"Could not find summary statistics file for {accession_id}") + + except Exception as e2: + raise ValueError(f"Could not find summary statistics for {accession_id}: {e2}") + if not dest: data_home = get_data_home() dest = data_home / f"{accession_id}_summary_stats.tsv.gz" - url = ( - _fetch(f"{GWAS_API_BASE}/studies/{accession_id}", params=params, paginate=False)["full_summary_stats"] - + f"/{accession_id}_buildGRCh37.tsv.gz" - ) logging.info(f"Downloading {url} to {dest}") - urlretrieve(url, dest) + + try: + urlretrieve(url, dest) + except Exception as e: + raise RuntimeError(f"Failed to download summary statistics from {url}: {e}") if return_path: return dest @@ -168,11 +238,7 @@ def get_gwas_catalog_study_summary_stats( return data -def get_gwas_catalog_genes( - data_home: Optional[Union[str, Path]] = None, - refresh: bool = False, - **params: Any -) -> pd.DataFrame: +def get_gwas_catalog_genes(data_home: str | Path | None = None, refresh: bool = False, **params: Any) -> pd.DataFrame: """ Retrieve GWAS catalog gene associations and cache locally. @@ -219,10 +285,7 @@ def get_gwas_catalog_gene(gene_name: str, **params: Any) -> dict: def get_pgs_catalog_scores( - data_home: Optional[Union[str, Path]] = None, - max_pages: Optional[int] = None, - refresh: bool = False, - **params: Any + data_home: str | Path | None = None, max_pages: int | None = None, refresh: bool = False, **params: Any ) -> pd.DataFrame: """ Retrieve PGS catalog scores and cache locally. 
@@ -272,11 +335,8 @@ def get_pgs_catalog_score(pgs_id: str, **params: Any) -> dict: def get_pgs_catalog_score_file( - pgs_id: str, - dest: Optional[Union[str, Path]] = None, - return_path: bool = False, - **params: Any -) -> Union[pd.DataFrame, Path]: + pgs_id: str, dest: str | Path | None = None, return_path: bool = False, **params: Any +) -> pd.DataFrame | Path: """ Download the scoring file for a PGS catalog score. @@ -314,10 +374,7 @@ def get_pgs_catalog_score_file( def get_eqtl_catalog_datasets( - data_home: Optional[Union[str, Path]] = None, - max_pages: Optional[int] = None, - refresh: bool = False, - **params: Any + data_home: str | Path | None = None, max_pages: int | None = None, refresh: bool = False, **params: Any ) -> pd.DataFrame: """ Retrieve eQTL catalog datasets and cache locally. @@ -349,11 +406,11 @@ def get_eqtl_catalog_datasets( def get_eqtl_catalog_dataset_associations( dataset_id: str, - data_home: Optional[Union[str, Path]] = None, + data_home: str | Path | None = None, refresh: bool = False, return_path: bool = False, - **params: Any -) -> Union[pd.DataFrame, Path]: + **params: Any, +) -> pd.DataFrame | Path: """ Retrieve associations for a specific eQTL catalog dataset and cache locally. 
diff --git a/src/cellink/resources/_ld.py b/src/cellink/resources/_ld.py index a0ccc60..6d90022 100644 --- a/src/cellink/resources/_ld.py +++ b/src/cellink/resources/_ld.py @@ -1,9 +1,8 @@ import shutil import tarfile +from pathlib import Path import pandas as pd -from pathlib import Path -from typing import Optional, Tuple, Union from cellink.resources._utils import _download_file, _load_config, get_data_home @@ -36,29 +35,35 @@ def _extract_or_refresh(tgz_path: Path, extract_path: Path, refresh: bool = Fals else: shutil.rmtree(item) - if not any(p for p in extract_path.iterdir() if p != tgz_path): + existing_contents = [p for p in extract_path.iterdir() if p != tgz_path] + + if not existing_contents: with tarfile.open(tgz_path, "r:gz") as tar: tar.extractall(path=extract_path) - contents = list(extract_path.iterdir()) - if len(contents) == 2 and contents[1].is_dir(): - for item in contents[1].iterdir(): + contents = [p for p in extract_path.iterdir() if p != tgz_path] + + if len(contents) == 1 and contents[0].is_dir(): + nested_dir = contents[0] + + for item in nested_dir.iterdir(): shutil.move(str(item), str(extract_path)) - contents[1].rmdir() + + nested_dir.rmdir() def get_1000genomes_ld_scores( - config_path: Union[str, Path] = "./cellink/resources/config/1000genomes.yaml", + config_path: str | Path = "./cellink/resources/config/1000genomes.yaml", population: str = "EUR", - data_home: Optional[Union[str, Path]] = None, + data_home: str | Path | None = None, return_path: bool = False, refresh: bool = False, -) -> Union[Tuple[pd.DataFrame, pd.DataFrame, str], Tuple[Path, str]]: +) -> tuple[pd.DataFrame, pd.DataFrame, str] | tuple[Path, str]: """ Download, extract, and load precomputed 1000 Genomes linkage disequilibrium (LD) scores. 
This function downloads population-specific LD scores from the 1000 Genomes project, - extracts them to a local directory, and concatenates chromosome-wise annotation and + extracts them to a local directory, and concatenates chromosome-wise annotation and LD score files into pandas DataFrames. Parameters @@ -84,7 +89,7 @@ def get_1000genomes_ld_scores( Concatenated LD score files for all chromosomes. - prefix : str File name prefix used in the extracted data. - + If `return_path=True`, returns `(DATA, prefix)`: - DATA : pathlib.Path Path to the directory containing extracted files. @@ -135,17 +140,17 @@ def get_1000genomes_ld_scores( def get_1000genomes_ld_weights( - config_path: Union[str, Path] = "./cellink/resources/config/1000genomes.yaml", + config_path: str | Path = "./cellink/resources/config/1000genomes.yaml", population: str = "EUR", - data_home: Optional[Union[str, Path]] = None, + data_home: str | Path | None = None, return_path: bool = False, refresh: bool = False, -) -> Union[Tuple[pd.DataFrame, pd.DataFrame], Tuple[Path, str]]: +) -> tuple[pd.DataFrame, pd.DataFrame] | tuple[Path, str]: """ Download, extract, and load precomputed 1000 Genomes LD weights. This function downloads population-specific LD weights from the 1000 Genomes project, - extracts them to a local directory, and concatenates chromosome-wise weight files + extracts them to a local directory, and concatenates chromosome-wise weight files into a single pandas DataFrame. Parameters @@ -168,7 +173,7 @@ def get_1000genomes_ld_weights( - None : placeholder for compatibility with LD scores interface. - weights : pd.DataFrame Concatenated LD weight files for all chromosomes. - + If `return_path=True`, returns `(DATA, prefix)`: - DATA : pathlib.Path Path to the directory containing extracted files. 
@@ -211,8 +216,73 @@ def get_1000genomes_ld_weights( return annot, weights +def get_1000genomes_plink_files( + config_path: str | Path = "./cellink/resources/config/1000genomes.yaml", + population: str = "EUR", + data_home: str | Path | None = None, + refresh: bool = False, +) -> Path: + """ + Download and extract 1000 Genomes PLINK files (BED/BIM/FAM format). + + This function downloads population-specific PLINK files from the 1000 Genomes project, + extracts them to a local directory, and returns the path to the extracted files. + + Parameters + ---------- + config_path : str or pathlib.Path, default='./cellink/resources/config/1000genomes.yaml' + Path to YAML configuration file specifying URLs and file names for PLINK files. + population : str, default='EUR' + Population code for PLINK files. Currently only 'EUR' is supported. + data_home : str or pathlib.Path, optional + Root directory where data will be stored. Defaults to user-specific cache directory. + refresh : bool, default=False + If True, re-downloads and re-extracts files even if they already exist locally. + + Returns + ------- + - pathlib.Path + Path to the directory containing extracted PLINK files (.bed, .bim, .fam). + Files are named as: {prefix}{chrom}.bed/bim/fam where chrom ranges from 1-22. + - prefix : str + File name prefix used in the extracted data. + + Raises + ------ + ValueError + If `population` is not supported in the configuration. 
+ + Examples + -------- + >>> plink_dir = get_1000genomes_plink_files(population="EUR") + >>> # Access chromosome 1 files at: + >>> # plink_dir / "1000G.EUR.QC.1.bed" + >>> # plink_dir / "1000G.EUR.QC.1.bim" + >>> # plink_dir / "1000G.EUR.QC.1.fam" + """ + data_home = get_data_home(data_home) + DATA = data_home / f"1000genomes_plink_{population}" + DATA.mkdir(exist_ok=True) + + config = _load_config(config_path) + if population not in config["plink_files"]: + raise ValueError(f"population must be one of {list(config['plink_files'].keys())}") + + prefix = config["plink_files"]["prefix"] + tgz_path = DATA / config["plink_files"][population]["filename"] + + _download_file(config["plink_files"][population]["url"], tgz_path, checksum=None) + _extract_or_refresh(tgz_path, DATA, refresh=refresh) + + return DATA, prefix + + if __name__ == "__main__": annot, ldscores, prefix = get_1000genomes_ld_scores(population="EUR") annot, ldscores, prefix = get_1000genomes_ld_scores(population="EAS") + annot, weights, prefix = get_1000genomes_ld_weights(population="EUR") annot, weights, prefix = get_1000genomes_ld_weights(population="EAS") + + plink_files, prefix = get_1000genomes_plink_files(population="EUR") + plink_files, prefix = get_1000genomes_plink_files(population="EAS") diff --git a/src/cellink/resources/config/1000genomes.yaml b/src/cellink/resources/config/1000genomes.yaml index 2e1fe8a..0e8ef84 100644 --- a/src/cellink/resources/config/1000genomes.yaml +++ b/src/cellink/resources/config/1000genomes.yaml @@ -131,6 +131,14 @@ remote_files: - filename: "ALL.chr22.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz.tbi" url: "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz.tbi" checksum: "27de6b77af65d300bb968e8e372439deb949389e4395eb0dd251f9ba7d73bbed" +plink_files: + prefix: 1000G.EUR.QC. 
+ EUR: + filename: "1000G_Phase3_plinkfiles.tgz" + url: "https://zenodo.org/records/7796478/files/1000G_Phase3_plinkfiles.tgz?download=1" + EAS: + filename: "1000G_Phase3_EAS_plinkfiles.tgz" + url: "https://zenodo.org/records/7796478/files/1000G_Phase3_EAS_plinkfiles.tgz?download=1" ld_scores: prefix: baselineLD. EUR: diff --git a/src/cellink/tl/external/__init__.py b/src/cellink/tl/external/__init__.py index 0a474f1..ea7de7c 100644 --- a/src/cellink/tl/external/__init__.py +++ b/src/cellink/tl/external/__init__.py @@ -1,5 +1,31 @@ from ._jaxqtl import read_jaxqtl_results, run_jaxqtl from ._ld import calculate_ld +from ._ldsc import ( + compute_ld_scores_with_annotations_from_bimfile, + compute_ld_scores_with_annotations_from_donor_data, + configure_ldsc_runner, + estimate_celltype_specific_heritability, + estimate_genetic_correlation, + estimate_heritability, + estimate_ld_scores_from_bimfile, + estimate_ld_scores_from_donor_data, + make_annot_from_bimfile, + make_annot_from_donor_data, + make_continuous_annot_from_bimfile, + make_continuous_annot_from_donor_data, + munge_sumstats, +) from ._mixmil import run_mixmil from ._pc import calculate_pcs +from ._sldsc_utils import generate_gene_coord_file, generate_sldsc_genesets, preprocess_for_sldsc +from ._ldsc2magma import ( + load_ensembl_to_entrez_map, + genesets_dir_to_entrez_gmt, + scores_to_gmt, + scores_to_covar, + run_magma_annotate, + run_magma_gene_analysis, + run_magma_gsa, + run_magma_gpa, +) from ._tensorqtl import read_tensorqtl_results, run_tensorqtl diff --git a/src/cellink/tl/external/_ldsc.py b/src/cellink/tl/external/_ldsc.py new file mode 100644 index 0000000..32a8b7f --- /dev/null +++ b/src/cellink/tl/external/_ldsc.py @@ -0,0 +1,2369 @@ +import logging +import os +import subprocess +from typing import Any, Literal +import shlex + +import numpy as np +import pandas as pd +import yaml + +from cellink._core import DonorData +from cellink.io import to_plink +from cellink.resources._utils import 
get_data_home + +logger = logging.getLogger(__name__) + + +class LDSCRunner: + """Enhanced LDSC Runner with YAML config and automatic path inference""" + + def __init__(self, config_path: str | None = None, config_dict: dict | None = None): + """ + Initialize LDSC Runner + + Parameters + ---------- + config_path : str, optional + Path to YAML configuration file + config_dict : dict, optional + Configuration dictionary (takes precedence over config_path) + """ + self.config = self._load_config(config_path, config_dict) + self._validate_config() + + def _load_config(self, config_path: str | None, config_dict: dict | None) -> dict: + """Load configuration from file or dictionary""" + if config_dict: + return config_dict + + if config_path and os.path.exists(config_path): + with open(config_path) as f: + return yaml.safe_load(f) + return { + "execution_mode": "local", + "docker_image": "zijingliu/ldsc", + "singularity_image": None, + "ldsc_command": "ldsc.py", + "make_annot_command": "make_annot.py", + "munge_command": "munge_sumstats.py", + } + + def _validate_config(self): + """Validate configuration parameters""" + required_fields = ["execution_mode", "ldsc_command", "make_annot_command", "munge_command"] + for field in required_fields: + if field not in self.config: + raise ValueError(f"Missing required configuration field: {field}") + + if self.config["execution_mode"] not in ["local", "docker", "singularity"]: + raise ValueError("execution_mode must be 'local', 'docker', or 'singularity'") + + def _infer_volumes_from_paths(self, *file_paths: str, data_home: str | None = None) -> dict[str, str]: + """ + Automatically infer docker volumes or singularity binds from file paths + + Parameters + ---------- + *file_paths : str + Variable number of file paths to analyze + + Returns + ------- + dict + Dictionary mapping host paths to container paths + """ + volumes = {} + + volumes[os.getcwd()] = "/data" + + #cellink_data_path = get_data_home(data_home) + #if 
os.path.exists(cellink_data_path): + # volumes[cellink_data_path] = "/cellink_data" + + for file_path in file_paths: + if file_path: + abs_path = os.path.abspath(file_path) + parent_dir = os.path.dirname(abs_path) + + covered = False + for host_path in volumes.keys(): + host_path = str(host_path) + if abs_path.startswith(host_path): + covered = True + break + + if not covered: + container_path = f"/external_{len(volumes)}" + volumes[parent_dir] = container_path + + return volumes + + def _convert_path_to_container(self, file_path: str, volumes: dict[str, str]) -> str: + """Convert host path to container path""" + if not file_path: + return file_path + + abs_path = os.path.abspath(file_path) + + for host_path, container_path in volumes.items(): + host_path = str(host_path) + if abs_path.startswith(host_path): + relative_path = os.path.relpath(abs_path, host_path) + return os.path.join(container_path, relative_path).replace("\\", "/") + + return file_path + + def _build_container_command(self, base_command: str, file_paths: list[str] = None) -> str: + """Build docker or singularity command with volumes""" + if self.config["execution_mode"] == "local": + return base_command + + if file_paths is None: + file_paths = [] + + volumes = self._infer_volumes_from_paths(*file_paths) + container_command = self._rewrite_paths_in_command(base_command, volumes) + + #container_command = base_command + for host_path, container_path in volumes.items(): + container_command = str(container_command).replace(str(host_path), str(container_path)) + + if self.config["execution_mode"] == "docker": + volume_args = [] + for host_path, container_path in volumes.items(): + volume_args.extend(["-v", f"{host_path}:{container_path}"]) + + cmd = ["docker", "run", "--rm", *volume_args, "-w", "/data", self.config["docker_image"], container_command] + return " ".join(cmd) + + elif self.config["execution_mode"] == "singularity": + bind_args = [] + for host_path, container_path in volumes.items(): + 
bind_args.extend(["-B", f"{host_path}:{container_path}"]) + + cmd = ["singularity", "exec", *bind_args, self.config["singularity_image"], container_command] + return " ".join(cmd) + + return base_command + + def _rewrite_paths_in_command(self, command: str, volumes: dict[str, str]) -> str: + prefix_tokens = ["--bfile", "--out", "--ref-ld-chr", "--w-ld-chr", "--frqfile-chr", "--ref-ld-chr-cts", "--annot-file"] + tokens = shlex.split(command) + rewritten = [] + + for token_i, token in enumerate(tokens): + new_token = token + + if os.path.exists(token) or (token_i > 1 and tokens[token_i-1] in prefix_tokens): + abs_path = os.path.abspath(token) + + for host_path, container_path in volumes.items(): + if abs_path.startswith(host_path): + rel = os.path.relpath(abs_path, host_path) + new_token = os.path.join(container_path, rel).replace("\\", "/") + break + + rewritten.append(new_token) + + return " ".join(rewritten) + + def run_command(self, base_command: str, file_paths: list[str] = None, check: bool = True): + """ + Execute command with automatic path inference + + Parameters + ---------- + base_command : str + The base LDSC command + file_paths : list, optional + List of file paths involved in the command (for volume inference) + check : bool + Whether to raise exception on command failure + """ + if file_paths is None: + file_paths = [] + if os.getcwd() not in file_paths: + file_paths.append(os.getcwd()) + + if self.config["execution_mode"] == "local": + result = subprocess.run(base_command, shell=True, check=check, capture_output=True, text=True) + if result.stdout: + logger.info(result.stdout) + if result.stderr: + logger.warning(result.stderr) + else: + full_command = self._build_container_command(base_command, file_paths) + + logger.info(f"Executing: {full_command}") + result = subprocess.run(full_command, shell=True, check=check, capture_output=True, text=True) + if result.stdout: + logger.info(result.stdout) + if result.stderr: + logger.warning(result.stderr) + 
def munge_sumstats(
    sumstats_file: str,
    out_prefix: str = "GWAS_summary_statistics_munged",
    n_samples: int | None = None,
    merge_alleles: str | None = None,
    snplist: str | None = None,
    info_min: float = 0.9,
    maf_min: float = 0.01,
    a1_inc: bool = False,
    signed_sumstats: tuple[str, float] | None = None,
    p_col: str | None = None,
    a1_col: str | None = None,
    a2_col: str | None = None,
    snp_col: str | None = None,
    n_col: str | None = None,
    info_col: str | None = None,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> str:
    """
    Munge (clean and standardize) GWAS summary statistics for LDSC analysis

    Builds a ``munge_sumstats.py`` command line from the arguments and either
    runs it through the runner or returns the command without running it.

    Parameters
    ----------
    sumstats_file : str
        Path to the input GWAS summary statistics file (``--sumstats``)
    out_prefix : str, default "GWAS_summary_statistics_munged"
        Output prefix (``--out``); the munged file is {out_prefix}.sumstats.gz
    n_samples : int, optional
        Sample size passed as ``--N``
    merge_alleles : str, optional
        Reference allele file (e.g. w_hm3.snplist) passed as ``--merge-alleles``
    snplist : str, optional
        File passed as ``--merge``
    info_min : float, default 0.9
        Minimum INFO score; ``--info-min`` is only emitted when it differs from 0.9
    maf_min : float, default 0.01
        Minimum MAF; ``--maf-min`` is only emitted when it differs from 0.01
    a1_inc : bool, default False
        Emit ``--a1-inc`` (LDSC: A1 is the increasing allele)
    signed_sumstats : tuple[str, float], optional
        ``(column_name, null_value)`` emitted as ``--signed-sumstats COL,NULL``,
        e.g. ``("OR", 1)``, ``("BETA", 0)`` or ``("Z", 0)``
    p_col, a1_col, a2_col, snp_col, n_col, info_col : str, optional
        Non-standard column names, emitted as ``--p``, ``--a1``, ``--a2``,
        ``--snp``, ``--N-col`` and ``--info`` respectively
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use. If None, uses the global runner.
    **kwargs
        Extra ``munge_sumstats.py`` options. Underscores in a keyword are
        written as hyphens (``no_alleles=True`` -> ``--no-alleles``). ``True``
        emits a bare flag, ``False``/``None`` emit nothing, list/tuple values
        are comma-joined (``ignore=["A", "B"]`` -> ``--ignore A,B``).

    Returns
    -------
    str
        If ``run`` is True, the path ``{out_prefix}.sumstats.gz``.
        If ``run`` is False, the command string produced by the runner.

    Raises
    ------
    subprocess.CalledProcessError
        If the munging process fails (e.g., due to malformed input file)

    Examples
    --------
    >>> munged = munge_sumstats(
    ...     sumstats_file="height_gwas.txt.gz",
    ...     out_prefix="height_munged",
    ...     n_samples=253288,
    ...     merge_alleles="w_hm3.snplist",
    ... )

    Custom column names and a signed statistic:
    >>> munged = munge_sumstats(
    ...     sumstats_file="custom_gwas.txt",
    ...     out_prefix="custom_munged",
    ...     snp_col="RSID",
    ...     a1_col="EFFECT_ALLELE",
    ...     a2_col="OTHER_ALLELE",
    ...     p_col="PVAL",
    ...     signed_sumstats=("BETA", 0),
    ... )

    Just generate the command without running:
    >>> command = munge_sumstats(sumstats_file="height_gwas.txt.gz", out_prefix="height_munged", run=False)
    >>> print(command)
    """
    if runner is None:
        runner = get_ldsc_runner()

    cmd = f"{runner.munge_command} --sumstats {sumstats_file} --out {out_prefix}"

    if n_samples is not None:
        cmd += f" --N {n_samples}"
    if merge_alleles is not None:
        cmd += f" --merge-alleles {merge_alleles}"
    if snplist is not None:
        # NOTE(review): confirm munge_sumstats.py really exposes `--merge`; if it
        # does not, the option parser may resolve it as an abbreviation of
        # `--merge-alleles`.
        cmd += f" --merge {snplist}"
    # Thresholds are only forwarded when they deviate from the defaults above.
    if info_min != 0.9:
        cmd += f" --info-min {info_min}"
    if maf_min != 0.01:
        cmd += f" --maf-min {maf_min}"
    if a1_inc:
        cmd += " --a1-inc"

    if signed_sumstats is not None:
        col, null_value = signed_sumstats
        cmd += f" --signed-sumstats {col},{null_value}"
    if p_col is not None:
        cmd += f" --p {p_col}"
    if a1_col is not None:
        cmd += f" --a1 {a1_col}"
    if a2_col is not None:
        cmd += f" --a2 {a2_col}"
    if snp_col is not None:
        cmd += f" --snp {snp_col}"
    if n_col is not None:
        cmd += f" --N-col {n_col}"
    if info_col is not None:
        cmd += f" --info {info_col}"

    for flag, value in kwargs.items():
        # Python keywords cannot contain "-", while the CLI options use it.
        cli_flag = flag.replace("_", "-")
        if isinstance(value, bool):
            if value:
                cmd += f" --{cli_flag}"
        elif value is not None:
            if isinstance(value, (list, tuple)):
                value = ",".join(str(item) for item in value)
            cmd += f" --{cli_flag} {value}"

    # Input files are handed to the runner alongside the command.
    file_paths = [sumstats_file]
    if merge_alleles:
        file_paths.append(merge_alleles)
    if snplist:
        file_paths.append(snplist)

    if run:
        logger.info(f"Running munge_sumstats: {cmd}")
        runner.run_command(cmd, file_paths=file_paths, check=True)
        return f"{out_prefix}.sumstats.gz"
    else:
        return runner._build_container_command(cmd, file_paths)
cmd += f" --{flag} {value}" + + file_paths = [sumstats_file] + if merge_alleles: + file_paths.append(merge_alleles) + if snplist: + file_paths.append(snplist) + + if run: + logger.info(f"Running munge_sumstats: {cmd}") + runner.run_command(cmd, file_paths=file_paths, check=True) + return f"{out_prefix}.sumstats.gz" + else: + return runner._build_container_command(cmd, file_paths) + + +def _run_ldsc_estimate_ld_scores( + bfile_prefix: str, + out_prefix: str, + ld_wind_cm: float = 1.0, + ld_wind_kb: int | None = None, + ld_wind_snp: int | None = None, + annot_file: str | None = None, + thin_annot: bool = False, + print_snps: str | None = None, + maf_min: float = 0.01, + run: bool = True, + runner: LDSCRunner | None = None, + **kwargs, +) -> str | None: + """ + Estimate LD Scores from genotype data + """ + if runner is None: + runner = get_ldsc_runner() + + cmd = f"{runner.ldsc_command} --bfile {bfile_prefix} --l2 --out {out_prefix}" + + flags = [ld_wind_kb, ld_wind_snp, ld_wind_cm] + non_null_flags = sum(f is not None for f in flags) + + if non_null_flags > 1: + raise ValueError("Only one of ld_wind_kb, ld_wind_snp, or ld_wind_cm may be specified.") + + if ld_wind_kb is not None: + cmd += f" --ld-wind-kb {ld_wind_kb}" + elif ld_wind_snp is not None: + cmd += f" --ld-wind-snp {ld_wind_snp}" + else: + cmd += f" --ld-wind-cm {ld_wind_cm}" + + if annot_file is not None: + cmd += f" --annot {annot_file}" + if thin_annot: + cmd += " --thin-annot" + + if print_snps is not None: + cmd += f" --print-snps {print_snps}" + + if maf_min != 0.01: + cmd += f" --maf {maf_min}" + + for flag, value in kwargs.items(): + if isinstance(value, bool): + if value: + cmd += f" --{flag}" + elif value is not None: + cmd += f" --{flag} {value}" + + cmd += " --yes-really" + + file_paths = [f"{bfile_prefix}.bed", f"{bfile_prefix}.bim", f"{bfile_prefix}.fam"] + if annot_file: + file_paths.append(annot_file) + if print_snps: + file_paths.append(print_snps) + + if run: + logger.info(f"Estimating LD 
def estimate_ld_scores_from_bimfile(
    bfile_prefix: str,
    out_prefix: str,
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    annot_file: str | None = None,
    thin_annot: bool = False,
    print_snps: str | None = None,
    maf_min: float = 0.01,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate LD scores from PLINK bfile (works with any bfile, including 1000G)

    Thin wrapper that forwards every argument to the internal LD-score command
    builder and packages its return value in a dictionary.

    Parameters
    ----------
    bfile_prefix : str
        Path to PLINK binary files (without .bed/.bim/.fam extension)
    out_prefix : str
        Prefix for output files
    ld_wind_cm : float, default 1.0
        LD window size in centiMorgans
    ld_wind_kb : int, optional
        LD window size in kilobases (alternative to ld_wind_cm)
    ld_wind_snp : int, optional
        LD window size in number of SNPs (alternative to ld_wind_cm)
    annot_file : str, optional
        Annotation file for computing category-specific LD scores
    thin_annot : bool, default False
        Forward LDSC's ``--thin-annot`` (per the LDSC documentation: the
        annotation file holds only annotation columns, without the
        CHR/BP/SNP/CM columns). Only forwarded together with ``annot_file``.
    print_snps : str, optional
        File with SNP IDs to restrict LD score computation
    maf_min : float, default 0.01
        Minimum MAF threshold
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use
    **kwargs
        Additional arguments passed to ldsc.py

    Returns
    -------
    dict
        With ``run=True``: ``'ld_scores_file'`` and ``'files_created'``
        (the .l2.ldscore.gz, .l2.M, .l2.M_5_50 and .log paths derived from
        ``out_prefix``). With ``run=False``: ``'command'`` only.

    Examples
    --------
    >>> result = estimate_ld_scores_from_bimfile(
    ...     bfile_prefix="1000G_EUR_Phase3_plink/1000G.EUR.QC.22",
    ...     out_prefix="my_ldscores_chr22",
    ...     annot_file="immune_genes.22.annot.gz",
    ...     print_snps="hm3_snps.txt",
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    forwarded = {
        "bfile_prefix": bfile_prefix,
        "out_prefix": out_prefix,
        "ld_wind_cm": ld_wind_cm,
        "ld_wind_kb": ld_wind_kb,
        "ld_wind_snp": ld_wind_snp,
        "annot_file": annot_file,
        "thin_annot": thin_annot,
        "print_snps": print_snps,
        "maf_min": maf_min,
        "run": run,
        "runner": runner,
    }
    outcome = _run_ldsc_estimate_ld_scores(**forwarded, **kwargs)

    # Dry run: the helper handed back the command string instead of a path.
    if not run:
        return {"command": outcome}

    output_suffixes = (".l2.ldscore.gz", ".l2.M", ".l2.M_5_50", ".log")
    return {
        "ld_scores_file": outcome,
        "files_created": [f"{out_prefix}{suffix}" for suffix in output_suffixes],
    }
def estimate_ld_scores_from_donor_data(
    dd: DonorData,
    out_prefix: str = "ldscores",
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    annot_file: str | None = None,
    thin_annot: bool = False,
    print_snps: str | None = None,
    maf_min: float = 0.01,
    cleanup_files: bool = True,
    plink_export_kwargs: dict | None = None,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate LD scores from DonorData object

    Exports ``dd.G`` to PLINK files named after ``out_prefix`` and then calls
    :func:`estimate_ld_scores_from_bimfile` on them.

    Parameters
    ----------
    dd : DonorData
        DonorData object containing genotype information
    out_prefix : str, default "ldscores"
        Prefix for output files (also used for temporary PLINK files)
    cleanup_files : bool, default True
        Whether to remove the temporary {out_prefix}.bed/.bim/.fam files after
        the LDSC run. They are removed even when the run raises. Nothing is
        removed when ``run`` is False, because the returned command still
        refers to those files.
    plink_export_kwargs : dict, optional
        Additional keyword arguments to pass to to_plink()
    run : bool, default True
        Whether to execute the command or just return it. The PLINK export
        happens in both cases.
    runner : LDSCRunner, optional
        Runner instance to use. If None, uses the global runner.
    ld_wind_cm, ld_wind_kb, ld_wind_snp, annot_file, thin_annot, print_snps, maf_min, **kwargs
        Forwarded unchanged to :func:`estimate_ld_scores_from_bimfile`

    Returns
    -------
    dict
        Results dictionary (same as estimate_ld_scores_from_bimfile)

    Examples
    --------
    >>> result = estimate_ld_scores_from_donor_data(
    ...     dd=my_donor_data, out_prefix="my_ldscores", annot_file="immune_genes.annot.gz", ld_wind_cm=1.0
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    if plink_export_kwargs is None:
        plink_export_kwargs = {}

    logger.info("Exporting genotype data to PLINK format for LD score estimation")
    to_plink(dd.G, out_prefix, **plink_export_kwargs)

    try:
        results = estimate_ld_scores_from_bimfile(
            bfile_prefix=out_prefix,
            out_prefix=out_prefix,
            ld_wind_cm=ld_wind_cm,
            ld_wind_kb=ld_wind_kb,
            ld_wind_snp=ld_wind_snp,
            annot_file=annot_file,
            thin_annot=thin_annot,
            print_snps=print_snps,
            maf_min=maf_min,
            run=run,
            runner=runner,
            **kwargs,
        )
    finally:
        # Runs on failure too, so a crashed LDSC call does not leave the
        # exported genotype files behind.
        if cleanup_files and run:
            for ext in (".bim", ".fam", ".bed"):
                filename = out_prefix + ext
                if os.path.isfile(filename):
                    os.remove(filename)
                    logger.info(f"Cleaned up file: {filename}")

    return results
dd=my_donor_data, out_prefix="my_ldscores", annot_file="immune_genes.annot.gz", ld_wind_cm=1.0 + ... ) + """ + if runner is None: + runner = get_ldsc_runner() + + if plink_export_kwargs is None: + plink_export_kwargs = {} + + logger.info("Exporting genotype data to PLINK format for LD score estimation") + to_plink(dd.G, out_prefix, **plink_export_kwargs) + + results = estimate_ld_scores_from_bimfile( + bfile_prefix=out_prefix, + out_prefix=out_prefix, + ld_wind_cm=ld_wind_cm, + ld_wind_kb=ld_wind_kb, + ld_wind_snp=ld_wind_snp, + annot_file=annot_file, + thin_annot=thin_annot, + print_snps=print_snps, + maf_min=maf_min, + run=run, + runner=runner, + **kwargs, + ) + + if cleanup_files and run: + extensions = [".bim", ".fam", ".bed"] + for ext in extensions: + filename = out_prefix + ext + if os.path.isfile(filename): + os.remove(filename) + logger.info(f"Cleaned up file: {filename}") + + return results + + +def _run_ldsc_heritability( + sumstats_file: str, + ref_ld_chr: str, + w_ld_chr: str, + out_prefix: str, + overlap_annot: bool = False, + frqfile_chr: str | None = None, + not_m_5_50: bool = False, + print_coefficients: bool = False, + print_delete_vals: bool = False, + samp_prev: float | None = None, + pop_prev: float | None = None, + intercept_h2: float | None = None, + no_intercept: bool = False, + run: bool = True, + runner: LDSCRunner | None = None, + **kwargs, +) -> str | None: + """ + Estimate SNP heritability using LD Score regression + """ + if runner is None: + runner = get_ldsc_runner() + + cmd = ( + f"{runner.ldsc_command} --h2 {sumstats_file} --ref-ld-chr {ref_ld_chr} --w-ld-chr {w_ld_chr} --out {out_prefix}" + ) + + if overlap_annot: + cmd += " --overlap-annot" + if frqfile_chr is None: + logger.warning("--overlap-annot requires --frqfile-chr") + + if frqfile_chr is not None: + cmd += f" --frqfile-chr {frqfile_chr}" + + if not_m_5_50: + cmd += " --not-M-5-50" + + if print_coefficients: + cmd += " --print-coefficients" + + if print_delete_vals: + cmd 
def estimate_heritability(
    sumstats_file: str,
    ref_ld_chr: str,
    w_ld_chr: str,
    out_prefix: str,
    overlap_annot: bool = False,
    frqfile_chr: str | None = None,
    not_m_5_50: bool = False,
    print_coefficients: bool = False,
    print_delete_vals: bool = False,
    samp_prev: float | None = None,
    pop_prev: float | None = None,
    intercept_h2: float | None = None,
    no_intercept: bool = False,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate SNP heritability using LD Score regression

    Validates the three required inputs, forwards everything to
    ``_run_ldsc_heritability`` and wraps its return value in a dictionary.

    Parameters
    ----------
    sumstats_file : str
        Path to munged summary statistics file (.sumstats.gz)
    ref_ld_chr : str
        Prefix for reference LD scores (with @, e.g., "baseline.")
    w_ld_chr : str
        Prefix for regression weights (with @, e.g., "weights.")
    out_prefix : str
        Prefix for output files
    overlap_annot : bool, default False
        Use overlapping annotation model
    frqfile_chr : str, optional
        Prefix for allele frequency files (required with overlap_annot)
    not_m_5_50 : bool, default False
        Don't restrict to common SNPs for estimating h2
    print_coefficients : bool, default False
        Print coefficient estimates
    print_delete_vals : bool, default False
        Print delete values
    samp_prev : float, optional
        Sample prevalence (for binary traits)
    pop_prev : float, optional
        Population prevalence (for binary traits)
    intercept_h2 : float, optional
        Constrain the LD Score regression intercept
    no_intercept : bool, default False
        Force intercept to 1
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use
    **kwargs
        Additional arguments passed to ldsc.py

    Returns
    -------
    dict
        With ``run=True``: ``'log_file'`` and ``'files_created'``.
        With ``run=False``: ``'command'`` only.

    Raises
    ------
    ValueError
        If ``sumstats_file``, ``ref_ld_chr`` or ``w_ld_chr`` is empty

    Examples
    --------
    >>> result = estimate_heritability(
    ...     sumstats_file="height_munged.sumstats.gz",
    ...     ref_ld_chr="baseline_v1.2/baseline.",
    ...     w_ld_chr="weights_hm3_no_hla/weights.",
    ...     out_prefix="height_h2",
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    # Checked in this order; the first empty argument decides the message.
    required_inputs = (
        (sumstats_file, "sumstats_file is required"),
        (ref_ld_chr, "ref_ld_chr is required"),
        (w_ld_chr, "w_ld_chr is required"),
    )
    for supplied, complaint in required_inputs:
        if not supplied:
            raise ValueError(complaint)

    forwarded = {
        "sumstats_file": sumstats_file,
        "ref_ld_chr": ref_ld_chr,
        "w_ld_chr": w_ld_chr,
        "out_prefix": out_prefix,
        "overlap_annot": overlap_annot,
        "frqfile_chr": frqfile_chr,
        "not_m_5_50": not_m_5_50,
        "print_coefficients": print_coefficients,
        "print_delete_vals": print_delete_vals,
        "samp_prev": samp_prev,
        "pop_prev": pop_prev,
        "intercept_h2": intercept_h2,
        "no_intercept": no_intercept,
        "run": run,
        "runner": runner,
    }
    outcome = _run_ldsc_heritability(**forwarded, **kwargs)

    # Dry run: the helper handed back the command string instead of a path.
    if not run:
        return {"command": outcome}
    return {"log_file": outcome, "files_created": [f"{out_prefix}.log"]}
) + """ + if runner is None: + runner = get_ldsc_runner() + + if not sumstats_file: + raise ValueError("sumstats_file is required") + if not ref_ld_chr: + raise ValueError("ref_ld_chr is required") + if not w_ld_chr: + raise ValueError("w_ld_chr is required") + + results = {} + + result_file = _run_ldsc_heritability( + sumstats_file=sumstats_file, + ref_ld_chr=ref_ld_chr, + w_ld_chr=w_ld_chr, + out_prefix=out_prefix, + overlap_annot=overlap_annot, + frqfile_chr=frqfile_chr, + not_m_5_50=not_m_5_50, + print_coefficients=print_coefficients, + print_delete_vals=print_delete_vals, + samp_prev=samp_prev, + pop_prev=pop_prev, + intercept_h2=intercept_h2, + no_intercept=no_intercept, + run=run, + runner=runner, + **kwargs, + ) + + if run: + results["log_file"] = result_file + results["files_created"] = [f"{out_prefix}.log"] + else: + results["command"] = result_file + + return results + + +def _run_ldsc_genetic_correlation( + sumstats_files: list[str], + ref_ld_chr: str, + w_ld_chr: str, + out_prefix: str, + overlap_annot: bool = False, + frqfile_chr: str | None = None, + not_m_5_50: bool = False, + print_coefficients: bool = False, + print_delete_vals: bool = False, + samp_prev: list[float] | None = None, + pop_prev: list[float] | None = None, + intercept_h2: list[float] | None = None, + intercept_gencov: list[float] | None = None, + no_intercept: bool = False, + run: bool = True, + runner: LDSCRunner | None = None, + **kwargs, +) -> str | None: + """ + Estimate genetic correlation using LD Score regression + """ + if runner is None: + runner = get_ldsc_runner() + + sumstats_str = ",".join(sumstats_files) + cmd = ( + f"{runner.ldsc_command} --rg {sumstats_str} --ref-ld-chr {ref_ld_chr} --w-ld-chr {w_ld_chr} --out {out_prefix}" + ) + + if overlap_annot: + cmd += " --overlap-annot" + if frqfile_chr is None: + logger.warning("--overlap-annot requires --frqfile-chr") + + if frqfile_chr is not None: + cmd += f" --frqfile-chr {frqfile_chr}" + + if not_m_5_50: + cmd += " 
def estimate_genetic_correlation(
    sumstats_files: list[str],
    ref_ld_chr: str,
    w_ld_chr: str,
    out_prefix: str,
    overlap_annot: bool = False,
    frqfile_chr: str | None = None,
    not_m_5_50: bool = False,
    print_coefficients: bool = False,
    print_delete_vals: bool = False,
    samp_prev: list[float] | None = None,
    pop_prev: list[float] | None = None,
    intercept_h2: list[float] | None = None,
    intercept_gencov: list[float] | None = None,
    no_intercept: bool = False,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate genetic correlation using LD Score regression

    Validates the inputs, forwards everything to
    ``_run_ldsc_genetic_correlation`` and wraps its return value in a dictionary.

    Parameters
    ----------
    sumstats_files : list[str]
        List of paths to munged summary statistics files (.sumstats.gz);
        at least two are required
    ref_ld_chr : str
        Prefix for reference LD scores (with @, e.g., "baseline.")
    w_ld_chr : str
        Prefix for regression weights (with @, e.g., "weights.")
    out_prefix : str
        Prefix for output files
    overlap_annot : bool, default False
        Use overlapping annotation model
    frqfile_chr : str, optional
        Prefix for allele frequency files (required with overlap_annot)
    not_m_5_50 : bool, default False
        Don't restrict to common SNPs
    print_coefficients : bool, default False
        Print coefficient estimates
    print_delete_vals : bool, default False
        Print delete values
    samp_prev : list[float], optional
        Sample prevalences for each trait (use None for quantitative traits)
    pop_prev : list[float], optional
        Population prevalences for each trait
    intercept_h2 : list[float], optional
        Constrain h2 intercepts for each trait
    intercept_gencov : list[float], optional
        Constrain genetic covariance intercepts
    no_intercept : bool, default False
        Force intercepts to 1 and 0
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use
    **kwargs
        Additional arguments passed to ldsc.py

    Returns
    -------
    dict
        With ``run=True``: ``'log_file'`` and ``'files_created'``.
        With ``run=False``: ``'command'`` only.

    Raises
    ------
    ValueError
        If fewer than two summary statistics files are given, or if
        ``ref_ld_chr`` or ``w_ld_chr`` is empty

    Examples
    --------
    >>> result = estimate_genetic_correlation(
    ...     sumstats_files=["height_munged.sumstats.gz", "bmi_munged.sumstats.gz"],
    ...     ref_ld_chr="baseline_v1.2/baseline.",
    ...     w_ld_chr="weights_hm3_no_hla/weights.",
    ...     out_prefix="height_bmi_rg",
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    # A correlation needs a pair of traits, so one file (or none) is rejected.
    too_few_traits = (not sumstats_files) or len(sumstats_files) < 2
    if too_few_traits:
        raise ValueError("sumstats_files must contain at least 2 files for genetic correlation")
    for supplied, complaint in ((ref_ld_chr, "ref_ld_chr is required"), (w_ld_chr, "w_ld_chr is required")):
        if not supplied:
            raise ValueError(complaint)

    forwarded = {
        "sumstats_files": sumstats_files,
        "ref_ld_chr": ref_ld_chr,
        "w_ld_chr": w_ld_chr,
        "out_prefix": out_prefix,
        "overlap_annot": overlap_annot,
        "frqfile_chr": frqfile_chr,
        "not_m_5_50": not_m_5_50,
        "print_coefficients": print_coefficients,
        "print_delete_vals": print_delete_vals,
        "samp_prev": samp_prev,
        "pop_prev": pop_prev,
        "intercept_h2": intercept_h2,
        "intercept_gencov": intercept_gencov,
        "no_intercept": no_intercept,
        "run": run,
        "runner": runner,
    }
    outcome = _run_ldsc_genetic_correlation(**forwarded, **kwargs)

    # Dry run: the helper handed back the command string instead of a path.
    if not run:
        return {"command": outcome}
    return {"log_file": outcome, "files_created": [f"{out_prefix}.log"]}
) + """ + if runner is None: + runner = get_ldsc_runner() + + if not sumstats_files or len(sumstats_files) < 2: + raise ValueError("sumstats_files must contain at least 2 files for genetic correlation") + if not ref_ld_chr: + raise ValueError("ref_ld_chr is required") + if not w_ld_chr: + raise ValueError("w_ld_chr is required") + + results = {} + + result_file = _run_ldsc_genetic_correlation( + sumstats_files=sumstats_files, + ref_ld_chr=ref_ld_chr, + w_ld_chr=w_ld_chr, + out_prefix=out_prefix, + overlap_annot=overlap_annot, + frqfile_chr=frqfile_chr, + not_m_5_50=not_m_5_50, + print_coefficients=print_coefficients, + print_delete_vals=print_delete_vals, + samp_prev=samp_prev, + pop_prev=pop_prev, + intercept_h2=intercept_h2, + intercept_gencov=intercept_gencov, + no_intercept=no_intercept, + run=run, + runner=runner, + **kwargs, + ) + + if run: + results["log_file"] = result_file + results["files_created"] = [f"{out_prefix}.log"] + else: + results["command"] = result_file + + return results + + +def _run_ldsc_make_annot( + bimfile: str, + annot_file: str, + gene_set_file: str | None = None, + gene_coord_file: str | None = None, + windowsize: int | None = None, + bed_file: str | None = None, + nomerge: bool = False, + run: bool = True, + runner: LDSCRunner | None = None, + **kwargs, +) -> str | None: + """ + Internal function to create annotation files using make_annot.py + + Either gene_set_file or bed_file must be provided. + Returns annot_file path if run=True, otherwise command string. 
+ """ + if runner is None: + runner = get_ldsc_runner() + + if gene_set_file is None and bed_file is None: + raise ValueError("Either gene_set_file or bed_file must be provided") + + cmd = f"{runner.make_annot_command} --bimfile {bimfile} --annot-file {annot_file}" + + if gene_set_file is not None: + cmd += f" --gene-set-file {gene_set_file}" + + if gene_coord_file is not None: + cmd += f" --gene-coord-file {gene_coord_file}" + + if windowsize is not None: + cmd += f" --windowsize {windowsize}" + + if bed_file is not None: + cmd += f" --bed-file {bed_file}" + + if nomerge: + cmd += " --nomerge" + + for flag, value in kwargs.items(): + if isinstance(value, bool): + if value: + cmd += f" --{flag}" + elif value is not None: + cmd += f" --{flag} {value}" + + file_paths = [bimfile] + if gene_set_file: + file_paths.append(gene_set_file) + if gene_coord_file: + file_paths.append(gene_coord_file) + if bed_file: + file_paths.append(bed_file) + + if run: + logger.info(f"Creating annotation file: {cmd}") + runner.run_command(cmd, file_paths=file_paths, check=True) + return annot_file + else: + return runner._build_container_command(cmd, file_paths) + + +def _expand_annot_to_full_format(bimfile: str, annot_file: str) -> None: + """ + Post-process a make_annot.py output from ANNOT-only to CHR/BP/SNP/CM/ANNOT format. + + make_annot.py writes a single-column file (header: ANNOT, values: 0/1). + This function reads the matching bimfile, prepends the SNP coordinate columns, + and rewrites the annotation file in place so binary and continuous annotation files + share the same format. + + Does nothing if the file already contains more than one column (idempotent). 
def make_annot_from_bimfile(
    bimfile: str,
    annot_file: str,
    gene_set_file: str | None = None,
    gene_coord_file: str | None = None,
    windowsize: int | None = None,
    bed_file: str | None = None,
    nomerge: bool = False,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Create annotation file from a PLINK bimfile

    Runs make_annot.py (through ``_run_ldsc_make_annot``) for the SNPs listed
    in ``bimfile`` and then expands the result in place to the five-column
    layout CHR, BP, SNP, CM, ANNOT via ``_expand_annot_to_full_format``.
    Works with any PLINK bimfile, including standard reference panels such as
    1000 Genomes.

    Parameters
    ----------
    bimfile : str
        Path to PLINK .bim file; defines the SNPs that are annotated
    annot_file : str
        The name of the annot file to output. Should typically end in .annot or .annot.gz
    gene_set_file : str, optional
        A file of gene names, one line per gene. Either this or bed_file must be provided.
    gene_coord_file : str, optional
        A file with columns GENE, CHR, START, and END (base pair coordinates of
        TSS and TES). May contain more genes than the gene set. Only used with
        gene_set_file.
    windowsize : int, optional
        How many base pairs to add around the transcribed region. Only used
        with gene-set based annotations. Typical values: 0-500000 (0-500kb).
    bed_file : str, optional
        The UCSC bed file with the regions that make up your annotation.
        Either this or gene_set_file must be provided.
    nomerge : bool, default False
        Don't merge the bed file; make an annot file with values proportional to
        the number of intervals in the bedfile overlapping the SNP. Only used with bed_file.
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use. If None, uses the global runner.
    **kwargs
        Additional command line arguments to pass to make_annot.py

    Returns
    -------
    dict
        Always contains ``'annot_file'`` and ``'files_created'`` (a list that
        holds ``annot_file`` after a real run and is empty for ``run=False``).
        With ``run=False`` the dict additionally contains ``'command'``.

    Raises
    ------
    ValueError
        If neither gene_set_file nor bed_file is provided

    Examples
    --------
    Gene-set based annotation for chromosome 22:
    >>> result = make_annot_from_bimfile(
    ...     bimfile="1000G_EUR_Phase3_plink/1000G.EUR.QC.22.bim",
    ...     annot_file="immune_genes.22.annot.gz",
    ...     gene_set_file="immune_genes.txt",
    ...     gene_coord_file="ENSG_coord.txt",
    ...     windowsize=100000,
    ... )

    BED-file based annotation for enhancer regions:
    >>> result = make_annot_from_bimfile(
    ...     bimfile="1000G.EUR.QC.1.bim", annot_file="enhancers.1.annot.gz", bed_file="enhancers.bed"
    ... )

    Generate command without running:
    >>> result = make_annot_from_bimfile(
    ...     bimfile="1000G.EUR.QC.22.bim",
    ...     annot_file="my_annot.22.annot.gz",
    ...     gene_set_file="my_genes.txt",
    ...     gene_coord_file="ENSG_coord.txt",
    ...     windowsize=50000,
    ...     run=False,
    ... )
    >>> print(result["command"])

    Notes
    -----
    - gene_coord_file and windowsize are only used with gene_set_file;
      nomerge is only used with bed_file
    - After a real run the annotation file has the columns CHR, BP, SNP, CM, ANNOT
    - For whole-genome analyses, run this once per chromosome, then compute
      LD scores per chromosome from the resulting annotation files

    See Also
    --------
    make_annot_from_donor_data : Create binary annotations from DonorData object.
    make_continuous_annot_from_bimfile : Continuous annotation variant.
    compute_ld_scores_with_annotations_from_bimfile : Next step after annotation.
    """
    if runner is None:
        runner = get_ldsc_runner()

    forwarded = {
        "bimfile": bimfile,
        "annot_file": annot_file,
        "gene_set_file": gene_set_file,
        "gene_coord_file": gene_coord_file,
        "windowsize": windowsize,
        "bed_file": bed_file,
        "nomerge": nomerge,
        "run": run,
        "runner": runner,
    }
    outcome = _run_ldsc_make_annot(**forwarded, **kwargs)

    # Dry run: nothing was written, so only report the command.
    if not run:
        return {"annot_file": annot_file, "files_created": [], "command": outcome}

    # make_annot.py output is single-column; add the SNP coordinate columns.
    _expand_annot_to_full_format(bimfile, annot_file)
    return {"annot_file": outcome, "files_created": [annot_file]}
def make_annot_from_donor_data(
    dd: DonorData,
    annot_file: str,
    gene_set_file: str | None = None,
    gene_coord_file: str | None = None,
    windowsize: int | None = None,
    bed_file: str | None = None,
    nomerge: bool = False,
    out_prefix: str = "ldsc_annot",
    run: bool = True,
    cleanup_files: bool = True,
    plink_export_kwargs: dict | None = None,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Create a binary LDSC annotation file from a DonorData object.

    Exports ``dd.G`` to temporary PLINK files (``{out_prefix}.bed/.bim/.fam``),
    builds the annotation for the exported ``.bim`` file with LDSC's
    ``make_annot.py``, and expands the result to the full five-column format
    (CHR, BP, SNP, CM, ANNOT) -- the same steps, and the same return structure,
    as :func:`make_annot_from_bimfile`.

    Parameters
    ----------
    dd : DonorData
        DonorData object containing genotype information.
    annot_file : str
        Name of the annotation file to write. Should typically end in
        ``.annot`` or ``.annot.gz``.
    gene_set_file : str, optional
        File of gene names, one per line, for gene-set based annotations.
        Either this or ``bed_file`` should be provided, but not both.
    gene_coord_file : str, optional
        File with columns GENE, CHR, START and END. May contain more genes than
        the gene set. Only used with ``gene_set_file``.
    windowsize : int, optional
        Base pairs added on each side of the transcribed region. Only used with
        ``gene_set_file``. Typical values are 0 (gene body only) to 500000;
        100000 is a common choice.
    bed_file : str, optional
        UCSC BED file with the regions that make up the annotation.
    nomerge : bool, default False
        Do not merge the BED intervals; annotation values are then proportional
        to the number of intervals overlapping each SNP. Only used with
        ``bed_file``.
    out_prefix : str, default "ldsc_annot"
        Prefix for the temporary PLINK files created during export.
    run : bool, default True
        Whether to execute the command or only return it.
    cleanup_files : bool, default True
        Remove ``{out_prefix}.bed``, ``.bim`` and ``.fam`` after a run. Nothing
        is removed when ``run=False``, because the returned command still refers
        to the exported ``.bim`` file.
    plink_export_kwargs : dict, optional
        Additional keyword arguments passed to ``to_plink()``.
    runner : LDSCRunner, optional
        Runner instance to use. If None, the global runner is used.
    **kwargs
        Additional command line arguments passed to ``make_annot.py``.

    Returns
    -------
    dict
        - ``'annot_file'``: path to the annotation file
        - ``'files_created'``: list of created files (empty when ``run=False``)
        - ``'command'``: command string (only when ``run=False``)

    Examples
    --------
    >>> result = make_annot_from_donor_data(
    ...     dd=my_donor_data,
    ...     annot_file="immune_genes.annot.gz",
    ...     gene_set_file="immune_genes.txt",
    ...     gene_coord_file="ENSG_coord.txt",
    ...     windowsize=100000,
    ... )

    Generate the command without running it:

    >>> result = make_annot_from_donor_data(
    ...     dd=my_donor_data,
    ...     annot_file="enhancers.annot.gz",
    ...     bed_file="enhancers.bed",
    ...     run=False,
    ... )
    >>> print(result["command"])

    Notes
    -----
    - Argument validation (e.g. that one of ``gene_set_file`` / ``bed_file`` is
      given) is not performed here; it is presumably handled by the underlying
      make_annot helper -- confirm there.
    - Temporary PLINK files are written relative to the current working
      directory unless ``out_prefix`` contains a directory component.

    See Also
    --------
    make_annot_from_bimfile : Create annotations from an existing PLINK bimfile.
    compute_ld_scores_with_annotations_from_donor_data : Next step after annotation.
    """
    # Resolve the runner here, exactly as make_annot_from_bimfile does, so the
    # helper never receives None.
    if runner is None:
        runner = get_ldsc_runner()

    if plink_export_kwargs is None:
        plink_export_kwargs = {}

    logger.info("Exporting genotype data to PLINK format for annotation creation")
    to_plink(dd.G, out_prefix, **plink_export_kwargs)
    bimfile = f"{out_prefix}.bim"

    results: dict[str, Any] = {"annot_file": annot_file, "files_created": []}

    try:
        # The helper returns the annotation path when run=True and the command
        # string when run=False (see make_annot_from_bimfile).
        outcome = _run_ldsc_make_annot(
            bimfile=bimfile,
            annot_file=annot_file,
            gene_set_file=gene_set_file,
            gene_coord_file=gene_coord_file,
            windowsize=windowsize,
            bed_file=bed_file,
            nomerge=nomerge,
            run=run,
            runner=runner,
            **kwargs,
        )

        if run:
            _expand_annot_to_full_format(bimfile, annot_file)
            results["annot_file"] = outcome
            results["files_created"].append(annot_file)
        else:
            results["command"] = outcome
    finally:
        # Remove the exported genotype files even when the run failed, so a
        # crashed job does not leave large temporary files behind.
        if cleanup_files and run:
            for ext in (".bim", ".fam", ".bed"):
                filename = out_prefix + ext
                if os.path.isfile(filename):
                    os.remove(filename)
                    logger.info(f"Cleaned up file: {filename}")

    return results
def compute_ld_scores_with_annotations_from_bimfile(
    bfile_prefix: str,
    annot_file: str,
    out_prefix: str,
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    print_snps: str | None = None,
    thin_annot: bool = False,
    maf_min: float = 0.01,
    yes_really: bool = True,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Compute LD scores with cell-type-specific annotations from a PLINK bfile.

    Builds and (optionally) runs an ``ldsc.py --l2 --annot`` command. This is the
    first step of a cell-type-specific LDSC analysis and is normally run once per
    chromosome and per cell type.

    Parameters
    ----------
    bfile_prefix : str
        Path to the PLINK binary files without the ``.bed/.bim/.fam`` extension,
        e.g. ``"1000G_EUR_Phase3_plink/1000G.EUR.QC.22"``.
    annot_file : str
        Annotation file created by ``make_annot_from_bimfile()``,
        ``make_annot_from_donor_data()``, ``make_continuous_annot_from_bimfile()``
        or ``make_continuous_annot_from_donor_data()``,
        e.g. ``"CD8_Naive.22.annot.gz"``.
    out_prefix : str
        Prefix for output files. A run is expected to create
        ``{out_prefix}.l2.ldscore.gz``, ``{out_prefix}.l2.M``,
        ``{out_prefix}.l2.M_5_50`` and ``{out_prefix}.log``.
    ld_wind_cm : float, default 1.0
        LD window in centiMorgans. Used when neither ``ld_wind_kb`` nor
        ``ld_wind_snp`` is given.
    ld_wind_kb : int, optional
        LD window in kilobases. Replaces the default centiMorgan window.
    ld_wind_snp : int, optional
        LD window in number of SNPs. Replaces the default centiMorgan window.
    print_snps : str, optional
        File with SNP IDs (one per row) to restrict which SNPs get LD scores
        reported, e.g. ``"hapmap3_snps/hm.22.snp"``.
    thin_annot : bool, default False
        Set to True only for an annotation file that lacks the leading
        CHR / BP / SNP / CM columns. The annotation functions in this module
        write the full five-column format, so the default is correct for them.
    maf_min : float, default 0.01
        Minor allele frequency threshold. ``--maf`` is only added to the command
        when this differs from 0.01; at the default (or ``None``) no ``--maf``
        argument is passed and LDSC's own default applies.
    yes_really : bool, default True
        Add ``--yes-really`` (needed by LDSC for whole-chromosome windows).
    run : bool, default True
        Whether to execute the command or only return it.
    runner : LDSCRunner, optional
        Runner instance to use. If None, the global runner is used.
    **kwargs
        Extra ``ldsc.py`` arguments. Each key is emitted verbatim as
        ``--<key> <value>``; ``True`` emits a bare ``--<key>``; ``False`` and
        ``None`` are skipped.

    Returns
    -------
    dict
        When ``run=True``: ``'ld_scores_file'`` and ``'files_created'``.
        When ``run=False``: ``'command'`` only.

    Raises
    ------
    ValueError
        If both ``ld_wind_kb`` and ``ld_wind_snp`` are given, if one of them is
        combined with a non-default ``ld_wind_cm``, or if no window is specified
        at all.

    Examples
    --------
    >>> result = compute_ld_scores_with_annotations_from_bimfile(
    ...     bfile_prefix="1000G_EUR_Phase3_plink/1000G.EUR.QC.22",
    ...     annot_file="CD8_Naive.22.annot.gz",
    ...     out_prefix="CD8_Naive.22",
    ...     print_snps="hapmap3_snps/hm.22.snp",
    ... )

    Use a kilobase window and only build the command:

    >>> result = compute_ld_scores_with_annotations_from_bimfile(
    ...     bfile_prefix="1000G.EUR.QC.22",
    ...     annot_file="CD8_Naive.22.annot.gz",
    ...     out_prefix="CD8_Naive.22",
    ...     ld_wind_kb=1000,
    ...     run=False,
    ... )
    >>> print(result["command"])

    See Also
    --------
    compute_ld_scores_with_annotations_from_donor_data : Compute from DonorData.
    make_annot_from_bimfile : Create the annotation file.
    estimate_celltype_specific_heritability : Run the cell-type-specific analysis.
    """
    if runner is None:
        runner = get_ldsc_runner()

    conflict_msg = "Only one of ld_wind_kb, ld_wind_snp, or ld_wind_cm may be specified."
    default_cm = 1.0  # mirrors the signature default of ld_wind_cm
    alternative_given = ld_wind_kb is not None or ld_wind_snp is not None

    # ld_wind_cm always carries a value by default, so it cannot be counted as an
    # explicit choice: doing so made ld_wind_kb / ld_wind_snp unusable. It only
    # conflicts when the caller moved it away from its default.
    if ld_wind_kb is not None and ld_wind_snp is not None:
        raise ValueError(conflict_msg)
    if alternative_given and ld_wind_cm is not None and ld_wind_cm != default_cm:
        raise ValueError(conflict_msg)
    if not alternative_given and ld_wind_cm is None:
        raise ValueError("One of ld_wind_kb, ld_wind_snp, or ld_wind_cm must be specified.")

    cmd = f"{runner.ldsc_command} --l2 --bfile {bfile_prefix} --annot {annot_file} --out {out_prefix}"

    if ld_wind_kb is not None:
        cmd += f" --ld-wind-kb {ld_wind_kb}"
    elif ld_wind_snp is not None:
        cmd += f" --ld-wind-snp {ld_wind_snp}"
    else:
        cmd += f" --ld-wind-cm {ld_wind_cm}"

    if thin_annot:
        cmd += " --thin-annot"

    if print_snps:
        cmd += f" --print-snps {print_snps}"

    # Skip None so the command never contains a literal "--maf None".
    if maf_min is not None and maf_min != 0.01:
        cmd += f" --maf {maf_min}"

    if yes_really:
        cmd += " --yes-really"

    for flag, value in kwargs.items():
        if isinstance(value, bool):
            if value:
                cmd += f" --{flag}"
        elif value is not None:
            cmd += f" --{flag} {value}"

    # Input files the runner needs to see (e.g. to mount them into a container).
    file_paths = [f"{bfile_prefix}.bed", f"{bfile_prefix}.bim", f"{bfile_prefix}.fam", annot_file]
    if print_snps:
        file_paths.append(print_snps)

    if not run:
        return {"command": runner._build_container_command(cmd, file_paths)}

    logger.info(f"Computing LD scores with annotations: {cmd}")
    runner.run_command(cmd, file_paths=file_paths, check=True)

    ld_scores_file = f"{out_prefix}.l2.ldscore.gz"
    return {
        "ld_scores_file": ld_scores_file,
        "files_created": [
            ld_scores_file,
            f"{out_prefix}.l2.M",
            f"{out_prefix}.l2.M_5_50",
            f"{out_prefix}.log",
        ],
    }
def compute_ld_scores_with_annotations_from_donor_data(
    dd: DonorData,
    annot_file: str,
    out_prefix: str = "ldscores_annot",
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    print_snps: str | None = None,
    thin_annot: bool = False,
    maf_min: float = 0.01,
    yes_really: bool = True,
    cleanup_files: bool = True,
    plink_export_kwargs: dict | None = None,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Compute LD scores with cell-type-specific annotations from a DonorData object.

    Exports ``dd.G`` to temporary PLINK files named ``{out_prefix}.bed/.bim/.fam``
    and delegates to :func:`compute_ld_scores_with_annotations_from_bimfile`, using
    ``out_prefix`` both as the PLINK prefix and as the LDSC output prefix.

    Parameters
    ----------
    dd : DonorData
        DonorData object containing genotype information.
    annot_file : str
        Annotation file created by one of the ``make_annot_*`` /
        ``make_continuous_annot_*`` functions, e.g. ``"CD8_Naive.annot.gz"``.
        Its SNPs must match the SNPs in ``dd``.
    out_prefix : str, default "ldscores_annot"
        Prefix for the LDSC output files (``.l2.ldscore.gz``, ``.l2.M``,
        ``.l2.M_5_50``, ``.log``) and for the temporary PLINK files.
    ld_wind_cm : float, default 1.0
        LD window in centiMorgans. Used when neither ``ld_wind_kb`` nor
        ``ld_wind_snp`` is given.
    ld_wind_kb : int, optional
        LD window in kilobases (alternative to ``ld_wind_cm``).
    ld_wind_snp : int, optional
        LD window in number of SNPs (alternative to ``ld_wind_cm``).
    print_snps : str, optional
        File with SNP IDs (one per row) to restrict LD score output.
    thin_annot : bool, default False
        Set to True only for an annotation file that lacks the leading
        CHR / BP / SNP / CM columns. The annotation functions in this module
        write the full five-column format, so the default is correct for them.
    maf_min : float, default 0.01
        Minor allele frequency threshold, forwarded unchanged.
    yes_really : bool, default True
        Forwarded unchanged; adds ``--yes-really`` to the LDSC command.
    cleanup_files : bool, default True
        Remove ``{out_prefix}.bed``, ``.bim`` and ``.fam`` after a run. Nothing
        is removed when ``run=False``, because the returned command still refers
        to the exported files.
    plink_export_kwargs : dict, optional
        Additional keyword arguments passed to ``to_plink()``.
    run : bool, default True
        Whether to execute the command or only return it.
    runner : LDSCRunner, optional
        Runner instance to use. If None, the global runner is used.
    **kwargs
        Additional command line arguments passed to ``ldsc.py``.

    Returns
    -------
    dict
        The dictionary returned by
        :func:`compute_ld_scores_with_annotations_from_bimfile`:
        ``'ld_scores_file'`` and ``'files_created'`` when ``run=True``,
        ``'command'`` when ``run=False``.

    Examples
    --------
    >>> annot_result = make_annot_from_donor_data(
    ...     dd=my_donor_data,
    ...     annot_file="CD8_Naive.annot.gz",
    ...     gene_set_file="CD8_Naive.GeneSet",
    ...     gene_coord_file="gene_coords.txt",
    ...     windowsize=100000,
    ... )
    >>> ldsc_result = compute_ld_scores_with_annotations_from_donor_data(
    ...     dd=my_donor_data,
    ...     annot_file="CD8_Naive.annot.gz",
    ...     out_prefix="CD8_Naive_ldscores",
    ... )

    Notes
    -----
    - Intended for LD scores computed from your own genotypes; for reference-panel
      LD scores use :func:`compute_ld_scores_with_annotations_from_bimfile`.
    - Temporary PLINK files are written relative to the current working directory
      unless ``out_prefix`` contains a directory component.

    See Also
    --------
    compute_ld_scores_with_annotations_from_bimfile : Compute from existing PLINK files.
    make_annot_from_donor_data : Create annotations from DonorData.
    estimate_celltype_specific_heritability : Run the cell-type-specific analysis.
    """
    if runner is None:
        runner = get_ldsc_runner()

    if plink_export_kwargs is None:
        plink_export_kwargs = {}

    # ld_wind_cm always carries its default, so forwarding it together with an
    # explicit kb/snp window would look like two window choices downstream.
    # Drop the untouched default when an alternative was requested; a
    # non-default ld_wind_cm is still forwarded so a genuine conflict is reported.
    if (ld_wind_kb is not None or ld_wind_snp is not None) and ld_wind_cm == 1.0:
        ld_wind_cm = None

    logger.info("Exporting genotype data to PLINK format for LD score computation")
    to_plink(dd.G, out_prefix, **plink_export_kwargs)

    try:
        results = compute_ld_scores_with_annotations_from_bimfile(
            bfile_prefix=out_prefix,
            annot_file=annot_file,
            out_prefix=out_prefix,
            ld_wind_cm=ld_wind_cm,
            ld_wind_kb=ld_wind_kb,
            ld_wind_snp=ld_wind_snp,
            print_snps=print_snps,
            thin_annot=thin_annot,
            maf_min=maf_min,
            yes_really=yes_really,
            run=run,
            runner=runner,
            **kwargs,
        )
    finally:
        # Remove the exported genotype files even when LDSC failed.
        if cleanup_files and run:
            for ext in (".bim", ".fam", ".bed"):
                filename = out_prefix + ext
                if os.path.isfile(filename):
                    os.remove(filename)
                    logger.info(f"Cleaned up file: {filename}")

    return results
def estimate_celltype_specific_heritability(
    sumstats_file: str,
    ref_ld_chr: str,
    w_ld_chr: str,
    ref_ld_chr_cts: str,
    out_prefix: str,
    print_all_cts: bool = False,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Run LDSC's cell-type-specific heritability analysis (``ldsc.py --h2-cts``).

    Second step of the cell-type-specific workflow: GWAS summary statistics are
    regressed on cell-type-specific LD scores, conditional on baseline LD scores.
    The cell-type LD scores must already exist for every chromosome, e.g. from
    :func:`compute_ld_scores_with_annotations_from_bimfile`.

    Parameters
    ----------
    sumstats_file : str
        Munged summary statistics file (``.sumstats.gz``) from ``munge_sumstats()``.
    ref_ld_chr : str
        Prefix of the per-chromosome baseline LD score files, e.g.
        ``"baseline_v1.2/baseline."``. LDSC appends the chromosome number to the
        prefix, or substitutes it for an ``@`` placeholder if one is present.
    w_ld_chr : str
        Prefix of the per-chromosome regression weight files, e.g.
        ``"weights_hm3_no_hla/weights."`` (same chromosome convention).
    ref_ld_chr_cts : str
        Control file with one line per cell type: the cell type name followed by
        the LD score prefix for that cell type, for example::

            CD8_Naive   cts_ldscores/CD8_Naive.
            CD4_Memory  cts_ldscores/CD4_Memory.

        LDSC then reads ``cts_ldscores/CD8_Naive.1.l2.ldscore.gz`` through
        ``cts_ldscores/CD8_Naive.22.l2.ldscore.gz``.
    out_prefix : str
        Prefix for output files: ``{out_prefix}.cell_type_results.txt`` and
        ``{out_prefix}.log``.
    print_all_cts : bool, default False
        Add ``--print-all-cts`` to the command.
    run : bool, default True
        Whether to execute the command or only return it.
    runner : LDSCRunner, optional
        Runner instance to use. If None, the global runner is used.
    **kwargs
        Extra ``ldsc.py`` arguments. Each key is emitted verbatim as
        ``--<key> <value>``; ``True`` emits a bare ``--<key>``; ``False`` and
        ``None`` are skipped.

    Returns
    -------
    dict
        When ``run=True``: ``'results_file'``, ``'log_file'`` and
        ``'files_created'``. When ``run=False``: ``'command'`` only.

    Raises
    ------
    ValueError
        If ``sumstats_file``, ``ref_ld_chr``, ``w_ld_chr`` or ``ref_ld_chr_cts``
        is empty.

    Examples
    --------
    >>> with open("celltype_ldscores.txt", "w") as f:
    ...     f.write("CD8_Naive\\tcts_ldscores/CD8_Naive.\\n")
    ...     f.write("CD4_Memory\\tcts_ldscores/CD4_Memory.\\n")
    >>> result = estimate_celltype_specific_heritability(
    ...     sumstats_file="height_munged.sumstats.gz",
    ...     ref_ld_chr="baseline_v1.2/baseline.",
    ...     w_ld_chr="weights_hm3_no_hla/weights.",
    ...     ref_ld_chr_cts="celltype_ldscores.txt",
    ...     out_prefix="height_celltype_results",
    ... )

    See Also
    --------
    compute_ld_scores_with_annotations_from_bimfile : Compute LD scores with annotations.
    make_annot_from_donor_data : Create cell-type-specific annotations.
    munge_sumstats : Prepare GWAS summary statistics.
    """
    active_runner = runner if runner is not None else get_ldsc_runner()

    # Check the mandatory inputs in signature order; the first empty one wins.
    mandatory = (
        ("sumstats_file", sumstats_file),
        ("ref_ld_chr", ref_ld_chr),
        ("w_ld_chr", w_ld_chr),
        ("ref_ld_chr_cts", ref_ld_chr_cts),
    )
    for label, supplied in mandatory:
        if not supplied:
            raise ValueError(f"{label} is required")

    pieces = [
        f"{active_runner.ldsc_command} --h2-cts {sumstats_file}",
        f"--ref-ld-chr {ref_ld_chr}",
        f"--w-ld-chr {w_ld_chr}",
        f"--ref-ld-chr-cts {ref_ld_chr_cts}",
        f"--out {out_prefix}",
    ]
    if print_all_cts:
        pieces.append("--print-all-cts")

    for option, setting in kwargs.items():
        if setting is None:
            continue
        if not isinstance(setting, bool):
            pieces.append(f"--{option} {setting}")
        elif setting:
            pieces.append(f"--{option}")

    cmd = " ".join(pieces)
    file_paths = [sumstats_file, ref_ld_chr, w_ld_chr, ref_ld_chr_cts]

    if not run:
        return {"command": active_runner._build_container_command(cmd, file_paths)}

    logger.info(f"Running cell-type-specific heritability analysis: {cmd}")
    active_runner.run_command(cmd, file_paths=file_paths, check=True)

    results_path = f"{out_prefix}.cell_type_results.txt"
    log_path = f"{out_prefix}.log"
    return {
        "results_file": results_path,
        "log_file": log_path,
        "files_created": [results_path, log_path],
    }
ValueError("ref_ld_chr_cts is required") + + cmd = ( + f"{runner.ldsc_command} --h2-cts {sumstats_file} " + f"--ref-ld-chr {ref_ld_chr} " + f"--w-ld-chr {w_ld_chr} " + f"--ref-ld-chr-cts {ref_ld_chr_cts} " + f"--out {out_prefix}" + ) + + if print_all_cts: + cmd += " --print-all-cts" + + for flag, value in kwargs.items(): + if isinstance(value, bool): + if value: + cmd += f" --{flag}" + elif value is not None: + cmd += f" --{flag} {value}" + + file_paths = [sumstats_file, ref_ld_chr, w_ld_chr, ref_ld_chr_cts] + + if run: + logger.info(f"Running cell-type-specific heritability analysis: {cmd}") + runner.run_command(cmd, file_paths=file_paths, check=True) + + return { + "results_file": f"{out_prefix}.cell_type_results.txt", + "log_file": f"{out_prefix}.log", + "files_created": [f"{out_prefix}.cell_type_results.txt", f"{out_prefix}.log"], + } + else: + return {"command": runner._build_container_command(cmd, file_paths)} + + +# --------------------------------------------------------------------------- +# Continuous annotation helpers +# --------------------------------------------------------------------------- + +def _load_gene_coord_file(gene_coord_file: str) -> pd.DataFrame: + """ + Load gene coordinate file in either headed (GENE/CHR/START/END) or + headless (4-column TSV) format. + + Returns a DataFrame with columns: gene, chr, start, end. 
+ """ + sample = pd.read_csv(gene_coord_file, sep="\t", nrows=1) + upper_cols = [c.strip().upper() for c in sample.columns] + if "GENE" in upper_cols and "CHR" in upper_cols: + df = pd.read_csv(gene_coord_file, sep="\t") + df.columns = [c.strip().upper() for c in df.columns] + df = df.rename(columns={"GENE": "gene", "CHR": "chr", "START": "start", "END": "end"}) + else: + df = pd.read_csv( + gene_coord_file, sep="\t", header=None, + names=["gene", "chr", "start", "end"], + ) + return df[["gene", "chr", "start", "end"]] + + +def _compute_continuous_annot_for_bimfile( + bimfile: str, + scores: pd.Series, + gene_coords: pd.DataFrame, + windowsize: int = 100_000, + score_agg: Literal["max", "sum", "mean"] = "max", +) -> pd.DataFrame: + """ + Compute continuous SNP annotations from per-gene scores for one bimfile. + + Each SNP receives the aggregated score of all genes whose ±windowsize bp + window overlaps the SNP position. SNPs with no overlapping gene get 0. + + Parameters + ---------- + bimfile + Path to PLINK .bim file (CHR, SNP, CM, BP, A1, A2). + scores + Per-gene scores indexed by gene ID (must match gene_coords["gene"]). + gene_coords + DataFrame with columns: gene, chr, start, end. + windowsize + Flanking window around each gene body in base pairs. + score_agg + How to combine scores when multiple gene windows overlap a SNP: + "max" (default, matches the combined_pipeline), "sum", or "mean". + + Returns + ------- + pd.DataFrame + Columns: CHR, BP, SNP, CM, ANNOT. + Callers decide which columns to write (thin-annot = ANNOT only, + full format = all five columns). 
def make_continuous_annot_from_bimfile(
    bimfile: str,
    scores: pd.Series,
    annot_file: str,
    gene_coord_file: str,
    windowsize: int = 100_000,
    score_agg: Literal["max", "sum", "mean"] = "max",
) -> dict[str, Any]:
    """
    Create a continuous S-LDSC annotation file from per-gene scores and a PLINK bimfile.

    Each SNP is assigned the aggregated score of all genes whose ±``windowsize``
    bp window overlaps the SNP position; SNPs with no overlapping gene receive 0.

    The output has columns CHR, BP, SNP, CM, ANNOT -- the same full format written
    by :func:`make_annot_from_bimfile` -- so both annotation types can be passed to
    :func:`compute_ld_scores_with_annotations_from_bimfile` in the same way.

    Parameters
    ----------
    bimfile
        PLINK .bim file defining the SNPs, e.g.
        ``"1000G_EUR_Phase3_plink/1000G.EUR.QC.22.bim"``.
    scores
        Per-gene scores indexed by gene IDs matching those in ``gene_coord_file``
        (typically ENSG IDs). Scored genes without coordinates are ignored.
    annot_file
        Output path. The file is gzip-compressed when the name ends in ``.gz``
        (the usual ``.annot.gz``) and written as plain text for a bare
        ``.annot`` name; compression is inferred from the extension. Missing
        parent directories are created.
    gene_coord_file
        Tab-separated gene coordinate file, either headed (``GENE``, ``CHR``,
        ``START``, ``END``) or headless (four columns: gene, chr, start, end).
    windowsize
        Base pairs to extend on each side of the gene body (default 100 000).
    score_agg
        Aggregation when several gene windows overlap a SNP: ``"max"``
        (default), ``"sum"`` or ``"mean"``.

    Returns
    -------
    dict
        ``annot_file`` -- path to the written file.
        ``files_created`` -- list containing that path.
        ``n_nonzero_snps`` -- number of SNPs with a non-zero annotation.
        ``n_genes_matched`` -- number of rows in ``gene_coord_file`` whose gene
        has a score. This is counted over the whole coordinate file, not only
        over the chromosome(s) present in ``bimfile``.

    Examples
    --------
    >>> scores = pd.read_csv("seismic/scores.csv", index_col=0)["ExcL2-3"]
    >>> for chrom in range(1, 23):
    ...     make_continuous_annot_from_bimfile(
    ...         bimfile=f"1000G_EUR/1000G.EUR.QC.{chrom}.bim",
    ...         scores=scores,
    ...         annot_file=f"annots/ExcL2-3.{chrom}.annot.gz",
    ...         gene_coord_file="gencode_v39_grch38_ensg.gene.loc",
    ...     )
    ...     compute_ld_scores_with_annotations_from_bimfile(
    ...         bfile_prefix=f"1000G_EUR/1000G.EUR.QC.{chrom}",
    ...         annot_file=f"annots/ExcL2-3.{chrom}.annot.gz",
    ...         out_prefix=f"annots/ExcL2-3.{chrom}",
    ...         print_snps=f"hapmap3_snps/hm.{chrom}.snp",
    ...     )

    See Also
    --------
    make_continuous_annot_from_donor_data : Same workflow from a DonorData object.
    make_annot_from_bimfile : Binary annotation variant (calls make_annot.py).
    compute_ld_scores_with_annotations_from_bimfile : Next step after annotation.
    estimate_celltype_specific_heritability : Final h2-cts step.
    """
    gene_coords = _load_gene_coord_file(gene_coord_file)
    annot_df = _compute_continuous_annot_for_bimfile(
        bimfile=bimfile,
        scores=scores,
        gene_coords=gene_coords,
        windowsize=windowsize,
        score_agg=score_agg,
    )

    os.makedirs(os.path.dirname(os.path.abspath(annot_file)), exist_ok=True)
    # Infer compression from the file name: forcing gzip wrote a gzip stream even
    # under a plain ".annot" name, which a reader going by the extension cannot parse.
    annot_df[["CHR", "BP", "SNP", "CM", "ANNOT"]].to_csv(
        annot_file, sep="\t", index=False, compression="infer"
    )

    n_nonzero = int((annot_df["ANNOT"] != 0).sum())
    n_matched = int(gene_coords["gene"].isin(scores.index.astype(str)).sum())
    logger.info(
        f"Wrote continuous annotation: {annot_file} "
        f"({n_nonzero:,} non-zero SNPs, {n_matched:,} genes matched)"
    )

    return {
        "annot_file": annot_file,
        "files_created": [annot_file],
        "n_nonzero_snps": n_nonzero,
        "n_genes_matched": n_matched,
    }
def make_continuous_annot_from_donor_data(
    dd: DonorData,
    scores: pd.Series,
    annot_file: str,
    gene_coord_file: str,
    windowsize: int = 100_000,
    score_agg: Literal["max", "sum", "mean"] = "max",
    out_prefix: str = "ldsc_continuous_annot",
    cleanup_files: bool = True,
    plink_export_kwargs: dict | None = None,
) -> dict[str, Any]:
    """
    Create a continuous S-LDSC annotation file from per-gene scores and a DonorData object.

    Exports ``dd.G`` to temporary PLINK files named ``{out_prefix}.bed/.bim/.fam``
    and delegates to :func:`make_continuous_annot_from_bimfile` using the exported
    ``.bim`` file. The output has columns CHR, BP, SNP, CM, ANNOT, the same full
    format as :func:`make_annot_from_donor_data`.

    Parameters
    ----------
    dd
        DonorData object containing genotype information.
    scores
        Per-gene scores indexed by gene IDs matching ``gene_coord_file``.
    annot_file
        Output annotation file path (e.g. ``"CD8_Naive.22.annot.gz"``).
    gene_coord_file
        Gene coordinate file -- see :func:`make_continuous_annot_from_bimfile`.
    windowsize
        Flanking window in bp around each gene body (default 100 000).
    score_agg
        Aggregation for overlapping gene windows: ``"max"``, ``"sum"`` or ``"mean"``.
    out_prefix
        Prefix for the temporary PLINK files created during export.
    cleanup_files
        Remove the temporary ``.bed``/``.bim``/``.fam`` files afterwards. They
        are removed even if writing the annotation fails.
    plink_export_kwargs
        Extra keyword arguments forwarded to ``to_plink()``.

    Returns
    -------
    dict
        Same as :func:`make_continuous_annot_from_bimfile`.

    Examples
    --------
    >>> result = make_continuous_annot_from_donor_data(
    ...     dd=my_donor_data,
    ...     scores=specificity_df["ExcL2-3"],
    ...     annot_file="annots/ExcL2-3.annot.gz",
    ...     gene_coord_file="gene_coords.txt",
    ... )
    >>> compute_ld_scores_with_annotations_from_donor_data(
    ...     dd=my_donor_data,
    ...     annot_file="annots/ExcL2-3.annot.gz",
    ...     out_prefix="annots/ExcL2-3",
    ... )

    See Also
    --------
    make_continuous_annot_from_bimfile : Works directly from a .bim file path.
    make_annot_from_donor_data : Binary annotation variant.
    """
    if plink_export_kwargs is None:
        plink_export_kwargs = {}

    logger.info("Exporting genotype data to PLINK format for continuous annotation creation")
    to_plink(dd.G, out_prefix, **plink_export_kwargs)
    bimfile = f"{out_prefix}.bim"

    try:
        results = make_continuous_annot_from_bimfile(
            bimfile=bimfile,
            scores=scores,
            annot_file=annot_file,
            gene_coord_file=gene_coord_file,
            windowsize=windowsize,
            score_agg=score_agg,
        )
    finally:
        # Runs on failure too, so a bad gene_coord_file or score_agg does not
        # leave the exported genotype files behind.
        if cleanup_files:
            for ext in (".bim", ".fam", ".bed"):
                path = out_prefix + ext
                if os.path.isfile(path):
                    os.remove(path)
                    logger.info(f"Cleaned up: {path}")

    return results
+ """ + if plink_export_kwargs is None: + plink_export_kwargs = {} + + logger.info("Exporting genotype data to PLINK format for continuous annotation creation") + to_plink(dd.G, out_prefix, **plink_export_kwargs) + bimfile = f"{out_prefix}.bim" + + results = make_continuous_annot_from_bimfile( + bimfile=bimfile, + scores=scores, + annot_file=annot_file, + gene_coord_file=gene_coord_file, + windowsize=windowsize, + score_agg=score_agg, + ) + + if cleanup_files: + for ext in [".bim", ".fam", ".bed"]: + path = out_prefix + ext + if os.path.isfile(path): + os.remove(path) + logger.info(f"Cleaned up: {path}") + + return results diff --git a/src/cellink/tl/external/_ldsc2magma.py b/src/cellink/tl/external/_ldsc2magma.py new file mode 100644 index 0000000..f9cd348 --- /dev/null +++ b/src/cellink/tl/external/_ldsc2magma.py @@ -0,0 +1,828 @@ +import logging +import os +import re +import subprocess +import tempfile +from pathlib import Path +from typing import Any + +import pandas as pd + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +def _safe_name(s: str) -> str: + return re.sub(r"[^A-Za-z0-9._-]+", "_", str(s)).strip("_") + + +def _resolve_gene_map(gene_map: "str | Path | pd.Series | None") -> "pd.Series | None": + """Return a Series indexed by gene symbol with ENSG values, or None.""" + if gene_map is None: + return None + if isinstance(gene_map, pd.Series): + return gene_map + return pd.read_csv(gene_map, sep="\t").set_index("gene_name")["ensg_id"] + + +def _to_ensg(index: pd.Index, gmap: "pd.Series | None") -> pd.Index: + """Map an index through gmap (if provided) and keep only ENSG IDs.""" + idx = index.astype(str) + if gmap is not None: + idx = pd.Index([gmap.get(g, g) for g in idx]) + return idx + + +def _run(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess: + 
logger.info("Running: %s", " ".join(cmd)) + result = subprocess.run(cmd, capture_output=True, text=True) + if result.stdout: + logger.info(result.stdout) + if result.stderr: + logger.warning(result.stderr) + if check and result.returncode != 0: + raise subprocess.CalledProcessError(result.returncode, cmd, result.stdout, result.stderr) + return result + + +def load_ensembl_to_entrez_map(map_tsv: str | Path) -> pd.Series: + """ + Load a mapping TSV with columns: + ensembl_gene_id entrez_id + Returns a Series indexed by ENSG (upper, no version) with values as string Entrez IDs. + """ + map_tsv = Path(map_tsv) + df = pd.read_csv(map_tsv, sep="\t", dtype=str) + + # Accept a few common header variants + col_ens = None + for c in ["ensembl_gene_id", "ENSG", "ensembl", "gene_id"]: + if c in df.columns: + col_ens = c + break + if col_ens is None: + raise ValueError(f"Mapping file missing Ensembl column. Found: {list(df.columns)}") + + col_ent = None + for c in ["entrez_id", "entrezgene", "entrez", "ENTREZID", "ncbi_gene_id"]: + if c in df.columns: + col_ent = c + break + if col_ent is None: + raise ValueError(f"Mapping file missing Entrez column. 
Found: {list(df.columns)}") + + ens = df[col_ens].astype(str).str.strip().str.upper().str.replace(r"\..*$", "", regex=True) + ent = df[col_ent].astype(str).str.strip() + + m = pd.Series(ent.values, index=ens.values) + m = m[~m.index.duplicated(keep="first")] + return m + + +def genesets_dir_to_entrez_gmt( + *, + geneset_dir: str | Path = "ldsc_genesets", + out_gmt: str | Path | None = None, + ensembl_to_entrez_tsv: str | Path | None = None, + pattern: str = "*.GeneSet", + description: str = "S-LDSC derived gene set", + include_control: bool = False, + control_name: str = "Control", + remove_version_suffix: bool = True, + uppercase: bool = True, + min_genes: int = 1, + sort_genes: bool = False, + dedup_genes: bool = True, + drop_unmapped: bool = True, + allow_mygene_fallback: bool = False, + species: str = "human", + output_basename: str = "genesets.gmt", +) -> Path: + """ + Convert *.GeneSet -> MAGMA .gmt without ID conversion. + + Defaults: + - reads from ./ldsc_genesets + - writes to a sibling directory ./magma_genesets/genesets.gmt + (magma_genesets is created if needed) + """ + geneset_dir = Path(geneset_dir).resolve() + + # ---- Default output location: sibling magma_genesets next to ldsc_genesets ---- + if out_gmt is None: + magma_dir = geneset_dir.parent / "magma_genesets" + magma_dir.mkdir(parents=True, exist_ok=True) + out_gmt = magma_dir / output_basename + else: + out_gmt = Path(out_gmt) + out_gmt.parent.mkdir(parents=True, exist_ok=True) + + # ---- Find input GeneSet files ---- + files = sorted(geneset_dir.glob(pattern)) + if not include_control: + files = [p for p in files if p.stem != control_name] + if not files: + raise FileNotFoundError(f"No files matched {pattern} in {geneset_dir}") + + if ensembl_to_entrez_tsv is not None: + logger.warning("`ensembl_to_entrez_tsv` is ignored. No Ensembl→Entrez conversion is performed.") + if allow_mygene_fallback: + logger.warning("`allow_mygene_fallback` is ignored. 
def scores_to_gmt(
    scores: pd.DataFrame,
    out_file: "str | Path",
    top_frac: float = 0.10,
    ascending: bool = False,
    gene_map: "str | Path | pd.Series | None" = None,
    set_name_prefix: str = "",
    min_genes: int = 1,
) -> Path:
    """
    Convert a genes × cell-types score DataFrame to MAGMA GMT format.

    Each cell type becomes one gene set containing the top (or bottom)
    ``top_frac`` fraction of genes ranked by score.

    GMT format: ``set_name\\tNA\\tGENE1\\tGENE2\\t...`` (one line per set).

    Parameters
    ----------
    scores
        DataFrame with genes as rows and cell types as columns. Index should
        contain gene identifiers (ENSG IDs or gene symbols).
    out_file
        Output ``.gmt`` file path. Parent directories are created if needed.
    top_frac
        Fraction of genes to select per cell type (default 0.10 = top 10%).
        The number selected is ``max(1, floor(len(scores) * top_frac))`` and is
        the same for every cell type (NaN rows count towards ``len(scores)``).
    ascending
        If ``True``, select the bottom ``top_frac`` genes (lowest scores)
        instead of the top.
    gene_map
        Optional mapping from gene symbols to ENSG IDs. Accepts:

        * ``str`` or ``Path`` — path to a two-column TSV with headers
          ``gene_name`` and ``ensg_id``.
        * ``pd.Series`` — index = gene symbol, values = ENSG ID.

        Identifiers found in the map are translated. Regardless of whether a
        map is given, identifiers that do not start with ``ENSG``
        (case-insensitive) after mapping are dropped.
    set_name_prefix
        String prepended to every set name (e.g. ``"brainscope_scz_seismic_top"``).
    min_genes
        Minimum number of genes required to write a gene set. Sets with fewer
        genes are skipped with a warning.

    Returns
    -------
    Path
        Path to the written GMT file.

    Examples
    --------
    >>> from cellink.tl.external import scores_to_gmt, run_magma_gsa
    >>> gmt = scores_to_gmt(specificity_df, "ExcL23_top10.gmt",
    ...                     set_name_prefix="brainscope_seismic_top")
    >>> run_magma_gsa(gene_results="scz.genes.raw", set_annot=str(gmt),
    ...               out_prefix="results/scz_gsa")

    See Also
    --------
    genesets_dir_to_entrez_gmt : Convert LDSC .GeneSet files → GMT.
    scores_to_covar : Convert scores to MAGMA gene property covariate file.
    run_magma_gsa : Run MAGMA gene-set analysis with the GMT.
    """
    out_file = Path(out_file)
    out_file.parent.mkdir(parents=True, exist_ok=True)

    gmap = _resolve_gene_map(gene_map)
    n_genes = len(scores)
    # The epsilon guards against float truncation: 100 * 0.29 evaluates to
    # 28.999999999999996, which int() would floor to 28 instead of 29.
    n_select = max(1, int(n_genes * top_frac + 1e-9))
    selection = "bottom" if ascending else "top"

    n_written = n_skipped = 0
    with out_file.open("w", encoding="utf-8") as fh:
        for ct in scores.columns:
            col = scores[ct].dropna()
            if col.empty:
                # Count and report all-NaN cell types so the summary line
                # below accounts for every input column.
                logger.warning("Skipping %s: no non-NaN scores", ct)
                n_skipped += 1
                continue

            # Rank genes
            ranked = col.nsmallest(n_select) if ascending else col.nlargest(n_select)

            # Map to ENSG and filter. A gene_map may yield non-string values
            # (e.g. NaN for an unmapped symbol), so test the type before
            # calling .upper(). dict.fromkeys de-duplicates while preserving
            # rank order (two symbols can map to the same ENSG ID).
            mapped = _to_ensg(ranked.index, gmap)
            gene_ids = list(
                dict.fromkeys(g for g in mapped if isinstance(g, str) and g.upper().startswith("ENSG"))
            )

            if len(gene_ids) < min_genes:
                logger.warning("Skipping %s: only %d ENSG genes after mapping", ct, len(gene_ids))
                n_skipped += 1
                continue

            set_name = _safe_name(f"{set_name_prefix}_{ct}" if set_name_prefix else ct)
            fh.write("\t".join([set_name, "NA"] + gene_ids) + "\n")
            n_written += 1

    logger.info(
        "scores_to_gmt: wrote %d gene sets (%s %g%%, %d skipped) → %s",
        n_written, selection, top_frac * 100, n_skipped, out_file,
    )
    return out_file
+ out_file + Output ``.covar`` file path. + gene_map + Optional mapping from gene symbols to ENSG IDs — same formats as in + :func:`scores_to_gmt`. Non-ENSG rows are dropped after mapping. + negate + If ``True``, multiply all scores by ``-1`` before writing. Use this + to test enrichment in genes with *low* scores (e.g. negative Vg). + + Returns + ------- + Path + Path to the written covariate file. + + Examples + -------- + >>> from cellink.tl.external import scores_to_covar, run_magma_gpa + >>> covar = scores_to_covar(specificity_df, "brainscope_seismic.covar") + >>> run_magma_gpa(gene_results="scz.genes.raw", gene_covar=str(covar), + ... out_prefix="results/scz_gpa") + + See Also + -------- + scores_to_gmt : Convert scores to GMT for gene-set analysis. + run_magma_gpa : Run MAGMA gene property analysis with the covariate file. + """ + out_file = Path(out_file) + out_file.parent.mkdir(parents=True, exist_ok=True) + + gmap = _resolve_gene_map(gene_map) + + df = scores.copy() + df.index = _to_ensg(df.index, gmap) + + # Keep only ENSG rows + df = df[df.index.str.upper().str.startswith("ENSG")] + + # Deduplicate: keep row with highest mean absolute score + if df.index.duplicated().any(): + df["_mean_abs"] = df.abs().mean(axis=1) + df = df.sort_values("_mean_abs", ascending=False) + df = df[~df.index.duplicated(keep="first")].drop(columns=["_mean_abs"]) + + df = df.dropna(how="all") + + if negate: + df = -df + + df.columns = [_safe_name(c) for c in df.columns] + df.index.name = "GENE" + + df.to_csv(out_file, sep="\t", na_rep="NA") + logger.info("scores_to_covar: wrote %d genes × %d cell types → %s", len(df), df.shape[1], out_file) + return out_file + + +# --------------------------------------------------------------------------- +# MAGMA steps I–III from scratch +# --------------------------------------------------------------------------- + +def run_magma_annotate( + snp_loc: str, + gene_loc: str, + out_prefix: str, + magma_bin: str = "magma", + window_kb: int = 0, 
+ run: bool = True, + **kwargs, +) -> "dict[str, Any]": + """ + Run MAGMA Step I — annotate SNPs to genes. + + Maps each SNP to the gene(s) whose transcribed region (± ``window_kb`` kb) + overlaps its position. Creates a ``.genes.annot`` file consumed by + :func:`run_magma_gene_analysis`. + + Parameters + ---------- + snp_loc + Tab-delimited SNP location file with columns ``SNP``, ``CHR``, ``BP``. + Can be derived from a GWAS summary statistics file. + gene_loc + NCBI/Ensembl gene location file. MAGMA ships ``NCBI37.3.gene.loc`` + and ``NCBI38.gene.loc`` for GRCh37/38. Format: ``ENTREZID CHR START + END STRAND SYMBOL`` (tab-delimited, no header). + out_prefix + Prefix for output files. Creates ``{out_prefix}.genes.annot``. + magma_bin + Path to the MAGMA binary (default: ``"magma"`` assumes it is on PATH). + window_kb + Flanking window in kb added around each gene's transcribed region + (default 0 = gene body only). Use e.g. 35 for ±35 kb. + run + If ``False``, return the command string without executing. + **kwargs + Additional flags passed verbatim to MAGMA as ``--key value``. + + Returns + ------- + dict + ``annot_file`` — path to ``.genes.annot``. + ``files_created`` — list of output paths (if ``run=True``). + ``command`` — command list (if ``run=False``). + + Examples + -------- + >>> run_magma_annotate( + ... snp_loc="gwas_snps.txt", + ... gene_loc="NCBI37.3.gene.loc", + ... out_prefix="results/my_gwas", + ... window_kb=35, + ... ) + + See Also + -------- + run_magma_gene_analysis : Step II — compute gene-level p-values. 
def run_magma_gene_analysis(
    bfile: str,
    pval_file: str,
    gene_annot: str,
    out_prefix: str,
    n_samples: "int | None" = None,
    magma_bin: str = "magma",
    run: bool = True,
    **kwargs,
) -> "dict[str, Any]":
    """
    Run MAGMA Step II — gene-level association analysis.

    Computes gene-level p-values and z-scores from GWAS SNP-level summary
    statistics, taking LD structure into account using a reference genotype
    panel. The output ``{out_prefix}.genes.raw`` is the input to both
    :func:`run_magma_gsa` and :func:`run_magma_gpa`.

    Parameters
    ----------
    bfile
        PLINK bfile prefix (without extension) for the LD reference panel,
        e.g. ``"g1000_eur/g1000_eur"``.
    pval_file
        GWAS p-value file. Must contain at least ``SNP`` and ``P`` columns
        (tab or space delimited).
    gene_annot
        Path to the ``.genes.annot`` file from :func:`run_magma_annotate`.
    out_prefix
        Prefix for output files. Creates ``{out_prefix}.genes.raw`` and
        ``{out_prefix}.genes.out``.
    n_samples
        Total GWAS sample size. Required unless ``pval_file`` contains an
        ``N`` column. Passed as the ``N=<n>`` modifier of ``--pval``.
    magma_bin
        Path to the MAGMA binary.
    run
        If ``False``, return the command without executing.
    **kwargs
        Additional flags passed verbatim to MAGMA as ``--key value``.

    Returns
    -------
    dict
        ``gene_results`` — path to ``.genes.raw``.
        ``files_created`` — list of output paths (if ``run=True``).
        ``command`` — command list (if ``run=False``).

    Examples
    --------
    >>> run_magma_gene_analysis(
    ...     bfile="g1000_eur/g1000_eur",
    ...     pval_file="scz_gwas.txt",
    ...     gene_annot="results/my_gwas.genes.annot",
    ...     out_prefix="results/scz",
    ...     n_samples=67390,
    ... )

    See Also
    --------
    run_magma_annotate : Step I — SNP-to-gene annotation.
    run_magma_gsa : Step III — gene-set analysis.
    run_magma_gpa : Step III — gene property analysis.
    """
    cmd = [magma_bin, "--bfile", str(bfile), "--pval", str(pval_file)]
    if n_samples is not None:
        # The command is run without a shell, so each list element is one argv
        # entry. "N=<n>" must be its own element; joined to the file name with
        # a space, MAGMA would receive a path that literally contains " N=...".
        cmd.append(f"N={n_samples}")
    cmd += ["--gene-annot", str(gene_annot), "--out", str(out_prefix)]
    for k, v in kwargs.items():
        cmd += [f"--{k}", str(v)]

    if not run:
        return {"command": cmd}

    os.makedirs(os.path.dirname(os.path.abspath(out_prefix)), exist_ok=True)
    _run(cmd)
    gene_results = f"{out_prefix}.genes.raw"
    return {
        "gene_results": gene_results,
        "files_created": [gene_results, f"{out_prefix}.genes.out", f"{out_prefix}.log"],
    }
def _read_first_gsa_row(gsa_file: str) -> "dict | None":
    """Return the first data row of a MAGMA ``.gsa.out`` file as a dict keyed by header, or None."""
    header = None
    with open(gsa_file) as f:
        for line in f:
            if line.startswith("#"):
                continue
            parts = line.split()
            if header is None:
                # first non-comment line is the column header
                header = parts
                continue
            return dict(zip(header, parts))
    return None


def run_magma_gpa(
    gene_results: str,
    gene_covar: str,
    out_prefix: str,
    magma_bin: str = "magma",
    univariate: bool = False,
    run: bool = True,
    **kwargs,
) -> "dict[str, Any]":
    """
    Run MAGMA Step III — gene property analysis (GPA).

    Tests the linear association between continuous per-gene scores and GWAS
    gene-level z-scores. Unlike GSA (which uses a top-N threshold), GPA uses
    all genes with scores as quantitative covariates.

    When ``univariate=False`` (default), all cell types are tested jointly in a
    single MAGMA call (``--gene-covar``). Set ``univariate=True`` to test each
    cell type (covariate column) in its own MAGMA call; the first result row
    of each call is collected into one combined ``.gsa.out`` file. Cell types
    for which MAGMA fails or writes no output are logged and left out.

    **LDSC → MAGMA GPA workflow**

    >>> covar = scores_to_covar(specificity_df, "brainscope_seismic.covar")
    >>> run_magma_gpa(gene_results="scz.genes.raw", gene_covar=str(covar),
    ...               out_prefix="results/scz_gpa_seismic")

    Parameters
    ----------
    gene_results
        Path to ``.genes.raw`` from :func:`run_magma_gene_analysis`.
    gene_covar
        Path to ``.covar`` file from :func:`scores_to_covar`.
    out_prefix
        Prefix for output files. Creates ``{out_prefix}.gsa.out``.
    magma_bin
        Path to the MAGMA binary.
    univariate
        If ``True``, run each cell type (covariate column) as a separate MAGMA
        call and combine results. Recommended when covariates are highly
        correlated (e.g. residual CV or negated scores).
    run
        If ``False``, return the command without executing. Only valid when
        ``univariate=False``; no output directory is created in that case.
    **kwargs
        Additional flags forwarded to MAGMA as ``--key value`` in joint mode.
        Ignored (with a warning) in univariate mode.

    Returns
    -------
    dict
        ``results_file`` — path to ``.gsa.out``.
        ``files_created`` — list (if ``run=True``).
        ``command`` — command list (if ``run=False``, joint mode only).

    Raises
    ------
    ValueError
        If ``run=False`` is combined with ``univariate=True``.

    Examples
    --------
    Joint analysis (default):

    >>> run_magma_gpa(
    ...     gene_results="results/scz.genes.raw",
    ...     gene_covar="covars/brainscope_seismic.covar",
    ...     out_prefix="results/scz_gpa_seismic",
    ... )

    Univariate analysis (safe for correlated scores):

    >>> run_magma_gpa(
    ...     gene_results="results/scz.genes.raw",
    ...     gene_covar="covars/brainscope_residual_cv.covar",
    ...     out_prefix="results/scz_gpa_residual_cv",
    ...     univariate=True,
    ... )

    See Also
    --------
    scores_to_covar : Build covariate file from a scores DataFrame.
    run_magma_gsa : Gene-set analysis (binary threshold, alternative to GPA).
    """
    out_prefix = str(out_prefix)

    if not univariate:
        cmd = [magma_bin, "--gene-results", gene_results, "--gene-covar", gene_covar, "--out", out_prefix]
        for k, v in kwargs.items():
            cmd += [f"--{k}", str(v)]

        if not run:
            # Dry run: build the command only, without touching the filesystem.
            return {"command": cmd}

        os.makedirs(os.path.dirname(os.path.abspath(out_prefix)), exist_ok=True)
        _run(cmd)
        results_file = f"{out_prefix}.gsa.out"
        return {"results_file": results_file, "files_created": [results_file, f"{out_prefix}.log"]}

    # -- Univariate mode: one MAGMA call per cell-type covariate column -------
    if not run:
        # The per-cell-type commands point at temporary files that do not
        # exist yet, so there is no meaningful command to return. Refuse
        # rather than silently executing MAGMA on a dry-run request.
        raise ValueError("run=False is only supported when univariate=False")
    if kwargs:
        logger.warning("Extra MAGMA flags are ignored in univariate mode: %s", sorted(kwargs))

    os.makedirs(os.path.dirname(os.path.abspath(out_prefix)), exist_ok=True)

    covar = pd.read_csv(gene_covar, sep="\t", index_col=0)
    cell_types = covar.columns.tolist()
    logger.info("GPA univariate: %d cell types", len(cell_types))

    rows: list[dict] = []
    with tempfile.TemporaryDirectory() as tmpdir:
        for i, ct in enumerate(cell_types):
            # Unique file names per cell type: with a shared name, a stale
            # .gsa.out from the previous iteration could be read back and
            # attributed to this cell type if MAGMA wrote no new output.
            ct_covar = os.path.join(tmpdir, f"ct_{i}.covar")
            ct_out = os.path.join(tmpdir, f"ct_{i}")
            covar[[ct]].to_csv(ct_covar, sep="\t", na_rep="NA")

            cmd = [magma_bin, "--gene-results", gene_results, "--gene-covar", ct_covar, "--out", ct_out]
            result = _run(cmd, check=False)
            if result.returncode != 0:
                logger.warning("MAGMA failed for %s: %s", ct, result.stderr.strip())
                continue

            gsa_file = ct_out + ".gsa.out"
            if not os.path.exists(gsa_file):
                logger.warning("No output for %s", ct)
                continue

            row = _read_first_gsa_row(gsa_file)
            if row is not None:
                row["FULL_NAME"] = ct
                rows.append(row)

            logger.info("  %s: done", ct)

    results_file = f"{out_prefix}.gsa.out"
    with open(results_file, "w") as f:
        f.write("# UNIVARIATE GPA (each cell type tested independently)\n")
        f.write(
            f"# {'VARIABLE':<36} {'TYPE':<6} {'NGENES':>6} "
            f"{'BETA':>12} {'BETA_STD':>12} {'SE':>12} {'P':>12} FULL_NAME\n"
        )
        for row in rows:
            name = row.get("FULL_NAME", "")
            # VARIABLE is truncated to 36 characters; FULL_NAME keeps the whole name
            f.write(
                f"{name[:36]:<36} "
                f"{row.get('TYPE', 'COVAR'):<6} "
                f"{row.get('NGENES', 'NA'):>6} "
                f"{row.get('BETA', 'NA'):>12} "
                f"{row.get('BETA_STD', 'NA'):>12} "
                f"{row.get('SE', 'NA'):>12} "
                f"{row.get('P', 'NA'):>12} "
                f"{name}\n"
            )

    logger.info("GPA univariate: wrote %d cell types → %s", len(rows), results_file)
    return {"results_file": results_file, "files_created": [results_file]}
If omitted, writes to sibling magma_genesets/genesets.gmt", + ) + p.add_argument( + "--map_tsv", + default=None, + help="Deprecated and ignored (no Ensembl→Entrez conversion is performed).", + ) + p.add_argument("--include_control", action="store_true") + p.add_argument( + "--allow_mygene_fallback", + action="store_true", + help="Deprecated and ignored (no Ensembl→Entrez conversion is performed).", + ) + p.add_argument( + "--pattern", + default="*.GeneSet", + help="Glob pattern for gene set files (default: *.GeneSet)", + ) + p.add_argument( + "--output_basename", + default="genesets.gmt", + help="Output filename when using default magma_genesets directory (default: genesets.gmt)", + ) + args = p.parse_args() + + genesets_dir_to_entrez_gmt( + geneset_dir=args.geneset_dir, + out_gmt=args.out_gmt, + ensembl_to_entrez_tsv=args.map_tsv, + include_control=args.include_control, + allow_mygene_fallback=args.allow_mygene_fallback, + pattern=args.pattern, + output_basename=args.output_basename, + ) diff --git a/src/cellink/tl/external/_sldsc_utils.py b/src/cellink/tl/external/_sldsc_utils.py new file mode 100644 index 0000000..3416029 --- /dev/null +++ b/src/cellink/tl/external/_sldsc_utils.py @@ -0,0 +1,707 @@ +import logging +import re +from pathlib import Path +from typing import Literal + +import numpy as np +import pandas as pd +import scanpy as sc +from anndata import AnnData +from scipy import sparse +import os +import h5py +import numexpr as ne +from tqdm import tqdm + + +logger = logging.getLogger(__name__) + + +def preprocess_for_sldsc( + adata: AnnData, + *, + celltype_col: str, + log_transform: bool = True, + filter_protein_coding: bool = True, + filter_expressed: bool = True, + filter_mhc: bool = True, + mhc_chr: str = None, + mhc_start: int = None, + mhc_end: int = None, + fetch_annotation: bool = True, + genome_build: Literal["GRCh37", "GRCh38"] = "GRCh37", + gene_identifier_mode: str = "name", + remove_version_suffix: bool = True, + gene_col: str | None = 
"gene", + biotype_col: str | None = None, + chr_col: str | None = None, + start_col: str | None = None, + end_col: str | None = None, + inplace: bool = True, +) -> tuple[AnnData, pd.DataFrame, pd.DataFrame] | None: + """ + Preprocess single-cell data for S-LDSC cell-type-specific analysis. + + This function performs comprehensive preprocessing including: + - Optional log1p transformation + - Gene annotation fetching from Ensembl BioMart (GRCh37 or GRCh38) + - Gene filtering (protein-coding, expressed, unique names, MHC exclusion) + - Computation of mean expression per cell type + - Computation of specificity scores (Duncan et al. 2025; doi:10.1038/s41593-024-01834-w) + + Parameters + ---------- + adata + Annotated data matrix of shape `n_obs` x `n_vars` (cells x genes). + celltype_col + Column name in `adata.obs` containing cell type labels. + log_transform + Whether to apply log1p transformation. Set to False if already log-transformed. + filter_protein_coding + Whether to filter for protein-coding genes only. + filter_expressed + Whether to filter out genes with zero expression across all cells. + filter_mhc + Whether to exclude genes in the MHC region (chr6:25-34Mb by default). + mhc_chr + Chromosome containing MHC region (default: "6"). + mhc_start + Start position of MHC region in base pairs. + mhc_end + End position of MHC region in base pairs. + fetch_annotation + Whether to fetch gene annotations from Ensembl BioMart. + If False, expects existing annotation columns in adata.var. + genome_build + Genome build version: "GRCh37" or "GRCh38". Only used if fetch_annotation=True. + gene_identifier_mode + Gene identifier: "name" or "ensembl". Only used if fetch_annotation=True. + remove_version_suffix + Whether to remove version suffixes from gene names or gene IDs (e.g., ENSG00000123456.7 → ENSG00000123456). + gene_col + Column name for gene symbols or IDs. If None, uses var_names if fetch_annotation is True is auto-detected. 
+ biotype_col + Column name for gene biotype. Auto-detected if None. + chr_col + Column name for chromosome. Auto-detected if None. + start_col + Column name for gene start position. Auto-detected if None. + end_col + Column name for gene end position. Auto-detected if None. + inplace + Whether to update `adata` in place or return a copy. + + Returns + ------- + AnnData, pd.DataFrame, pd.DataFrame + - Filtered AnnData object + - Cluster-normalized-to-1000 matrix and specificity derived from that (genes x cell types) + - Specificity scores per cell type (genes x cell types) + Returns None if inplace=True. + + Raises + ------ + AssertionError + If celltype_col is not present in adata.obs. + ValueError + If required annotation columns are missing and fetch_annotation=False. + ImportError + If pybiomart is not installed and fetch_annotation=True. + + Examples + -------- + >>> # Using GRCh37 (default) + >>> adata_filtered, mean_expr, specificity = preprocess_for_sldsc(adata, celltype_col="cell_type", inplace=False) + >>> # Using GRCh38 + >>> adata_filtered, mean_expr, specificity = preprocess_for_sldsc( + ... adata, celltype_col="cell_type", genome_build="GRCh38", inplace=False + ... 
) + """ + if celltype_col not in adata.obs.columns: + raise ValueError(f"Column '{celltype_col}' not found in adata.obs") + + + + if fetch_annotation: + anno_df = _fetch_ensembl_annotation(genome_build=genome_build, gene_identifier_mode=gene_identifier_mode) + if gene_col is None: + adata.var["gene"] = adata.var_names + adata.var["gene_upper"] = adata.var[gene_col].str.upper() + if remove_version_suffix: + logger.info("Removing version suffixes from Gene IDs") + adata.var["gene_upper"] = adata.var["gene_upper"].str.replace(r"\..*$", "", regex=True) + + adata = _map_gene_annotation(adata, anno_df, gene_col) + + biotype_col = "gene_biotype" + chr_col = "chrom" + start_col = "start" + end_col = "end" + else: + gene_col = _pick_var_col(adata, ["gene_symbol", "gene_name", "symbol", "hgnc_symbol", "gene"], gene_col) + biotype_col = _pick_var_col(adata, ["gene_biotype", "biotype", "feature_biotype", "gene_type"], biotype_col) + chr_col = _pick_var_col(adata, ["chrom", "chr", "chromosome", "seqname"], chr_col) + start_col = _pick_var_col(adata, ["start", "start_position", "gene_start"], start_col) + end_col = _pick_var_col(adata, ["end", "end_position", "gene_end"], end_col) + adata.var_names = adata.var[gene_col].astype(str) + adata.var_names_make_unique() + + logger.info( + f"Using annotation columns: gene={gene_col}, biotype={biotype_col}, chr={chr_col}, start={start_col}, end={end_col}" + ) + + logger.info("Applying gene filters") + masks = {} + + if filter_protein_coding and biotype_col: + biotype = adata.var[biotype_col].astype(str).str.lower() + masks["protein_coding"] = biotype.isin(["protein_coding", "protein-coding", "protein coding"]) + logger.info(f"Protein-coding genes: {masks['protein_coding'].sum()}") + else: + masks["protein_coding"] = pd.Series(True, index=adata.var_names) + if filter_protein_coding: + logger.warning("No biotype column found; skipping protein-coding filter") + + if filter_expressed: + X = adata.X + if sparse.issparse(X): + gene_sum = 
np.asarray(X.sum(axis=0)).ravel() + else: + gene_sum = X.sum(axis=0).ravel() + masks["expressed"] = pd.Series(gene_sum > 0, index=adata.var_names) + logger.info(f"Expressed genes: {masks['expressed'].sum()}") + else: + masks["expressed"] = pd.Series(True, index=adata.var_names) + + masks["unique"] = pd.Series(True, index=adata.var_names) + + if filter_mhc and all(c for c in [chr_col, start_col, end_col]): + in_mhc_chr = adata.var[chr_col] == str(mhc_chr) + overlaps_mhc = in_mhc_chr & (adata.var[end_col] >= mhc_start) & (adata.var[start_col] <= mhc_end) + masks["not_mhc"] = ~overlaps_mhc.fillna(False) + logger.info(f"Non-MHC genes: {masks['not_mhc'].sum()}") + else: + masks["not_mhc"] = pd.Series(True, index=adata.var_names) + if filter_mhc: + logger.warning("Missing chr/start/end columns; skipping MHC filter") + + mask_keep = pd.Series(True, index=adata.var_names) + for mask_name, mask in masks.items(): + mask_keep &= mask + + n_before = adata.n_vars + n_after = mask_keep.sum() + logger.info(f"Keeping {n_after} / {n_before} genes after filtering") + + adata = adata[:, mask_keep.values].copy() + + + if log_transform: + # Work with categorical clusters + clusters_cat = adata.obs[celltype_col].astype("category") + cluster_names = clusters_cat.cat.categories.to_list() + n_clusters = len(cluster_names) + n_cells, n_genes = adata.shape + logger.info(f"n_cells = {n_cells}, n_genes = {n_genes}, n_clusters = {n_clusters}") + + # matrix: genes × clusters + avg_matrix = np.zeros((n_genes, n_clusters), dtype=np.float64) + X = adata.X # could be csr_matrix or dense + + + # Compute per-cluster log1p mean + logger.info("Applying log1p transformation") + for j, cl in enumerate(tqdm(cluster_names, desc="Aggregating clusters")): + # indices of cells in this cluster + idx = np.where(clusters_cat.values == cl)[0] + if idx.size == 0: + # no cells in this cluster (shouldn't usually happen, but just in case) + avg_matrix[:, j] = 0.0 + continue + + # subset expression for these cells: 
shape (n_cells_in_cluster, n_genes) + X_sub = X[idx, :] + + # Convert to dense if sparse + if hasattr(X_sub, "toarray"): + X_sub = X_sub.toarray() + + # log1p transform and average over cells (axis 0, since rows=cells, cols=genes) + # Using numexpr to speed up log1p and sum + # log1p(X_sub) is applied per element; sum over cells => axis=0 => length n_genes + # careful: numexpr works on 1D or 2D arrays; we keep it 2D here + log1p_X_sub = ne.evaluate("log1p(X_sub)") + avg_expr = log1p_X_sub.mean(axis=0) # 1D, length n_genes + + # Store as genes × clusters → [gene, cluster_index] + avg_matrix[:, j] = avg_expr + + df = pd.DataFrame( + avg_matrix, + index=adata.var_names, # genes as rows + columns=cluster_names # clusters as columns + ) + + logger.info("Log1p applied.") + + + if not log_transform: + raise ValueError("This preprocessing path expects log_transform=True (needs cluster-level log1p matrix).") + + # Wide table from the matrix you computed + exp_wide = df.copy().reset_index() + exp_wide = exp_wide.rename(columns={"index": "gene"}) + + # If reset_index() produced column named "index" instead: + if "gene" not in exp_wide.columns and "index" in exp_wide.columns: + exp_wide = exp_wide.rename(columns={"index": "gene"}) + + clusters = [c for c in exp_wide.columns if c != "gene"] + + # copy wide table + exp = exp_wide.copy() + + # add_count(gene) + exp["n"] = exp.groupby("gene")["gene"].transform("count") + + # keep only genes with n == 1 (THIS is stricter than your old "unique" mask) + exp = exp.loc[exp["n"] == 1].drop(columns=["n"]) + + # gather/melt to long + exp = exp.melt( + id_vars="gene", + var_name="ClusterID", # cluster name + value_name="Expr_sum_mean" # your log1p mean expression + ) + + logger.info(f"Computing mean expression for {celltype_col}") + # normalize within each cluster to sum to 1000 + exp["Expr_sum_mean"] = ( + exp["Expr_sum_mean"] * 1000.0 / + exp.groupby("ClusterID")["Expr_sum_mean"].transform("sum") + ) + mean_expr_df = 
def generate_sldsc_genesets(
    specificity_df: pd.DataFrame,
    adata: AnnData,
    *,
    out_dir: str | Path,
    top_frac: float = 0.10,
    gene_col: str | None = "gene",  # e.g. "gene" (symbols) OR "ensembl_gene_id"
    accession_col: str | None = None,  # explicit Ensembl ID column in adata.var (recommended)
    remove_version_suffix: bool = True,
    include_control: bool = True,
    overwrite: bool = False,
) -> pd.DataFrame:
    """
    Generate cell-type-specific gene sets for S-LDSC analysis.

    Expects ``specificity_df`` to be genes × cell types, indexed by gene identifiers
    (symbols or Ensembl IDs). Writes one ``<cell type>.GeneSet`` file per column,
    containing the top ``top_frac`` fraction of genes by specificity, one ID per line.

    Parameters
    ----------
    specificity_df
        Specificity scores (genes × cell types).
    adata
        Object whose ``.var`` (and ``.var_names``) provide the gene → accession mapping.
    out_dir
        Directory the ``.GeneSet`` files are written to (created if missing).
    top_frac
        Fraction of genes kept per cell type; at least one gene is always requested.
    gene_col
        Column of ``adata.var`` matched against the ``specificity_df`` index. When an
        ``accession_col`` is given and ``gene_col`` is absent, ``adata.var_names`` is used.
    accession_col
        Column of ``adata.var`` holding the IDs to write. Rows with a missing
        accession are ignored.
    remove_version_suffix
        Strip everything from the first ``.`` onwards in all identifiers.
    include_control
        Also write ``Control.GeneSet`` containing every retained gene.
    overwrite
        Allow writing into an already existing ``out_dir``. Existing files with the
        same name are overwritten; other files are left in place.

    Returns
    -------
    pd.DataFrame
        One row per cell type with columns ``cell_type``, ``n_genes``, ``output_path``.

    Raises
    ------
    FileExistsError
        If ``out_dir`` exists and ``overwrite`` is False.
    ValueError
        If ``accession_col`` is not in ``adata.var``, if no gene overlaps, or if the
        index is not Ensembl-like and no usable ``gene_col``/``accession_col`` exists.
    """
    out_dir = Path(out_dir)

    def _normalize_ids(ids):
        # Shared normalisation (works on both Series and Index via ``.str``):
        # upper-case, then optionally drop the version suffix.
        ids = ids.str.upper()
        if remove_version_suffix:
            ids = ids.str.replace(r"\..*$", "", regex=True)
        return ids

    # ---- Safety checks ----
    if specificity_df.index.name != "gene":
        # not required, but helps debugging
        logger.info(f"specificity_df index name is '{specificity_df.index.name}', expected 'gene' (ok).")

    if out_dir.exists() and not overwrite:
        raise FileExistsError(f"Output directory {out_dir} already exists. Set overwrite=True to proceed.")
    out_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"Writing gene sets to {out_dir}")

    # ---- Build mapping from specificity_df genes -> accessions to write ----
    spec_upper = _normalize_ids(pd.Index(specificity_df.index.astype(str)))

    if accession_col is not None:
        # The user-provided accession column is the authoritative source of output IDs.
        if accession_col not in adata.var.columns:
            raise ValueError(f"Column '{accession_col}' not found in adata.var")

        acc_raw = adata.var[accession_col]
        # astype(str) would turn missing values into the literal "NAN", which would then
        # survive dropna() and be written as a gene ID; restore them as missing instead.
        acc = _normalize_ids(acc_raw.astype(str)).mask(acc_raw.isna().to_numpy())

        # Decide what to match on (gene_col or var_names)
        if gene_col is not None and gene_col in adata.var.columns:
            key = _normalize_ids(adata.var[gene_col].astype(str))
        else:
            key = _normalize_ids(pd.Index(adata.var_names.astype(str)))

        map_df = pd.DataFrame({"key": key.values, "accession": acc.values}).dropna()
        map_df = map_df.drop_duplicates(subset=["key"], keep="first")

        # Map specificity genes -> accession
        gene_to_acc = pd.Series(map_df["accession"].values, index=map_df["key"].values)
        accessions = gene_to_acc.reindex(spec_upper)

        overlap_mask = accessions.notna()
        if overlap_mask.sum() == 0:
            raise ValueError("No overlapping genes between specificity_df and adata.var mapping (accession_col).")

        logger.info(f"Overlapping genes after mapping: {overlap_mask.sum()}/{len(specificity_df)}")
        specificity_df = specificity_df.loc[overlap_mask.values].copy()
        # Replace index with accessions (what LDSC wants)
        specificity_df.index = accessions.loc[overlap_mask.values].values

    else:
        # No accession_col supplied: use the index as-is when most entries look like ENSG IDs.
        ensembl_like = spec_upper.str.match(r"^ENSG\d+$", na=False).mean() > 0.5
        if ensembl_like:
            logger.info("specificity_df index looks like Ensembl IDs; using them directly.")
            specificity_df = specificity_df.copy()
            specificity_df.index = spec_upper.values
        else:
            # Fall back to restricting to genes also present in adata.var[gene_col];
            # the normalised specificity_df identifiers are what gets written.
            if gene_col is None or gene_col not in adata.var.columns:
                raise ValueError(
                    "specificity_df index does not look like Ensembl IDs, and no valid gene_col/accession_col provided."
                )

            adata_key = _normalize_ids(adata.var[gene_col].astype(str))

            overlap = spec_upper.intersection(pd.Index(adata_key))
            if overlap.empty:
                raise ValueError("No overlapping genes found between specificity_df index and adata.var[gene_col].")

            logger.info(f"Overlapping genes: {len(overlap)}/{specificity_df.shape[0]}")
            in_overlap = spec_upper.isin(overlap)
            specificity_df = specificity_df.loc[in_overlap].copy()
            specificity_df.index = spec_upper[in_overlap].values  # normalized

    # ---- Select top genes per cell type ----
    n_genes = specificity_df.shape[0]
    k = max(1, int(np.ceil(top_frac * n_genes)))
    logger.info(f"Selecting top {k} genes ({top_frac*100:.1f}%) per cell type")

    summary = []
    for celltype in specificity_df.columns:
        top_genes = specificity_df[celltype].nlargest(k).index.astype(str).unique()

        safe_celltype = _safe_filename(celltype)
        out_path = out_dir / f"{safe_celltype}.GeneSet"

        with open(out_path, "w") as f:
            for gene_id in top_genes:
                f.write(f"{gene_id}\n")

        summary.append({"cell_type": celltype, "n_genes": len(top_genes), "output_path": str(out_path)})
        logger.debug(f"Wrote {len(top_genes)} genes for {celltype}")

    # ---- Control geneset ----
    if include_control:
        control_path = out_dir / "Control.GeneSet"
        with open(control_path, "w") as f:
            for gene_id in specificity_df.index.astype(str):
                f.write(f"{gene_id}\n")
        logger.info(f"Wrote control gene set with {len(specificity_df)} genes")

    summary_df = pd.DataFrame(summary)
    logger.info(f"Generated {len(summary)} cell-type-specific gene sets")
    return summary_df
def _fetch_ensembl_annotation(
    genome_build: Literal["GRCh37", "GRCh38"] = "GRCh37", gene_identifier_mode: str = "ensembl"
) -> pd.DataFrame:
    """
    Download human gene annotations from Ensembl BioMart via pybiomart.

    Parameters
    ----------
    genome_build
        "GRCh37" (queried at grch37.ensembl.org) or "GRCh38" (queried at www.ensembl.org).
    gene_identifier_mode
        "name" uses the HGNC symbol, falling back to the external gene name where the
        symbol is empty; "ensembl" uses the Ensembl gene ID.

    Returns
    -------
    pd.DataFrame
        Columns ``gene``, ``chrom``, ``start``, ``end``, ``gene_biotype``; rows without
        a ``gene`` value are dropped.

    Raises
    ------
    ImportError
        If pybiomart is not installed.
    ValueError
        If ``genome_build`` or ``gene_identifier_mode`` is not one of the accepted values.
    """
    try:
        from pybiomart import Server
    except ImportError as e:
        raise ImportError(
            "pybiomart is required for fetching gene annotations. " "Install it with: pip install pybiomart"
        ) from e

    # Resolve the BioMart host for the requested build.
    if genome_build == "GRCh37":
        logger.info("Querying Ensembl BioMart (GRCh37)...")
        host = "http://grch37.ensembl.org"
    elif genome_build == "GRCh38":
        logger.info("Querying Ensembl BioMart (GRCh38)...")
        host = "http://www.ensembl.org"
    else:
        raise ValueError(f"Invalid genome_build: {genome_build}. Must be 'GRCh37' or 'GRCh38'")
    server = Server(host=host)

    mart = server.marts["ENSEMBL_MART_ENSEMBL"]
    dataset = mart.datasets["hsapiens_gene_ensembl"]

    logger.info(f"Fetching gene annotations from {genome_build}...")
    anno = dataset.query(
        attributes=[
            "hgnc_symbol",
            "external_gene_name",
            "ensembl_gene_id",
            "chromosome_name",
            "start_position",
            "end_position",
            "gene_biotype",
        ],
        use_attr_names=True,
    )
    anno.columns = [name.strip() for name in anno.columns]

    # Accept both BioMart display labels and raw attribute names. Attribute names that
    # already equal the target name need no entry (renaming to itself is a no-op).
    column_aliases = {
        "HGNC symbol": "hgnc_symbol",
        "Gene name": "external_gene_name",
        "Gene stable ID": "ensembl_gene_id",
        "Chromosome/scaffold name": "chrom",
        "Gene start (bp)": "start",
        "Gene end (bp)": "end",
        "Gene type": "gene_biotype",
        "chromosome_name": "chrom",
        "start_position": "start",
        "end_position": "end",
    }
    anno = anno.rename(columns=column_aliases)

    if gene_identifier_mode == "name":
        symbols = anno["hgnc_symbol"].replace("", pd.NA)
        anno["gene"] = symbols.fillna(anno["external_gene_name"])
    elif gene_identifier_mode == "ensembl":
        anno["gene"] = anno["ensembl_gene_id"].replace("", pd.NA)
    else:
        raise ValueError(f"Invalid mode: {gene_identifier_mode}. Must be 'name' or 'ensembl'.")

    keep = ["gene", "chrom", "start", "end", "gene_biotype"]
    anno = anno[keep].dropna(subset=["gene"])

    logger.info(f"Fetched annotations for {len(anno)} genes from {genome_build}")
    return anno
Must be 'name' or 'ensembl'.") + + anno = anno[["gene", "chrom", "start", "end", "gene_biotype"]].dropna(subset=["gene"]) + + logger.info(f"Fetched annotations for {len(anno)} genes from {genome_build}") + return anno + + +def _map_gene_annotation(adata: AnnData, anno_df: pd.DataFrame, gene_col: str | None = "gene") -> AnnData: + """Map gene annotations to adata.var.""" + anno_cols = ["chrom", "start", "end", "gene_biotype"] + + conflicts = [c for c in anno_cols if c in adata.var.columns] + if conflicts: + logger.info(f"Dropping conflicting columns from adata.var before merge: {conflicts}") + adata.var = adata.var.drop(columns=conflicts) + + anno_df["gene_upper"] = anno_df["gene"].astype(str).str.upper() + + anno_df = anno_df.drop_duplicates(subset=["gene_upper"]) + + merged = adata.var.merge(anno_df[["gene_upper"] + anno_cols], on="gene_upper", how="left") + + adata.var = merged.set_index(adata.var.index) + + logger.info(f"Annotated {(~merged['chrom'].isna()).sum()} / {adata.n_vars} genes.") + + return adata + + +def _pick_var_col(adata: AnnData, candidates: list[str], default: str | None) -> str | None: + """Select first existing column from candidates.""" + if default and default in adata.var.columns: + return default + for col in candidates: + if col in adata.var.columns: + return col + return default + + +def _normalize_chromosome(chr_series: pd.Series) -> pd.Series: + """Normalize chromosome labels to standard format.""" + normalized = chr_series.astype(str).str.replace("^chr", "", regex=True).str.upper() + return normalized.str.extract(r"^([0-9XYM]+)", expand=False) + + +def _compute_celltype_means(adata: AnnData, celltype_col: str) -> pd.DataFrame: + """Compute mean expression per cell type.""" + celltypes = pd.Index(adata.obs[celltype_col].astype("category")).categories + + means_list = [] + col_names = [] + + for celltype in celltypes: + mask = (adata.obs[celltype_col] == celltype).values + n_cells = mask.sum() + col_names.append(str(celltype)) + + if 
def _compute_specificity(mean_expr_df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute specificity scores using Duncan et al. 2019 method.

    Specificity(gene, celltype) = mean(gene, celltype) / sum(mean(gene, all_celltypes))

    Genes whose row sum is 0 get specificity 0.0 in every cell type (the zero
    denominator is replaced by NaN and the resulting NaNs are filled with 0.0).
    """
    gene_sums = mean_expr_df.sum(axis=1)
    denom = gene_sums.replace(0, np.nan)
    specificity = mean_expr_df.div(denom, axis=0).fillna(0.0)

    return specificity


def _safe_filename(s: str) -> str:
    """
    Convert string to safe filename.

    Spaces and parentheses are each replaced by "_" first; afterwards every remaining
    run of characters other than word characters, ".", "+" and "-" becomes a single "_".
    """
    s = str(s).strip().replace(" ", "_")
    s = s.replace("(", "_").replace(")", "_")
    return re.sub(r"[^\w\.\+\-]+", "_", s)


def generate_gene_coord_file(
    out_path: str | Path,
    *,
    genome_build: Literal["GRCh37", "GRCh38"] = "GRCh37",
    gene_identifier_mode: str = "ensembl",
    remove_version_suffix: bool = True,
    add_chr_prefix: bool = True,
    overwrite: bool = False,
) -> pd.DataFrame:
    """
    Generate a gene coordinate file for S-LDSC analysis from Ensembl BioMart.

    Fetches all genes from Ensembl and creates a tab-delimited file (with header) with columns:
    GENE, CHR, START, END

    Parameters
    ----------
    out_path
        Output file path (e.g., "gene_coords.txt").
    genome_build
        Genome build version: "GRCh37" or "GRCh38".
    gene_identifier_mode
        Gene identifier: "name" (gene symbols) or "ensembl" (Ensembl IDs).
    remove_version_suffix
        Whether to remove version suffixes from gene IDs (e.g., ENSG00000123456.7 → ENSG00000123456).
    add_chr_prefix
        Whether to add "chr" prefix to chromosome names (e.g., "1" → "chr1").
        When False, an existing leading "chr" is removed instead.
    overwrite
        Whether to overwrite existing output file.

    Returns
    -------
    pd.DataFrame
        The table that was written: one row per unique GENE (first occurrence kept),
        sorted by CHR (as string) and START.

    Raises
    ------
    FileExistsError
        If out_path exists and overwrite=False.
    ImportError
        If pybiomart is not installed.

    Examples
    --------
    >>> # Fetch all genes with Ensembl IDs from GRCh37
    >>> coord_df = generate_gene_coord_file("gene_coords.txt", gene_identifier_mode="ensembl", genome_build="GRCh37")
    >>> # Fetch with gene symbols from GRCh38
    >>> coord_df = generate_gene_coord_file(
    ...     "gene_coords_grch38.txt", gene_identifier_mode="name", genome_build="GRCh38"
    ... )
    """
    out_path = Path(out_path)

    if out_path.exists() and not overwrite:
        raise FileExistsError(f"Output file {out_path} already exists. Set overwrite=True to proceed.")

    logger.info(f"Fetching gene annotations from Ensembl {genome_build}...")
    anno_df = _fetch_ensembl_annotation(genome_build=genome_build, gene_identifier_mode=gene_identifier_mode)

    coord_df = anno_df[["gene", "chrom", "start", "end"]].copy()
    coord_df.columns = ["GENE", "CHR", "START", "END"]

    if remove_version_suffix:
        logger.info("Removing version suffixes from gene identifiers")
        coord_df["GENE"] = coord_df["GENE"].astype(str).str.replace(r"\..*$", "", regex=True)

    coord_df["CHR"] = coord_df["CHR"].astype(str)
    if add_chr_prefix:
        coord_df["CHR"] = coord_df["CHR"].apply(lambda x: x if x.startswith("chr") else f"chr{x}")
    else:
        coord_df["CHR"] = coord_df["CHR"].str.replace("^chr", "", regex=True)

    coord_df["START"] = coord_df["START"].astype(int)
    coord_df["END"] = coord_df["END"].astype(int)

    n_before = len(coord_df)
    coord_df = coord_df.drop_duplicates(subset=["GENE"], keep="first")
    n_after = len(coord_df)

    if n_before != n_after:
        logger.warning(f"Removed {n_before - n_after} duplicate gene entries")

    coord_df = coord_df.sort_values(["CHR", "START"])

    logger.info(f"Writing {len(coord_df)} gene coordinates to {out_path}")
    coord_df.to_csv(out_path, sep="\t", index=False)

    logger.info(f"Successfully created gene coordinate file: {out_path}")
    # The signature and the docstring examples promise the table; previously None was returned.
    return coord_df
{out_path}") + coord_df.to_csv(out_path, sep="\t", index=False) + + logger.info(f"Successfully created gene coordinate file: {out_path}") diff --git a/src/cellink/tl/external/config/ldsc.yaml b/src/cellink/tl/external/config/ldsc.yaml new file mode 100644 index 0000000..da6083a --- /dev/null +++ b/src/cellink/tl/external/config/ldsc.yaml @@ -0,0 +1,4 @@ +execution_mode: local +ldsc_command: ldsc.py +make_annot_command: make_annot.py +munge_command: munge_sumstats.py diff --git a/src/cellink/tl/external/config/ldsc_docker.yaml b/src/cellink/tl/external/config/ldsc_docker.yaml new file mode 100644 index 0000000..496b113 --- /dev/null +++ b/src/cellink/tl/external/config/ldsc_docker.yaml @@ -0,0 +1,5 @@ +execution_mode: docker +docker_image: zijingliu/ldsc +ldsc_command: /ldsc/ldsc.py +make_annot_command: /ldsc/make_annot.py +munge_command: /ldsc/munge_sumstats.py diff --git a/src/cellink/tl/external/config/ldsc_singularity.yaml b/src/cellink/tl/external/config/ldsc_singularity.yaml new file mode 100644 index 0000000..58edd29 --- /dev/null +++ b/src/cellink/tl/external/config/ldsc_singularity.yaml @@ -0,0 +1,5 @@ +execution_mode: singularity +singularity_image: /project/genomics/ayshan/containers/ldsc.sif +ldsc_command: /ldsc/ldsc.py +make_annot_command: /ldsc/make_annot.py +munge_command: /ldsc/munge_sumstats.py diff --git a/test_new_functions.py b/test_new_functions.py new file mode 100644 index 0000000..5cd3a9f --- /dev/null +++ b/test_new_functions.py @@ -0,0 +1,844 @@ +""" +Tests for all new cellink.tl.external functions: + - make_continuous_annot_from_bimfile + - make_continuous_annot_from_donor_data (skipped: needs DonorData env) + - scores_to_gmt + - scores_to_covar + - genesets_dir_to_entrez_gmt + - run_magma_annotate + - run_magma_gene_analysis + - run_magma_gsa + - run_magma_gpa (joint and univariate) + +Run from the repo root: + conda run -n singlecell python cellink/test_new_functions.py +""" + +import gzip +import os +import sys +import tempfile +import 
import traceback
from pathlib import Path

import numpy as np
import pandas as pd

# Load module files directly via importlib to bypass cellink/__init__.py,
# which requires mudata, zarr, and other heavy deps not installed here.
import importlib.util

_SRC = Path(__file__).parent / "src"


def _load(name, rel_path):
    """
    Execute the source file ``_SRC / rel_path`` as a module registered under ``name``.

    The module is placed in ``sys.modules`` before it is executed and returned
    afterwards. If no import spec/loader can be built an ImportError is raised, and
    if execution fails the partially initialised module is removed from
    ``sys.modules`` again before the error propagates.
    """
    path = _SRC / rel_path
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for module '{name}' from {path}")
    mod = importlib.util.module_from_spec(spec)
    sys.modules[name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # do not leave a half-initialised module behind for later imports to pick up
        sys.modules.pop(name, None)
        raise
    return mod


# Bind the functions under test from the directly loaded _ldsc2magma module.
_ldsc2magma = _load("cellink_ldsc2magma", "cellink/tl/external/_ldsc2magma.py")
scores_to_gmt = _ldsc2magma.scores_to_gmt
scores_to_covar = _ldsc2magma.scores_to_covar
genesets_dir_to_entrez_gmt = _ldsc2magma.genesets_dir_to_entrez_gmt
run_magma_annotate = _ldsc2magma.run_magma_annotate
run_magma_gene_analysis = _ldsc2magma.run_magma_gene_analysis
run_magma_gsa = _ldsc2magma.run_magma_gsa
run_magma_gpa = _ldsc2magma.run_magma_gpa

# _ldsc.py imports DonorData, cellink.io.to_plink, cellink.resources._utils
# Stub them all out so the module loads without the full cellink install.
import types


def _stub_module(name, **attrs):
    """Register an empty module called ``name`` in ``sys.modules`` with ``attrs`` set on it."""
    m = types.ModuleType(name)
    for k, v in attrs.items():
        setattr(m, k, v)
    sys.modules[name] = m
    return m


class _DonorData:
    """Placeholder standing in for ``cellink._core.DonorData``."""


_stub_module("cellink._core", DonorData=_DonorData)
_stub_module("cellink",
             _core=sys.modules["cellink._core"])
_stub_module("cellink.io", to_plink=None)
_stub_module("cellink.resources", _utils=None)
_stub_module("cellink.resources._utils", get_data_home=lambda *a, **kw: "/tmp")

_ldsc = _load("cellink_ldsc", "cellink/tl/external/_ldsc.py")
make_continuous_annot_from_bimfile = _ldsc.make_continuous_annot_from_bimfile

# ---------------------------------------------------------------------------
# Known real paths
# Each can be overridden through the environment variable named below; the
# defaults are the original locations, so existing setups keep working.
# ---------------------------------------------------------------------------
GENE_LOC = os.environ.get(
    "CELLINK_TEST_GENE_LOC",
    "/project/genomics/ayshan/ldsc_analysis/data_2/annotation_sources/Genecode/gencode_v39_grch38_ensg.gene.loc",
)
BIM22 = os.environ.get(
    "CELLINK_TEST_BIM22",
    "/project/genomics/ayshan/ldsc_analysis/data_2/pre_annotations/annot_generation/1000G_EUR_Phase3_plink/1000G.EUR.QC.22.bim",
)
GENES_RAW = os.environ.get(
    "CELLINK_TEST_GENES_RAW",
    "/project/genomics/ayshan/ldsc_analysis/MODEL/INPUT/magma_zscore/scz_result.genes.raw",
)
MAGMA_BIN = os.environ.get(
    "CELLINK_TEST_MAGMA_BIN",
    "/project/genomics/ayshan/ldsc_analysis/psychmagma/magma",
)
SCORES_CSV = os.environ.get(
    "CELLINK_TEST_SCORES_CSV",
    "/project/genomics/ayshan/ldsc_analysis/data_2/single_cell_pre_annotations/1k1k/seismic/scores.csv",
)

# Names of the checks that passed / failed, filled by ok() and fail().
PASS = []
FAIL = []


def ok(name):
    """Print a PASS line for ``name`` and record it in ``PASS``."""
    print(f" [PASS] {name}")
    PASS.append(name)


def fail(name, exc):
    """Print a FAIL line for ``name`` with ``exc`` and record the name in ``FAIL``."""
    print(f" [FAIL] {name}: {exc}")
    FAIL.append(name)


# ---------------------------------------------------------------------------
# Helpers: build minimal synthetic data
# ---------------------------------------------------------------------------

def make_synthetic_scores(n_genes=200, n_cts=5, seed=0):
    """Return a genes × cell-types DataFrame with ENSG-style row IDs."""
    rng = np.random.default_rng(seed)
    genes = [f"ENSG{i:011d}" for i in range(1, n_genes + 1)]
    cts = [f"CT{i}" for i in range(1, n_cts + 1)]
    data = rng.random((n_genes, n_cts))
    data[rng.integers(0, n_genes, 20), :] = np.nan  # sprinkle some NaN
    return pd.DataFrame(data, index=genes, columns=cts)
rng.random((n_genes, n_cts)) + data[rng.integers(0, n_genes, 20), :] = np.nan # sprinkle some NaN + return pd.DataFrame(data, index=genes, columns=cts) + + +def make_synthetic_bim(chrom=22, n_snps=300, seed=1): + """Return a DataFrame in PLINK .bim format.""" + rng = np.random.default_rng(seed) + bps = np.sort(rng.integers(16_000_000, 51_000_000, n_snps)) + rows = { + "CHR": chrom, + "SNP": [f"rs{i}" for i in range(n_snps)], + "CM": np.zeros(n_snps), + "BP": bps, + "A1": "A", + "A2": "G", + } + return pd.DataFrame(rows)[["CHR", "SNP", "CM", "BP", "A1", "A2"]] + + +def make_synthetic_gene_coord(chrom=22, n_genes=50, seed=2): + """Return a headless gene coordinate DataFrame with ENSG IDs.""" + rng = np.random.default_rng(seed) + starts = np.sort(rng.integers(16_000_000, 50_000_000, n_genes)) + ends = starts + rng.integers(1_000, 100_000, n_genes) + genes = [f"ENSG{i:011d}" for i in range(1, n_genes + 1)] + return pd.DataFrame({ + "gene": genes, + "chr": str(chrom), + "start": starts, + "end": ends, + }) + + +# --------------------------------------------------------------------------- +# Test 1: make_continuous_annot_from_bimfile (synthetic data) +# --------------------------------------------------------------------------- + +def test_continuous_annot_synthetic(): + + scores_df = make_synthetic_scores(n_genes=50) + bim_df = make_synthetic_bim() + coord_df = make_synthetic_gene_coord() + + with tempfile.TemporaryDirectory() as tmp: + bim_path = os.path.join(tmp, "test.22.bim") + coord_path = os.path.join(tmp, "gene_coord.txt") + annot_path = os.path.join(tmp, "CT1.22.annot.gz") + + bim_df.to_csv(bim_path, sep="\t", index=False, header=False) + coord_df.to_csv(coord_path, sep="\t", index=False, header=False) + + # use first cell type column as a Series + scores = scores_df["CT1"].dropna() + + result = make_continuous_annot_from_bimfile( + bimfile=bim_path, + scores=scores, + annot_file=annot_path, + gene_coord_file=coord_path, + windowsize=100_000, + score_agg="max", 
def test_continuous_annot_score_agg_variants():
    """max / sum / mean should all produce valid output with same shape."""

    scores_df = make_synthetic_scores(n_genes=50)
    bim_df = make_synthetic_bim()
    coord_df = make_synthetic_gene_coord()

    with tempfile.TemporaryDirectory() as tmp:
        bim_path = os.path.join(tmp, "test.22.bim")
        coord_path = os.path.join(tmp, "gc.txt")
        bim_df.to_csv(bim_path, sep="\t", index=False, header=False)
        coord_df.to_csv(coord_path, sep="\t", index=False, header=False)
        scores = scores_df["CT1"].dropna()

        # ANNOT column produced by each aggregation mode
        results = {}
        for agg in ("max", "sum", "mean"):
            out_path = os.path.join(tmp, f"CT1_{agg}.annot.gz")
            make_continuous_annot_from_bimfile(
                bimfile=bim_path, scores=scores, annot_file=out_path,
                gene_coord_file=coord_path, score_agg=agg,
            )
            df = pd.read_csv(out_path, sep="\t")
            results[agg] = df["ANNOT"].values

        assert len(results["max"]) == len(bim_df)
        # sum >= max for genes with overlapping windows (synthetic scores are non-negative)
        assert (results["sum"] >= results["max"] - 1e-12).all(), "sum should be >= max"

    ok("make_continuous_annot_from_bimfile score_agg variants")


def test_continuous_annot_real_bim():
    """Run against the real chr22 1000G bimfile and gene.loc.

    Skipped (with a message) unless the bimfile, the gene location file and the
    scores CSV are all present.
    """

    # SCORES_CSV is read below as well, so it must be part of the skip guard;
    # otherwise a machine with only the bim/gene.loc files crashes instead of skipping.
    required = (BIM22, GENE_LOC, SCORES_CSV)
    if not all(os.path.exists(p) for p in required):
        print(" [SKIP] make_continuous_annot real bim: files missing")
        return

    # Load a handful of real ENSG scores from the seismic file
    scores_full = pd.read_csv(SCORES_CSV, index_col=0)
    col = scores_full.columns[0]
    scores = scores_full[col].dropna().head(2000)

    with tempfile.TemporaryDirectory() as tmp:
        annot_path = os.path.join(tmp, "real_ct.22.annot.gz")
        result = make_continuous_annot_from_bimfile(
            bimfile=BIM22,
            scores=scores,
            annot_file=annot_path,
            gene_coord_file=GENE_LOC,
            windowsize=100_000,
        )
        out = pd.read_csv(annot_path, sep="\t")
        assert list(out.columns) == ["CHR", "BP", "SNP", "CM", "ANNOT"]
        assert (out["ANNOT"] >= 0).all()
        print(f" real bim chr22: {result['n_nonzero_snps']:,} non-zero SNPs / {len(out):,} total, "
              f"{result['n_genes_matched']:,} genes matched")

    ok("make_continuous_annot_from_bimfile (real chr22 bim)")
# ---------------------------------------------------------------------------
# Test 2: scores_to_gmt
# ---------------------------------------------------------------------------

def test_scores_to_gmt_basic():
    """One GMT line per cell type: name, "NA" description, then ENSG gene IDs."""
    synthetic = make_synthetic_scores(n_genes=200, n_cts=5)

    with tempfile.TemporaryDirectory() as tmp:
        gmt_path = os.path.join(tmp, "test.gmt")
        written = scores_to_gmt(synthetic, gmt_path, top_frac=0.10)

        assert written.exists()
        records = written.read_text().strip().splitlines()
        assert len(records) == 5, f"expected 5 sets, got {len(records)}"

        for record in records:
            fields = record.split("\t")
            assert fields[1] == "NA", "second field should be NA"
            assert len(fields) >= 3, "must have at least one gene"
            # all gene IDs should look like ENSG
            for g in fields[2:]:
                assert g.startswith("ENSG"), f"non-ENSG gene: {g}"

    ok("scores_to_gmt basic")


def test_scores_to_gmt_top_frac():
    """Doubling top_frac from 10% to 20% should double the size of the first gene set."""
    synthetic = make_synthetic_scores(n_genes=100, n_cts=3)

    with tempfile.TemporaryDirectory() as tmp:
        out10 = os.path.join(tmp, "top10.gmt")
        out20 = os.path.join(tmp, "top20.gmt")
        scores_to_gmt(synthetic, out10, top_frac=0.10)
        scores_to_gmt(synthetic, out20, top_frac=0.20)

        first10 = Path(out10).read_text().strip().splitlines()[0]
        first20 = Path(out20).read_text().strip().splitlines()[0]
        # the first two tab-separated fields are set name and description
        genes10 = len(first10.split("\t")) - 2
        genes20 = len(first20.split("\t")) - 2
        assert genes20 == 2 * genes10, f"expected 20 genes at 20%, got {genes20}"

    ok("scores_to_gmt top_frac scaling")
def _first_set_genes(gmt_path):
    """Return the gene IDs (fields after name and description) of the first GMT line."""
    first_line = Path(gmt_path).read_text().strip().splitlines()[0]
    return set(first_line.split("\t")[2:])


def test_scores_to_gmt_ascending():
    """Top-10% and bottom-10% selections of the same scores must be disjoint."""
    synthetic = make_synthetic_scores(n_genes=100, n_cts=2)

    with tempfile.TemporaryDirectory() as tmp:
        out_top = os.path.join(tmp, "top.gmt")
        out_bot = os.path.join(tmp, "bot.gmt")
        scores_to_gmt(synthetic, out_top, top_frac=0.10, ascending=False)
        scores_to_gmt(synthetic, out_bot, top_frac=0.10, ascending=True)

        top_genes = _first_set_genes(out_top)
        bot_genes = _first_set_genes(out_bot)
        assert top_genes.isdisjoint(bot_genes), "top and bottom sets should not overlap"

    ok("scores_to_gmt ascending mode")


def test_scores_to_gmt_with_prefix():
    """Every set name must start with the requested prefix followed by an underscore."""
    synthetic = make_synthetic_scores(n_genes=50, n_cts=2)

    with tempfile.TemporaryDirectory() as tmp:
        gmt_path = os.path.join(tmp, "prefixed.gmt")
        scores_to_gmt(synthetic, gmt_path, top_frac=0.10, set_name_prefix="myprefix")
        for line in Path(gmt_path).read_text().strip().splitlines():
            assert line.startswith("myprefix_"), f"prefix not applied: {line[:30]}"

    ok("scores_to_gmt set_name_prefix")


def test_scores_to_gmt_gene_map():
    """symbol → ENSG mapping: only mapped ENSG rows should appear in output."""

    # Use gene symbols as index
    symbols = [f"GENE{i}" for i in range(50)]
    ensg_ids = [f"ENSG{i:011d}" for i in range(1, 51)]
    symbol_scores = pd.DataFrame(
        np.random.default_rng(0).random((50, 2)),
        index=symbols, columns=["CT1", "CT2"]
    )
    gene_map = pd.Series(ensg_ids, index=symbols)

    with tempfile.TemporaryDirectory() as tmp:
        gmt_path = os.path.join(tmp, "mapped.gmt")
        scores_to_gmt(symbol_scores, gmt_path, top_frac=0.10, gene_map=gene_map)
        records = Path(gmt_path).read_text().strip().splitlines()
        assert len(records) == 2
        for record in records:
            for g in record.split("\t")[2:]:
                assert g.startswith("ENSG"), f"non-ENSG after mapping: {g}"

    ok("scores_to_gmt gene_map translation")
# ---------------------------------------------------------------------------
# Test 3: scores_to_covar
# ---------------------------------------------------------------------------

def test_scores_to_covar_basic():
    """The covariate file has a GENE index column, one column per cell type, ENSG rows only."""
    synthetic = make_synthetic_scores(n_genes=100, n_cts=4)

    with tempfile.TemporaryDirectory() as tmp:
        covar_path = os.path.join(tmp, "test.covar")
        written = scores_to_covar(synthetic, covar_path)

        assert written.exists()
        covar = pd.read_csv(covar_path, sep="\t", index_col=0)
        assert covar.index.name == "GENE"
        assert covar.shape[1] == 4
        assert (covar.index.str.startswith("ENSG")).all(), "non-ENSG rows in covar"

    ok("scores_to_covar basic")


def test_scores_to_covar_negate():
    """negate=True must flip the sign of every value relative to negate=False."""
    synthetic = make_synthetic_scores(n_genes=50, n_cts=2)

    with tempfile.TemporaryDirectory() as tmp:
        out_pos = os.path.join(tmp, "pos.covar")
        out_neg = os.path.join(tmp, "neg.covar")
        scores_to_covar(synthetic, out_pos, negate=False)
        scores_to_covar(synthetic, out_neg, negate=True)

        plain = pd.read_csv(out_pos, sep="\t", index_col=0).astype(float)
        flipped = pd.read_csv(out_neg, sep="\t", index_col=0).astype(float)
        assert np.allclose(plain.values, -flipped.values, equal_nan=True), \
            "negated covar should be sign-flipped"

    ok("scores_to_covar negate")


def test_scores_to_covar_dedup():
    """Duplicate ENSG IDs should be deduplicated (keep highest mean |score|)."""

    # the first ID appears three times, the remaining 18 IDs once each
    gene_ids = ["ENSG00000000001"] * 3 + [f"ENSG{i:011d}" for i in range(2, 20)]
    dup_scores = pd.DataFrame(
        np.random.default_rng(7).random((len(gene_ids), 2)),
        index=gene_ids, columns=["CT1", "CT2"],
    )

    with tempfile.TemporaryDirectory() as tmp:
        covar_path = os.path.join(tmp, "dedup.covar")
        scores_to_covar(dup_scores, covar_path)
        covar = pd.read_csv(covar_path, sep="\t", index_col=0)
        assert not covar.index.duplicated().any(), "duplicates not removed"
        assert "ENSG00000000001" in covar.index

    ok("scores_to_covar dedup")


def test_scores_to_covar_gene_map():
    """Symbol-indexed scores should be remapped to ENSG."""

    symbols = [f"GENE{i}" for i in range(30)]
    ensg_ids = [f"ENSG{i:011d}" for i in range(1, 31)]
    symbol_scores = pd.DataFrame(
        np.random.default_rng(3).random((30, 2)),
        index=symbols, columns=["CT1", "CT2"],
    )
    gene_map = pd.Series(ensg_ids, index=symbols)

    with tempfile.TemporaryDirectory() as tmp:
        covar_path = os.path.join(tmp, "mapped.covar")
        scores_to_covar(symbol_scores, covar_path, gene_map=gene_map)
        covar = pd.read_csv(covar_path, sep="\t", index_col=0)
        assert (covar.index.str.startswith("ENSG")).all()

    ok("scores_to_covar gene_map translation")
be remapped to ENSG.""" + + genes = [f"GENE{i}" for i in range(30)] + ensg_ids = [f"ENSG{i:011d}" for i in range(1, 31)] + scores = pd.DataFrame( + np.random.default_rng(3).random((30, 2)), + index=genes, columns=["CT1", "CT2"], + ) + gene_map = pd.Series(ensg_ids, index=genes) + + with tempfile.TemporaryDirectory() as tmp: + out = os.path.join(tmp, "mapped.covar") + scores_to_covar(scores, out, gene_map=gene_map) + df = pd.read_csv(out, sep="\t", index_col=0) + assert (df.index.str.startswith("ENSG")).all() + + ok("scores_to_covar gene_map translation") + + +# --------------------------------------------------------------------------- +# Test 4: genesets_dir_to_entrez_gmt +# --------------------------------------------------------------------------- + +def make_geneset_dir(tmp, n_sets=3, n_genes=20): + gdir = os.path.join(tmp, "genesets") + os.makedirs(gdir, exist_ok=True) + gene_pool = [f"ENSG{i:011d}" for i in range(1, 200)] + rng = np.random.default_rng(9) + for i in range(n_sets): + fname = os.path.join(gdir, f"CellType{i}.GeneSet") + chosen = rng.choice(gene_pool, n_genes, replace=False) + Path(fname).write_text("\n".join(chosen) + "\n") + return gdir + + +def test_genesets_dir_to_entrez_gmt_basic(): + + with tempfile.TemporaryDirectory() as tmp: + gdir = make_geneset_dir(tmp, n_sets=3, n_genes=20) + out = os.path.join(tmp, "output.gmt") + result = genesets_dir_to_entrez_gmt(geneset_dir=gdir, out_gmt=out) + + assert result.exists() + lines = result.read_text().strip().splitlines() + assert len(lines) == 3, f"expected 3 sets, got {len(lines)}" + for line in lines: + parts = line.split("\t") + assert parts[1] == "S-LDSC derived gene set" + assert len(parts) >= 4, "too few genes" + + ok("genesets_dir_to_entrez_gmt basic") + + +def test_genesets_dir_to_entrez_gmt_default_output(): + """When out_gmt is None, should write to sibling magma_genesets/genesets.gmt.""" + + with tempfile.TemporaryDirectory() as tmp: + gdir = make_geneset_dir(tmp, n_sets=2, n_genes=10) + 
result = genesets_dir_to_entrez_gmt(geneset_dir=gdir, out_gmt=None) + + # genesets_dir_to_entrez_gmt calls .resolve() on geneset_dir, so match that + expected = Path(gdir).resolve().parent / "magma_genesets" / "genesets.gmt" + assert result == expected, f"default path wrong: {result}" + assert result.exists() + + ok("genesets_dir_to_entrez_gmt default output path") + + +def test_genesets_dir_to_entrez_gmt_exclude_control(): + + with tempfile.TemporaryDirectory() as tmp: + gdir = make_geneset_dir(tmp, n_sets=3, n_genes=10) + # Add a Control.GeneSet + Path(os.path.join(gdir, "Control.GeneSet")).write_text( + "\n".join([f"ENSG{i:011d}" for i in range(300, 320)]) + "\n" + ) + out_excl = os.path.join(tmp, "no_ctrl.gmt") + out_incl = os.path.join(tmp, "with_ctrl.gmt") + genesets_dir_to_entrez_gmt(geneset_dir=gdir, out_gmt=out_excl, include_control=False) + genesets_dir_to_entrez_gmt(geneset_dir=gdir, out_gmt=out_incl, include_control=True) + + n_excl = len(Path(out_excl).read_text().strip().splitlines()) + n_incl = len(Path(out_incl).read_text().strip().splitlines()) + assert n_incl == n_excl + 1, f"control set not added: {n_excl} vs {n_incl}" + + ok("genesets_dir_to_entrez_gmt include_control") + + +def test_genesets_dir_to_entrez_gmt_real_genesets(): + """Run against GTEx brain GeneSet files that exist on disk.""" + + gdir = "/project/genomics/ayshan/ldsc_analysis/data_2/annotations/GTEx_brain_1000Gv3_ldscores" + if not os.path.isdir(gdir): + print(" [SKIP] genesets_dir_to_entrez_gmt real: dir missing") + return + + with tempfile.TemporaryDirectory() as tmp: + out = os.path.join(tmp, "gtex.gmt") + result = genesets_dir_to_entrez_gmt(geneset_dir=gdir, out_gmt=out) + lines = result.read_text().strip().splitlines() + print(f" GTEx brain: {len(lines)} gene sets written") + assert len(lines) > 0 + + ok("genesets_dir_to_entrez_gmt real GTEx GeneSet files") + + +# --------------------------------------------------------------------------- +# Test 5: run_magma_annotate 
(run=False — command inspection) +# --------------------------------------------------------------------------- + +def test_run_magma_annotate_dry_run(): + + result = run_magma_annotate( + snp_loc="gwas_snps.txt", + gene_loc="NCBI38.gene.loc", + out_prefix="results/test", + magma_bin=MAGMA_BIN, + window_kb=35, + run=False, + ) + cmd = result["command"] + assert cmd[0] == MAGMA_BIN + assert "--annotate" in cmd + assert "window=35" in cmd + assert "--snp-loc" in cmd + assert "--gene-loc" in cmd + assert "--out" in cmd + + ok("run_magma_annotate dry-run command") + + +def test_run_magma_annotate_no_window(): + + result = run_magma_annotate( + snp_loc="snps.txt", gene_loc="genes.loc", out_prefix="out", + magma_bin=MAGMA_BIN, window_kb=0, run=False, + ) + assert "window=0" not in result["command"], "window=0 should not appear in command" + + ok("run_magma_annotate window_kb=0 omitted") + + +# --------------------------------------------------------------------------- +# Test 6: run_magma_gene_analysis (run=False) +# --------------------------------------------------------------------------- + +def test_run_magma_gene_analysis_dry_run(): + + result = run_magma_gene_analysis( + bfile="g1000_eur/g1000_eur", + pval_file="scz.txt", + gene_annot="scz.genes.annot", + out_prefix="results/scz", + n_samples=67_390, + magma_bin=MAGMA_BIN, + run=False, + ) + cmd = result["command"] + assert "--bfile" in cmd + assert "--pval" in cmd + assert "--gene-annot" in cmd + assert any("N=67390" in c for c in cmd), f"N= not in cmd: {cmd}" + + ok("run_magma_gene_analysis dry-run command") + + +def test_run_magma_gene_analysis_no_n(): + """When n_samples is None, N= should not appear in the pval argument.""" + + result = run_magma_gene_analysis( + bfile="g1000_eur", pval_file="scz.txt", gene_annot="scz.annot", + out_prefix="out", n_samples=None, magma_bin=MAGMA_BIN, run=False, + ) + cmd = result["command"] + assert not any("N=" in c for c in cmd), "N= should not appear when n_samples=None" + + 
ok("run_magma_gene_analysis n_samples=None") + + +# --------------------------------------------------------------------------- +# Test 7: run_magma_gsa (run=False + real execution if genes.raw available) +# --------------------------------------------------------------------------- + +def test_run_magma_gsa_dry_run(): + + result = run_magma_gsa( + gene_results="scz.genes.raw", + set_annot="genesets.gmt", + out_prefix="results/gsa", + magma_bin=MAGMA_BIN, + run=False, + ) + cmd = result["command"] + assert "--gene-results" in cmd + assert "--set-annot" in cmd + assert "--out" in cmd + + ok("run_magma_gsa dry-run command") + + +def test_run_magma_gsa_real(): + """Execute real GSA using existing genes.raw and a synthetic GMT.""" + + if not os.path.exists(GENES_RAW) or not os.path.exists(MAGMA_BIN): + print(" [SKIP] run_magma_gsa real: files missing") + return + + # Build a minimal GMT from genes actually in genes.raw + gene_ids = [] + with open(GENES_RAW) as f: + for line in f: + if line.startswith("#"): + continue + parts = line.split() + gene_ids.append(parts[0]) + if len(gene_ids) >= 500: + break + + rng = np.random.default_rng(42) + set1 = rng.choice(gene_ids, 100, replace=False).tolist() + set2 = rng.choice(gene_ids, 100, replace=False).tolist() + + with tempfile.TemporaryDirectory() as tmp: + gmt_path = os.path.join(tmp, "test.gmt") + with open(gmt_path, "w") as f: + f.write("TestSet1\tNA\t" + "\t".join(set1) + "\n") + f.write("TestSet2\tNA\t" + "\t".join(set2) + "\n") + + result = run_magma_gsa( + gene_results=GENES_RAW, + set_annot=gmt_path, + out_prefix=os.path.join(tmp, "gsa_test"), + magma_bin=MAGMA_BIN, + ) + assert os.path.exists(result["results_file"]), "GSA output missing" + df = pd.read_csv(result["results_file"], sep=r"\s+", comment="#") + assert "P" in df.columns, f"P column missing from GSA output: {df.columns.tolist()}" + assert len(df) >= 1 + print(f" GSA output: {len(df)} rows, P values: {df['P'].values}") + + ok("run_magma_gsa real execution") 
# ---------------------------------------------------------------------------
# Test 8: run_magma_gpa joint (run=False + real execution)
# ---------------------------------------------------------------------------

def test_run_magma_gpa_dry_run():
    """With ``run=False`` and ``univariate=False`` the command carries the expected flags."""
    result = run_magma_gpa(
        gene_results="scz.genes.raw",
        gene_covar="scores.covar",
        out_prefix="results/gpa",
        magma_bin=MAGMA_BIN,
        univariate=False,
        run=False,
    )
    cmd = result["command"]
    assert "--gene-results" in cmd
    assert "--gene-covar" in cmd
    assert "--out" in cmd

    ok("run_magma_gpa dry-run command (joint)")


def test_run_magma_gpa_real_joint():
    """Joint GPA using the real seismic covar file + genes.raw.

    Skipped (prints a message and returns) when ``GENES_RAW``, ``MAGMA_BIN``
    or ``SCORES_CSV`` does not exist on disk.
    """

    if not os.path.exists(GENES_RAW) or not os.path.exists(MAGMA_BIN) or not os.path.exists(SCORES_CSV):
        print(" [SKIP] run_magma_gpa real joint: files missing")
        return

    scores_full = pd.read_csv(SCORES_CSV, index_col=0)
    # Use first 5 cell types to keep it fast
    scores = scores_full.iloc[:, :5]

    with tempfile.TemporaryDirectory() as tmp:
        covar_path = os.path.join(tmp, "test.covar")
        scores_to_covar(scores, covar_path)

        result = run_magma_gpa(
            gene_results=GENES_RAW,
            gene_covar=covar_path,
            out_prefix=os.path.join(tmp, "gpa_joint"),
            magma_bin=MAGMA_BIN,
            univariate=False,
        )
        assert os.path.exists(result["results_file"])
        df = pd.read_csv(result["results_file"], sep=r"\s+", comment="#")
        assert "P" in df.columns
        print(f" GPA joint: {len(df)} cell types tested, top P={df['P'].min():.3e}")

    ok("run_magma_gpa real joint execution")


# ---------------------------------------------------------------------------
# Test 9: run_magma_gpa univariate
# ---------------------------------------------------------------------------

def test_run_magma_gpa_real_univariate():
    """Univariate GPA: each cell type tested independently.

    Skipped (prints a message and returns) when ``GENES_RAW``, ``MAGMA_BIN``
    or ``SCORES_CSV`` does not exist on disk.
    """

    if not os.path.exists(GENES_RAW) or not os.path.exists(MAGMA_BIN) or not os.path.exists(SCORES_CSV):
        print(" [SKIP] run_magma_gpa real univariate: files missing")
        return

    scores_full = pd.read_csv(SCORES_CSV, index_col=0)
    scores = scores_full.iloc[:, :3]  # up to 3 cell types
    # The slice yields fewer columns when the score table is narrower than 3,
    # so the expected line count is taken from the slice, not hard-coded.
    n_cts = scores.shape[1]

    with tempfile.TemporaryDirectory() as tmp:
        covar_path = os.path.join(tmp, "test.covar")
        scores_to_covar(scores, covar_path)

        result = run_magma_gpa(
            gene_results=GENES_RAW,
            gene_covar=covar_path,
            out_prefix=os.path.join(tmp, "gpa_univ"),
            magma_bin=MAGMA_BIN,
            univariate=True,
        )
        assert os.path.exists(result["results_file"])
        content = Path(result["results_file"]).read_text()
        assert "UNIVARIATE" in content
        # should have one line per cell type (+ header lines)
        data_lines = [ln for ln in content.splitlines() if ln and not ln.startswith("#")]
        print(f" GPA univariate: {len(data_lines)} cell types in output")
        assert len(data_lines) == n_cts, f"expected {n_cts} CT lines, got {len(data_lines)}"

    ok("run_magma_gpa real univariate execution")


# ---------------------------------------------------------------------------
# Test 10: end-to-end scores_to_gmt → run_magma_gsa
# ---------------------------------------------------------------------------

def test_e2e_scores_to_gmt_then_gsa():
    """Write a GMT from the first 4 score columns and run a real GSA on it.

    Skipped (prints a message and returns) when ``GENES_RAW``, ``MAGMA_BIN``
    or ``SCORES_CSV`` does not exist on disk.
    """
    if not os.path.exists(GENES_RAW) or not os.path.exists(MAGMA_BIN) or not os.path.exists(SCORES_CSV):
        print(" [SKIP] e2e scores_to_gmt→GSA: files missing")
        return

    scores_full = pd.read_csv(SCORES_CSV, index_col=0)
    scores = scores_full.iloc[:, :4]

    with tempfile.TemporaryDirectory() as tmp:
        gmt_path = os.path.join(tmp, "from_scores.gmt")
        scores_to_gmt(scores, gmt_path, top_frac=0.10)

        result = run_magma_gsa(
            gene_results=GENES_RAW,
            set_annot=gmt_path,
            out_prefix=os.path.join(tmp, "e2e_gsa"),
            magma_bin=MAGMA_BIN,
        )
        assert os.path.exists(result["results_file"])
        df = pd.read_csv(result["results_file"], sep=r"\s+", comment="#")
        assert "P" in df.columns
        print(f" E2E GSA: {len(df)} sets, top P={df['P'].min():.3e}")

    ok("e2e scores_to_gmt → run_magma_gsa")
# ---------------------------------------------------------------------------
# Test 11: end-to-end scores_to_covar → run_magma_gpa
# ---------------------------------------------------------------------------

def test_e2e_scores_to_covar_then_gpa():
    """Write a covar file from the first 4 score columns and run a real GPA on it.

    Skipped (prints a message and returns) when ``GENES_RAW``, ``MAGMA_BIN``
    or ``SCORES_CSV`` does not exist on disk.
    """
    if not os.path.exists(GENES_RAW) or not os.path.exists(MAGMA_BIN) or not os.path.exists(SCORES_CSV):
        print(" [SKIP] e2e scores_to_covar→GPA: files missing")
        return

    scores_full = pd.read_csv(SCORES_CSV, index_col=0)
    scores = scores_full.iloc[:, :4]

    with tempfile.TemporaryDirectory() as tmp:
        covar_path = os.path.join(tmp, "from_scores.covar")
        scores_to_covar(scores, covar_path)

        result = run_magma_gpa(
            gene_results=GENES_RAW,
            gene_covar=covar_path,
            out_prefix=os.path.join(tmp, "e2e_gpa"),
            magma_bin=MAGMA_BIN,
        )
        assert os.path.exists(result["results_file"])
        df = pd.read_csv(result["results_file"], sep=r"\s+", comment="#")
        # Same check as the other real-execution tests: fail with an assertion
        # rather than a KeyError if the P column is absent.
        assert "P" in df.columns
        print(f" E2E GPA: {len(df)} cell types, top P={df['P'].min():.3e}")

    ok("e2e scores_to_covar → run_magma_gpa")


# ---------------------------------------------------------------------------
# Runner
# ---------------------------------------------------------------------------

# Tests are executed in this order by the __main__ runner below.
ALL_TESTS = [
    test_continuous_annot_synthetic,
    test_continuous_annot_score_agg_variants,
    test_continuous_annot_real_bim,
    test_scores_to_gmt_basic,
    test_scores_to_gmt_top_frac,
    test_scores_to_gmt_ascending,
    test_scores_to_gmt_with_prefix,
    test_scores_to_gmt_gene_map,
    test_scores_to_covar_basic,
    test_scores_to_covar_negate,
    test_scores_to_covar_dedup,
    test_scores_to_covar_gene_map,
    test_genesets_dir_to_entrez_gmt_basic,
    test_genesets_dir_to_entrez_gmt_default_output,
    test_genesets_dir_to_entrez_gmt_exclude_control,
    test_genesets_dir_to_entrez_gmt_real_genesets,
    test_run_magma_annotate_dry_run,
    test_run_magma_annotate_no_window,
    test_run_magma_gene_analysis_dry_run,
    test_run_magma_gene_analysis_no_n,
    test_run_magma_gsa_dry_run,
    test_run_magma_gsa_real,
    test_run_magma_gpa_dry_run,
    test_run_magma_gpa_real_joint,
    test_run_magma_gpa_real_univariate,
    test_e2e_scores_to_gmt_then_gsa,
    test_e2e_scores_to_covar_then_gpa,
]

if __name__ == "__main__":
    print(f"\nRunning {len(ALL_TESTS)} tests\n{'='*60}")
    for test_fn in ALL_TESTS:
        print(f"\n→ {test_fn.__name__}")
        try:
            test_fn()
        except Exception as exc:
            # AssertionError is an Exception subclass, so one failing test is
            # recorded here and the remaining tests still run.
            fail(test_fn.__name__, exc)
            traceback.print_exc()

    # NOTE(review): PASS and FAIL are defined outside this chunk; presumably
    # ok() and fail() append to them — confirm against their definitions.
    print(f"\n{'='*60}")
    print(f"Results: {len(PASS)} passed, {len(FAIL)} failed")
    if FAIL:
        print("Failed tests:")
        for name in FAIL:
            print(f" - {name}")
        # Non-zero exit status when any test failed.
        sys.exit(1)
    else:
        print("All tests passed.")