From dd911374074c374284a1b1bf2e0238b105741c37 Mon Sep 17 00:00:00 2001 From: Lucas Arnoldt Date: Tue, 27 Jan 2026 17:25:48 +0100 Subject: [PATCH 1/6] LDSC --- docs/tutorials/cell_level_ldsc_analysis.ipynb | 8149 +++++++++++++++++ src/cellink/io/_sgkit.py | 39 +- src/cellink/resources/__init__.py | 2 +- src/cellink/resources/_datasets.py | 2 +- src/cellink/resources/_ld.py | 85 +- src/cellink/resources/config/1000genomes.yaml | 8 + src/cellink/tl/external/__init__.py | 15 + src/cellink/tl/external/_ldsc.py | 1796 ++++ src/cellink/tl/external/_ldsc2magma.py | 274 + src/cellink/tl/external/_sldsc_utils.py | 705 ++ src/cellink/tl/external/config/ldsc.yaml | 4 + .../tl/external/config/ldsc_docker.yaml | 0 .../tl/external/config/ldsc_singularity.yaml | 5 + 13 files changed, 11066 insertions(+), 18 deletions(-) create mode 100644 docs/tutorials/cell_level_ldsc_analysis.ipynb create mode 100644 src/cellink/tl/external/_ldsc.py create mode 100644 src/cellink/tl/external/_ldsc2magma.py create mode 100644 src/cellink/tl/external/_sldsc_utils.py create mode 100644 src/cellink/tl/external/config/ldsc.yaml create mode 100644 src/cellink/tl/external/config/ldsc_docker.yaml create mode 100644 src/cellink/tl/external/config/ldsc_singularity.yaml diff --git a/docs/tutorials/cell_level_ldsc_analysis.ipynb b/docs/tutorials/cell_level_ldsc_analysis.ipynb new file mode 100644 index 0000000..c05ac36 --- /dev/null +++ b/docs/tutorials/cell_level_ldsc_analysis.ipynb @@ -0,0 +1,8149 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Cell-Level LDSC analysis\n", + "\n", + "This tutorial demonstrates how to perform cell-type-specific LD Score regression (LDSC) analysis through the `cellink` package. 
The `cellink` package provides a unified interface to LDSC and its preparation scripts, making it easier to perform comprehensive genetic analyses that identify which cell types are most relevant to complex traits and diseases.\n", + "\n", + "This notebook assumes familiarity with single-cell data processing and basic statistical genetics concepts. The `cellink` package provides convenient wrapper functions that handle data preparation and formatting for LDSC. For LDSC installation, please follow the instructions [here](https://github.com/bulik/ldsc). We recommend running LDSC via a Docker image such as [this one](https://hub.docker.com/r/zijingliu/ldsc). On HPC systems, please consider using Singularity or Enroot." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "We begin by importing necessary libraries and defining key parameters for our analysis. The `cellink` package provides wrapper functions for LDSC that automatically handle preprocessing, data formatting and preparation." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/sgkit/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " from pkg_resources import DistributionNotFound, get_distribution # type: ignore[import]\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "import os\n", + "\n", + "from cellink._core import DAnn, GAnn\n", + "from cellink.resources import get_onek1k\n", + "from cellink.tl.external import (\n", + " preprocess_for_sldsc,\n", + " generate_sldsc_genesets,\n", + " generate_gene_coord_file,\n", + " configure_ldsc_runner,\n", + " make_annot_from_donor_data,\n", + " munge_sumstats,\n", + " estimate_ld_scores_from_donor_data,\n", + " estimate_heritability,\n", + " estimate_genetic_correlation,\n", + " compute_ld_scores_with_annotations_from_donor_data,\n", + " estimate_celltype_specific_heritability,\n", + " genesets_dir_to_entrez_gmt,\n", + ")\n", + "from cellink.resources import get_1000genomes_ld_scores, get_1000genomes_ld_weights\n", + "from cellink.resources import get_gwas_catalog_study_summary_stats\n", + "\n", + "# Analysis parameters\n", + "chrom = 22\n", + "cell_type = \"CD8 Naive\"\n", + "celltype_key = \"predicted.celltype.l2\"\n", + "original_donor_col = \"donor_id\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/project/genomics/ayshan\n" + ] + } + ], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load and Prepare Data\n", + "\n", + "We load the OneK1K dataset, which contains both genotype and single-cell expression data. We also add gene annotations from Ensembl for our analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pybiomart in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (0.2.0)\n", + "Requirement already satisfied: future in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (1.0.0)\n", + "Requirement already satisfied: pandas in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (2.3.3)\n", + "Requirement already satisfied: requests in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (2.32.5)\n", + "Requirement already satisfied: requests-cache in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pybiomart) (1.2.1)\n", + "Requirement already satisfied: numpy>=1.23.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from pandas->pybiomart) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->pybiomart) (1.17.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (3.4.4)\n", 
+ "Requirement already satisfied: idna<4,>=2.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests->pybiomart) (2025.11.12)\n", + "Requirement already satisfied: attrs>=21.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (25.4.0)\n", + "Requirement already satisfied: cattrs>=22.2 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (25.3.0)\n", + "Requirement already satisfied: platformdirs>=2.5 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (4.5.0)\n", + "Requirement already satisfied: url-normalize>=1.4 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from requests-cache->pybiomart) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.14.0 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from cattrs>=22.2->requests-cache->pybiomart) (4.15.0)\n" + ] + } + ], + "source": [ + "!pip install pybiomart" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/onek1k_cellxgene.h5ad already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/OneK1K.noGP.vcf.gz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + 
"INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/OneK1K.noGP.vcf.gz.csi already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/project/genomics/ayshan/1k1k_dataset/onek1k/gene_counts_Ensembl_105_phenotype_metadata.tsv.gz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas/core/internals/blocks.py:2661: RuntimeWarning: invalid value encountered in cast\n", + " return self.values.astype(_dtype_obj)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset shape: (981, 10595884, 1248980, 36469)\n" + ] + } + ], + "source": [ + "# Load the dataset\n", + "dd = get_onek1k(config_path=\"cellink/src/cellink/resources/config/onek1k.yaml\", data_home=\"/project/genomics/ayshan/1k1k_dataset\", verify_checksum=False)\n", + "print(f\"Dataset shape: {dd.shape}\")\n", + "\n", + "\n", + "# Add gene annotations from Ensembl\n", + "def _get_ensembl_gene_id_start_end_chr():\n", + " from pybiomart import Server\n", + "\n", + " server = Server(host=\"http://www.ensembl.org\")\n", + " dataset = server.marts[\"ENSEMBL_MART_ENSEMBL\"].datasets[\"hsapiens_gene_ensembl\"]\n", + " ensembl_gene_id_start_end_chr = dataset.query(\n", + " attributes=[\"ensembl_gene_id\", \"start_position\", \"end_position\", \"chromosome_name\"]\n", + " )\n", + " ensembl_gene_id_start_end_chr = ensembl_gene_id_start_end_chr.set_index(\"Gene stable ID\")\n", + " ensembl_gene_id_start_end_chr = ensembl_gene_id_start_end_chr.rename(\n", + " columns={\n", + " \"Gene start (bp)\": GAnn.start,\n", + " \"Gene end (bp)\": GAnn.end,\n", + " \"Chromosome/scaffold name\": GAnn.chrom,\n", + " }\n", + " )\n", + " return ensembl_gene_id_start_end_chr\n", + "\n", + "\n", + "ensembl_gene_id_start_end_chr = _get_ensembl_gene_id_start_end_chr()\n", + "dd.C.var = dd.C.var.join(ensembl_gene_id_start_end_chr)\n", + "\n", + "# Set up 
donor information\n", + "dd.C.obs[DAnn.donor] = dd.C.obs[original_donor_col]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dd.G.obs[\"donor_id\"] = dd.G.obs.index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cell-Type-Specific LDSC analysis\n", + "\n", + "Cell-type-specific LDSC analysis helps identify which cell types are most relevant to complex traits by testing whether genetic variants associated with a trait are enriched in genes specifically expressed in certain cell types. This analysis follows the method described in [Duncan et al. 2025](https://www.nature.com/articles/s41593-024-01834-w)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 1: Preprocessing and Gene Set Generation\n", + "First, we preprocess the single-cell data to compute cell-type-specific gene expression and identify genes that are specifically expressed in each cell type." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Querying Ensembl BioMart (GRCh38)...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetched annotations for 86371 genes from GRCh38\n", + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from Gene IDs\n", + "INFO:cellink.tl.external._sldsc_utils:Dropping conflicting columns from adata.var before merge: ['chrom', 'start', 'end']\n", + "INFO:cellink.tl.external._sldsc_utils:Annotated 35522 / 36469 genes.\n", + "INFO:cellink.tl.external._sldsc_utils:Using annotation columns: gene=gene, biotype=gene_biotype, chr=chrom, start=start, end=end\n", + "INFO:cellink.tl.external._sldsc_utils:Applying gene filters\n", + "INFO:cellink.tl.external._sldsc_utils:Protein-coding genes: 19273\n", + 
"INFO:cellink.tl.external._sldsc_utils:Expressed genes: 31285\n", + "INFO:cellink.tl.external._sldsc_utils:Non-MHC genes: 36469\n", + "INFO:cellink.tl.external._sldsc_utils:Keeping 18068 / 36469 genes after filtering\n", + "INFO:cellink.tl.external._sldsc_utils:n_cells = 1248980, n_genes = 18068, n_clusters = 31\n", + "INFO:cellink.tl.external._sldsc_utils:Applying log1p transformation\n", + "Aggregating clusters: 100%|██████████| 31/31 [02:27<00:00, 4.75s/it]\n", + "INFO:cellink.tl.external._sldsc_utils:Log1p applied.\n", + "INFO:cellink.tl.external._sldsc_utils:Computing mean expression for predicted.celltype.l2\n", + "INFO:cellink.tl.external._sldsc_utils:Computing specificity scores\n", + "INFO:cellink.tl.external._sldsc_utils:Final data shape: (1248980, 18068)\n", + "INFO:cellink.tl.external._sldsc_utils:Mean expression shape: (18068, 31)\n", + "INFO:cellink.tl.external._sldsc_utils:Specificity shape: (18068, 31)\n" + ] + } + ], + "source": [ + "dd.C.var[\"gene\"] = dd.C.var_names\n", + "adata = dd.C.copy()\n", + "adata_filtered, mean_expr, specificity = preprocess_for_sldsc(\n", + " adata,\n", + " celltype_col=celltype_key,\n", + " gene_identifier_mode=\"ensembl\",\n", + " gene_col=\"gene\",\n", + " genome_build=\"GRCh38\",\n", + " inplace=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we generate gene sets for each cell type containing the top 10% most specifically expressed genes. These gene sets will be used to create genomic annotations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Writing gene sets to ldsc_genesets\n", + "INFO:cellink.tl.external._sldsc_utils:specificity_df index looks like Ensembl IDs; using them directly.\n", + "INFO:cellink.tl.external._sldsc_utils:Selecting top 1807 genes (10.0%) per cell type\n", + "INFO:cellink.tl.external._sldsc_utils:Wrote control gene set with 18068 genes\n", + "INFO:cellink.tl.external._sldsc_utils:Generated 31 cell-type-specific gene sets\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cell_typen_genesoutput_path
0ASDC1807ldsc_genesets/ASDC.GeneSet
1B intermediate1807ldsc_genesets/B_intermediate.GeneSet
2B memory1807ldsc_genesets/B_memory.GeneSet
3B naive1807ldsc_genesets/B_naive.GeneSet
4CD14 Mono1807ldsc_genesets/CD14_Mono.GeneSet
5CD16 Mono1807ldsc_genesets/CD16_Mono.GeneSet
6CD4 CTL1807ldsc_genesets/CD4_CTL.GeneSet
7CD4 Naive1807ldsc_genesets/CD4_Naive.GeneSet
8CD4 Proliferating1807ldsc_genesets/CD4_Proliferating.GeneSet
9CD4 TCM1807ldsc_genesets/CD4_TCM.GeneSet
10CD4 TEM1807ldsc_genesets/CD4_TEM.GeneSet
11CD8 Naive1807ldsc_genesets/CD8_Naive.GeneSet
12CD8 Proliferating1807ldsc_genesets/CD8_Proliferating.GeneSet
13CD8 TCM1807ldsc_genesets/CD8_TCM.GeneSet
14CD8 TEM1807ldsc_genesets/CD8_TEM.GeneSet
15Doublet1807ldsc_genesets/Doublet.GeneSet
16Eryth1807ldsc_genesets/Eryth.GeneSet
17HSPC1807ldsc_genesets/HSPC.GeneSet
18ILC1807ldsc_genesets/ILC.GeneSet
19MAIT1807ldsc_genesets/MAIT.GeneSet
20NK1807ldsc_genesets/NK.GeneSet
21NK Proliferating1807ldsc_genesets/NK_Proliferating.GeneSet
22NK_CD56bright1807ldsc_genesets/NK_CD56bright.GeneSet
23Plasmablast1807ldsc_genesets/Plasmablast.GeneSet
24Platelet1807ldsc_genesets/Platelet.GeneSet
25Treg1807ldsc_genesets/Treg.GeneSet
26cDC11807ldsc_genesets/cDC1.GeneSet
27cDC21807ldsc_genesets/cDC2.GeneSet
28dnT1807ldsc_genesets/dnT.GeneSet
29gdT1807ldsc_genesets/gdT.GeneSet
30pDC1807ldsc_genesets/pDC.GeneSet
\n", + "
" + ], + "text/plain": [ + " cell_type n_genes output_path\n", + "0 ASDC 1807 ldsc_genesets/ASDC.GeneSet\n", + "1 B intermediate 1807 ldsc_genesets/B_intermediate.GeneSet\n", + "2 B memory 1807 ldsc_genesets/B_memory.GeneSet\n", + "3 B naive 1807 ldsc_genesets/B_naive.GeneSet\n", + "4 CD14 Mono 1807 ldsc_genesets/CD14_Mono.GeneSet\n", + "5 CD16 Mono 1807 ldsc_genesets/CD16_Mono.GeneSet\n", + "6 CD4 CTL 1807 ldsc_genesets/CD4_CTL.GeneSet\n", + "7 CD4 Naive 1807 ldsc_genesets/CD4_Naive.GeneSet\n", + "8 CD4 Proliferating 1807 ldsc_genesets/CD4_Proliferating.GeneSet\n", + "9 CD4 TCM 1807 ldsc_genesets/CD4_TCM.GeneSet\n", + "10 CD4 TEM 1807 ldsc_genesets/CD4_TEM.GeneSet\n", + "11 CD8 Naive 1807 ldsc_genesets/CD8_Naive.GeneSet\n", + "12 CD8 Proliferating 1807 ldsc_genesets/CD8_Proliferating.GeneSet\n", + "13 CD8 TCM 1807 ldsc_genesets/CD8_TCM.GeneSet\n", + "14 CD8 TEM 1807 ldsc_genesets/CD8_TEM.GeneSet\n", + "15 Doublet 1807 ldsc_genesets/Doublet.GeneSet\n", + "16 Eryth 1807 ldsc_genesets/Eryth.GeneSet\n", + "17 HSPC 1807 ldsc_genesets/HSPC.GeneSet\n", + "18 ILC 1807 ldsc_genesets/ILC.GeneSet\n", + "19 MAIT 1807 ldsc_genesets/MAIT.GeneSet\n", + "20 NK 1807 ldsc_genesets/NK.GeneSet\n", + "21 NK Proliferating 1807 ldsc_genesets/NK_Proliferating.GeneSet\n", + "22 NK_CD56bright 1807 ldsc_genesets/NK_CD56bright.GeneSet\n", + "23 Plasmablast 1807 ldsc_genesets/Plasmablast.GeneSet\n", + "24 Platelet 1807 ldsc_genesets/Platelet.GeneSet\n", + "25 Treg 1807 ldsc_genesets/Treg.GeneSet\n", + "26 cDC1 1807 ldsc_genesets/cDC1.GeneSet\n", + "27 cDC2 1807 ldsc_genesets/cDC2.GeneSet\n", + "28 dnT 1807 ldsc_genesets/dnT.GeneSet\n", + "29 gdT 1807 ldsc_genesets/gdT.GeneSet\n", + "30 pDC 1807 ldsc_genesets/pDC.GeneSet" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary = generate_sldsc_genesets(specificity, dd.C, out_dir=\"ldsc_genesets\", top_frac=0.10, overwrite=True)\n", + "summary" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "We also need to generate a gene coordinate file that maps genes to their genomic positions:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from Ensembl GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Querying Ensembl BioMart (GRCh38)...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetching gene annotations from GRCh38...\n", + "INFO:cellink.tl.external._sldsc_utils:Fetched annotations for 86371 genes from GRCh38\n", + "INFO:cellink.tl.external._sldsc_utils:Removing version suffixes from gene identifiers\n", + "WARNING:cellink.tl.external._sldsc_utils:Removed 2 duplicate gene entries\n", + "INFO:cellink.tl.external._sldsc_utils:Writing 86369 gene coordinates to gene_coords.txt\n", + "INFO:cellink.tl.external._sldsc_utils:Successfully created gene coordinate file: gene_coords.txt\n" + ] + } + ], + "source": [ + "generate_gene_coord_file(\"gene_coords.txt\", gene_identifier_mode=\"ensembl\", genome_build=\"GRCh38\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Optional: Generate Magma cell specificity file\n", + "\n", + ".GeneSet files generated for LDSC can be used for MAGMA analysis by converting them into .gmt format\n", + "\n", + "Running the GeneSet → MAGMA Conversion Without a Mapping TSV\n", + "\n", + "This pipeline converts .GeneSet files (with Ensembl gene IDs) into a MAGMA-compatible .gmt file (which requires Entrez gene IDs).\n", + "Normally, this conversion needs a mapping file (ensembl → entrez).\n", + "However, there are situations where you may not have this TSV file.\n", + "\n", + "Below are your options and when to use them\n", + "\n", + "\n", + "Recommended: create and reuse a mapping TSV\n", + "\n", + "For real analyses (especially MAGMA), this is the best practice.\n", 
+ "\n", + "Mapping file format\n", + "\n", + "A simple tab-separated file:\n", + "\n", + "ensembl_gene_id entrez_id\n", + "ENSG00000141510 7157\n", + "ENSG00000171862 5728" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting mygene\n", + " Using cached mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)\n", + "Collecting biothings-client>=0.2.6 (from mygene)\n", + " Using cached biothings_client-0.4.1-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: httpx>=0.22.0 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from biothings-client>=0.2.6->mygene) (0.28.1)\n", + "Requirement already satisfied: anyio in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (4.11.0)\n", + "Requirement already satisfied: certifi in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (2025.11.12)\n", + "Requirement already satisfied: httpcore==1.* in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (1.0.9)\n", + "Requirement already satisfied: idna in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpx>=0.22.0->biothings-client>=0.2.6->mygene) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.22.0->biothings-client>=0.2.6->mygene) (0.16.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from anyio->httpx>=0.22.0->biothings-client>=0.2.6->mygene) (1.3.1)\n", + "Requirement already satisfied: typing_extensions>=4.5 in 
/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages (from anyio->httpx>=0.22.0->biothings-client>=0.2.6->mygene) (4.15.0)\n", + "Using cached mygene-3.2.2-py2.py3-none-any.whl (5.4 kB)\n", + "Using cached biothings_client-0.4.1-py3-none-any.whl (46 kB)\n", + "Installing collected packages: biothings-client, mygene\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [mygene]━━━━\u001b[0m \u001b[32m1/2\u001b[0m [mygene]\n", + "\u001b[1A\u001b[2KSuccessfully installed biothings-client-0.4.1 mygene-3.2.2\n" + ] + } + ], + "source": [ + "!pip install mygene" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/sgkit/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " from pkg_resources import DistributionNotFound, get_distribution # type: ignore[import]\n", + "/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000189144']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000189144']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000189144']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found no hit:\t['ENSG00000168078']\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:1 input query terms found dup hits:\t[('ENSG00000175711', 2)]\n", + "INFO:biothings.client:Pass \"returnall=True\" to return complete lists of duplicate or missing query terms.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "WARNING:biothings.client:Input sequence provided is already in string format. No operation performed\n", + "WARNING:biothings.client:Input sequence provided is already in string format. 
No operation performed\n", + "INFO:biothings.client:querying 1-1000 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:querying 1001-1807 ...\n", + "INFO:httpx:HTTP Request: POST https://mygene.info/v3/query/ \"HTTP/1.1 200 OK\"\n", + "INFO:biothings.client:Finished.\n", + "INFO:cellink.tl.external._ldsc2magma:Wrote 31 gene sets to /ictstr01/project_copy/genomics/ayshan/magma_genesets/genesets.gmt (skipped 0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/ictstr01/project_copy/genomics/ayshan/magma_genesets/genesets.gmt\n" + ] + } + ], + "source": [ + "out_gmt = genesets_dir_to_entrez_gmt(\n", + " geneset_dir=\"ldsc_genesets\",\n", + " ensembl_to_entrez_tsv=None,\n", + " allow_mygene_fallback=True,\n", + " include_control=False\n", + ")\n", + "\n", + "print(out_gmt) #TODO: Why is this generated? Where is it used downstream? How is this actually MAGMA specific?\n", + "# Would it make sense to demonstrate the use of the MAGMA runner here? See scDRS/seismic notebook as an example." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 2: Configure LDSC Runner\n", + "Before running LDSC commands, we need to configure the runner. LDSC can be executed via a local installation or through container solutions like Docker or Singularity. Sample configuration files are provided in `./src/cellink/tl/external/config/` for local execution, Docker, and Singularity." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "runner = configure_ldsc_runner(config_path=\"cellink/src/cellink/tl/external/config/ldsc_singularity.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 3: Prepare Data for Analysis\n", + "To speed up computation in this tutorial, we keep only a small random subset of SNPs (0.1%, but at least one) from each autosome (chromosomes 1-22). 
Note: In a real analysis, you would process all chromosomes without subsetting." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╔═ DonorData(n_donors=981, n_cells_per_donor=[333-3,511], donor_id='donor_id') ═══════════════════════════════╗\n",
+       "║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║\n",
+       "║ ┃ G (donors)                                          C (cells)                                          ┃ ║\n",
+       "║ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ ║\n",
+       "║ │ AnnData object with n_obs × n_vars = 981 × 10,299  │ AnnData object with n_obs × n_vars = 1,248,980 ×   │ ║\n",
+       "║ │                                                    │ 36,469                                             │ ║\n",
+       "║ │     obs: 'donor_id'                                │     obs: 'orig.ident', 'nCount_RNA',               │ ║\n",
+       "║ │                                                    │ 'nFeature_RNA', 'percent.mt', 'donor_id',          │ ║\n",
+       "║ │                                                    │ 'pool_number', 'predicted.celltype.l2',            │ ║\n",
+       "║ │                                                    │ 'predicted.celltype.l2.score', 'age',              │ ║\n",
+       "║ │                                                    │ 'organism_ontology_term_id',                       │ ║\n",
+       "║ │                                                    │ 'tissue_ontology_term_id',                         │ ║\n",
+       "║ │                                                    │ 'assay_ontology_term_id',                          │ ║\n",
+       "║ │                                                    │ 'disease_ontology_term_id',                        │ ║\n",
+       "║ │                                                    │ 'cell_type_ontology_term_id',                      │ ║\n",
+       "║ │                                                    │ 'self_reported_ethnicity_ontology_term_id',        │ ║\n",
+       "║ │                                                    │ 'development_stage_ontology_term_id',              │ ║\n",
+       "║ │                                                    │ 'sex_ontology_term_id', 'is_primary_data',         │ ║\n",
+       "║ │                                                    │ 'suspension_type', 'tissue_type', 'cell_type',     │ ║\n",
+       "║ │                                                    │ 'assay', 'disease', 'organism', 'sex', 'tissue',   │ ║\n",
+       "║ │                                                    │ 'self_reported_ethnicity', 'development_stage',    │ ║\n",
+       "║ │                                                    │ 'observation_joinid'                               │ ║\n",
+       "║ │     var: 'chrom', 'pos', 'a0', 'a1', 'AF', 'AN',   │     var: 'vst.mean', 'vst.variance',               │ ║\n",
+       "║ │ 'ER2', 'IMPUTED', 'maf', 'NS', 'R2', 'TYPED',      │ 'vst.variance.expected',                           │ ║\n",
+       "║ │ 'TYPED_ONLY', 'id', 'id_mask', 'length',           │ 'vst.variance.standardized', 'vst.variable',       │ ║\n",
+       "║ │ 'quality', 'pos_hg19', 'id_hg19'                   │ 'feature_is_filtered', 'feature_name',             │ ║\n",
+       "║ │                                                    │ 'feature_reference', 'feature_biotype',            │ ║\n",
+       "║ │                                                    │ 'feature_length', 'feature_type', 'start', 'end',  │ ║\n",
+       "║ │                                                    │ 'chrom', 'gene'                                    │ ║\n",
+       "║ │     uns: 'kinship'                                 │     uns: 'cell_type_ontology_term_id_colors',      │ ║\n",
+       "║ │                                                    │ 'citation', 'default_embedding',                   │ ║\n",
+       "║ │                                                    │ 'schema_reference', 'schema_version', 'title'      │ ║\n",
+       "║ │     obsm: 'gPCs'                                   │     obsm: 'X_azimuth_spca', 'X_azimuth_umap',      │ ║\n",
+       "║ │                                                    │ 'X_harmony', 'X_pca', 'X_umap'                     │ ║\n",
+       "║ │     varm: 'filter'                                 │     varm: 'PCs'                                    │ ║\n",
+       "║ └────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘ ║\n",
+       "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n",
+       "
\n" + ], + "text/plain": [ + "╔═\u001b[1;38;5;197m DonorData(n_donors=981, n_cells_per_donor=[333-3,511], donor_id='donor_id') \u001b[0m═══════════════════════════════╗\n", + "║ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ║\n", + "║ ┃\u001b[1;38;5;197m \u001b[0m\u001b[1;38;5;197mG (donors) \u001b[0m\u001b[1;38;5;197m \u001b[0m┃\u001b[1;38;5;197m \u001b[0m\u001b[1;38;5;197mC (cells) \u001b[0m\u001b[1;38;5;197m \u001b[0m┃ ║\n", + "║ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ ║\n", + "║ │ AnnData object with n_obs × n_vars = 981 × 10,299 │ AnnData object with n_obs × n_vars = 1,248,980 × │ ║\n", + "║ │ │ 36,469 │ ║\n", + "║ │ obs: 'donor_id' │ obs: 'orig.ident', 'nCount_RNA', │ ║\n", + "║ │ │ 'nFeature_RNA', 'percent.mt', \u001b[1;38;5;197m'donor_id', \u001b[0m │ ║\n", + "║ │ │ 'pool_number', 'predicted.celltype.l2', │ ║\n", + "║ │ │ 'predicted.celltype.l2.score', 'age', │ ║\n", + "║ │ │ 'organism_ontology_term_id', │ ║\n", + "║ │ │ 'tissue_ontology_term_id', │ ║\n", + "║ │ │ 'assay_ontology_term_id', │ ║\n", + "║ │ │ 'disease_ontology_term_id', │ ║\n", + "║ │ │ 'cell_type_ontology_term_id', │ ║\n", + "║ │ │ 'self_reported_ethnicity_ontology_term_id', │ ║\n", + "║ │ │ 'development_stage_ontology_term_id', │ ║\n", + "║ │ │ 'sex_ontology_term_id', 'is_primary_data', │ ║\n", + "║ │ │ 'suspension_type', 'tissue_type', 'cell_type', │ ║\n", + "║ │ │ 'assay', 'disease', 'organism', 'sex', 'tissue', │ ║\n", + "║ │ │ 'self_reported_ethnicity', 'development_stage', │ ║\n", + "║ │ │ 'observation_joinid' │ ║\n", + "║ │ var: 'chrom', 'pos', 'a0', 'a1', 'AF', 'AN', │ var: 'vst.mean', 'vst.variance', │ ║\n", + "║ │ 'ER2', 'IMPUTED', 'maf', 'NS', 'R2', 'TYPED', │ 'vst.variance.expected', │ ║\n", + "║ │ 'TYPED_ONLY', 'id', 'id_mask', 'length', │ 'vst.variance.standardized', 'vst.variable', │ ║\n", + "║ │ 'quality', 'pos_hg19', 'id_hg19' │ 
'feature_is_filtered', 'feature_name', │ ║\n", + "║ │ │ 'feature_reference', 'feature_biotype', │ ║\n", + "║ │ │ 'feature_length', 'feature_type', 'start', 'end', │ ║\n", + "║ │ │ 'chrom', 'gene' │ ║\n", + "║ │ uns: 'kinship' │ uns: 'cell_type_ontology_term_id_colors', │ ║\n", + "║ │ │ 'citation', 'default_embedding', │ ║\n", + "║ │ │ 'schema_reference', 'schema_version', 'title' │ ║\n", + "║ │ obsm: 'gPCs' │ obsm: 'X_azimuth_spca', 'X_azimuth_umap', │ ║\n", + "║ │ │ 'X_harmony', 'X_pca', 'X_umap' │ ║\n", + "║ │ varm: 'filter' │ varm: 'PCs' │ ║\n", + "║ └────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘ ║\n", + "╚═════════════════════════════════════════════════════════════════════════════════════════════════════════════╝\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.seed(42)\n", + "all_selected_idx = []\n", + "for chrom in range(1, 23):\n", + " chrom_idx = np.where(dd.G.var.chrom == str(chrom))[0]\n", + " n_snps = max(1, int(len(chrom_idx) * 0.001))\n", + " selected_idx = np.random.choice(chrom_idx, n_snps, replace=False)\n", + " all_selected_idx.extend(selected_idx)\n", + "all_selected_idx = np.sort(all_selected_idx)\n", + "\n", + "dd = dd[:, all_selected_idx, :, :].copy()\n", + "dd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 4: Create Cell-Type Annotations\n", + "Now we create binary annotation files that indicate which SNPs are near cell-type-specific genes. This is done using LDSC's `make_annot` functionality, wrapped by `cellink`. We process two cell types (CD8 Naive and CD4 Naive) across all chromosomes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:55<00:00, 55.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_1.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:25<00:00, 25.84s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_1.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_1.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.39s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_2.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_2.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.57s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_2.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_2.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:47<00:00, 47.75s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_3.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_3.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 26.14s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_3.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_3.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 26.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_4.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_4.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:25<00:00, 25.97s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_4.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_4.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.15s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_5.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_5.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.27s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_5.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_5.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:37<00:00, 37.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_6.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_6.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:25<00:00, 25.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_6.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_6.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:37<00:00, 37.38s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_7.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_7.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 27.00s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_7.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_7.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.51s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_8.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_8.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.54s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_8.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_8.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:36<00:00, 36.10s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_9.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_9.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:26<00:00, 26.34s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_9.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_9.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_10.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_10.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:13<00:00, 13.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD4_Naive_10.annot.gz --gene-set-file ./ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD4_Naive_10.annot.gz --gene-set-file /data/ldsc_genesets/CD4_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 100%|██████████| 1/1 [00:37<00:00, 37.41s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Creating annotation file: /ldsc/make_annot.py --bimfile ldsc_annot.bim --annot-file CD8_Naive_11.annot.gz --gene-set-file ./ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:Executing: singularity exec -B /ictstr01/project_copy/genomics/ayshan:/data /project/genomics/ayshan/containers/ldsc.sif /ldsc/make_annot.py --bimfile /data/ldsc_annot.bim --annot-file /data/CD8_Naive_11.annot.gz --gene-set-file /data/ldsc_genesets/CD8_Naive.GeneSet --gene-coord-file /data/gene_coords.txt --windowsize 100000\n", + "INFO:cellink.tl.external._ldsc:making gene set bed file\n", + "making annot file\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: ldsc_annot.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for annotation creation\n", + "Writing BED: 0%| | 0/1 [00:14 \u001b[39m\u001b[32m4\u001b[39m result = \u001b[43mmake_annot_from_donor_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mdd\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdd_chrom\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mannot_file\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcell_type\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m 
\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m_\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mchrom\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m.annot.gz\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_set_file\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43mf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m./ldsc_genesets/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mcell_type\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;250;43m \u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m_\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[33;43m.GeneSet\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43mgene_coord_file\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgene_coords.txt\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[43mwindowsize\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m100000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/tl/external/_ldsc.py:1424\u001b[39m, in \u001b[36mmake_annot_from_donor_data\u001b[39m\u001b[34m(dd, annot_file, gene_set_file, gene_coord_file, windowsize, bed_file, nomerge, out_prefix, run, cleanup_files, 
plink_export_kwargs, runner, **kwargs)\u001b[39m\n\u001b[32m 1421\u001b[39m plink_export_kwargs = {}\n\u001b[32m 1423\u001b[39m logger.info(\u001b[33m\"\u001b[39m\u001b[33mExporting genotype data to PLINK format for annotation creation\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1424\u001b[39m \u001b[43mto_plink\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_prefix\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mplink_export_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1425\u001b[39m bimfile = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mout_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.bim\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 1427\u001b[39m results = _run_ldsc_make_annot(\n\u001b[32m 1428\u001b[39m bimfile=bimfile,\n\u001b[32m 1429\u001b[39m annot_file=annot_file,\n\u001b[32m (...)\u001b[39m\u001b[32m 1437\u001b[39m **kwargs,\n\u001b[32m 1438\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m/ictstr01/project_copy/genomics/ayshan/cellink/src/cellink/io/_export.py:148\u001b[39m, in \u001b[36mto_plink\u001b[39m\u001b[34m(gdata, output_prefix, donor_id, donor_family_id, donor_paternal_id, donor_maternal_id, donor_sex, chrom, pos, a0, a1)\u001b[39m\n\u001b[32m 73\u001b[39m output_prefix += \u001b[33m\"\u001b[39m\u001b[33m.bed\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 75\u001b[39m xarr = xr.DataArray(\n\u001b[32m 76\u001b[39m gdata.X.astype(\u001b[33m\"\u001b[39m\u001b[33mfloat32\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m 77\u001b[39m dims=(\u001b[33m\"\u001b[39m\u001b[33msample\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mvariant\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m (...)\u001b[39m\u001b[32m 146\u001b[39m name=\u001b[33m\"\u001b[39m\u001b[33mgenotypes\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 
147\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m148\u001b[39m \u001b[43mwrite_plink1_bin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_prefix\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas_plink/_write.py:180\u001b[39m, in \u001b[36mwrite_plink1_bin\u001b[39m\u001b[34m(G, bed, bim, fam, major, verbose)\u001b[39m\n\u001b[32m 178\u001b[39m G = _fill_sample(G)\n\u001b[32m 179\u001b[39m G = _fill_variant(G)\n\u001b[32m--> \u001b[39m\u001b[32m180\u001b[39m \u001b[43mwrite_bed\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmajor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 182\u001b[39m _echo(\u001b[33m\"\u001b[39m\u001b[33mWriting FAM... 
\u001b[39m\u001b[33m\"\u001b[39m, end=\u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m, disable=\u001b[38;5;129;01mnot\u001b[39;00m verbose)\n\u001b[32m 183\u001b[39m _write_fam(fam, G)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/pandas_plink/_bed_write.py:44\u001b[39m, in \u001b[36mwrite_bed\u001b[39m\u001b[34m(filepath, X, major, verbose)\u001b[39m\n\u001b[32m 42\u001b[39m row_start = \u001b[32m0\u001b[39m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m tqdm(G.chunks[\u001b[32m0\u001b[39m], \u001b[33m\"\u001b[39m\u001b[33mWriting BED\u001b[39m\u001b[33m\"\u001b[39m, disable=\u001b[38;5;129;01mnot\u001b[39;00m verbose):\n\u001b[32m---> \u001b[39m\u001b[32m44\u001b[39m data = \u001b[43mG\u001b[49m\u001b[43m[\u001b[49m\u001b[43mrow_start\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow_start\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompute\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 45\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m data.dtype \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [float32, float64]:\n\u001b[32m 46\u001b[39m msg = \u001b[33m\"\u001b[39m\u001b[33mUnsupported data type. 
\u001b[39m\u001b[33m\"\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/dask/base.py:373\u001b[39m, in \u001b[36mDaskMethodsMixin.compute\u001b[39m\u001b[34m(self, **kwargs)\u001b[39m\n\u001b[32m 349\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcompute\u001b[39m(\u001b[38;5;28mself\u001b[39m, **kwargs):\n\u001b[32m 350\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Compute this dask collection\u001b[39;00m\n\u001b[32m 351\u001b[39m \n\u001b[32m 352\u001b[39m \u001b[33;03m This turns a lazy Dask collection into its in-memory equivalent.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 371\u001b[39m \u001b[33;03m dask.compute\u001b[39;00m\n\u001b[32m 372\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m373\u001b[39m (result,) = \u001b[43mcompute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtraverse\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 374\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/site-packages/dask/base.py:681\u001b[39m, in \u001b[36mcompute\u001b[39m\u001b[34m(traverse, optimize_graph, scheduler, get, *args, **kwargs)\u001b[39m\n\u001b[32m 678\u001b[39m expr = expr.optimize()\n\u001b[32m 679\u001b[39m keys = \u001b[38;5;28mlist\u001b[39m(flatten(expr.__dask_keys__()))\n\u001b[32m--> \u001b[39m\u001b[32m681\u001b[39m results = \u001b[43mschedule\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexpr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 683\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m repack(results)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/queue.py:171\u001b[39m, in \u001b[36mQueue.get\u001b[39m\u001b[34m(self, block, timeout)\u001b[39m\n\u001b[32m 169\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m._qsize():\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnot_empty\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m timeout < \u001b[32m0\u001b[39m:\n\u001b[32m 173\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33m'\u001b[39m\u001b[33mtimeout\u001b[39m\u001b[33m'\u001b[39m\u001b[33m must be a non-negative number\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/home/hpc/ayshan.aliyeva/miniconda3/envs/cellink-env/lib/python3.11/threading.py:327\u001b[39m, in \u001b[36mCondition.wait\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 325\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[32m 326\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m327\u001b[39m \u001b[43mwaiter\u001b[49m\u001b[43m.\u001b[49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 328\u001b[39m gotit = 
\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 329\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[31mKeyboardInterrupt\u001b[39m: " + ] + } + ], + "source": [ + "for chrom in range(1, 23):\n", + " for cell_type in [\"CD8 Naive\", \"CD4 Naive\"]:\n", + " dd_chrom = dd.sel(G_var=dd.G.var.chrom == str(chrom), C_var=dd.C.var.chrom == str(chrom)).copy()\n", + " result = make_annot_from_donor_data(\n", + " dd=dd_chrom,\n", + " annot_file=f\"{cell_type.replace(' ', '_')}_{chrom}.annot.gz\",\n", + " gene_set_file = f\"./ldsc_genesets/{cell_type.replace(' ', '_')}.GeneSet\",\n", + " gene_coord_file=\"gene_coords.txt\",\n", + " windowsize=100000,\n", + " runner=runner,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/project/genomics/ayshan\n", + "ls: cannot access 'ldsc_annot.bim': No such file or directory\n", + "ls: cannot access 'gene_coords.txt': No such file or directory\n", + "ls: cannot access 'ldsc_genesets/CD8_Naive.GeneSet': No such file or directory\n" + ] + } + ], + "source": [ + "!cd /ictstr01/project_copy/genomics/ayshan\n", + "\n", + "!singularity exec \\\n", + " -B /ictstr01/project_copy/genomics/ayshan:/data \\\n", + " -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data \\\n", + " /project/genomics/ayshan/containers/ldsc.sif \\\n", + " bash -lc 'pwd; ls; ls -l ldsc_annot.bim gene_coords.txt ldsc_genesets/CD8_Naive.GeneSet'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 5: Compute Cell-Type-Specific LD Scores\n", + "With annotations created, we now compute LD scores that incorporate cell-type-specific information. These LD scores quantify how much genetic variation near cell-type-specific genes contributes to linkage disequilibrium patterns. 
Here we use the function `compute_ld_scores_with_annotations_from_donor_data`. Alternatively, the same step can be performed on 1000 Genomes PLINK data via `compute_ld_scores_with_annotations_from_bimfile` (the PLINK files can be downloaded via `cellink.resources.get_1000genomes_plink_files`). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.1 --annot CD8_Naive_1.annot.gz --out cts_ldscores_CD8_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.1 --annot CD8_Naive_1.annot.gz --out cts_ldscores_CD8_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
cts_ldscores_CD8_Naive.1 \\\n", + "--bfile cts_ldscores_CD8_Naive.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:51:21 2025\n", + "Read list of 796 SNPs from cts_ldscores_CD8_Naive.1.bim\n", + "Read 1 annotations for 796 SNPs from CD8_Naive_1.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.1.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to cts_ldscores_CD8_Naive.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 0.1780\n", + "std 0.1526 0.3931\n", + "min 0.0092 -0.0415\n", + "25% 0.0432 -0.0082\n", + "50% 0.1300 0.0062\n", + "75% 0.3086 0.0275\n", + "max 0.4995 1.8826\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0341\n", + "L2 0.0341 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 125\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 796.0000\n", + "mean 0.1570\n", + "std 0.3641\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:51:21 2025\n", + "Total time elapsed: 0.32s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.1.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell 
type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.1 --annot CD4_Naive_1.annot.gz --out cts_ldscores_CD4_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.1 --annot CD4_Naive_1.annot.gz --out cts_ldscores_CD4_Naive.1 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.1 \\\n", + "--bfile cts_ldscores_CD4_Naive.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:51:44 2025\n", + "Read list of 796 SNPs from cts_ldscores_CD4_Naive.1.bim\n", + "Read 1 annotations for 796 SNPs from CD4_Naive_1.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.1.fam\n", + "Reading genotypes from 
cts_ldscores_CD4_Naive.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to cts_ldscores_CD4_Naive.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 0.2007\n", + "std 0.1526 0.4154\n", + "min 0.0092 -0.0448\n", + "25% 0.0432 -0.0082\n", + "50% 0.1300 0.0066\n", + "75% 0.3086 0.0327\n", + "max 0.4995 1.8620\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0207\n", + "L2 0.0207 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 143\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 796.0000\n", + "mean 0.1796\n", + "std 0.3841\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:51:45 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.1.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:04<00:00, 4.99s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.2 --annot CD8_Naive_2.annot.gz --out cts_ldscores_CD8_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.2 --annot CD8_Naive_2.annot.gz --out cts_ldscores_CD8_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.2 \\\n", + "--bfile cts_ldscores_CD8_Naive.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:02 2025\n", + "Read list of 864 SNPs from cts_ldscores_CD8_Naive.2.bim\n", + "Read 1 annotations for 864 SNPs from CD8_Naive_2.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.2.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to cts_ldscores_CD8_Naive.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 0.1578\n", + "std 0.1408 0.4100\n", + "min 0.0087 -0.0418\n", + "25% 0.0381 -0.0092\n", + "50% 
0.1131 0.0037\n", + "75% 0.2638 0.0212\n", + "max 0.5000 2.7574\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0279\n", + "L2 0.0279 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 115\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 864.0000\n", + "mean 0.1331\n", + "std 0.3399\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:02 2025\n", + "Total time elapsed: 0.33s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.2.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.05s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.2 --annot CD4_Naive_2.annot.gz --out cts_ldscores_CD4_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.2 --annot CD4_Naive_2.annot.gz --out cts_ldscores_CD4_Naive.2 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.2 \\\n", + "--bfile cts_ldscores_CD4_Naive.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:22 2025\n", + "Read list of 864 SNPs from cts_ldscores_CD4_Naive.2.bim\n", + "Read 1 annotations for 864 SNPs from CD4_Naive_2.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.2.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to cts_ldscores_CD4_Naive.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 0.1530\n", + "std 0.1408 0.4118\n", + "min 0.0087 -0.0371\n", + "25% 0.0381 -0.0081\n", + "50% 
0.1131 0.0050\n", + "75% 0.2638 0.0223\n", + "max 0.5000 2.7688\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0305\n", + "L2 0.0305 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 111\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 864.0000\n", + "mean 0.1285\n", + "std 0.3348\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:22 2025\n", + "Total time elapsed: 0.41s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.2.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.3 --annot CD8_Naive_3.annot.gz --out cts_ldscores_CD8_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.3 --annot CD8_Naive_3.annot.gz --out cts_ldscores_CD8_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.3 \\\n", + "--bfile cts_ldscores_CD8_Naive.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:52:49 2025\n", + "Read list of 737 SNPs from cts_ldscores_CD8_Naive.3.bim\n", + "Read 1 annotations for 737 SNPs from CD8_Naive_3.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.3.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 737 SNPs to cts_ldscores_CD8_Naive.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 0.1528\n", + "std 0.1499 0.3819\n", + "min 0.0082 -0.0427\n", + "25% 0.0392 -0.0069\n", + "50% 
0.1142 0.0056\n", + "75% 0.2920 0.0225\n", + "max 0.4995 1.9927\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0303\n", + "L2 0.0303 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 100\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 737.0000\n", + "mean 0.1357\n", + "std 0.3427\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:52:50 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.3.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.3 --annot CD4_Naive_3.annot.gz --out cts_ldscores_CD4_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.3 --annot CD4_Naive_3.annot.gz --out cts_ldscores_CD4_Naive.3 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.3 \\\n", + "--bfile cts_ldscores_CD4_Naive.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:13 2025\n", + "Read list of 737 SNPs from cts_ldscores_CD4_Naive.3.bim\n", + "Read 1 annotations for 737 SNPs from CD4_Naive_3.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.3.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 737 SNPs to cts_ldscores_CD4_Naive.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 0.1601\n", + "std 0.1499 0.3874\n", + "min 0.0082 -0.0366\n", + "25% 0.0392 -0.0069\n", + "50% 
0.1142 0.0065\n", + "75% 0.2920 0.0256\n", + "max 0.4995 2.0018\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0091\n", + "L2 0.0091 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 103\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 737.0000\n", + "mean 0.1398\n", + "std 0.3470\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:13 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.3.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.08s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.4 --annot CD8_Naive_4.annot.gz --out cts_ldscores_CD8_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.4 --annot CD8_Naive_4.annot.gz --out cts_ldscores_CD8_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.4 \\\n", + "--bfile cts_ldscores_CD8_Naive.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:36 2025\n", + "Read list of 754 SNPs from cts_ldscores_CD8_Naive.4.bim\n", + "Read 1 annotations for 754 SNPs from CD8_Naive_4.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.4.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to cts_ldscores_CD8_Naive.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 0.1023\n", + "std 0.1460 0.3066\n", + "min 0.0087 -0.0272\n", + "25% 0.0394 -0.0059\n", + "50% 
0.1295 0.0027\n", + "75% 0.2783 0.0144\n", + "max 0.5000 1.3741\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 69\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 754.0000\n", + "mean 0.0915\n", + "std 0.2885\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:36 2025\n", + "Total time elapsed: 0.29s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.4.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.4 --annot CD4_Naive_4.annot.gz --out cts_ldscores_CD4_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.4 --annot CD4_Naive_4.annot.gz --out cts_ldscores_CD4_Naive.4 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.4 \\\n", + "--bfile cts_ldscores_CD4_Naive.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:53:58 2025\n", + "Read list of 754 SNPs from cts_ldscores_CD4_Naive.4.bim\n", + "Read 1 annotations for 754 SNPs from CD4_Naive_4.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.4.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to cts_ldscores_CD4_Naive.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 0.0894\n", + "std 0.1460 0.2923\n", + "min 0.0087 -0.0256\n", + "25% 0.0394 -0.0052\n", + "50% 
0.1295 0.0029\n", + "75% 0.2783 0.0130\n", + "max 0.5000 1.5382\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0013\n", + "L2 0.0013 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 59\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 754.0000\n", + "mean 0.0782\n", + "std 0.2687\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:53:58 2025\n", + "Total time elapsed: 0.28s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.4.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.60s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.5 --annot CD8_Naive_5.annot.gz --out cts_ldscores_CD8_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.5 --annot CD8_Naive_5.annot.gz --out cts_ldscores_CD8_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.5 \\\n", + "--bfile cts_ldscores_CD8_Naive.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:16 2025\n", + "Read list of 671 SNPs from cts_ldscores_CD8_Naive.5.bim\n", + "Read 1 annotations for 671 SNPs from CD8_Naive_5.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.5.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 671 SNPs to cts_ldscores_CD8_Naive.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 0.1430\n", + "std 0.1514 0.3666\n", + "min 0.0087 -0.0319\n", + "25% 0.0395 -0.0067\n", + "50% 
0.1239 0.0033\n", + "75% 0.3017 0.0209\n", + "max 0.4980 1.9526\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0064\n", + "L2 0.0064 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 86\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 671.0000\n", + "mean 0.1282\n", + "std 0.3345\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:17 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.5.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.02s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.5 --annot CD4_Naive_5.annot.gz --out cts_ldscores_CD4_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.5 --annot CD4_Naive_5.annot.gz --out cts_ldscores_CD4_Naive.5 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.5 \\\n", + "--bfile cts_ldscores_CD4_Naive.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:34 2025\n", + "Read list of 671 SNPs from cts_ldscores_CD4_Naive.5.bim\n", + "Read 1 annotations for 671 SNPs from CD4_Naive_5.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.5.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 671 SNPs to cts_ldscores_CD4_Naive.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 0.1453\n", + "std 0.1514 0.3693\n", + "min 0.0087 -0.0328\n", + "25% 0.0395 -0.0076\n", + "50% 
0.1239 0.0039\n", + "75% 0.3017 0.0208\n", + "max 0.4980 1.9556\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0023\n", + "L2 -0.0023 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 87\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 671.0000\n", + "mean 0.1297\n", + "std 0.3362\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:34 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.5.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.98s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.6 --annot CD8_Naive_6.annot.gz --out cts_ldscores_CD8_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.6 --annot CD8_Naive_6.annot.gz --out cts_ldscores_CD8_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.6 \\\n", + "--bfile cts_ldscores_CD8_Naive.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:54:58 2025\n", + "Read list of 701 SNPs from cts_ldscores_CD8_Naive.6.bim\n", + "Read 1 annotations for 701 SNPs from CD8_Naive_6.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.6.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to cts_ldscores_CD8_Naive.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 0.2136\n", + "std 0.1473 0.4680\n", + "min 0.0087 -0.0376\n", + "25% 0.0418 -0.0047\n", + "50% 
0.1310 0.0083\n", + "75% 0.2880 0.0470\n", + "max 0.4995 2.9027\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0757\n", + "L2 0.0757 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 113\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 701.0000\n", + "mean 0.1612\n", + "std 0.3680\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:54:58 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.6.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.89s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.6 --annot CD4_Naive_6.annot.gz --out cts_ldscores_CD4_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.6 --annot CD4_Naive_6.annot.gz --out cts_ldscores_CD4_Naive.6 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.6 \\\n", + "--bfile cts_ldscores_CD4_Naive.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:55:21 2025\n", + "Read list of 701 SNPs from cts_ldscores_CD4_Naive.6.bim\n", + "Read 1 annotations for 701 SNPs from CD4_Naive_6.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.6.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to cts_ldscores_CD4_Naive.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 0.2152\n", + "std 0.1473 0.4682\n", + "min 0.0087 -0.0372\n", + "25% 0.0418 -0.0057\n", + "50% 
0.1310 0.0086\n", + "75% 0.2880 0.0521\n", + "max 0.4995 2.9119\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0722\n", + "L2 0.0722 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 112\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 701.0000\n", + "mean 0.1598\n", + "std 0.3667\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:55:22 2025\n", + "Total time elapsed: 0.32s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.6.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.70s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.7 --annot CD8_Naive_7.annot.gz --out cts_ldscores_CD8_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.7 --annot CD8_Naive_7.annot.gz --out cts_ldscores_CD8_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.7 \\\n", + "--bfile cts_ldscores_CD8_Naive.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:55:44 2025\n", + "Read list of 611 SNPs from cts_ldscores_CD8_Naive.7.bim\n", + "Read 1 annotations for 611 SNPs from CD8_Naive_7.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.7.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 611 SNPs to cts_ldscores_CD8_Naive.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 0.1731\n", + "std 0.1567 0.4343\n", + "min 0.0092 -0.0336\n", + "25% 0.0367 -0.0076\n", + "50% 
0.1295 0.0034\n", + "75% 0.3084 0.0217\n", + "max 0.5000 2.9954\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0353\n", + "L2 0.0353 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 91\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 611.0000\n", + "mean 0.1489\n", + "std 0.3563\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:55:45 2025\n", + "Total time elapsed: 0.26s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.7.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.7 --annot CD4_Naive_7.annot.gz --out cts_ldscores_CD4_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.7 --annot CD4_Naive_7.annot.gz --out cts_ldscores_CD4_Naive.7 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.7 \\\n", + "--bfile cts_ldscores_CD4_Naive.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:08 2025\n", + "Read list of 611 SNPs from cts_ldscores_CD4_Naive.7.bim\n", + "Read 1 annotations for 611 SNPs from CD4_Naive_7.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.7.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 611 SNPs to cts_ldscores_CD4_Naive.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 0.1483\n", + "std 0.1567 0.4024\n", + "min 0.0092 -0.0304\n", + "25% 0.0367 -0.0061\n", + "50% 
0.1295 0.0039\n", + "75% 0.3084 0.0166\n", + "max 0.5000 2.9600\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0053\n", + "L2 -0.0053 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 78\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 611.0000\n", + "mean 0.1277\n", + "std 0.3340\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.7.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.23s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.8 --annot CD8_Naive_8.annot.gz --out cts_ldscores_CD8_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.8 --annot CD8_Naive_8.annot.gz --out cts_ldscores_CD8_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.8 \\\n", + "--bfile cts_ldscores_CD8_Naive.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:27 2025\n", + "Read list of 562 SNPs from cts_ldscores_CD8_Naive.8.bim\n", + "Read 1 annotations for 562 SNPs from CD8_Naive_8.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.8.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to cts_ldscores_CD8_Naive.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 0.1353\n", + "std 0.1475 0.3897\n", + "min 0.0087 -0.0349\n", + "25% 0.0449 -0.0058\n", + "50% 
0.1376 0.0034\n", + "75% 0.3012 0.0151\n", + "max 0.4995 2.9989\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0589\n", + "L2 0.0589 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 65\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 562.0000\n", + "mean 0.1157\n", + "std 0.3201\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:27 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.8.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.14s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.8 --annot CD4_Naive_8.annot.gz --out cts_ldscores_CD4_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.8 --annot CD4_Naive_8.annot.gz --out cts_ldscores_CD4_Naive.8 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.8 \\\n", + "--bfile cts_ldscores_CD4_Naive.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:56:46 2025\n", + "Read list of 562 SNPs from cts_ldscores_CD4_Naive.8.bim\n", + "Read 1 annotations for 562 SNPs from CD4_Naive_8.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.8.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to cts_ldscores_CD4_Naive.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 0.1623\n", + "std 0.1475 0.4241\n", + "min 0.0087 -0.0339\n", + "25% 0.0449 -0.0055\n", + "50% 
0.1376 0.0049\n", + "75% 0.3012 0.0210\n", + "max 0.4995 2.9978\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.081\n", + "L2 0.081 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 75\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 562.0000\n", + "mean 0.1335\n", + "std 0.3404\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:56:46 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.8.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.85s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.9 --annot CD8_Naive_9.annot.gz --out cts_ldscores_CD8_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.9 --annot CD8_Naive_9.annot.gz --out cts_ldscores_CD8_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.9 \\\n", + "--bfile cts_ldscores_CD8_Naive.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:09 2025\n", + "Read list of 440 SNPs from cts_ldscores_CD8_Naive.9.bim\n", + "Read 1 annotations for 440 SNPs from CD8_Naive_9.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.9.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 440 SNPs to cts_ldscores_CD8_Naive.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 0.1553\n", + "std 0.1510 0.3640\n", + "min 0.0092 -0.0335\n", + "25% 0.0401 -0.0058\n", + "50% 
0.1412 0.0041\n", + "75% 0.2987 0.0192\n", + "max 0.4985 1.2104\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0037\n", + "L2 0.0037 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 65\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 440.0000\n", + "mean 0.1477\n", + "std 0.3552\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.9.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.04s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.9 --annot CD4_Naive_9.annot.gz --out cts_ldscores_CD4_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.9 --annot CD4_Naive_9.annot.gz --out cts_ldscores_CD4_Naive.9 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.9 \\\n", + "--bfile cts_ldscores_CD4_Naive.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:33 2025\n", + "Read list of 440 SNPs from cts_ldscores_CD4_Naive.9.bim\n", + "Read 1 annotations for 440 SNPs from CD4_Naive_9.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.9.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 440 SNPs to cts_ldscores_CD4_Naive.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 0.1710\n", + "std 0.1510 0.4031\n", + "min 0.0092 -0.0251\n", + "25% 0.0401 -0.0055\n", + "50% 
0.1412 0.0049\n", + "75% 0.2987 0.0183\n", + "max 0.4985 2.0366\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0575\n", + "L2 0.0575 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 67\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 440.0000\n", + "mean 0.1523\n", + "std 0.3597\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:33 2025\n", + "Total time elapsed: 0.2s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.9.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.10 --annot CD8_Naive_10.annot.gz --out cts_ldscores_CD8_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.10 --annot CD8_Naive_10.annot.gz --out cts_ldscores_CD8_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.10 \\\n", + "--bfile cts_ldscores_CD8_Naive.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:57:51 2025\n", + "Read list of 523 SNPs from cts_ldscores_CD8_Naive.10.bim\n", + "Read 1 annotations for 523 SNPs from CD8_Naive_10.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.10.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to cts_ldscores_CD8_Naive.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 0.1529\n", + "std 0.1466 0.4107\n", + "min 0.0082 -0.0333\n", + "25% 0.0403 
-0.0082\n", + "50% 0.1300 0.0027\n", + "75% 0.2752 0.0176\n", + "max 0.4959 2.8415\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 69\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 523.0000\n", + "mean 0.1319\n", + "std 0.3387\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:57:52 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.10.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.32s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.10 --annot CD4_Naive_10.annot.gz --out cts_ldscores_CD4_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.10 --annot CD4_Naive_10.annot.gz --out cts_ldscores_CD4_Naive.10 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.10 \\\n", + "--bfile cts_ldscores_CD4_Naive.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:09 2025\n", + "Read list of 523 SNPs from cts_ldscores_CD4_Naive.10.bim\n", + "Read 1 annotations for 523 SNPs from CD4_Naive_10.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.10.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to cts_ldscores_CD4_Naive.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 0.1588\n", + "std 0.1466 0.4224\n", + "min 0.0082 -0.0287\n", + "25% 0.0403 
-0.0065\n", + "50% 0.1300 0.0032\n", + "75% 0.2752 0.0173\n", + "max 0.4959 2.8300\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0075\n", + "L2 -0.0075 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 70\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 523.0000\n", + "mean 0.1338\n", + "std 0.3408\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:09 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.10.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.13s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.11 --annot CD8_Naive_11.annot.gz --out cts_ldscores_CD8_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.11 --annot CD8_Naive_11.annot.gz --out cts_ldscores_CD8_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.11 \\\n", + "--bfile cts_ldscores_CD8_Naive.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:32 2025\n", + "Read list of 506 SNPs from cts_ldscores_CD8_Naive.11.bim\n", + "Read 1 annotations for 506 SNPs from CD8_Naive_11.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.11.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.11.bed\n", + "After filtering, 506 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to cts_ldscores_CD8_Naive.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 0.2665\n", + "std 0.1520 0.4811\n", + "min 0.0102 -0.0352\n", + "25% 0.0413 
-0.0063\n", + "50% 0.1208 0.0099\n", + "75% 0.2985 0.1766\n", + "max 0.4995 2.5324\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0443\n", + "L2 0.0443 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 506.0000\n", + "mean 0.2134\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:32 2025\n", + "Total time elapsed: 0.23s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.11.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.10s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.11 --annot CD4_Naive_11.annot.gz --out cts_ldscores_CD4_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.11 --annot CD4_Naive_11.annot.gz --out cts_ldscores_CD4_Naive.11 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.11 \\\n", + "--bfile cts_ldscores_CD4_Naive.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:58:56 2025\n", + "Read list of 506 SNPs from cts_ldscores_CD4_Naive.11.bim\n", + "Read 1 annotations for 506 SNPs from CD4_Naive_11.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.11.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.11.bed\n", + "After filtering, 506 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to cts_ldscores_CD4_Naive.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 0.2682\n", + "std 0.1520 0.4870\n", + "min 0.0102 -0.0380\n", + "25% 0.0413 
-0.0056\n", + "50% 0.1208 0.0112\n", + "75% 0.2985 0.1107\n", + "max 0.4995 2.5043\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0298\n", + "L2 0.0298 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 506.0000\n", + "mean 0.2134\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:58:56 2025\n", + "Total time elapsed: 0.28s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.11.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.12 --annot CD8_Naive_12.annot.gz --out cts_ldscores_CD8_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.12 --annot CD8_Naive_12.annot.gz --out cts_ldscores_CD8_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.12 \\\n", + "--bfile cts_ldscores_CD8_Naive.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:16 2025\n", + "Read list of 507 SNPs from cts_ldscores_CD8_Naive.12.bim\n", + "Read 1 annotations for 507 SNPs from CD8_Naive_12.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.12.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to cts_ldscores_CD8_Naive.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 0.2046\n", + "std 0.1498 0.4351\n", + "min 0.0092 -0.0357\n", + "25% 0.0372 
-0.0054\n", + "50% 0.1254 0.0068\n", + "75% 0.2918 0.0314\n", + "max 0.5000 2.4147\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0307\n", + "L2 -0.0307 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 92\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 507.0000\n", + "mean 0.1815\n", + "std 0.3858\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:16 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.12.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.93s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.12 --annot CD4_Naive_12.annot.gz --out cts_ldscores_CD4_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.12 --annot CD4_Naive_12.annot.gz --out cts_ldscores_CD4_Naive.12 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.12 \\\n", + "--bfile cts_ldscores_CD4_Naive.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:35 2025\n", + "Read list of 507 SNPs from cts_ldscores_CD4_Naive.12.bim\n", + "Read 1 annotations for 507 SNPs from CD4_Naive_12.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.12.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to cts_ldscores_CD4_Naive.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 0.2027\n", + "std 0.1498 0.4416\n", + "min 0.0092 -0.0303\n", + "25% 0.0372 
-0.0056\n", + "50% 0.1254 0.0065\n", + "75% 0.2918 0.0288\n", + "max 0.5000 2.4133\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0081\n", + "L2 -0.0081 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 90\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 507.0000\n", + "mean 0.1775\n", + "std 0.3825\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:35 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.12.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.49s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.13 --annot CD8_Naive_13.annot.gz --out cts_ldscores_CD8_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.13 --annot CD8_Naive_13.annot.gz --out cts_ldscores_CD8_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.13 \\\n", + "--bfile cts_ldscores_CD8_Naive.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 20:59:59 2025\n", + "Read list of 383 SNPs from cts_ldscores_CD8_Naive.13.bim\n", + "Read 1 annotations for 383 SNPs from CD8_Naive_13.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.13.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.13.bed\n", + "After filtering, 383 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to cts_ldscores_CD8_Naive.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 0.1073\n", + "std 0.1509 0.3126\n", + "min 0.0097 -0.0222\n", + "25% 0.0413 
-0.0052\n", + "50% 0.1356 0.0010\n", + "75% 0.2854 0.0093\n", + "max 0.4959 1.2337\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0339\n", + "L2 -0.0339 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 39\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 383.0000\n", + "mean 0.1018\n", + "std 0.3028\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 20:59:59 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.13.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.48s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.13 --annot CD4_Naive_13.annot.gz --out cts_ldscores_CD4_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.13 --annot CD4_Naive_13.annot.gz --out cts_ldscores_CD4_Naive.13 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.13 \\\n", + "--bfile cts_ldscores_CD4_Naive.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:00:23 2025\n", + "Read list of 383 SNPs from cts_ldscores_CD4_Naive.13.bim\n", + "Read 1 annotations for 383 SNPs from CD4_Naive_13.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.13.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.13.bed\n", + "After filtering, 383 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to cts_ldscores_CD4_Naive.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 0.1057\n", + "std 0.1509 0.3105\n", + "min 0.0097 -0.0210\n", + "25% 0.0413 
-0.0052\n", + "50% 0.1356 0.0023\n", + "75% 0.2854 0.0110\n", + "max 0.4959 1.3078\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0205\n", + "L2 0.0205 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 38\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 383.0000\n", + "mean 0.0992\n", + "std 0.2993\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:00:24 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.13.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.56s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.14 --annot CD8_Naive_14.annot.gz --out cts_ldscores_CD8_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.14 --annot CD8_Naive_14.annot.gz --out cts_ldscores_CD8_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.14 \\\n", + "--bfile cts_ldscores_CD8_Naive.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:00:43 2025\n", + "Read list of 339 SNPs from cts_ldscores_CD8_Naive.14.bim\n", + "Read 1 annotations for 339 SNPs from CD8_Naive_14.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.14.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to cts_ldscores_CD8_Naive.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 0.1102\n", + "std 0.1444 0.3275\n", + "min 0.0087 -0.0172\n", + "25% 0.0370 
-0.0059\n", + "50% 0.1096 0.0007\n", + "75% 0.2808 0.0105\n", + "max 0.4929 1.9204\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0839\n", + "L2 -0.0839 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 34\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 339.0000\n", + "mean 0.1003\n", + "std 0.3008\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:00:43 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.14.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.73s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.14 --annot CD4_Naive_14.annot.gz --out cts_ldscores_CD4_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.14 --annot CD4_Naive_14.annot.gz --out cts_ldscores_CD4_Naive.14 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.14 \\\n", + "--bfile cts_ldscores_CD4_Naive.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:00 2025\n", + "Read list of 339 SNPs from cts_ldscores_CD4_Naive.14.bim\n", + "Read 1 annotations for 339 SNPs from CD4_Naive_14.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.14.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to cts_ldscores_CD4_Naive.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 0.0935\n", + "std 0.1444 0.3057\n", + "min 0.0087 -0.0180\n", + "25% 0.0370 
-0.0045\n", + "50% 0.1096 0.0010\n", + "75% 0.2808 0.0092\n", + "max 0.4929 1.9183\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0672\n", + "L2 -0.0672 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 339.0000\n", + "mean 0.0826\n", + "std 0.2757\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:01 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.14.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.30s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.15 --annot CD8_Naive_15.annot.gz --out cts_ldscores_CD8_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.15 --annot CD8_Naive_15.annot.gz --out cts_ldscores_CD8_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.15 \\\n", + "--bfile cts_ldscores_CD8_Naive.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:24 2025\n", + "Read list of 290 SNPs from cts_ldscores_CD8_Naive.15.bim\n", + "Read 1 annotations for 290 SNPs from CD8_Naive_15.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.15.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.15.bed\n", + "After filtering, 290 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to cts_ldscores_CD8_Naive.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 0.1941\n", + "std 0.1504 0.4318\n", + "min 0.0092 -0.0209\n", + "25% 0.0477 
-0.0045\n", + "50% 0.1430 0.0036\n", + "75% 0.3072 0.0215\n", + "max 0.4964 1.9951\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0693\n", + "L2 -0.0693 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 49\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 290.0000\n", + "mean 0.1690\n", + "std 0.3754\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:24 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.15.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.90s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.15 --annot CD4_Naive_15.annot.gz --out cts_ldscores_CD4_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.15 --annot CD4_Naive_15.annot.gz --out cts_ldscores_CD4_Naive.15 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.15 \\\n", + "--bfile cts_ldscores_CD4_Naive.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:01:49 2025\n", + "Read list of 290 SNPs from cts_ldscores_CD4_Naive.15.bim\n", + "Read 1 annotations for 290 SNPs from CD4_Naive_15.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.15.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.15.bed\n", + "After filtering, 290 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to cts_ldscores_CD4_Naive.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 0.2427\n", + "std 0.1504 0.4641\n", + "min 0.0092 -0.0211\n", + "25% 0.0477 
-0.0041\n", + "50% 0.1430 0.0055\n", + "75% 0.3072 0.0437\n", + "max 0.4964 1.9847\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0453\n", + "L2 -0.0453 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 62\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 290.0000\n", + "mean 0.2138\n", + "std 0.4107\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:01:49 2025\n", + "Total time elapsed: 0.2s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.15.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.38s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.16 --annot CD8_Naive_16.annot.gz --out cts_ldscores_CD8_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.16 --annot CD8_Naive_16.annot.gz --out cts_ldscores_CD8_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.16 \\\n", + "--bfile cts_ldscores_CD8_Naive.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:07 2025\n", + "Read list of 312 SNPs from cts_ldscores_CD8_Naive.16.bim\n", + "Read 1 annotations for 312 SNPs from CD8_Naive_16.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.16.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to cts_ldscores_CD8_Naive.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 0.1561\n", + "std 0.1502 0.3667\n", + "min 0.0097 -0.0232\n", + "25% 0.0391 
-0.0038\n", + "50% 0.1381 0.0028\n", + "75% 0.2792 0.0163\n", + "max 0.4990 1.4741\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0282\n", + "L2 -0.0282 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 45\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 312.0000\n", + "mean 0.1442\n", + "std 0.3519\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:08 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.16.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.83s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.16 --annot CD4_Naive_16.annot.gz --out cts_ldscores_CD4_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.16 --annot CD4_Naive_16.annot.gz --out cts_ldscores_CD4_Naive.16 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.16 \\\n", + "--bfile cts_ldscores_CD4_Naive.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:26 2025\n", + "Read list of 312 SNPs from cts_ldscores_CD4_Naive.16.bim\n", + "Read 1 annotations for 312 SNPs from CD4_Naive_16.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.16.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to cts_ldscores_CD4_Naive.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 0.1616\n", + "std 0.1502 0.3680\n", + "min 0.0097 -0.0214\n", + "25% 0.0391 
-0.0037\n", + "50% 0.1381 0.0043\n", + "75% 0.2792 0.0144\n", + "max 0.4990 1.1301\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.1229\n", + "L2 -0.1229 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 47\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 312.0000\n", + "mean 0.1506\n", + "std 0.3583\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:26 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.16.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.84s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.17 --annot CD8_Naive_17.annot.gz --out cts_ldscores_CD8_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.17 --annot CD8_Naive_17.annot.gz --out cts_ldscores_CD8_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.17 \\\n", + "--bfile cts_ldscores_CD8_Naive.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:02:44 2025\n", + "Read list of 272 SNPs from cts_ldscores_CD8_Naive.17.bim\n", + "Read 1 annotations for 272 SNPs from CD8_Naive_17.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.17.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.17.bed\n", + "After filtering, 272 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to cts_ldscores_CD8_Naive.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 0.2967\n", + "std 0.1557 0.4779\n", + "min 0.0082 -0.0303\n", + "25% 0.0401 
-0.0016\n", + "50% 0.1656 0.0116\n", + "75% 0.3336 0.9886\n", + "max 0.4954 1.7618\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0708\n", + "L2 0.0708 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 72\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 272.0000\n", + "mean 0.2647\n", + "std 0.4420\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:02:44 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.17.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.19s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.17 --annot CD4_Naive_17.annot.gz --out cts_ldscores_CD4_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.17 --annot CD4_Naive_17.annot.gz --out cts_ldscores_CD4_Naive.17 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.17 \\\n", + "--bfile cts_ldscores_CD4_Naive.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:03 2025\n", + "Read list of 272 SNPs from cts_ldscores_CD4_Naive.17.bim\n", + "Read 1 annotations for 272 SNPs from CD4_Naive_17.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.17.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.17.bed\n", + "After filtering, 272 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to cts_ldscores_CD4_Naive.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 0.2763\n", + "std 0.1557 0.5245\n", + "min 0.0082 -0.0315\n", + "25% 0.0401 
-0.0046\n", + "50% 0.1656 0.0067\n", + "75% 0.3336 0.2849\n", + "max 0.4954 2.8720\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0414\n", + "L2 0.0414 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 60\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 272.0000\n", + "mean 0.2206\n", + "std 0.4154\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:03 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.17.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.18 --annot CD8_Naive_18.annot.gz --out cts_ldscores_CD8_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.18 --annot CD8_Naive_18.annot.gz --out cts_ldscores_CD8_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.18 \\\n", + "--bfile cts_ldscores_CD8_Naive.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:20 2025\n", + "Read list of 294 SNPs from cts_ldscores_CD8_Naive.18.bim\n", + "Read 1 annotations for 294 SNPs from CD8_Naive_18.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.18.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to cts_ldscores_CD8_Naive.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 0.0927\n", + "std 0.1518 0.3541\n", + "min 0.0097 -0.0158\n", + "25% 0.0515 
-0.0052\n", + "50% 0.1590 -0.0011\n", + "75% 0.3068 0.0060\n", + "max 0.4939 2.2866\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0585\n", + "L2 0.0585 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 294.0000\n", + "mean 0.0714\n", + "std 0.2580\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:21 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.18.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.18 --annot CD4_Naive_18.annot.gz --out cts_ldscores_CD4_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.18 --annot CD4_Naive_18.annot.gz --out cts_ldscores_CD4_Naive.18 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.18 \\\n", + "--bfile cts_ldscores_CD4_Naive.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:38 2025\n", + "Read list of 294 SNPs from cts_ldscores_CD4_Naive.18.bim\n", + "Read 1 annotations for 294 SNPs from CD4_Naive_18.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.18.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to cts_ldscores_CD4_Naive.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 0.0869\n", + "std 0.1518 0.3529\n", + "min 0.0097 -0.0148\n", + "25% 0.0515 
-0.0060\n", + "50% 0.1590 -0.0006\n", + "75% 0.3068 0.0051\n", + "max 0.4939 2.2790\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1123\n", + "L2 0.1123 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 19\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 294.0000\n", + "mean 0.0646\n", + "std 0.2463\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:38 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.18.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.19 --annot CD8_Naive_19.annot.gz --out cts_ldscores_CD8_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.19 --annot CD8_Naive_19.annot.gz --out cts_ldscores_CD8_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.19 \\\n", + "--bfile cts_ldscores_CD8_Naive.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:03:58 2025\n", + "Read list of 239 SNPs from cts_ldscores_CD8_Naive.19.bim\n", + "Read 1 annotations for 239 SNPs from CD8_Naive_19.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.19.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.19.bed\n", + "After filtering, 239 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to cts_ldscores_CD8_Naive.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 0.4960\n", + "std 0.1483 0.5393\n", + "min 0.0102 -0.0273\n", + "25% 0.0418 
0.0026\n", + "50% 0.1448 0.0425\n", + "75% 0.3007 1.0072\n", + "max 0.4969 2.0049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0458\n", + "L2 0.0458 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 108\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 239.0000\n", + "mean 0.4519\n", + "std 0.4987\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:03:58 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.19.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:09<00:00, 9.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.19 --annot CD4_Naive_19.annot.gz --out cts_ldscores_CD4_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.19 --annot CD4_Naive_19.annot.gz --out cts_ldscores_CD4_Naive.19 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.19 \\\n", + "--bfile cts_ldscores_CD4_Naive.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:04:20 2025\n", + "Read list of 239 SNPs from cts_ldscores_CD4_Naive.19.bim\n", + "Read 1 annotations for 239 SNPs from CD4_Naive_19.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.19.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.19.bed\n", + "After filtering, 239 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to cts_ldscores_CD4_Naive.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 0.3652\n", + "std 0.1483 0.5128\n", + "min 0.0102 -0.0269\n", + "25% 0.0418 
-0.0020\n", + "50% 0.1448 0.0140\n", + "75% 0.3007 0.9950\n", + "max 0.4969 2.0049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.085\n", + "L2 0.085 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 79\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 239.0000\n", + "mean 0.3305\n", + "std 0.4714\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 1.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:04:20 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.19.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.31s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.20 --annot CD8_Naive_20.annot.gz --out cts_ldscores_CD8_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.20 --annot CD8_Naive_20.annot.gz --out cts_ldscores_CD8_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.20 \\\n", + "--bfile cts_ldscores_CD8_Naive.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:04:56 2025\n", + "Read list of 227 SNPs from cts_ldscores_CD8_Naive.20.bim\n", + "Read 1 annotations for 227 SNPs from CD8_Naive_20.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.20.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to cts_ldscores_CD8_Naive.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 0.0991\n", + "std 0.1404 0.3103\n", + "min 0.0102 -0.0135\n", + "25% 0.0538 
-0.0043\n", + "50% 0.1386 -0.0004\n", + "75% 0.2796 0.0066\n", + "max 0.4837 1.4853\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0314\n", + "L2 -0.0314 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 227.0000\n", + "mean 0.0925\n", + "std 0.2904\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:04:56 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.20.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.20 --annot CD4_Naive_20.annot.gz --out cts_ldscores_CD4_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.20 --annot CD4_Naive_20.annot.gz --out cts_ldscores_CD4_Naive.20 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.20 \\\n", + "--bfile cts_ldscores_CD4_Naive.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:05:35 2025\n", + "Read list of 227 SNPs from cts_ldscores_CD4_Naive.20.bim\n", + "Read 1 annotations for 227 SNPs from CD4_Naive_20.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.20.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to cts_ldscores_CD4_Naive.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 0.1306\n", + "std 0.1404 0.3364\n", + "min 0.0102 -0.0149\n", + "25% 0.0538 
-0.0047\n", + "50% 0.1386 0.0015\n", + "75% 0.2796 0.0112\n", + "max 0.4837 1.0987\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.1106\n", + "L2 -0.1106 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 227.0000\n", + "mean 0.1233\n", + "std 0.3296\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:05:35 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.20.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.64s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.21 --annot CD8_Naive_21.annot.gz --out cts_ldscores_CD8_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.21 --annot CD8_Naive_21.annot.gz --out cts_ldscores_CD8_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.21 \\\n", + "--bfile cts_ldscores_CD8_Naive.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:04 2025\n", + "Read list of 135 SNPs from cts_ldscores_CD8_Naive.21.bim\n", + "Read 1 annotations for 135 SNPs from CD8_Naive_21.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.21.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.21.bed\n", + "After filtering, 135 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to cts_ldscores_CD8_Naive.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 0.1890\n", + "std 0.1475 0.3923\n", + "min 0.0087 -0.0117\n", + "25% 0.0581 
-0.0030\n", + "50% 0.1590 0.0026\n", + "75% 0.3129 0.0169\n", + "max 0.4893 1.2037\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0929\n", + "L2 -0.0929 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 135.0000\n", + "mean 0.1778\n", + "std 0.3837\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:04 2025\n", + "Total time elapsed: 0.21s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.21.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.82s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.21 --annot CD4_Naive_21.annot.gz --out cts_ldscores_CD4_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.21 --annot CD4_Naive_21.annot.gz --out cts_ldscores_CD4_Naive.21 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.21 \\\n", + "--bfile cts_ldscores_CD4_Naive.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:26 2025\n", + "Read list of 135 SNPs from cts_ldscores_CD4_Naive.21.bim\n", + "Read 1 annotations for 135 SNPs from CD4_Naive_21.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.21.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.21.bed\n", + "After filtering, 135 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to cts_ldscores_CD4_Naive.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 0.1647\n", + "std 0.1475 0.3729\n", + "min 0.0087 -0.0129\n", + "25% 0.0581 
-0.0026\n", + "50% 0.1590 0.0018\n", + "75% 0.3129 0.0085\n", + "max 0.4893 1.2044\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0325\n", + "L2 -0.0325 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 135.0000\n", + "mean 0.1556\n", + "std 0.3638\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:26 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.21.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD8 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.24s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.22 --annot CD8_Naive_22.annot.gz --out cts_ldscores_CD8_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD8_Naive.22 --annot CD8_Naive_22.annot.gz --out cts_ldscores_CD8_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD8_Naive.22 \\\n", + "--bfile cts_ldscores_CD8_Naive.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD8_Naive_22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:06:48 2025\n", + "Read list of 136 SNPs from cts_ldscores_CD8_Naive.22.bim\n", + "Read 1 annotations for 136 SNPs from CD8_Naive_22.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD8_Naive.22.fam\n", + "Reading genotypes from cts_ldscores_CD8_Naive.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to cts_ldscores_CD8_Naive.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD8_Naive.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 0.2025\n", + "std 0.1358 0.4513\n", + "min 0.0102 -0.0143\n", + "25% 0.0405 
-0.0031\n", + "50% 0.1241 0.0030\n", + "75% 0.2683 0.0165\n", + "max 0.4944 2.1050\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0641\n", + "L2 -0.0641 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 136.0000\n", + "mean 0.1765\n", + "std 0.3826\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:06:48 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD8_Naive.22.bed\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing cell type: CD4 Naive\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score computation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.22 --annot CD4_Naive_22.annot.gz --out cts_ldscores_CD4_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --l2 --bfile cts_ldscores_CD4_Naive.22 --annot CD4_Naive_22.annot.gz --out cts_ldscores_CD4_Naive.22 --ld-wind-cm 1.0 --thin-annot --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out cts_ldscores_CD4_Naive.22 \\\n", + "--bfile cts_ldscores_CD4_Naive.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot CD4_Naive_22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:07:10 2025\n", + "Read list of 136 SNPs from cts_ldscores_CD4_Naive.22.bim\n", + "Read 1 annotations for 136 SNPs from CD4_Naive_22.annot.gz\n", + "Read list of 981 individuals from cts_ldscores_CD4_Naive.22.fam\n", + "Reading genotypes from cts_ldscores_CD4_Naive.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to cts_ldscores_CD4_Naive.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in cts_ldscores_CD4_Naive.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 0.2171\n", + "std 0.1358 0.4606\n", + "min 0.0102 -0.0137\n", + "25% 0.0405 
-0.0025\n", + "50% 0.1241 0.0033\n", + "75% 0.2683 0.0197\n", + "max 0.4944 2.1059\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.017\n", + "L2 -0.017 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 26\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 136.0000\n", + "mean 0.1912\n", + "std 0.3947\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Thu Nov 6 21:07:11 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: cts_ldscores_CD4_Naive.22.bed\n" + ] + } + ], + "source": [ + "for chrom in range(1, 23):\n", + " for cell_type in [\"CD8 Naive\", \"CD4 Naive\"]:\n", + " print(f\"Processing cell type: {cell_type}\")\n", + "\n", + " dd_chrom = dd.sel(G_var=dd.G.var.chrom == str(chrom), C_var=dd.C.var.chrom == str(chrom)).copy()\n", + " results = compute_ld_scores_with_annotations_from_donor_data(\n", + " dd=dd_chrom,\n", + " annot_file=f\"{cell_type.replace(' ', '_')}_{chrom}.annot.gz\",\n", + " out_prefix=f\"cts_ldscores_{cell_type.replace(' ', '_')}.{chrom}\",\n", + " run=True,\n", + " runner=runner,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 6: Prepare Reference LD Scores and Weights\n", + "For the final analysis, we need baseline LD scores and regression weights. These control for genomic confounders and ensure proper statistical inference. 
We download these from the 1000 Genomes reference panel." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/1000G_Phase3_baselineLD_v2.2_ldscores.tgz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n", + "INFO:root:/Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/1000G_Phase3_weights_hm3_no_MHC.tgz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n" + ] + } + ], + "source": [ + "ldscores_path, ldscores_prefix = get_1000genomes_ld_scores(\n", + " config_path=\"../../src/cellink/resources/config/1000genomes.yaml\", population=\"EUR\", return_path=True\n", + ")\n", + "ldweights_path, ldweights_prefix = get_1000genomes_ld_weights(\n", + " config_path=\"../../src/cellink/resources/config/1000genomes.yaml\", population=\"EUR\", return_path=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a control file listing all cell-type-specific LD score prefixes. Each line holds a cell-type label and, separated by a tab, the prefix of that cell type's per-chromosome LD score files; the prefix ends with a dot because the chromosome number is appended to it (e.g. `cts_ldscores_CD8_Naive.20.l2.ldscore.gz`):" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"celltype_ldscores.txt\", \"w\") as f:\n", + " f.write(\"CD8_Naive\\tcts_ldscores_CD8_Naive.\\n\") # ,{os.path.join(ldscores_path, ldscores_prefix)}\n", + " f.write(\"CD4_Naive\\tcts_ldscores_CD4_Naive.\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need baseline LD scores. 
Here we compute them from our donor data (in a real analysis, you would use pre-computed baseline LD scores):" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.1 --l2 --out BaselineLD.1 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.1 --l2 --out BaselineLD.1 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.1 \\\n", + "--bfile BaselineLD.1 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:09:35 2025\n", + "Read list of 796 SNPs from BaselineLD.1.bim\n", + "Read list of 981 individuals from BaselineLD.1.fam\n", + "Reading genotypes from BaselineLD.1.bed\n", + "After filtering, 796 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 796 SNPs to BaselineLD.1.l2.ldscore.gz\n", 
+ "\n", + "Summary of LD Scores in BaselineLD.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1798 1.1546\n", + "std 0.1526 0.2973\n", + "min 0.0092 0.8847\n", + "25% 0.0432 1.0024\n", + "50% 0.1300 1.0518\n", + "75% 0.3086 1.1259\n", + "max 0.4995 2.8918\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.129\n", + "L2 0.129 1.000\n", + "Analysis finished at Thu Nov 6 21:09:35 2025\n", + "Total time elapsed: 0.31s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.1.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:08<00:00, 8.55s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.2 --l2 --out BaselineLD.2 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.2 --l2 --out BaselineLD.2 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.2 \\\n", + "--bfile BaselineLD.2 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:10:01 2025\n", + "Read list of 864 SNPs from BaselineLD.2.bim\n", + "Read list of 981 individuals from BaselineLD.2.fam\n", + "Reading genotypes from BaselineLD.2.bed\n", + "After filtering, 864 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 864 SNPs to BaselineLD.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1623 1.1604\n", + "std 0.1408 0.3164\n", + "min 0.0087 0.8871\n", + "25% 0.0381 1.0030\n", + "50% 0.1131 1.0526\n", + "75% 0.2638 1.1510\n", + "max 0.5000 2.9248\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1575\n", + "L2 0.1575 1.0000\n", + "Analysis finished at Thu Nov 6 21:10:02 2025\n", + "Total time elapsed: 0.34s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.2.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:15<00:00, 15.01s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.3 --l2 --out BaselineLD.3 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.3 --l2 --out BaselineLD.3 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.3 \\\n", + "--bfile BaselineLD.3 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:10:33 2025\n", + "Read list of 737 SNPs from BaselineLD.3.bim\n", + "Read list of 981 individuals from BaselineLD.3.fam\n", + "Reading genotypes from BaselineLD.3.bed\n", + "After filtering, 737 SNPs remain\n", + "Estimating LD 
Score.\n", + "Writing LD Scores for 737 SNPs to BaselineLD.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1702 1.1388\n", + "std 0.1499 0.2501\n", + "min 0.0082 0.8972\n", + "25% 0.0392 1.0100\n", + "50% 0.1142 1.0548\n", + "75% 0.2920 1.1376\n", + "max 0.4995 2.6729\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2076\n", + "L2 0.2076 1.0000\n", + "Analysis finished at Thu Nov 6 21:10:33 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.3.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.4 --l2 --out BaselineLD.4 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.4 --l2 --out BaselineLD.4 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.4 \\\n", + "--bfile BaselineLD.4 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:02 2025\n", + "Read list of 754 SNPs from BaselineLD.4.bim\n", + "Read list of 981 individuals from BaselineLD.4.fam\n", + "Reading genotypes from BaselineLD.4.bed\n", + "After filtering, 754 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 754 SNPs to BaselineLD.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1720 1.1587\n", + "std 0.1460 0.3023\n", + "min 0.0087 0.8834\n", + "25% 0.0394 1.0077\n", + "50% 0.1295 1.0548\n", + "75% 0.2783 1.1547\n", + "max 0.5000 3.5811\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0516\n", + "L2 0.0516 1.0000\n", + "Analysis finished at Thu Nov 6 21:11:02 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.4.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.21s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.5 --l2 --out BaselineLD.5 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.5 --l2 --out BaselineLD.5 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.5 \\\n", + "--bfile BaselineLD.5 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:21 2025\n", + "Read list of 671 SNPs from BaselineLD.5.bim\n", + "Read list of 981 individuals from BaselineLD.5.fam\n", + "Reading genotypes from BaselineLD.5.bed\n", + "After filtering, 671 SNPs remain\n", + "Estimating LD Score.\n", + "Writing 
LD Scores for 671 SNPs to BaselineLD.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1760 1.1557\n", + "std 0.1514 0.3052\n", + "min 0.0087 0.8851\n", + "25% 0.0395 1.0035\n", + "50% 0.1239 1.0486\n", + "75% 0.3017 1.1545\n", + "max 0.4980 3.3168\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.268\n", + "L2 0.268 1.000\n", + "Analysis finished at Thu Nov 6 21:11:22 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.5.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.67s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.6 --l2 --out BaselineLD.6 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.6 --l2 --out BaselineLD.6 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.6 \\\n", + "--bfile BaselineLD.6 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:11:48 2025\n", + "Read list of 701 SNPs from BaselineLD.6.bim\n", + "Read list of 981 individuals from BaselineLD.6.fam\n", + "Reading genotypes from BaselineLD.6.bed\n", + "After filtering, 701 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 701 SNPs to BaselineLD.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1743 1.2584\n", + "std 0.1473 0.4762\n", + "min 0.0087 0.9079\n", + "25% 0.0418 1.0161\n", + "50% 0.1310 1.0778\n", + "75% 0.2880 1.2383\n", + "max 0.4995 4.5224\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1685\n", + "L2 0.1685 1.0000\n", + "Analysis finished at Thu Nov 6 21:11:48 2025\n", + "Total time elapsed: 0.3s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.6.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.33s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.7 --l2 --out BaselineLD.7 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.7 --l2 --out BaselineLD.7 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.7 \\\n", + "--bfile BaselineLD.7 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:12:14 2025\n", + "Read list of 611 SNPs from BaselineLD.7.bim\n", + "Read list of 981 individuals from BaselineLD.7.fam\n", + "Reading genotypes from BaselineLD.7.bed\n", + "After filtering, 611 SNPs remain\n", + "Estimating LD Score.\n", + 
"Writing LD Scores for 611 SNPs to BaselineLD.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1808 1.1485\n", + "std 0.1567 0.3127\n", + "min 0.0092 0.9191\n", + "25% 0.0367 1.0038\n", + "50% 0.1295 1.0474\n", + "75% 0.3084 1.1255\n", + "max 0.5000 3.0536\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1759\n", + "L2 0.1759 1.0000\n", + "Analysis finished at Thu Nov 6 21:12:14 2025\n", + "Total time elapsed: 0.27s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.7.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.25s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.8 --l2 --out BaselineLD.8 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.8 --l2 --out BaselineLD.8 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.8 \\\n", + "--bfile BaselineLD.8 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:12:35 2025\n", + "Read list of 562 SNPs from BaselineLD.8.bim\n", + "Read list of 981 individuals from BaselineLD.8.fam\n", + "Reading genotypes from BaselineLD.8.bed\n", + "After filtering, 562 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 562 SNPs to BaselineLD.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1774 1.1633\n", + "std 0.1475 0.3349\n", + "min 0.0087 0.9183\n", + "25% 0.0449 1.0160\n", + "50% 0.1376 1.0559\n", + "75% 0.3012 1.1600\n", + "max 0.4995 3.2220\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.177\n", + "L2 0.177 1.000\n", + "Analysis finished at Thu Nov 6 21:12:35 2025\n", + "Total time elapsed: 0.24s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) 
does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.8.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.52s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.9 --l2 --out BaselineLD.9 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.9 --l2 --out BaselineLD.9 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.9 \\\n", + "--bfile BaselineLD.9 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:01 2025\n", + "Read list of 440 SNPs from BaselineLD.9.bim\n", + "Read list of 981 individuals from BaselineLD.9.fam\n", + "Reading genotypes from BaselineLD.9.bed\n", + "After filtering, 440 SNPs remain\n", + "Estimating LD Score.\n", + 
"Writing LD Scores for 440 SNPs to BaselineLD.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1794 1.1344\n", + "std 0.1510 0.2982\n", + "min 0.0092 0.9294\n", + "25% 0.0401 1.0108\n", + "50% 0.1412 1.0428\n", + "75% 0.2987 1.1075\n", + "max 0.4985 3.0546\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2587\n", + "L2 0.2587 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:01 2025\n", + "Total time elapsed: 0.23s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.9.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.10 --l2 --out BaselineLD.10 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.10 --l2 --out BaselineLD.10 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.10 \\\n", + "--bfile BaselineLD.10 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:21 2025\n", + "Read list of 523 SNPs from BaselineLD.10.bim\n", + "Read list of 981 individuals from BaselineLD.10.fam\n", + "Reading genotypes from BaselineLD.10.bed\n", + "After filtering, 523 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 523 SNPs to BaselineLD.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1713 1.1418\n", + "std 0.1466 0.2758\n", + "min 0.0082 0.9121\n", + "25% 0.0403 1.0123\n", + "50% 0.1300 1.0485\n", + "75% 0.2752 1.1195\n", + "max 0.4959 2.8504\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1955\n", + "L2 0.1955 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:21 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.10.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:12<00:00, 12.12s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.11 --l2 --out BaselineLD.11 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.11 --l2 --out BaselineLD.11 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.11 \\\n", + "--bfile BaselineLD.11 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:13:46 2025\n", + "Read list of 506 SNPs from BaselineLD.11.bim\n", + "Read list of 981 individuals from BaselineLD.11.fam\n", + "Reading genotypes from BaselineLD.11.bed\n", + "After filtering, 506 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 506 SNPs to BaselineLD.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1762 1.2376\n", + "std 0.1520 0.5903\n", + "min 0.0102 0.9036\n", + "25% 0.0413 1.0065\n", + "50% 0.1208 1.0547\n", + "75% 0.2985 1.1684\n", + "max 0.4995 5.9738\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0226\n", + "L2 0.0226 1.0000\n", + "Analysis finished at Thu Nov 6 21:13:46 2025\n", + "Total time elapsed: 0.22s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.11.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.03s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.12 --l2 --out BaselineLD.12 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.12 --l2 --out BaselineLD.12 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.12 \\\n", + "--bfile BaselineLD.12 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:05 2025\n", + "Read list of 507 SNPs from BaselineLD.12.bim\n", + "Read list of 981 individuals from BaselineLD.12.fam\n", + "Reading genotypes from BaselineLD.12.bed\n", + "After filtering, 507 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 507 SNPs to BaselineLD.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1721 1.1261\n", + "std 0.1498 0.2455\n", + "min 0.0092 0.9320\n", + "25% 0.0372 1.0033\n", + "50% 0.1254 1.0379\n", + "75% 0.2918 1.1104\n", + "max 0.5000 2.4573\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1584\n", + "L2 0.1584 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:05 2025\n", + "Total time elapsed: 0.19s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.12.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.40s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.13 --l2 --out BaselineLD.13 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.13 --l2 --out BaselineLD.13 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.13 \\\n", + "--bfile BaselineLD.13 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:29 2025\n", + "Read list of 383 SNPs from BaselineLD.13.bim\n", + "Read list of 981 individuals from BaselineLD.13.fam\n", + "Reading genotypes from BaselineLD.13.bed\n", + "After filtering, 383 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 383 SNPs to BaselineLD.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1783 1.1527\n", + "std 0.1509 0.2924\n", + "min 0.0097 0.9433\n", + "25% 0.0413 1.0108\n", + "50% 0.1356 1.0419\n", + "75% 0.2854 1.1182\n", + "max 0.4959 2.4980\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1471\n", + "L2 0.1471 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:29 2025\n", + "Total time elapsed: 0.18s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.13.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.37s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.14 --l2 --out BaselineLD.14 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.14 --l2 --out BaselineLD.14 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.14 \\\n", + "--bfile BaselineLD.14 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:14:48 2025\n", + "Read list of 339 SNPs from BaselineLD.14.bim\n", + "Read list of 981 individuals from BaselineLD.14.fam\n", + "Reading genotypes from BaselineLD.14.bed\n", + "After filtering, 339 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 339 SNPs to BaselineLD.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1641 1.1955\n", + "std 0.1444 0.3391\n", + "min 0.0087 0.9398\n", + "25% 0.0370 1.0106\n", + "50% 0.1096 1.0488\n", + "75% 0.2808 1.1684\n", + "max 0.4929 2.3924\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1236\n", + "L2 0.1236 1.0000\n", + "Analysis finished at Thu Nov 6 21:14:48 2025\n", + "Total time elapsed: 0.17s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.14.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:11<00:00, 11.09s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.15 --l2 --out BaselineLD.15 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.15 --l2 --out BaselineLD.15 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.15 \\\n", + "--bfile BaselineLD.15 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:13 2025\n", + "Read list of 290 SNPs from BaselineLD.15.bim\n", + "Read list of 981 individuals from BaselineLD.15.fam\n", + "Reading genotypes from BaselineLD.15.bed\n", + "After filtering, 290 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 290 SNPs to BaselineLD.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1834 1.1574\n", + "std 0.1504 0.3490\n", + "min 0.0092 0.9385\n", + "25% 0.0477 1.0066\n", + "50% 0.1430 1.0358\n", + "75% 0.3072 1.1152\n", + "max 0.4964 2.9996\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1579\n", + "L2 0.1579 1.0000\n", + "Analysis finished at Thu Nov 6 21:15:13 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.15.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.77s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.16 --l2 --out BaselineLD.16 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.16 --l2 --out BaselineLD.16 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.16 \\\n", + "--bfile BaselineLD.16 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:31 2025\n", + "Read list of 312 SNPs from BaselineLD.16.bim\n", + "Read list of 981 individuals from BaselineLD.16.fam\n", + "Reading genotypes from BaselineLD.16.bed\n", + "After filtering, 312 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 312 SNPs to BaselineLD.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1761 1.1088\n", + "std 0.1502 0.2288\n", + "min 0.0097 0.9313\n", + "25% 0.0391 1.0001\n", + "50% 0.1381 1.0333\n", + "75% 0.2792 1.0959\n", + "max 0.4990 2.3852\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0326\n", + "L2 0.0326 1.0000\n", + "Analysis finished at Thu Nov 6 21:15:31 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.16.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.92s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.17 --l2 --out BaselineLD.17 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.17 --l2 --out BaselineLD.17 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.17 \\\n", + "--bfile BaselineLD.17 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:15:50 2025\n", + "Read list of 272 SNPs from BaselineLD.17.bim\n", + "Read list of 981 individuals from BaselineLD.17.fam\n", + "Reading genotypes from BaselineLD.17.bed\n", + "After filtering, 272 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 272 SNPs to BaselineLD.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1944 1.1639\n", + "std 0.1557 0.3838\n", + "min 0.0082 0.9350\n", + "25% 0.0401 1.0009\n", + "50% 0.1656 1.0279\n", + "75% 0.3336 1.0893\n", + "max 0.4954 3.4348\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.074\n", + "L2 0.074 1.000\n", + "Analysis finished at Thu Nov 6 21:15:50 2025\n", + "Total time elapsed: 0.17s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.17.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:05<00:00, 5.57s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.18 --l2 --out BaselineLD.18 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.18 --l2 --out BaselineLD.18 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.18 \\\n", + "--bfile BaselineLD.18 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:08 2025\n", + "Read list of 294 SNPs from BaselineLD.18.bim\n", + "Read list of 981 individuals from BaselineLD.18.fam\n", + "Reading genotypes from BaselineLD.18.bed\n", + "After filtering, 294 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 294 SNPs to BaselineLD.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1912 1.1399\n", + "std 0.1518 0.2752\n", + "min 0.0097 0.9468\n", + "25% 0.0515 1.0082\n", + "50% 0.1590 1.0393\n", + "75% 0.3068 1.1086\n", + "max 0.4939 2.6077\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1146\n", + "L2 0.1146 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:08 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.18.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:07<00:00, 7.76s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.19 --l2 --out BaselineLD.19 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.19 --l2 --out BaselineLD.19 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.19 \\\n", + "--bfile BaselineLD.19 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:29 2025\n", + "Read list of 239 SNPs from BaselineLD.19.bim\n", + "Read list of 981 individuals from BaselineLD.19.fam\n", + "Reading genotypes from BaselineLD.19.bed\n", + "After filtering, 239 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 239 SNPs to BaselineLD.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1813 1.1145\n", + "std 0.1483 0.2572\n", + "min 0.0102 0.9521\n", + "25% 0.0418 1.0033\n", + "50% 0.1448 1.0256\n", + "75% 0.3007 1.0950\n", + "max 0.4969 2.5550\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1708\n", + "L2 0.1708 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:29 2025\n", + "Total time elapsed: 0.16s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.19.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:06<00:00, 6.31s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.20 --l2 --out BaselineLD.20 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.20 --l2 --out BaselineLD.20 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.20 \\\n", + "--bfile BaselineLD.20 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:16:48 2025\n", + "Read list of 227 SNPs from BaselineLD.20.bim\n", + "Read list of 981 individuals from BaselineLD.20.fam\n", + "Reading genotypes from BaselineLD.20.bed\n", + "After filtering, 227 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 227 SNPs to BaselineLD.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1749 1.0926\n", + "std 0.1404 0.2111\n", + "min 0.0102 0.9478\n", + "25% 0.0538 0.9939\n", + "50% 0.1386 1.0248\n", + "75% 0.2796 1.0713\n", + "max 0.4837 2.2165\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.2494\n", + "L2 0.2494 1.0000\n", + "Analysis finished at Thu Nov 6 21:16:48 2025\n", + "Total time elapsed: 0.15s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.20.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.27s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.21 --l2 --out BaselineLD.21 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.21 --l2 --out BaselineLD.21 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.21 \\\n", + "--bfile BaselineLD.21 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:17:04 2025\n", + "Read list of 135 SNPs from BaselineLD.21.bim\n", + "Read list of 981 individuals from BaselineLD.21.fam\n", + "Reading genotypes from BaselineLD.21.bed\n", + "After filtering, 135 SNPs remain\n", + 
"Estimating LD Score.\n", + "Writing LD Scores for 135 SNPs to BaselineLD.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1925 1.1085\n", + "std 0.1475 0.2201\n", + "min 0.0087 0.9721\n", + "25% 0.0581 1.0043\n", + "50% 0.1590 1.0215\n", + "75% 0.3129 1.0810\n", + "max 0.4893 2.1313\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.301\n", + "L2 0.301 1.000\n", + "Analysis finished at Thu Nov 6 21:17:04 2025\n", + "Total time elapsed: 0.16s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.21.bed\n", + "INFO:cellink.tl.external._ldsc:Exporting genotype data to PLINK format for LD score estimation\n", + "Writing BED: 100%|██████████| 1/1 [00:02<00:00, 2.15s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing FAM... done.\n", + "Writing BIM... 
done.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "INFO:cellink.tl.external._ldsc:Estimating LD scores: /ldsc/ldsc.py --bfile BaselineLD.22 --l2 --out BaselineLD.22 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --bfile BaselineLD.22 --l2 --out BaselineLD.22 --ld-wind-cm 1.0 --yes-really\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out BaselineLD.22 \\\n", + "--bfile BaselineLD.22 \\\n", + "--yes-really \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Thu Nov 6 21:17:19 2025\n", + "Read list of 136 SNPs from BaselineLD.22.bim\n", + "Read list of 981 individuals from BaselineLD.22.fam\n", + "Reading genotypes from BaselineLD.22.bed\n", + "After filtering, 136 SNPs remain\n", + "Estimating LD Score.\n", + "Writing LD Scores for 136 SNPs to BaselineLD.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in BaselineLD.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.1617 1.0711\n", + "std 0.1358 0.1791\n", + "min 0.0102 0.9639\n", + "25% 0.0405 1.0011\n", + "50% 0.1241 1.0171\n", + "75% 0.2683 1.0539\n", + "max 0.4944 2.1255\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0206\n", + "L2 -0.0206 1.0000\n", + "Analysis finished at Thu Nov 6 21:17:19 2025\n", + "Total time elapsed: 0.14s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform 
(linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.22.bim\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.22.fam\n", + "INFO:cellink.tl.external._ldsc:Cleaned up file: BaselineLD.22.bed\n" + ] + } + ], + "source": [ + "for chrom in range(1, 23):\n", + " dd_chrom = dd.sel(G_var=dd.G.var.chrom == str(chrom), C_var=dd.C.var.chrom == str(chrom)).copy()\n", + " result = estimate_ld_scores_from_donor_data(dd=dd_chrom, out_prefix=f\"BaselineLD.{chrom}\", run=True, runner=runner)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 7: Prepare GWAS Summary Statistics\n", + "For this demonstration, we generate fake GWAS summary statistics. In a real analysis, you would use actual GWAS summary statistics from published studies." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated 9239 SNPs (8239 real, 1000 fake) -> fake_munged.sumstats.gz\n" + ] + }, + { + "data": { + "text/plain": [ + "'fake_munged.sumstats.gz'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def generate_fake_sumstats(dd, output_file=\"fake_munged.sumstats.gz\", subset_frac=0.8, n_extra_snps=1000, seed=42):\n", + " np.random.seed(seed)\n", + "\n", + " n_real = int(len(dd.G.var) * subset_frac)\n", + " real_idx = np.random.choice(len(dd.G.var), n_real, replace=False)\n", + "\n", + " snp_ids = dd.G.var.index[real_idx].tolist()\n", + " allele_0 = dd.G.var[\"a0\"].iloc[real_idx].tolist()\n", + " allele_1 = dd.G.var[\"a1\"].iloc[real_idx].tolist()\n", + "\n", + " for i in range(n_extra_snps):\n", + " chrom = np.random.randint(1, 23)\n", + " pos = np.random.randint(1000000, 50000000)\n", + " ref = np.random.choice([\"A\", \"C\", \"G\", \"T\"])\n", + " 
alt = np.random.choice([a for a in [\"A\", \"C\", \"G\", \"T\"] if a != ref])\n", + " snp_ids.append(f\"{chrom}_{pos}_{ref}_{alt}\")\n", + " allele_0.append(ref)\n", + " allele_1.append(alt)\n", + "\n", + " z_scores = np.random.randn(len(snp_ids))\n", + " large_effect_idx = np.random.choice(len(snp_ids), int(len(snp_ids) * 0.01), replace=False)\n", + " z_scores[large_effect_idx] = np.random.randn(len(large_effect_idx)) * 3\n", + "\n", + " fake_sumstats = pd.DataFrame({\"SNP\": snp_ids, \"A1\": allele_1, \"A2\": allele_0, \"Z\": z_scores, \"N\": 336924.0})\n", + "\n", + " fake_sumstats.to_csv(output_file, sep=\"\\t\", index=False, compression=\"gzip\", float_format=\"%.3f\")\n", + " print(f\"Generated {len(fake_sumstats)} SNPs ({n_real} real, {n_extra_snps} fake) -> {output_file}\")\n", + " return output_file\n", + "\n", + "\n", + "generate_fake_sumstats(dd, subset_frac=0.8, n_extra_snps=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 8: Run Cell-Type-Specific Heritability Analysis\n", + "Finally, we run the cell-type-specific heritability analysis, which tests whether each cell type shows enrichment for trait heritability." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Running cell-type-specific heritability analysis: /ldsc/ldsc.py --h2-cts fake_munged.sumstats.gz --ref-ld-chr BaselineLD. --w-ld-chr BaselineLD. --ref-ld-chr-cts celltype_ldscores.txt --out CHD_CD8_Naive_h2\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --h2-cts fake_munged.sumstats.gz --ref-ld-chr BaselineLD. --w-ld-chr BaselineLD. 
--ref-ld-chr-cts celltype_ldscores.txt --out CHD_CD8_Naive_h2\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2-cts fake_munged.sumstats.gz \\\n", + "--ref-ld-chr BaselineLD. \\\n", + "--out CHD_CD8_Naive_h2 \\\n", + "--ref-ld-chr-cts celltype_ldscores.txt \\\n", + "--w-ld-chr BaselineLD. \n", + "\n", + "Beginning analysis at Thu Nov 6 22:23:46 2025\n", + "Reading summary statistics from fake_munged.sumstats.gz ...\n", + "Read summary statistics for 9239 SNPs.\n", + "Reading reference panel LD Score from BaselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 10299 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from BaselineLD.[1-22] ...\n", + "Read regression weight LD Scores for 10299 SNPs.\n", + "After merging with reference panel LD, 8239 SNPs remain.\n", + "After merging with regression SNP LD, 8239 SNPs remain.\n", + "WARNING: number of SNPs less than 200k; this is almost always bad.\n", + "Removed 0 SNPs with chi^2 > 336.924 (8239 SNPs remain)\n", + "Reading cts reference panel LD Score from cts_ldscores_CD8_Naive.[1-22] ...\n", + "Performing regression.\n", + "Reading cts reference panel LD Score from cts_ldscores_CD4_Naive.[1-22] ...\n", + "Performing regression.\n", + "Results printed to CHD_CD8_Naive_h2.cell_type_results.txt\n", + "Analysis finished at Thu Nov 6 22:23:47 2025\n", + "Total time elapsed: 0.25s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no 
specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "result = estimate_celltype_specific_heritability(\n", + " sumstats_file=\"fake_munged.sumstats.gz\",\n", + " ref_ld_chr=\"BaselineLD.\",\n", + " w_ld_chr=\"BaselineLD.\",\n", + " ref_ld_chr_cts=\"celltype_ldscores.txt\",\n", + " out_prefix=\"CHD_CD8_Naive_h2\",\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard LDSC Analyses\n", + "Beyond cell-type-specific analysis, LDSC can also be used for standard heritability estimation and genetic correlation analysis. Here we demonstrate these capabilities.\n", + "#### SNP Heritability Estimation\n", + "First, we download real GWAS summary statistics from the GWAS Catalog and prepare them for LDSC analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runner = configure_ldsc_runner(config_path=\"../../src/cellink/tl/external/config/ldsc_docker.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Fetching https://www.ebi.ac.uk/gwas/rest/api/v2/studies/GCST004787\n", + "INFO:root:Found harmonised file: 28714975-GCST004787-EFO_0001645.h.tsv.gz\n", + "INFO:root:Downloading http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST004001-GCST005000/GCST004787/harmonised/28714975-GCST004787-EFO_0001645.h.tsv.gz to /Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz')" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gwas_summary_statistic_path_1 = get_gwas_catalog_study_summary_stats(\"GCST004787\", return_path=True)\n", + "gwas_summary_statistic_path_1" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "LDSC requires \"munged\" (cleaned and standardized) summary statistics. The `munge_sumstats` function performs quality control, standardizes column names, and prepares the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Running munge_sumstats: /ldsc/munge_sumstats.py --sumstats /Users/larnoldt/cellink_data/GCST004787_summary_stats.tsv.gz --out GCST004787_summary_stats_munged --N 336924 --signed-sumstats logor,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/munge_sumstats.py --sumstats /cellink_data/GCST004787_summary_stats.tsv.gz --out GCST004787_summary_stats_munged --N 336924 --signed-sumstats logor,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats logor,0 \\\n", + "--out GCST004787_summary_stats_munged \\\n", + "--N 336924.0 \\\n", + "--a1 effect_allele \\\n", + "--a2 other_allele \\\n", + "--snp variant_id \\\n", + "--sumstats /cellink_data/GCST004787_summary_stats.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "logor:\tDirectional summary 
statistic as specified by --signed-sumstats.\n", + "other_allele:\tAllele 2, interpreted as non-ref allele for signed sumstat.\n", + "variant_id:\tVariant ID (e.g., rs number)\n", + "p_value:\tp-Value\n", + "\n", + "Reading sumstats from /cellink_data/GCST004787_summary_stats.tsv.gz into memory 5000000 SNPs at a time.\n", + ".. done\n", + "Read 9020474 SNPs from --sumstats file.\n", + "Removed 0 SNPs with missing values.\n", + "Removed 0 SNPs with INFO <= 0.9.\n", + "Removed 0 SNPs with MAF <= 0.01.\n", + "Removed 0 SNPs with out-of-bounds p-values.\n", + "Removed 1855539 variants that were not SNPs or were strand-ambiguous.\n", + "7164935 SNPs remain.\n", + "Removed 9 SNPs with duplicated rs numbers (7164926 SNPs remain).\n", + "Using N = 336924.0\n", + "Median value of SIGNED_SUMSTATS was 0.00966, which seems sensible.\n", + "Writing summary statistics for 7164926 SNPs (7164926 with nonmissing beta) to GCST004787_summary_stats_munged.sumstats.gz.\n", + "\n", + "Metadata:\n", + "Mean chi^2 = 1.078\n", + "Lambda GC = 0.999\n", + "Max chi^2 = 458.046\n", + "2243 Genome-wide significant SNPs (some may have been removed by filtering).\n", + "\n", + "Conversion finished at Thu Nov 6 10:22:30 2025\n", + "Total time elapsed: 1.0m:24.42s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "munged_file_1 = munge_sumstats(\n", + " sumstats_file=gwas_summary_statistic_path_1,\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged\"),\n", + " info_min=0.9,\n", + " maf_min=0.01,\n", + " signed_sumstats=(\"logor\", 0),\n", + " run=True,\n", + " p_col=\"p_value\",\n", + " snp_col=\"variant_id\",\n", + " a1_col=\"effect_allele\",\n", + " a2_col=\"other_allele\",\n", + " n_samples=336924,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Download reference LD scores and weights for heritability estimation:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:/Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/1000G_Phase3_baselineLD_v2.2_ldscores.tgz already exists\n", + "WARNING:root:No checksum provided, skipping verification\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Downloading https://zenodo.org/records/10515792/files/1000G_Phase3_weights_hm3_no_MHC.tgz?download=1 to /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/1000G_Phase3_weights_hm3_no_MHC.tgz\n" + ] + } + ], + "source": [ + "ldscores_path, ldscores_prefix = get_1000genomes_ld_scores(\n", + " config_path=\"../../src/cellink/resources/config/1000genomes.yaml\", population=\"EUR\", return_path=True\n", + ")\n", + "ldweights_path, ldweights_prefix = get_1000genomes_ld_weights(\n", + " config_path=\"../../src/cellink/resources/config/1000genomes.yaml\", population=\"EUR\", return_path=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now estimate SNP heritability:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Estimating heritability: /ldsc/ldsc.py --h2 GCST004787_summary_stats_munged.sumstats.gz --ref-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
--out GCST004787_summary_stats_h2\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --h2 GCST004787_summary_stats_munged.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out GCST004787_summary_stats_h2\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 GCST004787_summary_stats_munged.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. \\\n", + "--out GCST004787_summary_stats_h2 \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\n", + "\n", + "Beginning analysis at Thu Nov 6 10:37:03 2025\n", + "Reading summary statistics from GCST004787_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 7164926 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177210 SNPs remain.\n", + "After merging with regression SNP LD, 1174301 SNPs remain.\n", + "Removed 3 SNPs with chi^2 > 336.924 (1174298 SNPs remain)\n", + "Total Observed scale h2: 0.0482 (0.0031)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 
Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: -1.2111e-02 9.6569e-04 -1.5190e-03 -1.4271e-04 -1.4653e-04 6.8730e-05 1.8687e-03 -9.9421e-03 1.8025e-02 1.1469e-02 -1.0165e-03 -9.5619e-03 1.6673e-04 1.0070e-03 2.8008e-03 2.5649e-04 3.5439e-04 -1.1089e-03 4.3438e-03 4.2590e-03 -3.0551e-03 -5.3830e-04 -7.5871e-04 -1.1994e-03 -7.7712e-04 -2.9209e-03 -1.9523e-03 1.0687e-02 2.2717e-03 1.2578e-02 5.5138e-03 -2.1547e-03 -1.8587e-03 -2.6358e-03 -8.1469e-04 -1.7456e-03 -2.3468e-03 -1.4169e-03 -1.2684e-03 2.7806e-03 -3.6505e-04 -2.0104e-03 -4.8563e-03 1.8942e-04 2.2270e-03 
-3.0406e-03 -1.3599e-03 8.8194e-04 3.1442e-03 -1.1567e-04 -4.2058e-04 4.6045e-03 -7.1973e-03 -1.5086e-02 1.2136e-03 8.1949e-04 3.0214e-03 4.8475e-03 3.3215e-03 2.1972e-03 5.2089e-03 3.9394e-03 4.3883e-03 6.0610e-03 4.0806e-03 -8.3636e-09 -1.8387e-05 -5.2094e-03 -6.6823e-03 -3.7387e-04 -6.2807e-03 1.1181e-16 5.0034e-03 3.4716e-03 -7.7899e-04 -5.3968e-04\n", + " 8.2235e-04 3.9258e-05 -2.0397e-03 -7.0521e-03 5.7726e-03 1.7156e-02 2.6581e-03 -8.3309e-04 4.1682e-03 -3.2282e-03 1.1065e-03 -1.0149e-03 -3.9125e-03 -5.0393e-05 1.9047e-03 2.8446e-03 4.1525e-03 -2.0550e-04 2.5138e-03 2.6276e-04 4.5676e-04\n", + "Observed scale h2 SE: 3.6876e-02 2.8241e-03 3.4639e-03 4.1116e-03 8.9579e-03 4.0255e-03 4.9871e-03 7.6983e-03 1.0069e-02 1.4079e-02 2.4109e-02 1.2775e-02 1.1922e-03 1.9142e-03 3.3326e-03 3.4785e-03 8.1126e-03 7.6878e-03 5.5114e-03 2.1808e-03 6.6970e-03 3.7155e-03 9.1362e-03 1.0647e-02 5.1892e-03 5.8420e-03 7.1083e-03 4.7557e-03 5.6549e-03 5.1946e-03 4.8613e-03 2.5779e-03 1.9454e-03 1.4419e-03 2.2687e-03 3.2906e-03 1.8913e-03 2.0055e-02 6.1729e-03 2.3233e-03 1.0197e-03 6.0144e-03 8.4002e-03 1.1188e-02 1.0522e-02 2.0156e-03 2.3500e-03 1.8394e-03 3.3389e-03 7.7766e-04 1.4525e-03 2.5864e-03 4.0733e-03 1.1147e-02 1.8261e-03 9.4725e-04 1.2719e-03 1.2614e-03 1.0969e-03 8.3697e-04 1.0659e-03 1.2968e-03 1.0163e-03 1.5718e-03 2.0401e-03 1.8306e-08 9.6177e-06 2.8681e-03 7.6236e-03 3.0607e-03 8.4407e-03 1.0669e-16 1.8470e-03 2.3519e-03 2.1682e-03 3.0374e-03\n", + " 1.3747e-03 8.5399e-04 3.5229e-03 1.8007e-02 5.5563e-03 1.4238e-02 3.2248e-03 5.6425e-03 2.2698e-03 2.4325e-03 1.9759e-03 9.2022e-04 2.3846e-03 1.4766e-03 1.5829e-03 2.2442e-03 1.6180e-03 1.7903e-03 1.3109e-03 6.2655e-04 4.3958e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 
1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: -2.5111e-01 2.0022e-02 -3.1493e-02 -2.9588e-03 -3.0380e-03 1.4250e-03 3.8744e-02 -2.0613e-01 3.7372e-01 2.3779e-01 -2.1075e-02 -1.9825e-01 3.4570e-03 2.0879e-02 5.8070e-02 5.3178e-03 7.3477e-03 -2.2992e-02 9.0061e-02 8.8303e-02 -6.3341e-02 -1.1161e-02 -1.5730e-02 -2.4867e-02 -1.6112e-02 -6.0560e-02 -4.0478e-02 2.2157e-01 4.7100e-02 2.6079e-01 1.1432e-01 -4.4673e-02 -3.8536e-02 -5.4650e-02 -1.6891e-02 -3.6192e-02 -4.8657e-02 -2.9378e-02 -2.6298e-02 5.7650e-02 -7.5687e-03 -4.1682e-02 -1.0069e-01 3.9273e-03 4.6174e-02 -6.3042e-02 -2.8196e-02 1.8285e-02 6.5190e-02 -2.3983e-03 -8.7199e-03 9.5466e-02 -1.4922e-01 -3.1278e-01 2.5162e-02 1.6991e-02 6.2644e-02 1.0050e-01 6.8866e-02 4.5555e-02 1.0800e-01 8.1676e-02 9.0984e-02 1.2566e-01 8.4605e-02 -1.7341e-07 -3.8123e-04 -1.0801e-01 -1.3855e-01 -7.7515e-03 -1.3022e-01 2.3181e-15 1.0374e-01 7.1978e-02 -1.6151e-02 -1.1189e-02\n", + " 1.7050e-02 8.1395e-04 -4.2290e-02 -1.4621e-01 1.1968e-01 3.5570e-01 5.5111e-02 -1.7273e-02 8.6421e-02 -6.6931e-02 2.2941e-02 -2.1042e-02 -8.1119e-02 -1.0448e-03 3.9491e-02 5.8977e-02 8.6096e-02 -4.2608e-03 5.2118e-02 5.4478e-03 9.4701e-03\n", + "Enrichment: -4.5303e+00 
2.5332e+01 -1.1510e+01 -2.1637e+00 -1.7939e-01 1.0795e+00 1.4878e+01 -2.7473e+01 1.6736e+01 3.9015e+01 -2.2940e+00 -1.0836e+01 1.4458e+01 2.5609e+01 2.4990e+01 2.0037e+00 1.5841e+00 -2.0790e+00 4.1764e+00 5.0711e+01 -4.2557e+00 -3.0223e+00 -1.6806e+00 -1.0595e+00 -1.5950e+00 -2.6411e+01 -5.4955e+00 3.2725e+01 2.2252e+01 3.7549e+01 1.9742e+01 -2.0802e+00 -7.4435e+01 -1.1912e+02 -1.2245e+01 -1.4092e+01 -8.2893e+01 -1.1522e+00 -1.8359e+00 6.2203e+00 -4.3029e+01 -5.7417e+00 -8.6471e+00 2.0526e-01 1.9991e+00 -6.3872e+01 -3.0771e+01 2.9539e+01 7.6995e+01 -7.9242e+00 -7.3557e+00 8.2385e+01 -3.9697e+01 -3.2332e+00 5.5701e+01 2.9947e+00 1.1318e+01 1.8204e+01 1.2338e+01 8.3594e+00 1.9554e+01 1.4775e+01 1.6377e+01 2.2425e+01 1.5502e+01 -9.2218e-01 -2.4595e+00 -1.2552e+00 -5.4235e-01 -7.8690e-01 -2.3372e+02 -1.7808e+00 1.8099e+02 7.8485e+01 -2.1790e+01 -6.3619e+00\n", + " 9.8590e+01 5.4079e+00 -2.5919e+01 -6.9871e+00 1.0071e+02 2.0194e+01 5.1599e+01 -1.9905e+00 1.1507e+02 -6.8492e+01 2.7199e+01 -1.0578e+02 -4.4045e+01 -1.9239e+00 1.7083e+02 1.9999e+02 3.0230e+02 -8.5157e+00 1.4186e+01 3.9367e+01 2.9607e+02\n", + "Coefficients: -2.0317e-09 1.1361e-08 -5.1620e-09 -9.7038e-10 -8.0450e-11 4.8414e-10 6.6726e-09 -1.2321e-08 7.5057e-09 1.7497e-08 -1.0288e-09 -4.8598e-09 6.4839e-09 1.1485e-08 1.1207e-08 8.9859e-10 7.1042e-10 -9.3238e-10 1.8730e-09 2.2742e-08 -1.9086e-09 -1.3554e-09 -7.5369e-10 -4.7514e-10 -7.1532e-10 -1.1845e-08 -2.4646e-09 1.4676e-08 9.9796e-09 1.6840e-08 8.8538e-09 -9.3290e-10 -3.3382e-08 -5.3422e-08 -5.4914e-09 -6.3198e-09 -3.7175e-08 -5.1671e-10 -8.2333e-10 2.7896e-09 -1.9297e-08 -2.5750e-09 -3.8780e-09 9.2052e-11 8.9653e-10 -2.8645e-08 -1.3800e-08 1.3247e-08 3.4530e-08 -3.5538e-09 -3.2988e-09 3.6947e-08 -1.7803e-08 -1.4500e-09 2.4980e-08 1.3431e-09 5.0759e-09 8.1639e-09 5.5335e-09 3.7490e-09 8.7694e-09 6.6261e-09 7.3447e-09 1.0057e-08 6.9523e-09 -4.1357e-10 -1.1030e-09 -5.6292e-10 -2.4323e-10 -3.5290e-10 -1.0482e-07 -7.9863e-10 8.1170e-08 3.5198e-08 -9.7721e-09 
-2.8531e-09\n", + " 4.4215e-08 2.4253e-09 -1.1624e-08 -3.1335e-09 4.5168e-08 9.0565e-09 2.3141e-08 -8.9270e-10 5.1606e-08 -3.0717e-08 1.2198e-08 -4.7438e-08 -1.9753e-08 -8.6281e-10 7.6613e-08 8.9689e-08 1.3558e-07 -3.8191e-09 6.3620e-09 1.7655e-08 1.3278e-07\n", + "Coefficient SE: 6.1861e-09 3.3224e-08 1.1772e-08 2.7958e-08 4.9183e-09 2.8356e-08 1.7807e-08 9.5404e-09 4.1928e-09 2.1478e-08 2.4402e-08 6.4928e-09 4.6362e-08 2.1831e-08 1.3335e-08 1.2187e-08 1.6263e-08 6.4638e-09 2.3765e-09 1.1645e-08 4.1838e-09 9.3553e-09 9.0757e-09 4.2179e-09 4.7765e-09 2.3690e-08 8.9733e-09 6.5309e-09 2.4842e-08 6.9545e-09 7.8060e-09 1.1161e-09 3.4939e-08 2.9223e-08 1.5292e-08 1.1913e-08 2.9960e-08 7.3135e-09 4.0069e-09 2.3308e-09 5.3903e-08 7.7035e-09 6.7080e-09 5.4371e-09 4.2360e-09 1.8988e-08 2.3846e-08 2.7630e-08 3.6668e-08 2.3892e-08 1.1393e-08 2.0754e-08 1.0075e-08 1.0714e-09 3.7586e-08 1.5524e-09 2.1367e-09 2.1244e-09 1.8274e-09 1.4281e-09 1.7946e-09 2.1813e-09 1.7010e-09 2.6081e-09 3.4758e-09 9.0521e-10 5.7694e-10 3.0992e-10 2.7749e-10 2.8891e-09 1.4086e-07 7.6207e-10 2.9964e-08 2.3846e-08 2.7199e-08 1.6058e-08\n", + " 7.3915e-08 5.2758e-08 2.0077e-08 8.0013e-09 4.3475e-08 7.5160e-09 2.8074e-08 6.0463e-09 2.8102e-08 2.3146e-08 2.1782e-08 4.3013e-08 1.2039e-08 2.5282e-08 6.3669e-08 7.0758e-08 5.2827e-08 3.3272e-08 3.3177e-09 4.2098e-08 1.2779e-07\n", + "Lambda GC: 1.0466\n", + "Mean Chi^2: 1.139\n", + "Intercept: 0.8491 (0.0094)\n", + "Ratio < 0 (usually indicates GC correction).\n", + "Analysis finished at Thu Nov 6 10:37:31 2025\n", + "Total time elapsed: 27.21s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "h2_results = estimate_heritability(\n", + " sumstats_file=str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged.sumstats.gz\"),\n", + " 
ref_ld_chr=os.path.join(ldscores_path, ldscores_prefix),\n", + " w_ld_chr=os.path.join(ldweights_path, ldweights_prefix),\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_h2\"),\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Genetic Correlation Analysis\n", + "Genetic correlation analysis quantifies the shared genetic basis between two traits. We download a second GWAS for coronary artery disease and compute the genetic correlation." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Fetching https://www.ebi.ac.uk/gwas/rest/api/v2/studies/GCST90043957\n", + "INFO:root:Found harmonised file: 34737426-GCST90043957-MONDO_0021661.h.tsv.gz\n", + "INFO:root:Downloading http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90043001-GCST90044000/GCST90043957/harmonised/34737426-GCST90043957-MONDO_0021661.h.tsv.gz to /Users/larnoldt/cellink_data/GCST90043957_summary_stats.tsv.gz\n" + ] + }, + { + "data": { + "text/plain": [ + "PosixPath('/Users/larnoldt/cellink_data/GCST90043957_summary_stats.tsv.gz')" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gwas_summary_statistic_path_2 = get_gwas_catalog_study_summary_stats(\"GCST90043957\", return_path=True)\n", + "gwas_summary_statistic_path_2" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:cellink.tl.external._ldsc:Running munge_sumstats: /ldsc/munge_sumstats.py --sumstats /Users/larnoldt/cellink_data/GCST90043957_summary_stats.tsv.gz --out GCST90043957_summary_stats_munged --N 456348 --signed-sumstats beta,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + 
"INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/munge_sumstats.py --sumstats /cellink_data/GCST90043957_summary_stats.tsv.gz --out GCST90043957_summary_stats_munged --N 456348 --signed-sumstats beta,0 --p p_value --a1 effect_allele --a2 other_allele --snp variant_id\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats beta,0 \\\n", + "--out GCST90043957_summary_stats_munged \\\n", + "--N 456348.0 \\\n", + "--a1 effect_allele \\\n", + "--a2 other_allele \\\n", + "--snp variant_id \\\n", + "--sumstats /cellink_data/GCST90043957_summary_stats.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "p_value:\tp-Value\n", + "other_allele:\tAllele 2, interpreted as non-ref allele for signed sumstat.\n", + "n:\tSample size\n", + "beta:\tDirectional summary statistic as specified by --signed-sumstats.\n", + "variant_id:\tVariant ID (e.g., rs number)\n", + "effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "\n", + "Reading sumstats from /cellink_data/GCST90043957_summary_stats.tsv.gz into memory 5000000 SNPs at a time.\n", + "... 
done\n", + "Read 11831294 SNPs from --sumstats file.\n", + "Removed 0 SNPs with missing values.\n", + "Removed 0 SNPs with INFO <= 0.9.\n", + "Removed 0 SNPs with MAF <= 0.01.\n", + "Removed 0 SNPs with out-of-bounds p-values.\n", + "Removed 1811396 variants that were not SNPs or were strand-ambiguous.\n", + "10019898 SNPs remain.\n", + "Removed 0 SNPs with duplicated rs numbers (10019898 SNPs remain).\n", + "Removed 0 SNPs with N < 304180.0 (10019898 SNPs remain).\n", + "Median value of SIGNED_SUMSTATS was -0.0001342485, which seems sensible.\n", + "Writing summary statistics for 10019898 SNPs (10019898 with nonmissing beta) to GCST90043957_summary_stats_munged.sumstats.gz.\n", + "\n", + "Metadata:\n", + "Mean chi^2 = 1.156\n", + "Lambda GC = 1.112\n", + "Max chi^2 = 378.956\n", + "2388 Genome-wide significant SNPs (some may have been removed by filtering).\n", + "\n", + "Conversion finished at Thu Nov 6 22:30:15 2025\n", + "Total time elapsed: 1.0m:54.97s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "munged_file_2 = munge_sumstats(\n", + " sumstats_file=gwas_summary_statistic_path_2,\n", + " out_prefix=str(Path(Path(gwas_summary_statistic_path_2).stem).stem + \"_munged\"),\n", + " info_min=0.9,\n", + " maf_min=0.01,\n", + " signed_sumstats=(\"beta\", 0),\n", + " run=True,\n", + " p_col=\"p_value\",\n", + " snp_col=\"variant_id\",\n", + " a1_col=\"effect_allele\",\n", + " a2_col=\"other_allele\",\n", + " n_samples=456348,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, estimate the genetic correlation between the two traits:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"INFO:cellink.tl.external._ldsc:Estimating genetic correlation: /ldsc/ldsc.py --rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz --ref-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /Users/larnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out CHD_rg\n", + "INFO:cellink.tl.external._ldsc:Executing: docker run --rm -v /Users/larnoldt/sc-genetics/docs/tutorials:/data -v /Users/larnoldt/cellink_data:/cellink_data -w /data zijingliu/ldsc /ldsc/ldsc.py --rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out CHD_rg\n", + "INFO:cellink.tl.external._ldsc:*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD. \\\n", + "--out CHD_rg \\\n", + "--rg GCST004787_summary_stats_munged.sumstats.gz,GCST90043957_summary_stats_munged.sumstats.gz \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\n", + "\n", + "Beginning analysis at Thu Nov 6 22:30:48 2025\n", + "Reading summary statistics from GCST004787_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 7164926 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177210 SNPs remain.\n", + "After merging with regression SNP LD, 1174301 SNPs remain.\n", + "Computing rg for phenotype 2/2\n", + "Reading summary statistics from GCST90043957_summary_stats_munged.sumstats.gz ...\n", + "Read summary statistics for 10019898 SNPs.\n", + "After merging with summary statistics, 1157270 SNPs remain.\n", + "1157270 SNPs with valid alleles.\n", + "\n", + "Heritability of phenotype 1\n", + "---------------------------\n", + "Total Observed scale h2: 0.0491 (0.0038)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 
PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: -1.0015e-02 1.0538e-03 -2.4187e-03 -8.5473e-04 -2.3173e-03 -7.0296e-04 3.3579e-03 -1.5240e-02 1.6266e-02 1.1006e-02 2.9414e-03 -1.3212e-02 -9.9847e-05 1.4003e-03 3.1795e-03 -2.5359e-04 4.9086e-04 1.4679e-03 5.3681e-03 3.7131e-03 -4.7677e-03 7.1913e-04 
2.2203e-03 -3.9735e-03 -4.0377e-04 -3.1958e-03 4.9703e-04 9.7101e-03 1.9432e-03 1.2466e-02 6.1202e-03 -2.6276e-03 -1.9326e-03 -3.1809e-03 -9.6151e-04 -6.3418e-04 -2.9363e-03 -4.9169e-03 -1.3964e-03 2.6407e-03 -4.3309e-04 -3.2521e-04 -8.7740e-03 -5.4192e-05 5.4624e-03 -3.0598e-03 -1.4553e-03 9.4411e-04 3.3972e-03 -1.8424e-04 -5.0624e-04 4.0449e-03 -5.9383e-03 -1.3807e-02 1.6324e-03 2.2984e-03 4.9572e-03 5.2428e-03 3.9244e-03 3.0600e-03 3.8396e-03 4.6460e-03 5.1612e-03 5.8238e-03 4.4137e-03 -1.7508e-08 -3.0546e-05 -6.6402e-03 -5.6393e-03 3.4633e-03 -7.1287e-03 1.6247e-17 4.7571e-03 3.7948e-03 -1.2112e-03 -6.3892e-05\n", + " 6.4765e-04 -2.5847e-05 -1.9629e-03 -1.8892e-03 4.7842e-03 1.2490e-02 2.5051e-03 -1.0641e-03 3.6368e-03 -3.1866e-03 1.7460e-03 -1.0900e-03 -4.7720e-03 -3.4389e-04 1.1120e-03 2.4022e-03 4.8712e-03 -2.9330e-04 2.5735e-03 5.4408e-04 3.0727e-04\n", + "Observed scale h2 SE: 3.3113e-02 3.0412e-03 3.7903e-03 4.0891e-03 9.6479e-03 4.4886e-03 6.0158e-03 8.4307e-03 1.0836e-02 1.5152e-02 2.6129e-02 1.4501e-02 1.3498e-03 2.1675e-03 3.4629e-03 3.8180e-03 8.7109e-03 7.7280e-03 5.8321e-03 2.4646e-03 7.1905e-03 4.0641e-03 9.6940e-03 1.2233e-02 6.3812e-03 6.6464e-03 7.4992e-03 4.8999e-03 6.0768e-03 5.5779e-03 5.4241e-03 2.4000e-03 2.0993e-03 1.5682e-03 2.4484e-03 3.9649e-03 2.2279e-03 1.9340e-02 6.7085e-03 2.3778e-03 1.1476e-03 6.5687e-03 8.8161e-03 1.0342e-02 1.1430e-02 2.2670e-03 2.5392e-03 2.0371e-03 3.6266e-03 8.5538e-04 1.5581e-03 2.5421e-03 4.3575e-03 1.2080e-02 1.9372e-03 1.9705e-03 2.0474e-03 1.3799e-03 1.1893e-03 9.8355e-04 1.3933e-03 1.5192e-03 1.0290e-03 1.5327e-03 2.1380e-03 2.1371e-08 1.2226e-05 3.0942e-03 7.3861e-03 4.8725e-03 8.1873e-03 1.3327e-16 1.9197e-03 2.5245e-03 2.1664e-03 3.2087e-03\n", + " 1.4117e-03 9.2452e-04 3.4364e-03 1.8878e-02 6.3746e-03 1.5571e-02 3.4471e-03 6.2454e-03 2.2727e-03 2.6651e-03 2.0385e-03 9.7837e-04 2.3306e-03 1.5901e-03 1.5159e-03 2.2262e-03 1.8028e-03 1.8342e-03 1.2877e-03 6.7567e-04 4.3004e-04\n", + "Proportion of 
SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: -2.0388e-01 2.1451e-02 -4.9237e-02 -1.7400e-02 -4.7173e-02 -1.4310e-02 6.8356e-02 -3.1025e-01 3.3112e-01 2.2404e-01 5.9878e-02 -2.6896e-01 -2.0326e-03 2.8506e-02 6.4726e-02 -5.1624e-03 9.9925e-03 2.9882e-02 1.0928e-01 7.5588e-02 -9.7055e-02 1.4639e-02 4.5199e-02 -8.0888e-02 -8.2195e-03 -6.5057e-02 1.0118e-02 1.9767e-01 3.9557e-02 2.5376e-01 1.2459e-01 -5.3490e-02 -3.9343e-02 -6.4753e-02 -1.9574e-02 -1.2910e-02 -5.9775e-02 -1.0009e-01 -2.8427e-02 5.3757e-02 -8.8164e-03 -6.6202e-03 -1.7861e-01 -1.1032e-03 1.1120e-01 -6.2289e-02 -2.9625e-02 1.9219e-02 6.9157e-02 -3.7505e-03 -1.0306e-02 8.2343e-02 -1.2089e-01 -2.8107e-01 3.3232e-02 4.6788e-02 1.0091e-01 1.0673e-01 7.9890e-02 6.2292e-02 7.8162e-02 9.4579e-02 1.0507e-01 1.1856e-01 8.9849e-02 -3.5640e-07 -6.2183e-04 -1.3518e-01 -1.1480e-01 7.0503e-02 -1.4512e-01 3.3073e-16 9.6841e-02 7.7252e-02 -2.4656e-02 -1.3007e-03\n", + " 
1.3184e-02 -5.2616e-04 -3.9960e-02 -3.8459e-02 9.7392e-02 2.5427e-01 5.0996e-02 -2.1661e-02 7.4035e-02 -6.4869e-02 3.5543e-02 -2.2189e-02 -9.7145e-02 -7.0006e-03 2.2637e-02 4.8901e-02 9.9163e-02 -5.9708e-03 5.2390e-02 1.1076e-02 6.2552e-03\n", + "Enrichment: -3.6782e+00 2.7141e+01 -1.7995e+01 -1.2724e+01 -2.7855e+00 -1.0841e+01 2.6250e+01 -4.1350e+01 1.4828e+01 3.6759e+01 6.5178e+00 -1.4701e+01 -8.5008e+00 3.4965e+01 2.7854e+01 -1.9451e+00 2.1543e+00 2.7021e+00 5.0676e+00 4.3409e+01 -6.5209e+00 3.9643e+00 4.8288e+00 -3.4462e+00 -8.1368e-01 -2.8372e+01 1.3737e+00 2.9194e+01 1.8689e+01 3.6537e+01 2.1516e+01 -2.4907e+00 -7.5992e+01 -1.4114e+02 -1.4189e+01 -5.0267e+00 -1.0183e+02 -3.9255e+00 -1.9844e+00 5.8003e+00 -5.0123e+01 -9.1193e-01 -1.5340e+01 -5.7657e-02 4.8142e+00 -6.3109e+01 -3.2330e+01 3.1047e+01 8.1680e+01 -1.2392e+01 -8.6933e+00 7.1060e+01 -3.2158e+01 -2.9054e+00 7.3564e+01 8.2467e+00 1.8232e+01 1.9331e+01 1.4314e+01 1.1431e+01 1.4152e+01 1.7109e+01 1.8912e+01 2.1156e+01 1.6463e+01 -1.8954e+00 -4.0117e+00 -1.5709e+00 -4.4940e-01 7.1571e+00 -2.6046e+02 -2.5407e-01 1.6896e+02 8.4235e+01 -3.3263e+01 -7.3951e-01\n", + " 7.6237e+01 -3.4958e+00 -2.4491e+01 -1.8378e+00 8.1955e+01 1.4435e+01 4.7746e+01 -2.4963e+00 9.8579e+01 -6.6382e+01 4.2139e+01 -1.1154e+02 -5.2746e+01 -1.2891e+01 9.7922e+01 1.6582e+02 3.4819e+02 -1.1933e+01 1.4260e+01 8.0036e+01 1.9556e+02\n", + "Coefficients: -1.6801e-09 1.2397e-08 -8.2195e-09 -5.8119e-09 -1.2723e-09 -4.9517e-09 1.1990e-08 -1.8887e-08 6.7730e-09 1.6790e-08 2.9771e-09 -6.7151e-09 -3.8828e-09 1.5970e-08 1.2723e-08 -8.8846e-10 9.8399e-10 1.2342e-09 2.3147e-09 1.9827e-08 -2.9785e-09 1.8107e-09 2.2056e-09 -1.5741e-09 -3.7166e-10 -1.2959e-08 6.2744e-10 1.3335e-08 8.5363e-09 1.6689e-08 9.8275e-09 -1.1377e-09 -3.4710e-08 -6.4468e-08 -6.4811e-09 -2.2960e-09 -4.6514e-08 -1.7930e-09 -9.0642e-10 2.6494e-09 -2.2894e-08 -4.1654e-10 -7.0065e-09 -2.6336e-11 2.1990e-09 -2.8826e-08 -1.4767e-08 1.4181e-08 3.7308e-08 -5.6602e-09 -3.9708e-09 
3.2458e-08 -1.4689e-08 -1.3271e-09 3.3601e-08 3.7668e-09 8.3278e-09 8.8297e-09 6.5379e-09 5.2210e-09 6.4641e-09 7.8146e-09 8.6383e-09 9.6633e-09 7.5197e-09 -8.6573e-10 -1.8324e-09 -7.1753e-10 -2.0527e-10 3.2691e-09 -1.1897e-07 -1.1605e-10 7.7175e-08 3.8475e-08 -1.5193e-08 -3.3778e-10\n", + " 3.4822e-08 -1.5967e-09 -1.1187e-08 -8.3945e-10 3.7434e-08 6.5935e-09 2.1809e-08 -1.1402e-09 4.5027e-08 -3.0321e-08 1.9247e-08 -5.0948e-08 -2.4092e-08 -5.8879e-09 4.4727e-08 7.5740e-08 1.5904e-07 -5.4507e-09 6.5133e-09 3.6557e-08 8.9323e-08\n", + "Coefficient SE: 5.5549e-09 3.5779e-08 1.2881e-08 2.7805e-08 5.2972e-09 3.1618e-08 2.1481e-08 1.0448e-08 4.5121e-09 2.3115e-08 2.6446e-08 7.3700e-09 5.2489e-08 2.4720e-08 1.3856e-08 1.3376e-08 1.7462e-08 6.4977e-09 2.5148e-09 1.3160e-08 4.4921e-09 1.0233e-08 9.6299e-09 4.8462e-09 5.8738e-09 2.6952e-08 9.4668e-09 6.7290e-09 2.6695e-08 7.4676e-09 8.7098e-09 1.0391e-09 3.7704e-08 3.1785e-08 1.6503e-08 1.4355e-08 3.5291e-08 7.0525e-09 4.3545e-09 2.3856e-09 6.0666e-08 8.4134e-09 7.0401e-09 5.0257e-09 4.6013e-09 2.1357e-08 2.5766e-08 3.0599e-08 3.9828e-08 2.6280e-08 1.2221e-08 2.0399e-08 1.0778e-08 1.1611e-09 3.9874e-08 3.2294e-09 3.4395e-09 2.3239e-09 1.9813e-09 1.6782e-09 2.3457e-09 2.5554e-09 1.7222e-09 2.5432e-09 3.6425e-09 1.0568e-09 7.3341e-10 3.3436e-10 2.6885e-10 4.5993e-09 1.3663e-07 9.5193e-10 3.1143e-08 2.5595e-08 2.7176e-08 1.6964e-08\n", + " 7.5904e-08 5.7115e-08 1.9584e-08 8.3880e-09 4.9878e-08 8.2198e-09 3.0010e-08 6.6923e-09 2.8138e-08 2.5359e-08 2.2473e-08 4.5731e-08 1.1766e-08 2.7226e-08 6.0971e-08 7.0190e-08 5.8859e-08 3.4086e-08 3.2591e-09 4.5399e-08 1.2501e-07\n", + "Lambda GC: 1.0466\n", + "Mean Chi^2: 1.142\n", + "Intercept: 0.8533 (0.0121)\n", + "Ratio < 0 (usually indicates GC correction).\n", + "\n", + "Heritability of phenotype 2/2\n", + "-----------------------------\n", + "Total Observed scale h2: 0.0297 (0.0032)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 
Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 
BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale h2: 8.8839e-04 -9.3805e-04 2.5936e-04 -1.0410e-03 -8.5140e-03 -2.9552e-03 2.5074e-03 -4.2855e-03 1.9348e-02 -1.6913e-02 2.8922e-02 -1.6714e-02 8.3074e-04 -1.5534e-04 2.2472e-03 3.2617e-03 -7.8316e-03 9.2509e-03 4.9165e-03 6.0522e-04 -3.5348e-03 3.7162e-03 -4.8263e-03 -2.1888e-03 -1.0812e-03 -2.6360e-03 1.7446e-03 7.5089e-03 3.0158e-03 4.8341e-03 8.7325e-03 2.0837e-03 -2.4400e-03 -2.2472e-03 1.3217e-03 2.7870e-03 -3.2071e-03 -6.9366e-03 8.3931e-04 -1.2416e-03 -7.6842e-05 -2.0082e-03 -1.1581e-02 -4.2742e-03 2.5723e-03 -3.2393e-03 -1.6740e-03 9.2816e-04 3.2906e-03 -1.5070e-03 -2.2401e-05 9.4201e-04 -2.9080e-03 -2.6246e-03 2.6168e-03 1.1527e-03 2.0262e-03 2.4177e-03 2.8496e-03 1.4368e-03 1.6993e-03 3.2907e-03 2.8629e-03 4.0534e-03 3.6632e-03 -1.2251e-08 -2.1761e-05 -5.9192e-03 -5.1659e-03 -4.6335e-04 -9.8274e-03 -6.0863e-17 1.0092e-03 1.8435e-03 6.1848e-06 1.0243e-03\n", + " 1.4544e-03 7.6913e-04 -1.5412e-03 1.3678e-02 1.3692e-03 4.1833e-04 2.0872e-04 -1.1583e-03 3.5822e-03 -2.8837e-03 1.5556e-03 -1.5021e-03 -1.1339e-03 -8.1364e-05 1.7519e-03 1.8571e-03 2.7442e-03 -3.5932e-04 3.8293e-04 -8.6324e-05 3.1916e-04\n", + "Observed scale h2 SE: 2.9650e-02 2.0989e-03 3.5977e-03 3.5999e-03 8.8784e-03 4.5947e-03 5.6360e-03 6.5695e-03 1.0205e-02 1.4792e-02 2.5825e-02 1.3656e-02 9.0434e-04 1.7933e-03 2.7224e-03 2.9873e-03 6.8048e-03 6.1199e-03 4.9819e-03 2.0055e-03 6.0473e-03 3.2420e-03 7.0628e-03 8.9577e-03 5.3871e-03 5.7062e-03 5.2790e-03 3.8313e-03 5.5824e-03 4.8573e-03 3.9929e-03 
2.1181e-03 1.9731e-03 1.2732e-03 2.1772e-03 3.9637e-03 2.0869e-03 1.8437e-02 5.0498e-03 1.8207e-03 1.0379e-03 4.6858e-03 6.8138e-03 9.6916e-03 9.2174e-03 1.7571e-03 2.0083e-03 1.3270e-03 3.8174e-03 6.9828e-04 1.2639e-03 1.8807e-03 4.0674e-03 9.6844e-03 1.5332e-03 1.6927e-03 1.2729e-03 9.6167e-04 8.5544e-04 8.5599e-04 9.6612e-04 1.1168e-03 1.0875e-03 1.3383e-03 2.1800e-03 1.5217e-08 1.1085e-05 2.7410e-03 5.4126e-03 3.3349e-03 5.5275e-03 8.8838e-17 1.4751e-03 2.0632e-03 1.7612e-03 2.7837e-03\n", + " 1.0161e-03 8.5864e-04 2.6315e-03 1.4837e-02 6.1369e-03 1.2928e-02 2.8169e-03 5.1240e-03 2.1247e-03 2.2462e-03 1.7575e-03 8.4163e-04 1.8861e-03 1.3695e-03 1.1031e-03 1.5723e-03 1.5232e-03 1.6419e-03 9.7241e-04 4.6907e-04 3.5868e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of h2g: 2.9961e-02 -3.1636e-02 8.7468e-03 -3.5110e-02 -2.8714e-01 -9.9667e-02 8.4564e-02 -1.4453e-01 6.5253e-01 -5.7040e-01 9.7542e-01 
-5.6368e-01 2.8017e-02 -5.2389e-03 7.5787e-02 1.1000e-01 -2.6412e-01 3.1199e-01 1.6581e-01 2.0411e-02 -1.1921e-01 1.2533e-01 -1.6277e-01 -7.3818e-02 -3.6465e-02 -8.8899e-02 5.8836e-02 2.5324e-01 1.0171e-01 1.6303e-01 2.9451e-01 7.0273e-02 -8.2290e-02 -7.5789e-02 4.4576e-02 9.3992e-02 -1.0816e-01 -2.3394e-01 2.8306e-02 -4.1874e-02 -2.5915e-03 -6.7726e-02 -3.9059e-01 -1.4415e-01 8.6752e-02 -1.0925e-01 -5.6455e-02 3.1302e-02 1.1098e-01 -5.0824e-02 -7.5547e-04 3.1770e-02 -9.8074e-02 -8.8516e-02 8.8252e-02 3.8876e-02 6.8333e-02 8.1537e-02 9.6103e-02 4.8456e-02 5.7311e-02 1.1098e-01 9.6552e-02 1.3670e-01 1.2354e-01 -4.1316e-07 -7.3390e-04 -1.9963e-01 -1.7422e-01 -1.5627e-02 -3.3143e-01 -2.0526e-15 3.4034e-02 6.2174e-02 2.0858e-04 3.4544e-02\n", + " 4.9051e-02 2.5939e-02 -5.1976e-02 4.6130e-01 4.6175e-02 1.4108e-02 7.0393e-03 -3.9064e-02 1.2081e-01 -9.7252e-02 5.2464e-02 -5.0659e-02 -3.8242e-02 -2.7440e-03 5.9083e-02 6.2631e-02 9.2550e-02 -1.2118e-02 1.2914e-02 -2.9113e-03 1.0764e-02\n", + "Enrichment: 5.4053e-01 -4.0027e+01 3.1968e+00 -2.5675e+01 -1.6955e+01 -7.5504e+01 3.2474e+01 -1.9263e+01 2.9222e+01 -9.3586e+01 1.0617e+02 -3.0811e+01 1.1717e+02 -6.4258e+00 3.2614e+01 4.1447e+01 -5.6943e+01 2.8211e+01 7.6892e+00 1.1722e+01 -8.0096e+00 3.3938e+01 -1.7389e+01 -3.1450e+00 -3.6098e+00 -3.8770e+01 7.9878e+00 3.7401e+01 4.8053e+01 2.3474e+01 5.0859e+01 3.2722e+00 -1.5895e+02 -1.6520e+02 3.2314e+01 3.6597e+01 -1.8426e+02 -9.1747e+00 1.9760e+00 -4.5181e+00 -1.4733e+01 -9.3292e+00 -3.3544e+01 -7.5338e+00 3.7559e+00 -1.1068e+02 -6.1610e+01 5.0566e+01 1.3107e+02 -1.6793e+02 -6.3728e-01 2.7417e+01 -2.6090e+01 -9.1499e-01 1.9536e+02 6.8521e+00 1.2346e+01 1.4768e+01 1.7218e+01 8.8917e+00 1.0377e+01 2.0076e+01 1.7379e+01 2.4394e+01 2.2637e+01 -2.1972e+00 -4.7347e+00 -2.3199e+00 -6.8201e-01 -1.5863e+00 -5.9486e+02 1.5768e+00 5.9381e+01 6.7794e+01 2.8141e-01 1.9641e+01\n", + " 2.8363e+02 1.7234e+02 -3.1856e+01 2.2044e+01 3.8856e+01 8.0096e-01 6.5907e+00 -4.5018e+00 1.6086e+02 
-9.9521e+01 6.2201e+01 -2.5466e+02 -2.0764e+01 -5.0528e+00 2.5558e+02 2.1238e+02 3.2497e+02 -2.4220e+01 3.5151e+00 -2.1038e+01 3.3651e+02\n", + "Coefficients: 1.4903e-10 -1.1036e-08 8.8138e-10 -7.0788e-09 -4.6747e-09 -2.0817e-08 8.9534e-09 -5.3109e-09 8.0567e-09 -2.5802e-08 2.9273e-08 -8.4948e-09 3.2306e-08 -1.7716e-09 8.9920e-09 1.1427e-08 -1.5699e-08 7.7780e-09 2.1200e-09 3.2317e-09 -2.2083e-09 9.3570e-09 -4.7943e-09 -8.6710e-10 -9.9525e-10 -1.0689e-08 2.2023e-09 1.0312e-08 1.3248e-08 6.4718e-09 1.4022e-08 9.0217e-10 -4.3823e-08 -4.5546e-08 8.9093e-09 1.0090e-08 -5.0802e-08 -2.5295e-09 5.4480e-10 -1.2457e-09 -4.0620e-09 -2.5721e-09 -9.2484e-09 -2.0771e-09 1.0355e-09 -3.0516e-08 -1.6986e-08 1.3942e-08 3.6137e-08 -4.6299e-08 -1.7570e-10 7.5590e-09 -7.1931e-09 -2.5227e-10 5.3862e-08 1.8892e-09 3.4039e-09 4.0718e-09 4.7472e-09 2.4515e-09 2.8609e-09 5.5350e-09 4.7916e-09 6.7256e-09 6.2411e-09 -6.0579e-10 -1.3054e-09 -6.3962e-10 -1.8804e-10 -4.3736e-10 -1.6401e-07 4.3474e-10 1.6372e-08 1.8691e-08 7.7585e-11 5.4150e-09\n", + " 7.8200e-08 4.7515e-08 -8.7829e-09 6.0777e-09 1.0713e-08 2.2083e-10 1.8171e-09 -1.2412e-09 4.4350e-08 -2.7439e-08 1.7149e-08 -7.0211e-08 -5.7247e-09 -1.3931e-09 7.0465e-08 5.8553e-08 8.9596e-08 -6.6776e-09 9.6913e-10 -5.8002e-09 9.2778e-08\n", + "Coefficient SE: 4.9738e-09 2.4692e-08 1.2226e-08 2.4478e-08 4.8747e-09 3.2365e-08 2.0125e-08 8.1415e-09 4.2494e-09 2.2566e-08 2.6138e-08 6.9407e-09 3.5168e-08 2.0452e-08 1.0893e-08 1.0466e-08 1.3641e-08 5.1456e-09 2.1482e-09 1.0709e-08 3.7779e-09 8.1631e-09 7.0161e-09 3.5486e-09 4.9586e-09 2.3139e-08 6.6641e-09 5.2614e-09 2.4524e-08 6.5028e-09 6.4116e-09 9.1706e-10 3.5437e-08 2.5804e-08 1.4676e-08 1.4350e-08 3.3059e-08 6.7235e-09 3.2779e-09 1.8266e-09 5.4867e-08 6.0018e-09 5.4412e-09 4.7098e-09 3.7106e-09 1.6553e-08 2.0379e-08 1.9932e-08 4.1923e-08 2.1453e-08 9.9135e-09 1.5092e-08 1.0061e-08 9.3083e-10 3.1559e-08 2.7741e-09 2.1384e-09 1.6196e-09 1.4251e-09 1.4605e-09 1.6265e-09 1.8785e-09 1.8202e-09 
2.2207e-09 3.7141e-09 7.5249e-10 6.6493e-10 2.9619e-10 1.9701e-10 3.1478e-09 9.2247e-08 6.3456e-10 2.3931e-08 2.0918e-08 2.2094e-08 1.4716e-08\n", + " 5.4635e-08 5.3045e-08 1.4996e-08 6.5926e-09 4.8018e-08 6.8248e-09 2.4523e-08 5.4907e-09 2.6305e-08 2.1373e-08 1.9375e-08 3.9339e-08 9.5223e-09 2.3449e-08 4.4370e-08 4.9575e-08 4.9729e-08 3.0514e-08 2.4610e-09 3.1517e-08 1.0427e-07\n", + "Lambda GC: 1.1908\n", + "Mean Chi^2: 1.2634\n", + "Intercept: 1.0261 (0.0119)\n", + "Ratio: 0.0992 (0.0452)\n", + "\n", + "Genetic Covariance\n", + "------------------\n", + "Total Observed scale gencov: 0.042 (0.0028)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 
MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0\n", + "Observed scale gencov: 4.0572e-02 -1.6669e-03 3.1592e-03 4.1247e-03 3.9495e-03 -3.2056e-03 2.6912e-03 1.6858e-03 6.2850e-03 9.1468e-03 4.4117e-03 -4.9844e-03 4.7380e-04 1.4304e-04 1.4632e-03 3.1222e-03 -6.7569e-03 -1.8984e-04 2.6434e-03 8.5961e-04 3.6739e-03 4.9158e-03 3.7673e-03 -1.0337e-02 -6.6240e-03 -2.8831e-03 1.1834e-03 8.0947e-03 3.0511e-03 5.5734e-03 3.0505e-03 1.1373e-03 -1.3913e-03 -1.9972e-03 3.5823e-04 -1.0687e-03 -2.9276e-03 -9.8307e-03 -5.5376e-03 -3.6785e-04 -7.0872e-04 -9.8307e-03 -3.0002e-03 -8.6648e-03 1.8813e-03 -1.4124e-03 -6.5603e-04 1.9911e-03 2.1165e-03 -1.2635e-03 -1.7451e-03 1.9816e-04 -5.0904e-03 -1.4118e-02 4.0085e-04 7.9698e-04 2.0265e-03 2.9401e-03 2.9314e-03 7.6991e-04 3.9815e-03 1.2263e-03 3.8370e-03 4.5887e-03 6.7958e-03 -2.4995e-08 -3.1508e-05 -1.2315e-03 
-1.6629e-02 -5.4121e-03 -1.1852e-02 3.8827e-17 3.7332e-03 3.2676e-03 -2.3151e-04 -3.8085e-05\n", + " 7.6824e-04 1.8288e-03 -1.2957e-04 -5.2788e-03 -3.7168e-03 4.8980e-03 4.6803e-03 6.5474e-03 3.3841e-03 -7.2881e-04 -8.2463e-04 -1.0779e-03 2.3076e-03 -1.0272e-03 3.3716e-03 4.1005e-03 1.9414e-03 -7.2782e-04 7.9713e-04 -8.8215e-04 4.2808e-04\n", + "Observed scale gencov SE: 5.2173e-02 2.6063e-03 3.7021e-03 2.8447e-03 6.0285e-03 2.9222e-03 3.3289e-03 5.7966e-03 9.1277e-03 8.9377e-03 1.6397e-02 1.0584e-02 7.7051e-04 1.4768e-03 2.7325e-03 2.4247e-03 6.4332e-03 6.3571e-03 4.6036e-03 1.5999e-03 6.0999e-03 2.8057e-03 6.6400e-03 8.8631e-03 4.5303e-03 4.1952e-03 5.0407e-03 4.4812e-03 3.8085e-03 4.7169e-03 3.6894e-03 2.4768e-03 1.9606e-03 1.2380e-03 1.9378e-03 2.4272e-03 1.4339e-03 2.4136e-02 5.1363e-03 2.0651e-03 7.4823e-04 5.2539e-03 5.5072e-03 1.4024e-02 7.4177e-03 1.7716e-03 1.7839e-03 1.7261e-03 3.5178e-03 6.7507e-04 1.7741e-03 1.9158e-03 3.5763e-03 1.0298e-02 1.3807e-03 8.6784e-04 9.9155e-04 8.8151e-04 1.0061e-03 9.7862e-04 1.1429e-03 1.1571e-03 1.6367e-03 1.3505e-03 3.7714e-03 1.7863e-08 1.0453e-05 2.3375e-03 8.4771e-03 4.3907e-03 6.7778e-03 9.4404e-17 1.5976e-03 1.6119e-03 1.6703e-03 2.7225e-03\n", + " 1.0998e-03 1.0836e-03 3.4137e-03 1.5120e-02 3.6770e-03 1.4093e-02 2.0971e-03 4.8909e-03 1.8984e-03 1.6996e-03 1.9937e-03 8.6094e-04 2.6317e-03 1.4989e-03 1.8308e-03 2.2868e-03 1.4014e-03 1.7966e-03 1.0874e-03 4.8382e-04 4.4978e-04\n", + "Proportion of SNPs: 5.5429e-02 7.9037e-04 2.7361e-03 1.3675e-03 1.6935e-02 1.3200e-03 2.6040e-03 7.5030e-03 2.2330e-02 6.0949e-03 9.1869e-03 1.8295e-02 2.3911e-04 8.1529e-04 2.3237e-03 2.6540e-03 4.6384e-03 1.1059e-02 2.1564e-02 1.7413e-03 1.4884e-02 3.6928e-03 9.3602e-03 2.3472e-02 1.0102e-02 2.2930e-03 7.3657e-03 6.7709e-03 2.1166e-03 6.9453e-03 5.7906e-03 2.1476e-02 5.1772e-04 4.5878e-04 1.3795e-03 2.5683e-03 5.8698e-04 2.5498e-02 1.4325e-02 9.2680e-03 1.7590e-04 7.2596e-03 1.1644e-02 1.9134e-02 2.3098e-02 9.8701e-04 9.1633e-04 
6.1903e-04 8.4668e-04 3.0265e-04 1.1855e-03 1.1588e-03 3.7591e-03 9.6741e-02 4.5174e-04 5.6735e-03 5.5349e-03 5.5211e-03 5.5814e-03 5.4496e-03 5.5230e-03 5.5281e-03 5.5556e-03 5.6038e-03 5.4576e-03 1.8804e-07 1.5500e-04 8.6049e-02 2.5545e-01 9.8508e-03 5.5716e-04 -1.3018e-15 5.7316e-04 9.1710e-04 7.4122e-04 1.7588e-03\n", + " 1.7294e-04 1.5051e-04 1.6316e-03 2.0926e-02 1.1884e-03 1.7614e-02 1.0681e-03 8.6774e-03 7.5102e-04 9.7721e-04 8.4346e-04 1.9893e-04 1.8418e-03 5.4308e-04 2.3117e-04 2.9490e-04 2.8480e-04 5.0034e-04 3.6740e-03 1.3839e-04 3.1986e-05\n", + "Proportion of gencov: 9.6611e-01 -3.9694e-02 7.5229e-02 9.8219e-02 9.4048e-02 -7.6334e-02 6.4086e-02 4.0144e-02 1.4966e-01 2.1781e-01 1.0505e-01 -1.1869e-01 1.1282e-02 3.4062e-03 3.4842e-02 7.4348e-02 -1.6090e-01 -4.5205e-03 6.2946e-02 2.0470e-02 8.7484e-02 1.1706e-01 8.9708e-02 -2.4615e-01 -1.5773e-01 -6.8654e-02 2.8179e-02 1.9276e-01 7.2655e-02 1.3272e-01 7.2641e-02 2.7083e-02 -3.3131e-02 -4.7557e-02 8.5303e-03 -2.5448e-02 -6.9714e-02 -2.3410e-01 -1.3187e-01 -8.7594e-03 -1.6876e-02 -2.3409e-01 -7.1443e-02 -2.0633e-01 4.4799e-02 -3.3633e-02 -1.5622e-02 4.7412e-02 5.0399e-02 -3.0088e-02 -4.1554e-02 4.7187e-03 -1.2122e-01 -3.3618e-01 9.5452e-03 1.8978e-02 4.8256e-02 7.0011e-02 6.9805e-02 1.8334e-02 9.4811e-02 2.9201e-02 9.1368e-02 1.0927e-01 1.6183e-01 -5.9521e-07 -7.5030e-04 -2.9325e-02 -3.9597e-01 -1.2888e-01 -2.8224e-01 9.2456e-16 8.8898e-02 7.7811e-02 -5.5130e-03 -9.0691e-04\n", + " 1.8294e-02 4.3548e-02 -3.0855e-03 -1.2570e-01 -8.8506e-02 1.1663e-01 1.1145e-01 1.5591e-01 8.0585e-02 -1.7355e-02 -1.9637e-02 -2.5667e-02 5.4949e-02 -2.4461e-02 8.0287e-02 9.7643e-02 4.6229e-02 -1.7331e-02 1.8982e-02 -2.1006e-02 1.0194e-02\n", + "Enrichment: 1.7430e+01 -5.0222e+01 2.7495e+01 7.1826e+01 5.5534e+00 -5.7828e+01 2.4610e+01 5.3504e+00 6.7023e+00 3.5736e+01 1.1435e+01 -6.4877e+00 4.7186e+01 4.1779e+00 1.4994e+01 2.8013e+01 -3.4688e+01 -4.0877e-01 2.9190e+00 1.1755e+01 5.8778e+00 3.1698e+01 9.5840e+00 -1.0487e+01 
-1.5615e+01 -2.9941e+01 3.8257e+00 2.8469e+01 3.4326e+01 1.9109e+01 1.2545e+01 1.2611e+00 -6.3993e+01 -1.0366e+02 6.1838e+00 -9.9085e+00 -1.1877e+02 -9.1809e+00 -9.2054e+00 -9.4512e-01 -9.5946e+01 -3.2246e+01 -6.1357e+00 -1.0784e+01 1.9396e+00 -3.4076e+01 -1.7048e+01 7.6591e+01 5.9525e+01 -9.9414e+01 -3.5053e+01 4.0721e+00 -3.2246e+01 -3.4751e+00 2.1130e+01 3.3450e+00 8.7186e+00 1.2681e+01 1.2507e+01 3.3642e+00 1.7166e+01 5.2822e+00 1.6446e+01 1.9499e+01 2.9651e+01 -3.1653e+00 -4.8405e+00 -3.4080e-01 -1.5501e+00 -1.3083e+01 -5.0656e+02 -7.1024e-01 1.5510e+02 8.4845e+01 -7.4377e+00 -5.1564e-01\n", + " 1.0578e+02 2.8933e+02 -1.8911e+00 -6.0069e+00 -7.4477e+01 6.6217e+00 1.0435e+02 1.7967e+01 1.0730e+02 -1.7760e+01 -2.3281e+01 -1.2903e+02 2.9835e+01 -4.5041e+01 3.4730e+02 3.3110e+02 1.6232e+02 -3.4639e+01 5.1666e+00 -1.5179e+02 3.1869e+02\n", + "Mean z1*z2: 0.4205\n", + "Intercept: 0.1475 (0.0083)\n", + "\n", + "Genetic Correlation\n", + "-------------------\n", + "Genetic Correlation: 1.1003 (0.0858)\n", + "Z-score: 12.8278\n", + "P: 1.1459e-37\n", + "\n", + "\n", + "Summary of Genetic Correlation Results\n", + "p1 p2 rg se z p h2_obs h2_obs_se h2_int h2_int_se gcov_int gcov_int_se\n", + "GCST004787_summary_stats_munged.sumstats.gz GCST90043957_summary_stats_munged.sumstats.gz 1.1003 0.0858 12.8278 1.1459e-37 0.0297 0.0032 1.0261 0.0119 0.1475 0.0083\n", + "\n", + "Analysis finished at Thu Nov 6 22:31:52 2025\n", + "Total time elapsed: 1.0m:3.9s\n", + "\n", + "WARNING:cellink.tl.external._ldsc:WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n", + "\n" + ] + } + ], + "source": [ + "rg_results = estimate_genetic_correlation(\n", + " sumstats_files=[\n", + " str(Path(Path(gwas_summary_statistic_path_1).stem).stem + \"_munged.sumstats.gz\"),\n", + " str(Path(Path(gwas_summary_statistic_path_2).stem).stem + \"_munged.sumstats.gz\"),\n", + " ],\n", + " 
ref_ld_chr=os.path.join(ldscores_path, ldscores_prefix),\n", + " w_ld_chr=os.path.join(ldweights_path, ldweights_prefix),\n", + " out_prefix=\"CHD_rg\",\n", + " run=True,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Summary\n", + "This tutorial demonstrated how to perform comprehensive LDSC analyses using the `cellink` package, including:\n", + "\n", + "1. Cell-type-specific heritability analysis: Identifying which cell types are most relevant to complex traits\n", + "2. SNP heritability estimation: Quantifying the proportion of trait variance explained by common genetic variants\n", + "3. Genetic correlation analysis: Measuring shared genetic architecture between traits\n", + "\n", + "The `cellink` package simplifies these analyses by providing unified wrapper functions that handle data formatting, file management, and command execution for LDSC and its auxiliary tools." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "single_cell_base3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/cellink/io/_sgkit.py b/src/cellink/io/_sgkit.py index e549702..c43d429 100644 --- a/src/cellink/io/_sgkit.py +++ b/src/cellink/io/_sgkit.py @@ -57,7 +57,13 @@ class SgVars: def from_sgkit_dataset( - sgkit_dataset: xr.Dataset, *, var_rename: dict = None, obs_rename: dict = None, hard_call: bool = True + sgkit_dataset: xr.Dataset, + *, var_rename: + dict = None, + obs_rename: + dict = None, + hard_call: + bool = True ) -> AnnData: """Read SgKit Zarr Format @@ -128,7 +134,14 @@ def from_sgkit_dataset( return gdata -def 
read_sgkit_zarr(path: str | Path, *, var_rename=None, obs_rename=None, hard_call=True, **kwargs) -> AnnData: +def read_sgkit_zarr( + path: str | Path, + *, + var_rename=None, + obs_rename=None, + hard_call=True, + **kwargs +) -> AnnData: """Read SgKit Zarr Format Params @@ -147,7 +160,14 @@ def read_sgkit_zarr(path: str | Path, *, var_rename=None, obs_rename=None, hard_ return gdata -def read_plink(path: str | Path = None, *, var_rename=None, obs_rename=None, hard_call=True, **kwargs) -> AnnData: +def read_plink( + path: str | Path = None, + *, + var_rename=None, + obs_rename=None, + hard_call=True, + **kwargs +) -> AnnData: """Read Plink Format Params @@ -168,7 +188,16 @@ def read_plink(path: str | Path = None, *, var_rename=None, obs_rename=None, har return gdata -def read_bgen(path: str | Path = None, *, var_rename=None, obs_rename=None, hard_call=True, **kwargs) -> AnnData: +def read_bgen( + path: str | Path = None, + metafile_path: str | Path = None, + sample_path: str | Path = None, + *, + var_rename=None, + obs_rename=None, + hard_call=True, + **kwargs, +) -> AnnData: """Read bgen Format Params @@ -184,6 +213,6 @@ def read_bgen(path: str | Path = None, *, var_rename=None, obs_rename=None, hard """ from sgkit.io import bgen as sg_bgen - sgkit_dataset = sg_bgen.read_bgen(path=path, **kwargs) + sgkit_dataset = sg_bgen.read_bgen(path=path, metafile_path=metafile_path, sample_path=sample_path, **kwargs) gdata = from_sgkit_dataset(sgkit_dataset, var_rename=var_rename, obs_rename=obs_rename, hard_call=hard_call) return gdata diff --git a/src/cellink/resources/__init__.py b/src/cellink/resources/__init__.py index d70e566..6c598cc 100644 --- a/src/cellink/resources/__init__.py +++ b/src/cellink/resources/__init__.py @@ -8,4 +8,4 @@ get_pgs_catalog_score, get_pgs_catalog_scores, ) -from ._ld import get_1000genomes_ld_scores, get_1000genomes_ld_weights +from ._ld import get_1000genomes_ld_scores, get_1000genomes_ld_weights, get_1000genomes_plink_files diff --git 
a/src/cellink/resources/_datasets.py b/src/cellink/resources/_datasets.py index 13943a8..21ed766 100644 --- a/src/cellink/resources/_datasets.py +++ b/src/cellink/resources/_datasets.py @@ -5,10 +5,10 @@ import pandas as pd import cellink as cl -from cellink._core import DonorData from cellink.io import read_h5_dd, read_zarr_dd from cellink.resources._datasets_utils import plink_filter_prune, plink_kinship, preprocess_vcf_to_plink, try_liftover from cellink.resources._utils import _download_file, _load_config, _run, get_data_home +from .._core import DonorData logging.basicConfig(level=logging.INFO) diff --git a/src/cellink/resources/_ld.py b/src/cellink/resources/_ld.py index a0ccc60..aea112f 100644 --- a/src/cellink/resources/_ld.py +++ b/src/cellink/resources/_ld.py @@ -36,24 +36,29 @@ def _extract_or_refresh(tgz_path: Path, extract_path: Path, refresh: bool = Fals else: shutil.rmtree(item) - if not any(p for p in extract_path.iterdir() if p != tgz_path): + existing_contents = [p for p in extract_path.iterdir() if p != tgz_path] + + if not existing_contents: with tarfile.open(tgz_path, "r:gz") as tar: tar.extractall(path=extract_path) - contents = list(extract_path.iterdir()) - if len(contents) == 2 and contents[1].is_dir(): - for item in contents[1].iterdir(): + contents = [p for p in extract_path.iterdir() if p != tgz_path] + + if len(contents) == 1 and contents[0].is_dir(): + nested_dir = contents[0] + + for item in nested_dir.iterdir(): shutil.move(str(item), str(extract_path)) - contents[1].rmdir() + nested_dir.rmdir() def get_1000genomes_ld_scores( - config_path: Union[str, Path] = "./cellink/resources/config/1000genomes.yaml", + config_path: str | Path = "./cellink/resources/config/1000genomes.yaml", population: str = "EUR", - data_home: Optional[Union[str, Path]] = None, + data_home: str | Path | None = None, return_path: bool = False, refresh: bool = False, -) -> Union[Tuple[pd.DataFrame, pd.DataFrame, str], Tuple[Path, str]]: +) -> tuple[pd.DataFrame, 
def get_1000genomes_plink_files(
    config_path: str | Path = "./cellink/resources/config/1000genomes.yaml",
    population: str = "EUR",
    data_home: str | Path | None = None,
    refresh: bool = False,
) -> tuple[Path, str]:
    """
    Download and extract 1000 Genomes PLINK files (BED/BIM/FAM format).

    The archive configured for ``population`` is downloaded into a
    population-specific directory below the data home and unpacked there.

    Parameters
    ----------
    config_path : str or pathlib.Path, default='./cellink/resources/config/1000genomes.yaml'
        Path to the YAML configuration file. Its ``plink_files`` section must
        hold a ``prefix`` entry plus one mapping per population with the keys
        ``filename`` and ``url``.
    population : str, default='EUR'
        Population code. Must be one of the population entries of the
        ``plink_files`` section of the configuration.
    data_home : str or pathlib.Path, optional
        Root directory where data will be stored; resolved through
        ``get_data_home``.
    refresh : bool, default=False
        Forwarded to ``_extract_or_refresh`` to force a fresh extraction.
        NOTE(review): the download step is called without this flag; whether an
        existing archive is re-downloaded depends on ``_download_file``.

    Returns
    -------
    plink_dir : pathlib.Path
        Directory containing the extracted PLINK files (.bed, .bim, .fam).
    prefix : str
        File name prefix taken from the configuration.
        NOTE(review): a single prefix is configured for all populations;
        confirm that it matches the file names of non-EUR archives.

    Raises
    ------
    ValueError
        If `population` is not a population entry of the configuration.

    Examples
    --------
    >>> plink_dir, prefix = get_1000genomes_plink_files(population="EUR")
    >>> # Chromosome 1 files are then expected at:
    >>> # plink_dir / f"{prefix}1.bed"
    >>> # plink_dir / f"{prefix}1.bim"
    >>> # plink_dir / f"{prefix}1.fam"
    """
    config = _load_config(config_path)
    plink_config = config["plink_files"]

    # "prefix" lives next to the population entries, so only mapping-valued
    # keys are real populations. Validate before touching the file system so
    # a typo does not leave an empty directory behind.
    populations = [key for key, entry in plink_config.items() if isinstance(entry, dict)]
    if population not in populations:
        raise ValueError(f"population must be one of {populations}")

    plink_dir = get_data_home(data_home) / f"1000genomes_plink_{population}"
    plink_dir.mkdir(parents=True, exist_ok=True)

    prefix = plink_config["prefix"]
    population_entry = plink_config[population]
    tgz_path = plink_dir / population_entry["filename"]

    _download_file(population_entry["url"], tgz_path, checksum=None)
    _extract_or_refresh(tgz_path, plink_dir, refresh=refresh)

    return plink_dir, prefix
"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/ALL.chr22.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz.tbi" checksum: "27de6b77af65d300bb968e8e372439deb949389e4395eb0dd251f9ba7d73bbed" +plink_files: + prefix: 1000G.EUR.QC. + EUR: + filename: "1000G_Phase3_plinkfiles.tgz" + url: "https://zenodo.org/records/7796478/files/1000G_Phase3_plinkfiles.tgz?download=1" + EAS: + filename: "1000G_Phase3_EAS_plinkfiles.tgz" + url: "https://zenodo.org/records/7796478/files/1000G_Phase3_EAS_plinkfiles.tgz?download=1" ld_scores: prefix: baselineLD. EUR: diff --git a/src/cellink/tl/external/__init__.py b/src/cellink/tl/external/__init__.py index 2c4e573..4f940ad 100644 --- a/src/cellink/tl/external/__init__.py +++ b/src/cellink/tl/external/__init__.py @@ -15,6 +15,21 @@ from ._scdrs import run_scdrs from ._seismic import run_seismic from ._magma import run_magma_pipeline +from ._ldsc import ( + compute_ld_scores_with_annotations_from_bimfile, + compute_ld_scores_with_annotations_from_donor_data, + configure_ldsc_runner, + estimate_celltype_specific_heritability, + estimate_genetic_correlation, + estimate_heritability, + estimate_ld_scores_from_bimfile, + estimate_ld_scores_from_donor_data, + make_annot_from_bimfile, + make_annot_from_donor_data, + munge_sumstats, +) +from ._sldsc_utils import generate_gene_coord_file, generate_sldsc_genesets, preprocess_for_sldsc +from ._ldsc2magma import load_ensembl_to_entrez_map, genesets_dir_to_entrez_gmt __all__ = [ "read_jaxqtl_results", diff --git a/src/cellink/tl/external/_ldsc.py b/src/cellink/tl/external/_ldsc.py new file mode 100644 index 0000000..e6e4a3c --- /dev/null +++ b/src/cellink/tl/external/_ldsc.py @@ -0,0 +1,1796 @@ +import logging +import os +import subprocess +from typing import Any +import shlex + +import yaml + +from cellink._core import DonorData +from cellink.io import to_plink +from cellink.resources._utils import get_data_home +from cellink.tl._runner import BaseToolRunner + +logger = 
class LDSCRunner(BaseToolRunner):
    """Runner for the LDSC command-line scripts.

    The configuration names the execution mode and the three LDSC entry-point
    scripts; the built-in defaults also carry docker/singularity image
    settings. Everything else is delegated to ``BaseToolRunner``.
    """

    def __init__(self, config_path: str | None = None, config_dict: dict | None = None):
        # Positional order expected by the base class:
        # (config_path, config_dict, required_fields, prefix_tokens).
        super().__init__(
            config_path,
            config_dict,
            ["execution_mode", "ldsc_command", "make_annot_command", "munge_command"],
            [],
        )

    def _load_config(self, config_path: str | None, config_dict: dict | None) -> dict:
        """Return the configuration: explicit dict, then YAML file, then defaults."""
        if config_dict:
            return config_dict

        if not config_path or not os.path.exists(config_path):
            # No usable file (including a path that does not exist): fall back
            # to a local installation with the stock script names.
            return {
                "execution_mode": "local",
                "docker_image": "zijingliu/ldsc",
                "singularity_image": None,
                "ldsc_command": "ldsc.py",
                "make_annot_command": "make_annot.py",
                "munge_command": "munge_sumstats.py",
            }

        with open(config_path) as handle:
            return yaml.safe_load(handle)

    @property
    def ldsc_command(self) -> str:
        """Configured command for ``ldsc.py``."""
        return self.config["ldsc_command"]

    @property
    def make_annot_command(self) -> str:
        """Configured command for ``make_annot.py``."""
        return self.config["make_annot_command"]

    @property
    def munge_command(self) -> str:
        """Configured command for ``munge_sumstats.py``."""
        return self.config["munge_command"]

    @property
    def execution_mode(self) -> str:
        """Configured execution mode."""
        return self.config["execution_mode"]


# Module-wide runner shared by all wrapper functions; created lazily.
_ldsc_runner = None


def configure_ldsc_runner(config_path: str | None = None, config_dict: dict | None = None) -> LDSCRunner:
    """Build a new runner from the given configuration and install it as the global one."""
    global _ldsc_runner
    _ldsc_runner = LDSCRunner(config_path=config_path, config_dict=config_dict)
    return _ldsc_runner


def get_ldsc_runner() -> LDSCRunner:
    """Return the global runner, creating a default-configured one on first use."""
    global _ldsc_runner
    if _ldsc_runner is not None:
        return _ldsc_runner
    _ldsc_runner = LDSCRunner()
    return _ldsc_runner
str | None = None, + a2_col: str | None = None, + snp_col: str | None = None, + n_col: str | None = None, + info_col: str | None = None, + run: bool = True, + runner: LDSCRunner | None = None, + **kwargs, +) -> str | None: + """ + Munge (clean and standardize) GWAS summary statistics for LDSC analysis + + This function processes raw GWAS summary statistics files to prepare them for + LD Score regression analysis. It performs quality control, standardizes column + names, filters SNPs, and aligns alleles to a reference panel. + + Parameters + ---------- + sumstats_file : str + Path to input GWAS summary statistics file. Can be plain text or gzipped. + Should contain columns for SNP ID, effect allele, other allele, and p-value. + out_prefix : str, default "GWAS_summary_statistics_munged" + Prefix for output files. Will create {out_prefix}.sumstats.gz + n_samples : int, optional + Total sample size. If the summary statistics file has a sample size column, + this will be used to verify it. If there's no sample size column, this will + be added to all SNPs. + merge_alleles : str, optional + Path to reference allele file (e.g., w_hm3.snplist) for aligning alleles + and removing strand-ambiguous SNPs. Recommended for downstream analysis. + snplist : str, optional + Path to file with SNP IDs to keep. Only SNPs in this list will be retained. + info_min : float, default 0.9 + Minimum INFO score for SNP inclusion. SNPs with INFO < info_min are removed. + maf_min : float, default 0.01 + Minimum minor allele frequency for SNP inclusion. SNPs with MAF < maf_min + are removed. + a1_inc : bool, default False + If True, A1 is the effect allele (increasing allele). If False, A1 is the + other allele and the sign of the effect will be flipped. + signed_sumstats : tuple[str, float], optional + Tuple of (column_name, sign) for identifying the direction of effect. + Example: ("OR", 1) means odds ratios where values >1 indicate positive effect. 
+ Example: ("BETA", 0) means betas where values >0 indicate positive effect. + p_col : str, optional + Name of the p-value column if non-standard (default: "P") + a1_col : str, optional + Name of the effect allele column if non-standard (default: "A1") + a2_col : str, optional + Name of the other allele column if non-standard (default: "A2") + snp_col : str, optional + Name of the SNP ID column if non-standard (default: "SNP") + n_col : str, optional + Name of the sample size column if non-standard (default: "N") + info_col : str, optional + Name of the INFO score column if non-standard (default: "INFO") + run : bool, default True + Whether to execute the command or just return it + runner : LDSCRunner, optional + Runner instance to use. If None, uses the global runner. + **kwargs + Additional command line arguments to pass to munge_sumstats.py + Common options include: + - ignore: List of columns to ignore + - daner: Set if input is in daner format (PGC) + - no-alleles: Don't require allele information + - merge-alleles: Alternative way to specify reference alleles + + Returns + ------- + dict + Results dictionary containing: + - 'sumstats_file': Path to the munged summary statistics file (if run=True) + - 'files_created': List of created files (if run=True) + - 'command': Command string (if run=False) + + Raises + ------ + subprocess.CalledProcessError + If the munging process fails (e.g., due to malformed input file) + + Examples + -------- + Basic usage with standard column names: + >>> result = munge_sumstats( + ... sumstats_file="height_gwas.txt.gz", + ... out_prefix="height_munged", + ... n_samples=253288, + ... merge_alleles="w_hm3.snplist", + ... ) + + With custom column names: + >>> result = munge_sumstats( + ... sumstats_file="custom_gwas.txt", + ... out_prefix="custom_munged", + ... n_samples=50000, + ... snp_col="RSID", + ... a1_col="EFFECT_ALLELE", + ... a2_col="OTHER_ALLELE", + ... p_col="PVAL", + ... signed_sumstats=("BETA", 0), + ... 
) + + Case-control study with odds ratios: + >>> result = munge_sumstats( + ... sumstats_file="case_control_gwas.txt.gz", + ... out_prefix="case_control_munged", + ... n_samples=10000, + ... merge_alleles="w_hm3.snplist", + ... signed_sumstats=("OR", 1), + ... a1_inc=True, + ... ) + + Just generate the command without running: + >>> result = munge_sumstats( + ... sumstats_file="height_gwas.txt.gz", out_prefix="height_munged", n_samples=253288, run=False + ... ) + >>> print(result["command"]) + + Notes + ----- + - The function expects summary statistics files to follow standard GWAS format + - Strand-ambiguous SNPs (A/T or G/C) are removed when merge_alleles is used + - The output file will be gzipped and named {out_prefix}.sumstats.gz + - It's highly recommended to use merge_alleles with a reference panel (e.g., HapMap3) + to ensure proper allele alignment + - For binary traits, signed_sumstats should typically be ("OR", 1) or ("BETA", 0) + - For quantitative traits, signed_sumstats is typically ("BETA", 0) or ("Z", 0) + """ + if runner is None: + runner = get_ldsc_runner() + + cmd = f"{runner.munge_command} --sumstats {sumstats_file} --out {out_prefix}" + + if n_samples is not None: + cmd += f" --N {n_samples}" + if merge_alleles is not None: + cmd += f" --merge-alleles {merge_alleles}" + if snplist is not None: + cmd += f" --merge {snplist}" + if info_min != 0.9: + cmd += f" --info-min {info_min}" + if maf_min != 0.01: + cmd += f" --maf-min {maf_min}" + if a1_inc: + cmd += " --a1-inc" + + if signed_sumstats is not None: + col, min_val = signed_sumstats + cmd += f" --signed-sumstats {col},{min_val}" + if p_col is not None: + cmd += f" --p {p_col}" + if a1_col is not None: + cmd += f" --a1 {a1_col}" + if a2_col is not None: + cmd += f" --a2 {a2_col}" + if snp_col is not None: + cmd += f" --snp {snp_col}" + if n_col is not None: + cmd += f" --N-col {n_col}" + if info_col is not None: + cmd += f" --info {info_col}" + + for flag, value in kwargs.items(): + if 
def _run_ldsc_estimate_ld_scores(
    bfile_prefix: str,
    out_prefix: str,
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    annot_file: str | None = None,
    thin_annot: bool = False,
    print_snps: str | None = None,
    maf_min: float = 0.01,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> str | None:
    """
    Build, and optionally execute, the ``ldsc.py --l2`` LD score command.

    Parameters
    ----------
    bfile_prefix : str
        PLINK fileset prefix; ``.bed``, ``.bim`` and ``.fam`` are appended.
    out_prefix : str
        Value passed to ``--out``.
    ld_wind_cm : float, default 1.0
        Window in centiMorgans. Acts as the fallback window: it is used only
        when neither ``ld_wind_kb`` nor ``ld_wind_snp`` is given.
    ld_wind_kb : int, optional
        Window in kilobases. Takes the place of ``ld_wind_cm``.
    ld_wind_snp : int, optional
        Window in number of SNPs. Takes the place of ``ld_wind_cm``.
    annot_file : str, optional
        Annotation file passed to ``--annot``.
    thin_annot : bool, default False
        Add ``--thin-annot``; only emitted together with ``annot_file``.
    print_snps : str, optional
        File passed to ``--print-snps``.
    maf_min : float, default 0.01
        Passed to ``--maf`` when it differs from 0.01.
    run : bool, default True
        Execute the command when True, otherwise only build it.
    runner : LDSCRunner, optional
        Runner to use; the global runner is fetched when omitted.
    **kwargs
        Extra flags. ``True`` adds a bare ``--flag``, ``False``/``None`` are
        dropped, anything else is emitted as ``--flag value``.

    Returns
    -------
    str or None
        ``"{out_prefix}.l2.ldscore.gz"`` when ``run`` is True, otherwise the
        result of ``runner._build_container_command``.

    Raises
    ------
    ValueError
        If more than one window is requested explicitly, or if no window is
        available at all.
    """
    if runner is None:
        runner = get_ldsc_runner()

    cmd = f"{runner.ldsc_command} --bfile {bfile_prefix} --l2 --out {out_prefix}"

    # ``ld_wind_cm`` has a non-None default, so counting it as "specified"
    # made ``ld_wind_kb``/``ld_wind_snp`` unusable unless the caller also
    # passed ``ld_wind_cm=None``. Only a value that differs from the signature
    # default is treated as an explicit centiMorgan request.
    default_cm = 1.0
    conflict_msg = "Only one of ld_wind_kb, ld_wind_snp, or ld_wind_cm may be specified."
    if ld_wind_kb is not None and ld_wind_snp is not None:
        raise ValueError(conflict_msg)
    alternative_given = ld_wind_kb is not None or ld_wind_snp is not None
    if alternative_given and ld_wind_cm is not None and ld_wind_cm != default_cm:
        raise ValueError(conflict_msg)

    if ld_wind_kb is not None:
        cmd += f" --ld-wind-kb {ld_wind_kb}"
    elif ld_wind_snp is not None:
        cmd += f" --ld-wind-snp {ld_wind_snp}"
    elif ld_wind_cm is not None:
        cmd += f" --ld-wind-cm {ld_wind_cm}"
    else:
        # Previously this produced the literal argument "--ld-wind-cm None".
        raise ValueError("One of ld_wind_kb, ld_wind_snp, or ld_wind_cm must be specified.")

    if annot_file is not None:
        cmd += f" --annot {annot_file}"
        if thin_annot:
            cmd += " --thin-annot"

    if print_snps is not None:
        cmd += f" --print-snps {print_snps}"

    # NOTE(review): the flag is omitted at the default value, so LDSC's own
    # MAF default applies in that case -- confirm it matches 0.01.
    if maf_min != 0.01:
        cmd += f" --maf {maf_min}"

    for flag, value in kwargs.items():
        if isinstance(value, bool):
            if value:
                cmd += f" --{flag}"
        elif value is not None:
            cmd += f" --{flag} {value}"

    cmd += " --yes-really"

    # Paths handed to the runner alongside the command.
    file_paths = [f"{bfile_prefix}.bed", f"{bfile_prefix}.bim", f"{bfile_prefix}.fam"]
    if annot_file:
        file_paths.append(annot_file)
    if print_snps:
        file_paths.append(print_snps)

    if not run:
        return runner._build_container_command(cmd, file_paths)

    logger.info(f"Estimating LD scores: {cmd}")
    runner.run_command(cmd, file_paths=file_paths, check=True)
    return f"{out_prefix}.l2.ldscore.gz"
def estimate_ld_scores_from_donor_data(
    dd: DonorData,
    out_prefix: str = "ldscores",
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    annot_file: str | None = None,
    thin_annot: bool = False,
    print_snps: str | None = None,
    maf_min: float = 0.01,
    cleanup_files: bool = True,
    plink_export_kwargs: dict | None = None,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate LD scores from a DonorData object.

    Exports ``dd.G`` to PLINK files under ``out_prefix`` and then delegates to
    ``estimate_ld_scores_from_bimfile`` using that same prefix for both the
    input fileset and the outputs.

    Parameters
    ----------
    dd : DonorData
        DonorData object; its ``G`` attribute is exported with ``to_plink``.
    out_prefix : str, default "ldscores"
        Prefix for output files, also used for the temporary PLINK files.
    ld_wind_cm : float, default 1.0
        LD window size in centiMorgans.
    ld_wind_kb : int, optional
        LD window size in kilobases (alternative to ``ld_wind_cm``).
    ld_wind_snp : int, optional
        LD window size in number of SNPs (alternative to ``ld_wind_cm``).
    annot_file : str, optional
        Annotation file for category-specific LD scores.
    thin_annot : bool, default False
        Forwarded to ``estimate_ld_scores_from_bimfile``.
    print_snps : str, optional
        File with SNP IDs to restrict LD score output.
    maf_min : float, default 0.01
        Minimum MAF threshold.
    cleanup_files : bool, default True
        Remove ``{out_prefix}.bim``, ``.fam`` and ``.bed`` afterwards. Only
        honoured when ``run`` is True, so a command returned with
        ``run=False`` still has its input files available.
    plink_export_kwargs : dict, optional
        Extra keyword arguments for ``to_plink``.
    run : bool, default True
        Whether to execute the command or just return it.
    runner : LDSCRunner, optional
        Runner instance to use; the global runner is fetched when omitted.
    **kwargs
        Additional arguments forwarded to ``estimate_ld_scores_from_bimfile``.

    Returns
    -------
    dict
        Whatever ``estimate_ld_scores_from_bimfile`` returns.

    Examples
    --------
    >>> result = estimate_ld_scores_from_donor_data(
    ...     dd=my_donor_data, out_prefix="my_ldscores", annot_file="immune_genes.annot.gz", ld_wind_cm=1.0
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    if plink_export_kwargs is None:
        plink_export_kwargs = {}

    try:
        logger.info("Exporting genotype data to PLINK format for LD score estimation")
        to_plink(dd.G, out_prefix, **plink_export_kwargs)

        results = estimate_ld_scores_from_bimfile(
            bfile_prefix=out_prefix,
            out_prefix=out_prefix,
            ld_wind_cm=ld_wind_cm,
            ld_wind_kb=ld_wind_kb,
            ld_wind_snp=ld_wind_snp,
            annot_file=annot_file,
            thin_annot=thin_annot,
            print_snps=print_snps,
            maf_min=maf_min,
            run=run,
            runner=runner,
            **kwargs,
        )
    finally:
        # Run the cleanup even when the export or LDSC raises, so a failed
        # run does not leave the temporary genotype files behind.
        if cleanup_files and run:
            for ext in (".bim", ".fam", ".bed"):
                filename = out_prefix + ext
                if os.path.isfile(filename):
                    os.remove(filename)
                    logger.info(f"Cleaned up file: {filename}")

    return results
def estimate_heritability(
    sumstats_file: str,
    ref_ld_chr: str,
    w_ld_chr: str,
    out_prefix: str,
    overlap_annot: bool = False,
    frqfile_chr: str | None = None,
    not_m_5_50: bool = False,
    print_coefficients: bool = False,
    print_delete_vals: bool = False,
    samp_prev: float | None = None,
    pop_prev: float | None = None,
    intercept_h2: float | None = None,
    no_intercept: bool = False,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate SNP heritability with LD Score regression.

    Validates the required inputs, forwards everything to
    ``_run_ldsc_heritability`` and wraps its return value in a dictionary.

    Parameters
    ----------
    sumstats_file : str
        Path to munged summary statistics file (.sumstats.gz)
    ref_ld_chr : str
        Prefix for reference LD scores (with @, e.g., "baseline.")
    w_ld_chr : str
        Prefix for regression weights (with @, e.g., "weights.")
    out_prefix : str
        Prefix for output files
    overlap_annot : bool, default False
        Use overlapping annotation model
    frqfile_chr : str, optional
        Prefix for allele frequency files (required with overlap_annot)
    not_m_5_50 : bool, default False
        Don't restrict to common SNPs for estimating h2
    print_coefficients : bool, default False
        Print coefficient estimates
    print_delete_vals : bool, default False
        Print delete values
    samp_prev : float, optional
        Sample prevalence (for binary traits)
    pop_prev : float, optional
        Population prevalence (for binary traits)
    intercept_h2 : float, optional
        Constrain the LD Score regression intercept
    no_intercept : bool, default False
        Force intercept to 1
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use; the global runner is fetched when omitted
    **kwargs
        Additional arguments passed to ldsc.py

    Returns
    -------
    dict
        With ``run=True``: ``'log_file'`` and ``'files_created'``.
        With ``run=False``: ``'command'``.

    Raises
    ------
    ValueError
        If ``sumstats_file``, ``ref_ld_chr`` or ``w_ld_chr`` is empty.

    Examples
    --------
    >>> result = estimate_heritability(
    ...     sumstats_file="height_munged.sumstats.gz",
    ...     ref_ld_chr="baseline_v1.2/baseline.",
    ...     w_ld_chr="weights_hm3_no_hla/weights.",
    ...     out_prefix="height_h2",
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    # Required inputs, checked in signature order so the first missing one is
    # the one reported.
    required = (
        ("sumstats_file", sumstats_file),
        ("ref_ld_chr", ref_ld_chr),
        ("w_ld_chr", w_ld_chr),
    )
    for label, supplied in required:
        if not supplied:
            raise ValueError(f"{label} is required")

    forwarded = {
        "sumstats_file": sumstats_file,
        "ref_ld_chr": ref_ld_chr,
        "w_ld_chr": w_ld_chr,
        "out_prefix": out_prefix,
        "overlap_annot": overlap_annot,
        "frqfile_chr": frqfile_chr,
        "not_m_5_50": not_m_5_50,
        "print_coefficients": print_coefficients,
        "print_delete_vals": print_delete_vals,
        "samp_prev": samp_prev,
        "pop_prev": pop_prev,
        "intercept_h2": intercept_h2,
        "no_intercept": no_intercept,
        "run": run,
        "runner": runner,
    }
    outcome = _run_ldsc_heritability(**forwarded, **kwargs)

    if not run:
        return {"command": outcome}
    return {"log_file": outcome, "files_created": [f"{out_prefix}.log"]}
def estimate_genetic_correlation(
    sumstats_files: list[str],
    ref_ld_chr: str,
    w_ld_chr: str,
    out_prefix: str,
    overlap_annot: bool = False,
    frqfile_chr: str | None = None,
    not_m_5_50: bool = False,
    print_coefficients: bool = False,
    print_delete_vals: bool = False,
    samp_prev: list[float] | None = None,
    pop_prev: list[float] | None = None,
    intercept_h2: list[float] | None = None,
    intercept_gencov: list[float] | None = None,
    no_intercept: bool = False,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Estimate genetic correlation with LD Score regression.

    Validates the required inputs, forwards everything to
    ``_run_ldsc_genetic_correlation`` and wraps its return value in a
    dictionary.

    Parameters
    ----------
    sumstats_files : list[str]
        List of paths to munged summary statistics files (.sumstats.gz);
        at least two are required
    ref_ld_chr : str
        Prefix for reference LD scores (with @, e.g., "baseline.")
    w_ld_chr : str
        Prefix for regression weights (with @, e.g., "weights.")
    out_prefix : str
        Prefix for output files
    overlap_annot : bool, default False
        Use overlapping annotation model
    frqfile_chr : str, optional
        Prefix for allele frequency files (required with overlap_annot)
    not_m_5_50 : bool, default False
        Don't restrict to common SNPs
    print_coefficients : bool, default False
        Print coefficient estimates
    print_delete_vals : bool, default False
        Print delete values
    samp_prev : list[float], optional
        Sample prevalences for each trait (use None for quantitative traits)
    pop_prev : list[float], optional
        Population prevalences for each trait
    intercept_h2 : list[float], optional
        Constrain h2 intercepts for each trait
    intercept_gencov : list[float], optional
        Constrain genetic covariance intercepts
    no_intercept : bool, default False
        Force intercepts to 1 and 0
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use; the global runner is fetched when omitted
    **kwargs
        Additional arguments passed to ldsc.py

    Returns
    -------
    dict
        With ``run=True``: ``'log_file'`` and ``'files_created'``.
        With ``run=False``: ``'command'``.

    Raises
    ------
    ValueError
        If fewer than two summary statistics files are given, or if
        ``ref_ld_chr`` or ``w_ld_chr`` is empty.

    Examples
    --------
    >>> result = estimate_genetic_correlation(
    ...     sumstats_files=["height_munged.sumstats.gz", "bmi_munged.sumstats.gz"],
    ...     ref_ld_chr="baseline_v1.2/baseline.",
    ...     w_ld_chr="weights_hm3_no_hla/weights.",
    ...     out_prefix="height_bmi_rg",
    ... )
    """
    if runner is None:
        runner = get_ldsc_runner()

    # A correlation needs a pair of traits, so one file is as invalid as none.
    if not sumstats_files or len(sumstats_files) < 2:
        raise ValueError("sumstats_files must contain at least 2 files for genetic correlation")
    for label, supplied in (("ref_ld_chr", ref_ld_chr), ("w_ld_chr", w_ld_chr)):
        if not supplied:
            raise ValueError(f"{label} is required")

    forwarded = {
        "sumstats_files": sumstats_files,
        "ref_ld_chr": ref_ld_chr,
        "w_ld_chr": w_ld_chr,
        "out_prefix": out_prefix,
        "overlap_annot": overlap_annot,
        "frqfile_chr": frqfile_chr,
        "not_m_5_50": not_m_5_50,
        "print_coefficients": print_coefficients,
        "print_delete_vals": print_delete_vals,
        "samp_prev": samp_prev,
        "pop_prev": pop_prev,
        "intercept_h2": intercept_h2,
        "intercept_gencov": intercept_gencov,
        "no_intercept": no_intercept,
        "run": run,
        "runner": runner,
    }
    outcome = _run_ldsc_genetic_correlation(**forwarded, **kwargs)

    if not run:
        return {"command": outcome}
    return {"log_file": outcome, "files_created": [f"{out_prefix}.log"]}
+ """ + if runner is None: + runner = get_ldsc_runner() + + if gene_set_file is None and bed_file is None: + raise ValueError("Either gene_set_file or bed_file must be provided") + + cmd = f"{runner.make_annot_command} --bimfile {bimfile} --annot-file {annot_file}" + + if gene_set_file is not None: + cmd += f" --gene-set-file {gene_set_file}" + + if gene_coord_file is not None: + cmd += f" --gene-coord-file {gene_coord_file}" + + if windowsize is not None: + cmd += f" --windowsize {windowsize}" + + if bed_file is not None: + cmd += f" --bed-file {bed_file}" + + if nomerge: + cmd += " --nomerge" + + for flag, value in kwargs.items(): + if isinstance(value, bool): + if value: + cmd += f" --{flag}" + elif value is not None: + cmd += f" --{flag} {value}" + + file_paths = [bimfile] + if gene_set_file: + file_paths.append(gene_set_file) + if gene_coord_file: + file_paths.append(gene_coord_file) + if bed_file: + file_paths.append(bed_file) + + if run: + logger.info(f"Creating annotation file: {cmd}") + runner.run_command(cmd, file_paths=file_paths, check=True) + return annot_file + else: + return runner._build_container_command(cmd, file_paths) + + +def make_annot_from_bimfile( + bimfile: str, + annot_file: str, + gene_set_file: str | None = None, + gene_coord_file: str | None = None, + windowsize: int | None = None, + bed_file: str | None = None, + nomerge: bool = False, + run: bool = True, + runner: LDSCRunner | None = None, + **kwargs, +) -> dict[str, Any]: + """ + Create annotation file from a PLINK bimfile + + This function creates binary annotation files that indicate which SNPs belong to + specific genomic regions or gene sets. These annotations can be used with LDSC + to compute category-specific LD scores. Works with any PLINK bimfile, including + standard reference panels like 1000 Genomes. + + Parameters + ---------- + bimfile : str + Path to PLINK .bim file (e.g., from 1000 Genomes reference panel). 
+ This defines the SNPs for which annotations will be created. + annot_file : str + The name of the annot file to output. Should typically end in .annot or .annot.gz + gene_set_file : str, optional + A file of gene names, one line per gene. Used for gene-set based annotations. + Either this or bed_file must be provided. + gene_coord_file : str, optional + A file with columns GENE, CHR, START, and END, where START and END are + base pair coordinates of TSS and TES. This file can contain more genes + than are in the gene set. Default ENSG_coord.txt is provided by LDSC. + Only used with gene_set_file. + windowsize : int, optional + How many base pairs to add around the transcribed region to make the annotation. + Only used with gene-set based annotations. Typical values: 0-500000 (0-500kb). + bed_file : str, optional + The UCSC bed file with the regions that make up your annotation. + Used for region-based annotations. Either this or gene_set_file must be provided. + nomerge : bool, default False + Don't merge the bed file; make an annot file with values proportional to + the number of intervals in the bedfile overlapping the SNP. Only used with bed_file. + run : bool, default True + Whether to execute the command or just return it + runner : LDSCRunner, optional + Runner instance to use. If None, uses the global runner. + **kwargs + Additional command line arguments to pass to make_annot.py + + Returns + ------- + dict + Results dictionary containing: + - 'annot_file': Path to the created annotation file + - 'files_created': List of files created (if run=True) + - 'command': Command string (if run=False) + + Raises + ------ + ValueError + If neither gene_set_file nor bed_file is provided + + Examples + -------- + Gene-set based annotation for chromosome 22: + >>> result = make_annot_from_bimfile( + ... bimfile="1000G_EUR_Phase3_plink/1000G.EUR.QC.22.bim", + ... annot_file="immune_genes.22.annot.gz", + ... gene_set_file="immune_genes.txt", + ... 
gene_coord_file="ENSG_coord.txt", + ... windowsize=100000, + ... ) + + BED-file based annotation for enhancer regions: + >>> result = make_annot_from_bimfile( + ... bimfile="1000G.EUR.QC.1.bim", annot_file="enhancers.1.annot.gz", bed_file="enhancers.bed" + ... ) + + Generate command without running: + >>> result = make_annot_from_bimfile( + ... bimfile="1000G.EUR.QC.22.bim", + ... annot_file="my_annot.22.annot.gz", + ... gene_set_file="my_genes.txt", + ... gene_coord_file="ENSG_coord.txt", + ... windowsize=50000, + ... run=False, + ... ) + >>> print(result["command"]) + + Notes + ----- + - Either gene_set_file or bed_file must be provided, but not both + - gene_coord_file and windowsize are only used with gene_set_file + - nomerge is only used with bed_file + - The output annotation file has one row per SNP in the bimfile, with 1 + indicating the SNP is in the annotation and 0 otherwise + - For whole-genome analyses, this should be run separately for each chromosome + - Typical workflow: Create annotations for chr 1-22, then compute LD scores + for each chromosome using these annotations + + See Also + -------- + make_annot_from_donor_data : Create annotations from DonorData object + estimate_ld_scores_from_bimfile : Compute LD scores using annotations + """ + if runner is None: + runner = get_ldsc_runner() + + results = {"annot_file": annot_file, "files_created": []} + + result_file = _run_ldsc_make_annot( + bimfile=bimfile, + annot_file=annot_file, + gene_set_file=gene_set_file, + gene_coord_file=gene_coord_file, + windowsize=windowsize, + bed_file=bed_file, + nomerge=nomerge, + run=run, + runner=runner, + **kwargs, + ) + + if run: + results["annot_file"] = result_file + results["files_created"].append(annot_file) + else: + results["command"] = result_file + + return results + + +def make_annot_from_donor_data( + dd: DonorData, + annot_file: str, + gene_set_file: str | None = None, + gene_coord_file: str | None = None, + windowsize: int | None = None, + bed_file: 
str | None = None, + nomerge: bool = False, + out_prefix: str = "ldsc_annot", + run: bool = True, + cleanup_files: bool = True, + plink_export_kwargs: dict | None = None, + runner: LDSCRunner | None = None, + **kwargs, +) -> dict[str, Any]: + """ + Create annotation file from DonorData object + + This convenience function exports genotype data from a DonorData object to PLINK + format, then creates binary annotation files that indicate which SNPs belong to + specific genomic regions or gene sets. These annotations can be used with LDSC + to compute category-specific LD scores. + + Parameters + ---------- + dd : DonorData + DonorData object containing genotype information + annot_file : str + The name of the annot file to output. Should typically end in .annot or .annot.gz + gene_set_file : str, optional + A file of gene names, one line per gene. Used for gene-set based annotations. + Either this or bed_file must be provided. + gene_coord_file : str, optional + A file with columns GENE, CHR, START, and END, where START and END are + base pair coordinates of TSS and TES. This file can contain more genes + than are in the gene set. Default ENSG_coord.txt is provided by LDSC. + Only used with gene_set_file. + windowsize : int, optional + How many base pairs to add around the transcribed region to make the annotation. + Only used with gene-set based annotations. Typical values: 0-500000 (0-500kb). + Common choices: + - 0: Only SNPs within gene body + - 10000: ±10kb around gene + - 100000: ±100kb around gene (default in many studies) + bed_file : str, optional + The UCSC bed file with the regions that make up your annotation. + Used for region-based annotations. Either this or gene_set_file must be provided. + nomerge : bool, default False + Don't merge the bed file; make an annot file with values proportional to + the number of intervals in the bedfile overlapping the SNP. Only used with bed_file. 
+ out_prefix : str, default "ldsc_annot" + Prefix for temporary PLINK files created during export + run : bool, default True + Whether to execute the command or just return it + cleanup_files : bool, default True + Whether to remove temporary PLINK files after creating annotations. + If True, removes {out_prefix}.bed, .bim, and .fam files. + plink_export_kwargs : dict, optional + Additional keyword arguments to pass to to_plink() + runner : LDSCRunner, optional + Runner instance to use. If None, uses the global runner. + **kwargs + Additional command line arguments to pass to make_annot.py + + Returns + ------- + dict + Results dictionary containing: + - 'annot_file': Path to the created annotation file + - 'files_created': List of files created (if run=True) + - 'command': Command string (if run=False) + + Raises + ------ + ValueError + If neither gene_set_file nor bed_file is provided + + Examples + -------- + Create gene-set annotation from DonorData: + >>> result = make_annot_from_donor_data( + ... dd=my_donor_data, + ... annot_file="immune_genes.annot.gz", + ... gene_set_file="immune_genes.txt", + ... gene_coord_file="ENSG_coord.txt", + ... windowsize=100000, + ... ) + + Create BED-file annotation for regulatory regions: + >>> result = make_annot_from_donor_data(dd=my_donor_data, annot_file="enhancers.annot.gz", bed_file="enhancers.bed") + + Keep temporary PLINK files for inspection: + >>> result = make_annot_from_donor_data( + ... dd=my_donor_data, + ... annot_file="my_annot.annot.gz", + ... gene_set_file="my_genes.txt", + ... gene_coord_file="ENSG_coord.txt", + ... windowsize=50000, + ... cleanup_files=False, + ... ) + + Generate command without running: + >>> result = make_annot_from_donor_data( + ... dd=my_donor_data, + ... annot_file="my_annot.annot.gz", + ... gene_set_file="my_genes.txt", + ... gene_coord_file="ENSG_coord.txt", + ... windowsize=100000, + ... run=False, + ... 
) + >>> print(result["command"]) + + Notes + ----- + - This function exports dd.G to PLINK format, creates the annotation, + then optionally cleans up the temporary PLINK files + - Either gene_set_file or bed_file must be provided, but not both + - The output annotation file has one row per SNP, with 1 indicating the SNP + is in the annotation and 0 otherwise + - gene_coord_file should contain coordinates for all genes you might annotate, + not just those in your specific gene set + - For gene-based annotations, the annotation includes SNPs within windowsize bp + of the transcribed region (TSS to TES) + - Temporary PLINK files are created in the current directory and cleaned up by + default, but you can set cleanup_files=False to keep them + + See Also + -------- + make_annot_from_bimfile : Create annotations from existing PLINK bimfile + estimate_ld_scores_from_donor_data : Compute LD scores from DonorData + """ + if plink_export_kwargs is None: + plink_export_kwargs = {} + + logger.info("Exporting genotype data to PLINK format for annotation creation") + to_plink(dd.G, out_prefix, **plink_export_kwargs) + bimfile = f"{out_prefix}.bim" + + results = _run_ldsc_make_annot( + bimfile=bimfile, + annot_file=annot_file, + gene_set_file=gene_set_file, + gene_coord_file=gene_coord_file, + windowsize=windowsize, + bed_file=bed_file, + nomerge=nomerge, + run=run, + runner=runner, + **kwargs, + ) + + if cleanup_files and run: + extensions = [".bim", ".fam", ".bed"] + for ext in extensions: + filename = out_prefix + ext + if os.path.isfile(filename): + os.remove(filename) + logger.info(f"Cleaned up file: {filename}") + + return results + + +def compute_ld_scores_with_annotations_from_bimfile( + bfile_prefix: str, + annot_file: str, + out_prefix: str, + ld_wind_cm: float = 1.0, + ld_wind_kb: int | None = None, + ld_wind_snp: int | None = None, + print_snps: str | None = None, + thin_annot: bool = True, + maf_min: float = 0.01, + yes_really: bool = True, + run: bool = True, + 
runner: LDSCRunner | None = None, + **kwargs, +) -> dict[str, Any]: + """ + Compute LD scores with cell-type-specific annotations from PLINK bfile + + This is the first step in cell-type-specific LDSC analysis. It computes + LD scores for SNPs while incorporating cell-type-specific gene annotations. + This function should be run for each chromosome and each cell type. + + Works with any PLINK bfile, including standard reference panels like 1000 Genomes. + + Parameters + ---------- + bfile_prefix : str + Path to PLINK binary files (without .bed/.bim/.fam extension). + Typically from 1000 Genomes reference panel, e.g., + "1000G_EUR_Phase3_plink/1000G.EUR.QC.22" + annot_file : str + Path to the annotation file created by make_annot_from_donor_data() + or make_annot_from_bimfile(). Should end in .annot.gz + Example: "CD8_Naive.22.annot.gz" + out_prefix : str + Prefix for output files. Will create: + - {out_prefix}.l2.ldscore.gz (LD scores) + - {out_prefix}.l2.M (number of SNPs) + - {out_prefix}.l2.M_5_50 (number of common SNPs) + - {out_prefix}.log (log file) + ld_wind_cm : float, default 1.0 + LD window size in centiMorgans. Only one of ld_wind_cm, ld_wind_kb, + or ld_wind_snp can be specified. + ld_wind_kb : int, optional + LD window size in kilobases (alternative to ld_wind_cm) + ld_wind_snp : int, optional + LD window size in number of SNPs (alternative to ld_wind_cm) + print_snps : str, optional + Path to file with SNP IDs (one per row) to restrict LD score computation. + Commonly used with HapMap3 SNPs (e.g., "hapmap3_snps/hm.22.snp"). + The sum r^2 will still include all SNPs, but only listed SNPs will + have LD scores computed. + thin_annot : bool, default True + Assume annotation files only have annotations (no SNP, CM, CHR, BP columns). + Should typically be True for annotations created by make_annot functions. 
+ maf_min : float, default 0.01 + Minimum minor allele frequency threshold + yes_really : bool, default True + Required flag for computing whole-chromosome LD scores + run : bool, default True + Whether to execute the command or just return it + runner : LDSCRunner, optional + Runner instance to use. If None, uses the global runner. + **kwargs + Additional command line arguments to pass to ldsc.py + + Returns + ------- + dict + Results dictionary containing: + - 'ld_scores_file': Path to LD scores file (if run=True) + - 'files_created': List of created files (if run=True) + - 'command': Command string (if run=False) + + Examples + -------- + Basic usage for chromosome 22: + >>> result = compute_ld_scores_with_annotations_from_bimfile( + ... bfile_prefix="1000G_EUR_Phase3_plink/1000G.EUR.QC.22", + ... annot_file="CD8_Naive.22.annot.gz", + ... out_prefix="CD8_Naive.22", + ... print_snps="hapmap3_snps/hm.22.snp", + ... ) + + For all chromosomes (in a loop): + >>> for chrom in range(1, 23): + ... result = compute_ld_scores_with_annotations_from_bimfile( + ... bfile_prefix=f"1000G_EUR/1000G.EUR.QC.{chrom}", + ... annot_file=f"CD8_Naive.{chrom}.annot.gz", + ... out_prefix=f"CD8_Naive.{chrom}", + ... print_snps=f"hapmap3_snps/hm.{chrom}.snp", + ... ) + + Just generate command without running: + >>> result = compute_ld_scores_with_annotations_from_bimfile( + ... bfile_prefix="1000G.EUR.QC.22", annot_file="CD8_Naive.22.annot.gz", out_prefix="CD8_Naive.22", run=False + ... 
) + >>> print(result["command"]) + + Notes + ----- + - This function is specifically for cell-type-specific analysis workflow + - Should be run separately for each chromosome (1-22) + - The annotation file should be created first using make_annot_from_donor_data() + or make_annot_from_bimfile() + - print_snps is typically used to restrict to HapMap3 SNPs for better + matching with standard reference LD scores + - After computing LD scores for all chromosomes, use + estimate_celltype_specific_heritability() for the actual analysis + + See Also + -------- + compute_ld_scores_with_annotations_from_donor_data : Compute from DonorData + make_annot_from_donor_data : Create annotations from DonorData + estimate_celltype_specific_heritability : Run cell-type-specific analysis + """ + if runner is None: + runner = get_ldsc_runner() + + cmd = f"{runner.ldsc_command} --l2 --bfile {bfile_prefix} --annot {annot_file} --out {out_prefix}" + + flags = [ld_wind_kb, ld_wind_snp, ld_wind_cm] + non_null_flags = sum(f is not None for f in flags) + + if non_null_flags > 1: + raise ValueError("Only one of ld_wind_kb, ld_wind_snp, or ld_wind_cm may be specified.") + + if ld_wind_kb is not None: + cmd += f" --ld-wind-kb {ld_wind_kb}" + elif ld_wind_snp is not None: + cmd += f" --ld-wind-snp {ld_wind_snp}" + else: + cmd += f" --ld-wind-cm {ld_wind_cm}" + + if thin_annot: + cmd += " --thin-annot" + + if print_snps is not None: + cmd += f" --print-snps {print_snps}" + + if maf_min != 0.01: + cmd += f" --maf {maf_min}" + + if yes_really: + cmd += " --yes-really" + + for flag, value in kwargs.items(): + if isinstance(value, bool): + if value: + cmd += f" --{flag}" + elif value is not None: + cmd += f" --{flag} {value}" + + file_paths = [f"{bfile_prefix}.bed", f"{bfile_prefix}.bim", f"{bfile_prefix}.fam", annot_file] + if print_snps: + file_paths.append(print_snps) + + if run: + logger.info(f"Computing LD scores with annotations: {cmd}") + runner.run_command(cmd, file_paths=file_paths, 
def compute_ld_scores_with_annotations_from_donor_data(
    dd: DonorData,
    annot_file: str,
    out_prefix: str = "ldscores_annot",
    ld_wind_cm: float = 1.0,
    ld_wind_kb: int | None = None,
    ld_wind_snp: int | None = None,
    print_snps: str | None = None,
    thin_annot: bool = True,
    maf_min: float = 0.01,
    yes_really: bool = True,
    cleanup_files: bool = True,
    plink_export_kwargs: dict | None = None,
    run: bool = True,
    runner: LDSCRunner | None = None,
    **kwargs,
) -> dict[str, Any]:
    """
    Compute LD scores with cell-type-specific annotations from a DonorData object

    Exports ``dd.G`` to PLINK files at ``out_prefix`` and then delegates to
    ``compute_ld_scores_with_annotations_from_bimfile`` using that same prefix
    for both the PLINK input and the LDSC output.

    Parameters
    ----------
    dd : DonorData
        DonorData object; its ``G`` attribute is exported with ``to_plink``.
    annot_file : str
        Path to the annotation file (e.g. "CD8_Naive.annot.gz").
    out_prefix : str, default "ldscores_annot"
        Prefix for the temporary PLINK files ({out_prefix}.bed/.bim/.fam) and
        for the LDSC outputs ({out_prefix}.l2.ldscore.gz, .l2.M, .l2.M_5_50, .log).
    ld_wind_cm : float, default 1.0
        LD window size in centiMorgans. Ignored when it is left at its default
        and ``ld_wind_kb`` or ``ld_wind_snp`` is given (see Notes).
    ld_wind_kb : int, optional
        LD window size in kilobases (alternative to ld_wind_cm)
    ld_wind_snp : int, optional
        LD window size in number of SNPs (alternative to ld_wind_cm)
    print_snps : str, optional
        Path to a file with SNP IDs to restrict LD score computation to.
    thin_annot : bool, default True
        Pass ``--thin-annot`` to LDSC.
    maf_min : float, default 0.01
        Minimum minor allele frequency threshold
    yes_really : bool, default True
        Pass ``--yes-really`` to LDSC.
    cleanup_files : bool, default True
        Remove {out_prefix}.bed, .bim and .fam afterwards. Only applies when
        ``run=True``; with ``run=False`` the files are kept because the
        returned command refers to them.
    plink_export_kwargs : dict, optional
        Additional keyword arguments forwarded to ``to_plink``.
    run : bool, default True
        Whether to execute the command or just return it
    runner : LDSCRunner, optional
        Runner instance to use. If None, uses the global runner.
    **kwargs
        Additional command line arguments forwarded to the bimfile variant.

    Returns
    -------
    dict
        Whatever ``compute_ld_scores_with_annotations_from_bimfile`` returns:
        'ld_scores_file' and 'files_created' when ``run=True``, or 'command'
        when ``run=False``.

    Raises
    ------
    ValueError
        If more than one of ld_wind_cm, ld_wind_kb, ld_wind_snp is effectively
        set. Raised before any file is written.

    Examples
    --------
    >>> result = compute_ld_scores_with_annotations_from_donor_data(
    ...     dd=my_donor_data,
    ...     annot_file="CD8_Naive.annot.gz",
    ...     out_prefix="CD8_Naive_ldscores",
    ...     print_snps="hapmap3_snps.txt",
    ... )

    Use a kilobase window instead of the default centiMorgan window:
    >>> result = compute_ld_scores_with_annotations_from_donor_data(
    ...     dd=my_donor_data, annot_file="CD8_Naive.annot.gz", ld_wind_kb=1000
    ... )

    Just generate the command (PLINK files are still exported and kept):
    >>> result = compute_ld_scores_with_annotations_from_donor_data(
    ...     dd=my_donor_data, annot_file="CD8_Naive.annot.gz", run=False
    ... )
    >>> print(result["command"])

    Notes
    -----
    - The bimfile variant rejects calls where more than one window option is
      not None. Because ``ld_wind_cm`` defaults to 1.0 here, it is dropped
      whenever it still equals that default and an alternative window is
      supplied; an explicit ``ld_wind_cm=1.0`` cannot be told apart from the
      default and is treated the same way.
    - The annotation file must match the SNPs in the DonorData object.
    - Temporary PLINK files are removed even when the export or the LDSC run
      raises, provided ``cleanup_files`` and ``run`` are both True.

    See Also
    --------
    compute_ld_scores_with_annotations_from_bimfile : Compute from existing PLINK files
    make_annot_from_donor_data : Create annotations from DonorData
    estimate_celltype_specific_heritability : Run cell-type-specific analysis
    """
    default_ld_wind_cm = 1.0

    if runner is None:
        runner = get_ldsc_runner()

    export_kwargs = {} if plink_export_kwargs is None else plink_export_kwargs

    # The delegate counts every non-None window option, so an untouched
    # default would collide with an explicitly requested kb/SNP window.
    alternative_window = ld_wind_kb is not None or ld_wind_snp is not None
    if alternative_window and ld_wind_cm == default_ld_wind_cm:
        ld_wind_cm = None

    # Fail before the (potentially expensive) PLINK export.
    if sum(flag is not None for flag in (ld_wind_kb, ld_wind_snp, ld_wind_cm)) > 1:
        raise ValueError("Only one of ld_wind_kb, ld_wind_snp, or ld_wind_cm may be specified.")

    try:
        logger.info("Exporting genotype data to PLINK format for LD score computation")
        to_plink(dd.G, out_prefix, **export_kwargs)

        results = compute_ld_scores_with_annotations_from_bimfile(
            bfile_prefix=out_prefix,
            annot_file=annot_file,
            out_prefix=out_prefix,
            ld_wind_cm=ld_wind_cm,
            ld_wind_kb=ld_wind_kb,
            ld_wind_snp=ld_wind_snp,
            print_snps=print_snps,
            thin_annot=thin_annot,
            maf_min=maf_min,
            yes_really=yes_really,
            run=run,
            runner=runner,
            **kwargs,
        )
    finally:
        # Keep the files when only the command was requested: it points at them.
        if cleanup_files and run:
            for ext in (".bim", ".fam", ".bed"):
                filename = out_prefix + ext
                if os.path.isfile(filename):
                    os.remove(filename)
                    logger.info(f"Cleaned up file: {filename}")

    return results
+ B_cells cts_ldscores/B_cells. + ``` + + LDSC will look for files like: + cts_ldscores/CD8_Naive.1.l2.ldscore.gz through + cts_ldscores/CD8_Naive.22.l2.ldscore.gz + out_prefix : str + Prefix for output files. Will create: + - {out_prefix}.cell_type_results.txt (main results) + - {out_prefix}.log (log file) + print_all_cts : bool, default False + Print results for all cell types (not just significant ones) + run : bool, default True + Whether to execute the command or just return it + runner : LDSCRunner, optional + Runner instance to use. If None, uses the global runner. + **kwargs + Additional command line arguments to pass to ldsc.py + + Returns + ------- + dict + Results dictionary containing: + - 'results_file': Path to cell type results file (if run=True) + - 'log_file': Path to log file (if run=True) + - 'files_created': List of created files (if run=True) + - 'command': Command string (if run=False) + + Examples + -------- + Basic usage after computing LD scores: + >>> # First create control file + >>> with open("celltype_ldscores.txt", "w") as f: + ... f.write("CD8_Naive\\tcts_ldscores/CD8_Naive.\\n") + ... f.write("CD4_Memory\\tcts_ldscores/CD4_Memory.\\n") + ... f.write("B_cells\\tcts_ldscores/B_cells.\\n") + + >>> # Run cell-type-specific analysis + >>> result = estimate_celltype_specific_heritability( + ... sumstats_file="height_munged.sumstats.gz", + ... ref_ld_chr="baseline_v1.2/baseline.", + ... w_ld_chr="weights_hm3_no_hla/weights.", + ... ref_ld_chr_cts="celltype_ldscores.txt", + ... out_prefix="height_celltype_results", + ... ) + + Complete workflow example: + >>> # 1. Prepare annotations for each cell type and chromosome + >>> for cell_type in ["CD8_Naive", "CD4_Memory"]: + ... for chrom in range(1, 23): + ... make_annot_from_donor_data( + ... dd=dd_chr, + ... annot_file=f"annots/{cell_type}.{chrom}.annot.gz", + ... gene_set_file=f"genesets/{cell_type}.GeneSet", + ... gene_coord_file="gene_coords.txt", + ... ) + + >>> # 2. 
Compute LD scores for each cell type and chromosome + >>> for cell_type in ["CD8_Naive", "CD4_Memory"]: + ... for chrom in range(1, 23): + ... compute_ld_scores_with_annotations( + ... bfile_prefix=f"1000G/1000G.EUR.QC.{chrom}", + ... annot_file=f"annots/{cell_type}.{chrom}.annot.gz", + ... out_prefix=f"cts_ldscores/{cell_type}.{chrom}", + ... print_snps=f"hapmap3/hm.{chrom}.snp", + ... ) + + >>> # 3. Create control file + >>> with open("celltype_ldscores.txt", "w") as f: + ... f.write("CD8_Naive\\tcts_ldscores/CD8_Naive.\\n") + ... f.write("CD4_Memory\\tcts_ldscores/CD4_Memory.\\n") + + >>> # 4. Run cell-type-specific analysis + >>> result = estimate_celltype_specific_heritability( + ... sumstats_file="disease_munged.sumstats.gz", + ... ref_ld_chr="baseline_v1.2/baseline.", + ... w_ld_chr="weights_hm3_no_hla/weights.", + ... ref_ld_chr_cts="celltype_ldscores.txt", + ... out_prefix="disease_celltype", + ... ) + + Notes + ----- + - This function performs the final cell-type-specific heritability analysis + - Requires baseline LD scores and weights (can be downloaded from LDSC resources) + - The ref_ld_chr_cts file format is critical: tab-separated, cell type name + then prefix with @ or chromosome numbers appended + - Tests whether heritability is enriched in genes specific to each cell type + - Results show coefficient estimates and p-values for each cell type + - Significant positive coefficients indicate heritability enrichment in that cell type + + See Also + -------- + compute_ld_scores_with_annotations : Compute LD scores with annotations + make_annot_from_donor_data : Create cell-type-specific annotations + munge_sumstats : Prepare GWAS summary statistics + """ + if runner is None: + runner = get_ldsc_runner() + + if not sumstats_file: + raise ValueError("sumstats_file is required") + if not ref_ld_chr: + raise ValueError("ref_ld_chr is required") + if not w_ld_chr: + raise ValueError("w_ld_chr is required") + if not ref_ld_chr_cts: + raise 
ValueError("ref_ld_chr_cts is required") + + cmd = ( + f"{runner.ldsc_command} --h2-cts {sumstats_file} " + f"--ref-ld-chr {ref_ld_chr} " + f"--w-ld-chr {w_ld_chr} " + f"--ref-ld-chr-cts {ref_ld_chr_cts} " + f"--out {out_prefix}" + ) + + if print_all_cts: + cmd += " --print-all-cts" + + for flag, value in kwargs.items(): + if isinstance(value, bool): + if value: + cmd += f" --{flag}" + elif value is not None: + cmd += f" --{flag} {value}" + + file_paths = [sumstats_file, ref_ld_chr, w_ld_chr, ref_ld_chr_cts] + + if run: + logger.info(f"Running cell-type-specific heritability analysis: {cmd}") + runner.run_command(cmd, file_paths=file_paths, check=True) + + return { + "results_file": f"{out_prefix}.cell_type_results.txt", + "log_file": f"{out_prefix}.log", + "files_created": [f"{out_prefix}.cell_type_results.txt", f"{out_prefix}.log"], + } + else: + return {"command": runner._build_container_command(cmd, file_paths)} \ No newline at end of file diff --git a/src/cellink/tl/external/_ldsc2magma.py b/src/cellink/tl/external/_ldsc2magma.py new file mode 100644 index 0000000..61c9faf --- /dev/null +++ b/src/cellink/tl/external/_ldsc2magma.py @@ -0,0 +1,274 @@ +import logging +from pathlib import Path +import pandas as pd + +logger = logging.getLogger(__name__) + +#TODO: HOW is MAGMA CALLED THEN? +def load_ensembl_to_entrez_map(map_tsv: str | Path) -> pd.Series: #TODO: Is this a private function or not. Where is this used? Can't find any anywhere this is called? + """ + Load a mapping TSV with columns: + ensembl_gene_id entrez_id + Returns a Series indexed by ENSG (upper, no version) with values as string Entrez IDs. 
+ """ + #TODO EXPAND ON DOCUMENTATION IF PUBLIC FUNCTION + map_tsv = Path(map_tsv) + df = pd.read_csv(map_tsv, sep="\t", dtype=str) + + # Accept a few common header variants + col_ens = None + for c in ["ensembl_gene_id", "ENSG", "ensembl", "gene_id"]: + if c in df.columns: + col_ens = c + break + if col_ens is None: + raise ValueError(f"Mapping file missing Ensembl column. Found: {list(df.columns)}") + + col_ent = None + for c in ["entrez_id", "entrezgene", "entrez", "ENTREZID", "ncbi_gene_id"]: + if c in df.columns: + col_ent = c + break + if col_ent is None: + raise ValueError(f"Mapping file missing Entrez column. Found: {list(df.columns)}") + + ens = df[col_ens].astype(str).str.strip().str.upper().str.replace(r"\..*$", "", regex=True) + ent = df[col_ent].astype(str).str.strip() + + m = pd.Series(ent.values, index=ens.values) + m = m[~m.index.duplicated(keep="first")] + return m + + +def genesets_dir_to_entrez_gmt( + *, + geneset_dir: str | Path = "ldsc_genesets", + out_gmt: str | Path | None = None, + ensembl_to_entrez_tsv: str | Path | None = None, + pattern: str = "*.GeneSet", + description: str = "S-LDSC derived gene set (Entrez)", + include_control: bool = False, + control_name: str = "Control", + remove_version_suffix: bool = True, + uppercase: bool = True, + min_genes: int = 1, + sort_genes: bool = False, + dedup_genes: bool = True, + drop_unmapped: bool = True, + allow_mygene_fallback: bool = False, + species: str = "human", + output_basename: str = "genesets.gmt", +) -> Path: + """ + Convert *.GeneSet (Ensembl IDs) -> MAGMA .gmt with Entrez IDs. + + Defaults: + - reads from ./ldsc_genesets + - writes to a sibling directory ./magma_genesets/genesets.gmt + (magma_genesets is created if needed) + + Preferred: provide ensembl_to_entrez_tsv for offline mapping. + Optional: allow_mygene_fallback=True to query mygene.info (needs internet). 
+ """ + #TODO EXPAND ON DOCUMENTATION SINCE PUBLIC FUNCTION + geneset_dir = Path(geneset_dir).resolve() + + # ---- Default output location: sibling magma_genesets next to ldsc_genesets ---- + if out_gmt is None: + magma_dir = geneset_dir.parent / "magma_genesets" + magma_dir.mkdir(parents=True, exist_ok=True) + out_gmt = magma_dir / output_basename + else: + out_gmt = Path(out_gmt) + out_gmt.parent.mkdir(parents=True, exist_ok=True) + + # ---- Find input GeneSet files ---- + files = sorted(geneset_dir.glob(pattern)) + if not include_control: + files = [p for p in files if p.stem != control_name] + if not files: + raise FileNotFoundError(f"No files matched {pattern} in {geneset_dir}") + + # ---- Load offline map if provided ---- + ens2ent = None + if ensembl_to_entrez_tsv is not None: + ens2ent = load_ensembl_to_entrez_map(ensembl_to_entrez_tsv) + logger.info(f"Loaded Ensembl→Entrez map with {ens2ent.shape[0]} entries") + + # ---- Optional online fallback ---- + mg = None + if allow_mygene_fallback: + try: + import mygene # type: ignore + mg = mygene.MyGeneInfo() + except Exception as e: + raise ImportError("allow_mygene_fallback=True requires 'mygene' (pip install mygene).") from e + + def norm_ens(g: str) -> str: + g = str(g).strip() + if remove_version_suffix: + g = g.split(".", 1)[0] + if uppercase: + g = g.upper() + return g + + def map_to_entrez(ens_genes: list[str]) -> list[str]: + """Return list of Entrez IDs (strings), filtered if drop_unmapped.""" + ens_genes = [norm_ens(g) for g in ens_genes if str(g).strip()] + if dedup_genes: + seen = set() + ens_genes = [g for g in ens_genes if not (g in seen or seen.add(g))] + + entrez: list[str] = [] + missing: list[str] = [] + + if ens2ent is not None: + mapped = ens2ent.reindex(ens_genes) + for g, e in zip(ens_genes, mapped.values): + if pd.isna(e) or str(e) in ["", "nan", "None"]: + missing.append(g) + else: + entrez.append(str(e)) + else: + missing = ens_genes[:] # everything missing if no offline map + + + # 
mygene fallback for missing + if mg is not None and missing: #TODO: Let's instead of mygene rely on biomart mappings used throughout the package, e.g. in _sldsc_utils.py + res = mg.querymany( + missing, + scopes="ensembl.gene", + fields="entrezgene", + species=species, + as_dataframe=True, + returnall=False, + ) + if not isinstance(res, pd.DataFrame): + res = pd.DataFrame(res) + + # Ensure a 'query' column exists + if "query" not in res.columns: + res = res.reset_index() + if "query" not in res.columns: + res = res.rename(columns={res.columns[0]: "query"}) + + if "entrezgene" in res.columns: + # Normalize query ids + res["query_norm"] = ( + res["query"] + .astype(str) + .str.strip() + .str.upper() + .str.replace(r"\..*$", "", regex=True) + ) + + # Keep only rows with an actual entrezgene value + res = res.dropna(subset=["entrezgene"]) + + # If multiple hits per query, keep the first (or you can choose another policy) + res = res.drop_duplicates(subset=["query_norm"], keep="first") + + # Build mapping Series with unique index + map2 = pd.Series( + res["entrezgene"].astype("Int64").astype(str).values, + index=res["query_norm"].values, + ) + + for g in missing: + key = norm_ens(g) + e = map2.get(key, None) + + # e should now be scalar; still guard against weird cases + if isinstance(e, pd.Series): + # pick first non-null if somehow still a Series + e = e.dropna().iloc[0] if not e.dropna().empty else None + + if e is None or str(e) in ["", "nan", "None", ""]: + if not drop_unmapped: + continue + else: + entrez.append(str(e)) + + + # De-dup / sort output + if dedup_genes: + seen = set() + entrez = [g for g in entrez if not (g in seen or seen.add(g))] + if sort_genes: + entrez = sorted(entrez) + + return entrez + + n_written = 0 + n_skipped = 0 + + with Path(out_gmt).open("w", encoding="utf-8") as out: + for fp in files: + set_name = fp.stem + + with fp.open("r", encoding="utf-8") as f: + ens_genes = [line.strip() for line in f if line.strip()] + + entrez_genes = 
map_to_entrez(ens_genes) + + if len(entrez_genes) < min_genes: + logger.warning(f"Skipping {fp.name}: only {len(entrez_genes)} mapped Entrez genes") + n_skipped += 1 + continue + + row = [set_name, description] + entrez_genes + out.write("\t".join(row) + "\n") + n_written += 1 + + logger.info(f"Wrote {n_written} gene sets to {out_gmt} (skipped {n_skipped})") + return Path(out_gmt) + +#TODO: REMOVE THIS MAIN BLOCK IF THIS IS A PRIVATE MODULE +if __name__ == "__main__": + import argparse + + logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s") + + p = argparse.ArgumentParser(description="Convert Ensembl .GeneSet files to MAGMA .gmt with Entrez IDs") + p.add_argument( + "--geneset_dir", + default="ldsc_genesets", + help="Directory containing *.GeneSet (default: ldsc_genesets)", + ) + p.add_argument( + "--out_gmt", + default=None, + help="Optional output .gmt path. If omitted, writes to sibling magma_genesets/genesets.gmt", + ) + p.add_argument( + "--map_tsv", + default=None, + help="TSV with columns ensembl_gene_id and entrez_id (offline mapping)", + ) + p.add_argument("--include_control", action="store_true") + p.add_argument( + "--allow_mygene_fallback", + action="store_true", + help="Use mygene.info for unmapped genes (needs internet)", + ) + p.add_argument( + "--pattern", + default="*.GeneSet", + help="Glob pattern for gene set files (default: *.GeneSet)", + ) + p.add_argument( + "--output_basename", + default="genesets.gmt", + help="Output filename when using default magma_genesets directory (default: genesets.gmt)", + ) + args = p.parse_args() + + genesets_dir_to_entrez_gmt( + geneset_dir=args.geneset_dir, + out_gmt=args.out_gmt, + ensembl_to_entrez_tsv=args.map_tsv, + include_control=args.include_control, + allow_mygene_fallback=args.allow_mygene_fallback, + pattern=args.pattern, + output_basename=args.output_basename, + ) \ No newline at end of file diff --git a/src/cellink/tl/external/_sldsc_utils.py 
import logging
import os
import re
from pathlib import Path
from typing import Literal

import h5py
import numexpr as ne
import numpy as np
import pandas as pd
import scanpy as sc
from anndata import AnnData
from scipy import sparse
from tqdm import tqdm

logger = logging.getLogger(__name__)

# MHC region applied when the caller leaves mhc_chr/mhc_start/mhc_end unset
# (chr6:25-34 Mb, as stated in the preprocess_for_sldsc docstring).
_DEFAULT_MHC_CHR = "6"
_DEFAULT_MHC_START = 25_000_000
_DEFAULT_MHC_END = 34_000_000


def _cluster_mean_expression(adata: AnnData, celltype_col: str, *, log_transform: bool) -> pd.DataFrame:
    """Return a genes x cell types table holding the per-cluster mean of (optionally log1p) expression."""
    clusters_cat = adata.obs[celltype_col].astype("category")
    cluster_names = clusters_cat.cat.categories.to_list()
    cluster_codes = clusters_cat.cat.codes.to_numpy()
    n_cells, n_genes = adata.shape
    logger.info(f"n_cells = {n_cells}, n_genes = {n_genes}, n_clusters = {len(cluster_names)}")

    # genes x clusters; a category without cells keeps its all-zero column
    avg_matrix = np.zeros((n_genes, len(cluster_names)), dtype=np.float64)
    X = adata.X  # csr_matrix or dense

    if log_transform:
        logger.info("Applying log1p transformation")
    for j in tqdm(range(len(cluster_names)), desc="Aggregating clusters"):
        idx = np.flatnonzero(cluster_codes == j)
        if idx.size == 0:
            continue

        # cells of this cluster x genes, densified one cluster at a time
        X_sub = X[idx, :]
        if hasattr(X_sub, "toarray"):
            X_sub = X_sub.toarray()
        if log_transform:
            # numexpr resolves X_sub from the local scope
            X_sub = ne.evaluate("log1p(X_sub)")

        avg_matrix[:, j] = np.asarray(X_sub.mean(axis=0)).ravel()

    return pd.DataFrame(avg_matrix, index=adata.var_names, columns=cluster_names)


def preprocess_for_sldsc(
    adata: AnnData,
    *,
    celltype_col: str,
    log_transform: bool = True,
    filter_protein_coding: bool = True,
    filter_expressed: bool = True,
    filter_mhc: bool = True,
    mhc_chr: str | None = None,
    mhc_start: int | None = None,
    mhc_end: int | None = None,
    fetch_annotation: bool = True,
    genome_build: Literal["GRCh37", "GRCh38"] = "GRCh37",
    gene_identifier_mode: str = "name",
    remove_version_suffix: bool = True,
    gene_col: str | None = "gene",
    biotype_col: str | None = None,
    chr_col: str | None = None,
    start_col: str | None = None,
    end_col: str | None = None,
    inplace: bool = True,
) -> tuple[AnnData, pd.DataFrame, pd.DataFrame] | None:
    """
    Preprocess single-cell data for S-LDSC cell-type-specific analysis.

    Steps, in order:
    - obtain gene annotation (fetched via ``_fetch_ensembl_annotation`` or
      taken from existing ``adata.var`` columns)
    - gene filtering (protein-coding, expressed, MHC exclusion)
    - per-cell-type mean of log1p expression
    - per-cell-type normalisation to a column sum of 1000
    - specificity scores (Duncan et al. 2025; doi:10.1038/s41593-024-01834-w)

    Parameters
    ----------
    adata
        Annotated data matrix of shape `n_obs` x `n_vars` (cells x genes).
    celltype_col
        Column name in `adata.obs` containing cell type labels.
    log_transform
        Whether to apply log1p before averaging. Set to False if ``adata.X``
        is already log-transformed; the values are then averaged as they are.
    filter_protein_coding
        Whether to keep protein-coding genes only.
    filter_expressed
        Whether to drop genes with zero total expression across all cells.
    filter_mhc
        Whether to exclude genes overlapping the MHC region.
    mhc_chr
        Chromosome containing the MHC region. None means "6". A leading
        "chr" is ignored on both this value and the chromosome column.
    mhc_start
        Start of the MHC region in base pairs. None means 25,000,000.
    mhc_end
        End of the MHC region in base pairs. None means 34,000,000.
    fetch_annotation
        Whether to fetch gene annotations. If False, annotation columns are
        looked up in ``adata.var``.
    genome_build
        Genome build version: "GRCh37" or "GRCh38". Only used if fetch_annotation=True.
    gene_identifier_mode
        Gene identifier: "name" or "ensembl". Only used if fetch_annotation=True.
    remove_version_suffix
        Whether to strip version suffixes (e.g., ENSG00000123456.7 →
        ENSG00000123456). Only used if fetch_annotation=True.
    gene_col
        Column in ``adata.var`` holding gene symbols or IDs. If None and
        fetch_annotation=True, ``adata.var_names`` is copied into a new
        ``"gene"`` column and used; if fetch_annotation=False it is auto-detected.
    biotype_col, chr_col, start_col, end_col
        Annotation column names; auto-detected if None. Overridden with
        "gene_biotype", "chrom", "start", "end" when fetch_annotation=True.
    inplace
        If False, return the results. If True, return None (see Notes).

    Returns
    -------
    AnnData, pd.DataFrame, pd.DataFrame
        - Filtered AnnData object (copy)
        - Mean expression, genes x cell types, each column scaled to sum to 1000
        - Specificity, genes x cell types: share of a gene's scaled expression
          that falls in each cell type
        Returns None if inplace=True.

    Raises
    ------
    ValueError
        If celltype_col is not present in adata.obs.

    Notes
    -----
    - ``adata.var`` of the object passed in is modified before filtering
      (helper columns are added, or ``var_names`` reassigned when
      fetch_annotation=False), whatever ``inplace`` is.
    - The gene filter is applied to a copy, so with ``inplace=True`` the
      caller's object is NOT subset and the two tables are discarded; a
      warning is logged. Use ``inplace=False`` to obtain the results.
    - Genes whose name occurs more than once after filtering are removed from
      both tables but remain in the returned AnnData.
    - A cell type with zero total expression, or a gene with zero total across
      cell types, yields NaN after normalisation.

    Examples
    --------
    >>> # Using GRCh37 (default)
    >>> adata_filtered, mean_expr, specificity = preprocess_for_sldsc(adata, celltype_col="cell_type", inplace=False)
    >>> # Using GRCh38
    >>> adata_filtered, mean_expr, specificity = preprocess_for_sldsc(
    ...     adata, celltype_col="cell_type", genome_build="GRCh38", inplace=False
    ... )
    """
    if celltype_col not in adata.obs.columns:
        raise ValueError(f"Column '{celltype_col}' not found in adata.obs")

    mhc_chr = _DEFAULT_MHC_CHR if mhc_chr is None else mhc_chr
    mhc_start = _DEFAULT_MHC_START if mhc_start is None else mhc_start
    mhc_end = _DEFAULT_MHC_END if mhc_end is None else mhc_end

    if fetch_annotation:
        anno_df = _fetch_ensembl_annotation(genome_build=genome_build, gene_identifier_mode=gene_identifier_mode)
        if gene_col is None:
            # Fall back to var_names and make sure gene_col names a real column.
            gene_col = "gene"
            adata.var[gene_col] = adata.var_names
        adata.var["gene_upper"] = adata.var[gene_col].str.upper()
        if remove_version_suffix:
            logger.info("Removing version suffixes from Gene IDs")
            adata.var["gene_upper"] = adata.var["gene_upper"].str.replace(r"\..*$", "", regex=True)

        adata = _map_gene_annotation(adata, anno_df, gene_col)

        biotype_col = "gene_biotype"
        chr_col = "chrom"
        start_col = "start"
        end_col = "end"
    else:
        gene_col = _pick_var_col(adata, ["gene_symbol", "gene_name", "symbol", "hgnc_symbol", "gene"], gene_col)
        biotype_col = _pick_var_col(adata, ["gene_biotype", "biotype", "feature_biotype", "gene_type"], biotype_col)
        chr_col = _pick_var_col(adata, ["chrom", "chr", "chromosome", "seqname"], chr_col)
        start_col = _pick_var_col(adata, ["start", "start_position", "gene_start"], start_col)
        end_col = _pick_var_col(adata, ["end", "end_position", "gene_end"], end_col)
        adata.var_names = adata.var[gene_col].astype(str)
        adata.var_names_make_unique()

    logger.info(
        f"Using annotation columns: gene={gene_col}, biotype={biotype_col}, chr={chr_col}, start={start_col}, end={end_col}"
    )

    logger.info("Applying gene filters")
    # Boolean numpy masks are combined positionally, so duplicated var names
    # cannot cause index-alignment surprises.
    masks: dict[str, np.ndarray] = {}

    if filter_protein_coding and biotype_col:
        biotype = adata.var[biotype_col].astype(str).str.lower()
        masks["protein_coding"] = biotype.isin(["protein_coding", "protein-coding", "protein coding"]).to_numpy()
        logger.info(f"Protein-coding genes: {masks['protein_coding'].sum()}")
    elif filter_protein_coding:
        logger.warning("No biotype column found; skipping protein-coding filter")

    if filter_expressed:
        gene_sum = np.asarray(adata.X.sum(axis=0)).ravel()
        masks["expressed"] = gene_sum > 0
        logger.info(f"Expressed genes: {masks['expressed'].sum()}")

    if filter_mhc and all([chr_col, start_col, end_col]):
        chrom = adata.var[chr_col].astype(str).str.replace(r"^chr", "", regex=True, flags=re.IGNORECASE)
        target_chrom = re.sub(r"^chr", "", str(mhc_chr), flags=re.IGNORECASE)
        gene_start = pd.to_numeric(adata.var[start_col], errors="coerce")
        gene_end = pd.to_numeric(adata.var[end_col], errors="coerce")
        # Genes with unknown coordinates compare False and are therefore kept.
        overlaps_mhc = (chrom == target_chrom) & (gene_end >= mhc_start) & (gene_start <= mhc_end)
        masks["not_mhc"] = ~overlaps_mhc.to_numpy(dtype=bool)
        logger.info(f"Non-MHC genes: {masks['not_mhc'].sum()}")
    elif filter_mhc:
        logger.warning("Missing chr/start/end columns; skipping MHC filter")

    mask_keep = np.ones(adata.n_vars, dtype=bool)
    for mask in masks.values():
        mask_keep &= mask

    n_before = adata.n_vars
    n_after = int(mask_keep.sum())
    logger.info(f"Keeping {n_after} / {n_before} genes after filtering")

    adata = adata[:, mask_keep].copy()

    cluster_means = _cluster_mean_expression(adata, celltype_col, log_transform=log_transform)

    # Name the index explicitly: reset_index() would otherwise emit whatever
    # name var_names carries (e.g. "gene_symbol") instead of "gene".
    exp_wide = cluster_means.rename_axis("gene").reset_index()

    # Keep only genes whose name occurs exactly once.
    exp = exp_wide.loc[~exp_wide["gene"].duplicated(keep=False)]

    exp = exp.melt(
        id_vars="gene",
        var_name="ClusterID",
        value_name="Expr_sum_mean",
    )

    logger.info(f"Computing mean expression for {celltype_col}")
    # Scale every cluster so that its genes sum to 1000.
    exp["Expr_sum_mean"] = exp["Expr_sum_mean"] * 1000.0 / exp.groupby("ClusterID")["Expr_sum_mean"].transform("sum")
    mean_expr_df = exp.pivot(index="gene", columns="ClusterID", values="Expr_sum_mean")

    logger.info("Computing specificity scores")
    # Fraction of a gene's scaled total that comes from each cluster.
    exp["specificity"] = exp["Expr_sum_mean"] / exp.groupby("gene")["Expr_sum_mean"].transform("sum")
    specificity_df = exp.pivot(index="gene", columns="ClusterID", values="specificity")

    # NaN (from a zero denominator) also fails this check.
    if not ((specificity_df.values >= 0) & (specificity_df.values <= 1)).all():
        logger.warning("Some specificity values outside [0, 1] range")

    logger.info(f"Final data shape: {adata.shape}")
    logger.info(f"Mean expression shape: {mean_expr_df.shape}")
    logger.info(f"Specificity shape: {specificity_df.shape}")

    if inplace:
        logger.warning(
            "inplace=True returns None: the filtered AnnData, mean expression and specificity tables "
            "are discarded. Pass inplace=False to obtain them."
        )
        return None
    return adata, mean_expr_df, specificity_df
+ """ + out_dir = Path(out_dir) + + # ---- Safety checks ---- + if specificity_df.index.name != "gene": + # not required, but helps debugging + logger.info(f"specificity_df index name is '{specificity_df.index.name}', expected 'gene' (ok).") + + if out_dir.exists() and not overwrite: + raise FileExistsError(f"Output directory {out_dir} already exists. Set overwrite=True to proceed.") + out_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Writing gene sets to {out_dir}") + + # ---- Build mapping from specificity_df genes -> accessions to write ---- + spec_genes = pd.Index(specificity_df.index.astype(str)) + + # Normalize the specificity_df index + spec_upper = spec_genes.str.upper() + if remove_version_suffix: + spec_upper = spec_upper.str.replace(r"\..*$", "", regex=True) + + # Case 1: specificity_df already contains Ensembl IDs and you want to write them as-is. + # We detect this if most genes look like ENSG... + ensembl_like = spec_upper.str.match(r"^ENSG\d+$", na=False).mean() > 0.5 + + # If user provided accession_col, use it as the authoritative output IDs + if accession_col is not None: + if accession_col not in adata.var.columns: + raise ValueError(f"Column '{accession_col}' not found in adata.var") + + acc = adata.var[accession_col].astype(str).str.upper() + if remove_version_suffix: + acc = acc.str.replace(r"\..*$", "", regex=True) + + # Decide what to match on (gene_col or var_names) + if gene_col is not None and gene_col in adata.var.columns: + key = adata.var[gene_col].astype(str).str.upper() + if remove_version_suffix: + key = key.str.replace(r"\..*$", "", regex=True) + else: + key = pd.Index(adata.var_names.astype(str)).str.upper() + if remove_version_suffix: + key = key.str.replace(r"\..*$", "", regex=True) + + map_df = pd.DataFrame({"key": key.values, "accession": acc.values}).dropna() + map_df = map_df.drop_duplicates(subset=["key"], keep="first") + + # Map specificity genes -> accession + gene_to_acc = pd.Series(map_df["accession"].values, 
index=map_df["key"].values) + accessions = gene_to_acc.reindex(spec_upper) + + overlap_mask = accessions.notna() + if overlap_mask.sum() == 0: + raise ValueError("No overlapping genes between specificity_df and adata.var mapping (accession_col).") + + logger.info(f"Overlapping genes after mapping: {overlap_mask.sum()}/{len(specificity_df)}") + specificity_df = specificity_df.loc[overlap_mask.values] + accessions = accessions.loc[overlap_mask.values] + + # Replace index with accessions (what LDSC wants) + specificity_df = specificity_df.copy() + specificity_df.index = accessions.values + + else: + # No accession_col supplied. + # If specificity_df already looks like Ensembl IDs, just use it. + if ensembl_like: + logger.info("specificity_df index looks like Ensembl IDs; using them directly.") + specificity_df = specificity_df.copy() + specificity_df.index = spec_upper.values + else: + # Fall back to matching against adata.var[gene_col] and writing those IDs. + if gene_col is None or gene_col not in adata.var.columns: + raise ValueError( + "specificity_df index does not look like Ensembl IDs, and no valid gene_col/accession_col provided." 
+ ) + + adata_key = adata.var[gene_col].astype(str).str.upper() + if remove_version_suffix: + adata_key = adata_key.str.replace(r"\..*$", "", regex=True) + + overlap = spec_upper.intersection(pd.Index(adata_key)) + if overlap.empty: + raise ValueError("No overlapping genes found between specificity_df index and adata.var[gene_col].") + + logger.info(f"Overlapping genes: {len(overlap)}/{specificity_df.shape[0]}") + specificity_df = specificity_df.loc[spec_upper.isin(overlap).values].copy() + specificity_df.index = spec_upper[spec_upper.isin(overlap)].values # normalized + + # ---- Select top genes per cell type ---- + n_genes = specificity_df.shape[0] + k = max(1, int(np.ceil(top_frac * n_genes))) + logger.info(f"Selecting top {k} genes ({top_frac*100:.1f}%) per cell type") + + summary = [] + for celltype in specificity_df.columns: + top_genes = specificity_df[celltype].nlargest(k).index.astype(str).unique() + + safe_celltype = _safe_filename(celltype) + out_path = out_dir / f"{safe_celltype}.GeneSet" + + with open(out_path, "w") as f: + for gene_id in top_genes: + f.write(f"{gene_id}\n") + + summary.append({"cell_type": celltype, "n_genes": len(top_genes), "output_path": str(out_path)}) + logger.debug(f"Wrote {len(top_genes)} genes for {celltype}") + + # ---- Control geneset ---- + if include_control: + control_path = out_dir / "Control.GeneSet" + with open(control_path, "w") as f: + for gene_id in specificity_df.index.astype(str): + f.write(f"{gene_id}\n") + logger.info(f"Wrote control gene set with {len(specificity_df)} genes") + + summary_df = pd.DataFrame(summary) + logger.info(f"Generated {len(summary)} cell-type-specific gene sets") + return summary_df + + +def _fetch_ensembl_annotation( + genome_build: Literal["GRCh37", "GRCh38"] = "GRCh37", gene_identifier_mode: str = "ensembl" +) -> pd.DataFrame: + """ + Fetch gene annotations from Ensembl using pybiomart. + + Parameters + ---------- + genome_build + Genome build version: "GRCh37" or "GRCh38". 
+ gene_identifier_mode + Gene identifier: "name" (gene symbols) or "ensembl" (Ensembl IDs). + + Returns + ------- + pd.DataFrame + Gene annotations with columns: gene, chrom, start, end, gene_biotype. + """ + try: + from pybiomart import Server + except ImportError as e: + raise ImportError( + "pybiomart is required for fetching gene annotations. " "Install it with: pip install pybiomart" + ) from e + + if genome_build == "GRCh37": + logger.info("Querying Ensembl BioMart (GRCh37)...") + server = Server(host="http://grch37.ensembl.org") + elif genome_build == "GRCh38": + logger.info("Querying Ensembl BioMart (GRCh38)...") + server = Server(host="http://www.ensembl.org") + else: + raise ValueError(f"Invalid genome_build: {genome_build}. Must be 'GRCh37' or 'GRCh38'") + + dataset = server.marts["ENSEMBL_MART_ENSEMBL"].datasets["hsapiens_gene_ensembl"] + + attributes = [ + "hgnc_symbol", + "external_gene_name", + "ensembl_gene_id", + "chromosome_name", + "start_position", + "end_position", + "gene_biotype", + ] + + logger.info(f"Fetching gene annotations from {genome_build}...") + anno = dataset.query(attributes=attributes, use_attr_names=True) + + anno.columns = [c.strip() for c in anno.columns] + + anno = anno.rename(columns={ + #BioMart labels + "HGNC symbol": "hgnc_symbol", + "Gene name": "external_gene_name", + "Gene stable ID": "ensembl_gene_id", + "Chromosome/scaffold name": "chrom", + "Gene start (bp)": "start", + "Gene end (bp)": "end", + "Gene type": "gene_biotype", + # Attribute-name style + "hgnc_symbol": "hgnc_symbol", + "external_gene_name": "external_gene_name", + "ensembl_gene_id": "ensembl_gene_id", + "chromosome_name": "chrom", + "start_position": "start", + "end_position": "end", + "gene_biotype": "gene_biotype", + }) + + if gene_identifier_mode == "name": + anno["gene"] = anno["hgnc_symbol"].replace("", pd.NA) + anno["gene"] = anno["gene"].fillna(anno["external_gene_name"]) + elif gene_identifier_mode == "ensembl": + anno["gene"] = 
anno["ensembl_gene_id"].replace("", pd.NA) + else: + raise ValueError(f"Invalid mode: {gene_identifier_mode}. Must be 'name' or 'ensembl'.") + + anno = anno[["gene", "chrom", "start", "end", "gene_biotype"]].dropna(subset=["gene"]) + + logger.info(f"Fetched annotations for {len(anno)} genes from {genome_build}") + return anno + + +def _map_gene_annotation(adata: AnnData, anno_df: pd.DataFrame, gene_col: str | None = "gene") -> AnnData: + """Map gene annotations to adata.var.""" + anno_cols = ["chrom", "start", "end", "gene_biotype"] + + conflicts = [c for c in anno_cols if c in adata.var.columns] + if conflicts: + logger.info(f"Dropping conflicting columns from adata.var before merge: {conflicts}") + adata.var = adata.var.drop(columns=conflicts) + + anno_df["gene_upper"] = anno_df["gene"].astype(str).str.upper() + + anno_df = anno_df.drop_duplicates(subset=["gene_upper"]) + + merged = adata.var.merge(anno_df[["gene_upper"] + anno_cols], on="gene_upper", how="left") + + adata.var = merged.set_index(adata.var.index) + + logger.info(f"Annotated {(~merged['chrom'].isna()).sum()} / {adata.n_vars} genes.") + + return adata + + +def _pick_var_col(adata: AnnData, candidates: list[str], default: str | None) -> str | None: + """Select first existing column from candidates.""" + if default and default in adata.var.columns: + return default + for col in candidates: + if col in adata.var.columns: + return col + return default + + +def _normalize_chromosome(chr_series: pd.Series) -> pd.Series: + """Normalize chromosome labels to standard format.""" + normalized = chr_series.astype(str).str.replace("^chr", "", regex=True).str.upper() + return normalized.str.extract(r"^([0-9XYM]+)", expand=False) + + +def _compute_celltype_means(adata: AnnData, celltype_col: str) -> pd.DataFrame: + """Compute mean expression per cell type.""" + celltypes = pd.Index(adata.obs[celltype_col].astype("category")).categories + + means_list = [] + col_names = [] + + for celltype in celltypes: + mask = 
(adata.obs[celltype_col] == celltype).values + n_cells = mask.sum() + col_names.append(str(celltype)) + + if n_cells == 0: + means_list.append(np.full(adata.n_vars, np.nan, dtype=float)) + logger.warning(f"Cell type '{celltype}' has 0 cells") + continue + + X_sub = adata.X[mask, :] + if sparse.issparse(X_sub): + means = np.asarray(X_sub.mean(axis=0)).ravel() + else: + means = X_sub.mean(axis=0).ravel() + + means_list.append(means) + + mean_expr_df = pd.DataFrame( + np.column_stack(means_list) if means_list else np.empty((adata.n_vars, 0)), + index=adata.var_names, + columns=col_names, + ) + + return mean_expr_df + + +def _compute_specificity(mean_expr_df: pd.DataFrame) -> pd.DataFrame: + """ + Compute specificity scores using Duncan et al. 2019 method. + + Specificity(gene, celltype) = mean(gene, celltype) / sum(mean(gene, all_celltypes)) + """ + gene_sums = mean_expr_df.sum(axis=1) + denom = gene_sums.replace(0, np.nan) + specificity = mean_expr_df.div(denom, axis=0).fillna(0.0) + + return specificity + + +def _safe_filename(s: str) -> str: + """Convert string to safe filename.""" + s = str(s).strip().replace(" ", "_") + s = s.replace("(", "_").replace(")", "_") + return re.sub(r"[^\w\.\+\-]+", "_", s) + + +def generate_gene_coord_file( + out_path: str | Path, + *, + genome_build: Literal["GRCh37", "GRCh38"] = "GRCh37", + gene_identifier_mode: str = "ensembl", + remove_version_suffix: bool = True, + add_chr_prefix: bool = True, + overwrite: bool = False, +) -> pd.DataFrame: + """ + Generate a gene coordinate file for S-LDSC analysis from Ensembl BioMart. + + Fetches all genes from Ensembl and creates a tab-delimited file with columns: + GENE, CHR, START, END + + Parameters + ---------- + out_path + Output file path (e.g., "gene_coords.txt"). + genome_build + Genome build version: "GRCh37" or "GRCh38". + gene_identifier_mode + Gene identifier: "name" (gene symbols) or "ensembl" (Ensembl IDs). 
+ remove_version_suffix + Whether to remove version suffixes from gene IDs (e.g., ENSG00000123456.7 → ENSG00000123456). + add_chr_prefix + Whether to add "chr" prefix to chromosome names (e.g., "1" → "chr1"). + overwrite + Whether to overwrite existing output file. + + Raises + ------ + FileExistsError + If out_path exists and overwrite=False. + ImportError + If pybiomart is not installed. + + Examples + -------- + >>> # Fetch all genes with Ensembl IDs from GRCh37 + >>> coord_df = generate_gene_coord_file("gene_coords.txt", gene_identifier_mode="ensembl", genome_build="GRCh37") + >>> # Fetch with gene symbols from GRCh38 + >>> coord_df = generate_gene_coord_file( + ... "gene_coords_grch38.txt", gene_identifier_mode="name", genome_build="GRCh38" + ... ) + """ + out_path = Path(out_path) + + if out_path.exists() and not overwrite: + raise FileExistsError(f"Output file {out_path} already exists. Set overwrite=True to proceed.") + + logger.info(f"Fetching gene annotations from Ensembl {genome_build}...") + anno_df = _fetch_ensembl_annotation(genome_build=genome_build, gene_identifier_mode=gene_identifier_mode) + + coord_df = anno_df[["gene", "chrom", "start", "end"]].copy() + coord_df.columns = ["GENE", "CHR", "START", "END"] + + if remove_version_suffix: + logger.info("Removing version suffixes from gene identifiers") + coord_df["GENE"] = coord_df["GENE"].astype(str).str.replace(r"\..*$", "", regex=True) + + coord_df["CHR"] = coord_df["CHR"].astype(str) + if add_chr_prefix: + coord_df["CHR"] = coord_df["CHR"].apply(lambda x: x if x.startswith("chr") else f"chr{x}") + else: + coord_df["CHR"] = coord_df["CHR"].str.replace("^chr", "", regex=True) + + coord_df["START"] = coord_df["START"].astype(int) + coord_df["END"] = coord_df["END"].astype(int) + + n_before = len(coord_df) + coord_df = coord_df.drop_duplicates(subset=["GENE"], keep="first") + n_after = len(coord_df) + + if n_before != n_after: + logger.warning(f"Removed {n_before - n_after} duplicate gene entries") + 
+ coord_df = coord_df.sort_values(["CHR", "START"]) + + logger.info(f"Writing {len(coord_df)} gene coordinates to {out_path}") + coord_df.to_csv(out_path, sep="\t", index=False) + + logger.info(f"Successfully created gene coordinate file: {out_path}") \ No newline at end of file diff --git a/src/cellink/tl/external/config/ldsc.yaml b/src/cellink/tl/external/config/ldsc.yaml new file mode 100644 index 0000000..80b14f3 --- /dev/null +++ b/src/cellink/tl/external/config/ldsc.yaml @@ -0,0 +1,4 @@ +execution_mode: local +ldsc_command: ldsc.py +make_annot_command: make_annot.py +munge_command: munge_sumstats.py \ No newline at end of file diff --git a/src/cellink/tl/external/config/ldsc_docker.yaml b/src/cellink/tl/external/config/ldsc_docker.yaml new file mode 100644 index 0000000..e69de29 diff --git a/src/cellink/tl/external/config/ldsc_singularity.yaml b/src/cellink/tl/external/config/ldsc_singularity.yaml new file mode 100644 index 0000000..91748f1 --- /dev/null +++ b/src/cellink/tl/external/config/ldsc_singularity.yaml @@ -0,0 +1,5 @@ +execution_mode: singularity +singularity_image: ldsc.sif #TODO CHANGED THIS FROM /project/genomics/ayshan/containers/ldsc.sif to ldsc.sif, so it's general for everyone - Try to put a symlink there +ldsc_command: /ldsc/ldsc.py +make_annot_command: /ldsc/make_annot.py +munge_command: /ldsc/munge_sumstats.py \ No newline at end of file From 7e877faad1f5ea0c1a4674058d5d74e68785e1a4 Mon Sep 17 00:00:00 2001 From: Lucas Arnoldt Date: Tue, 27 Jan 2026 17:25:57 +0100 Subject: [PATCH 2/6] LDSC --- docs/tutorials/cell_level_ldsc_analysis.ipynb | 17 ++-- src/cellink/io/_sgkit.py | 26 +----- src/cellink/resources/_datasets.py | 1 + src/cellink/resources/_ld.py | 19 +++-- src/cellink/tl/external/__init__.py | 28 +++---- src/cellink/tl/external/_ldsc.py | 12 +-- src/cellink/tl/external/_ldsc2magma.py | 36 +++++---- src/cellink/tl/external/_sldsc_utils.py | 81 ++++++++----------- src/cellink/tl/external/config/ldsc.yaml | 2 +- 
.../tl/external/config/ldsc_singularity.yaml | 2 +- 10 files changed, 95 insertions(+), 129 deletions(-) diff --git a/docs/tutorials/cell_level_ldsc_analysis.ipynb b/docs/tutorials/cell_level_ldsc_analysis.ipynb index c05ac36..3d433ca 100644 --- a/docs/tutorials/cell_level_ldsc_analysis.ipynb +++ b/docs/tutorials/cell_level_ldsc_analysis.ipynb @@ -160,7 +160,11 @@ ], "source": [ "# Load the dataset\n", - "dd = get_onek1k(config_path=\"cellink/src/cellink/resources/config/onek1k.yaml\", data_home=\"/project/genomics/ayshan/1k1k_dataset\", verify_checksum=False)\n", + "dd = get_onek1k(\n", + " config_path=\"cellink/src/cellink/resources/config/onek1k.yaml\",\n", + " data_home=\"/project/genomics/ayshan/1k1k_dataset\",\n", + " verify_checksum=False,\n", + ")\n", "print(f\"Dataset shape: {dd.shape}\")\n", "\n", "\n", @@ -907,13 +911,10 @@ ], "source": [ "out_gmt = genesets_dir_to_entrez_gmt(\n", - " geneset_dir=\"ldsc_genesets\",\n", - " ensembl_to_entrez_tsv=None,\n", - " allow_mygene_fallback=True,\n", - " include_control=False\n", + " geneset_dir=\"ldsc_genesets\", ensembl_to_entrez_tsv=None, allow_mygene_fallback=True, include_control=False\n", ")\n", "\n", - "print(out_gmt) #TODO: Why is this generated? Where is it used downstream? How is this actually MAGMA specific?\n", + "print(out_gmt) # TODO: Why is this generated? Where is it used downstream? How is this actually MAGMA specific?\n", "# Would it make sense to demosntrate the use of the MAGMA runner here? See scDRS/ seismic notebook as an example." 
] }, @@ -1628,7 +1629,7 @@ " result = make_annot_from_donor_data(\n", " dd=dd_chrom,\n", " annot_file=f\"{cell_type.replace(' ', '_')}_{chrom}.annot.gz\",\n", - " gene_set_file = f\"./ldsc_genesets/{cell_type.replace(' ', '_')}.GeneSet\",\n", + " gene_set_file=f\"./ldsc_genesets/{cell_type.replace(' ', '_')}.GeneSet\",\n", " gene_coord_file=\"gene_coords.txt\",\n", " windowsize=100000,\n", " runner=runner,\n", @@ -1658,7 +1659,7 @@ " -B /ictstr01/project_copy/genomics/ayshan:/data \\\n", " -B /home/aih/ayshan.aliyeva/cellink_data:/cellink_data \\\n", " /project/genomics/ayshan/containers/ldsc.sif \\\n", - " bash -lc 'pwd; ls; ls -l ldsc_annot.bim gene_coords.txt ldsc_genesets/CD8_Naive.GeneSet'\n" + " bash -lc 'pwd; ls; ls -l ldsc_annot.bim gene_coords.txt ldsc_genesets/CD8_Naive.GeneSet'" ] }, { diff --git a/src/cellink/io/_sgkit.py b/src/cellink/io/_sgkit.py index c43d429..f0d62ba 100644 --- a/src/cellink/io/_sgkit.py +++ b/src/cellink/io/_sgkit.py @@ -57,13 +57,7 @@ class SgVars: def from_sgkit_dataset( - sgkit_dataset: xr.Dataset, - *, var_rename: - dict = None, - obs_rename: - dict = None, - hard_call: - bool = True + sgkit_dataset: xr.Dataset, *, var_rename: dict = None, obs_rename: dict = None, hard_call: bool = True ) -> AnnData: """Read SgKit Zarr Format @@ -134,14 +128,7 @@ def from_sgkit_dataset( return gdata -def read_sgkit_zarr( - path: str | Path, - *, - var_rename=None, - obs_rename=None, - hard_call=True, - **kwargs -) -> AnnData: +def read_sgkit_zarr(path: str | Path, *, var_rename=None, obs_rename=None, hard_call=True, **kwargs) -> AnnData: """Read SgKit Zarr Format Params @@ -160,14 +147,7 @@ def read_sgkit_zarr( return gdata -def read_plink( - path: str | Path = None, - *, - var_rename=None, - obs_rename=None, - hard_call=True, - **kwargs -) -> AnnData: +def read_plink(path: str | Path = None, *, var_rename=None, obs_rename=None, hard_call=True, **kwargs) -> AnnData: """Read Plink Format Params diff --git a/src/cellink/resources/_datasets.py 
b/src/cellink/resources/_datasets.py index 21ed766..d4a5b4d 100644 --- a/src/cellink/resources/_datasets.py +++ b/src/cellink/resources/_datasets.py @@ -8,6 +8,7 @@ from cellink.io import read_h5_dd, read_zarr_dd from cellink.resources._datasets_utils import plink_filter_prune, plink_kinship, preprocess_vcf_to_plink, try_liftover from cellink.resources._utils import _download_file, _load_config, _run, get_data_home + from .._core import DonorData logging.basicConfig(level=logging.INFO) diff --git a/src/cellink/resources/_ld.py b/src/cellink/resources/_ld.py index aea112f..21d57c0 100644 --- a/src/cellink/resources/_ld.py +++ b/src/cellink/resources/_ld.py @@ -1,9 +1,8 @@ import shutil import tarfile +from pathlib import Path import pandas as pd -from pathlib import Path -from typing import Optional, Tuple, Union from cellink.resources._utils import _download_file, _load_config, get_data_home @@ -63,7 +62,7 @@ def get_1000genomes_ld_scores( Download, extract, and load precomputed 1000 Genomes linkage disequilibrium (LD) scores. This function downloads population-specific LD scores from the 1000 Genomes project, - extracts them to a local directory, and concatenates chromosome-wise annotation and + extracts them to a local directory, and concatenates chromosome-wise annotation and LD score files into pandas DataFrames. Parameters @@ -89,7 +88,7 @@ def get_1000genomes_ld_scores( Concatenated LD score files for all chromosomes. - prefix : str File name prefix used in the extracted data. - + If `return_path=True`, returns `(DATA, prefix)`: - DATA : pathlib.Path Path to the directory containing extracted files. @@ -150,7 +149,7 @@ def get_1000genomes_ld_weights( Download, extract, and load precomputed 1000 Genomes LD weights. 
This function downloads population-specific LD weights from the 1000 Genomes project, - extracts them to a local directory, and concatenates chromosome-wise weight files + extracts them to a local directory, and concatenates chromosome-wise weight files into a single pandas DataFrame. Parameters @@ -173,7 +172,7 @@ def get_1000genomes_ld_weights( - None : placeholder for compatibility with LD scores interface. - weights : pd.DataFrame Concatenated LD weight files for all chromosomes. - + If `return_path=True`, returns `(DATA, prefix)`: - DATA : pathlib.Path Path to the directory containing extracted files. @@ -215,6 +214,7 @@ def get_1000genomes_ld_weights( return annot, weights + def get_1000genomes_plink_files( config_path: str | Path = "./cellink/resources/config/1000genomes.yaml", population: str = "EUR", @@ -225,6 +225,7 @@ def get_1000genomes_plink_files( Download and extract 1000 Genomes PLINK files (BED/BIM/FAM format). This function downloads population-specific PLINK files from the 1000 Genomes project, extracts them to a local directory, and returns the path to the extracted files. + Parameters ---------- config_path : str or pathlib.Path, default='./cellink/resources/config/1000genomes.yaml' @@ -235,6 +236,7 @@ def get_1000genomes_plink_files( Root directory where data will be stored. Defaults to user-specific cache directory. refresh : bool, default=False If True, re-downloads and re-extracts files even if they already exist locally. + Returns ------- - pathlib.Path @@ -242,10 +244,12 @@ def get_1000genomes_plink_files( Files are named as: {prefix}{chrom}.bed/bim/fam where chrom ranges from 1-22. - prefix : str File name prefix used in the extracted data. + Raises ------ ValueError If `population` is not supported in the configuration. 
+ Examples -------- >>> plink_dir = get_1000genomes_plink_files(population="EUR") @@ -270,6 +274,7 @@ def get_1000genomes_plink_files( return DATA, prefix + if __name__ == "__main__": annot, ldscores, prefix = get_1000genomes_ld_scores(population="EUR") annot, ldscores, prefix = get_1000genomes_ld_scores(population="EAS") @@ -278,4 +283,4 @@ def get_1000genomes_plink_files( annot, weights, prefix = get_1000genomes_ld_weights(population="EAS") plink_files, prefix = get_1000genomes_plink_files(population="EUR") - plink_files, prefix = get_1000genomes_plink_files(population="EAS") \ No newline at end of file + plink_files, prefix = get_1000genomes_plink_files(population="EAS") diff --git a/src/cellink/tl/external/__init__.py b/src/cellink/tl/external/__init__.py index 4f940ad..b77e92c 100644 --- a/src/cellink/tl/external/__init__.py +++ b/src/cellink/tl/external/__init__.py @@ -1,20 +1,8 @@ import importlib from typing import Any -from ._ld import calculate_ld -from ._pc import calculate_pcs -from ._saigeqtl import ( - configure_saigeqtl_runner, - get_saigeqtl_runner, - make_group_file, - read_saigeqtl_results, - run_saigeqtl, -) -from ._tensorqtl import read_tensorqtl_results, run_tensorqtl from ._jaxqtl import read_jaxqtl_results, run_jaxqtl -from ._scdrs import run_scdrs -from ._seismic import run_seismic -from ._magma import run_magma_pipeline +from ._ld import calculate_ld from ._ldsc import ( compute_ld_scores_with_annotations_from_bimfile, compute_ld_scores_with_annotations_from_donor_data, @@ -28,8 +16,20 @@ make_annot_from_donor_data, munge_sumstats, ) +from ._ldsc2magma import genesets_dir_to_entrez_gmt, load_ensembl_to_entrez_map +from ._magma import run_magma_pipeline +from ._pc import calculate_pcs +from ._saigeqtl import ( + configure_saigeqtl_runner, + get_saigeqtl_runner, + make_group_file, + read_saigeqtl_results, + run_saigeqtl, +) +from ._scdrs import run_scdrs +from ._seismic import run_seismic from ._sldsc_utils import generate_gene_coord_file, 
generate_sldsc_genesets, preprocess_for_sldsc -from ._ldsc2magma import load_ensembl_to_entrez_map, genesets_dir_to_entrez_gmt +from ._tensorqtl import read_tensorqtl_results, run_tensorqtl __all__ = [ "read_jaxqtl_results", diff --git a/src/cellink/tl/external/_ldsc.py b/src/cellink/tl/external/_ldsc.py index e6e4a3c..dcc152f 100644 --- a/src/cellink/tl/external/_ldsc.py +++ b/src/cellink/tl/external/_ldsc.py @@ -1,14 +1,11 @@ import logging import os -import subprocess from typing import Any -import shlex import yaml from cellink._core import DonorData from cellink.io import to_plink -from cellink.resources._utils import get_data_home from cellink.tl._runner import BaseToolRunner logger = logging.getLogger(__name__) @@ -18,12 +15,7 @@ class LDSCRunner(BaseToolRunner): """LDSC Runner with support for local, docker, and singularity""" def __init__(self, config_path: str | None = None, config_dict: dict | None = None): - required_fields = [ - "execution_mode", - "ldsc_command", - "make_annot_command", - "munge_command" - ] + required_fields = ["execution_mode", "ldsc_command", "make_annot_command", "munge_command"] prefix_tokens = [] super().__init__(config_path, config_dict, required_fields, prefix_tokens) @@ -1793,4 +1785,4 @@ def estimate_celltype_specific_heritability( "files_created": [f"{out_prefix}.cell_type_results.txt", f"{out_prefix}.log"], } else: - return {"command": runner._build_container_command(cmd, file_paths)} \ No newline at end of file + return {"command": runner._build_container_command(cmd, file_paths)} diff --git a/src/cellink/tl/external/_ldsc2magma.py b/src/cellink/tl/external/_ldsc2magma.py index 61c9faf..98d4518 100644 --- a/src/cellink/tl/external/_ldsc2magma.py +++ b/src/cellink/tl/external/_ldsc2magma.py @@ -1,17 +1,21 @@ import logging from pathlib import Path + import pandas as pd logger = logging.getLogger(__name__) -#TODO: HOW is MAGMA CALLED THEN? 
-def load_ensembl_to_entrez_map(map_tsv: str | Path) -> pd.Series: #TODO: Is this a private function or not. Where is this used? Can't find any anywhere this is called? + +# TODO: HOW is MAGMA CALLED THEN? +def load_ensembl_to_entrez_map( + map_tsv: str | Path, +) -> pd.Series: # TODO: Is this a private function or not. Where is this used? Can't find any anywhere this is called? """ - Load a mapping TSV with columns: + Load a mapping TSV with columns: ensembl_gene_id entrez_id Returns a Series indexed by ENSG (upper, no version) with values as string Entrez IDs. """ - #TODO EXPAND ON DOCUMENTATION IF PUBLIC FUNCTION + # TODO EXPAND ON DOCUMENTATION IF PUBLIC FUNCTION map_tsv = Path(map_tsv) df = pd.read_csv(map_tsv, sep="\t", dtype=str) @@ -60,7 +64,7 @@ def genesets_dir_to_entrez_gmt( output_basename: str = "genesets.gmt", ) -> Path: """ - Convert *.GeneSet (Ensembl IDs) -> MAGMA .gmt with Entrez IDs. + Convert *.GeneSet (Ensembl IDs) -> MAGMA .gmt with Entrez IDs. Defaults: - reads from ./ldsc_genesets @@ -70,7 +74,7 @@ def genesets_dir_to_entrez_gmt( Preferred: provide ensembl_to_entrez_tsv for offline mapping. Optional: allow_mygene_fallback=True to query mygene.info (needs internet). 
""" - #TODO EXPAND ON DOCUMENTATION SINCE PUBLIC FUNCTION + # TODO EXPAND ON DOCUMENTATION SINCE PUBLIC FUNCTION geneset_dir = Path(geneset_dir).resolve() # ---- Default output location: sibling magma_genesets next to ldsc_genesets ---- @@ -100,6 +104,7 @@ def genesets_dir_to_entrez_gmt( if allow_mygene_fallback: try: import mygene # type: ignore + mg = mygene.MyGeneInfo() except Exception as e: raise ImportError("allow_mygene_fallback=True requires 'mygene' (pip install mygene).") from e @@ -124,7 +129,7 @@ def map_to_entrez(ens_genes: list[str]) -> list[str]: if ens2ent is not None: mapped = ens2ent.reindex(ens_genes) - for g, e in zip(ens_genes, mapped.values): + for g, e in zip(ens_genes, mapped.values, strict=False): if pd.isna(e) or str(e) in ["", "nan", "None"]: missing.append(g) else: @@ -132,9 +137,10 @@ def map_to_entrez(ens_genes: list[str]) -> list[str]: else: missing = ens_genes[:] # everything missing if no offline map - # mygene fallback for missing - if mg is not None and missing: #TODO: Let's instead of mygene rely on biomart mappings used throughout the package, e.g. in _sldsc_utils.py + if ( + mg is not None and missing + ): # TODO: Let's instead of mygene rely on biomart mappings used throughout the package, e.g. 
in _sldsc_utils.py res = mg.querymany( missing, scopes="ensembl.gene", @@ -155,11 +161,7 @@ def map_to_entrez(ens_genes: list[str]) -> list[str]: if "entrezgene" in res.columns: # Normalize query ids res["query_norm"] = ( - res["query"] - .astype(str) - .str.strip() - .str.upper() - .str.replace(r"\..*$", "", regex=True) + res["query"].astype(str).str.strip().str.upper().str.replace(r"\..*$", "", regex=True) ) # Keep only rows with an actual entrezgene value @@ -189,7 +191,6 @@ def map_to_entrez(ens_genes: list[str]) -> list[str]: else: entrez.append(str(e)) - # De-dup / sort output if dedup_genes: seen = set() @@ -223,7 +224,8 @@ def map_to_entrez(ens_genes: list[str]) -> list[str]: logger.info(f"Wrote {n_written} gene sets to {out_gmt} (skipped {n_skipped})") return Path(out_gmt) -#TODO: REMOVE THIS MAIN BLOCK IF THIS IS A PRIVATE MODULE + +# TODO: REMOVE THIS MAIN BLOCK IF THIS IS A PRIVATE MODULE if __name__ == "__main__": import argparse @@ -271,4 +273,4 @@ def map_to_entrez(ens_genes: list[str]) -> list[str]: allow_mygene_fallback=args.allow_mygene_fallback, pattern=args.pattern, output_basename=args.output_basename, - ) \ No newline at end of file + ) diff --git a/src/cellink/tl/external/_sldsc_utils.py b/src/cellink/tl/external/_sldsc_utils.py index 9c43941..efeb114 100644 --- a/src/cellink/tl/external/_sldsc_utils.py +++ b/src/cellink/tl/external/_sldsc_utils.py @@ -3,17 +3,13 @@ from pathlib import Path from typing import Literal +import numexpr as ne import numpy as np import pandas as pd -import scanpy as sc from anndata import AnnData from scipy import sparse -import os -import h5py -import numexpr as ne from tqdm import tqdm - logger = logging.getLogger(__name__) @@ -120,8 +116,6 @@ def preprocess_for_sldsc( if celltype_col not in adata.obs.columns: raise ValueError(f"Column '{celltype_col}' not found in adata.obs") - - if fetch_annotation: anno_df = _fetch_ensembl_annotation(genome_build=genome_build, gene_identifier_mode=gene_identifier_mode) if 
gene_col is None: @@ -195,7 +189,6 @@ def preprocess_for_sldsc( adata = adata[:, mask_keep.values].copy() - if log_transform: # Work with categorical clusters clusters_cat = adata.obs[celltype_col].astype("category") @@ -206,16 +199,15 @@ def preprocess_for_sldsc( # matrix: genes × clusters avg_matrix = np.zeros((n_genes, n_clusters), dtype=np.float64) - X = adata.X # could be csr_matrix or dense + X = adata.X # could be csr_matrix or dense - # Compute per-cluster log1p mean logger.info("Applying log1p transformation") for j, cl in enumerate(tqdm(cluster_names, desc="Aggregating clusters")): # indices of cells in this cluster idx = np.where(clusters_cat.values == cl)[0] if idx.size == 0: - # no cells in this cluster (shouldn't usually happen, but just in case) + # no cells in this cluster (shouldn't usually happen, but just in case) avg_matrix[:, j] = 0.0 continue @@ -235,16 +227,15 @@ def preprocess_for_sldsc( # Store as genes × clusters → [gene, cluster_index] avg_matrix[:, j] = avg_expr - + df = pd.DataFrame( avg_matrix, - index=adata.var_names, # genes as rows - columns=cluster_names # clusters as columns + index=adata.var_names, # genes as rows + columns=cluster_names, # clusters as columns ) logger.info("Log1p applied.") - if not log_transform: raise ValueError("This preprocessing path expects log_transform=True (needs cluster-level log1p matrix).") @@ -270,29 +261,21 @@ def preprocess_for_sldsc( # gather/melt to long exp = exp.melt( id_vars="gene", - var_name="ClusterID", # cluster name - value_name="Expr_sum_mean" # your log1p mean expression + var_name="ClusterID", # cluster name + value_name="Expr_sum_mean", # your log1p mean expression ) logger.info(f"Computing mean expression for {celltype_col}") # normalize within each cluster to sum to 1000 - exp["Expr_sum_mean"] = ( - exp["Expr_sum_mean"] * 1000.0 / - exp.groupby("ClusterID")["Expr_sum_mean"].transform("sum") - ) + exp["Expr_sum_mean"] = exp["Expr_sum_mean"] * 1000.0 / 
exp.groupby("ClusterID")["Expr_sum_mean"].transform("sum") mean_expr_df = exp.pivot(index="gene", columns="ClusterID", values="Expr_sum_mean") - logger.info("Computing specificity scores") # specificity: fraction of gene's total that comes from this cluster - exp["specificity"] = ( - exp["Expr_sum_mean"] / - exp.groupby("gene")["Expr_sum_mean"].transform("sum") - ) + exp["specificity"] = exp["Expr_sum_mean"] / exp.groupby("gene")["Expr_sum_mean"].transform("sum") specificity_df = exp.pivot(index="gene", columns="ClusterID", values="specificity") - if not ((specificity_df.values >= 0) & (specificity_df.values <= 1)).all(): logger.warning("Some specificity values outside [0, 1] range") @@ -311,8 +294,8 @@ def generate_sldsc_genesets( *, out_dir: str | Path, top_frac: float = 0.10, - gene_col: str | None = "gene", # e.g. "gene" (symbols) OR "ensembl_gene_id" - accession_col: str | None = None, # if you have an explicit Ensembl ID column, pass it (recommended) + gene_col: str | None = "gene", # e.g. 
"gene" (symbols) OR "ensembl_gene_id" + accession_col: str | None = None, # if you have an explicit Ensembl ID column, pass it (recommended) remove_version_suffix: bool = True, include_control: bool = True, overwrite: bool = False, @@ -495,24 +478,26 @@ def _fetch_ensembl_annotation( anno.columns = [c.strip() for c in anno.columns] - anno = anno.rename(columns={ - #BioMart labels - "HGNC symbol": "hgnc_symbol", - "Gene name": "external_gene_name", - "Gene stable ID": "ensembl_gene_id", - "Chromosome/scaffold name": "chrom", - "Gene start (bp)": "start", - "Gene end (bp)": "end", - "Gene type": "gene_biotype", - # Attribute-name style - "hgnc_symbol": "hgnc_symbol", - "external_gene_name": "external_gene_name", - "ensembl_gene_id": "ensembl_gene_id", - "chromosome_name": "chrom", - "start_position": "start", - "end_position": "end", - "gene_biotype": "gene_biotype", - }) + anno = anno.rename( + columns={ + # BioMart labels + "HGNC symbol": "hgnc_symbol", + "Gene name": "external_gene_name", + "Gene stable ID": "ensembl_gene_id", + "Chromosome/scaffold name": "chrom", + "Gene start (bp)": "start", + "Gene end (bp)": "end", + "Gene type": "gene_biotype", + # Attribute-name style + "hgnc_symbol": "hgnc_symbol", + "external_gene_name": "external_gene_name", + "ensembl_gene_id": "ensembl_gene_id", + "chromosome_name": "chrom", + "start_position": "start", + "end_position": "end", + "gene_biotype": "gene_biotype", + } + ) if gene_identifier_mode == "name": anno["gene"] = anno["hgnc_symbol"].replace("", pd.NA) @@ -702,4 +687,4 @@ def generate_gene_coord_file( logger.info(f"Writing {len(coord_df)} gene coordinates to {out_path}") coord_df.to_csv(out_path, sep="\t", index=False) - logger.info(f"Successfully created gene coordinate file: {out_path}") \ No newline at end of file + logger.info(f"Successfully created gene coordinate file: {out_path}") diff --git a/src/cellink/tl/external/config/ldsc.yaml b/src/cellink/tl/external/config/ldsc.yaml index 80b14f3..da6083a 100644 
--- a/src/cellink/tl/external/config/ldsc.yaml +++ b/src/cellink/tl/external/config/ldsc.yaml @@ -1,4 +1,4 @@ execution_mode: local ldsc_command: ldsc.py make_annot_command: make_annot.py -munge_command: munge_sumstats.py \ No newline at end of file +munge_command: munge_sumstats.py diff --git a/src/cellink/tl/external/config/ldsc_singularity.yaml b/src/cellink/tl/external/config/ldsc_singularity.yaml index 91748f1..ef94183 100644 --- a/src/cellink/tl/external/config/ldsc_singularity.yaml +++ b/src/cellink/tl/external/config/ldsc_singularity.yaml @@ -2,4 +2,4 @@ execution_mode: singularity singularity_image: ldsc.sif #TODO CHANGED THIS FROM /project/genomics/ayshan/containers/ldsc.sif to ldsc.sif, so it's general for everyone - Try to put a symlink there ldsc_command: /ldsc/ldsc.py make_annot_command: /ldsc/make_annot.py -munge_command: /ldsc/munge_sumstats.py \ No newline at end of file +munge_command: /ldsc/munge_sumstats.py From 20962e4c01f6d043ee4fd090e1cd8605416c0f51 Mon Sep 17 00:00:00 2001 From: Lucas Arnoldt Date: Tue, 27 Jan 2026 17:37:15 +0100 Subject: [PATCH 3/6] LDSC --- src/cellink/resources/_gwas_prs_qtl.py | 103 ------------------------- src/cellink/tl/external/_ldsc2magma.py | 5 +- 2 files changed, 2 insertions(+), 106 deletions(-) diff --git a/src/cellink/resources/_gwas_prs_qtl.py b/src/cellink/resources/_gwas_prs_qtl.py index bd5bfa7..37ffee8 100644 --- a/src/cellink/resources/_gwas_prs_qtl.py +++ b/src/cellink/resources/_gwas_prs_qtl.py @@ -98,109 +98,6 @@ def get_gwas_catalog_study(accession_id: str, **params: Any) -> dict: """ return _fetch(f"studies/{accession_id}", params=params, paginate=False) -""" -def get_gwas_catalog_study_summary_stats( - accession_id: str, dest: str | Path | None = None, return_path: bool = False, **params: Any -) -> pd.DataFrame | Path: - - study_meta = _fetch(f"{GWAS_API_BASE}/studies/{accession_id}", params=params, paginate=False) - - if "full_summary_stats" not in study_meta: - raise ValueError(f"Study 
{accession_id} does not have full summary statistics available") - - base_url = study_meta["full_summary_stats"] - harmonised_url = f"{base_url}/harmonised" - - import re - - try: - r = requests.get(harmonised_url) - r.raise_for_status() - - all_files = re.findall(r'href="([^"]*\.tsv\.gz)"', r.text) - - h_files = [f for f in all_files if f.endswith(".h.tsv.gz") and not f.endswith(".h.tsv.gz-meta.yaml")] - - if h_files: - - def build_priority(filename): - filename_lower = filename.lower() - if "build38" in filename_lower or "hg38" in filename_lower or "grch38" in filename_lower: - return 2 - elif "build37" in filename_lower or "hg19" in filename_lower or "grch37" in filename_lower: - return 1 - else: - return 0 - - h_files.sort(key=build_priority, reverse=True) - filename = h_files[0] - url = f"{harmonised_url}/{filename}" - logging.info(f"Found harmonised file: {filename}") - else: - raise ValueError("No harmonised .h.tsv.gz files found") - - except Exception as e: - logging.warning(f"Could not find harmonised files ({e}), trying base directory") - - try: - r = requests.get(base_url) - r.raise_for_status() - files = re.findall(r'href="([^"]*\.tsv\.gz)"', r.text) - - if files: - - def build_priority(filename): - filename_lower = filename.lower() - if "build38" in filename_lower or "hg38" in filename_lower or "grch38" in filename_lower: - return 2 - elif "build37" in filename_lower or "hg19" in filename_lower or "grch37" in filename_lower: - return 1 - else: - return 0 - - files.sort(key=build_priority, reverse=True) - filename = files[0] - url = f"{base_url}/{filename}" - else: - possible_files = [ - f"{accession_id}_buildGRCh38.tsv.gz", - f"{accession_id}_buildGRCh37.tsv.gz", - f"{accession_id}.tsv.gz", - ] - - for filename in possible_files: - test_url = f"{base_url}/{filename}" - try: - test_r = requests.head(test_url) - if test_r.status_code == 200: - url = test_url - break - except: - continue - else: - raise ValueError(f"Could not find summary statistics file 
for {accession_id}") - - except Exception as e2: - raise ValueError(f"Could not find summary statistics for {accession_id}: {e2}") - - if not dest: - data_home = get_data_home() - dest = data_home / f"{accession_id}_summary_stats.tsv.gz" - - logging.info(f"Downloading {url} to {dest}") - - try: - urlretrieve(url, dest) - except Exception as e: - raise RuntimeError(f"Failed to download summary statistics from {url}: {e}") - - if return_path: - return dest - - data = pd.read_csv(dest, compression="gzip", delimiter="\t") - return data -""" - def get_gwas_catalog_study_summary_stats( accession_id: str, dest: str | Path | None = None, diff --git a/src/cellink/tl/external/_ldsc2magma.py b/src/cellink/tl/external/_ldsc2magma.py index 98d4518..0d632a2 100644 --- a/src/cellink/tl/external/_ldsc2magma.py +++ b/src/cellink/tl/external/_ldsc2magma.py @@ -6,8 +6,7 @@ logger = logging.getLogger(__name__) -# TODO: HOW is MAGMA CALLED THEN? -def load_ensembl_to_entrez_map( +def _load_ensembl_to_entrez_map( map_tsv: str | Path, ) -> pd.Series: # TODO: Is this a private function or not. Where is this used? Can't find any anywhere this is called? 
""" @@ -96,7 +95,7 @@ def genesets_dir_to_entrez_gmt( # ---- Load offline map if provided ---- ens2ent = None if ensembl_to_entrez_tsv is not None: - ens2ent = load_ensembl_to_entrez_map(ensembl_to_entrez_tsv) + ens2ent = _load_ensembl_to_entrez_map(ensembl_to_entrez_tsv) logger.info(f"Loaded Ensembl→Entrez map with {ens2ent.shape[0]} entries") # ---- Optional online fallback ---- From 25e6088bbe2c0b83fe730d3ad0f7029fc085240d Mon Sep 17 00:00:00 2001 From: Lucas Arnoldt Date: Thu, 30 Apr 2026 01:10:06 +0200 Subject: [PATCH 4/6] sclinker --- docs/tutorials/tutorial_sclinker.ipynb | 19667 ++++++++++++++++ src/cellink/resources/__init__.py | 8 +- src/cellink/resources/_ld.py | 115 +- src/cellink/resources/config/1000genomes.yaml | 9 + src/cellink/tl/_runner.py | 19 +- src/cellink/tl/external/__init__.py | 66 +- src/cellink/tl/external/_joint_nmf.py | 294 + src/cellink/tl/external/_ldsc.py | 71 +- src/cellink/tl/external/_sclinker.py | 793 + src/cellink/tl/external/_sclinker_utils.py | 2511 ++ 10 files changed, 23548 insertions(+), 5 deletions(-) create mode 100644 docs/tutorials/tutorial_sclinker.ipynb create mode 100644 src/cellink/tl/external/_joint_nmf.py create mode 100644 src/cellink/tl/external/_sclinker.py create mode 100644 src/cellink/tl/external/_sclinker_utils.py diff --git a/docs/tutorials/tutorial_sclinker.ipynb b/docs/tutorials/tutorial_sclinker.ipynb new file mode 100644 index 0000000..b56edfd --- /dev/null +++ b/docs/tutorials/tutorial_sclinker.ipynb @@ -0,0 +1,19667 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "75736332", + "metadata": {}, + "source": [ + "# Tutorial: sc-linker via cellink\n", + "**Integrating single-cell RNA-seq, epigenomics and GWAS to identify disease-critical cell types**\n", + "\n", + "Based on: Jagadeesh\\*, Dey\\* et al., *Nature Genetics* 2022.\n", + "\n", + "Please read the **Pipeline architecture** section before running — sc-linker has several details that differ from simpler methods like scDRS or Seismic.\n", 
+ "\n", + "---\n", + "\n", + "## Pipeline architecture\n", + "\n", + "```\n", + "scRNA-seq\n", + " │\n", + " ▼ Step 1 (cellink)\n", + "Gene programs (genes × cell_types, scores in [0,1])\n", + " │\n", + " ▼ Step 2 (cellink → bedtools)\n", + "Bedgraphs — two strategies per program:\n", + " a) ABC_Road_{tissue} : gene_score × enhancer_activity (Roadmap ∪ ABC links)\n", + " b) 100kb : gene_score × 1 (gene body ± 100 kb window)\n", + " │\n", + " ▼ Step 2c (cellink → bedtools intersect)\n", + "SNP annotations (.annot.gz, one per chromosome × program × strategy)\n", + " │\n", + " ▼ Step 2d (cellink → ldsc.py --l2)\n", + "LD scores (.l2.ldscore.gz, one per chromosome × program × strategy)\n", + " │\n", + " ▼ Step 3 (cellink → ldsc.py --h2 --overlap-annot)\n", + "Heritability enrichment (.log, one per program × strategy × trait)\n", + " │\n", + " ▼ Step 4 (cellink)\n", + "E-score = Enrichment(program) − Enrichment(AllCoding_control)\n", + "```\n", + "\n", + "**Key differences from scDRS / Seismic**\n", + "- Uses S-LDSC `--h2` (partitioned heritability), **not** `--h2-cts` (cell-type-specific mode)\n", + "- Requires `bedtools` binary for interval merging\n", + "- Always runs two SNP-to-gene strategies: ABC\\_Road (primary) and 100 kb (secondary)\n", + "- E-score is computed in postprocessing, not directly from LDSC output\n", + "- Requires ~5–50 GB of reference data from the Broad server\n", + "\n", + "**Known LDSC bug affecting sc-linker** (ldsc issue [#342](https://github.com/bulik/ldsc/issues/342), PR [#341](https://github.com/bulik/ldsc/pull/341)): \n", + "When running `--h2 --overlap-annot` with multiple chromosomes, newer pandas versions cause an `IndexError` in `ldsc/ldscore/parse.py`. \n", + "If you hit this crash, see the **Troubleshooting** section at the end of the notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7451f2f8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/groups/ml01/workspace/lucas.arnoldt/cellink_pgen/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 20:25:16,140] INFO:root: /home/icb/lucas.arnoldt/cellink_data/dummy_onek1k/dummy_onek1k.dd.h5 already exists\n", + "[2026-04-08 20:25:16,141] WARNING:root: No checksum provided, skipping verification\n", + "[2026-04-08 20:25:20,497] INFO:root: Loaded dummy OneK1K dataset: (100, 146939, 125366, 34073)\n", + "Dataset: (100, 146939, 125366, 34073)\n", + "Cell types: ['gdT', 'NK', 'CD8 TEM', 'CD4 Naive', 'CD4 TCM']\n", + "Categories (31, object): ['ASDC', 'B intermediate', 'B memory', 'B naive', ..., 'cDC2', 'dnT', 'gdT', 'pDC']\n" + ] + } + ], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "os.environ[\"OMP_NUM_THREADS\"] = \"1\"\n", + "os.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\n", + "\n", + "import scanpy as sc\n", + "from cellink.resources import get_dummy_onek1k\n", + "from cellink.resources import (\n", + " get_1000genomes_ld_scores,\n", + " get_1000genomes_ld_weights,\n", + " get_1000genomes_plink_files,\n", + " get_1000genomes_frq,\n", + " get_1000genomes_hapmap3,\n", + ")\n", + "from cellink.tl.external import (\n", + " configure_ldsc_runner,\n", + " estimate_heritability,\n", + " munge_sumstats,\n", + " compute_ld_scores_with_annotations_from_bimfile,\n", + ")\n", + "from cellink.tl.external import (\n", + " compute_celltype_programs,\n", + " compute_diseaseprogression_programs,\n", + " 
compute_nmf_programs,\n", + " compute_joint_nmf_programs,\n", + ")\n", + "from cellink.tl.external import (\n", + " download_sclinker_enhancer_links,\n", + " load_roadmap_links,\n", + " load_abc_links,\n", + " load_gene_annotation,\n", + " genescores_to_annotations,\n", + " compute_ld_scores_for_sclinker,\n", + " load_sclinker_heritability_results,\n", + " compute_escore,\n", + " compute_ppi_gene_scores,\n", + " run_sclinker_heritability,\n", + ")\n", + "\n", + "dd = get_dummy_onek1k(\n", + " config_path=\"../../src/cellink/resources/config/dummy_onek1k.yaml\",\n", + " verify_checksum=False,\n", + ")\n", + "print(f\"Dataset: {dd.shape}\")\n", + "print(f\"Cell types: {dd.C.obs['predicted.celltype.l2'].unique()[:5]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "dc117b77", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 0: Configure the LDSC runner\n", + "\n", + "cellink runs LDSC through a **Singularity** container via `LDSCRunner`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fa07bde8", + "metadata": {}, + "outputs": [], + "source": [ + "runner = configure_ldsc_runner(config_dict={\n", + " \"execution_mode\": \"singularity\",\n", + " \"singularity_image\": \"/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif\",\n", + "\n", + " \"ldsc_command\": \"/ldsc/ldsc.py\",\n", + " \"make_annot_command\": \"/ldsc/make_annot.py\",\n", + " \"munge_command\": \"/ldsc/munge_sumstats.py\",\n", + "\n", + " # Path to parse.py inside the container — used by the troubleshooting\n", + " # cell at the end of this notebook if you hit an IndexError.\n", + " \"parse_script\": \"/ldsc/ldscore/parse.py\",\n", + "\n", + " \"singularity_patch_strategy\": \"overlay\",\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "9a720dae", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 1: Download reference files\n", + "\n", + "### 1a: 1000 Genomes LD scores, weights and PLINK files\n", + "\n", + "cellink already has helpers in 
`cellink.resources._1000genomes` that\n", + "download and cache the 1000G files. We use those directly instead of\n", + "calling `gsutil` manually." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "dce7ab7a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 20:25:20,766] INFO:root: /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/1000G_Phase3_baselineLD_v2.2_ldscores.tgz already exists\n", + "[2026-04-08 20:25:20,767] WARNING:root: No checksum provided, skipping verification\n", + "LD scores : /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR (prefix: baselineLD.)\n", + "[2026-04-08 20:25:20,797] INFO:root: /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/1000G_Phase3_weights_hm3_no_MHC.tgz already exists\n", + "[2026-04-08 20:25:20,797] WARNING:root: No checksum provided, skipping verification\n", + "LD weights : /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR (prefix: weights.hm3_noMHC.)\n", + "[2026-04-08 20:25:20,811] INFO:root: /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G_Phase3_plinkfiles.tgz already exists\n", + "[2026-04-08 20:25:20,812] WARNING:root: No checksum provided, skipping verification\n", + "PLINK : /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR (prefix: 1000G.EUR.QC.)\n", + "[2026-04-08 20:25:20,855] INFO:root: /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G_Phase3_frq.tgz already exists\n", + "[2026-04-08 20:25:20,855] WARNING:root: No checksum provided, skipping verification\n", + "Frq files : /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR (prefix: 1000G.EUR.QC.)\n", + "[2026-04-08 20:25:20,878] INFO:root: /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt already exists\n", + "[2026-04-08 20:25:20,878] WARNING:root: No checksum provided, skipping verification\n", + "HapMap3 : 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "\n", + "ref_ld_chr : /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.\n", + "w_ld_chr : /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.\n", + "bim_prefix : /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.\n", + "frq_chr : /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC.\n" + ] + } + ], + "source": [ + "# All 1000G files come from Zenodo (https://zenodo.org/records/10515792)\n", + "\n", + "_cfg = \"../../src/cellink/resources/config/1000genomes.yaml\"\n", + "\n", + "# Baseline LD scores (--ref-ld-chr)\n", + "ld_scores_dir, ld_prefix = get_1000genomes_ld_scores(population=\"EUR\", return_path=True, config_path=_cfg)\n", + "print(f\"LD scores : {ld_scores_dir} (prefix: {ld_prefix})\")\n", + "\n", + "# Regression weights (--w-ld-chr)\n", + "ld_weights_dir, weights_prefix = get_1000genomes_ld_weights(population=\"EUR\", return_path=True, config_path=_cfg)\n", + "print(f\"LD weights : {ld_weights_dir} (prefix: {weights_prefix})\")\n", + "\n", + "# PLINK files (BIM/BED/FAM — for computing per-annotation LD scores)\n", + "plink_dir, plink_prefix = get_1000genomes_plink_files(population=\"EUR\", config_path=_cfg)\n", + "print(f\"PLINK : {plink_dir} (prefix: {plink_prefix})\")\n", + "\n", + "# Allele frequency files (--frqfile-chr, required with --overlap-annot)\n", + "frq_dir, frq_prefix = get_1000genomes_frq(population=\"EUR\", config_path=_cfg)\n", + "print(f\"Frq files : {frq_dir} (prefix: {frq_prefix})\")\n", + "\n", + "# HapMap3 SNP list (--print-snps, restricts LD score computation)\n", + "hapmap3_snps = get_1000genomes_hapmap3(config_path=_cfg)\n", + "print(f\"HapMap3 : {hapmap3_snps}\")\n", + "\n", + "# Prefix strings for ldsc.py\n", + "ref_ld_chr = str(ld_scores_dir / ld_prefix) # e.g. .../baselineLD.\n", + "w_ld_chr = str(ld_weights_dir / weights_prefix) # e.g. 
.../weights.hm3_noMHC.\n", + "bim_prefix = str(plink_dir / plink_prefix) # e.g. .../1000G.EUR.QC.\n", + "frq_chr = str(frq_dir / frq_prefix) # e.g. .../1000G.EUR.QC.\n", + "\n", + "print(f\"\\nref_ld_chr : {ref_ld_chr}\")\n", + "print(f\"w_ld_chr : {w_ld_chr}\")\n", + "print(f\"bim_prefix : {bim_prefix}\")\n", + "print(f\"frq_chr : {frq_chr}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f973035f", + "metadata": {}, + "source": [ + "### 1b: sc-linker enhancer-gene links and gene coordinates\n", + "\n", + "These files are not part of the standard 1000G bundle — they come from the\n", + "Broad sc-linker server and are specific to this pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9c303870", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 20:43:33,416] INFO:cellink.tl.external._sclinker_utils: Already exists, skipping: sclinker_refs/RoadmapUABCannot_regions_to_genes.txt.gz\n", + "[2026-04-08 20:43:33,475] INFO:cellink.tl.external._sclinker_utils: Already exists, skipping: sclinker_refs/AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz\n", + "[2026-04-08 20:43:33,475] INFO:cellink.tl.external._sclinker_utils: Already exists, skipping: sclinker_refs/Roadmap_map_EID_names.txt\n", + "[2026-04-08 20:43:33,476] INFO:cellink.tl.external._sclinker_utils: sc-linker enhancer files in sclinker_refs: ['roadmap', 'abc', 'roadmap_eid']\n", + "{'roadmap': PosixPath('sclinker_refs/RoadmapUABCannot_regions_to_genes.txt.gz'), 'abc': PosixPath('sclinker_refs/AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz'), 'roadmap_eid': PosixPath('sclinker_refs/Roadmap_map_EID_names.txt')}\n" + ] + } + ], + "source": [ + "# Roadmap, ABC, and gene_annotation.txt are downloaded via HTTPS from GCS.\n", + "# Source: https://storage.googleapis.com/broad-alkesgroup-public/LDSCORE/Jagadeesh_Dey_sclinker/extras/\n", + "#\n", + "# Files downloaded per tissue:\n", 
+ "# Roadmap_{tissue}_E.txt.gz\n", + "# ABCpaper_NasserFulcoEngreitz2020_{tissue}_AvgHiC.txt.gz\n", + "# gene_annotation.txt (shared across tissues, downloaded once)\n", + "#\n", + "# Available tissues: BLD BRN GI LNG LIV KID SKIN FAT HRT\n", + "enhancer_refs = download_sclinker_enhancer_links(\n", + " out_dir=\"sclinker_refs\",\n", + " tissue=\"BLD\",\n", + ")\n", + "print(enhancer_refs)" + ] + }, + { + "cell_type": "markdown", + "id": "40640439", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 2: Step 1 — Gene programs\n", + "\n", + "### 2a: Cell-type programs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f06b22f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using 5 cell types: ['CD4 TCM', 'CD4 Naive', 'CD8 TEM', 'NK', 'B naive']\n", + "Cells: 93,886\n", + "Using adata.var['feature_name'] as gene_name\n", + "var_names sample: ['ENSG00000284662', 'ENSG00000229905', 'ENSG00000237491']\n", + "[2026-04-08 20:43:41,867] INFO:cellink.tl.external._sclinker: Running rank_genes_groups for predicted.celltype.l2 (93886 cells after filtering)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "... storing 'donor_id' as categorical\n", + "... storing 'chrom' as categorical\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups.\n", + "[2026-04-08 20:44:57,975] INFO:cellink.tl.external._sclinker: Mapped var_names from ENSG IDs to HGNC gene names using adata.var['gene_name']. 
This is required for matching against Roadmap/ABC TargetGene columns.\n", + "[2026-04-08 20:44:58,615] INFO:cellink.tl.external._sclinker: Saved cell-type program matrices to gene_programs/celltype\n", + "Gene programs: (34073, 5)\n", + "Index sample (should be HGNC): ['C1orf115_ENSG00000162817', 'SCNN1B_ENSG00000168447', 'ENSG00000225794.3', 'TIFAB_ENSG00000255833', 'ANKRD23_ENSG00000163126']\n" + ] + } + ], + "source": [ + "counts = dd.C.obs[\"predicted.celltype.l2\"].value_counts()\n", + "\n", + "TOP_N = 5\n", + "top_celltypes = counts.head(TOP_N).index\n", + "adata = dd.C[dd.C.obs[\"predicted.celltype.l2\"].isin(top_celltypes)].copy()\n", + "print(f\"Using {TOP_N} cell types: {top_celltypes.tolist()}\")\n", + "print(f\"Cells: {adata.n_obs:,}\")\n", + "\n", + "# If var_names are ENSG IDs (e.g. ENSG00000...), the Roadmap/ABC TargetGene\n", + "# column uses HGNC names — they won't match. Add gene_name to adata.var if\n", + "# not already present. The dummy OneK1K dataset uses ENSG IDs as var_names.\n", + "if adata.var_names.str.startswith(\"ENSG\").mean() > 0.5:\n", + " if \"gene_name\" not in adata.var.columns:\n", + " # Try common column names\n", + " for col in [\"gene_names\", \"feature_name\", \"gene_symbol\", \"Symbol\"]:\n", + " if col in adata.var.columns:\n", + " adata.var[\"gene_name\"] = adata.var[col]\n", + " print(f\"Using adata.var['{col}'] as gene_name\")\n", + " break\n", + " else:\n", + " # Last resort: query BioMart (slow, cached after first call)\n", + " print(\"var_names are ENSG IDs — querying BioMart for HGNC mapping ...\")\n", + " from cellink.tl.external import get_gene_annotation\n", + " gene_coord = get_gene_annotation(gene_id_type=\"ensembl\")\n", + " import pandas as _pd\n", + " ensg2hgnc = _pd.read_csv(gene_coord, sep=\" \")[[\"GENE\"]].copy()\n", + " # get_gene_annotation with ensembl gives ENSG→coord, we need ENSG→HGNC\n", + " # Use the HGNC file instead\n", + " hgnc_coord = get_gene_annotation(gene_id_type=\"hgnc\")\n", + " # Build 
map from ensembl cache that was written alongside hgnc\n", + " from pathlib import Path\n", + " from cellink.resources._utils import get_data_home\n", + " ensembl_f = get_data_home() / \"gene_coord_ensembl.txt\"\n", + " hgnc_f = get_data_home() / \"gene_coord_hgnc.txt\"\n", + " if ensembl_f.exists() and hgnc_f.exists():\n", + " # Both files are 1:1 gene lists in the same order from BioMart\n", + " # Re-query to get the proper ENSG→HGNC map\n", + " from cellink.tl.external._sldsc_utils import _query_biomart_and_write_gene_coords\n", + " _query_biomart_and_write_gene_coords(get_data_home())\n", + " print(\" Done. HGNC names will be applied automatically in compute_celltype_programs.\")\n", + " else:\n", + " print(\"adata.var['gene_name'] already present — will map ENSG→HGNC automatically\")\n", + "\n", + "print(f\"var_names sample: {adata.var_names[:3].tolist()}\")\n", + "\n", + "ct_programs = compute_celltype_programs(\n", + " adata,\n", + " celltype_col=\"predicted.celltype.l2\",\n", + " out_dir=\"gene_programs/celltype\",\n", + " save=True,\n", + ")\n", + "genescores = ct_programs[\"genescores\"]\n", + "print(f\"Gene programs: {genescores.shape}\")\n", + "print(f\"Index sample (should be HGNC): {genescores.index[:5].tolist()}\")" + ] + }, + { + "cell_type": "markdown", + "id": "37a258d4", + "metadata": {}, + "source": [ + "### 2b: Disease-progression programs (requires matched healthy/disease data)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "76d74bab", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment and adapt when you have an AnnData with both healthy and disease cells.\n", + "#\n", + "# disease_programs = compute_diseaseprogression_programs(\n", + "# adata_disease,\n", + "# celltype_col=\"cell_type\",\n", + "# diagnosis_col=\"disease_status\",\n", + "# healthy_label=\"Healthy\",\n", + "# disease_label=\"Disease\",\n", + "# out_dir=\"gene_programs/disease\",\n", + "# save=True,\n", + "# )" + ] + }, + { + "cell_type": 
"markdown", + "id": "e59085ae", + "metadata": {}, + "source": [ + "### 2c: NMF cellular process programs" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9e8e3ecf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:08:18,220] INFO:cellink.tl.external._sclinker: Setting n_components = 5 cell types + 10 = 15\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:08:30,832] INFO:cellink.tl.external._sclinker: Fitting NMF with 15 components on (93886, 34073) matrix\n", + "[2026-04-08 16:09:35,348] INFO:cellink.tl.external._sclinker: torchnmf: using GPU (NVIDIA A100 80GB PCIe)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/groups/ml01/workspace/lucas.arnoldt/cellink_pgen/lib/python3.12/site-packages/torch/autograd/graph.py:865: UserWarning: Attempting to run cuBLAS, but there was no current CUDA context! Attempting to set the primary context... (Triggered internally at /pytorch/aten/src/ATen/cuda/CublasHandlePool.cpp:330.)\n", + " return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NMF programs: cells=(93886, 15), genes=(34073, 15)\n" + ] + } + ], + "source": [ + "# Backend priority:\n", + "# 1. torchnmf (GPU) — install with: pip install torchnmf\n", + "# device='cuda' → uses GPU if available, warns and falls back to CPU if not\n", + "# device='cpu' → forces CPU even if GPU is present\n", + "# 2. 
sklearn NMF (init='nndsvda', solver='cd') — always available, slower\n", + "#\n", + "# On a (125k × 34k) matrix torchnmf on GPU is ~10-20x faster than sklearn.\n", + "# If you are on CPU only, set device='cpu' to suppress the CUDA warning.\n", + "W, H, corr = compute_nmf_programs(\n", + " adata,\n", + " celltype_col=\"predicted.celltype.l2\",\n", + " out_dir=\"gene_programs/nmf\",\n", + " save=True,\n", + " device=\"cuda\", # change to \"cpu\" if no GPU available\n", + ")\n", + "nmf_genescores = corr.clip(lower=0)\n", + "print(f\"NMF programs: cells={W.shape}, genes={H.shape}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3ff63070", + "metadata": {}, + "source": [ + "### 2d: AllCoding control program\n", + "\n", + "sc-linker always runs an \"all protein-coding genes\" control alongside every\n", + "program. The E-score subtracts the AllCoding enrichment:\n", + "\n", + " E-score(program, trait) = Enrichment(program) − Enrichment(AllCoding)\n", + "\n", + "We add it as an extra column so it goes through the same annotation and\n", + "LD score pipeline automatically." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "eacde290", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:10:39,940] INFO:cellink.tl.external._sclinker_utils: Querying Ensembl BioMart for human gene coordinates ...\n", + "[2026-04-08 16:10:40,485] INFO:cellink.tl.external._sclinker_utils: BioMart returned 78,691 gene entries\n", + "[2026-04-08 16:10:40,813] INFO:cellink.tl.external._sclinker_utils: Wrote 78,691 unique ENSG entries → gene_coord_ensembl.txt\n", + "[2026-04-08 16:10:40,907] INFO:cellink.tl.external._sclinker_utils: Wrote 41,417 unique HGNC entries → gene_coord_hgnc.txt\n", + "[2026-04-08 16:10:40,924] INFO:cellink.tl.external._sclinker_utils: Loaded 41,417 gene coordinates (gene_id_type='hgnc') from gene_coord_hgnc.txt\n", + "Gene annotations: 41,417 unique genes\n", + "Programs including AllCoding: 6 total\n", + "Unique genes: 69,390\n" + ] + } + ], + "source": [ + "gene_annot = load_gene_annotation(gene_id_type=\"hgnc\", refresh=True)\n", + "print(f\"Gene annotations: {len(gene_annot):,} unique genes\")\n", + "\n", + "# AllCoding control: every protein-coding gene gets score 1\n", + "all_coding = pd.DataFrame(\n", + " np.ones(len(gene_annot)),\n", + " index=gene_annot[\"GENE\"].str.upper(),\n", + " columns=[\"AllCoding\"],\n", + ")\n", + "# Drop any duplicate gene names in the AllCoding index before concat\n", + "all_coding = all_coding[~all_coding.index.duplicated(keep=\"first\")]\n", + "\n", + "# Combine cell-type programs + AllCoding; deduplicate index\n", + "all_genescores = pd.concat([genescores, all_coding], axis=1).fillna(0)\n", + "# Drop duplicate rows (same gene appearing twice in either source)\n", + "all_genescores = all_genescores[~all_genescores.index.duplicated(keep=\"first\")]\n", + "print(f\"Programs including AllCoding: {all_genescores.shape[1]} total\")\n", + "print(f\"Unique genes: {all_genescores.shape[0]:,}\")" + ] + }, + { + "cell_type": 
"markdown", + "id": "77e12974", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 3: Step 2 — Gene programs → SNP annotations\n", + "\n", + "This creates **two strategies** per program, matching the sc-linker paper:\n", + "- `ABC_Road_BLD/` — Roadmap ∪ ABC enhancer-gene links, weighted by activity (primary)\n", + "- `100kb/` — gene body ± 100 kb window, uniform weight (secondary)\n", + "\n", + "Requires `bedtools` in `PATH` for correct interval merging." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6b1eaf91", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:10:54,921] INFO:cellink.tl.external._sclinker_utils: Loaded 39,160,028 Roadmap links from RoadmapUABCannot_regions_to_genes.txt.gz. Columns: ['chr', 'start', 'end', 'TargetGene', 'tissuename']\n", + "[2026-04-08 16:11:07,696] INFO:cellink.tl.external._sclinker_utils: Filtered to 0 Roadmap rows for tissue=BLD via column 'tissuename' (keywords: ['blood', 'mononuclear', 't cell', 't-cell', 'b cell', 'b-cell', 'nk cell', 'cd4', 'cd8', 'erythro', 'hsc', 'monocyte', 'neutrophil', 'lymph'])\n", + "[2026-04-08 16:11:07,698] WARNING:cellink.tl.external._sclinker_utils: Roadmap tissue filter for tissue='BLD' returned 0 rows. Using ALL rows instead. This usually means the tissuename values don't match the keywords. Check unique tissuename values in the file:\n", + " import pandas as pd; pd.read_csv('sclinker_refs/RoadmapUABCannot_regions_to_genes.txt.gz', sep=',')[['tissuename']].drop_duplicates().head(30)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:11:42,097] INFO:cellink.tl.external._sclinker_utils: Loaded 8,923,516 ABC predictions from AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz. 
Columns: ['chr', 'start', 'end', 'name', 'class', 'activity_base', 'TargetGene', 'TargetGeneTSS', 'TargetGeneExpression', 'TargetGenePromoterActivityQuantile', 'TargetGeneIsExpressed', 'distance', 'isSelfPromoter', 'hic_contact', 'powerlaw_contact', 'powerlaw_contact_reference', 'hic_contact_pl_scaled', 'hic_pseudocount', 'hic_contact_pl_scaled_adj', 'ABC.Score.Numerator', 'ABC.Score', 'powerlaw.Score.Numerator', 'powerlaw.Score', 'CellType']\n", + "[2026-04-08 16:11:46,295] INFO:cellink.tl.external._sclinker_utils: Filtered to 1,463,008 ABC rows for tissue=BLD (keywords: ['blood', 'k562', 'gm12878', 'cd4', 'cd8', 'nk-cell', 'monocyte'])\n", + "[2026-04-08 16:11:46,297] INFO:cellink.tl.external._sclinker_utils: Using cached gene coordinates (hgnc): /home/icb/lucas.arnoldt/cellink_data/gene_coord_hgnc.txt\n", + "[2026-04-08 16:11:46,314] INFO:cellink.tl.external._sclinker_utils: Loaded 41,417 gene coordinates (gene_id_type='hgnc') from gene_coord_hgnc.txt\n", + "[2026-04-08 16:11:46,315] INFO:cellink.tl.external._sclinker_utils: Computing ABC_Road bedgraphs...\n", + "[2026-04-08 16:11:46,316] INFO:cellink.tl.external._sclinker_utils: Auto-detected Roadmap gene column: 'TargetGene'\n", + "[2026-04-08 16:11:46,316] INFO:cellink.tl.external._sclinker_utils: Auto-detected ABC gene column: 'TargetGene'\n", + "[2026-04-08 16:11:51,109] INFO:cellink.tl.external._sclinker_utils: Building Roadmap gene index ...\n", + "[2026-04-08 16:12:02,648] INFO:cellink.tl.external._sclinker_utils: 24,231 unique genes in Roadmap index\n", + "[2026-04-08 16:12:02,649] INFO:cellink.tl.external._sclinker_utils: Building ABC gene index ...\n", + "[2026-04-08 16:12:04,177] INFO:cellink.tl.external._sclinker_utils: 20,916 unique genes in ABC index\n", + "[2026-04-08 16:12:20,487] INFO:cellink.tl.external._sclinker_utils: Computing 100kb bedgraphs...\n", + "[2026-04-08 16:12:25,578] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot.gz: 8,453 / 779,354 SNPs annotated\n", + "[2026-04-08 16:12:30,514] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.annot.gz: 7,093 / 839,590 SNPs annotated\n", + "[2026-04-08 16:12:34,724] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.annot.gz: 6,622 / 706,350 SNPs annotated\n", + "[2026-04-08 16:12:38,263] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.annot.gz: 3,037 / 729,645 SNPs annotated\n", + "[2026-04-08 16:12:41,790] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.annot.gz: 5,266 / 633,015 SNPs annotated\n", + "[2026-04-08 16:12:45,271] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.annot.gz: 6,931 / 664,016 SNPs annotated\n", + "[2026-04-08 16:12:48,221] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.annot.gz: 3,610 / 589,569 SNPs annotated\n", + "[2026-04-08 16:12:51,016] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.annot.gz: 3,014 / 549,971 SNPs annotated\n", + "[2026-04-08 16:12:53,360] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.annot.gz: 4,278 / 438,106 SNPs annotated\n", + "[2026-04-08 16:12:56,088] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.annot.gz: 4,588 / 510,501 SNPs annotated\n", + "[2026-04-08 16:12:58,511] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.annot.gz: 3,173 / 493,922 SNPs annotated\n", + "[2026-04-08 16:13:01,033] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.annot.gz: 3,827 / 480,110 SNPs annotated\n", + "[2026-04-08 16:13:03,008] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.annot.gz: 3,861 / 366,200 SNPs annotated\n", + "[2026-04-08 16:13:04,556] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.annot.gz: 2,172 / 324,698 SNPs annotated\n", + "[2026-04-08 16:13:06,082] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.annot.gz: 3,003 / 287,001 SNPs annotated\n", + "[2026-04-08 16:13:07,486] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.annot.gz: 1,828 / 316,981 SNPs annotated\n", + "[2026-04-08 16:13:08,808] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.annot.gz: 1,965 / 269,222 SNPs annotated\n", + "[2026-04-08 16:13:10,267] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.annot.gz: 2,659 / 285,156 SNPs annotated\n", + "[2026-04-08 16:13:11,320] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.annot.gz: 1,871 / 232,363 SNPs annotated\n", + "[2026-04-08 16:13:12,423] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.annot.gz: 3,059 / 221,626 SNPs annotated\n", + "[2026-04-08 16:13:13,110] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.annot.gz: 1,778 / 138,712 SNPs annotated\n", + "[2026-04-08 16:13:13,806] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.annot.gz: 2,013 / 141,123 SNPs annotated\n", + "[2026-04-08 16:13:13,809] INFO:cellink.tl.external._sclinker_utils: B naive_L2/ABC_Road_BLD: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.*.annot.gz\n", + "[2026-04-08 16:13:17,134] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.1.annot.gz: 100,165 / 779,354 SNPs annotated\n", + "[2026-04-08 16:13:20,648] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.2.annot.gz: 90,123 / 839,590 SNPs annotated\n", + "[2026-04-08 16:13:23,592] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.3.annot.gz: 80,260 / 706,350 SNPs annotated\n", + "[2026-04-08 16:13:26,507] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.4.annot.gz: 43,495 / 729,645 SNPs annotated\n", + "[2026-04-08 16:13:29,179] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.5.annot.gz: 68,942 / 633,015 SNPs annotated\n", + "[2026-04-08 16:13:31,958] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.6.annot.gz: 86,146 / 664,016 SNPs annotated\n", + "[2026-04-08 16:13:34,421] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.7.annot.gz: 56,979 / 589,569 SNPs annotated\n", + "[2026-04-08 16:13:36,660] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.8.annot.gz: 46,871 / 549,971 SNPs annotated\n", + "[2026-04-08 16:13:38,441] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.9.annot.gz: 37,407 / 438,106 SNPs annotated\n", + "[2026-04-08 16:13:40,547] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.10.annot.gz: 53,274 / 510,501 SNPs annotated\n", + "[2026-04-08 16:13:42,546] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.11.annot.gz: 40,559 / 493,922 SNPs annotated\n", + "[2026-04-08 16:13:44,499] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.12.annot.gz: 44,685 / 480,110 SNPs annotated\n", + "[2026-04-08 16:13:46,040] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.13.annot.gz: 39,023 / 366,200 SNPs annotated\n", + "[2026-04-08 16:13:47,377] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.14.annot.gz: 35,940 / 324,698 SNPs annotated\n", + "[2026-04-08 16:13:48,571] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.15.annot.gz: 36,858 / 287,001 SNPs annotated\n", + "[2026-04-08 16:13:49,913] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.16.annot.gz: 38,014 / 316,981 SNPs annotated\n", + "[2026-04-08 16:13:51,031] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.17.annot.gz: 29,245 / 269,222 SNPs annotated\n", + "[2026-04-08 16:13:52,230] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.18.annot.gz: 22,120 / 285,156 SNPs annotated\n", + "[2026-04-08 16:13:53,233] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.19.annot.gz: 39,485 / 232,363 SNPs annotated\n", + "[2026-04-08 16:13:54,143] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.20.annot.gz: 16,589 / 221,626 SNPs annotated\n", + "[2026-04-08 16:13:54,756] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.21.annot.gz: 16,359 / 138,712 SNPs annotated\n", + "[2026-04-08 16:13:55,394] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.22.annot.gz: 24,828 / 141,123 SNPs annotated\n", + "[2026-04-08 16:13:55,396] INFO:cellink.tl.external._sclinker_utils: B naive_L2/100kb: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.*.annot.gz\n", + "[2026-04-08 16:14:00,310] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.annot.gz: 8,984 / 779,354 SNPs annotated\n", + "[2026-04-08 16:14:05,230] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.annot.gz: 7,318 / 839,590 SNPs annotated\n", + "[2026-04-08 16:14:09,620] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.annot.gz: 8,152 / 706,350 SNPs annotated\n", + "[2026-04-08 16:14:13,487] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.annot.gz: 4,953 / 729,645 SNPs annotated\n", + "[2026-04-08 16:14:16,834] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.annot.gz: 5,028 / 633,015 SNPs annotated\n", + "[2026-04-08 16:14:20,317] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.annot.gz: 6,634 / 664,016 SNPs annotated\n", + "[2026-04-08 16:14:23,456] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.annot.gz: 5,148 / 589,569 SNPs annotated\n", + "[2026-04-08 16:14:26,336] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.annot.gz: 4,342 / 549,971 SNPs annotated\n", + "[2026-04-08 16:14:28,418] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.annot.gz: 2,936 / 438,106 SNPs annotated\n", + "[2026-04-08 16:14:31,222] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.annot.gz: 5,270 / 510,501 SNPs annotated\n", + "[2026-04-08 16:14:33,588] 
INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.annot.gz: 3,465 / 493,922 SNPs annotated\n", + "[2026-04-08 16:14:36,204] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.annot.gz: 3,598 / 480,110 SNPs annotated\n", + "[2026-04-08 16:14:38,144] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.annot.gz: 3,842 / 366,200 SNPs annotated\n", + "[2026-04-08 16:14:40,611] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.annot.gz: 4,123 / 324,698 SNPs annotated\n", + "[2026-04-08 16:14:42,261] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.annot.gz: 3,543 / 287,001 SNPs annotated\n", + "[2026-04-08 16:14:43,878] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.annot.gz: 3,288 / 316,981 SNPs annotated\n", + "[2026-04-08 16:14:45,344] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.annot.gz: 5,221 / 269,222 SNPs annotated\n", + "[2026-04-08 16:14:46,677] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.annot.gz: 2,234 / 285,156 SNPs 
annotated\n", + "[2026-04-08 16:14:47,775] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.annot.gz: 2,227 / 232,363 SNPs annotated\n", + "[2026-04-08 16:14:49,000] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.annot.gz: 4,467 / 221,626 SNPs annotated\n", + "[2026-04-08 16:14:49,742] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.annot.gz: 4,353 / 138,712 SNPs annotated\n", + "[2026-04-08 16:14:50,430] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.annot.gz: 3,160 / 141,123 SNPs annotated\n", + "[2026-04-08 16:14:50,433] INFO:cellink.tl.external._sclinker_utils: CD4 Naive_L2/ABC_Road_BLD: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.*.annot.gz\n", + "[2026-04-08 16:14:53,781] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.1.annot.gz: 107,761 / 779,354 SNPs annotated\n", + "[2026-04-08 16:14:57,573] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.2.annot.gz: 91,129 / 839,590 SNPs annotated\n", + "[2026-04-08 16:15:00,576] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.3.annot.gz: 92,609 
/ 706,350 SNPs annotated\n", + "[2026-04-08 16:15:03,556] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.4.annot.gz: 58,310 / 729,645 SNPs annotated\n", + "[2026-04-08 16:15:06,187] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.5.annot.gz: 76,964 / 633,015 SNPs annotated\n", + "[2026-04-08 16:15:08,994] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.6.annot.gz: 91,884 / 664,016 SNPs annotated\n", + "[2026-04-08 16:15:11,433] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.7.annot.gz: 55,833 / 589,569 SNPs annotated\n", + "[2026-04-08 16:15:13,735] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.8.annot.gz: 55,720 / 549,971 SNPs annotated\n", + "[2026-04-08 16:15:15,554] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.9.annot.gz: 37,332 / 438,106 SNPs annotated\n", + "[2026-04-08 16:15:17,715] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.10.annot.gz: 67,217 / 510,501 SNPs annotated\n", + "[2026-04-08 16:15:19,789] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.11.annot.gz: 56,279 / 493,922 SNPs 
annotated\n", + "[2026-04-08 16:15:21,795] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.12.annot.gz: 56,987 / 480,110 SNPs annotated\n", + "[2026-04-08 16:15:23,346] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.13.annot.gz: 40,504 / 366,200 SNPs annotated\n", + "[2026-04-08 16:15:24,703] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.14.annot.gz: 45,277 / 324,698 SNPs annotated\n", + "[2026-04-08 16:15:26,010] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.15.annot.gz: 52,143 / 287,001 SNPs annotated\n", + "[2026-04-08 16:15:27,357] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.16.annot.gz: 42,638 / 316,981 SNPs annotated\n", + "[2026-04-08 16:15:28,531] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.17.annot.gz: 47,743 / 269,222 SNPs annotated\n", + "[2026-04-08 16:15:29,725] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.18.annot.gz: 25,391 / 285,156 SNPs annotated\n", + "[2026-04-08 16:15:30,738] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.19.annot.gz: 38,850 / 232,363 SNPs annotated\n", + 
"[2026-04-08 16:15:31,660] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.20.annot.gz: 25,489 / 221,626 SNPs annotated\n", + "[2026-04-08 16:15:32,300] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.21.annot.gz: 32,247 / 138,712 SNPs annotated\n", + "[2026-04-08 16:15:32,936] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.22.annot.gz: 34,162 / 141,123 SNPs annotated\n", + "[2026-04-08 16:15:32,939] INFO:cellink.tl.external._sclinker_utils: CD4 Naive_L2/100kb: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.*.annot.gz\n", + "[2026-04-08 16:15:37,967] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.annot.gz: 9,833 / 779,354 SNPs annotated\n", + "[2026-04-08 16:15:43,381] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.annot.gz: 8,069 / 839,590 SNPs annotated\n", + "[2026-04-08 16:15:48,413] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.annot.gz: 10,845 / 706,350 SNPs annotated\n", + "[2026-04-08 16:15:52,303] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.annot.gz: 3,833 / 729,645 SNPs annotated\n", + "[2026-04-08 
16:15:56,109] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.annot.gz: 7,872 / 633,015 SNPs annotated\n", + "[2026-04-08 16:15:59,912] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.annot.gz: 8,695 / 664,016 SNPs annotated\n", + "[2026-04-08 16:16:03,216] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.annot.gz: 4,952 / 589,569 SNPs annotated\n", + "[2026-04-08 16:16:06,193] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.annot.gz: 4,545 / 549,971 SNPs annotated\n", + "[2026-04-08 16:16:08,404] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.annot.gz: 3,800 / 438,106 SNPs annotated\n", + "[2026-04-08 16:16:11,235] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.annot.gz: 6,194 / 510,501 SNPs annotated\n", + "[2026-04-08 16:16:13,706] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.annot.gz: 3,721 / 493,922 SNPs annotated\n", + "[2026-04-08 16:16:16,268] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.annot.gz: 4,537 / 480,110 SNPs annotated\n", + "[2026-04-08 
16:16:18,450] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.annot.gz: 5,318 / 366,200 SNPs annotated\n", + "[2026-04-08 16:16:20,042] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.annot.gz: 3,634 / 324,698 SNPs annotated\n", + "[2026-04-08 16:16:21,552] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.annot.gz: 3,864 / 287,001 SNPs annotated\n", + "[2026-04-08 16:16:23,177] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.annot.gz: 4,058 / 316,981 SNPs annotated\n", + "[2026-04-08 16:16:24,742] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.annot.gz: 6,103 / 269,222 SNPs annotated\n", + "[2026-04-08 16:16:26,202] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.annot.gz: 3,298 / 285,156 SNPs annotated\n", + "[2026-04-08 16:16:27,286] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.annot.gz: 2,507 / 232,363 SNPs annotated\n", + "[2026-04-08 16:16:28,542] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.annot.gz: 4,871 / 221,626 SNPs annotated\n", + "[2026-04-08 
16:16:29,234] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.annot.gz: 3,467 / 138,712 SNPs annotated\n", + "[2026-04-08 16:16:29,943] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.annot.gz: 3,513 / 141,123 SNPs annotated\n", + "[2026-04-08 16:16:29,946] INFO:cellink.tl.external._sclinker_utils: CD4 TCM_L2/ABC_Road_BLD: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.*.annot.gz\n", + "[2026-04-08 16:16:33,309] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.1.annot.gz: 112,026 / 779,354 SNPs annotated\n", + "[2026-04-08 16:16:36,927] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.2.annot.gz: 124,896 / 839,590 SNPs annotated\n", + "[2026-04-08 16:16:39,988] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.3.annot.gz: 127,883 / 706,350 SNPs annotated\n", + "[2026-04-08 16:16:43,030] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.4.annot.gz: 70,209 / 729,645 SNPs annotated\n", + "[2026-04-08 16:16:45,724] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.5.annot.gz: 86,381 / 633,015 SNPs annotated\n", + "[2026-04-08 16:16:48,563] 
INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.6.annot.gz: 98,575 / 664,016 SNPs annotated\n", + "[2026-04-08 16:16:51,058] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.7.annot.gz: 73,803 / 589,569 SNPs annotated\n", + "[2026-04-08 16:16:53,373] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.8.annot.gz: 75,046 / 549,971 SNPs annotated\n", + "[2026-04-08 16:16:55,192] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.9.annot.gz: 46,906 / 438,106 SNPs annotated\n", + "[2026-04-08 16:16:57,358] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.10.annot.gz: 71,672 / 510,501 SNPs annotated\n", + "[2026-04-08 16:16:59,460] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.11.annot.gz: 76,931 / 493,922 SNPs annotated\n", + "[2026-04-08 16:17:01,510] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.12.annot.gz: 60,229 / 480,110 SNPs annotated\n", + "[2026-04-08 16:17:03,057] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.13.annot.gz: 56,141 / 366,200 SNPs annotated\n", + "[2026-04-08 16:17:04,455] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.14.annot.gz: 52,025 / 324,698 SNPs annotated\n", + "[2026-04-08 16:17:05,714] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.15.annot.gz: 53,708 / 287,001 SNPs annotated\n", + "[2026-04-08 16:17:07,104] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.16.annot.gz: 56,127 / 316,981 SNPs annotated\n", + "[2026-04-08 16:17:08,299] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.17.annot.gz: 56,284 / 269,222 SNPs annotated\n", + "[2026-04-08 16:17:09,534] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.18.annot.gz: 38,733 / 285,156 SNPs annotated\n", + "[2026-04-08 16:17:10,574] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.19.annot.gz: 54,715 / 232,363 SNPs annotated\n", + "[2026-04-08 16:17:11,492] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.20.annot.gz: 24,369 / 221,626 SNPs annotated\n", + "[2026-04-08 16:17:12,092] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.21.annot.gz: 24,850 / 138,712 SNPs annotated\n", + "[2026-04-08 16:17:12,739] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.22.annot.gz: 35,719 / 141,123 SNPs annotated\n", + "[2026-04-08 16:17:12,741] INFO:cellink.tl.external._sclinker_utils: CD4 TCM_L2/100kb: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.*.annot.gz\n", + "[2026-04-08 16:17:17,034] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.annot.gz: 4,835 / 779,354 SNPs annotated\n", + "[2026-04-08 16:17:22,031] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.annot.gz: 7,082 / 839,590 SNPs annotated\n", + "[2026-04-08 16:17:25,796] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.annot.gz: 4,861 / 706,350 SNPs annotated\n", + "[2026-04-08 16:17:29,160] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.annot.gz: 2,559 / 729,645 SNPs annotated\n", + "[2026-04-08 16:17:32,432] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.annot.gz: 3,972 / 633,015 SNPs annotated\n", + "[2026-04-08 16:17:35,862] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.annot.gz: 7,186 / 664,016 SNPs annotated\n", + "[2026-04-08 16:17:39,010] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.annot.gz: 4,527 / 589,569 SNPs annotated\n", + "[2026-04-08 16:17:41,614] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.annot.gz: 2,512 / 549,971 SNPs annotated\n", + "[2026-04-08 16:17:43,772] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.annot.gz: 3,126 / 438,106 SNPs annotated\n", + "[2026-04-08 16:17:46,532] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.annot.gz: 5,326 / 510,501 SNPs annotated\n", + "[2026-04-08 16:17:48,879] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.annot.gz: 3,012 / 493,922 SNPs annotated\n", + "[2026-04-08 16:17:51,169] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.annot.gz: 2,798 / 480,110 SNPs annotated\n", + "[2026-04-08 16:17:52,965] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.annot.gz: 2,288 / 366,200 SNPs annotated\n", + "[2026-04-08 16:17:54,344] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.annot.gz: 1,580 / 324,698 SNPs annotated\n", + "[2026-04-08 16:17:55,703] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.annot.gz: 1,770 / 287,001 SNPs annotated\n", + "[2026-04-08 16:17:57,287] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.annot.gz: 4,285 / 316,981 SNPs annotated\n", + "[2026-04-08 16:17:58,601] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.annot.gz: 3,336 / 269,222 SNPs annotated\n", + "[2026-04-08 16:17:59,814] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.annot.gz: 1,321 / 285,156 SNPs annotated\n", + "[2026-04-08 16:18:00,826] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.annot.gz: 1,852 / 232,363 SNPs annotated\n", + "[2026-04-08 16:18:01,868] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.annot.gz: 2,387 / 221,626 SNPs annotated\n", + "[2026-04-08 16:18:02,512] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.annot.gz: 2,046 / 138,712 SNPs annotated\n", + "[2026-04-08 16:18:03,175] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.annot.gz: 2,080 / 141,123 SNPs annotated\n", + "[2026-04-08 16:18:03,177] INFO:cellink.tl.external._sclinker_utils: CD8 
TEM_L2/ABC_Road_BLD: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.*.annot.gz\n", + "[2026-04-08 16:18:06,477] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.1.annot.gz: 80,760 / 779,354 SNPs annotated\n", + "[2026-04-08 16:18:10,027] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.2.annot.gz: 94,928 / 839,590 SNPs annotated\n", + "[2026-04-08 16:18:12,934] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.3.annot.gz: 58,376 / 706,350 SNPs annotated\n", + "[2026-04-08 16:18:15,838] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.4.annot.gz: 44,437 / 729,645 SNPs annotated\n", + "[2026-04-08 16:18:18,445] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.5.annot.gz: 59,293 / 633,015 SNPs annotated\n", + "[2026-04-08 16:18:21,188] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.6.annot.gz: 69,647 / 664,016 SNPs annotated\n", + "[2026-04-08 16:18:23,670] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.7.annot.gz: 59,709 / 589,569 SNPs annotated\n", + "[2026-04-08 16:18:25,921] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.8.annot.gz: 50,875 / 549,971 SNPs annotated\n", + "[2026-04-08 16:18:27,695] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.9.annot.gz: 30,761 / 438,106 SNPs annotated\n", + "[2026-04-08 16:18:29,824] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.10.annot.gz: 55,085 / 510,501 SNPs annotated\n", + "[2026-04-08 16:18:31,861] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.11.annot.gz: 43,642 / 493,922 SNPs annotated\n", + "[2026-04-08 16:18:33,896] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.12.annot.gz: 55,263 / 480,110 SNPs annotated\n", + "[2026-04-08 16:18:35,389] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.13.annot.gz: 34,143 / 366,200 SNPs annotated\n", + "[2026-04-08 16:18:36,772] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.14.annot.gz: 45,757 / 324,698 SNPs annotated\n", + "[2026-04-08 16:18:37,983] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.15.annot.gz: 28,207 / 287,001 SNPs annotated\n", + "[2026-04-08 16:18:39,317] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.16.annot.gz: 38,463 / 316,981 SNPs annotated\n", + "[2026-04-08 16:18:40,520] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.17.annot.gz: 42,201 / 269,222 SNPs annotated\n", + "[2026-04-08 16:18:41,685] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.18.annot.gz: 21,540 / 285,156 SNPs annotated\n", + "[2026-04-08 16:18:42,682] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.19.annot.gz: 38,485 / 232,363 SNPs annotated\n", + "[2026-04-08 16:18:43,617] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.20.annot.gz: 25,140 / 221,626 SNPs annotated\n", + "[2026-04-08 16:18:44,202] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.21.annot.gz: 19,354 / 138,712 SNPs annotated\n", + "[2026-04-08 16:18:44,886] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.22.annot.gz: 32,785 / 141,123 SNPs annotated\n", + "[2026-04-08 16:18:44,888] INFO:cellink.tl.external._sclinker_utils: CD8 TEM_L2/100kb: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.*.annot.gz\n", + "[2026-04-08 16:18:49,696] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.1.annot.gz: 8,595 / 779,354 SNPs annotated\n", + "[2026-04-08 16:18:54,781] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.2.annot.gz: 7,941 / 839,590 SNPs annotated\n", + "[2026-04-08 16:18:59,109] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.3.annot.gz: 7,788 / 706,350 SNPs annotated\n", + "[2026-04-08 16:19:02,955] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.4.annot.gz: 4,554 / 729,645 SNPs annotated\n", + "[2026-04-08 16:19:06,317] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.5.annot.gz: 4,666 / 633,015 SNPs annotated\n", + "[2026-04-08 16:19:09,459] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.6.annot.gz: 4,576 / 664,016 SNPs annotated\n", + "[2026-04-08 16:19:12,778] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.7.annot.gz: 6,079 / 589,569 SNPs annotated\n", + "[2026-04-08 16:19:15,512] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.8.annot.gz: 3,387 / 549,971 SNPs annotated\n", + "[2026-04-08 16:19:17,663] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.9.annot.gz: 4,021 / 438,106 SNPs annotated\n", + "[2026-04-08 16:19:20,402] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.10.annot.gz: 4,899 / 510,501 SNPs annotated\n", + "[2026-04-08 16:19:22,765] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.11.annot.gz: 3,073 / 493,922 SNPs annotated\n", + "[2026-04-08 16:19:25,075] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.12.annot.gz: 3,451 / 480,110 SNPs annotated\n", + "[2026-04-08 16:19:26,812] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.13.annot.gz: 2,492 / 366,200 SNPs annotated\n", + "[2026-04-08 16:19:28,307] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.14.annot.gz: 1,980 / 324,698 SNPs annotated\n", + "[2026-04-08 16:19:29,654] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.15.annot.gz: 2,117 / 287,001 SNPs annotated\n", + "[2026-04-08 16:19:31,316] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.16.annot.gz: 4,606 / 316,981 SNPs annotated\n", + "[2026-04-08 16:19:32,687] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.17.annot.gz: 3,488 / 269,222 SNPs annotated\n", + "[2026-04-08 16:19:34,036] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.18.annot.gz: 2,428 / 285,156 SNPs annotated\n", + "[2026-04-08 16:19:35,131] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.19.annot.gz: 2,454 / 232,363 SNPs annotated\n", + "[2026-04-08 16:19:36,175] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.20.annot.gz: 2,318 / 221,626 SNPs annotated\n", + "[2026-04-08 16:19:36,880] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.21.annot.gz: 3,042 / 138,712 SNPs annotated\n", + "[2026-04-08 16:19:37,505] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.22.annot.gz: 2,066 / 141,123 SNPs annotated\n", + "[2026-04-08 16:19:37,507] INFO:cellink.tl.external._sclinker_utils: NK_L2/ABC_Road_BLD: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.*.annot.gz\n", + "[2026-04-08 16:19:40,918] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.1.annot.gz: 107,789 / 779,354 SNPs annotated\n", + "[2026-04-08 16:19:44,474] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.2.annot.gz: 117,329 / 839,590 SNPs annotated\n", + "[2026-04-08 16:19:47,383] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.3.annot.gz: 67,988 / 706,350 SNPs annotated\n", + "[2026-04-08 16:19:50,368] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.4.annot.gz: 65,378 / 729,645 SNPs annotated\n", + "[2026-04-08 16:19:52,997] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.5.annot.gz: 66,878 / 633,015 SNPs annotated\n", + "[2026-04-08 16:19:55,783] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.6.annot.gz: 76,936 / 664,016 SNPs annotated\n", + "[2026-04-08 16:19:58,250] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.7.annot.gz: 62,765 / 589,569 SNPs annotated\n", + "[2026-04-08 16:20:00,526] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.8.annot.gz: 58,020 / 549,971 SNPs annotated\n", + "[2026-04-08 16:20:02,348] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.9.annot.gz: 49,163 / 438,106 SNPs annotated\n", + "[2026-04-08 16:20:04,517] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.10.annot.gz: 64,019 / 
510,501 SNPs annotated\n", + "[2026-04-08 16:20:06,656] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.11.annot.gz: 42,444 / 493,922 SNPs annotated\n", + "[2026-04-08 16:20:08,663] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.12.annot.gz: 62,487 / 480,110 SNPs annotated\n", + "[2026-04-08 16:20:10,157] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.13.annot.gz: 26,983 / 366,200 SNPs annotated\n", + "[2026-04-08 16:20:11,522] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.14.annot.gz: 46,483 / 324,698 SNPs annotated\n", + "[2026-04-08 16:20:12,757] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.15.annot.gz: 41,981 / 287,001 SNPs annotated\n", + "[2026-04-08 16:20:14,107] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.16.annot.gz: 49,727 / 316,981 SNPs annotated\n", + "[2026-04-08 16:20:15,473] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.17.annot.gz: 45,536 / 269,222 SNPs annotated\n", + "[2026-04-08 16:20:16,637] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.18.annot.gz: 23,722 / 285,156 SNPs annotated\n", + "[2026-04-08 16:20:17,680] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.19.annot.gz: 52,862 / 232,363 SNPs annotated\n", + "[2026-04-08 16:20:18,615] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.20.annot.gz: 29,540 / 221,626 SNPs annotated\n", + "[2026-04-08 16:20:19,215] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.21.annot.gz: 23,790 / 138,712 SNPs annotated\n", + "[2026-04-08 16:20:19,844] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.22.annot.gz: 32,099 / 141,123 SNPs annotated\n", + "[2026-04-08 16:20:19,846] INFO:cellink.tl.external._sclinker_utils: NK_L2/100kb: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.*.annot.gz\n", + "[2026-04-08 16:20:47,495] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.1.annot.gz: 124,790 / 779,354 SNPs annotated\n", + "[2026-04-08 16:21:13,667] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.2.annot.gz: 105,183 / 839,590 SNPs annotated\n", + "[2026-04-08 16:21:32,357] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.3.annot.gz: 83,926 / 706,350 SNPs annotated\n", + "[2026-04-08 16:21:48,045] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.4.annot.gz: 68,465 / 729,645 SNPs annotated\n", + "[2026-04-08 16:22:01,870] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.5.annot.gz: 69,503 / 633,015 SNPs annotated\n", + "[2026-04-08 16:22:17,268] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.6.annot.gz: 87,642 / 664,016 SNPs annotated\n", + "[2026-04-08 16:22:29,717] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.7.annot.gz: 67,574 / 589,569 SNPs annotated\n", + "[2026-04-08 16:22:40,401] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.8.annot.gz: 61,199 / 549,971 SNPs annotated\n", + "[2026-04-08 16:22:48,183] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.9.annot.gz: 57,907 / 438,106 SNPs annotated\n", + "[2026-04-08 16:22:58,721] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.10.annot.gz: 67,840 / 510,501 SNPs annotated\n", + "[2026-04-08 16:23:09,365] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.11.annot.gz: 70,997 / 493,922 SNPs annotated\n", + "[2026-04-08 16:23:19,808] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.12.annot.gz: 66,479 / 480,110 SNPs annotated\n", + "[2026-04-08 16:23:24,810] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.13.annot.gz: 34,133 / 366,200 SNPs annotated\n", + "[2026-04-08 16:23:29,638] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.14.annot.gz: 41,454 / 324,698 SNPs annotated\n", + "[2026-04-08 16:23:34,658] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.15.annot.gz: 44,777 / 287,001 SNPs annotated\n", + "[2026-04-08 16:23:39,767] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.16.annot.gz: 52,346 / 316,981 SNPs annotated\n", + "[2026-04-08 16:23:44,957] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.17.annot.gz: 60,832 / 269,222 SNPs annotated\n", + "[2026-04-08 16:23:48,509] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.18.annot.gz: 30,610 / 285,156 SNPs annotated\n", + "[2026-04-08 16:23:52,687] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.19.annot.gz: 57,930 / 232,363 SNPs annotated\n", + "[2026-04-08 16:23:55,977] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.20.annot.gz: 39,748 / 221,626 SNPs annotated\n", + "[2026-04-08 16:23:57,341] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.21.annot.gz: 19,618 / 138,712 SNPs annotated\n", + "[2026-04-08 16:23:59,125] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.22.annot.gz: 32,438 / 141,123 SNPs annotated\n", + "[2026-04-08 16:23:59,127] INFO:cellink.tl.external._sclinker_utils: AllCoding/ABC_Road_BLD: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.*.annot.gz\n", + "[2026-04-08 16:24:03,577] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.1.annot.gz: 724,640 / 779,354 SNPs annotated\n", + "[2026-04-08 16:24:08,352] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.2.annot.gz: 823,850 / 839,590 SNPs annotated\n", + "[2026-04-08 16:24:12,251] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.3.annot.gz: 639,094 / 706,350 SNPs annotated\n", + "[2026-04-08 16:24:16,350] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.4.annot.gz: 729,404 / 729,645 SNPs annotated\n", + "[2026-04-08 16:24:19,908] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.5.annot.gz: 620,333 / 633,015 SNPs annotated\n", + "[2026-04-08 16:24:23,654] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.6.annot.gz: 645,660 / 664,016 SNPs annotated\n", + "[2026-04-08 16:24:26,871] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.7.annot.gz: 522,237 / 589,569 SNPs annotated\n", + "[2026-04-08 16:24:29,929] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.8.annot.gz: 510,726 / 549,971 SNPs annotated\n", + "[2026-04-08 16:24:32,410] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.9.annot.gz: 381,850 / 438,106 SNPs annotated\n", + "[2026-04-08 16:24:35,310] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.10.annot.gz: 461,100 / 510,501 SNPs annotated\n", + "[2026-04-08 16:24:38,075] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.11.annot.gz: 448,908 / 493,922 SNPs annotated\n", + "[2026-04-08 16:24:40,755] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.12.annot.gz: 457,954 / 480,110 SNPs annotated\n", + "[2026-04-08 16:24:42,884] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.13.annot.gz: 366,200 / 366,200 SNPs annotated\n", + "[2026-04-08 16:24:44,689] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.14.annot.gz: 290,248 / 324,698 SNPs annotated\n", + "[2026-04-08 16:24:46,334] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.15.annot.gz: 287,001 / 287,001 SNPs annotated\n", + "[2026-04-08 16:24:48,156] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.16.annot.gz: 291,139 / 316,981 SNPs annotated\n", + "[2026-04-08 16:24:49,665] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.17.annot.gz: 252,680 / 269,222 SNPs annotated\n", + "[2026-04-08 16:24:51,273] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.18.annot.gz: 281,995 / 285,156 SNPs annotated\n", + "[2026-04-08 16:24:52,656] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.19.annot.gz: 228,546 / 232,363 SNPs annotated\n", + "[2026-04-08 16:24:53,896] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.20.annot.gz: 213,877 / 221,626 SNPs annotated\n", + "[2026-04-08 16:24:54,764] INFO:cellink.tl.external._sclinker_utils: Wrote 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.21.annot.gz: 138,712 / 138,712 SNPs annotated\n", + "[2026-04-08 16:24:55,569] INFO:cellink.tl.external._sclinker_utils: Wrote /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.22.annot.gz: 141,123 / 141,123 SNPs annotated\n", + "[2026-04-08 16:24:55,571] INFO:cellink.tl.external._sclinker_utils: AllCoding/100kb: annotations at /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.*.annot.gz\n", + "['B naive_L2', 'CD4 Naive_L2', 'CD4 TCM_L2']\n" + ] + } + ], + "source": [ + "# Pass tissue= to filter the combined file to the relevant tissue.\n", + "# Pass eid_map_file= so Roadmap EIDs are mapped correctly.\n", + "roadmap = load_roadmap_links(\n", + " enhancer_refs[\"roadmap\"],\n", + " tissue=\"BLD\",\n", + " eid_map_file=enhancer_refs[\"roadmap_eid\"],\n", + ")\n", + "abc = load_abc_links(\n", + " enhancer_refs[\"abc\"],\n", + " tissue=\"BLD\",\n", + ")\n", + "# cellink auto-locates ENSG_coord.txt (container, cache, or downloads from GitHub)\n", + "gene_annot = load_gene_annotation()\n", + "\n", + "annotation_prefixes = genescores_to_annotations(\n", + " genescores=all_genescores,\n", + " roadmap_links=roadmap,\n", + " abc_links=abc,\n", + " gene_annotation=gene_annot,\n", + " bim_prefix=bim_prefix,\n", + " out_dir=\"annotations\",\n", + " tissue=\"BLD\",\n", + " chromosomes=list(range(1, 23)),\n", + " save_bedgraphs=True,\n", + " use_bedtools=True,\n", + ")\n", + "print(list(annotation_prefixes.keys())[:3])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "00f79a25", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B naive_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.',\n", + " 
'100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.'},\n", + " 'CD4 Naive_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.'},\n", + " 'CD4 TCM_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.'},\n", + " 'CD8 TEM_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.'},\n", + " 'NK_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.'},\n", + " 'AllCoding': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.'}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "annotation_prefixes" + ] + }, + { + "cell_type": "markdown", + "id": "cdfc05a9", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 4: Step 3a — Compute LD scores\n", + "\n", + "One `.l2.ldscore.gz` file per chromosome per 
program per strategy.\n", + "This loops over all (program, strategy, chromosome) combinations and calls\n", + "`ldsc.py --l2` via the existing cellink `compute_ld_scores_with_annotations_from_bimfile`." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "681bce07", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:24:56,926] INFO:cellink.tl.external._sclinker_utils: Computing LD scores: 264 jobs (6 programs × 2 strategies × 22 chromosomes), n_jobs=28\n", + "[2026-04-08 16:24:56,931] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,960] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,973] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,974] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,975] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,976] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,977] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,978] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,978] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,979] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,979] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,980] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,981] INFO:cellink.tl._runner: 
Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,981] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,981] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,981] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data 
/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,981] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,982] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,982] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot 
/data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,982] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,983] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,983] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,983] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:56,984] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,000] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,001] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,001] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,002] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,003] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,031] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,033] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,033] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot 
/data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,033] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,033] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,033] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,033] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,034] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,034] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,035] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,035] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,040] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,041] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,042] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,045] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,046] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.18 --ld-wind-cm 
1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,046] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,049] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,050] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,050] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,050] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.annot.gz --out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,052] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.1.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,056] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.2.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,058] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.3.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,059] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.4.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,060] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.5.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:24:57,061] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.6.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 16:26:14,305] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 0.6063\n", + "std 0.1414 3.4633\n", + "min 0.0051 -0.2929\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0001\n", + "75% 0.3650 0.0167\n", + "max 0.5000 79.2382\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0378\n", + "L2 -0.0378 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1063.4008\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0077\n", + "std 0.3048\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 27.6579\n", + "Analysis finished at Wed Apr 8 16:26:13 2026\n", + "Total time elapsed: 1.0m:12.79s\n", + "\n", + "[2026-04-08 16:26:14,409] INFO:cellink.tl.external._sclinker_utils: [1/264] done: B naive_L2/ABC_Road_BLD/chr21\n", + "[2026-04-08 16:26:14,409] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:14,419] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.7.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:35,342] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After 
filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 1.0562\n", + "std 0.1435 13.1895\n", + "min 0.0051 -3.9612\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0004\n", + "75% 0.3579 0.0278\n", + "max 0.5000 514.1584\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0131\n", + "L2 0.0131 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3658.6591\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0259\n", + "std 1.3313\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 124.3109\n", + "Analysis finished at Wed Apr 8 16:26:35 2026\n", + "Total time elapsed: 1.0m:33.93s\n", + "\n", + "[2026-04-08 16:26:35,345] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:35,346] INFO:cellink.tl.external._sclinker_utils: [2/264] done: B naive_L2/ABC_Road_BLD/chr22\n", + "[2026-04-08 16:26:35,348] 
INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.8.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:36,020] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores 
from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 0.4167\n", + "std 0.1444 5.2074\n", + "min 0.0051 -0.3726\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0000\n", + "75% 0.3681 0.0011\n", + "max 0.5000 112.8975\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0268\n", + "L2 0.0268 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 767.4959\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0029\n", + "std 0.0965\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 7.8277\n", + "Analysis finished at Wed Apr 8 16:26:35 2026\n", + "Total time elapsed: 1.0m:34.66s\n", + "\n", + "[2026-04-08 16:26:36,022] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:36,022] INFO:cellink.tl.external._sclinker_utils: [3/264] done: B naive_L2/ABC_Road_BLD/chr17\n", + "[2026-04-08 16:26:36,030] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data 
-B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.9.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:40,950] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be 
printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 5.4326\n", + "std 0.1427 53.7027\n", + "min 0.0051 -5.1547\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0090\n", + "max 0.5000 1277.5235\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0003\n", + "L2 -0.0003 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 13241.1358\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0597\n", + "std 1.9440\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 162.0000\n", + "Analysis finished at Wed Apr 8 16:26:40 2026\n", + "Total time elapsed: 1.0m:39.6s\n", + "\n", + "[2026-04-08 16:26:40,951] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:26:40,951] INFO:cellink.tl.external._sclinker_utils: [4/264] done: B naive_L2/ABC_Road_BLD/chr20\n", + "[2026-04-08 16:26:40,954] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py 
--l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.10.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:12,033] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.l2.ldscore.gz\n", + 
"\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 0.0402\n", + "std 0.1423 0.2253\n", + "min 0.0051 -0.0366\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0006\n", + "max 0.5000 5.5059\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0347\n", + "L2 0.0347 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 184.2945\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0006\n", + "std 0.0165\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.7076\n", + "Analysis finished at Wed Apr 8 16:27:11 2026\n", + "Total time elapsed: 2.0m:10.52s\n", + "\n", + "[2026-04-08 16:27:12,035] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:12,035] INFO:cellink.tl.external._sclinker_utils: [5/264] done: B naive_L2/ABC_Road_BLD/chr18\n", + "[2026-04-08 16:27:12,050] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.11.annot.gz --out 
/data/ldscores/B_naive_L2/100kb/B_naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:29,317] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 
17.0559\n", + "std 0.1424 206.9853\n", + "min 0.0051 -54.9897\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3507 0.0013\n", + "max 0.5000 6688.4476\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0254\n", + "L2 0.0254 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 77835.8722\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.2397\n", + "std 13.6097\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1107.1987\n", + "Analysis finished at Wed Apr 8 16:27:29 2026\n", + "Total time elapsed: 2.0m:27.9s\n", + "\n", + "[2026-04-08 16:27:29,319] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:29,322] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.12.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:29,319] INFO:cellink.tl.external._sclinker_utils: [6/264] 
done: B naive_L2/ABC_Road_BLD/chr14\n", + "[2026-04-08 16:27:40,975] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 0.4810\n", + "std 0.1426 4.5539\n", + "min 0.0051 -0.7103\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0000\n", + "75% 
0.3599 0.0000\n", + "max 0.5000 177.7965\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0235\n", + "L2 0.0235 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1659.8564\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0052\n", + "std 0.1981\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 17.9475\n", + "Analysis finished at Wed Apr 8 16:27:40 2026\n", + "Total time elapsed: 2.0m:39.59s\n", + "\n", + "[2026-04-08 16:27:40,978] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:40,979] INFO:cellink.tl.external._sclinker_utils: [7/264] done: B naive_L2/ABC_Road_BLD/chr16\n", + "[2026-04-08 16:27:40,987] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.13.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:47,340] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 3.6556\n", + "std 0.1441 24.8351\n", + "min 0.0051 -2.7568\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0000\n", + "75% 0.3640 0.0791\n", + "max 0.5000 486.0636\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " 
MAF L2\n", + "MAF 1.00 0.04\n", + "L2 0.04 1.00\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7225.6845\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0252\n", + "std 1.0387\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 128.0581\n", + "Analysis finished at Wed Apr 8 16:27:47 2026\n", + "Total time elapsed: 2.0m:45.91s\n", + "\n", + "[2026-04-08 16:27:47,344] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:27:47,345] INFO:cellink.tl.external._sclinker_utils: [8/264] done: B naive_L2/ABC_Road_BLD/chr15\n", + "[2026-04-08 16:27:47,351] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.14.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:28:11,362] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* 
(C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 8.9466\n", + "std 0.1420 61.5078\n", + "min 0.0051 -7.3827\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0005\n", + "75% 0.3569 0.9133\n", + "max 0.5000 1831.2817\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0252\n", + "L2 0.0252 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + 
"\n", + "Annotation Matrix Column Sums\n", + "ANNOT 14783.8088\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0636\n", + "std 2.0985\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 127.6945\n", + "Analysis finished at Wed Apr 8 16:28:11 2026\n", + "Total time elapsed: 3.0m:9.93s\n", + "\n", + "[2026-04-08 16:28:11,371] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:28:11,372] INFO:cellink.tl.external._sclinker_utils: [9/264] done: B naive_L2/ABC_Road_BLD/chr19\n", + "[2026-04-08 16:28:11,378] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.15.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:02,621] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of 
Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 4.9895\n", + "std 0.1423 105.7200\n", + "min 0.0051 -40.8628\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0049\n", + "max 0.5000 8509.9521\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0087\n", + "L2 0.0087 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 36342.7538\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 
438106.0000\n", + "mean 0.0830\n", + "std 4.6689\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 379.0000\n", + "Analysis finished at Wed Apr 8 16:29:02 2026\n", + "Total time elapsed: 4.0m:1.14s\n", + "\n", + "[2026-04-08 16:29:02,622] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:02,622] INFO:cellink.tl.external._sclinker_utils: [10/264] done: B naive_L2/ABC_Road_BLD/chr9\n", + "[2026-04-08 16:29:02,627] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.16.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:11,763] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + 
"Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:26:38 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 0.5576\n", + "std 0.1423 5.1907\n", + "min 0.0051 -0.7772\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0026\n", + "max 0.5000 111.1402\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0238\n", + "L2 0.0238 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2910.4797\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0066\n", + "std 0.0716\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0009\n", + "Analysis finished at Wed 
Apr 8 16:29:11 2026\n", + "Total time elapsed: 2.0m:33.22s\n", + "\n", + "[2026-04-08 16:29:11,769] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:11,769] INFO:cellink.tl.external._sclinker_utils: [11/264] done: B naive_L2/100kb/chr9\n", + "[2026-04-08 16:29:11,774] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.17.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:38,041] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 16.1289\n", + "std 0.1429 219.8297\n", + "min 0.0051 -48.5939\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0127\n", + "max 0.5000 6594.3511\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0098\n", + "L2 0.0098 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 45860.3083\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0955\n", + "std 9.8631\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1667.0000\n", + "Analysis finished at Wed Apr 8 16:29:37 2026\n", + "Total time elapsed: 4.0m:36.57s\n", + "\n", + "[2026-04-08 
16:29:38,046] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:38,047] INFO:cellink.tl.external._sclinker_utils: [12/264] done: B naive_L2/ABC_Road_BLD/chr12\n", + "[2026-04-08 16:29:38,056] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.18.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:47,029] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.11 \\\n", + "--bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 0.5419\n", + "std 0.1420 7.7364\n", + "min 0.0051 -1.4112\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0001\n", + "max 0.5000 400.8500\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0031\n", + "L2 0.0031 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3362.4193\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0068\n", + "std 0.4925\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 109.6162\n", + "Analysis finished at Wed Apr 8 16:29:46 2026\n", + "Total time elapsed: 4.0m:45.47s\n", + "\n", + "[2026-04-08 16:29:47,031] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: 
/ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:29:47,031] INFO:cellink.tl.external._sclinker_utils: [13/264] done: B naive_L2/ABC_Road_BLD/chr11\n", + "[2026-04-08 16:29:47,035] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.19.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:17,673] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 1.7353\n", + "std 0.1428 18.6178\n", + "min 0.0051 -5.3612\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0002\n", + "75% 0.3548 0.0100\n", + "max 0.5000 729.2620\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0004\n", + "L2 0.0004 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7240.121\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0198\n", + "std 1.4662\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 194.8148\n", + "Analysis finished at Wed Apr 8 16:30:17 2026\n", + "Total time elapsed: 5.0m:16.26s\n", + "\n", + "[2026-04-08 16:30:17,741] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:17,741] INFO:cellink.tl.external._sclinker_utils: [14/264] done: B naive_L2/ABC_Road_BLD/chr13\n", + "[2026-04-08 16:30:17,779] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.20.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:18,235] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 
8 16:25:01 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 15.0047\n", + "std 0.1428 189.3210\n", + "min 0.0051 -47.1575\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0006\n", + "max 0.5000 6218.7551\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0347\n", + "L2 0.0347 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 55519.0352\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0942\n", + "std 7.3626\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1247.5348\n", + "Analysis finished at Wed Apr 8 16:30:17 2026\n", + "Total time elapsed: 5.0m:16.82s\n", + "\n", + "[2026-04-08 16:30:18,237] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.21.annot.gz 
--out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:18,238] INFO:cellink.tl.external._sclinker_utils: [15/264] done: B naive_L2/ABC_Road_BLD/chr7\n", + "[2026-04-08 16:30:18,264] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.21.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:28,704] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 
annotations for 549971 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 0.0652\n", + "std 0.1431 0.5769\n", + "min 0.0051 -0.0757\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0009\n", + "max 0.5000 24.0836\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0219\n", + "L2 0.0219 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 354.826\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0006\n", + "std 0.0268\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 4.6839\n", + "Analysis finished at Wed Apr 8 16:30:28 2026\n", + "Total time elapsed: 5.0m:27.11s\n", + "\n", + "[2026-04-08 16:30:28,707] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/B_naive_L2/100kb/B_naive_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.22 --ld-wind-cm 
1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:28,708] INFO:cellink.tl.external._sclinker_utils: [16/264] done: B naive_L2/ABC_Road_BLD/chr8\n", + "[2026-04-08 16:30:28,749] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/B_naive_L2/100kb/B_naive_L2.22.annot.gz --out /data/ldscores/B_naive_L2/100kb/B_naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:30:47,520] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:29:13 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.17.annot.gz\n", + "Read list of 489 individuals from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 0.2704\n", + "std 0.1444 2.5605\n", + "min 0.0051 -0.2625\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0000\n", + "75% 0.3681 0.0058\n", + "max 0.5000 62.4337\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0359\n", + "L2 0.0359 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 518.8309\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0019\n", + "std 0.0156\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.2067\n", + "Analysis finished at Wed Apr 8 16:30:47 2026\n", + "Total time elapsed: 1.0m:33.52s\n", + "\n", + "[2026-04-08 16:30:47,521] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + 
"[2026-04-08 16:30:47,522] INFO:cellink.tl.external._sclinker_utils: [17/264] done: B naive_L2/100kb/chr17\n", + "[2026-04-08 16:30:47,527] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:02,746] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 0.1557\n", + "std 0.1422 1.4757\n", + "min 0.0051 -0.4005\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0001\n", + "max 0.5000 59.0886\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0015\n", + "L2 0.0015 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 790.4816\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0011\n", + "std 0.0664\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 11.2554\n", + "Analysis finished at Wed Apr 8 16:31:02 2026\n", + "Total time elapsed: 6.0m:1.35s\n", + "\n", + "[2026-04-08 16:31:02,749] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:02,749] INFO:cellink.tl.external._sclinker_utils: [18/264] 
done: B naive_L2/ABC_Road_BLD/chr4\n", + "[2026-04-08 16:31:02,773] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:12,267] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:27:43 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD 
Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 1.4103\n", + "std 0.1428 12.5161\n", + "min 0.0051 -1.6945\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3548 0.0121\n", + "max 0.5000 236.5951\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.016\n", + "L2 0.016 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3835.2699\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0105\n", + "std 0.0914\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.8714\n", + "Analysis finished at Wed Apr 8 16:31:12 2026\n", + "Total time elapsed: 3.0m:28.66s\n", + "\n", + "[2026-04-08 16:31:12,271] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:12,271] INFO:cellink.tl.external._sclinker_utils: [19/264] done: B naive_L2/100kb/chr13\n", + "[2026-04-08 16:31:12,298] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:30,130] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", 
+ "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 0.1220\n", + "std 0.1422 0.9945\n", + "min 0.0051 -0.2383\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0018\n", + "max 0.5000 32.5501\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0244\n", + "L2 0.0244 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 764.0809\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0010\n", + "std 0.0111\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.2170\n", + "Analysis finished at Wed Apr 8 16:31:29 2026\n", + "Total time elapsed: 6.0m:28.56s\n", + "\n", + "[2026-04-08 16:31:30,352] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:30,353] INFO:cellink.tl.external._sclinker_utils: [20/264] done: B naive_L2/100kb/chr4\n", + "[2026-04-08 16:31:30,361] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data 
/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:30,951] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:30:20 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to 
/data/ldscores/B_naive_L2/100kb/B_naive_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 1.7319\n", + "std 0.1414 11.2395\n", + "min 0.0051 -1.5474\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3650 0.0128\n", + "max 0.5000 222.7351\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0397\n", + "L2 0.0397 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2472.5312\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0178\n", + "std 0.1128\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0218\n", + "Analysis finished at Wed Apr 8 16:31:30 2026\n", + "Total time elapsed: 1.0m:10.25s\n", + "\n", + "[2026-04-08 16:31:30,952] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:30,952] INFO:cellink.tl.external._sclinker_utils: [21/264] done: B naive_L2/100kb/chr21\n", + "[2026-04-08 16:31:30,955] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:31,582] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:29:04 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD 
Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 0.6479\n", + "std 0.1426 4.4296\n", + "min 0.0051 -0.2806\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0000\n", + "75% 0.3599 0.0083\n", + "max 0.5000 98.6516\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0324\n", + "L2 -0.0324 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1467.2311\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0046\n", + "std 0.0281\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.2808\n", + "Analysis finished at Wed Apr 8 16:31:31 2026\n", + "Total time elapsed: 2.0m:26.35s\n", + "\n", + "[2026-04-08 16:31:31,585] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:31,585] INFO:cellink.tl.external._sclinker_utils: [22/264] done: B naive_L2/100kb/chr16\n", + "[2026-04-08 16:31:31,593] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.annot.gz --out 
/data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:31,775] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:28:13 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 1.8032e+00\n", + "std 0.1441 
1.2665e+01\n", + "min 0.0051 -1.2341e+00\n", + "25% 0.1104 0.0000e+00\n", + "50% 0.2270 3.3276e-05\n", + "75% 0.3640 3.1169e-02\n", + "max 0.5000 1.8710e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0542\n", + "L2 0.0542 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3007.4348\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0105\n", + "std 0.1178\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.9347\n", + "Analysis finished at Wed Apr 8 16:31:31 2026\n", + "Total time elapsed: 3.0m:17.66s\n", + "\n", + "[2026-04-08 16:31:31,777] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:31,777] INFO:cellink.tl.external._sclinker_utils: [23/264] done: B naive_L2/100kb/chr15\n", + "[2026-04-08 16:31:31,781] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:33,885] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:27:49 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 2.6407\n", + "std 0.1424 29.6765\n", + "min 0.0051 -8.2288\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3507 
0.0247\n", + "max 0.5000 731.3570\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0088\n", + "L2 0.0088 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 18508.2998\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0570\n", + "std 0.4371\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 3.7555\n", + "Analysis finished at Wed Apr 8 16:31:33 2026\n", + "Total time elapsed: 3.0m:44.07s\n", + "\n", + "[2026-04-08 16:31:33,886] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:33,887] INFO:cellink.tl.external._sclinker_utils: [24/264] done: B naive_L2/100kb/chr14\n", + "[2026-04-08 16:31:33,891] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:37,912] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:30:20 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 0.3690\n", + "std 0.1427 3.6764\n", + "min 0.0051 -0.6983\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0025\n", + "max 0.5000 110.3180\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0269\n", + 
"L2 0.0269 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1383.7911\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0062\n", + "std 0.0666\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:31:37 2026\n", + "Total time elapsed: 1.0m:17.66s\n", + "\n", + "[2026-04-08 16:31:37,916] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:37,916] INFO:cellink.tl.external._sclinker_utils: [25/264] done: B naive_L2/100kb/chr20\n", + "[2026-04-08 16:31:37,923] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:42,900] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* 
(C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 0.5494\n", + "std 0.1426 5.5686\n", + "min 0.0051 -1.2777\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3538 0.0049\n", + "max 0.5000 189.4805\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.012\n", + "L2 0.012 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", 
+ "Annotation Matrix Column Sums\n", + "ANNOT 2176.7101\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0043\n", + "std 0.2693\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 113.3766\n", + "Analysis finished at Wed Apr 8 16:31:42 2026\n", + "Total time elapsed: 6.0m:41.51s\n", + "\n", + "[2026-04-08 16:31:42,941] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:42,941] INFO:cellink.tl.external._sclinker_utils: [26/264] done: B naive_L2/ABC_Road_BLD/chr10\n", + "[2026-04-08 16:31:42,954] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:43,542] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT 
and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:29:40 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 0.3918\n", + "std 0.1423 4.8064\n", + "min 0.0051 -1.0312\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0005\n", + "max 0.5000 136.9330\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0238\n", + "L2 0.0238 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1297.4821\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + 
"count 285156.0000\n", + "mean 0.0046\n", + "std 0.0632\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:31:43 2026\n", + "Total time elapsed: 2.0m:3.06s\n", + "\n", + "[2026-04-08 16:31:43,545] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:43,546] INFO:cellink.tl.external._sclinker_utils: [27/264] done: B naive_L2/100kb/chr18\n", + "[2026-04-08 16:31:43,549] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:47,991] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:27:31 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 2.1551\n", + "std 0.1429 17.1541\n", + "min 0.0051 -3.3558\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0304\n", + "max 0.5000 379.1115\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0225\n", + "L2 0.0225 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 11645.295\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0243\n", + "std 0.1990\n", + "min 0.0000\n", + "25% 
0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.4689\n", + "Analysis finished at Wed Apr 8 16:31:47 2026\n", + "Total time elapsed: 4.0m:16.0s\n", + "\n", + "[2026-04-08 16:31:47,992] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:47,992] INFO:cellink.tl.external._sclinker_utils: [28/264] done: B naive_L2/100kb/chr12\n", + "[2026-04-08 16:31:47,998] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:54,882] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + 
"--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:30:31 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 1.7482\n", + "std 0.1435 19.4364\n", + "min 0.0051 -1.8579\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0005\n", + "75% 0.3579 0.0594\n", + "max 0.5000 462.0576\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.036\n", + "L2 -0.036 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9038.403\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0640\n", + "std 0.4443\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 3.2613\n", + "Analysis finished at Wed Apr 8 16:31:54 2026\n", + 
"Total time elapsed: 1.0m:23.4s\n", + "\n", + "[2026-04-08 16:31:54,885] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:31:54,885] INFO:cellink.tl.external._sclinker_utils: [29/264] done: B naive_L2/100kb/chr22\n", + "[2026-04-08 16:31:54,889] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:11,557] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
/data/ldscores/B_naive_L2/100kb/B_naive_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 0.8801\n", + "std 0.1421 5.6639\n", + "min 0.0051 -0.4837\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0163\n", + "max 0.5000 98.5797\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0281\n", + "L2 0.0281 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4340.3556\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0069\n", + "std 0.0565\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.7024\n", + "Analysis finished at Wed Apr 8 16:32:11 2026\n", + "Total time elapsed: 7.0m:10.05s\n", + "\n", + "[2026-04-08 16:32:11,558] INFO:cellink.tl.external._ldsc: Computing LD 
scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:11,559] INFO:cellink.tl.external._sclinker_utils: [30/264] done: B naive_L2/100kb/chr5\n", + "[2026-04-08 16:32:11,571] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:15,941] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + 
"--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 1.8924\n", + "std 0.1421 19.2833\n", + "min 0.0051 -5.1749\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0074\n", + "max 0.5000 982.1608\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0004\n", + "L2 0.0004 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 16621.4705\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0263\n", + "std 1.5611\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 295.6820\n", + "Analysis finished at Wed Apr 8 16:32:15 2026\n", + "Total time elapsed: 7.0m:14.44s\n", + "\n", + "[2026-04-08 16:32:15,947] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:15,947] INFO:cellink.tl.external._sclinker_utils: [31/264] done: B naive_L2/ABC_Road_BLD/chr5\n", + "[2026-04-08 16:32:15,956] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:20,104] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/B_naive_L2/100kb/B_naive_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:26:37 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 0.3623\n", + "std 0.1431 2.7158\n", + "min 0.0051 -0.3101\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0064\n", + "max 0.5000 57.0690\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.007\n", + "L2 0.007 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1532.1863\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0028\n", + "std 0.0208\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.3374\n", + "Analysis finished at Wed Apr 8 16:32:19 2026\n", + "Total time elapsed: 5.0m:42.46s\n", + "\n", + "[2026-04-08 16:32:20,105] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:20,105] INFO:cellink.tl.external._sclinker_utils: [32/264] done: B naive_L2/100kb/chr8\n", + "[2026-04-08 16:32:20,108] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:27,419] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:26:43 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 0.3432\n", + "std 0.1426 1.9865\n", + "min 0.0051 -0.3582\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3538 0.0130\n", + "max 0.5000 37.0191\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0382\n", + "L2 0.0382 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1450.9092\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0028\n", + "std 0.0233\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.4153\n", + "Analysis finished at Wed Apr 8 16:32:27 2026\n", + "Total time elapsed: 5.0m:44.09s\n", + "\n", + "[2026-04-08 16:32:27,423] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:27,423] INFO:cellink.tl.external._sclinker_utils: [33/264] done: B naive_L2/100kb/chr10\n", + "[2026-04-08 16:32:27,427] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:29,960] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 1.3198\n", + "std 0.1432 10.4616\n", + "min 0.0051 -1.4698\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0001\n", + "75% 0.3558 0.0497\n", + "max 0.5000 245.8270\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0343\n", + "L2 0.0343 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7751.1581\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0099\n", + "std 0.0789\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0174\n", + "Analysis finished at Wed Apr 8 16:32:29 2026\n", + "Total time elapsed: 7.0m:28.46s\n", + "\n", + "[2026-04-08 16:32:29,963] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:29,967] INFO:cellink.tl.external._sclinker_utils: [34/264] done: B naive_L2/100kb/chr1\n", + "[2026-04-08 16:32:29,969] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:33,614] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:29:49 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 7.2159\n", + "std 0.1420 37.4260\n", + "min 0.0051 -3.3917\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0014\n", + "75% 0.3569 0.4253\n", + "max 0.5000 679.5418\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0123\n", + "L2 -0.0123 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 11460.3445\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0493\n", + "std 0.2118\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.2243\n", + "Analysis finished at Wed Apr 8 16:32:33 2026\n", + "Total time elapsed: 2.0m:43.84s\n", + "\n", + "[2026-04-08 16:32:33,616] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:33,616] INFO:cellink.tl.external._sclinker_utils: [35/264] done: B naive_L2/100kb/chr19\n", + "[2026-04-08 16:32:33,676] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:38,244] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 1.0355\n", + "std 0.1431 8.2811\n", + "min 0.0051 -1.1958\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0228\n", + "max 0.5000 212.4268\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0118\n", + "L2 0.0118 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7752.7617\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0092\n", + "std 0.0960\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.3551\n", + "Analysis finished at Wed Apr 8 16:32:38 2026\n", + "Total time elapsed: 7.0m:36.87s\n", + "\n", + "[2026-04-08 16:32:38,245] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:38,245] INFO:cellink.tl.external._sclinker_utils: [36/264] done: B naive_L2/100kb/chr2\n", + "[2026-04-08 16:32:38,271] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:38,523] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 0.5061\n", + "std 0.1426 4.4653\n", + "min 0.0051 -0.7656\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3630 0.0251\n", + "max 0.5000 123.1263\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0152\n", + "L2 0.0152 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3277.0407\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0046\n", + "std 0.0534\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:32:38 2026\n", + "Total time elapsed: 7.0m:37.16s\n", + "\n", + "[2026-04-08 16:32:38,523] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:38,524] INFO:cellink.tl.external._sclinker_utils: [37/264] done: B naive_L2/100kb/chr3\n", + "[2026-04-08 16:32:38,529] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:41,560] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:26:17 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 0.9220\n", + "std 0.1428 6.7089\n", + "min 0.0051 -0.9082\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0141\n", + "max 0.5000 170.0062\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0299\n", + "L2 0.0299 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4595.4575\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0078\n", + "std 0.0725\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.8654\n", + "Analysis finished at Wed Apr 8 16:32:41 2026\n", + "Total time elapsed: 6.0m:24.05s\n", + "\n", + "[2026-04-08 16:32:41,561] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:41,562] INFO:cellink.tl.external._sclinker_utils: [38/264] done: B naive_L2/100kb/chr7\n", + "[2026-04-08 16:32:41,787] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.annot.gz --out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:49,938] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.annot.gz \\\n", + "--l2 \n", + 
"\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 1.5988\n", + "std 0.1426 24.5275\n", + "min 0.0051 -5.5888\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3630 0.0129\n", + "max 0.5000 1226.3109\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.018\n", + "L2 0.018 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8893.8035\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0126\n", + "std 0.9209\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 180.0520\n", + "Analysis finished at Wed Apr 8 16:32:49 2026\n", + "Total time elapsed: 7.0m:48.44s\n", + "\n", + "[2026-04-08 16:32:49,939] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:32:49,940] INFO:cellink.tl.external._sclinker_utils: [39/264] done: B naive_L2/ABC_Road_BLD/chr3\n", + "[2026-04-08 16:32:49,949] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.1.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:33:40,065] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis 
at Wed Apr 8 16:25:01 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 4.2188\n", + "std 0.1432 57.0207\n", + "min 0.0051 -12.7226\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0000\n", + "75% 0.3558 0.0343\n", + "max 0.5000 2106.7739\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0146\n", + "L2 0.0146 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 22920.7862\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0294\n", + "std 1.6959\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 237.8495\n", + "Analysis finished at Wed Apr 8 16:33:39 2026\n", + "Total time elapsed: 8.0m:38.49s\n", + "\n", + "[2026-04-08 16:33:40,085] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:33:40,085] INFO:cellink.tl.external._sclinker_utils: [40/264] done: B naive_L2/ABC_Road_BLD/chr1\n", + "[2026-04-08 16:33:40,093] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.2.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:33:50,991] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis 
at Wed Apr 8 16:25:01 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 2.9101\n", + "std 0.1431 42.4460\n", + "min 0.0051 -9.8478\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0065\n", + "max 0.5000 1849.1440\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0164\n", + "L2 0.0164 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21788.5443\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0260\n", + "std 1.5952\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 164.7017\n", + "Analysis finished at Wed Apr 8 16:33:50 2026\n", + "Total time elapsed: 8.0m:49.55s\n", + "\n", + "[2026-04-08 16:33:50,993] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:33:50,993] INFO:cellink.tl.external._sclinker_utils: [41/264] done: B naive_L2/ABC_Road_BLD/chr2\n", + "[2026-04-08 16:33:51,000] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.3.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:33:57,199] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:32:44 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 12.8452\n", + "std 0.1435 116.3682\n", + "min 0.0051 -29.3545\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0020\n", + "75% 0.3579 0.4322\n", + "max 0.5000 4544.5787\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0025\n", + "L2 0.0025 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 34640.387\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.2455\n", + "std 6.4354\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 262.1720\n", + "Analysis finished at Wed Apr 8 16:33:57 2026\n", + "Total time elapsed: 1.0m:12.54s\n", + "\n", + "[2026-04-08 16:33:57,201] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:33:57,201] INFO:cellink.tl.external._sclinker_utils: [42/264] done: CD4 Naive_L2/ABC_Road_BLD/chr22\n", + "[2026-04-08 16:33:57,216] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.4.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:34:19,225] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed 
Apr 8 16:27:14 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 0.3548\n", + "std 0.1420 2.2015\n", + "min 0.0051 -0.2086\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0108\n", + "max 0.5000 40.7315\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0479\n", + "L2 0.0479 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1441.4814\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0029\n", + "std 0.0232\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.3061\n", + "Analysis finished at Wed Apr 8 16:34:18 2026\n", + "Total time elapsed: 7.0m:4.21s\n", + "\n", + "[2026-04-08 16:34:19,226] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.5.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:34:19,226] INFO:cellink.tl.external._sclinker_utils: [43/264] done: B naive_L2/100kb/chr11\n", + "[2026-04-08 16:34:19,233] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.5.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:34:26,509] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:40 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 
annotations for 138712 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 6.1201e+00\n", + "std 0.1414 4.4514e+01\n", + "min 0.0051 -4.5468e+00\n", + "25% 0.1186 5.4393e-06\n", + "50% 0.2321 5.4383e-03\n", + "75% 0.3650 4.3479e-01\n", + "max 0.5000 1.1852e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0139\n", + "L2 0.0139 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9305.7943\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0671\n", + "std 2.0538\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 244.2559\n", + "Analysis finished at Wed Apr 8 16:34:26 2026\n", + "Total time elapsed: 1.0m:45.78s\n", + "\n", + "[2026-04-08 16:34:26,510] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.6.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:34:26,513] INFO:cellink.tl.external._sclinker_utils: [44/264] done: CD4 Naive_L2/ABC_Road_BLD/chr21\n", + "[2026-04-08 16:34:26,540] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.6.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:34:52,917] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:40 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + 
"Read 1 annotations for 438106 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 4.2166e+00\n", + "std 0.1423 1.0527e+02\n", + "min 0.0051 -4.0851e+01\n", + "25% 0.1115 0.0000e+00\n", + "50% 0.2249 0.0000e+00\n", + "75% 0.3569 1.5969e-05\n", + "max 0.5000 8.5111e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0065\n", + "L2 0.0065 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 33344.319\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0761\n", + "std 4.6414\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 379.0000\n", + "Analysis finished at Wed Apr 8 16:34:52 2026\n", + "Total time elapsed: 3.0m:12.49s\n", + "\n", + "[2026-04-08 16:34:52,918] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.7.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:34:52,918] INFO:cellink.tl.external._sclinker_utils: [45/264] done: CD4 Naive_L2/ABC_Road_BLD/chr9\n", + "[2026-04-08 16:34:52,925] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.7.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:04,332] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:32 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + 
"Read 1 annotations for 285156 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 1.0989\n", + "std 0.1423 8.9955\n", + "min 0.0051 -1.1556\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0008\n", + "max 0.5000 174.5855\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0068\n", + "L2 0.0068 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4064.2409\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0143\n", + "std 0.5591\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 71.5680\n", + "Analysis finished at Wed Apr 8 16:35:04 2026\n", + "Total time elapsed: 2.0m:32.1s\n", + "\n", + "[2026-04-08 16:35:04,446] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.8.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:04,446] INFO:cellink.tl.external._sclinker_utils: [46/264] done: CD4 Naive_L2/ABC_Road_BLD/chr18\n", + "[2026-04-08 16:35:04,481] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.8.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:07,056] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 
annotations for 664016 SNPs from /data/annotations/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 108.0048\n", + "std 0.1431 2583.2846\n", + "min 0.0051 -584.6952\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0030\n", + "max 0.5000 234096.8252\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0024\n", + "L2 0.0024 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 630381.4581\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.9493\n", + "std 24.4301\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 796.9867\n", + "Analysis finished at Wed Apr 8 16:35:06 2026\n", + "Total time elapsed: 10.0m:5.6s\n", + "\n", + "[2026-04-08 16:35:07,057] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.9.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:07,058] INFO:cellink.tl.external._sclinker_utils: [47/264] done: B naive_L2/ABC_Road_BLD/chr6\n", + "[2026-04-08 16:35:07,063] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.9.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:10,961] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:22 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + 
"Read 1 annotations for 316981 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 14.5848\n", + "std 0.1426 246.9881\n", + "min 0.0051 -31.0981\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0000\n", + "75% 0.3599 0.0095\n", + "max 0.5000 6775.9150\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0258\n", + "L2 0.0258 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 39514.5166\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.1247\n", + "std 5.7398\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 357.6975\n", + "Analysis finished at Wed Apr 8 16:35:10 2026\n", + "Total time elapsed: 2.0m:48.36s\n", + "\n", + "[2026-04-08 16:35:10,962] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.10.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:10,963] INFO:cellink.tl.external._sclinker_utils: [48/264] done: CD4 Naive_L2/ABC_Road_BLD/chr16\n", + "[2026-04-08 16:35:10,981] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.10.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:19,339] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:13 2026\n", + "Read list of 324698 SNPs from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 48.6922\n", + "std 0.1424 774.0811\n", + "min 0.0051 -180.8206\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3507 0.0879\n", + "max 0.5000 26294.9972\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0013\n", + "L2 0.0013 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 151240.2955\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.4658\n", + "std 25.8237\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1700.0000\n", + "Analysis finished at Wed Apr 8 16:35:19 2026\n", + "Total time elapsed: 3.0m:5.62s\n", + "\n", + "[2026-04-08 16:35:19,343] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.11.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:19,343] INFO:cellink.tl.external._sclinker_utils: [49/264] done: CD4 Naive_L2/ABC_Road_BLD/chr14\n", + "[2026-04-08 16:35:19,348] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.11.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:19,500] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:40 2026\n", + "Read list of 221626 SNPs from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 7.0283\n", + "std 0.1427 68.2590\n", + "min 0.0051 -6.0361\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0002\n", + "75% 0.3558 0.0785\n", + "max 0.5000 1874.7610\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0013\n", + "L2 0.0013 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 17473.3297\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0788\n", + "std 2.6761\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 209.0788\n", + "Analysis finished at Wed Apr 8 16:35:19 2026\n", + "Total time elapsed: 2.0m:39.12s\n", + "\n", + "[2026-04-08 16:35:19,500] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.12.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:19,501] INFO:cellink.tl.external._sclinker_utils: [50/264] done: CD4 Naive_L2/ABC_Road_BLD/chr20\n", + "[2026-04-08 16:35:19,505] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.12.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:37,431] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:29 2026\n", + "Read list of 269222 SNPs from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 6.0764\n", + "std 0.1444 43.5865\n", + "min 0.0051 -8.7398\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0003\n", + "75% 0.3681 0.0813\n", + "max 0.5000 1129.8611\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0095\n", + "L2 0.0095 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 30018.9419\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.1115\n", + "std 3.2600\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 250.0000\n", + "Analysis finished at Wed Apr 8 16:35:37 2026\n", + "Total time elapsed: 3.0m:7.57s\n", + "\n", + "[2026-04-08 16:35:37,438] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.13.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:37,438] INFO:cellink.tl.external._sclinker_utils: [51/264] done: CD4 Naive_L2/ABC_Road_BLD/chr17\n", + "[2026-04-08 16:35:37,454] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.13.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:43,760] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/B_naive_L2/100kb/B_naive_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/B_naive_L2/100kb/B_naive_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:25:01 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations 
for 664016 SNPs from /data/annotations/B_naive_L2/100kb/B_naive_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/B_naive_L2/100kb/B_naive_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/B_naive_L2/100kb/B_naive_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 2.2975\n", + "std 0.1431 30.5332\n", + "min 0.0051 -7.0671\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0139\n", + "max 0.5000 1497.5730\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0018\n", + "L2 -0.0018 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 18195.1826\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0274\n", + "std 0.1645\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.1744\n", + "Analysis finished at Wed Apr 8 16:35:43 2026\n", + "Total time elapsed: 10.0m:42.3s\n", + "\n", + "[2026-04-08 16:35:43,762] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.14 --ld-wind-cm 1.0 --thin-annot 
--print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:43,762] INFO:cellink.tl.external._sclinker_utils: [52/264] done: B naive_L2/100kb/chr6\n", + "[2026-04-08 16:35:43,766] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.14.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:47,257] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:18 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.annot.gz\n", + "Read list of 489 individuals from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 1.2768\n", + "std 0.1441 7.1119\n", + "min 0.0051 -1.3190\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0002\n", + "75% 0.3640 0.0396\n", + "max 0.5000 180.6331\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0385\n", + "L2 0.0385 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3148.4102\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0110\n", + "std 0.5729\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 78.0000\n", + "Analysis finished at Wed Apr 8 16:35:47 2026\n", + "Total time elapsed: 3.0m:28.46s\n", + "\n", + "[2026-04-08 16:35:47,260] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt 
--yes-really\n", + "[2026-04-08 16:35:47,260] INFO:cellink.tl.external._sclinker_utils: [53/264] done: CD4 Naive_L2/ABC_Road_BLD/chr15\n", + "[2026-04-08 16:35:47,265] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.15.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:58,879] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:32:36 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 7.2709\n", + "std 0.1420 78.4104\n", + "min 0.0051 -6.6110\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0007\n", + "75% 0.3569 0.1515\n", + "max 0.5000 2298.3931\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0066\n", + "L2 -0.0066 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6815.1077\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0293\n", + "std 1.2463\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 98.4546\n", + "Analysis finished at Wed Apr 8 16:35:58 2026\n", + "Total time elapsed: 3.0m:22.61s\n", + "\n", + "[2026-04-08 16:35:58,887] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:35:58,888] INFO:cellink.tl.external._sclinker_utils: 
[54/264] done: CD4 Naive_L2/ABC_Road_BLD/chr19\n", + "[2026-04-08 16:35:58,892] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.16.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:36:06,892] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:56 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 
SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 9.3196e-01\n", + "std 0.1428 5.4208e+00\n", + "min 0.0051 -1.7676e+00\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2209 7.0400e-05\n", + "75% 0.3548 3.5973e-02\n", + "max 0.5000 1.5858e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0279\n", + "L2 0.0279 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5289.5999\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0144\n", + "std 0.6718\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 120.9934\n", + "Analysis finished at Wed Apr 8 16:36:06 2026\n", + "Total time elapsed: 4.0m:9.64s\n", + "\n", + "[2026-04-08 16:36:06,894] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:36:06,895] INFO:cellink.tl.external._sclinker_utils: [55/264] done: CD4 Naive_L2/ABC_Road_BLD/chr13\n", + "[2026-04-08 
16:36:06,899] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.17.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:36:16,507] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:50 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 
1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 4.2634\n", + "std 0.1429 61.6637\n", + "min 0.0051 -18.1019\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0132\n", + "max 0.5000 4259.8131\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0022\n", + "L2 -0.0022 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24370.5085\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0508\n", + "std 3.1692\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 533.3138\n", + "Analysis finished at Wed Apr 8 16:36:16 2026\n", + "Total time elapsed: 4.0m:26.26s\n", + "\n", + "[2026-04-08 16:36:16,509] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:36:16,510] INFO:cellink.tl.external._sclinker_utils: [56/264] done: CD4 Naive_L2/ABC_Road_BLD/chr12\n", + "[2026-04-08 16:36:16,530] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.18.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:36:45,847] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:35 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 12.0551\n", + "std 0.1431 165.7764\n", + "min 0.0051 -20.5856\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0038\n", + "max 0.5000 6321.3774\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0103\n", + "L2 0.0103 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 57832.4178\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.1052\n", + "std 4.8080\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 416.9076\n", + "Analysis finished at Wed Apr 8 16:36:45 2026\n", + "Total time elapsed: 5.0m:9.7s\n", + "\n", + "[2026-04-08 16:36:45,848] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:36:45,849] INFO:cellink.tl.external._sclinker_utils: [57/264] done: CD4 Naive_L2/ABC_Road_BLD/chr8\n", + "[2026-04-08 16:36:45,853] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.19.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:37:06,904] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:33 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 1.5771\n", + "std 0.1428 17.3226\n", + "min 0.0051 -7.0836\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0248\n", + "max 0.5000 1318.0417\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0081\n", + "L2 0.0081 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 18739.4237\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0318\n", + "std 1.5403\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 202.0007\n", + "Analysis finished at Wed Apr 8 16:37:06 2026\n", + "Total time elapsed: 5.0m:32.64s\n", + "\n", + "[2026-04-08 16:37:06,907] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:37:06,907] INFO:cellink.tl.external._sclinker_utils: [58/264] done: CD4 Naive_L2/ABC_Road_BLD/chr7\n", + "[2026-04-08 16:37:06,913] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.20.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:37:16,424] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:44 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 1.8988e+00\n", + "std 0.1426 1.7908e+01\n", + "min 0.0051 -3.4539e+00\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2198 1.1983e-05\n", + "75% 0.3538 8.0456e-03\n", + "max 0.5000 6.6779e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.005\n", + "L2 -0.005 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 15991.4503\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0313\n", + "std 1.3431\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 182.3690\n", + "Analysis finished at Wed Apr 8 16:37:16 2026\n", + "Total time elapsed: 5.0m:31.15s\n", + "\n", + "[2026-04-08 16:37:16,426] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:37:16,426] INFO:cellink.tl.external._sclinker_utils: [59/264] done: CD4 Naive_L2/ABC_Road_BLD/chr10\n", + "[2026-04-08 16:37:16,482] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.21.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:37:44,394] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:05 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 1.3939\n", + "std 0.1431 18.1153\n", + "min 0.0051 -2.9318\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0052\n", + "max 0.5000 709.7000\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0201\n", + "L2 -0.0201 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8685.289\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0103\n", + "std 0.5585\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 120.9763\n", + "Analysis finished at Wed Apr 8 16:37:44 2026\n", + "Total time elapsed: 6.0m:38.93s\n", + "\n", + "[2026-04-08 16:37:44,397] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:37:44,397] INFO:cellink.tl.external._sclinker_utils: [60/264] done: CD4 Naive_L2/ABC_Road_BLD/chr2\n", + "[2026-04-08 16:37:44,424] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.22.annot.gz --out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:40,307] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:36:18 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 0.6412\n", + "std 0.1423 7.2638\n", + "min 0.0051 -1.2155\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0005\n", + "max 0.5000 206.6278\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0223\n", + "L2 0.0223 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2338.9423\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0082\n", + "std 0.0750\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.9973\n", + "Analysis finished at Wed Apr 8 16:38:39 2026\n", + "Total time elapsed: 2.0m:21.14s\n", + "\n", + "[2026-04-08 16:38:40,309] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:40,310] INFO:cellink.tl.external._sclinker_utils: [61/264] done: CD4 Naive_L2/100kb/chr18\n", + "[2026-04-08 16:38:40,316] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data 
-B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:48,576] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:35:39 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be 
printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 0.6417\n", + "std 0.1428 4.6668\n", + "min 0.0051 -0.7515\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3548 0.0106\n", + "max 0.5000 103.6647\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0118\n", + "L2 0.0118 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3265.0372\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0089\n", + "std 0.0792\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.9859\n", + "Analysis finished at Wed Apr 8 16:38:48 2026\n", + "Total time elapsed: 3.0m:8.28s\n", + "\n", + "[2026-04-08 16:38:48,582] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:48,583] INFO:cellink.tl.external._sclinker_utils: [62/264] done: CD4 Naive_L2/100kb/chr13\n", + "[2026-04-08 16:38:48,587] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 
--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:49,024] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:37:18 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.21.l2.ldscore.gz\n", + "\n", 
+ "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 1.0734\n", + "std 0.1414 3.9847\n", + "min 0.0051 -0.3341\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0021\n", + "75% 0.3650 0.1280\n", + "max 0.5000 42.3584\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0542\n", + "L2 0.0542 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1826.1215\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0132\n", + "std 0.0492\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.3837\n", + "Analysis finished at Wed Apr 8 16:38:48 2026\n", + "Total time elapsed: 1.0m:29.82s\n", + "\n", + "[2026-04-08 16:38:49,027] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:49,027] INFO:cellink.tl.external._sclinker_utils: [63/264] done: CD4 Naive_L2/100kb/chr21\n", + "[2026-04-08 16:38:49,033] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.annot.gz 
--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:54,914] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:35:45 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 
1.2307\n", + "std 0.1424 8.4873\n", + "min 0.0051 -1.1280\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3507 0.0410\n", + "max 0.5000 194.5783\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.011\n", + "L2 -0.011 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4866.0972\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0150\n", + "std 0.1022\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0621\n", + "Analysis finished at Wed Apr 8 16:38:54 2026\n", + "Total time elapsed: 3.0m:8.8s\n", + "\n", + "[2026-04-08 16:38:54,916] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:38:54,916] INFO:cellink.tl.external._sclinker_utils: [64/264] done: CD4 Naive_L2/100kb/chr14\n", + "[2026-04-08 16:38:54,934] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:01,001] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:35:08 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 0.6283\n", + "std 0.1423 4.6258\n", + "min 0.0051 -0.7609\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + 
"75% 0.3569 0.0054\n", + "max 0.5000 95.6511\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0304\n", + "L2 0.0304 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3436.1857\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0078\n", + "std 0.0764\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0014\n", + "Analysis finished at Wed Apr 8 16:39:00 2026\n", + "Total time elapsed: 3.0m:51.84s\n", + "\n", + "[2026-04-08 16:39:01,006] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:01,006] INFO:cellink.tl.external._sclinker_utils: [65/264] done: CD4 Naive_L2/100kb/chr9\n", + "[2026-04-08 16:39:01,012] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:01,832] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:35:49 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 4.6154\n", + "std 0.1441 39.2036\n", + "min 0.0051 -3.8674\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0011\n", + "75% 0.3640 0.0900\n", + "max 0.5000 918.1982\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + 
"MAF 1.0000 0.0118\n", + "L2 0.0118 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 13191.2272\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0460\n", + "std 0.2179\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0000\n", + "Analysis finished at Wed Apr 8 16:39:01 2026\n", + "Total time elapsed: 3.0m:12.32s\n", + "\n", + "[2026-04-08 16:39:01,833] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:01,833] INFO:cellink.tl.external._sclinker_utils: [66/264] done: CD4 Naive_L2/100kb/chr15\n", + "[2026-04-08 16:39:01,840] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:09,149] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 
1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:45 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 6.3802\n", + "std 0.1420 101.2319\n", + "min 0.0051 -18.6849\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0019\n", + "max 0.5000 3840.0745\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0133\n", + "L2 -0.0133 1.0000\n", + "\n", + "Annotation Correlation 
Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 29217.3756\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0592\n", + "std 3.5927\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 483.0000\n", + "Analysis finished at Wed Apr 8 16:39:08 2026\n", + "Total time elapsed: 7.0m:23.3s\n", + "\n", + "[2026-04-08 16:39:09,150] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:09,151] INFO:cellink.tl.external._sclinker_utils: [67/264] done: CD4 Naive_L2/ABC_Road_BLD/chr11\n", + "[2026-04-08 16:39:09,154] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:11,701] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", 
+ "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:37:09 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 1.1462\n", + "std 0.1427 8.0795\n", + "min 0.0051 -0.7043\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0434\n", + "max 0.5000 150.8362\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0624\n", + "L2 0.0624 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2097.0321\n", + "\n", + 
"Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0095\n", + "std 0.0723\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:39:11 2026\n", + "Total time elapsed: 2.0m:2.38s\n", + "\n", + "[2026-04-08 16:39:11,705] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:11,706] INFO:cellink.tl.external._sclinker_utils: [68/264] done: CD4 Naive_L2/100kb/chr20\n", + "[2026-04-08 16:39:11,726] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:12,875] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:36:09 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 3.0271\n", + "std 0.1444 17.6603\n", + "min 0.0051 -1.2480\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0002\n", + "75% 0.3681 0.0609\n", + "max 0.5000 270.2687\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0402\n", + "L2 0.0402 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7574.5535\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0281\n", + "std 0.1619\n", + "min 
0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.4350\n", + "Analysis finished at Wed Apr 8 16:39:12 2026\n", + "Total time elapsed: 3.0m:3.41s\n", + "\n", + "[2026-04-08 16:39:12,878] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:12,879] INFO:cellink.tl.external._sclinker_utils: [69/264] done: CD4 Naive_L2/100kb/chr17\n", + "[2026-04-08 16:39:12,885] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:22,971] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:36:00 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 1.1295\n", + "std 0.1426 8.0477\n", + "min 0.0051 -0.7719\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0000\n", + "75% 0.3599 0.0206\n", + "max 0.5000 180.1170\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0196\n", + "L2 0.0196 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4121.9472\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0130\n", + "std 0.0904\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0016\n", + "Analysis finished at Wed Apr 8 16:39:22 
2026\n", + "Total time elapsed: 3.0m:21.91s\n", + "\n", + "[2026-04-08 16:39:22,972] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:22,972] INFO:cellink.tl.external._sclinker_utils: [70/264] done: CD4 Naive_L2/100kb/chr16\n", + "[2026-04-08 16:39:22,991] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:25,173] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
/data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:32 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 3.0433\n", + "std 0.1422 27.2548\n", + "min 0.0051 -5.6222\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0020\n", + "max 0.5000 829.4704\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0142\n", + "L2 -0.0142 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 15328.613\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0210\n", + "std 1.2342\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 264.0000\n", + "Analysis finished at Wed Apr 8 16:39:24 2026\n", + "Total time elapsed: 7.0m:52.34s\n", + "\n", + "[2026-04-08 
16:39:25,174] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:25,175] INFO:cellink.tl.external._sclinker_utils: [71/264] done: CD4 Naive_L2/ABC_Road_BLD/chr4\n", + "[2026-04-08 16:39:25,268] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:29,988] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.22 \\\n", + "--bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:37:49 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 2.0199\n", + "std 0.1435 6.9480\n", + "min 0.0051 -0.6870\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0076\n", + "75% 0.3579 0.3512\n", + "max 0.5000 68.7072\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0143\n", + "L2 0.0143 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3844.2675\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0272\n", + "std 0.1217\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:39:29 2026\n", + "Total time elapsed: 1.0m:40.36s\n", + "\n", + "[2026-04-08 16:39:29,989] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py 
--l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:29,990] INFO:cellink.tl.external._sclinker_utils: [72/264] done: CD4 Naive_L2/100kb/chr22\n", + "[2026-04-08 16:39:29,994] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:31,342] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:35:21 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 2.1010e+00\n", + "std 0.1429 1.1986e+01\n", + "min 0.0051 -1.2558e+00\n", + "25% 0.1084 0.0000e+00\n", + "50% 0.2209 4.2885e-05\n", + "75% 0.3558 2.5532e-02\n", + "max 0.5000 1.8802e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0117\n", + "L2 0.0117 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9209.9298\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0192\n", + "std 0.1356\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.2274\n", + "Analysis finished at Wed Apr 8 16:39:31 2026\n", + "Total time elapsed: 4.0m:9.63s\n", + "\n", + "[2026-04-08 16:39:31,343] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 
--annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:31,343] INFO:cellink.tl.external._sclinker_utils: [73/264] done: CD4 Naive_L2/100kb/chr12\n", + "[2026-04-08 16:39:31,362] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:33,201] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:36:48 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 1.6143\n", + "std 0.1420 7.8177\n", + "min 0.0051 -0.8598\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0028\n", + "75% 0.3569 0.2295\n", + "max 0.5000 140.0555\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0036\n", + "L2 -0.0036 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2949.4164\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0127\n", + "std 0.0786\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:39:32 2026\n", + "Total time elapsed: 2.0m:44.32s\n", + "\n", + "[2026-04-08 16:39:33,204] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:33,206] INFO:cellink.tl.external._sclinker_utils: [74/264] done: CD4 Naive_L2/100kb/chr19\n", + "[2026-04-08 16:39:33,241] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:35,063] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:30:50 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 7.0691\n", + "std 0.1432 95.8228\n", + "min 0.0051 -28.3708\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0003\n", + "75% 0.3558 0.0595\n", + "max 0.5000 6234.1268\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0139\n", + "L2 0.0139 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 59301.1408\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0761\n", + "std 5.3790\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 723.0000\n", + "Analysis finished at Wed Apr 8 16:39:34 2026\n", + "Total time elapsed: 8.0m:44.75s\n", + "\n", + "[2026-04-08 16:39:35,064] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:35,064] INFO:cellink.tl.external._sclinker_utils: [75/264] done: CD4 Naive_L2/ABC_Road_BLD/chr1\n", + "[2026-04-08 16:39:35,084] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:44,684] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:35:06 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 1.9141\n", + "std 0.1431 12.3625\n", + "min 0.0051 -1.3691\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0135\n", + "max 0.5000 212.0814\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0044\n", + "L2 -0.0044 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 12251.1937\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0223\n", + "std 0.1406\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0160\n", + "Analysis finished at Wed Apr 8 16:39:44 2026\n", + "Total time elapsed: 4.0m:38.01s\n", + "\n", + "[2026-04-08 16:39:44,700] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:44,700] INFO:cellink.tl.external._sclinker_utils: [76/264] done: CD4 Naive_L2/100kb/chr8\n", + "[2026-04-08 16:39:44,730] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:46,045] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:34:54 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 1.9038\n", + "std 0.1428 18.4170\n", + "min 0.0051 -1.0818\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0171\n", + "max 0.5000 551.3085\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0197\n", + "L2 -0.0197 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9043.7518\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0153\n", + "std 0.1422\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0631\n", + "Analysis finished at Wed Apr 8 16:39:45 2026\n", + "Total time elapsed: 4.0m:50.82s\n", + "\n", + "[2026-04-08 16:39:46,046] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:39:46,046] INFO:cellink.tl.external._sclinker_utils: [77/264] done: CD4 Naive_L2/100kb/chr7\n", + "[2026-04-08 16:39:46,054] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:04,924] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:31:33 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 2.8025\n", + "std 0.1421 34.8457\n", + "min 0.0051 -14.7732\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0018\n", + "max 0.5000 4615.8952\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0045\n", + "L2 -0.0045 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 30583.7048\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0483\n", + "std 2.4857\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 302.4067\n", + "Analysis finished at Wed Apr 8 16:40:04 2026\n", + "Total time elapsed: 8.0m:31.31s\n", + "\n", + "[2026-04-08 16:40:04,933] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:04,937] INFO:cellink.tl.external._sclinker_utils: [78/264] done: CD4 Naive_L2/ABC_Road_BLD/chr5\n", + "[2026-04-08 16:40:04,948] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:18,671] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.annot.gz \\\n", + "--l2 \n", + 
"\n", + "Beginning analysis at Wed Apr 8 16:31:14 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 1.2595\n", + "std 0.1426 20.9990\n", + "min 0.0051 -4.7796\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0001\n", + "75% 0.3630 0.0138\n", + "max 0.5000 1043.8994\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0192\n", + "L2 0.0192 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6701.481\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0095\n", + "std 0.8238\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 153.3423\n", + "Analysis finished at Wed Apr 8 16:40:18 2026\n", + "Total time elapsed: 9.0m:3.9s\n", + "\n", + "[2026-04-08 16:40:18,750] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:18,750] INFO:cellink.tl.external._sclinker_utils: [79/264] done: CD4 Naive_L2/ABC_Road_BLD/chr3\n", + "[2026-04-08 16:40:18,773] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:28,416] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:33:59 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 1.8482\n", + "std 0.1422 14.7927\n", + "min 0.0051 -1.2347\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0063\n", + "max 0.5000 324.4044\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.029\n", + "L2 -0.029 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7685.5713\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0105\n", + "std 0.0902\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0051\n", + "Analysis finished at Wed Apr 8 16:40:28 2026\n", + "Total time elapsed: 6.0m:28.93s\n", + "\n", + "[2026-04-08 16:40:28,420] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:28,420] INFO:cellink.tl.external._sclinker_utils: [80/264] done: CD4 Naive_L2/100kb/chr4\n", + "[2026-04-08 16:40:28,443] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:40,089] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:32:52 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 2.0462\n", + "std 0.1432 16.2802\n", + "min 0.0051 -1.6707\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0003\n", + "75% 0.3558 0.0578\n", + "max 0.5000 492.5180\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0255\n", + "L2 0.0255 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9839.3155\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0126\n", + "std 0.1075\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0025\n", + "Analysis finished at Wed Apr 8 16:40:39 2026\n", + "Total time elapsed: 7.0m:47.57s\n", + "\n", + "[2026-04-08 16:40:40,092] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:40,092] INFO:cellink.tl.external._sclinker_utils: [81/264] done: CD4 Naive_L2/100kb/chr1\n", + "[2026-04-08 16:40:40,100] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:43,919] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:35:12 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 0.5012\n", + "std 0.1426 3.7724\n", + "min 0.0051 -0.7132\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3538 0.0178\n", + "max 0.5000 99.3413\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0113\n", + "L2 -0.0113 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2630.9263\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0052\n", + "std 0.0571\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0024\n", + "Analysis finished at Wed Apr 8 16:40:43 2026\n", + "Total time elapsed: 5.0m:30.97s\n", + "\n", + "[2026-04-08 16:40:43,920] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:40:43,920] INFO:cellink.tl.external._sclinker_utils: [82/264] done: CD4 Naive_L2/100kb/chr10\n", + "[2026-04-08 16:40:43,946] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.annot.gz --out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:41:08,676] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:33:53 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 6.5664e-01\n", + "std 0.1426 7.2053e+00\n", + "min 0.0051 -1.3688e+00\n", + "25% 0.1166 0.0000e+00\n", + "50% 0.2321 9.0755e-05\n", + "75% 0.3630 2.1570e-02\n", + "max 0.5000 2.6712e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0066\n", + "L2 0.0066 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3814.2083\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0054\n", + "std 0.0523\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.8522\n", + "Analysis finished at Wed Apr 8 16:41:08 2026\n", + "Total time elapsed: 7.0m:14.88s\n", + "\n", + "[2026-04-08 16:41:08,688] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:41:08,689] INFO:cellink.tl.external._sclinker_utils: [83/264] done: CD4 Naive_L2/100kb/chr3\n", + "[2026-04-08 16:41:08,695] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.1.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:41:35,052] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:35:21 
2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 1.4721e+00\n", + "std 0.1420 1.0552e+01\n", + "min 0.0051 -1.1196e+00\n", + "25% 0.1155 0.0000e+00\n", + "50% 0.2301 1.7045e-05\n", + "75% 0.3609 3.7515e-02\n", + "max 0.5000 2.8103e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0543\n", + "L2 0.0543 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6335.1205\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0128\n", + "std 0.0960\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.1384\n", + "Analysis finished at Wed Apr 8 16:41:34 2026\n", + "Total time elapsed: 6.0m:13.24s\n", + "\n", + "[2026-04-08 16:41:35,053] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.2.annot.gz 
--out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:41:35,053] INFO:cellink.tl.external._sclinker_utils: [84/264] done: CD4 Naive_L2/100kb/chr11\n", + "[2026-04-08 16:41:35,074] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.2.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:41:41,168] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:31:33 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 
annotations for 664016 SNPs from /data/annotations/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 2.5486\n", + "std 0.1431 26.4756\n", + "min 0.0051 -9.8977\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0057\n", + "max 0.5000 730.5967\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0074\n", + "L2 0.0074 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 16312.3868\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0246\n", + "std 0.9392\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 116.0000\n", + "Analysis finished at Wed Apr 8 16:41:40 2026\n", + "Total time elapsed: 10.0m:7.07s\n", + "\n", + "[2026-04-08 16:41:41,170] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.3.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:41:41,172] INFO:cellink.tl.external._sclinker_utils: [85/264] done: CD4 Naive_L2/ABC_Road_BLD/chr6\n", + "[2026-04-08 16:41:41,174] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.3.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:12,110] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:40:07 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations 
for 285156 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 5.8504\n", + "std 0.1423 54.0323\n", + "min 0.0051 -5.4437\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0120\n", + "max 0.5000 1275.6777\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0025\n", + "L2 -0.0025 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 15496.927\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0543\n", + "std 2.0092\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 153.3279\n", + "Analysis finished at Wed Apr 8 16:42:11 2026\n", + "Total time elapsed: 2.0m:4.23s\n", + "\n", + "[2026-04-08 16:42:12,112] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.4 --ld-wind-cm 
1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:12,112] INFO:cellink.tl.external._sclinker_utils: [86/264] done: CD4 TCM_L2/ABC_Road_BLD/chr18\n", + "[2026-04-08 16:42:12,116] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.4.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:13,736] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:40:42 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.annot.gz\n", + "Read list of 489 individuals from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 8.9532\n", + "std 0.1414 75.7467\n", + "min 0.0051 -20.1743\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0026\n", + "75% 0.3650 0.3004\n", + "max 0.5000 2559.4290\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0381\n", + "L2 0.0381 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 22746.9963\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.1640\n", + "std 5.0989\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 264.5061\n", + "Analysis finished at Wed Apr 8 16:42:13 2026\n", + "Total time elapsed: 1.0m:31.11s\n", + "\n", + "[2026-04-08 16:42:13,737] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + 
"[2026-04-08 16:42:13,737] INFO:cellink.tl.external._sclinker_utils: [87/264] done: CD4 TCM_L2/ABC_Road_BLD/chr21\n", + "[2026-04-08 16:42:13,756] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.5.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:18,026] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:35 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 4.7050e+01\n", + "std 0.1424 7.7361e+02\n", + "min 0.0051 -1.8082e+02\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2198 3.4706e-05\n", + "75% 0.3507 3.5025e-02\n", + "max 0.5000 2.6295e+04\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0017\n", + "L2 0.0017 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 143066.2926\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.4406\n", + "std 25.7883\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1700.0000\n", + "Analysis finished at Wed Apr 8 16:42:17 2026\n", + "Total time elapsed: 2.0m:42.62s\n", + "\n", + "[2026-04-08 16:42:18,027] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:18,028] 
INFO:cellink.tl.external._sclinker_utils: [88/264] done: CD4 TCM_L2/ABC_Road_BLD/chr14\n", + "[2026-04-08 16:42:18,056] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.6.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:18,534] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:34:21 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 
SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 1.2114e+00\n", + "std 0.1421 8.4987e+00\n", + "min 0.0051 -1.3207e+00\n", + "25% 0.1125 0.0000e+00\n", + "50% 0.2280 7.3524e-05\n", + "75% 0.3569 3.8105e-02\n", + "max 0.5000 1.7155e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.038\n", + "L2 0.038 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6062.0274\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0096\n", + "std 0.0796\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0749\n", + "Analysis finished at Wed Apr 8 16:42:18 2026\n", + "Total time elapsed: 7.0m:57.14s\n", + "\n", + "[2026-04-08 16:42:18,535] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:18,536] INFO:cellink.tl.external._sclinker_utils: [89/264] done: CD4 Naive_L2/100kb/chr5\n", + "[2026-04-08 16:42:18,570] INFO:cellink.tl._runner: 
Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.7.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:19,239] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:33:42 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.9834\n", + "std 0.1431 6.5748\n", + "min 0.0051 -0.9256\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0576\n", + "max 0.5000 176.0743\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0047\n", + "L2 -0.0047 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6100.7454\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0073\n", + "std 0.0627\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0050\n", + "Analysis finished at Wed Apr 8 16:42:18 2026\n", + "Total time elapsed: 8.0m:36.18s\n", + "\n", + "[2026-04-08 16:42:19,241] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:19,241] INFO:cellink.tl.external._sclinker_utils: [90/264] done: CD4 Naive_L2/100kb/chr2\n", + "[2026-04-08 16:42:19,245] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B 
/home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.8.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:28,481] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:40:46 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be 
printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 11.2506\n", + "std 0.1435 71.9070\n", + "min 0.0051 -16.5246\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0071\n", + "75% 0.3579 1.2982\n", + "max 0.5000 2523.8481\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0111\n", + "L2 0.0111 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 29537.4662\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.2093\n", + "std 4.1678\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 149.6910\n", + "Analysis finished at Wed Apr 8 16:42:28 2026\n", + "Total time elapsed: 1.0m:42.1s\n", + "\n", + "[2026-04-08 16:42:28,485] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:28,485] INFO:cellink.tl.external._sclinker_utils: [91/264] done: CD4 TCM_L2/ABC_Road_BLD/chr22\n", + "[2026-04-08 16:42:28,492] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 
--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.9.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:41,065] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:14 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.l2.ldscore.gz\n", + "\n", + "Summary 
of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 5.1635\n", + "std 0.1423 107.6019\n", + "min 0.0051 -40.9821\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0007\n", + "max 0.5000 8507.1141\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0093\n", + "L2 0.0093 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 41450.2157\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0946\n", + "std 4.8795\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 379.0000\n", + "Analysis finished at Wed Apr 8 16:42:40 2026\n", + "Total time elapsed: 3.0m:25.87s\n", + "\n", + "[2026-04-08 16:42:41,067] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:41,067] INFO:cellink.tl.external._sclinker_utils: [92/264] done: CD4 TCM_L2/ABC_Road_BLD/chr9\n", + "[2026-04-08 16:42:41,071] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.10.annot.gz --out 
/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:54,639] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:40:20 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 
2.6587\n", + "std 0.1420 31.0197\n", + "min 0.0051 -2.8764\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0003\n", + "75% 0.3569 0.0271\n", + "max 0.5000 917.2505\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0068\n", + "L2 -0.0068 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2367.6391\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0102\n", + "std 0.4843\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 39.3091\n", + "Analysis finished at Wed Apr 8 16:42:54 2026\n", + "Total time elapsed: 2.0m:33.59s\n", + "\n", + "[2026-04-08 16:42:54,643] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:54,644] INFO:cellink.tl.external._sclinker_utils: [93/264] done: CD4 TCM_L2/ABC_Road_BLD/chr19\n", + "[2026-04-08 16:42:54,647] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.11.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:57,368] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:36 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 1.8487e+00\n", + "std 0.1441 1.3351e+01\n", + "min 0.0051 -2.9274e+00\n", + "25% 0.1104 
0.0000e+00\n", + "50% 0.2270 4.5755e-05\n", + "75% 0.3640 5.9276e-02\n", + "max 0.5000 2.6159e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0104\n", + "L2 0.0104 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4129.9216\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0144\n", + "std 0.5369\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 41.5717\n", + "Analysis finished at Wed Apr 8 16:42:57 2026\n", + "Total time elapsed: 3.0m:20.25s\n", + "\n", + "[2026-04-08 16:42:57,369] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:42:57,369] INFO:cellink.tl.external._sclinker_utils: [94/264] done: CD4 TCM_L2/ABC_Road_BLD/chr15\n", + "[2026-04-08 16:42:57,382] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.12.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:02,929] 
INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:33 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 5.2964\n", + "std 0.1428 82.5297\n", + "min 0.0051 -17.0962\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0003\n", + "75% 0.3548 0.0929\n", + "max 0.5000 3831.7892\n", + "\n", + "MAF/LD Score 
Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0111\n", + "L2 0.0111 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 20955.1894\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0572\n", + "std 2.3074\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 167.8410\n", + "Analysis finished at Wed Apr 8 16:43:02 2026\n", + "Total time elapsed: 3.0m:29.39s\n", + "\n", + "[2026-04-08 16:43:02,930] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:02,930] INFO:cellink.tl.external._sclinker_utils: [95/264] done: CD4 TCM_L2/ABC_Road_BLD/chr13\n", + "[2026-04-08 16:43:02,942] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.13.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:08,091] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression 
(LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:48 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 6.7069\n", + "std 0.1444 46.4069\n", + "min 0.0051 -5.9475\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0018\n", + "75% 0.3681 0.2277\n", + "max 0.5000 1109.0929\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0079\n", + "L2 0.0079 1.0000\n", + "\n", + "Annotation Correlation 
Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 24597.3603\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0914\n", + "std 2.2867\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 164.1209\n", + "Analysis finished at Wed Apr 8 16:43:07 2026\n", + "Total time elapsed: 3.0m:19.8s\n", + "\n", + "[2026-04-08 16:43:08,094] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:08,095] INFO:cellink.tl.external._sclinker_utils: [96/264] done: CD4 TCM_L2/ABC_Road_BLD/chr17\n", + "[2026-04-08 16:43:08,144] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.14.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:17,150] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of 
MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:40:30 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 7.4911\n", + "std 0.1427 68.4188\n", + "min 0.0051 -6.1490\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0009\n", + "75% 0.3558 0.1212\n", + "max 0.5000 1866.1932\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0048\n", + "L2 -0.0048 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 17490.2277\n", + "\n", + 
"Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0789\n", + "std 2.6883\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 208.8043\n", + "Analysis finished at Wed Apr 8 16:43:16 2026\n", + "Total time elapsed: 2.0m:46.3s\n", + "\n", + "[2026-04-08 16:43:17,153] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:17,153] INFO:cellink.tl.external._sclinker_utils: [97/264] done: CD4 TCM_L2/ABC_Road_BLD/chr20\n", + "[2026-04-08 16:43:17,173] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.15.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:35,681] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:46 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 5.2666e+00\n", + "std 0.1426 8.9265e+01\n", + "min 0.0051 -1.1249e+01\n", + "25% 0.1145 0.0000e+00\n", + "50% 0.2311 1.2532e-05\n", + "75% 0.3599 2.9960e-02\n", + "max 0.5000 2.4490e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0235\n", + "L2 0.0235 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 14058.4316\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 
0.0444\n", + "std 2.0740\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 129.2666\n", + "Analysis finished at Wed Apr 8 16:43:35 2026\n", + "Total time elapsed: 3.0m:48.7s\n", + "\n", + "[2026-04-08 16:43:35,683] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:43:35,683] INFO:cellink.tl.external._sclinker_utils: [98/264] done: CD4 TCM_L2/ABC_Road_BLD/chr16\n", + "[2026-04-08 16:43:35,745] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.16.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:44:40,583] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py 
\\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:31 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 8.6578e+00\n", + "std 0.1429 9.5091e+01\n", + "min 0.0051 -1.7400e+01\n", + "25% 0.1084 0.0000e+00\n", + "50% 0.2209 5.5417e-05\n", + "75% 0.3558 2.9111e-02\n", + "max 0.5000 3.9745e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0059\n", + "L2 0.0059 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 36167.8062\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0753\n", + "std 3.4190\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + 
"max 321.4350\n", + "Analysis finished at Wed Apr 8 16:44:40 2026\n", + "Total time elapsed: 5.0m:8.21s\n", + "\n", + "[2026-04-08 16:44:40,606] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:44:40,606] INFO:cellink.tl.external._sclinker_utils: [99/264] done: CD4 TCM_L2/ABC_Road_BLD/chr12\n", + "[2026-04-08 16:44:40,611] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.17.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:09,848] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 
\\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:13 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 6.4831\n", + "std 0.1431 80.7857\n", + "min 0.0051 -17.3032\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0030\n", + "max 0.5000 2708.9368\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0465\n", + "L2 -0.0465 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 33382.2196\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0607\n", + "std 3.2481\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 352.7255\n", + "Analysis finished at Wed Apr 8 16:45:09 2026\n", + "Total time elapsed: 5.0m:55.81s\n", + "\n", + "[2026-04-08 
16:45:09,893] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:09,893] INFO:cellink.tl.external._sclinker_utils: [100/264] done: CD4 TCM_L2/ABC_Road_BLD/chr8\n", + "[2026-04-08 16:45:09,897] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.18.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:24,418] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.6 \\\n", + "--bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:34:28 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/CD4_Naive_L2/100kb/CD4_Naive_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 0.9671\n", + "std 0.1431 8.7041\n", + "min 0.0051 -0.8820\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0002\n", + "75% 0.3579 0.0301\n", + "max 0.5000 198.8430\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0122\n", + "L2 -0.0122 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6700.4748\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0101\n", + "std 0.0873\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0029\n", + "Analysis finished at Wed Apr 8 16:45:24 2026\n", + "Total time elapsed: 10.0m:55.28s\n", + "\n", + "[2026-04-08 16:45:24,502] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 
--bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:24,504] INFO:cellink.tl.external._sclinker_utils: [101/264] done: CD4 Naive_L2/100kb/chr6\n", + "[2026-04-08 16:45:24,513] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.19.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:36,599] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:24 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 1.4545e+00\n", + "std 0.1426 1.0437e+01\n", + "min 0.0051 -2.4355e+00\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2198 3.2594e-05\n", + "75% 0.3538 1.3115e-02\n", + "max 0.5000 5.5155e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0027\n", + "L2 -0.0027 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 12905.9822\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0253\n", + "std 0.9977\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 105.0000\n", + "Analysis finished at Wed Apr 8 16:45:36 2026\n", + "Total time elapsed: 6.0m:11.35s\n", + "\n", + "[2026-04-08 16:45:36,603] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:36,603] INFO:cellink.tl.external._sclinker_utils: [102/264] done: CD4 TCM_L2/ABC_Road_BLD/chr10\n", + "[2026-04-08 16:45:36,620] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.20.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:48,423] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:10 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 18.1237\n", + "std 0.1428 363.0904\n", + "min 0.0051 -95.5278\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0061\n", + "max 0.5000 13349.6574\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0003\n", + "L2 -0.0003 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 113263.165\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.1921\n", + "std 10.2149\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 958.1881\n", + "Analysis finished at Wed Apr 8 16:45:48 2026\n", + "Total time elapsed: 6.0m:37.16s\n", + "\n", + "[2026-04-08 16:45:48,425] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:48,426] INFO:cellink.tl.external._sclinker_utils: [103/264] done: CD4 TCM_L2/ABC_Road_BLD/chr7\n", + "[2026-04-08 16:45:48,435] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.21.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:51,173] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:30 
2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 0.6683\n", + "std 0.1423 5.6038\n", + "min 0.0051 -1.0009\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0043\n", + "max 0.5000 156.8563\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0202\n", + "L2 0.0202 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5242.2869\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0120\n", + "std 0.1006\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.1592\n", + "Analysis finished at Wed Apr 8 16:45:50 2026\n", + "Total time elapsed: 3.0m:20.46s\n", + "\n", + "[2026-04-08 16:45:51,175] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.22.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:45:51,175] INFO:cellink.tl.external._sclinker_utils: [104/264] done: CD4 TCM_L2/100kb/chr9\n", + "[2026-04-08 16:45:51,180] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.22.annot.gz --out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:46:09,843] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:38:56 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 
729645 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 2.6257\n", + "std 0.1422 25.8662\n", + "min 0.0051 -5.6222\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0011\n", + "max 0.5000 829.4704\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0186\n", + "L2 -0.0186 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 14638.2843\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0201\n", + "std 1.2179\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 264.0000\n", + "Analysis finished at Wed Apr 8 16:46:09 2026\n", + "Total time elapsed: 7.0m:12.6s\n", + "\n", + "[2026-04-08 16:46:09,844] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1 
--ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:46:09,844] INFO:cellink.tl.external._sclinker_utils: [105/264] done: CD4 TCM_L2/ABC_Road_BLD/chr4\n", + "[2026-04-08 16:46:09,874] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:46:41,173] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:43:05 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.13.annot.gz\n", + "Read list of 489 individuals from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 1.0424\n", + "std 0.1428 6.8819\n", + "min 0.0051 -0.8836\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0002\n", + "75% 0.3548 0.0476\n", + "max 0.5000 165.6981\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0024\n", + "L2 0.0024 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4628.9121\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0126\n", + "std 0.0934\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:46:40 2026\n", + "Total time elapsed: 3.0m:35.38s\n", + "\n", + "[2026-04-08 16:46:41,183] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + 
"[2026-04-08 16:46:41,183] INFO:cellink.tl.external._sclinker_utils: [106/264] done: CD4 TCM_L2/100kb/chr13\n", + "[2026-04-08 16:46:41,191] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:46:44,370] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:43:19 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 1.8444\n", + "std 0.1441 8.1853\n", + "min 0.0051 -0.7368\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0032\n", + "75% 0.3640 0.1422\n", + "max 0.5000 115.4847\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0251\n", + "L2 0.0251 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5527.2673\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0193\n", + "std 0.0967\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:46:44 2026\n", + "Total time elapsed: 3.0m:24.81s\n", + "\n", + "[2026-04-08 16:46:44,371] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:46:44,371] INFO:cellink.tl.external._sclinker_utils: [107/264] done: CD4 
TCM_L2/100kb/chr15\n", + "[2026-04-08 16:46:44,408] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:00,888] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:43:10 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list 
of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 1.4418\n", + "std 0.1424 10.3345\n", + "min 0.0051 -1.1179\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0009\n", + "75% 0.3507 0.0484\n", + "max 0.5000 182.0024\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0066\n", + "L2 -0.0066 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4976.0522\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0153\n", + "std 0.1171\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.1005\n", + "Analysis finished at Wed Apr 8 16:47:00 2026\n", + "Total time elapsed: 3.0m:50.58s\n", + "\n", + "[2026-04-08 16:47:00,889] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:00,890] INFO:cellink.tl.external._sclinker_utils: [108/264] done: CD4 TCM_L2/100kb/chr14\n", + "[2026-04-08 16:47:00,896] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:05,261] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:45:50 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", 
+ "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 1.6572\n", + "std 0.1414 10.0515\n", + "min 0.0051 -0.8704\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3650 0.0381\n", + "max 0.5000 118.2414\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0538\n", + "L2 0.0538 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2162.49\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0156\n", + "std 0.1100\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:47:05 2026\n", + "Total time elapsed: 1.0m:14.45s\n", + "\n", + "[2026-04-08 16:47:05,263] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:05,263] INFO:cellink.tl.external._sclinker_utils: [109/264] done: CD4 TCM_L2/100kb/chr21\n", + "[2026-04-08 16:47:05,268] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data 
/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:15,674] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:43:37 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to 
/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 0.3384\n", + "std 0.1426 2.3347\n", + "min 0.0051 -0.1842\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0005\n", + "75% 0.3599 0.0583\n", + "max 0.5000 45.9210\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0386\n", + "L2 0.0386 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1295.4328\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0041\n", + "std 0.0255\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.3256\n", + "Analysis finished at Wed Apr 8 16:47:15 2026\n", + "Total time elapsed: 3.0m:37.73s\n", + "\n", + "[2026-04-08 16:47:15,676] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:15,677] INFO:cellink.tl.external._sclinker_utils: [110/264] done: CD4 TCM_L2/100kb/chr16\n", + "[2026-04-08 16:47:15,721] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 
--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:16,058] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:45:39 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 0.4850\n", + "std 0.1427 3.6368\n", + "min 0.0051 -0.7043\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0283\n", + "max 0.5000 110.3180\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0256\n", + "L2 0.0256 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1510.1834\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0068\n", + "std 0.0649\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:47:15 2026\n", + "Total time elapsed: 1.0m:36.45s\n", + "\n", + "[2026-04-08 16:47:16,066] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:16,067] INFO:cellink.tl.external._sclinker_utils: [111/264] done: CD4 TCM_L2/100kb/chr20\n", + "[2026-04-08 16:47:16,070] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.annot.gz --out 
/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:19,091] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:59 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 2.0837\n", + "std 0.1429 
12.4124\n", + "min 0.0051 -1.6904\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0002\n", + "75% 0.3558 0.0341\n", + "max 0.5000 201.0216\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0135\n", + "L2 0.0135 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9867.3488\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0206\n", + "std 0.1354\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0757\n", + "Analysis finished at Wed Apr 8 16:47:18 2026\n", + "Total time elapsed: 4.0m:19.51s\n", + "\n", + "[2026-04-08 16:47:19,093] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:19,093] INFO:cellink.tl.external._sclinker_utils: [112/264] done: CD4 TCM_L2/100kb/chr12\n", + "[2026-04-08 16:47:19,099] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:27,641] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:45:53 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 2.0786\n", + "std 0.1435 6.0769\n", + "min 0.0051 -0.5284\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0039\n", + "75% 0.3579 
0.4594\n", + "max 0.5000 47.6512\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0313\n", + "L2 0.0313 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3768.4013\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0267\n", + "std 0.1057\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0004\n", + "max 0.7690\n", + "Analysis finished at Wed Apr 8 16:47:27 2026\n", + "Total time elapsed: 1.0m:33.77s\n", + "\n", + "[2026-04-08 16:47:27,646] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:27,647] INFO:cellink.tl.external._sclinker_utils: [113/264] done: CD4 TCM_L2/100kb/chr22\n", + "[2026-04-08 16:47:27,656] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:31,015] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:21 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 1.6434\n", + "std 0.1431 10.1647\n", + "min 0.0051 -1.0053\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0160\n", + "max 0.5000 212.4231\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0106\n", + "L2 
0.0106 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9976.6909\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0181\n", + "std 0.1095\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0045\n", + "Analysis finished at Wed Apr 8 16:47:30 2026\n", + "Total time elapsed: 5.0m:9.36s\n", + "\n", + "[2026-04-08 16:47:31,017] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:31,017] INFO:cellink.tl.external._sclinker_utils: [114/264] done: CD4 TCM_L2/100kb/chr8\n", + "[2026-04-08 16:47:31,024] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:31,798] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 
Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:44:45 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 3.5195\n", + "std 0.1444 16.7127\n", + "min 0.0051 -1.0505\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0011\n", + "75% 0.3681 0.1840\n", + "max 0.5000 219.0104\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0343\n", + "L2 0.0343 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + 
"ANNOT 7509.5162\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0279\n", + "std 0.1290\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:47:31 2026\n", + "Total time elapsed: 2.0m:46.25s\n", + "\n", + "[2026-04-08 16:47:31,799] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:31,799] INFO:cellink.tl.external._sclinker_utils: [115/264] done: CD4 TCM_L2/100kb/chr17\n", + "[2026-04-08 16:47:31,802] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:41,059] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General 
Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:45:12 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 0.7354\n", + "std 0.1423 7.6938\n", + "min 0.0051 -1.1378\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0084\n", + "max 0.5000 186.8329\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0028\n", + "L2 0.0028 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2575.8858\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0090\n", + "std 0.0898\n", + "min 
0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0548\n", + "Analysis finished at Wed Apr 8 16:47:40 2026\n", + "Total time elapsed: 2.0m:28.38s\n", + "\n", + "[2026-04-08 16:47:41,062] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:41,062] INFO:cellink.tl.external._sclinker_utils: [116/264] done: CD4 TCM_L2/100kb/chr18\n", + "[2026-04-08 16:47:41,084] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:51,113] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + 
"--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:43 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 0.7737\n", + "std 0.1426 5.7740\n", + "min 0.0051 -0.7990\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0001\n", + "75% 0.3538 0.0305\n", + "max 0.5000 107.3423\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0198\n", + "L2 0.0198 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4222.0863\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0083\n", + "std 0.0759\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0036\n", + "Analysis finished at Wed Apr 8 16:47:50 2026\n", + 
"Total time elapsed: 5.0m:7.87s\n", + "\n", + "[2026-04-08 16:47:51,114] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:51,115] INFO:cellink.tl.external._sclinker_utils: [117/264] done: CD4 TCM_L2/100kb/chr10\n", + "[2026-04-08 16:47:51,118] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:52,869] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
/data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:27 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 8.7461\n", + "std 0.1420 155.8581\n", + "min 0.0051 -37.2767\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0023\n", + "max 0.5000 8351.4684\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0172\n", + "L2 -0.0172 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 36200.4678\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0733\n", + "std 8.6762\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1531.2637\n", + "Analysis finished at Wed Apr 8 16:47:52 2026\n", + "Total time elapsed: 8.0m:24.99s\n", + "\n", + "[2026-04-08 
16:47:52,872] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:52,873] INFO:cellink.tl.external._sclinker_utils: [118/264] done: CD4 TCM_L2/ABC_Road_BLD/chr11\n", + "[2026-04-08 16:47:52,939] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:53,686] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2 \\\n", + "--bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:38:50 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.8159\n", + "std 0.1431 12.4600\n", + "min 0.0051 -1.8642\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0061\n", + "max 0.5000 646.7332\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0065\n", + "L2 -0.0065 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5061.3286\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0060\n", + "std 0.4101\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 122.4124\n", + "Analysis finished at Wed Apr 8 16:47:53 2026\n", + "Total time elapsed: 9.0m:2.7s\n", + "\n", + "[2026-04-08 16:47:53,690] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: 
/ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:47:53,691] INFO:cellink.tl.external._sclinker_utils: [119/264] done: CD4 TCM_L2/ABC_Road_BLD/chr2\n", + "[2026-04-08 16:47:53,705] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:14,949] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really 
\\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:38:42 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 6.4618\n", + "std 0.1432 87.1589\n", + "min 0.0051 -28.3271\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0003\n", + "75% 0.3558 0.0359\n", + "max 0.5000 6225.3993\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0157\n", + "L2 0.0157 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 57267.2014\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0735\n", + "std 5.2240\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 722.1040\n", + "Analysis finished at Wed Apr 8 16:48:14 2026\n", + "Total time elapsed: 9.0m:31.94s\n", + "\n", + "[2026-04-08 16:48:14,952] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 
--annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:14,952] INFO:cellink.tl.external._sclinker_utils: [120/264] done: CD4 TCM_L2/ABC_Road_BLD/chr1\n", + "[2026-04-08 16:48:14,963] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:39,020] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + 
"Beginning analysis at Wed Apr 8 16:42:14 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 1.6431\n", + "std 0.1422 15.2086\n", + "min 0.0051 -1.2292\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0078\n", + "max 0.5000 324.2594\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.027\n", + "L2 -0.027 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5218.4771\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0072\n", + "std 0.0756\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0073\n", + "Analysis finished at Wed Apr 8 16:48:38 2026\n", + "Total time elapsed: 6.0m:24.61s\n", + "\n", + "[2026-04-08 16:48:39,021] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:39,021] INFO:cellink.tl.external._sclinker_utils: [121/264] done: CD4 TCM_L2/100kb/chr4\n", + "[2026-04-08 16:48:39,048] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:39,767] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:39:02 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 1.4043\n", + "std 0.1421 13.4184\n", + "min 0.0051 -3.4072\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0003\n", + "75% 0.3569 0.0321\n", + "max 0.5000 579.6280\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0236\n", + "L2 0.0236 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 10974.3465\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0173\n", + "std 0.7076\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 86.0688\n", + "Analysis finished at Wed Apr 8 16:48:39 2026\n", + "Total time elapsed: 9.0m:36.42s\n", + "\n", + "[2026-04-08 16:48:39,769] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:39,770] INFO:cellink.tl.external._sclinker_utils: [122/264] done: CD4 TCM_L2/ABC_Road_BLD/chr5\n", + "[2026-04-08 16:48:39,788] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:54,808] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:45:26 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.8293\n", + "std 0.1420 3.9776\n", + "min 0.0051 -0.3522\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0089\n", + "75% 0.3569 0.1533\n", + "max 0.5000 52.4218\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0386\n", + "L2 0.0386 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1881.5607\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0081\n", + "std 0.0540\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.6882\n", + "Analysis finished at Wed Apr 8 16:48:54 2026\n", + "Total time elapsed: 3.0m:27.9s\n", + "\n", + "[2026-04-08 16:48:54,809] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.annot.gz 
--out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:54,810] INFO:cellink.tl.external._sclinker_utils: [123/264] done: CD4 TCM_L2/100kb/chr19\n", + "[2026-04-08 16:48:54,813] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:58,879] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:41:10 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 
annotations for 779354 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 1.9042\n", + "std 0.1432 12.3480\n", + "min 0.0051 -1.0396\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0007\n", + "75% 0.3558 0.0531\n", + "max 0.5000 239.8233\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0338\n", + "L2 0.0338 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 10531.0536\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0135\n", + "std 0.1004\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.5668\n", + "Analysis finished at Wed Apr 8 16:48:58 2026\n", + "Total time elapsed: 7.0m:47.75s\n", + "\n", + "[2026-04-08 16:48:58,881] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20 --ld-wind-cm 
1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:48:58,881] INFO:cellink.tl.external._sclinker_utils: [124/264] done: CD4 TCM_L2/100kb/chr1\n", + "[2026-04-08 16:48:58,912] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:04,970] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:39:03 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.annot.gz\n", + "Read list of 489 individuals from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 3.6108\n", + "std 0.1431 41.7648\n", + "min 0.0051 -8.4911\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0061\n", + "max 0.5000 1618.1379\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0049\n", + "L2 0.0049 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 23649.6245\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0356\n", + "std 1.2254\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 127.4433\n", + "Analysis finished at Wed Apr 8 16:49:04 2026\n", + "Total time elapsed: 10.0m:0.89s\n", + "\n", + "[2026-04-08 16:49:04,971] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt 
--yes-really\n", + "[2026-04-08 16:49:04,971] INFO:cellink.tl.external._sclinker_utils: [125/264] done: CD4 TCM_L2/ABC_Road_BLD/chr6\n", + "[2026-04-08 16:49:04,974] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:10,665] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:38:51 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 1.3960\n", + "std 0.1426 19.1742\n", + "min 0.0051 -4.0383\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0009\n", + "75% 0.3630 0.0239\n", + "max 0.5000 885.2224\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0241\n", + "L2 0.0241 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7027.255\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0099\n", + "std 0.6908\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 129.9885\n", + "Analysis finished at Wed Apr 8 16:49:10 2026\n", + "Total time elapsed: 10.0m:19.06s\n", + "\n", + "[2026-04-08 16:49:10,667] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:10,668] INFO:cellink.tl.external._sclinker_utils: 
[126/264] done: CD4 TCM_L2/ABC_Road_BLD/chr3\n", + "[2026-04-08 16:49:10,672] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.annot.gz --out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:16,605] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:20 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD 
Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 1.5857e+00\n", + "std 0.1428 1.1843e+01\n", + "min 0.0051 -1.0212e+00\n", + "25% 0.1125 0.0000e+00\n", + "50% 0.2301 2.2818e-05\n", + "75% 0.3609 1.8295e-02\n", + "max 0.5000 2.9061e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0171\n", + "L2 -0.0171 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 10976.4559\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0186\n", + "std 0.1556\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0156\n", + "Analysis finished at Wed Apr 8 16:49:16 2026\n", + "Total time elapsed: 6.0m:55.72s\n", + "\n", + "[2026-04-08 16:49:16,607] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:16,607] INFO:cellink.tl.external._sclinker_utils: [127/264] done: CD4 TCM_L2/100kb/chr7\n", + "[2026-04-08 16:49:16,633] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.1.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:47,680] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:55 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 0.8087\n", + "std 0.1424 28.6410\n", + "min 0.0051 -3.7221\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3507 0.0000\n", + "max 0.5000 1534.1516\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0271\n", + "L2 -0.0271 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3067.2644\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0094\n", + "std 0.6304\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 51.9796\n", + "Analysis finished at Wed Apr 8 16:49:47 2026\n", + "Total time elapsed: 1.0m:52.17s\n", + "\n", + "[2026-04-08 16:49:47,681] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:47,683] INFO:cellink.tl.external._sclinker_utils: [128/264] done: CD8 TEM_L2/ABC_Road_BLD/chr14\n", + "[2026-04-08 16:49:47,685] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.2.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:48,988] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:41:37 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with 
--print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 1.1835\n", + "std 0.1431 7.8297\n", + "min 0.0051 -1.0176\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0002\n", + "75% 0.3620 0.0585\n", + "max 0.5000 191.5739\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.009\n", + "L2 -0.009 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8100.7836\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0096\n", + "std 0.0761\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0226\n", + "Analysis finished at Wed Apr 8 16:49:48 2026\n", + "Total time elapsed: 8.0m:11.22s\n", + "\n", + "[2026-04-08 16:49:49,012] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:49:49,013] INFO:cellink.tl.external._sclinker_utils: [129/264] done: CD4 TCM_L2/100kb/chr2\n", + "[2026-04-08 16:49:49,020] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif 
/ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.3.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:13,861] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:49:07 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to 
/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 1.0250e+00\n", + "std 0.1414 9.2635e+00\n", + "min 0.0051 -1.2703e+00\n", + "25% 0.1186 0.0000e+00\n", + "50% 0.2321 8.4890e-05\n", + "75% 0.3650 2.1108e-02\n", + "max 0.5000 2.3063e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0494\n", + "L2 0.0494 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1807.1289\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0130\n", + "std 0.4602\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 32.6826\n", + "Analysis finished at Wed Apr 8 16:50:13 2026\n", + "Total time elapsed: 1.0m:6.55s\n", + "\n", + "[2026-04-08 16:50:13,864] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:13,864] INFO:cellink.tl.external._sclinker_utils: [130/264] done: CD8 TEM_L2/ABC_Road_BLD/chr21\n", + "[2026-04-08 16:50:13,896] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.4.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:35,271] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:56 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.l2.ldscore.gz\n", + "\n", + "Summary 
of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 0.3653\n", + "std 0.1441 2.3938\n", + "min 0.0051 -0.3410\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0000\n", + "75% 0.3640 0.0004\n", + "max 0.5000 32.7230\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0159\n", + "L2 -0.0159 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 839.033\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0029\n", + "std 0.0977\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 8.5168\n", + "Analysis finished at Wed Apr 8 16:50:34 2026\n", + "Total time elapsed: 2.0m:38.9s\n", + "\n", + "[2026-04-08 16:50:35,367] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:35,368] INFO:cellink.tl.external._sclinker_utils: [131/264] done: CD8 TEM_L2/ABC_Road_BLD/chr15\n", + "[2026-04-08 16:50:35,375] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.5.annot.gz --out 
/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:39,534] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:49:12 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 
3.0439e+00\n", + "std 0.1435 2.7536e+01\n", + "min 0.0051 -4.0108e+00\n", + "25% 0.1094 0.0000e+00\n", + "50% 0.2280 6.1640e-06\n", + "75% 0.3579 1.2590e-02\n", + "max 0.5000 6.0309e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0215\n", + "L2 0.0215 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6494.9056\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0460\n", + "std 1.5642\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 145.0000\n", + "Analysis finished at Wed Apr 8 16:50:39 2026\n", + "Total time elapsed: 1.0m:26.33s\n", + "\n", + "[2026-04-08 16:50:39,537] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:39,537] INFO:cellink.tl.external._sclinker_utils: [132/264] done: CD8 TEM_L2/ABC_Road_BLD/chr22\n", + "[2026-04-08 16:50:39,563] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.6.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:44,660] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:41:43 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 0.5727\n", + "std 0.1426 5.9483\n", + "min 0.0051 -1.0890\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0009\n", + "75% 0.3630 0.0445\n", 
+ "max 0.5000 226.3067\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0095\n", + "L2 0.0095 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2902.0938\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0041\n", + "std 0.0380\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.7220\n", + "Analysis finished at Wed Apr 8 16:50:44 2026\n", + "Total time elapsed: 9.0m:1.05s\n", + "\n", + "[2026-04-08 16:50:44,663] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:44,663] INFO:cellink.tl.external._sclinker_utils: [133/264] done: CD4 TCM_L2/100kb/chr3\n", + "[2026-04-08 16:50:44,708] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.7.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:47,579] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:30 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 0.8184\n", + "std 0.1423 18.0666\n", + "min 0.0051 -6.4575\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0002\n", + "max 0.5000 934.1412\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", 
+ "MAF 1.0000 0.0145\n", + "L2 0.0145 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7511.0152\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0171\n", + "std 1.1767\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 146.1268\n", + "Analysis finished at Wed Apr 8 16:50:47 2026\n", + "Total time elapsed: 3.0m:16.9s\n", + "\n", + "[2026-04-08 16:50:47,584] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:47,585] INFO:cellink.tl.external._sclinker_utils: [134/264] done: CD8 TEM_L2/ABC_Road_BLD/chr9\n", + "[2026-04-08 16:50:47,593] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.8.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:58,872] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 
2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:15 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 0.7317\n", + "std 0.1421 5.1896\n", + "min 0.0051 -0.7514\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0008\n", + "75% 0.3569 0.0358\n", + "max 0.5000 125.5363\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0232\n", + "L2 0.0232 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + 
"ANNOT 3944.8652\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0062\n", + "std 0.0550\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:50:58 2026\n", + "Total time elapsed: 8.0m:42.86s\n", + "\n", + "[2026-04-08 16:50:58,879] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:50:58,880] INFO:cellink.tl.external._sclinker_utils: [135/264] done: CD4 TCM_L2/100kb/chr5\n", + "[2026-04-08 16:50:58,899] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.9.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:00,087] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:48:42 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 1.4493e+00\n", + "std 0.1423 4.0445e+01\n", + "min 0.0051 -1.2339e+01\n", + "25% 0.1115 0.0000e+00\n", + "50% 0.2260 0.0000e+00\n", + "75% 0.3569 1.5306e-05\n", + "max 0.5000 3.2439e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0094\n", + "L2 -0.0094 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 20234.7428\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 
0.0710\n", + "std 4.7049\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 394.8970\n", + "Analysis finished at Wed Apr 8 16:50:59 2026\n", + "Total time elapsed: 2.0m:17.67s\n", + "\n", + "[2026-04-08 16:51:00,089] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:00,089] INFO:cellink.tl.external._sclinker_utils: [136/264] done: CD8 TEM_L2/ABC_Road_BLD/chr18\n", + "[2026-04-08 16:51:00,129] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.10.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:09,046] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py 
\\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:49:00 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 6.7034e-02\n", + "std 0.1427 5.1744e-01\n", + "min 0.0051 -4.4518e-02\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2229 2.4971e-05\n", + "75% 0.3558 4.7150e-03\n", + "max 0.5000 1.4069e+01\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0077\n", + "L2 -0.0077 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 162.0949\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0007\n", + "std 0.0256\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + 
"max 2.2589\n", + "Analysis finished at Wed Apr 8 16:51:08 2026\n", + "Total time elapsed: 2.0m:7.92s\n", + "\n", + "[2026-04-08 16:51:09,052] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:09,052] INFO:cellink.tl.external._sclinker_utils: [137/264] done: CD8 TEM_L2/ABC_Road_BLD/chr20\n", + "[2026-04-08 16:51:09,059] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.11.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:14,365] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 
\\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:56 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 1.6928\n", + "std 0.1420 11.6619\n", + "min 0.0051 -1.2831\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0008\n", + "75% 0.3609 0.0497\n", + "max 0.5000 324.2978\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0585\n", + "L2 0.0585 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7692.4899\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0156\n", + "std 0.1156\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.3137\n", + "Analysis finished at Wed Apr 8 16:51:14 2026\n", + "Total time elapsed: 8.0m:17.55s\n", + "\n", + "[2026-04-08 16:51:14,370] 
INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:14,372] INFO:cellink.tl.external._sclinker_utils: [138/264] done: CD4 TCM_L2/100kb/chr11\n", + "[2026-04-08 16:51:14,377] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.12.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:18,826] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + 
"--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:43 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 0.3589\n", + "std 0.1429 4.4312\n", + "min 0.0051 -2.0775\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0002\n", + "max 0.5000 221.8414\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0193\n", + "L2 0.0193 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5436.0187\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0113\n", + "std 0.7161\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 108.1908\n", + "Analysis finished at Wed Apr 8 16:51:18 2026\n", + "Total time elapsed: 3.0m:35.46s\n", + "\n", + "[2026-04-08 16:51:18,828] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:18,828] INFO:cellink.tl.external._sclinker_utils: [139/264] done: CD8 TEM_L2/ABC_Road_BLD/chr12\n", + "[2026-04-08 16:51:18,843] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.13.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:23,652] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:02 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 1.0757\n", + "std 0.1422 15.0527\n", + "min 0.0051 -1.2962\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0000\n", + "max 0.5000 698.6973\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0022\n", + "L2 -0.0022 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 10003.7603\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0137\n", + "std 0.9662\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 155.6528\n", + "Analysis finished at Wed Apr 8 16:51:23 2026\n", + "Total time elapsed: 4.0m:20.66s\n", + "\n", + "[2026-04-08 16:51:23,653] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:23,653] INFO:cellink.tl.external._sclinker_utils: [140/264] done: CD8 TEM_L2/ABC_Road_BLD/chr4\n", + "[2026-04-08 16:51:23,656] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.14.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:38,136] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed 
Apr 8 16:48:57 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 1.9774e-01\n", + "std 0.1420 1.6666e+00\n", + "min 0.0051 -3.9916e-01\n", + "25% 0.1145 0.0000e+00\n", + "50% 0.2342 2.8787e-06\n", + "75% 0.3569 3.0076e-03\n", + "max 0.5000 3.9417e+01\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0154\n", + "L2 -0.0154 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 621.8147\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0027\n", + "std 0.1283\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 14.7928\n", + "Analysis finished at Wed Apr 8 16:51:37 2026\n", + "Total time elapsed: 2.0m:40.67s\n", + "\n", + "[2026-04-08 16:51:38,141] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:38,141] INFO:cellink.tl.external._sclinker_utils: [141/264] done: CD8 TEM_L2/ABC_Road_BLD/chr19\n", + "[2026-04-08 16:51:38,160] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.15.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:43,980] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed 
Apr 8 16:48:41 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 1.0142\n", + "std 0.1444 17.1686\n", + "min 0.0051 -1.3079\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0000\n", + "75% 0.3681 0.0074\n", + "max 0.5000 528.8850\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0054\n", + "L2 0.0054 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1657.8277\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0062\n", + "std 0.2768\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 22.2046\n", + "Analysis finished at Wed Apr 8 16:51:43 2026\n", + "Total time elapsed: 3.0m:2.23s\n", + "\n", + "[2026-04-08 16:51:44,048] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.16.annot.gz 
--out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:51:44,048] INFO:cellink.tl.external._sclinker_utils: [142/264] done: CD8 TEM_L2/ABC_Road_BLD/chr17\n", + "[2026-04-08 16:51:44,054] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.16.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:01,438] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:48:17 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 
annotations for 316981 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 1.0236\n", + "std 0.1426 22.1060\n", + "min 0.0051 -3.5404\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0000\n", + "75% 0.3599 0.0022\n", + "max 0.5000 826.6002\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0331\n", + "L2 0.0331 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3790.8711\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0120\n", + "std 0.7724\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 118.7001\n", + "Analysis finished at Wed Apr 8 16:52:01 2026\n", + "Total time elapsed: 3.0m:43.83s\n", + "\n", + "[2026-04-08 16:52:01,440] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.17 
--ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:01,441] INFO:cellink.tl.external._sclinker_utils: [143/264] done: CD8 TEM_L2/ABC_Road_BLD/chr16\n", + "[2026-04-08 16:52:01,477] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.17.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:20,075] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:46:46 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.annot.gz\n", + "Read list of 489 
individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 0.0301\n", + "std 0.1426 0.2424\n", + "min 0.0051 -0.0546\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3630 0.0005\n", + "max 0.5000 12.4101\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0243\n", + "L2 0.0243 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 267.9553\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0004\n", + "std 0.0197\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 4.0263\n", + "Analysis finished at Wed Apr 8 16:52:19 2026\n", + "Total time elapsed: 5.0m:33.12s\n", + "\n", + "[2026-04-08 16:52:20,078] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt 
--yes-really\n", + "[2026-04-08 16:52:20,078] INFO:cellink.tl.external._sclinker_utils: [144/264] done: CD8 TEM_L2/ABC_Road_BLD/chr3\n", + "[2026-04-08 16:52:20,087] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.18.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:27,284] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:53 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 0.0306\n", + "std 0.1428 0.2361\n", + "min 0.0051 -0.0303\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3548 0.0016\n", + "max 0.5000 8.4957\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0188\n", + "L2 0.0188 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 162.827\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0004\n", + "std 0.0195\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.1342\n", + "Analysis finished at Wed Apr 8 16:52:27 2026\n", + "Total time elapsed: 4.0m:33.66s\n", + "\n", + "[2026-04-08 16:52:27,285] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:27,285] INFO:cellink.tl.external._sclinker_utils: [145/264] done: CD8 
TEM_L2/ABC_Road_BLD/chr13\n", + "[2026-04-08 16:52:27,289] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.19.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:29,960] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:33 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", 
+ "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 1.8170\n", + "std 0.1426 17.8613\n", + "min 0.0051 -5.0421\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3538 0.0038\n", + "max 0.5000 906.7351\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0238\n", + "L2 0.0238 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9471.6852\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0186\n", + "std 0.9652\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 133.8353\n", + "Analysis finished at Wed Apr 8 16:52:29 2026\n", + "Total time elapsed: 4.0m:56.56s\n", + "\n", + "[2026-04-08 16:52:29,961] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:29,961] INFO:cellink.tl.external._sclinker_utils: [146/264] done: CD8 TEM_L2/ABC_Road_BLD/chr10\n", + "[2026-04-08 16:52:29,978] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.20.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:37,199] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:42:20 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/CD4_TCM_L2/100kb/CD4_TCM_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging 
with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 1.4900e+00\n", + "std 0.1431 9.8646e+00\n", + "min 0.0051 -1.3625e+00\n", + "25% 0.1104 0.0000e+00\n", + "50% 0.2249 4.1837e-05\n", + "75% 0.3579 2.1283e-02\n", + "max 0.5000 1.9101e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0078\n", + "L2 -0.0078 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8245.9777\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0124\n", + "std 0.0989\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.5343\n", + "Analysis finished at Wed Apr 8 16:52:36 2026\n", + "Total time elapsed: 10.0m:16.82s\n", + "\n", + "[2026-04-08 16:52:37,202] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:37,202] INFO:cellink.tl.external._sclinker_utils: [147/264] done: CD4 TCM_L2/100kb/chr6\n", + "[2026-04-08 16:52:37,234] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data 
/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.21.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:53,322] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:17 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to 
/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 12.5172\n", + "std 0.1428 177.0289\n", + "min 0.0051 -44.4153\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0150\n", + "max 0.5000 6206.8609\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0025\n", + "L2 0.0025 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 62760.3492\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.1065\n", + "std 4.9861\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 445.4757\n", + "Analysis finished at Wed Apr 8 16:52:53 2026\n", + "Total time elapsed: 5.0m:35.16s\n", + "\n", + "[2026-04-08 16:52:53,323] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:52:53,323] INFO:cellink.tl.external._sclinker_utils: [148/264] done: CD8 TEM_L2/ABC_Road_BLD/chr7\n", + "[2026-04-08 16:52:53,328] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.22.annot.gz --out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:02,527] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:46:12 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of 
LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 1.8851\n", + "std 0.1432 40.0162\n", + "min 0.0051 -10.6045\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0000\n", + "75% 0.3558 0.0007\n", + "max 0.5000 2646.2747\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0042\n", + "L2 0.0042 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 23998.8498\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0308\n", + "std 2.2366\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 314.0000\n", + "Analysis finished at Wed Apr 8 16:53:02 2026\n", + "Total time elapsed: 6.0m:50.16s\n", + "\n", + "[2026-04-08 16:53:02,528] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:02,528] INFO:cellink.tl.external._sclinker_utils: [149/264] done: CD8 TEM_L2/ABC_Road_BLD/chr1\n", + "[2026-04-08 16:53:02,575] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.1.annot.gz --out 
/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:05,488] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:33 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 
13.3390\n", + "std 0.1420 266.0236\n", + "min 0.0051 -61.6745\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0005\n", + "max 0.5000 13868.7449\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0059\n", + "L2 -0.0059 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 46389.6456\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0939\n", + "std 12.5768\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2208.0990\n", + "Analysis finished at Wed Apr 8 16:53:05 2026\n", + "Total time elapsed: 5.0m:31.39s\n", + "\n", + "[2026-04-08 16:53:05,490] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:05,490] INFO:cellink.tl.external._sclinker_utils: [150/264] done: CD8 TEM_L2/ABC_Road_BLD/chr11\n", + "[2026-04-08 16:53:05,493] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.2.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:26,403] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:00 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 0.3756\n", + "std 0.1423 5.9195\n", + "min 0.0051 -1.7075\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0005\n", 
+ "max 0.5000 195.9294\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.017\n", + "L2 -0.017 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2040.4671\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0047\n", + "std 0.0568\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.8452\n", + "Analysis finished at Wed Apr 8 16:53:26 2026\n", + "Total time elapsed: 2.0m:25.26s\n", + "\n", + "[2026-04-08 16:53:26,408] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:26,409] INFO:cellink.tl.external._sclinker_utils: [151/264] done: CD8 TEM_L2/100kb/chr9\n", + "[2026-04-08 16:53:26,416] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.3.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:30,586] INFO:cellink.tl._runner: *********************************************************************\n", + "* 
LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:52:39 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 0.5807\n", + "std 0.1414 3.9553\n", + "min 0.0051 -0.3721\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3650 0.0075\n", + "max 0.5000 50.9360\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.045\n", + "L2 0.045 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + 
"ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 751.1648\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0054\n", + "std 0.0387\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.3307\n", + "Analysis finished at Wed Apr 8 16:53:30 2026\n", + "Total time elapsed: 50.88s\n", + "\n", + "[2026-04-08 16:53:30,587] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:30,587] INFO:cellink.tl.external._sclinker_utils: [152/264] done: CD8 TEM_L2/100kb/chr21\n", + "[2026-04-08 16:53:30,597] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.4.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:33,551] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* 
GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:07 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 0.2478\n", + "std 0.1421 4.2551\n", + "min 0.0051 -1.5753\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0004\n", + "max 0.5000 249.3590\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0244\n", + "L2 0.0244 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1935.0113\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 
0.0031\n", + "std 0.3838\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 85.6699\n", + "Analysis finished at Wed Apr 8 16:53:33 2026\n", + "Total time elapsed: 6.0m:25.97s\n", + "\n", + "[2026-04-08 16:53:33,554] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:33,555] INFO:cellink.tl.external._sclinker_utils: [153/264] done: CD8 TEM_L2/ABC_Road_BLD/chr5\n", + "[2026-04-08 16:53:33,559] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.5.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:43,795] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + 
"--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:40 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 0.2364\n", + "std 0.1441 2.0443\n", + "min 0.0051 -0.3214\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0000\n", + "75% 0.3640 0.0043\n", + "max 0.5000 39.2164\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0464\n", + "L2 -0.0464 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 593.345\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0021\n", + "std 0.0179\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.2302\n", + "Analysis finished at Wed Apr 8 16:53:43 2026\n", + 
"Total time elapsed: 2.0m:3.05s\n", + "\n", + "[2026-04-08 16:53:43,796] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:43,796] INFO:cellink.tl.external._sclinker_utils: [154/264] done: CD8 TEM_L2/100kb/chr15\n", + "[2026-04-08 16:53:43,837] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.6.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:44,388] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8 \\\n", + "--bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:20 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 0.2679\n", + "std 0.1431 5.3042\n", + "min 0.0051 -1.4573\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0004\n", + "max 0.5000 265.6201\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0143\n", + "L2 0.0143 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1869.9321\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0034\n", + "std 0.3239\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 48.1743\n", + "Analysis finished at Wed Apr 8 16:53:44 2026\n", + "Total time elapsed: 6.0m:23.22s\n", + "\n", + "[2026-04-08 16:53:44,389] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: 
/ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:53:44,389] INFO:cellink.tl.external._sclinker_utils: [155/264] done: CD8 TEM_L2/ABC_Road_BLD/chr8\n", + "[2026-04-08 16:53:44,416] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.7.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:24,765] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot 
/data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:46 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 0.2491\n", + "std 0.1426 2.1479\n", + "min 0.0051 -0.3156\n", + "25% 0.1145 0.0000\n", + "50% 0.2311 0.0000\n", + "75% 0.3599 0.0016\n", + "max 0.5000 50.0029\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0192\n", + "L2 0.0192 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 938.9289\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0030\n", + "std 0.0328\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.5395\n", + "Analysis finished at Wed Apr 8 16:54:24 2026\n", + "Total time elapsed: 2.0m:37.97s\n", + "\n", + "[2026-04-08 16:54:24,768] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:24,768] INFO:cellink.tl.external._sclinker_utils: [156/264] done: CD8 TEM_L2/100kb/chr16\n", + "[2026-04-08 16:54:24,775] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.8.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:27,383] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:46:43 2026\n", + 
"Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.5634\n", + "std 0.1431 5.5283\n", + "min 0.0051 -0.6104\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0023\n", + "max 0.5000 209.5482\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.014\n", + "L2 0.014 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4618.8092\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0055\n", + "std 0.3027\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 45.0000\n", + "Analysis finished at Wed Apr 8 16:54:26 2026\n", + "Total time elapsed: 7.0m:43.6s\n", + "\n", + "[2026-04-08 16:54:27,384] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.9.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:27,385] INFO:cellink.tl.external._sclinker_utils: [157/264] done: CD8 TEM_L2/ABC_Road_BLD/chr2\n", + "[2026-04-08 16:54:27,390] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.9.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:27,955] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:52:55 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from 
/data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 0.6913\n", + "std 0.1435 4.6140\n", + "min 0.0051 -0.5145\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0014\n", + "75% 0.3579 0.0791\n", + "max 0.5000 67.9196\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0247\n", + "L2 0.0247 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1434.1889\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0102\n", + "std 0.0848\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:54:27 2026\n", + "Total time elapsed: 1.0m:32.32s\n", + "\n", + "[2026-04-08 16:54:27,957] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:27,958] INFO:cellink.tl.external._sclinker_utils: [158/264] done: CD8 TEM_L2/100kb/chr22\n", + "[2026-04-08 16:54:27,963] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.10.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:36,355] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:52:32 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + 
"Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 0.1004\n", + "std 0.1427 0.5496\n", + "min 0.0051 -0.0312\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0101\n", + "max 0.5000 9.5084\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0327\n", + "L2 0.0327 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 157.0271\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0007\n", + "std 0.0043\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.0493\n", + "Analysis finished at Wed Apr 8 16:54:36 2026\n", + "Total time elapsed: 2.0m:3.92s\n", + "\n", + "[2026-04-08 16:54:36,355] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:36,356] INFO:cellink.tl.external._sclinker_utils: [159/264] done: CD8 
TEM_L2/100kb/chr20\n", + "[2026-04-08 16:54:36,360] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.11.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:36,627] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:52:23 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs 
for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 0.1803\n", + "std 0.1423 1.9339\n", + "min 0.0051 -0.2805\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0004\n", + "max 0.5000 61.6016\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0168\n", + "L2 0.0168 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1298.0916\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0046\n", + "std 0.0534\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.7262\n", + "Analysis finished at Wed Apr 8 16:54:36 2026\n", + "Total time elapsed: 2.0m:12.97s\n", + "\n", + "[2026-04-08 16:54:36,629] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:36,629] INFO:cellink.tl.external._sclinker_utils: [160/264] done: CD8 TEM_L2/100kb/chr18\n", + "[2026-04-08 16:54:36,639] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.12.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:51,539] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:52:06 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging 
with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 1.4737\n", + "std 0.1444 9.4629\n", + "min 0.0051 -0.7231\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0002\n", + "75% 0.3681 0.0342\n", + "max 0.5000 151.4402\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0103\n", + "L2 -0.0103 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3932.2959\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0146\n", + "std 0.1102\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0978\n", + "Analysis finished at Wed Apr 8 16:54:51 2026\n", + "Total time elapsed: 2.0m:44.42s\n", + "\n", + "[2026-04-08 16:54:51,541] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:51,541] INFO:cellink.tl.external._sclinker_utils: [161/264] done: CD8 TEM_L2/100kb/chr17\n", + "[2026-04-08 16:54:51,545] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data 
/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.13.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:59,685] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:25 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to 
/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 3.0257e-01\n", + "std 0.1424 3.1330e+00\n", + "min 0.0051 -5.6506e-01\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2198 5.5178e-06\n", + "75% 0.3507 1.1057e-02\n", + "max 0.5000 8.1808e+01\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0291\n", + "L2 0.0291 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1162.4527\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0036\n", + "std 0.0374\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.6397\n", + "Analysis finished at Wed Apr 8 16:54:59 2026\n", + "Total time elapsed: 3.0m:33.58s\n", + "\n", + "[2026-04-08 16:54:59,692] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:54:59,694] INFO:cellink.tl.external._sclinker_utils: [162/264] done: CD8 TEM_L2/100kb/chr14\n", + "[2026-04-08 16:54:59,728] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.14.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:05,280] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:02 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 0.4975\n", + "std 0.1426 3.9242\n", + "min 0.0051 -0.4411\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3538 0.0092\n", + "max 0.5000 73.1049\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.025\n", + "L2 0.025 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2139.2738\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0042\n", + "std 0.0369\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.4719\n", + "Analysis finished at Wed Apr 8 16:55:04 2026\n", + "Total time elapsed: 4.0m:2.82s\n", + "\n", + "[2026-04-08 16:55:05,281] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:05,282] INFO:cellink.tl.external._sclinker_utils: [163/264] done: CD8 TEM_L2/100kb/chr10\n", + "[2026-04-08 16:55:05,288] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.15.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.15 --ld-wind-cm 
1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:22,308] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:21 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 0.0529\n", + "std 0.1428 0.3783\n", + "min 0.0051 -0.0487\n", + "25% 0.1074 0.0000\n", + "50% 
0.2209 0.0000\n", + "75% 0.3548 0.0016\n", + "max 0.5000 6.6978\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0122\n", + "L2 -0.0122 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 176.8201\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0005\n", + "std 0.0041\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.0701\n", + "Analysis finished at Wed Apr 8 16:55:21 2026\n", + "Total time elapsed: 4.0m:0.81s\n", + "\n", + "[2026-04-08 16:55:22,311] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:22,311] INFO:cellink.tl.external._sclinker_utils: [164/264] done: CD8 TEM_L2/100kb/chr13\n", + "[2026-04-08 16:55:22,316] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.16.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:39,392] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:52:29 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.2580\n", + "std 0.1420 2.7879\n", + "min 0.0051 -0.4579\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0009\n", + "75% 0.3569 0.0206\n", + "max 0.5000 75.4555\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0363\n", + 
"L2 0.0363 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 962.9246\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0041\n", + "std 0.0546\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 16:55:39 2026\n", + "Total time elapsed: 3.0m:9.91s\n", + "\n", + "[2026-04-08 16:55:39,398] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:39,398] INFO:cellink.tl.external._sclinker_utils: [165/264] done: CD8 TEM_L2/100kb/chr19\n", + "[2026-04-08 16:55:39,402] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.17.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:55,617] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary 
Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:50:16 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 0.1349\n", + "std 0.1422 1.6871\n", + "min 0.0051 -0.4484\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0008\n", + "max 0.5000 60.8425\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0154\n", + "L2 0.0154 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1396.1582\n", + "\n", + "Summary of 
Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0019\n", + "std 0.0387\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.9988\n", + "Analysis finished at Wed Apr 8 16:55:55 2026\n", + "Total time elapsed: 5.0m:39.23s\n", + "\n", + "[2026-04-08 16:55:55,622] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:55,622] INFO:cellink.tl.external._sclinker_utils: [166/264] done: CD8 TEM_L2/100kb/chr4\n", + "[2026-04-08 16:55:55,628] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.18.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:58,862] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:16 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 1.3945\n", + "std 0.1429 10.2152\n", + "min 0.0051 -0.7684\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0079\n", + "max 0.5000 155.7985\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0382\n", + "L2 0.0382 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9921.8125\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0207\n", + "std 0.1608\n", + "min 0.0000\n", + "25% 
0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0283\n", + "Analysis finished at Wed Apr 8 16:55:58 2026\n", + "Total time elapsed: 4.0m:41.7s\n", + "\n", + "[2026-04-08 16:55:58,864] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:55:58,864] INFO:cellink.tl.external._sclinker_utils: [167/264] done: CD8 TEM_L2/100kb/chr12\n", + "[2026-04-08 16:55:58,872] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.19.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:00,529] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt 
\\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:49:51 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 0.2752\n", + "std 0.1426 3.4029\n", + "min 0.0051 -0.9732\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3630 0.0026\n", + "max 0.5000 136.1045\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0186\n", + "L2 0.0186 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2300.6349\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0033\n", + "std 0.0524\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0072\n", + "Analysis finished at Wed Apr 8 16:56:00 2026\n", + "Total time elapsed: 6.0m:8.84s\n", + "\n", + "[2026-04-08 16:56:00,533] 
INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:00,533] INFO:cellink.tl.external._sclinker_utils: [168/264] done: CD8 TEM_L2/100kb/chr3\n", + "[2026-04-08 16:56:00,537] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.20.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:03,331] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + 
"--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:49:18 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 0.5599\n", + "std 0.1432 5.6535\n", + "min 0.0051 -1.1963\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0000\n", + "75% 0.3558 0.0070\n", + "max 0.5000 172.7050\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.006\n", + "L2 -0.006 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5292.0099\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0068\n", + "std 0.0800\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.4256\n", + "Analysis finished at Wed Apr 8 16:56:03 2026\n", + "Total time elapsed: 6.0m:44.14s\n", + "\n", + "[2026-04-08 16:56:03,332] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:03,332] INFO:cellink.tl.external._sclinker_utils: [169/264] done: CD8 TEM_L2/100kb/chr1\n", + "[2026-04-08 16:56:03,338] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.21.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:48,879] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:47:17 2026\n", 
+ "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 174.5378\n", + "std 0.1431 3221.9162\n", + "min 0.0051 -130.9324\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0003\n", + "max 0.5000 241038.6920\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0011\n", + "L2 0.0011 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.1367e+06\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 1.7118\n", + "std 35.4593\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 959.0000\n", + "Analysis finished at Wed Apr 8 16:56:48 2026\n", + "Total time elapsed: 9.0m:30.94s\n", + "\n", + "[2026-04-08 16:56:48,884] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/ABC_Road_BLD/NK_L2.22.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:48,885] INFO:cellink.tl.external._sclinker_utils: [170/264] done: CD8 TEM_L2/ABC_Road_BLD/chr6\n", + "[2026-04-08 16:56:48,892] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.22.annot.gz --out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:49,182] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:50:46 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from 
/data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 2.6947\n", + "std 0.1428 23.3681\n", + "min 0.0051 -2.9992\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0168\n", + "max 0.5000 611.2200\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0073\n", + "L2 -0.0073 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 14042.3999\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0238\n", + "std 0.2373\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 3.6440\n", + "Analysis finished at Wed Apr 8 16:56:48 2026\n", + "Total time elapsed: 6.0m:2.05s\n", + "\n", + "[2026-04-08 16:56:49,185] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:56:49,186] INFO:cellink.tl.external._sclinker_utils: [171/264] done: CD8 TEM_L2/100kb/chr7\n", + "[2026-04-08 16:56:49,229] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/NK_L2/100kb/NK_L2.1.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:29,617] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:50:49 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 0.4243\n", + "std 0.1431 4.7913\n", + "min 0.0051 -0.7778\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0054\n", + "max 0.5000 146.2742\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0404\n", + "L2 0.0404 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1681.0448\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0031\n", + "std 0.0428\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.9970\n", + "Analysis finished at Wed Apr 8 16:57:29 2026\n", + "Total time elapsed: 6.0m:39.31s\n", + "\n", + "[2026-04-08 16:57:29,633] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:29,634] INFO:cellink.tl.external._sclinker_utils: [172/264] done: CD8 TEM_L2/100kb/chr8\n", + "[2026-04-08 
16:57:29,644] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/NK_L2/100kb/NK_L2.2.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:38,790] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:50:37 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 0.0323\n", + "std 0.1421 0.1802\n", + "min 0.0051 -0.0256\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0023\n", + "max 0.5000 5.7209\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0067\n", + "L2 0.0067 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 165.5004\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0003\n", + "std 0.0019\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.0321\n", + "Analysis finished at Wed Apr 8 16:57:38 2026\n", + "Total time elapsed: 7.0m:0.57s\n", + "\n", + "[2026-04-08 16:57:38,793] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:38,793] INFO:cellink.tl.external._sclinker_utils: [173/264] done: CD8 TEM_L2/100kb/chr5\n", + "[2026-04-08 16:57:38,799] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B 
/home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/NK_L2/100kb/NK_L2.3.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:40,306] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:55:02 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to 
/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 2.8175\n", + "std 0.1424 63.8278\n", + "min 0.0051 -11.3202\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3507 0.0002\n", + "max 0.5000 3566.2647\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0235\n", + "L2 -0.0235 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 12332.3208\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0380\n", + "std 1.4120\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 73.3271\n", + "Analysis finished at Wed Apr 8 16:57:40 2026\n", + "Total time elapsed: 2.0m:37.75s\n", + "\n", + "[2026-04-08 16:57:40,308] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:40,308] INFO:cellink.tl.external._sclinker_utils: [174/264] done: NK_L2/ABC_Road_BLD/chr14\n", + "[2026-04-08 16:57:40,312] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot 
/data/annotations/NK_L2/100kb/NK_L2.4.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:49,086] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:56:05 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 2.4114\n", 
+ "std 0.1414 10.5030\n", + "min 0.0051 -1.3009\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0038\n", + "75% 0.3650 0.2257\n", + "max 0.5000 204.7686\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0038\n", + "L2 -0.0038 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3446.2298\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 0.0248\n", + "std 0.6683\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 53.0084\n", + "Analysis finished at Wed Apr 8 16:57:48 2026\n", + "Total time elapsed: 1.0m:43.48s\n", + "\n", + "[2026-04-08 16:57:49,087] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:49,087] INFO:cellink.tl.external._sclinker_utils: [175/264] done: NK_L2/ABC_Road_BLD/chr21\n", + "[2026-04-08 16:57:49,093] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/NK_L2/100kb/NK_L2.5.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 
16:57:55,463] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:49:50 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.8600\n", + "std 0.1431 8.5747\n", + "min 0.0051 -1.2601\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 0.0000\n", + "75% 0.3620 0.0065\n", + "max 0.5000 314.8700\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF 
L2\n", + "MAF 1.0000 0.0132\n", + "L2 0.0132 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8717.1269\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0104\n", + "std 0.1051\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.9159\n", + "Analysis finished at Wed Apr 8 16:57:55 2026\n", + "Total time elapsed: 8.0m:4.97s\n", + "\n", + "[2026-04-08 16:57:55,464] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:55,464] INFO:cellink.tl.external._sclinker_utils: [176/264] done: CD8 TEM_L2/100kb/chr2\n", + "[2026-04-08 16:57:55,471] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/NK_L2/100kb/NK_L2.6.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:56,720] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary 
Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:54:54 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 0.2925\n", + "std 0.1428 3.0066\n", + "min 0.0051 -0.8766\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3548 0.0008\n", + "max 0.5000 128.5339\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0006\n", + "L2 0.0006 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1232.0365\n", + "\n", + "Summary of Annotation 
Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0034\n", + "std 0.2557\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 32.5419\n", + "Analysis finished at Wed Apr 8 16:57:56 2026\n", + "Total time elapsed: 3.0m:2.38s\n", + "\n", + "[2026-04-08 16:57:56,722] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:56,723] INFO:cellink.tl.external._sclinker_utils: [177/264] done: NK_L2/ABC_Road_BLD/chr13\n", + "[2026-04-08 16:57:56,741] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/NK_L2/100kb/NK_L2.7.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:59,060] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py 
\\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:56:51 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 1.0845\n", + "std 0.1435 8.3706\n", + "min 0.0051 -1.5983\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3579 0.0228\n", + "max 0.5000 227.0189\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0392\n", + "L2 0.0392 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4815.1959\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0341\n", + "std 0.9227\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 61.7687\n", + "Analysis finished at Wed Apr 8 16:57:58 2026\n", + "Total 
time elapsed: 1.0m:7.23s\n", + "\n", + "[2026-04-08 16:57:59,062] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:59,062] INFO:cellink.tl.external._sclinker_utils: [178/264] done: NK_L2/ABC_Road_BLD/chr22\n", + "[2026-04-08 16:57:59,078] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/NK_L2/100kb/NK_L2.8.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:59,763] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 
\\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:55:57 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 1.8011\n", + "std 0.1423 22.4810\n", + "min 0.0051 -19.6912\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0014\n", + "max 0.5000 1260.5688\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0017\n", + "L2 0.0017 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 22257.7642\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0781\n", + "std 4.9558\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 734.0000\n", + "Analysis finished at Wed Apr 8 16:57:59 2026\n", + "Total time elapsed: 2.0m:1.75s\n", + "\n", + "[2026-04-08 16:57:59,764] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:57:59,764] INFO:cellink.tl.external._sclinker_utils: [179/264] done: NK_L2/ABC_Road_BLD/chr18\n", + "[2026-04-08 16:57:59,779] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/NK_L2/100kb/NK_L2.9.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:04,694] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning 
analysis at Wed Apr 8 16:56:02 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 1.9519\n", + "std 0.1427 23.7737\n", + "min 0.0051 -3.0008\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0032\n", + "max 0.5000 685.8228\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0409\n", + "L2 -0.0409 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3833.3593\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0173\n", + "std 0.7371\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 81.0000\n", + "Analysis finished at Wed Apr 8 16:58:04 2026\n", + "Total time elapsed: 2.0m:2.01s\n", + "\n", + "[2026-04-08 16:58:04,695] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.10.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:04,695] INFO:cellink.tl.external._sclinker_utils: [180/264] done: NK_L2/ABC_Road_BLD/chr20\n", + "[2026-04-08 16:58:04,704] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/NK_L2/100kb/NK_L2.10.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:05,080] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:55:08 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from 
/data/annotations/NK_L2/ABC_Road_BLD/NK_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 0.1542\n", + "std 0.1441 1.0715\n", + "min 0.0051 -0.1625\n", + "25% 0.1104 0.0000\n", + "50% 0.2270 0.0000\n", + "75% 0.3640 0.0009\n", + "max 0.5000 22.5182\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0299\n", + "L2 0.0299 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 403.5194\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0014\n", + "std 0.0615\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 8.6372\n", + "Analysis finished at Wed Apr 8 16:58:04 2026\n", + "Total time elapsed: 2.0m:56.73s\n", + "\n", + "[2026-04-08 16:58:05,082] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:05,083] INFO:cellink.tl.external._sclinker_utils: [181/264] done: NK_L2/ABC_Road_BLD/chr15\n", + "[2026-04-08 16:58:05,086] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/NK_L2/100kb/NK_L2.11.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:08,414] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:55:41 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 8.8638e+00\n", + "std 0.1444 5.2574e+01\n", + "min 0.0051 -7.0594e+00\n", + "25% 0.1145 0.0000e+00\n", + "50% 0.2352 5.9477e-05\n", + "75% 0.3681 7.0676e-02\n", + "max 0.5000 1.0365e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0433\n", + "L2 0.0433 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 16084.7178\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0597\n", + "std 2.8855\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 305.0953\n", + "Analysis finished at Wed Apr 8 16:58:08 2026\n", + "Total time elapsed: 2.0m:26.5s\n", + "\n", + "[2026-04-08 16:58:08,417] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:08,417] INFO:cellink.tl.external._sclinker_utils: [182/264] done: 
NK_L2/ABC_Road_BLD/chr17\n", + "[2026-04-08 16:58:08,421] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/NK_L2/100kb/NK_L2.12.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:13,724] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:55:24 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print 
LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 3.1220e+00\n", + "std 0.1426 4.8665e+01\n", + "min 0.0051 -6.4249e+00\n", + "25% 0.1145 0.0000e+00\n", + "50% 0.2311 6.0454e-06\n", + "75% 0.3599 4.9495e-03\n", + "max 0.5000 1.6351e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0261\n", + "L2 0.0261 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 12139.3027\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0383\n", + "std 1.7268\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 222.4052\n", + "Analysis finished at Wed Apr 8 16:58:13 2026\n", + "Total time elapsed: 2.0m:49.0s\n", + "\n", + "[2026-04-08 16:58:13,728] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:13,728] INFO:cellink.tl.external._sclinker_utils: [183/264] done: NK_L2/ABC_Road_BLD/chr16\n", + "[2026-04-08 16:58:13,731] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B 
/home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/NK_L2/100kb/NK_L2.13.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:27,632] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:54:29 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to 
/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 4.4830\n", + "std 0.1423 41.0257\n", + "min 0.0051 -7.7268\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0064\n", + "max 0.5000 989.8915\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0638\n", + "L2 0.0638 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 19238.2659\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0439\n", + "std 1.8493\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 261.0000\n", + "Analysis finished at Wed Apr 8 16:58:27 2026\n", + "Total time elapsed: 3.0m:57.66s\n", + "\n", + "[2026-04-08 16:58:27,637] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:27,638] INFO:cellink.tl.external._sclinker_utils: [184/264] done: NK_L2/ABC_Road_BLD/chr9\n", + "[2026-04-08 16:58:27,642] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot 
/data/annotations/NK_L2/100kb/NK_L2.14.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:36,502] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:54:38 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 
8.5583\n", + "std 0.1429 106.0654\n", + "min 0.0051 -22.6482\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0039\n", + "max 0.5000 3073.4378\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0153\n", + "L2 0.0153 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28451.2216\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0593\n", + "std 4.8146\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 776.9409\n", + "Analysis finished at Wed Apr 8 16:58:36 2026\n", + "Total time elapsed: 3.0m:57.5s\n", + "\n", + "[2026-04-08 16:58:36,503] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:36,504] INFO:cellink.tl.external._sclinker_utils: [185/264] done: NK_L2/ABC_Road_BLD/chr12\n", + "[2026-04-08 16:58:36,524] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/NK_L2/100kb/NK_L2.15.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + 
"[2026-04-08 16:58:45,702] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:51:10 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 0.6002\n", + "std 0.1420 8.6605\n", + "min 0.0051 -1.4284\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0045\n", + "max 0.5000 292.9915\n", + "\n", + "MAF/LD Score Correlation 
Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0325\n", + "L2 0.0325 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1750.8071\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0035\n", + "std 0.0776\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0000\n", + "Analysis finished at Wed Apr 8 16:58:45 2026\n", + "Total time elapsed: 7.0m:34.6s\n", + "\n", + "[2026-04-08 16:58:45,704] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:45,704] INFO:cellink.tl.external._sclinker_utils: [186/264] done: CD8 TEM_L2/100kb/chr11\n", + "[2026-04-08 16:58:45,734] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/NK_L2/100kb/NK_L2.16.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:50,924] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan 
Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:56:00 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 2.9977\n", + "std 0.1420 22.0660\n", + "min 0.0051 -3.6078\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0011\n", + "75% 0.3569 0.1395\n", + "max 0.5000 798.3880\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0085\n", + "L2 0.0085 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7646.6776\n", + "\n", 
+ "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0329\n", + "std 1.1782\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 93.6374\n", + "Analysis finished at Wed Apr 8 16:58:50 2026\n", + "Total time elapsed: 2.0m:49.71s\n", + "\n", + "[2026-04-08 16:58:50,928] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:50,928] INFO:cellink.tl.external._sclinker_utils: [187/264] done: NK_L2/ABC_Road_BLD/chr19\n", + "[2026-04-08 16:58:50,943] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/NK_L2/100kb/NK_L2.17.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:52,954] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:54:38 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 7.3500\n", + "std 0.1420 145.1707\n", + "min 0.0051 -37.2967\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0003\n", + "max 0.5000 8350.0869\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0092\n", + "L2 -0.0092 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 28018.052\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0567\n", + "std 8.4622\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 
0.0000\n", + "75% 0.0000\n", + "max 1531.0220\n", + "Analysis finished at Wed Apr 8 16:58:52 2026\n", + "Total time elapsed: 4.0m:14.19s\n", + "\n", + "[2026-04-08 16:58:52,956] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:58:52,956] INFO:cellink.tl.external._sclinker_utils: [188/264] done: NK_L2/ABC_Road_BLD/chr11\n", + "[2026-04-08 16:58:52,963] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/NK_L2/100kb/NK_L2.18.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:59:11,110] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + 
"--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:54:27 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 0.0258\n", + "std 0.1431 0.1543\n", + "min 0.0051 -0.0143\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0010\n", + "max 0.5000 4.7115\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0734\n", + "L2 0.0734 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 117.5947\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0002\n", + "std 0.0071\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.6954\n", + "Analysis finished at Wed Apr 8 16:59:10 2026\n", + "Total time elapsed: 4.0m:43.47s\n", + "\n", + "[2026-04-08 16:59:11,112] INFO:cellink.tl.external._ldsc: Computing LD scores with 
annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:59:11,113] INFO:cellink.tl.external._sclinker_utils: [189/264] done: NK_L2/ABC_Road_BLD/chr8\n", + "[2026-04-08 16:59:11,118] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/NK_L2/100kb/NK_L2.19.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:59:23,182] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.5.annot.gz 
\\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:35 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 0.5915\n", + "std 0.1421 5.9422\n", + "min 0.0051 -1.7379\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0009\n", + "max 0.5000 274.0667\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0131\n", + "L2 0.0131 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5350.6732\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0085\n", + "std 0.4958\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 94.2016\n", + "Analysis finished at Wed Apr 8 16:59:22 2026\n", + "Total time elapsed: 5.0m:47.4s\n", + "\n", + "[2026-04-08 16:59:23,184] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 16:59:23,184] INFO:cellink.tl.external._sclinker_utils: [190/264] done: NK_L2/ABC_Road_BLD/chr5\n", + "[2026-04-08 16:59:23,191] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/NK_L2/100kb/NK_L2.20.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:01,319] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:50:41 2026\n", + "Read list of 664016 SNPs from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/CD8_TEM_L2/100kb/CD8_TEM_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 2.8387\n", + "std 0.1431 40.3233\n", + "min 0.0051 -0.8362\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0007\n", + "max 0.5000 1406.6215\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0044\n", + "L2 -0.0044 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 22021.4597\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0332\n", + "std 0.2105\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.5816\n", + "Analysis finished at Wed Apr 8 17:00:01 2026\n", + "Total time elapsed: 9.0m:19.45s\n", + "\n", + "[2026-04-08 17:00:01,320] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.21.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:01,321] INFO:cellink.tl.external._sclinker_utils: [191/264] done: CD8 TEM_L2/100kb/chr6\n", + "[2026-04-08 17:00:01,327] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/NK_L2/100kb/NK_L2.21.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:43,475] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:32 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from 
/data/annotations/NK_L2/ABC_Road_BLD/NK_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 0.2015\n", + "std 0.1422 1.9622\n", + "min 0.0051 -0.3766\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0014\n", + "max 0.5000 62.6763\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0231\n", + "L2 0.0231 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1120.0308\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0015\n", + "std 0.1003\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 27.4246\n", + "Analysis finished at Wed Apr 8 17:00:43 2026\n", + "Total time elapsed: 7.0m:10.5s\n", + "\n", + "[2026-04-08 17:00:43,565] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/NK_L2/100kb/NK_L2.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:43,565] INFO:cellink.tl.external._sclinker_utils: [192/264] done: NK_L2/ABC_Road_BLD/chr4\n", + "[2026-04-08 17:00:43,607] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/NK_L2/100kb/NK_L2.22.annot.gz --out /data/ldscores/NK_L2/100kb/NK_L2.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:47,488] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:28 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 0.4426\n", + "std 0.1426 4.3712\n", + "min 0.0051 -1.1436\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3630 0.0070\n", + "max 0.5000 192.0067\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0031\n", + "L2 0.0031 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4092.0222\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0058\n", + "std 0.3192\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 74.0522\n", + "Analysis finished at Wed Apr 8 17:00:47 2026\n", + "Total time elapsed: 7.0m:18.79s\n", + "\n", + "[2026-04-08 17:00:47,490] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.1.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:47,490] INFO:cellink.tl.external._sclinker_utils: [193/264] done: 
NK_L2/ABC_Road_BLD/chr3\n", + "[2026-04-08 17:00:47,509] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.1.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:48,829] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:15 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/NK_L2/100kb/NK_L2.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to 
print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 0.1757\n", + "std 0.1428 1.3687\n", + "min 0.0051 -0.1791\n", + "25% 0.1074 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3548 0.0006\n", + "max 0.5000 32.2320\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.028\n", + "L2 0.028 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 513.8596\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 0.0014\n", + "std 0.0110\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.1379\n", + "Analysis finished at Wed Apr 8 17:00:48 2026\n", + "Total time elapsed: 2.0m:33.34s\n", + "\n", + "[2026-04-08 17:00:48,832] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.2 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:48,832] INFO:cellink.tl.external._sclinker_utils: [194/264] done: NK_L2/100kb/chr13\n", + "[2026-04-08 17:00:48,851] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B 
/home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.2.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:54,587] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:54:30 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD 
Scores for 64067 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 1.5211e+00\n", + "std 0.1426 1.5377e+01\n", + "min 0.0051 -3.2733e+00\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2198 2.2100e-05\n", + "75% 0.3538 7.1721e-03\n", + "max 0.5000 4.9353e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0297\n", + "L2 0.0297 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7815.5418\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 0.0153\n", + "std 0.7137\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 64.2279\n", + "Analysis finished at Wed Apr 8 17:00:54 2026\n", + "Total time elapsed: 6.0m:23.95s\n", + "\n", + "[2026-04-08 17:00:54,588] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:00:54,589] INFO:cellink.tl.external._sclinker_utils: [195/264] done: NK_L2/ABC_Road_BLD/chr10\n", + "[2026-04-08 17:00:54,596] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.3.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:07,588] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:55 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/NK_L2/100kb/NK_L2.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/NK_L2/100kb/NK_L2.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 0.2897\n", + "std 0.1423 3.3674\n", + "min 0.0051 -0.6350\n", + "25% 0.1115 0.0000\n", + "50% 0.2260 0.0000\n", + "75% 0.3569 0.0008\n", + "max 0.5000 98.7158\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0211\n", + "L2 0.0211 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 999.8265\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 0.0035\n", + "std 0.0491\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.8893\n", + "Analysis finished at Wed Apr 8 17:01:07 2026\n", + "Total time elapsed: 2.0m:11.98s\n", + "\n", + "[2026-04-08 17:01:07,611] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:07,614] INFO:cellink.tl.external._sclinker_utils: [196/264] done: NK_L2/100kb/chr18\n", + "[2026-04-08 17:01:07,639] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.4.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.4 
--ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:12,307] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:07 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.6650\n", + "std 0.1431 8.5263\n", + "min 0.0051 -2.3599\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 
0.0000\n", + "75% 0.3620 0.0042\n", + "max 0.5000 473.8385\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0148\n", + "L2 0.0148 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 9135.6312\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0109\n", + "std 0.6049\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 87.4617\n", + "Analysis finished at Wed Apr 8 17:01:12 2026\n", + "Total time elapsed: 8.0m:4.38s\n", + "\n", + "[2026-04-08 17:01:12,309] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:12,309] INFO:cellink.tl.external._sclinker_utils: [197/264] done: NK_L2/ABC_Road_BLD/chr2\n", + "[2026-04-08 17:01:12,338] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.5.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:16,329] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:41 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/NK_L2/100kb/NK_L2.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 4.0103e-01\n", + "std 0.1441 3.6268e+00\n", + "min 0.0051 -7.4167e-01\n", + "25% 0.1104 0.0000e+00\n", + "50% 0.2270 1.2750e-05\n", + "75% 0.3640 2.7274e-02\n", + "max 0.5000 9.8053e+01\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0145\n", + "L2 -0.0145 1.0000\n", 
+ "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1864.4048\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 0.0065\n", + "std 0.0653\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.8849\n", + "Analysis finished at Wed Apr 8 17:01:16 2026\n", + "Total time elapsed: 2.0m:34.5s\n", + "\n", + "[2026-04-08 17:01:16,331] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:16,331] INFO:cellink.tl.external._sclinker_utils: [198/264] done: NK_L2/100kb/chr15\n", + "[2026-04-08 17:01:16,361] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.6.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:16,464] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary 
Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:00:03 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/NK_L2/100kb/NK_L2.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 2.0005\n", + "std 0.1414 13.0709\n", + "min 0.0051 -1.5944\n", + "25% 0.1186 0.0000\n", + "50% 0.2321 0.0004\n", + "75% 0.3650 0.0262\n", + "max 0.5000 188.1350\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0282\n", + "L2 0.0282 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2685.5599\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 
138712.0000\n", + "mean 0.0194\n", + "std 0.1207\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:01:16 2026\n", + "Total time elapsed: 1.0m:12.47s\n", + "\n", + "[2026-04-08 17:01:16,466] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:16,466] INFO:cellink.tl.external._sclinker_utils: [199/264] done: NK_L2/100kb/chr21\n", + "[2026-04-08 17:01:16,471] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.7.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:27,924] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + 
"Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:46 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 33.9266\n", + "std 0.1428 320.3905\n", + "min 0.0051 -39.9882\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0594\n", + "max 0.5000 8731.8831\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0382\n", + "L2 0.0382 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 97384.7235\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.1652\n", + "std 7.6392\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 937.9912\n", + "Analysis finished at Wed Apr 8 
17:01:27 2026\n", + "Total time elapsed: 7.0m:41.32s\n", + "\n", + "[2026-04-08 17:01:27,926] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:27,926] INFO:cellink.tl.external._sclinker_utils: [200/264] done: NK_L2/ABC_Road_BLD/chr7\n", + "[2026-04-08 17:01:27,950] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.8.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:30,134] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:04 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 2.1638e+00\n", + "std 0.1432 3.4802e+01\n", + "min 0.0051 -1.0605e+01\n", + "25% 0.1084 0.0000e+00\n", + "50% 0.2239 2.8832e-05\n", + "75% 0.3558 1.5597e-02\n", + "max 0.5000 2.6463e+03\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0005\n", + "L2 -0.0005 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 25816.2849\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0331\n", + "std 1.9765\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 314.0000\n", + "Analysis finished at Wed Apr 8 17:01:29 2026\n", + "Total time elapsed: 8.0m:25.86s\n", + "\n", + "[2026-04-08 17:01:30,135] INFO:cellink.tl.external._ldsc: 
Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:30,135] INFO:cellink.tl.external._sclinker_utils: [201/264] done: NK_L2/ABC_Road_BLD/chr1\n", + "[2026-04-08 17:01:30,145] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.9.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:31,331] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really 
\\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:59:26 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/NK_L2/100kb/NK_L2.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 1.5524\n", + "std 0.1427 8.7704\n", + "min 0.0051 -0.7649\n", + "25% 0.1074 0.0000\n", + "50% 0.2229 0.0000\n", + "75% 0.3558 0.0130\n", + "max 0.5000 130.7992\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0209\n", + "L2 0.0209 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5314.0817\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 0.0240\n", + "std 0.1431\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:01:31 2026\n", + "Total time elapsed: 2.0m:4.83s\n", + "\n", + "[2026-04-08 17:01:31,332] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:31,333] INFO:cellink.tl.external._sclinker_utils: [202/264] done: NK_L2/100kb/chr20\n", + "[2026-04-08 17:01:31,348] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.10.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:32,257] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:30 2026\n", + 
"Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/NK_L2/100kb/NK_L2.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 4.5846e-01\n", + "std 0.1424 2.3435e+00\n", + "min 0.0051 -3.5294e-01\n", + "25% 0.1074 0.0000e+00\n", + "50% 0.2198 6.2358e-05\n", + "75% 0.3507 3.1737e-02\n", + "max 0.5000 3.9341e+01\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0261\n", + "L2 0.0261 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1675.3675\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 0.0052\n", + "std 0.0286\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.2693\n", + "Analysis finished at Wed Apr 8 17:01:32 2026\n", + "Total time elapsed: 3.0m:1.86s\n", + "\n", + "[2026-04-08 17:01:32,257] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.11.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:32,257] INFO:cellink.tl.external._sclinker_utils: [203/264] done: NK_L2/100kb/chr14\n", + "[2026-04-08 17:01:32,262] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.11.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:33,976] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:53 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from 
/data/annotations/NK_L2/100kb/NK_L2.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 1.7271\n", + "std 0.1444 10.3251\n", + "min 0.0051 -1.0142\n", + "25% 0.1145 0.0000\n", + "50% 0.2352 0.0008\n", + "75% 0.3681 0.1490\n", + "max 0.5000 269.5184\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0075\n", + "L2 -0.0075 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6075.8055\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 0.0226\n", + "std 0.1570\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.7631\n", + "Analysis finished at Wed Apr 8 17:01:33 2026\n", + "Total time elapsed: 2.0m:40.3s\n", + "\n", + "[2026-04-08 17:01:33,977] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.12 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:01:33,977] INFO:cellink.tl.external._sclinker_utils: [204/264] done: NK_L2/100kb/chr17\n", + "[2026-04-08 17:01:33,981] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.12.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:01,112] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:01 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/NK_L2/100kb/NK_L2.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 1.5207\n", + "std 0.1423 9.0953\n", + "min 0.0051 -1.0639\n", + "25% 0.1115 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3569 0.0277\n", + "max 0.5000 145.4758\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0156\n", + "L2 0.0156 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6863.4829\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 0.0157\n", + "std 0.0943\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:02:00 2026\n", + "Total time elapsed: 3.0m:58.94s\n", + "\n", + "[2026-04-08 17:02:01,113] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:01,114] INFO:cellink.tl.external._sclinker_utils: [205/264] done: NK_L2/100kb/chr9\n", + 
"[2026-04-08 17:02:01,123] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.13.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:01,942] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:00:45 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/NK_L2/100kb/NK_L2.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 0.4309\n", + "std 0.1435 1.3191\n", + "min 0.0051 -0.0900\n", + "25% 0.1094 0.0000\n", + "50% 0.2280 0.0015\n", + "75% 0.3579 0.1033\n", + "max 0.5000 14.3336\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0109\n", + "L2 0.0109 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1001.6199\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 0.0071\n", + "std 0.0302\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.3286\n", + "Analysis finished at Wed Apr 8 17:02:01 2026\n", + "Total time elapsed: 1.0m:15.84s\n", + "\n", + "[2026-04-08 17:02:01,943] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:01,943] INFO:cellink.tl.external._sclinker_utils: [206/264] done: NK_L2/100kb/chr22\n", + "[2026-04-08 17:02:01,973] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B 
/home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.14.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:16,981] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:48 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/NK_L2/100kb/NK_L2.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs 
to /data/ldscores/NK_L2/100kb/NK_L2.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 1.7577e+00\n", + "std 0.1426 1.9630e+01\n", + "min 0.0051 -1.3513e+00\n", + "25% 0.1145 0.0000e+00\n", + "50% 0.2311 1.7705e-05\n", + "75% 0.3599 1.8627e-02\n", + "max 0.5000 4.7104e+02\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0034\n", + "L2 0.0034 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4052.7326\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 0.0128\n", + "std 0.1043\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0544\n", + "Analysis finished at Wed Apr 8 17:02:16 2026\n", + "Total time elapsed: 3.0m:28.23s\n", + "\n", + "[2026-04-08 17:02:16,983] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.15.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:16,983] INFO:cellink.tl.external._sclinker_utils: [207/264] done: NK_L2/100kb/chr16\n", + "[2026-04-08 17:02:16,986] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 
--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.15.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:26,285] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:59:12 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/NK_L2/100kb/NK_L2.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 
1.4149\n", + "std 0.1420 5.9764\n", + "min 0.0051 -0.4760\n", + "25% 0.1145 0.0000\n", + "50% 0.2342 0.0145\n", + "75% 0.3569 0.2533\n", + "max 0.5000 75.4304\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.00 0.03\n", + "L2 0.03 1.00\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3816.8633\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 0.0164\n", + "std 0.0933\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:02:26 2026\n", + "Total time elapsed: 3.0m:13.44s\n", + "\n", + "[2026-04-08 17:02:26,286] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.16 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:26,286] INFO:cellink.tl.external._sclinker_utils: [208/264] done: NK_L2/100kb/chr19\n", + "[2026-04-08 17:02:26,290] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.16.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.16 --ld-wind-cm 1.0 --thin-annot --print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:30,367] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:53:45 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/NK_L2/ABC_Road_BLD/NK_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 6.9594e+01\n", + "std 0.1431 1.2467e+03\n", + "min 0.0051 -3.3849e+02\n", + "25% 0.1104 0.0000e+00\n", + "50% 0.2249 0.0000e+00\n", + "75% 0.3579 
9.7887e-05\n", + "max 0.5000 8.4325e+04\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 -0.002\n", + "L2 -0.002 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 541896.1384\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.8161\n", + "std 25.9033\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 959.0000\n", + "Analysis finished at Wed Apr 8 17:02:30 2026\n", + "Total time elapsed: 8.0m:44.64s\n", + "\n", + "[2026-04-08 17:02:30,557] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:02:30,557] INFO:cellink.tl.external._sclinker_utils: [209/264] done: NK_L2/ABC_Road_BLD/chr6\n", + "[2026-04-08 17:02:30,565] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.17.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:03:33,519] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:10 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/NK_L2/100kb/NK_L2.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 1.2703\n", + "std 0.1429 7.7341\n", + "min 0.0051 -0.6616\n", + "25% 0.1084 0.0000\n", + "50% 0.2209 0.0000\n", + "75% 0.3558 0.0487\n", + "max 0.5000 140.3429\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0422\n", + "L2 0.0422 1.0000\n", + "\n", + "Annotation 
Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5983.7998\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 0.0125\n", + "std 0.0874\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:03:33 2026\n", + "Total time elapsed: 5.0m:23.07s\n", + "\n", + "[2026-04-08 17:03:33,521] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:03:33,522] INFO:cellink.tl.external._sclinker_utils: [210/264] done: NK_L2/100kb/chr12\n", + "[2026-04-08 17:03:33,528] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.18.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:03:58,828] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* 
Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:06 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/NK_L2/100kb/NK_L2.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 0.5830\n", + "std 0.1426 5.1493\n", + "min 0.0051 -0.7426\n", + "25% 0.1074 0.0000\n", + "50% 0.2198 0.0000\n", + "75% 0.3538 0.0162\n", + "max 0.5000 135.1837\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0199\n", + "L2 0.0199 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2664.285\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 
0.0052\n", + "std 0.0441\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.6789\n", + "Analysis finished at Wed Apr 8 17:03:58 2026\n", + "Total time elapsed: 5.0m:52.01s\n", + "\n", + "[2026-04-08 17:03:58,830] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:03:58,830] INFO:cellink.tl.external._sclinker_utils: [211/264] done: NK_L2/100kb/chr10\n", + "[2026-04-08 17:03:58,872] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.19.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:11,268] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + 
"./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:57:58 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/NK_L2/100kb/NK_L2.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 2.4904\n", + "std 0.1428 14.8395\n", + "min 0.0051 -1.7225\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0545\n", + "max 0.5000 341.4959\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0226\n", + "L2 0.0226 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 11259.1026\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 0.0191\n", + "std 0.1432\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0376\n", + "Analysis finished at Wed Apr 8 17:04:11 2026\n", + "Total time elapsed: 6.0m:12.1s\n", + 
"\n", + "[2026-04-08 17:04:11,274] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:11,274] INFO:cellink.tl.external._sclinker_utils: [212/264] done: NK_L2/100kb/chr7\n", + "[2026-04-08 17:04:11,278] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.20.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:38,050] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.5 \\\n", + "--bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:57:51 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 SNPs from /data/annotations/NK_L2/100kb/NK_L2.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 0.1112\n", + "std 0.1421 0.6107\n", + "min 0.0051 -0.0690\n", + "25% 0.1125 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3569 0.0100\n", + "max 0.5000 12.2473\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0335\n", + "L2 0.0335 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 585.1562\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 0.0009\n", + "std 0.0063\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.1103\n", + "Analysis finished at Wed Apr 8 17:04:37 2026\n", + "Total time elapsed: 6.0m:46.75s\n", + "\n", + "[2026-04-08 17:04:38,051] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:38,052] INFO:cellink.tl.external._sclinker_utils: [213/264] done: NK_L2/100kb/chr5\n", + "[2026-04-08 17:04:38,058] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.21.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:38,547] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.8.annot.gz 
\\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:01 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/NK_L2/100kb/NK_L2.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 0.5109\n", + "std 0.1431 4.2737\n", + "min 0.0051 -0.7779\n", + "25% 0.1115 0.0000\n", + "50% 0.2280 0.0000\n", + "75% 0.3609 0.0157\n", + "max 0.5000 146.7135\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0377\n", + "L2 0.0377 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2797.8583\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 0.0051\n", + "std 0.0527\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:04:38 2026\n", + "Total time elapsed: 6.0m:37.23s\n", + "\n", + "[2026-04-08 17:04:38,549] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/ABC_Road_BLD/AllCoding.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:38,549] INFO:cellink.tl.external._sclinker_utils: [214/264] done: NK_L2/100kb/chr8\n", + "[2026-04-08 17:04:38,565] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.22.annot.gz --out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:41,001] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:56:51 2026\n", + 
"Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/NK_L2/100kb/NK_L2.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 0.9069\n", + "std 0.1432 6.9064\n", + "min 0.0051 -1.4795\n", + "25% 0.1084 0.0000\n", + "50% 0.2239 0.0004\n", + "75% 0.3558 0.0309\n", + "max 0.5000 206.6840\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0133\n", + "L2 -0.0133 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8435.4544\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 0.0108\n", + "std 0.0861\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0972\n", + "Analysis finished at Wed Apr 8 17:04:40 2026\n", + "Total time elapsed: 7.0m:48.8s\n", + "\n", + "[2026-04-08 17:04:41,002] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.1.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.1 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:41,002] INFO:cellink.tl.external._sclinker_utils: [215/264] done: NK_L2/100kb/chr1\n", + "[2026-04-08 17:04:41,006] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 --annot /data/annotations/AllCoding/100kb/AllCoding.1.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.1 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:54,049] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:57:43 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from 
/data/annotations/NK_L2/100kb/NK_L2.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 0.2207\n", + "std 0.1422 1.6612\n", + "min 0.0051 -0.3538\n", + "25% 0.1125 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3579 0.0040\n", + "max 0.5000 48.0050\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0378\n", + "L2 0.0378 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1737.9457\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 0.0024\n", + "std 0.0314\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 0.7880\n", + "Analysis finished at Wed Apr 8 17:04:53 2026\n", + "Total time elapsed: 7.0m:10.46s\n", + "\n", + "[2026-04-08 17:04:54,050] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.2.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.2 --ld-wind-cm 1.0 --thin-annot --print-snps 
/home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:54,050] INFO:cellink.tl.external._sclinker_utils: [216/264] done: NK_L2/100kb/chr4\n", + "[2026-04-08 17:04:54,055] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 --annot /data/annotations/AllCoding/100kb/AllCoding.2.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.2 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:59,620] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:58:06 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/NK_L2/100kb/NK_L2.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 0.3538\n", + "std 0.1420 3.8246\n", + "min 0.0051 -0.6071\n", + "25% 0.1155 0.0000\n", + "50% 0.2301 0.0000\n", + "75% 0.3609 0.0044\n", + "max 0.5000 131.2896\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0352\n", + "L2 0.0352 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1442.0045\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 0.0029\n", + "std 0.0371\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:04:59 2026\n", + "Total time elapsed: 6.0m:52.44s\n", + "\n", + "[2026-04-08 17:04:59,622] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.3.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.3 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:04:59,622] INFO:cellink.tl.external._sclinker_utils: [217/264] done: NK_L2/100kb/chr11\n", + "[2026-04-08 
17:04:59,626] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 --annot /data/annotations/AllCoding/100kb/AllCoding.3.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.3 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:05:04,716] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:57:41 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/NK_L2/100kb/NK_L2.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 0.4584\n", + "std 0.1426 3.8944\n", + "min 0.0051 -1.0976\n", + "25% 0.1166 0.0000\n", + "50% 0.2321 0.0000\n", + "75% 0.3630 0.0114\n", + "max 0.5000 133.0225\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0267\n", + "L2 0.0267 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3253.2155\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 0.0046\n", + "std 0.0597\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.0000\n", + "Analysis finished at Wed Apr 8 17:05:04 2026\n", + "Total time elapsed: 7.0m:23.07s\n", + "\n", + "[2026-04-08 17:05:04,720] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.4.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.4 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:05:04,720] INFO:cellink.tl.external._sclinker_utils: [218/264] done: NK_L2/100kb/chr3\n", + "[2026-04-08 17:05:04,751] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data 
/home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 --annot /data/annotations/AllCoding/100kb/AllCoding.4.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.4 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:05:14,768] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:02:32 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 269222 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to 
/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 15204.3135\n", + "std 0.1444 39166.7109\n", + "min 0.0051 -1463.5114\n", + "25% 0.1145 242.8150\n", + "50% 0.2352 2959.2980\n", + "75% 0.3681 14595.7831\n", + "max 0.5000 600923.0284\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0701\n", + "L2 0.0701 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4.2677e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 158.5203\n", + "std 739.7376\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 13426.0000\n", + "Analysis finished at Wed Apr 8 17:05:14 2026\n", + "Total time elapsed: 2.0m:41.95s\n", + "\n", + "[2026-04-08 17:05:14,769] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.5.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.5 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:05:14,769] INFO:cellink.tl.external._sclinker_utils: [219/264] done: AllCoding/ABC_Road_BLD/chr17\n", + "[2026-04-08 17:05:14,772] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 --annot /data/annotations/AllCoding/100kb/AllCoding.5.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.5 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:05:58,240] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:02:19 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.15.l2.ldscore.gz\n", + "\n", + "Summary of LD 
Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 11084.9350\n", + "std 0.1441 33477.0833\n", + "min 0.0051 -2703.5024\n", + "25% 0.1104 69.0068\n", + "50% 0.2270 1406.2165\n", + "75% 0.3640 9503.1769\n", + "max 0.5000 779923.7224\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0147\n", + "L2 0.0147 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.9372e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0000\n", + "mean 102.3426\n", + "std 813.2821\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 22445.0000\n", + "Analysis finished at Wed Apr 8 17:05:57 2026\n", + "Total time elapsed: 3.0m:38.61s\n", + "\n", + "[2026-04-08 17:05:58,306] INFO:cellink.tl.external._sclinker_utils: [220/264] done: AllCoding/ABC_Road_BLD/chr15\n", + "[2026-04-08 17:05:58,306] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.6.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.6 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:05:58,311] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 --annot /data/annotations/AllCoding/100kb/AllCoding.6.annot.gz --out 
/data/ldscores/AllCoding/100kb/AllCoding.6 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:06:15,508] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:57:33 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/NK_L2/100kb/NK_L2.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.2.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 0.8567\n", + "std 0.1431 9.0654\n", + "min 0.0051 -2.0015\n", + "25% 0.1125 0.0000\n", + "50% 0.2290 
0.0004\n", + "75% 0.3620 0.0273\n", + "max 0.5000 333.0791\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0134\n", + "L2 0.0134 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 11375.7352\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 0.0135\n", + "std 0.1487\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 2.0038\n", + "Analysis finished at Wed Apr 8 17:06:15 2026\n", + "Total time elapsed: 8.0m:41.24s\n", + "\n", + "[2026-04-08 17:06:15,513] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.7.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.7 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:06:15,514] INFO:cellink.tl.external._sclinker_utils: [221/264] done: NK_L2/100kb/chr2\n", + "[2026-04-08 17:06:15,517] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 --annot /data/annotations/AllCoding/100kb/AllCoding.7.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.7 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:06:44,509] INFO:cellink.tl._runner: 
*********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:04:42 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.21.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 4817.3587\n", + "std 0.1414 12148.3607\n", + "min 0.0051 -371.5000\n", + "25% 0.1186 23.1352\n", + "50% 0.2321 457.6439\n", + "75% 0.3650 3879.8411\n", + "max 0.5000 181261.7136\n", + "\n", + "MAF/LD Score Correlation 
Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0596\n", + "L2 0.0596 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 8409826.0\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0000\n", + "mean 60.6280\n", + "std 341.2962\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 6040.0000\n", + "Analysis finished at Wed Apr 8 17:06:44 2026\n", + "Total time elapsed: 2.0m:1.85s\n", + "\n", + "[2026-04-08 17:06:44,512] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.8.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.8 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:06:44,512] INFO:cellink.tl.external._sclinker_utils: [222/264] done: AllCoding/ABC_Road_BLD/chr21\n", + "[2026-04-08 17:06:44,524] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 --annot /data/annotations/AllCoding/100kb/AllCoding.8.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.8 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:06:46,534] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 
1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:04:42 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 16596.7667\n", + "std 0.1435 35613.6162\n", + "min 0.0051 -2356.5237\n", + "25% 0.1094 131.4789\n", + "50% 0.2280 3033.3626\n", + "75% 0.3579 17096.0679\n", + "max 0.5000 512337.6730\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1035\n", + "L2 0.1035 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " 
ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.3671e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0000\n", + "mean 167.7300\n", + "std 885.1125\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 14758.0000\n", + "Analysis finished at Wed Apr 8 17:06:46 2026\n", + "Total time elapsed: 2.0m:3.89s\n", + "\n", + "[2026-04-08 17:06:46,536] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.9.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.9 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:06:46,537] INFO:cellink.tl.external._sclinker_utils: [223/264] done: AllCoding/ABC_Road_BLD/chr22\n", + "[2026-04-08 17:06:46,551] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 --annot /data/annotations/AllCoding/100kb/AllCoding.9.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.9 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:08,796] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT 
Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:04:13 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 8156.1155\n", + "std 0.1427 20669.8716\n", + "min 0.0051 -811.7976\n", + "25% 0.1074 111.9709\n", + "50% 0.2229 883.7171\n", + "75% 0.3558 5830.8292\n", + "max 0.5000 296511.3588\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0829\n", + "L2 0.0829 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.1506e+07\n", + "\n", + "Summary of Annotation 
Matrix Row Sums\n", + "count 221626.0000\n", + "mean 97.0387\n", + "std 578.0631\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 13141.0000\n", + "Analysis finished at Wed Apr 8 17:07:08 2026\n", + "Total time elapsed: 2.0m:55.22s\n", + "\n", + "[2026-04-08 17:07:08,797] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.10.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.10 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:08,798] INFO:cellink.tl.external._sclinker_utils: [224/264] done: AllCoding/ABC_Road_BLD/chr20\n", + "[2026-04-08 17:07:08,817] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 --annot /data/annotations/AllCoding/100kb/AllCoding.10.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.10 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:09,327] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:02:03 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 6500.3838\n", + "std 0.1424 22765.4984\n", + "min 0.0051 -3339.7884\n", + "25% 0.1074 72.1647\n", + "50% 0.2198 932.1329\n", + "75% 0.3507 5371.6480\n", + "max 0.5000 681829.9981\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0695\n", + "L2 0.0695 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.6146e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 80.5246\n", + "std 
664.0844\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 16065.0000\n", + "Analysis finished at Wed Apr 8 17:07:09 2026\n", + "Total time elapsed: 5.0m:5.3s\n", + "\n", + "[2026-04-08 17:07:09,328] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.11.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.11 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:09,328] INFO:cellink.tl.external._sclinker_utils: [225/264] done: AllCoding/ABC_Road_BLD/chr14\n", + "[2026-04-08 17:07:09,332] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 --annot /data/annotations/AllCoding/100kb/AllCoding.11.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.11 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:25,060] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:04:00 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 9240.5499\n", + "std 0.1420 17113.0904\n", + "min 0.0051 -885.4515\n", + "25% 0.1145 1089.6525\n", + "50% 0.2342 3606.8046\n", + "75% 0.3569 9650.1124\n", + "max 0.5000 273778.0154\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0277\n", + "L2 0.0277 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.0466e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 131.1120\n", + "std 570.1997\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 8316.0000\n", + "Analysis 
finished at Wed Apr 8 17:07:24 2026\n", + "Total time elapsed: 3.0m:23.89s\n", + "\n", + "[2026-04-08 17:07:25,065] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.12.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.12 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:25,066] INFO:cellink.tl.external._sclinker_utils: [226/264] done: AllCoding/ABC_Road_BLD/chr19\n", + "[2026-04-08 17:07:25,096] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 --annot /data/annotations/AllCoding/100kb/AllCoding.12.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.12 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:39,151] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out 
/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:02:28 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 9192.9358\n", + "std 0.1426 23373.2762\n", + "min 0.0051 -1390.7142\n", + "25% 0.1145 30.1419\n", + "50% 0.2311 736.4629\n", + "75% 0.3599 6261.5588\n", + "max 0.5000 255630.8485\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0155\n", + "L2 0.0155 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.0392e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 95.8791\n", + "std 548.4286\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 10448.0000\n", + "Analysis finished at Wed Apr 8 17:07:38 2026\n", + "Total time elapsed: 5.0m:10.78s\n", + "\n", + "[2026-04-08 
17:07:39,153] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.13.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.13 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:39,154] INFO:cellink.tl.external._sclinker_utils: [227/264] done: AllCoding/ABC_Road_BLD/chr16\n", + "[2026-04-08 17:07:39,158] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 --annot /data/annotations/AllCoding/100kb/AllCoding.13.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.13 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:56,980] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/NK_L2/100kb/NK_L2.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + 
"--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/NK_L2/100kb/NK_L2.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 16:57:57 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/NK_L2/100kb/NK_L2.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/NK_L2/100kb/NK_L2.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/NK_L2/100kb/NK_L2.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 2.9558\n", + "std 0.1431 38.6821\n", + "min 0.0051 -1.0096\n", + "25% 0.1104 0.0000\n", + "50% 0.2249 0.0000\n", + "75% 0.3579 0.0030\n", + "max 0.5000 1447.7295\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0067\n", + "L2 -0.0067 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 21808.0878\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 0.0328\n", + "std 0.2268\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 1.7869\n", + "Analysis finished at Wed Apr 8 17:07:56 2026\n", + "Total time elapsed: 9.0m:58.83s\n", + "\n", + "[2026-04-08 17:07:56,988] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.14.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.14 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:07:56,988] INFO:cellink.tl.external._sclinker_utils: [228/264] done: NK_L2/100kb/chr6\n", + "[2026-04-08 17:07:56,992] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 --annot /data/annotations/AllCoding/100kb/AllCoding.14.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.14 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:08:18,841] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:03:35 2026\n", + 
"Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 2901.2584\n", + "std 0.1423 9612.4461\n", + "min 0.0051 -875.2371\n", + "25% 0.1115 23.9365\n", + "50% 0.2260 314.4933\n", + "75% 0.3569 2010.6800\n", + "max 0.5000 225292.0058\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.013\n", + "L2 0.013 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.0600e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 37.1734\n", + "std 260.6097\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 5853.0000\n", + "Analysis finished at Wed Apr 8 17:08:18 2026\n", + "Total time elapsed: 4.0m:42.83s\n", + "\n", + "[2026-04-08 17:08:18,841] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.15.annot.gz --out 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.15 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:08:18,842] INFO:cellink.tl.external._sclinker_utils: [229/264] done: AllCoding/ABC_Road_BLD/chr18\n", + "[2026-04-08 17:08:18,851] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 --annot /data/annotations/AllCoding/100kb/AllCoding.15.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.15 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:08:35,089] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:32 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 
SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 8293.5062\n", + "std 0.1423 33045.4635\n", + "min 0.0051 -3945.8366\n", + "25% 0.1115 70.1729\n", + "50% 0.2249 817.7905\n", + "75% 0.3569 4474.1327\n", + "max 0.5000 737384.9478\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0284\n", + "L2 0.0284 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.8904e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 88.8013\n", + "std 655.0942\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 17940.0000\n", + "Analysis finished at Wed Apr 8 17:08:34 2026\n", + "Total time elapsed: 7.0m:2.74s\n", + "\n", + "[2026-04-08 17:08:35,092] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.16.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.16 --ld-wind-cm 1.0 
--thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:08:35,092] INFO:cellink.tl.external._sclinker_utils: [230/264] done: AllCoding/ABC_Road_BLD/chr9\n", + "[2026-04-08 17:08:35,116] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 --annot /data/annotations/AllCoding/100kb/AllCoding.16.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.16 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:09:03,655] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:02:03 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 366200 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.13.annot.gz\n", + "Read list of 489 individuals from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 3009.1405\n", + "std 0.1428 11771.4867\n", + "min 0.0051 -1149.2121\n", + "25% 0.1074 27.3814\n", + "50% 0.2209 216.3646\n", + "75% 0.3548 1436.6255\n", + "max 0.5000 231218.0498\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0316\n", + "L2 0.0316 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.1326e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0000\n", + "mean 30.9276\n", + "std 266.8920\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 7521.0000\n", + "Analysis finished at Wed Apr 8 17:09:03 2026\n", + "Total time elapsed: 7.0m:0.37s\n", + "\n", + "[2026-04-08 17:09:03,656] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.17.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.17 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt 
--yes-really\n", + "[2026-04-08 17:09:03,656] INFO:cellink.tl.external._sclinker_utils: [231/264] done: AllCoding/ABC_Road_BLD/chr13\n", + "[2026-04-08 17:09:03,661] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 --annot /data/annotations/AllCoding/100kb/AllCoding.17.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.17 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:10:05,561] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:33 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 6690.9665\n", + "std 0.1426 25045.5676\n", + "min 0.0051 -4000.7271\n", + "25% 0.1074 78.0491\n", + "50% 0.2198 792.5676\n", + "75% 0.3538 4511.5256\n", + "max 0.5000 760481.5652\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0369\n", + "L2 0.0369 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.4419e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 67.4211\n", + "std 514.4562\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 11931.0000\n", + "Analysis finished at Wed Apr 8 17:10:05 2026\n", + "Total time elapsed: 8.0m:32.22s\n", + "\n", + "[2026-04-08 17:10:05,564] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.18.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.18 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:10:05,565] INFO:cellink.tl.external._sclinker_utils: 
[232/264] done: AllCoding/ABC_Road_BLD/chr10\n", + "[2026-04-08 17:10:05,570] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 --annot /data/annotations/AllCoding/100kb/AllCoding.18.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.18 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:10:45,535] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:29 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD 
Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 5192.6874\n", + "std 0.1431 20727.7021\n", + "min 0.0051 -1483.4562\n", + "25% 0.1115 60.4816\n", + "50% 0.2280 539.2872\n", + "75% 0.3609 2755.9983\n", + "max 0.5000 719764.4207\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.076\n", + "L2 0.076 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.5008e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 45.4724\n", + "std 423.4213\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 19315.0000\n", + "Analysis finished at Wed Apr 8 17:10:45 2026\n", + "Total time elapsed: 9.0m:15.41s\n", + "\n", + "[2026-04-08 17:10:45,540] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.19.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.19 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:10:45,540] INFO:cellink.tl.external._sclinker_utils: [233/264] done: AllCoding/ABC_Road_BLD/chr8\n", + "[2026-04-08 17:10:45,583] INFO:cellink.tl._runner: Executing: singularity exec 
-B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 --annot /data/annotations/AllCoding/100kb/AllCoding.19.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.19 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:11:06,484] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:33 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from 
/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 9559.5477\n", + "std 0.1420 23321.2122\n", + "min 0.0051 -1366.8251\n", + "25% 0.1155 151.3942\n", + "50% 0.2301 1322.7629\n", + "75% 0.3609 7448.9663\n", + "max 0.5000 306075.5687\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.026\n", + "L2 0.026 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4.5121e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 91.3517\n", + "std 603.0902\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 13297.0000\n", + "Analysis finished at Wed Apr 8 17:11:06 2026\n", + "Total time elapsed: 9.0m:32.31s\n", + "\n", + "[2026-04-08 17:11:06,593] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.20.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.20 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:11:06,594] INFO:cellink.tl.external._sclinker_utils: [234/264] done: AllCoding/ABC_Road_BLD/chr11\n", + "[2026-04-08 17:11:06,598] INFO:cellink.tl._runner: Executing: singularity exec -B 
/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 --annot /data/annotations/AllCoding/100kb/AllCoding.20.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.20 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:11:07,261] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:35 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 480110 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", 
+ "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 7579.0356\n", + "std 0.1429 24010.7912\n", + "min 0.0051 -1643.0196\n", + "25% 0.1084 188.7498\n", + "50% 0.2209 1010.1825\n", + "75% 0.3558 4939.1789\n", + "max 0.5000 495037.6529\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0371\n", + "L2 0.0371 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4.0121e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 83.5661\n", + "std 638.4825\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 18550.0000\n", + "Analysis finished at Wed Apr 8 17:11:07 2026\n", + "Total time elapsed: 9.0m:31.39s\n", + "\n", + "[2026-04-08 17:11:07,262] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.21.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.21 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:11:07,262] INFO:cellink.tl.external._sclinker_utils: [235/264] done: AllCoding/ABC_Road_BLD/chr12\n", + "[2026-04-08 17:11:07,287] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B 
/home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 --annot /data/annotations/AllCoding/100kb/AllCoding.21.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.21 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:11:40,245] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:00:49 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing 
LD Scores for 98642 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 9202.7011\n", + "std 0.1432 24332.6022\n", + "min 0.0051 -3018.0793\n", + "25% 0.1084 213.4314\n", + "50% 0.2239 1671.6828\n", + "75% 0.3558 7880.6540\n", + "max 0.5000 677800.7434\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0428\n", + "L2 0.0428 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7.8970e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 101.3277\n", + "std 647.9555\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 19534.0000\n", + "Analysis finished at Wed Apr 8 17:11:39 2026\n", + "Total time elapsed: 10.0m:50.53s\n", + "\n", + "[2026-04-08 17:11:40,283] INFO:cellink.tl.external._ldsc: Computing LD scores with annotations: /ldsc/ldsc.py --l2 --bfile /home/icb/lucas.arnoldt/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/annotations/AllCoding/100kb/AllCoding.22.annot.gz --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.22 --ld-wind-cm 1.0 --thin-annot --print-snps /home/icb/lucas.arnoldt/cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:11:40,283] INFO:cellink.tl.external._sclinker_utils: [236/264] done: AllCoding/ABC_Road_BLD/chr1\n", + "[2026-04-08 17:11:40,305] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --l2 --bfile 
/cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 --annot /data/annotations/AllCoding/100kb/AllCoding.22.annot.gz --out /data/ldscores/AllCoding/100kb/AllCoding.22 --ld-wind-cm 1.0 --thin-annot --print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt --yes-really\n", + "[2026-04-08 17:12:02,267] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:18 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 annotations for 589569 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 5836.7607\n", + "std 0.1428 21036.9564\n", + "min 0.0051 -1649.4763\n", + "25% 0.1125 93.8465\n", + "50% 0.2301 683.8033\n", + "75% 0.3609 3354.7776\n", + "max 0.5000 470414.0439\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0398\n", + "L2 0.0398 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.0008e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 50.8984\n", + "std 403.6151\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 14825.0000\n", + "Analysis finished at Wed Apr 8 17:12:02 2026\n", + "Total time elapsed: 10.0m:43.47s\n", + "\n", + "[2026-04-08 17:12:02,269] INFO:cellink.tl.external._sclinker_utils: [237/264] done: AllCoding/ABC_Road_BLD/chr7\n", + "[2026-04-08 17:12:20,342] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.17 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.17.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:09:05 2026\n", + "Read list of 269222 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bim\n", + "Read 1 annotations for 
269222 SNPs from /data/annotations/AllCoding/100kb/AllCoding.17.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.17.bed\n", + "After filtering, 269222 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 32218 SNPs will be printed.\n", + "Writing LD Scores for 32218 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.17.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.17.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2428 2.5882e+04\n", + "std 0.1444 6.2153e+04\n", + "min 0.0051 -5.7281e+02\n", + "25% 0.1145 3.4434e+03\n", + "50% 0.2352 9.4545e+03\n", + "75% 0.3681 2.8165e+04\n", + "max 0.5000 1.6035e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0853\n", + "L2 0.0853 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5.5198e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 269222.0000\n", + "mean 205.0261\n", + "std 169.5764\n", + "min 0.0000\n", + "25% 66.0000\n", + "50% 179.0000\n", + "75% 221.0000\n", + "max 535.0000\n", + "Analysis finished at Wed Apr 8 17:12:20 2026\n", + "Total time elapsed: 3.0m:14.56s\n", + "\n", + "[2026-04-08 17:12:20,343] INFO:cellink.tl.external._sclinker_utils: [238/264] done: AllCoding/100kb/chr17\n", + "[2026-04-08 17:12:34,790] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License 
v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.15 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.15.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:08:20 2026\n", + "Read list of 287001 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bim\n", + "Read 1 annotations for 287001 SNPs from /data/annotations/AllCoding/100kb/AllCoding.15.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.15.bed\n", + "After filtering, 287001 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35839 SNPs will be printed.\n", + "Writing LD Scores for 35839 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.15.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.15.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2379 1.4434e+05\n", + "std 0.1441 1.2657e+05\n", + "min 0.0051 -4.4209e+03\n", + "25% 0.1104 5.9269e+04\n", + "50% 0.2270 1.0338e+05\n", + "75% 0.3640 1.8491e+05\n", + "max 0.5000 1.2522e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1419\n", + "L2 0.1419 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT NaN\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.9147e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 287001.0\n", + "mean 1364.0\n", + "std 0.0\n", + "min 
1364.0\n", + "25% 1364.0\n", + "50% 1364.0\n", + "75% 1364.0\n", + "max 1364.0\n", + "Analysis finished at Wed Apr 8 17:12:34 2026\n", + "Total time elapsed: 4.0m:13.8s\n", + "\n", + "[2026-04-08 17:12:34,791] INFO:cellink.tl.external._sclinker_utils: [239/264] done: AllCoding/100kb/chr15\n", + "[2026-04-08 17:12:52,544] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.14 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.14.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:07:58 2026\n", + "Read list of 324698 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bim\n", + "Read 1 annotations for 324698 SNPs from /data/annotations/AllCoding/100kb/AllCoding.14.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.14.bed\n", + "After filtering, 324698 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 39484 SNPs will be printed.\n", + "Writing LD Scores for 39484 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.14.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/AllCoding/100kb/AllCoding.14.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2321 12537.1020\n", + "std 0.1424 24357.7269\n", + "min 0.0051 -225.6456\n", + "25% 0.1074 1986.1238\n", + "50% 0.2198 5888.7491\n", + "75% 0.3507 13677.1726\n", + "max 0.5000 275224.9122\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0005\n", + "L2 0.0005 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.7434e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 324698.0000\n", + "mean 115.2899\n", + "std 110.9969\n", + "min 0.0000\n", + "25% 15.0000\n", + "50% 100.0000\n", + "75% 204.0000\n", + "max 370.0000\n", + "Analysis finished at Wed Apr 8 17:12:52 2026\n", + "Total time elapsed: 4.0m:53.48s\n", + "\n", + "[2026-04-08 17:12:52,546] INFO:cellink.tl.external._sclinker_utils: [240/264] done: AllCoding/100kb/chr14\n", + "[2026-04-08 17:13:04,811] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:14 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 
633015 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 6554.4926\n", + "std 0.1421 22096.3160\n", + "min 0.0051 -2432.1650\n", + "25% 0.1125 79.1568\n", + "50% 0.2280 717.3352\n", + "75% 0.3569 3663.8393\n", + "max 0.5000 466353.4668\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.00 0.04\n", + "L2 0.04 1.00\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.6314e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 57.3663\n", + "std 474.1901\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 16898.0000\n", + "Analysis finished at Wed Apr 8 17:13:04 2026\n", + "Total time elapsed: 11.0m:50.09s\n", + "\n", + "[2026-04-08 17:13:04,976] INFO:cellink.tl.external._sclinker_utils: [241/264] done: AllCoding/ABC_Road_BLD/chr5\n", + "[2026-04-08 17:13:04,990] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public 
License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.16 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.16.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:08:37 2026\n", + "Read list of 316981 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bim\n", + "Read 1 annotations for 316981 SNPs from /data/annotations/AllCoding/100kb/AllCoding.16.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.16.bed\n", + "After filtering, 316981 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 36526 SNPs will be printed.\n", + "Writing LD Scores for 36526 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.16.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.16.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2389 9816.5507\n", + "std 0.1426 16382.1156\n", + "min 0.0051 -471.7792\n", + "25% 0.1145 823.2214\n", + "50% 0.2311 3347.7202\n", + "75% 0.3599 10948.0793\n", + "max 0.5000 310249.6807\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0429\n", + "L2 0.0429 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.0932e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 316981.0000\n", + "mean 97.5824\n", + "std 100.0914\n", + 
"min 0.0000\n", + "25% 16.0000\n", + "50% 59.0000\n", + "75% 228.0000\n", + "max 276.0000\n", + "Analysis finished at Wed Apr 8 17:13:04 2026\n", + "Total time elapsed: 4.0m:27.75s\n", + "\n", + "[2026-04-08 17:13:04,991] INFO:cellink.tl.external._sclinker_utils: [242/264] done: AllCoding/100kb/chr16\n", + "[2026-04-08 17:13:08,836] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.21 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.21.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:11:09 2026\n", + "Read list of 138712 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bim\n", + "Read 1 annotations for 138712 SNPs from /data/annotations/AllCoding/100kb/AllCoding.21.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.21.bed\n", + "After filtering, 138712 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17040 SNPs will be printed.\n", + "Writing LD Scores for 17040 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.21.l2.ldscore.gz\n", + "\n", + "Summary of LD 
Scores in /data/ldscores/AllCoding/100kb/AllCoding.21.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2420 51455.9563\n", + "std 0.1414 38739.3841\n", + "min 0.0051 158.1755\n", + "25% 0.1186 23873.3901\n", + "50% 0.2321 41287.5762\n", + "75% 0.3650 67850.6421\n", + "max 0.5000 268250.4428\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1988\n", + "L2 0.1988 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT NaN\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7.8234e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 138712.0\n", + "mean 564.0\n", + "std 0.0\n", + "min 564.0\n", + "25% 564.0\n", + "50% 564.0\n", + "75% 564.0\n", + "max 564.0\n", + "Analysis finished at Wed Apr 8 17:13:08 2026\n", + "Total time elapsed: 1.0m:59.43s\n", + "\n", + "[2026-04-08 17:13:08,837] INFO:cellink.tl.external._sclinker_utils: [243/264] done: AllCoding/100kb/chr21\n", + "[2026-04-08 17:13:27,198] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:00:56 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs 
from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 7382.8620\n", + "std 0.1426 23449.4300\n", + "min 0.0051 -1659.9479\n", + "25% 0.1166 89.2129\n", + "50% 0.2321 842.2854\n", + "75% 0.3630 5119.7569\n", + "max 0.5000 352919.9739\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0309\n", + "L2 0.0309 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.8002e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 53.8006\n", + "std 395.5567\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 18066.0000\n", + "Analysis finished at Wed Apr 8 17:13:26 2026\n", + "Total time elapsed: 12.0m:30.11s\n", + "\n", + "[2026-04-08 17:13:27,199] INFO:cellink.tl.external._sclinker_utils: [244/264] done: AllCoding/ABC_Road_BLD/chr3\n", + "[2026-04-08 17:13:28,293] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License 
v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.18 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.18.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:10:07 2026\n", + "Read list of 285156 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bim\n", + "Read 1 annotations for 285156 SNPs from /data/annotations/AllCoding/100kb/AllCoding.18.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.18.bed\n", + "After filtering, 285156 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 35513 SNPs will be printed.\n", + "Writing LD Scores for 35513 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.18.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.18.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2364 65296.3016\n", + "std 0.1423 54140.9858\n", + "min 0.0051 -1616.0843\n", + "25% 0.1115 28172.0950\n", + "50% 0.2260 51498.2275\n", + "75% 0.3569 85874.0696\n", + "max 0.5000 398457.1679\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1503\n", + "L2 0.1503 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.8137e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 285156.0000\n", + "mean 636.0343\n", + "std 122.4900\n", + 
"min 0.0000\n", + "25% 660.0000\n", + "50% 660.0000\n", + "75% 660.0000\n", + "max 660.0000\n", + "Analysis finished at Wed Apr 8 17:13:28 2026\n", + "Total time elapsed: 3.0m:20.53s\n", + "\n", + "[2026-04-08 17:13:28,294] INFO:cellink.tl.external._sclinker_utils: [245/264] done: AllCoding/100kb/chr18\n", + "[2026-04-08 17:13:39,195] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.9 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.9.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:06:48 2026\n", + "Read list of 438106 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bim\n", + "Read 1 annotations for 438106 SNPs from /data/annotations/AllCoding/100kb/AllCoding.9.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.9.bed\n", + "After filtering, 438106 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 55464 SNPs will be printed.\n", + "Writing LD Scores for 55464 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.9.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/AllCoding/100kb/AllCoding.9.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2359 6262.2824\n", + "std 0.1423 9082.1678\n", + "min 0.0051 -184.8479\n", + "25% 0.1115 523.5241\n", + "50% 0.2249 2817.1959\n", + "75% 0.3569 8188.1947\n", + "max 0.5000 89801.6184\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0971\n", + "L2 0.0971 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.7435e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 438106.0000\n", + "mean 62.6227\n", + "std 64.7486\n", + "min 0.0000\n", + "25% 6.0000\n", + "50% 52.0000\n", + "75% 97.0000\n", + "max 203.0000\n", + "Analysis finished at Wed Apr 8 17:13:38 2026\n", + "Total time elapsed: 6.0m:50.55s\n", + "\n", + "[2026-04-08 17:13:39,206] INFO:cellink.tl.external._sclinker_utils: [246/264] done: AllCoding/100kb/chr9\n", + "[2026-04-08 17:13:47,672] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.20 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.20.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:11:08 2026\n", + "Read list of 221626 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bim\n", + "Read 1 annotations for 221626 SNPs from 
/data/annotations/AllCoding/100kb/AllCoding.20.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.20.bed\n", + "After filtering, 221626 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 31101 SNPs will be printed.\n", + "Writing LD Scores for 31101 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.20.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.20.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2344 31848.2631\n", + "std 0.1427 50224.3949\n", + "min 0.0051 -105.2959\n", + "25% 0.1074 2469.4425\n", + "50% 0.2229 11797.8424\n", + "75% 0.3558 41494.0889\n", + "max 0.5000 393026.8018\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1104\n", + "L2 0.1104 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7.9613e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 221626.0000\n", + "mean 359.2203\n", + "std 307.1674\n", + "min 0.0000\n", + "25% 36.0000\n", + "50% 655.0000\n", + "75% 655.0000\n", + "max 655.0000\n", + "Analysis finished at Wed Apr 8 17:13:47 2026\n", + "Total time elapsed: 2.0m:38.99s\n", + "\n", + "[2026-04-08 17:13:47,673] INFO:cellink.tl.external._sclinker_utils: [247/264] done: AllCoding/100kb/chr20\n", + "[2026-04-08 17:13:50,053] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.22 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.22.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:11:42 2026\n", + "Read list of 141123 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bim\n", + "Read 1 annotations for 141123 SNPs from /data/annotations/AllCoding/100kb/AllCoding.22.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.22.bed\n", + "After filtering, 141123 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 17489 SNPs will be printed.\n", + "Writing LD Scores for 17489 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.22.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.22.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2366 87268.7508\n", + "std 0.1435 79798.3690\n", + "min 0.0051 808.5137\n", + "25% 0.1094 35094.1583\n", + "50% 0.2280 60664.0623\n", + "75% 0.3579 112102.6375\n", + "max 0.5000 579519.4677\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.138\n", + "L2 0.138 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT NaN\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.3590e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 141123.0\n", + "mean 963.0\n", + "std 0.0\n", + "min 963.0\n", + "25% 
963.0\n", + "50% 963.0\n", + "75% 963.0\n", + "max 963.0\n", + "Analysis finished at Wed Apr 8 17:13:49 2026\n", + "Total time elapsed: 2.0m:7.68s\n", + "\n", + "[2026-04-08 17:13:50,054] INFO:cellink.tl.external._sclinker_utils: [248/264] done: AllCoding/100kb/chr22\n", + "[2026-04-08 17:13:53,876] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.19 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.19.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:10:49 2026\n", + "Read list of 232363 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bim\n", + "Read 1 annotations for 232363 SNPs from /data/annotations/AllCoding/100kb/AllCoding.19.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.19.bed\n", + "After filtering, 232363 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 22509 SNPs will be printed.\n", + "Writing LD Scores for 22509 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.19.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/AllCoding/100kb/AllCoding.19.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 96835.3518\n", + "std 0.1420 103970.0438\n", + "min 0.0051 -716.9238\n", + "25% 0.1145 36066.3710\n", + "50% 0.2342 66023.3635\n", + "75% 0.3569 116501.5192\n", + "max 0.5000 780333.4479\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0846\n", + "L2 0.0846 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.2923e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 232363.0000\n", + "mean 986.5318\n", + "std 306.5035\n", + "min 0.0000\n", + "25% 919.0000\n", + "50% 919.0000\n", + "75% 1197.0000\n", + "max 1197.0000\n", + "Analysis finished at Wed Apr 8 17:13:53 2026\n", + "Total time elapsed: 3.0m:3.85s\n", + "\n", + "[2026-04-08 17:13:53,878] INFO:cellink.tl.external._sclinker_utils: [249/264] done: AllCoding/100kb/chr19\n", + "[2026-04-08 17:14:03,772] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.13 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.13.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:07:41 2026\n", + "Read list of 366200 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bim\n", + "Read 1 annotations for 
366200 SNPs from /data/annotations/AllCoding/100kb/AllCoding.13.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.13.bed\n", + "After filtering, 366200 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 45546 SNPs will be printed.\n", + "Writing LD Scores for 45546 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.13.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.13.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2338 122321.8543\n", + "std 0.1428 101820.8243\n", + "min 0.0051 -5686.8663\n", + "25% 0.1074 53167.5604\n", + "50% 0.2209 94817.2977\n", + "75% 0.3548 159034.1149\n", + "max 0.5000 895527.3402\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1686\n", + "L2 0.1686 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT NaN\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.9000e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 366200.0\n", + "mean 1065.0\n", + "std 0.0\n", + "min 1065.0\n", + "25% 1065.0\n", + "50% 1065.0\n", + "75% 1065.0\n", + "max 1065.0\n", + "Analysis finished at Wed Apr 8 17:14:03 2026\n", + "Total time elapsed: 6.0m:22.54s\n", + "\n", + "[2026-04-08 17:14:03,773] INFO:cellink.tl.external._sclinker_utils: [250/264] done: AllCoding/100kb/chr13\n", + "[2026-04-08 17:14:22,613] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + 
"*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.1 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.1.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:04:42 2026\n", + "Read list of 779354 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bim\n", + "Read 1 annotations for 779354 SNPs from /data/annotations/AllCoding/100kb/AllCoding.1.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.1.bed\n", + "After filtering, 779354 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 98642 SNPs will be printed.\n", + "Writing LD Scores for 98642 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.1.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.1.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2346 16793.1553\n", + "std 0.1432 28782.0588\n", + "min 0.0051 -409.4740\n", + "25% 0.1084 2214.0526\n", + "50% 0.2239 7010.5432\n", + "75% 0.3558 18121.5920\n", + "max 0.5000 292020.2267\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0799\n", + "L2 0.0799 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.1217e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 779354.0000\n", + "mean 143.9223\n", + "std 150.5874\n", + "min 0.0000\n", + "25% 
30.0000\n", + "50% 80.0000\n", + "75% 197.0000\n", + "max 490.0000\n", + "Analysis finished at Wed Apr 8 17:14:22 2026\n", + "Total time elapsed: 9.0m:39.46s\n", + "\n", + "[2026-04-08 17:14:22,613] INFO:cellink.tl.external._sclinker_utils: [251/264] done: AllCoding/100kb/chr1\n", + "[2026-04-08 17:14:29,385] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:09 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 annotations for 729645 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores 
in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 3771.1690\n", + "std 0.1422 11030.1116\n", + "min 0.0051 -1307.0192\n", + "25% 0.1125 51.3163\n", + "50% 0.2301 460.0981\n", + "75% 0.3579 2601.7416\n", + "max 0.5000 397576.2052\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0633\n", + "L2 0.0633 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.3695e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 32.4742\n", + "std 293.0354\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 10315.0000\n", + "Analysis finished at Wed Apr 8 17:14:29 2026\n", + "Total time elapsed: 13.0m:19.57s\n", + "\n", + "[2026-04-08 17:14:29,386] INFO:cellink.tl.external._sclinker_utils: [252/264] done: AllCoding/ABC_Road_BLD/chr4\n", + "[2026-04-08 17:15:04,273] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.12 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.12.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:07:26 2026\n", + "Read list of 480110 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bim\n", + "Read 1 annotations for 
480110 SNPs from /data/annotations/AllCoding/100kb/AllCoding.12.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.12.bed\n", + "After filtering, 480110 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 58543 SNPs will be printed.\n", + "Writing LD Scores for 58543 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.12.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.12.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2334 8.7377e+04\n", + "std 0.1429 2.1342e+05\n", + "min 0.0051 -3.2776e+03\n", + "25% 0.1084 2.2098e+03\n", + "50% 0.2209 2.1044e+04\n", + "75% 0.3558 1.0474e+05\n", + "max 0.5000 3.4057e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1076\n", + "L2 0.1076 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 2.9896e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 480110.0000\n", + "mean 622.6981\n", + "std 561.5736\n", + "min 0.0000\n", + "25% 32.0000\n", + "50% 1169.0000\n", + "75% 1169.0000\n", + "max 1169.0000\n", + "Analysis finished at Wed Apr 8 17:15:04 2026\n", + "Total time elapsed: 7.0m:37.22s\n", + "\n", + "[2026-04-08 17:15:04,274] INFO:cellink.tl.external._sclinker_utils: [253/264] done: AllCoding/100kb/chr12\n", + "[2026-04-08 17:15:12,372] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public 
License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.10 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.10.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:07:10 2026\n", + "Read list of 510501 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bim\n", + "Read 1 annotations for 510501 SNPs from /data/annotations/AllCoding/100kb/AllCoding.10.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.10.bed\n", + "After filtering, 510501 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64067 SNPs will be printed.\n", + "Writing LD Scores for 64067 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.10.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.10.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2328 9316.1404\n", + "std 0.1426 17312.3947\n", + "min 0.0051 -483.7497\n", + "25% 0.1074 912.9149\n", + "50% 0.2198 3117.3552\n", + "75% 0.3538 8580.6873\n", + "max 0.5000 155380.0471\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0762\n", + "L2 0.0762 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.5422e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 510501.0000\n", + "mean 69.3873\n", + "std 90.2886\n", + 
"min 0.0000\n", + "25% 10.0000\n", + "50% 48.0000\n", + "75% 74.0000\n", + "max 312.0000\n", + "Analysis finished at Wed Apr 8 17:15:12 2026\n", + "Total time elapsed: 8.0m:1.52s\n", + "\n", + "[2026-04-08 17:15:12,373] INFO:cellink.tl.external._sclinker_utils: [254/264] done: AllCoding/100kb/chr10\n", + "[2026-04-08 17:15:15,987] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:00:50 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.2.l2.ldscore.gz\n", + "\n", + 
"Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 8083.4584\n", + "std 0.1431 27114.9096\n", + "min 0.0051 -3437.6904\n", + "25% 0.1125 98.0323\n", + "50% 0.2290 985.7586\n", + "75% 0.3620 5251.2125\n", + "max 0.5000 719933.9195\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0339\n", + "L2 0.0339 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 5.9446e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 70.8034\n", + "std 559.4229\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 17090.0000\n", + "Analysis finished at Wed Apr 8 17:15:15 2026\n", + "Total time elapsed: 14.0m:24.89s\n", + "\n", + "[2026-04-08 17:15:15,988] INFO:cellink.tl.external._sclinker_utils: [255/264] done: AllCoding/ABC_Road_BLD/chr2\n", + "[2026-04-08 17:15:30,968] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.7 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.7.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:06:17 2026\n", + "Read list of 589569 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bim\n", + "Read 1 
annotations for 589569 SNPs from /data/annotations/AllCoding/100kb/AllCoding.7.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.7.bed\n", + "After filtering, 589569 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 66171 SNPs will be printed.\n", + "Writing LD Scores for 66171 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.7.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.7.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2390 6495.8162\n", + "std 0.1428 10098.7471\n", + "min 0.0051 -153.8311\n", + "25% 0.1125 952.6145\n", + "50% 0.2301 2920.6960\n", + "75% 0.3609 7337.1277\n", + "max 0.5000 96366.8736\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0775\n", + "L2 0.0775 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 3.1800e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 589569.0000\n", + "mean 53.9378\n", + "std 62.3559\n", + "min 0.0000\n", + "25% 9.0000\n", + "50% 39.0000\n", + "75% 79.0000\n", + "max 242.0000\n", + "Analysis finished at Wed Apr 8 17:15:30 2026\n", + "Total time elapsed: 9.0m:13.32s\n", + "\n", + "[2026-04-08 17:15:30,972] INFO:cellink.tl.external._sclinker_utils: [256/264] done: AllCoding/100kb/chr7\n", + "[2026-04-08 17:15:41,349] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License 
v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.8 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.8.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:06:46 2026\n", + "Read list of 549971 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bim\n", + "Read 1 annotations for 549971 SNPs from /data/annotations/AllCoding/100kb/AllCoding.8.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.8.bed\n", + "After filtering, 549971 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 64975 SNPs will be printed.\n", + "Writing LD Scores for 64975 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.8.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.8.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2380 24890.7586\n", + "std 0.1431 42116.0783\n", + "min 0.0051 -210.1890\n", + "25% 0.1115 1769.5765\n", + "50% 0.2280 6402.5697\n", + "75% 0.3609 32131.0595\n", + "max 0.5000 380156.9490\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1267\n", + "L2 0.1267 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.0164e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 549971.0000\n", + "mean 184.8031\n", + "std 210.9887\n", + "min 
0.0000\n", + "25% 12.0000\n", + "50% 51.0000\n", + "75% 489.0000\n", + "max 489.0000\n", + "Analysis finished at Wed Apr 8 17:15:41 2026\n", + "Total time elapsed: 8.0m:54.77s\n", + "\n", + "[2026-04-08 17:15:41,349] INFO:cellink.tl.external._sclinker_utils: [257/264] done: AllCoding/100kb/chr8\n", + "[2026-04-08 17:15:44,035] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.11 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.11.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:07:10 2026\n", + "Read list of 493922 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bim\n", + "Read 1 annotations for 493922 SNPs from /data/annotations/AllCoding/100kb/AllCoding.11.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.11.bed\n", + "After filtering, 493922 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 60977 SNPs will be printed.\n", + "Writing LD Scores for 60977 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.11.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in 
/data/ldscores/AllCoding/100kb/AllCoding.11.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2398 4.1969e+04\n", + "std 0.1420 1.3932e+05\n", + "min 0.0051 -3.1810e+03\n", + "25% 0.1155 1.7927e+03\n", + "50% 0.2301 6.6709e+03\n", + "75% 0.3609 2.5710e+04\n", + "max 0.5000 2.2157e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 -0.0359\n", + "L2 -0.0359 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.1066e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 493922.0000\n", + "mean 224.0517\n", + "std 303.9491\n", + "min 0.0000\n", + "25% 21.0000\n", + "50% 62.0000\n", + "75% 424.0000\n", + "max 850.0000\n", + "Analysis finished at Wed Apr 8 17:15:43 2026\n", + "Total time elapsed: 8.0m:33.25s\n", + "\n", + "[2026-04-08 17:15:44,036] INFO:cellink.tl.external._sclinker_utils: [258/264] done: AllCoding/100kb/chr11\n", + "[2026-04-08 17:15:45,029] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.5 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.5.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:05:16 2026\n", + "Read list of 633015 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bim\n", + "Read 1 annotations for 633015 
SNPs from /data/annotations/AllCoding/100kb/AllCoding.5.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.5.bed\n", + "After filtering, 633015 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75164 SNPs will be printed.\n", + "Writing LD Scores for 75164 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.5.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.5.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2369 2.1487e+05\n", + "std 0.1421 2.3012e+05\n", + "min 0.0051 -2.0782e+03\n", + "25% 0.1125 7.6469e+04\n", + "50% 0.2280 1.5694e+05\n", + "75% 0.3569 2.8381e+05\n", + "max 0.5000 3.3202e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1513\n", + "L2 0.1513 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.0029e+09\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 633015.0000\n", + "mean 1584.3756\n", + "std 508.3651\n", + "min 0.0000\n", + "25% 1749.0000\n", + "50% 1749.0000\n", + "75% 1749.0000\n", + "max 1749.0000\n", + "Analysis finished at Wed Apr 8 17:15:44 2026\n", + "Total time elapsed: 10.0m:28.3s\n", + "\n", + "[2026-04-08 17:15:45,030] INFO:cellink.tl.external._sclinker_utils: [259/264] done: AllCoding/100kb/chr5\n", + "[2026-04-08 17:16:03,357] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", 
+ "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:01:18 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/AllCoding/ABC_Road_BLD/AllCoding.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.6.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 8286.9228\n", + "std 0.1431 31639.0813\n", + "min 0.0051 -2526.1099\n", + "25% 0.1104 191.2632\n", + "50% 0.2249 924.4171\n", + "75% 0.3579 3939.4337\n", + "max 0.5000 974268.9168\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.000 0.003\n", + "L2 0.003 1.000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 4.9684e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 74.8233\n", + "std 
593.3078\n", + "min 0.0000\n", + "25% 0.0000\n", + "50% 0.0000\n", + "75% 0.0000\n", + "max 21242.0000\n", + "Analysis finished at Wed Apr 8 17:16:03 2026\n", + "Total time elapsed: 14.0m:44.7s\n", + "\n", + "[2026-04-08 17:16:03,357] INFO:cellink.tl.external._sclinker_utils: [260/264] done: AllCoding/ABC_Road_BLD/chr6\n", + "[2026-04-08 17:16:15,428] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.2 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.2.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:04:56 2026\n", + "Read list of 839590 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bim\n", + "Read 1 annotations for 839590 SNPs from /data/annotations/AllCoding/100kb/AllCoding.2.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.2.bed\n", + "After filtering, 839590 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 99735 SNPs will be printed.\n", + "Writing LD Scores for 99735 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.2.l2.ldscore.gz\n", + "\n", + "Summary 
of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.2.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2384 2.7568e+05\n", + "std 0.1431 2.5041e+05\n", + "min 0.0051 -3.0752e+03\n", + "25% 0.1125 9.7881e+04\n", + "50% 0.2290 2.1760e+05\n", + "75% 0.3620 3.8505e+05\n", + "max 0.5000 1.6322e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1492\n", + "L2 0.1492 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.7556e+09\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 839590.0000\n", + "mean 2091.0476\n", + "std 862.6020\n", + "min 0.0000\n", + "25% 2459.0000\n", + "50% 2459.0000\n", + "75% 2459.0000\n", + "max 2459.0000\n", + "Analysis finished at Wed Apr 8 17:16:15 2026\n", + "Total time elapsed: 11.0m:19.1s\n", + "\n", + "[2026-04-08 17:16:15,499] INFO:cellink.tl.external._sclinker_utils: [261/264] done: AllCoding/100kb/chr2\n", + "[2026-04-08 17:16:23,043] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.4 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.4.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:05:06 2026\n", + "Read list of 729645 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bim\n", + "Read 1 
annotations for 729645 SNPs from /data/annotations/AllCoding/100kb/AllCoding.4.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.4.bed\n", + "After filtering, 729645 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 74924 SNPs will be printed.\n", + "Writing LD Scores for 74924 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.4.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.4.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2375 2.1330e+05\n", + "std 0.1422 1.8619e+05\n", + "min 0.0051 -6.8840e+03\n", + "25% 0.1125 9.1585e+04\n", + "50% 0.2301 1.6763e+05\n", + "75% 0.3579 2.8335e+05\n", + "max 0.5000 1.8546e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1543\n", + "L2 0.1543 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 1.0810e+09\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 729645.0000\n", + "mean 1481.5413\n", + "std 345.3944\n", + "min 0.0000\n", + "25% 1568.0000\n", + "50% 1568.0000\n", + "75% 1568.0000\n", + "max 1568.0000\n", + "Analysis finished at Wed Apr 8 17:16:22 2026\n", + "Total time elapsed: 11.0m:16.76s\n", + "\n", + "[2026-04-08 17:16:23,044] INFO:cellink.tl.external._sclinker_utils: [262/264] done: AllCoding/100kb/chr4\n", + "[2026-04-08 17:16:30,630] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU 
General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.3 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.3.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:05:01 2026\n", + "Read list of 706350 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bim\n", + "Read 1 annotations for 706350 SNPs from /data/annotations/AllCoding/100kb/AllCoding.3.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.3.bed\n", + "After filtering, 706350 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 83036 SNPs will be printed.\n", + "Writing LD Scores for 83036 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.3.l2.ldscore.gz\n", + "\n", + "Summary of LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.3.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2408 13372.6517\n", + "std 0.1426 27574.4789\n", + "min 0.0051 -640.4550\n", + "25% 0.1166 1441.2395\n", + "50% 0.2321 4227.2585\n", + "75% 0.3630 12354.5971\n", + "max 0.5000 347260.9404\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.0359\n", + "L2 0.0359 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 6.4861e+07\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 706350.0000\n", + "mean 91.8258\n", + "std 
115.1368\n", + "min 0.0000\n", + "25% 14.0000\n", + "50% 45.0000\n", + "75% 99.0000\n", + "max 372.0000\n", + "Analysis finished at Wed Apr 8 17:16:30 2026\n", + "Total time elapsed: 11.0m:29.11s\n", + "\n", + "[2026-04-08 17:16:30,630] INFO:cellink.tl.external._sclinker_utils: [263/264] done: AllCoding/100kb/chr3\n", + "[2026-04-08 17:17:56,572] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--print-snps /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt \\\n", + "--ld-wind-cm 1.0 \\\n", + "--out /data/ldscores/AllCoding/100kb/AllCoding.6 \\\n", + "--bfile /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6 \\\n", + "--thin-annot \\\n", + "--yes-really \\\n", + "--annot /data/annotations/AllCoding/100kb/AllCoding.6.annot.gz \\\n", + "--l2 \n", + "\n", + "Beginning analysis at Wed Apr 8 17:06:00 2026\n", + "Read list of 664016 SNPs from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bim\n", + "Read 1 annotations for 664016 SNPs from /data/annotations/AllCoding/100kb/AllCoding.6.annot.gz\n", + "Read list of 489 individuals from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.fam\n", + "Reading genotypes from /cellink_data/1000genomes_plink_EUR/1000G.EUR.QC.6.bed\n", + "After filtering, 664016 SNPs remain\n", + "Estimating LD Score.\n", + "Reading list of 1217311 SNPs for which to print LD Scores from /cellink_data/1000genomes_hapmap3/hm3_no_MHC.list.txt\n", + "After merging with --print-snps, LD Scores for 75358 SNPs will be printed.\n", + "Writing LD Scores for 75358 SNPs to /data/ldscores/AllCoding/100kb/AllCoding.6.l2.ldscore.gz\n", + "\n", + "Summary of 
LD Scores in /data/ldscores/AllCoding/100kb/AllCoding.6.l2.ldscore.gz\n", + " MAF L2\n", + "mean 0.2357 1.8603e+05\n", + "std 0.1431 2.6513e+05\n", + "min 0.0051 -1.4098e+03\n", + "25% 0.1104 4.2324e+03\n", + "50% 0.2249 1.1204e+05\n", + "75% 0.3579 2.4745e+05\n", + "max 0.5000 2.7396e+06\n", + "\n", + "MAF/LD Score Correlation Matrix\n", + " MAF L2\n", + "MAF 1.0000 0.1153\n", + "L2 0.1153 1.0000\n", + "\n", + "Annotation Correlation Matrix\n", + " ANNOT\n", + "ANNOT 1.0\n", + "\n", + "Annotation Matrix Column Sums\n", + "ANNOT 7.8515e+08\n", + "\n", + "Summary of Annotation Matrix Row Sums\n", + "count 664016.0000\n", + "mean 1182.4225\n", + "std 715.5915\n", + "min 0.0000\n", + "25% 62.0000\n", + "50% 1628.0000\n", + "75% 1628.0000\n", + "max 1628.0000\n", + "Analysis finished at Wed Apr 8 17:17:56 2026\n", + "Total time elapsed: 11.0m:56.25s\n", + "\n", + "[2026-04-08 17:17:56,615] INFO:cellink.tl.external._sclinker_utils: [264/264] done: AllCoding/100kb/chr6\n", + "['B naive_L2', 'CD4 Naive_L2', 'CD4 TCM_L2', 'CD8 TEM_L2', 'NK_L2', 'AllCoding']\n" + ] + } + ], + "source": [ + "# ── LD score computation ──────────────────────────────────────────────────────\n", + "# LDSC --h2 always reads chromosomes 1-22; you cannot pass a subset.\n", + "# n_jobs parallelises across chromosomes — use as many as your node has cores.\n", + "# Example: with n_jobs=4, AllCoding alone (2 strategies × 22 chr = 44 jobs) ≈ 11 parallel batches ≈ 15 min; this run uses n_jobs=28.\n", + "# For a full analysis with many programs, increase n_jobs or submit as a batch job.\n", + "# ─────────────────────────────────────────────────────────────────────────────\n", + "\n", + "ld_prefixes = compute_ld_scores_for_sclinker(\n", + " annotation_prefixes=annotation_prefixes,\n", + " bim_prefix=bim_prefix,\n", + " ld_scores_dir=os.path.abspath(\"ldscores\"),\n", + " hapmap3_snps_file=str(hapmap3_snps),\n", + " chromosomes=list(range(1, 23)), # all 22 chromosomes — required by LDSC\n", + " n_jobs=28, # increase if more cores are available\n", + "
runner=runner,\n", + ")\n", + "print(list(ld_prefixes.keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3ef33849", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B naive_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2.'},\n", + " 'CD4 Naive_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.'},\n", + " 'CD4 TCM_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.'},\n", + " 'CD8 TEM_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.'},\n", + " 'NK_L2': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2.',\n", + " '100kb': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2.'},\n", + " 'AllCoding': {'ABC_Road_BLD': '/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding.',\n", + " '100kb': 
'/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding.'}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ld_prefixes" + ] + }, + { + "cell_type": "markdown", + "id": "9427c051", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 5: Munge GWAS summary statistics\n", + "\n", + "Converts raw GWAS sumstats into the `.sumstats.gz` format expected by LDSC.\n", + "Uses cellink's existing `munge_sumstats` wrapper from `_ldsc.py`." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "dd0e81d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 17:17:57,646] INFO:root: Fetching https://www.ebi.ac.uk/gwas/rest/api/v2/studies/GCST006250\n", + "[2026-04-08 17:17:57,936] INFO:root: Selected file with build unknown (priority selection): 29942086-GCST006250-EFO_0004337.h.tsv.gz\n", + "[2026-04-08 17:17:57,936] INFO:root: Using harmonised summary statistics (build: unknown)\n", + "[2026-04-08 17:17:57,938] INFO:root: Downloading http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST006001-GCST007000/GCST006250/harmonised/29942086-GCST006250-EFO_0004337.h.tsv.gz to /home/icb/lucas.arnoldt/cellink_data/GCST006250_summary_stats.tsv.gz\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloaded GWAS to: /home/icb/lucas.arnoldt/cellink_data/GCST006250_summary_stats.tsv.gz\n" + ] + } + ], + "source": [ + "# ── Download a real GWAS for the tutorial ────────────────────────────────────\n", + "# We use IBD (Inflammatory Bowel Disease, GCST006250, de Lange et al. 
2017)\n", + "# — a large immune-mediated trait with N~86k, harmonised summary stats with\n", + "# rsIDs, and strong signal in blood cell types (relevant to the OneK1K dataset).\n", + "#\n", + "# The GWAS catalog harmonised file has these key columns:\n", + "# hm_rsid → SNP identifier (passed to munge as --snp hm_rsid)\n", + "# hm_effect_allele → A1\n", + "# hm_other_allele → A2\n", + "# hm_beta → effect size\n", + "# standard_error → SE\n", + "# p_value → p-value\n", + "# n → per-variant N (absent in this file, so N_SAMPLES below is used instead)\n", + "#\n", + "# get_gwas_catalog_study_summary_stats() downloads the harmonised .tsv.gz\n", + "# and returns a DataFrame; return_path=True gives just the file path.\n", + "from cellink.resources import get_gwas_catalog_study_summary_stats\n", + "\n", + "GWAS_ACCESSION = \"GCST006250\" # IBD GWAS — change for a different trait\n", + "N_SAMPLES = 86640 # approximate total N for this study\n", + "\n", + "gwas_path = get_gwas_catalog_study_summary_stats(\n", + " GWAS_ACCESSION,\n", + " return_path=True,\n", + ")\n", + "print(f\"Downloaded GWAS to: {gwas_path}\")\n", + "\n", + "# Peek at the columns (next cell) to confirm hm_rsid is present\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1b0a9f66", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_456353/3760024279.py:2: DtypeWarning: Columns (2) have mixed types.
Specify dtype option on import or set low_memory=False.\n", + " gwas_head = pd.read_csv(gwas_path, sep=\"\\t\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Columns: ['hm_variant_id', 'hm_rsid', 'hm_chrom', 'hm_pos', 'hm_other_allele', 'hm_effect_allele', 'hm_beta', 'hm_odds_ratio', 'hm_ci_lower', 'hm_ci_upper', 'hm_effect_allele_frequency', 'hm_code', 'variant_id', 'chromosome', 'base_pair_location', 'effect_allele', 'other_allele', 'eaf_ref', 'z', 'beta', 'standard_error', 'p_value', 'n_analyzed', 'mininfo', 'effectdirection', 'effect_allele_frequency', 'odds_ratio', 'ci_lower', 'ci_upper']\n", + " hm_rsid hm_effect_allele hm_other_allele hm_beta standard_error \\\n", + "0 rs79817489 T C -0.006750 0.005404 \n", + "1 rs184120752 A C -0.006362 0.007982 \n", + "2 rs10904045 T C 0.004285 0.002854 \n", + "3 rs11251906 A C 0.001104 0.007363 \n", + "4 rs6560828 A G 0.001924 0.002833 \n", + "\n", + " p_value \n", + "0 0.2116 \n", + "1 0.4252 \n", + "2 0.1333 \n", + "3 0.8807 \n", + "4 0.4974 \n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "gwas_head = pd.read_csv(gwas_path, sep=\"\\t\")\n", + "print(\"Columns:\", gwas_head.columns.tolist())\n", + "print(gwas_head[[\"hm_rsid\", \"hm_effect_allele\", \"hm_other_allele\", \"hm_beta\",\n", + " \"standard_error\", \"p_value\"]].head())" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c4e8e572", + "metadata": {}, + "outputs": [], + "source": [ + "gwas_head[[\n", + " \"hm_rsid\",\n", + " \"hm_effect_allele\",\n", + " \"hm_other_allele\",\n", + " \"hm_beta\",\n", + " \"standard_error\",\n", + " \"p_value\"\n", + "]].to_csv(\n", + " \"gwas_sumstat_ibd_filt.tsv.gz\",\n", + " sep=\"\\t\",\n", + " index=False,\n", + " compression=\"gzip\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e2778fea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 17:24:47,796] 
INFO:cellink.tl.external._ldsc: Running munge_sumstats: /ldsc/munge_sumstats.py --sumstats gwas_sumstat_ibd_filt.tsv.gz --out ibd --N 86640 --signed-sumstats hm_beta,0 --p p_value --a1 hm_effect_allele --a2 hm_other_allele --snp hm_rsid\n", + "[2026-04-08 17:24:47,798] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/munge_sumstats.py --sumstats /data/gwas_sumstat_ibd_filt.tsv.gz --out ibd --N 86640 --signed-sumstats hm_beta,0 --p p_value --a1 hm_effect_allele --a2 hm_other_allele --snp hm_rsid\n", + "[2026-04-08 17:26:25,030] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./munge_sumstats.py \\\n", + "--signed-sumstats hm_beta,0 \\\n", + "--out ibd \\\n", + "--N 86640.0 \\\n", + "--a1 hm_effect_allele \\\n", + "--a2 hm_other_allele \\\n", + "--snp hm_rsid \\\n", + "--sumstats /data/gwas_sumstat_ibd_filt.tsv.gz \\\n", + "--p p_value \n", + "\n", + "Interpreting column names as follows:\n", + "hm_rsid:\tVariant ID (e.g., rs number)\n", + "hm_other_allele:\tAllele 2, interpreted as non-ref allele for signed sumstat.\n", + "p_value:\tp-Value\n", + "hm_effect_allele:\tAllele 1, interpreted as ref allele for signed sumstat.\n", + "hm_beta:\tDirectional summary statistic as specified by --signed-sumstats.\n", + "\n", + "Reading sumstats from /data/gwas_sumstat_ibd_filt.tsv.gz into memory 5000000 SNPs at a time.\n", + ".. 
done\n", + "Read 9294801 SNPs from --sumstats file.\n", + "Removed 18620 SNPs with missing values.\n", + "Removed 0 SNPs with INFO <= 0.9.\n", + "Removed 0 SNPs with MAF <= 0.01.\n", + "Removed 0 SNPs with out-of-bounds p-values.\n", + "Removed 1287308 variants that were not SNPs or were strand-ambiguous.\n", + "7988873 SNPs remain.\n", + "Removed 1 SNPs with duplicated rs numbers (7988872 SNPs remain).\n", + "Using N = 86640.0\n", + "Median value of SIGNED_SUMSTATS was -9.75766770277e-05, which seems sensible.\n", + "Writing summary statistics for 7988872 SNPs (7988872 with nonmissing beta) to ibd.sumstats.gz.\n", + "\n", + "Metadata:\n", + "Mean chi^2 = 1.7\n", + "Lambda GC = 1.464\n", + "Max chi^2 = 133.377\n", + "11796 Genome-wide significant SNPs (some may have been removed by filtering).\n", + "\n", + "Conversion finished at Wed Apr 8 17:26:24 2026\n", + "Total time elapsed: 1.0m:33.51s\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'ibd.sumstats.gz'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "os.makedirs(\"sumstats\", exist_ok=True)\n", + "\n", + "# merge_alleles is OPTIONAL — it restricts the munged sumstats to SNPs in a\n", + "# reference panel file with columns SNP, A1, A2. The hm3_no_MHC.list.txt\n", + "# from get_1000genomes_hapmap3() is a bare rsID list (no alleles), so it\n", + "# cannot be used for --merge-alleles. 
For sc-linker this is fine: the\n", + "# --print-snps flag already restricted LD scores to HapMap3 SNPs during\n", + "# compute_ld_scores_for_sclinker, so no further filtering is needed here.\n", + "munge_sumstats(\n", + " sumstats_file=\"gwas_sumstat_ibd_filt.tsv.gz\",\n", + " out_prefix=\"ibd\",\n", + " n_samples=N_SAMPLES,\n", + " snp_col=\"hm_rsid\",\n", + " a1_col=\"hm_effect_allele\",\n", + " a2_col=\"hm_other_allele\",\n", + " signed_sumstats=(\"hm_beta\", 0), # (column, null value) tuple; forwarded to LDSC as --signed-sumstats hm_beta,0\n", + " p_col=\"p_value\",\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "df5e7110", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 17:26:25,386] INFO:cellink.tl.external._sclinker_utils: S-LDSC: B naive_L2/ABC_Road_BLD/ibd\n", + "[2026-04-08 17:26:25,387] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/B_naive_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:26:25,392] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/B_naive_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:28:25,687] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2. \\\n", + "--out /data/sldsc_results/B_naive_L2/ABC_Road_BLD/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:26:26 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/B_naive_L2/ABC_Road_BLD/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/B_naive_L2/ABC_Road_BLD/ibd.part_delete.\n", + "Total Observed scale h2: 0.5702 (0.0203)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 
PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.074 (0.0127)\n", + "Ratio: 0.0723 (0.0124)\n", + "Reading annot matrix from 
/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/B_naive_L2/ABC_Road_BLD/ibd.results\n", + "Analysis finished at Wed Apr 8 17:28:25 2026\n", + "Total time elapsed: 1.0m:59.26s\n", + "\n", + "[2026-04-08 17:28:25,820] INFO:cellink.tl.external._sclinker_utils: S-LDSC: B naive_L2/100kb/ibd\n", + "[2026-04-08 17:28:25,821] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/B_naive_L2/100kb/B_naive_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/B_naive_L2/100kb/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:28:25,828] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/100kb/B_naive_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/B_naive_L2/100kb/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:30:19,478] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/100kb/B_naive_L2. \\\n", + "--out /data/sldsc_results/B_naive_L2/100kb/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. \\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:28:26 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/100kb/B_naive_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/B_naive_L2/100kb/ibd.delete.\n", + "Printing partitioned block jackknife delete values to 
/data/sldsc_results/B_naive_L2/100kb/ibd.part_delete.\n", + "Total Observed scale h2: 0.5698 (0.02)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 
Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0741 (0.0127)\n", + "Ratio: 0.0723 (0.0124)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/B_naive_L2/100kb/B_naive_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/B_naive_L2/100kb/ibd.results\n", + "Analysis finished at Wed Apr 8 17:30:19 2026\n", + "Total time elapsed: 1.0m:52.57s\n", + "\n", + "[2026-04-08 17:30:19,757] INFO:cellink.tl.external._sclinker_utils: S-LDSC: CD4 Naive_L2/ABC_Road_BLD/ibd\n", + "[2026-04-08 17:30:19,758] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/CD4_Naive_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:30:19,760] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/CD4_Naive_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:32:14,144] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2. \\\n", + "--out /data/sldsc_results/CD4_Naive_L2/ABC_Road_BLD/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:30:21 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/CD4_Naive_L2/ABC_Road_BLD/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/CD4_Naive_L2/ABC_Road_BLD/ibd.part_delete.\n", + "Total Observed scale h2: 0.5701 (0.0201)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 
PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.074 (0.0127)\n", + "Ratio: 0.0722 (0.0124)\n", + "Reading annot matrix from 
/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/ABC_Road_BLD/CD4_Naive_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/CD4_Naive_L2/ABC_Road_BLD/ibd.results\n", + "Analysis finished at Wed Apr 8 17:32:14 2026\n", + "Total time elapsed: 1.0m:52.95s\n", + "\n", + "[2026-04-08 17:32:14,206] INFO:cellink.tl.external._sclinker_utils: S-LDSC: CD4 Naive_L2/100kb/ibd\n", + "[2026-04-08 17:32:14,207] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/CD4_Naive_L2/100kb/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:32:14,210] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/CD4_Naive_L2/100kb/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:34:07,598] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2. \\\n", + "--out /data/sldsc_results/CD4_Naive_L2/100kb/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. \\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:32:15 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/CD4_Naive_L2/100kb/ibd.delete.\n", + "Printing partitioned block jackknife delete values to 
/data/sldsc_results/CD4_Naive_L2/100kb/ibd.part_delete.\n", + "Total Observed scale h2: 0.5699 (0.02)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 
Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.074 (0.0127)\n", + "Ratio: 0.0722 (0.0124)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_Naive_L2/100kb/CD4_Naive_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/CD4_Naive_L2/100kb/ibd.results\n", + "Analysis finished at Wed Apr 8 17:34:07 2026\n", + "Total time elapsed: 1.0m:52.4s\n", + "\n", + "[2026-04-08 17:34:07,795] INFO:cellink.tl.external._sclinker_utils: S-LDSC: CD4 TCM_L2/ABC_Road_BLD/ibd\n", + "[2026-04-08 17:34:07,796] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/CD4_TCM_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:34:07,797] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/CD4_TCM_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:36:01,865] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2. \\\n", + "--out /data/sldsc_results/CD4_TCM_L2/ABC_Road_BLD/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:34:08 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/CD4_TCM_L2/ABC_Road_BLD/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/CD4_TCM_L2/ABC_Road_BLD/ibd.part_delete.\n", + "Total Observed scale h2: 0.57 (0.02)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 
PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0741 (0.0127)\n", + "Ratio: 0.0723 (0.0124)\n", + "Reading annot matrix from 
/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/ABC_Road_BLD/CD4_TCM_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/CD4_TCM_L2/ABC_Road_BLD/ibd.results\n", + "Analysis finished at Wed Apr 8 17:36:01 2026\n", + "Total time elapsed: 1.0m:53.07s\n", + "\n", + "[2026-04-08 17:36:02,074] INFO:cellink.tl.external._sclinker_utils: S-LDSC: CD4 TCM_L2/100kb/ibd\n", + "[2026-04-08 17:36:02,074] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/CD4_TCM_L2/100kb/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:36:02,077] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/CD4_TCM_L2/100kb/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:37:56,592] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2. \\\n", + "--out /data/sldsc_results/CD4_TCM_L2/100kb/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. \\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:36:02 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/CD4_TCM_L2/100kb/ibd.delete.\n", + "Printing partitioned block jackknife delete values to 
/data/sldsc_results/CD4_TCM_L2/100kb/ibd.part_delete.\n", + "Total Observed scale h2: 0.5699 (0.02)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 
Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.074 (0.0127)\n", + "Ratio: 0.0722 (0.0124)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD4_TCM_L2/100kb/CD4_TCM_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/CD4_TCM_L2/100kb/ibd.results\n", + "Analysis finished at Wed Apr 8 17:37:56 2026\n", + "Total time elapsed: 1.0m:53.47s\n", + "\n", + "[2026-04-08 17:37:56,806] INFO:cellink.tl.external._sclinker_utils: S-LDSC: CD8 TEM_L2/ABC_Road_BLD/ibd\n", + "[2026-04-08 17:37:56,807] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/CD8_TEM_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:37:56,811] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/CD8_TEM_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:39:53,273] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2. \\\n", + "--out /data/sldsc_results/CD8_TEM_L2/ABC_Road_BLD/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:38:00 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/CD8_TEM_L2/ABC_Road_BLD/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/CD8_TEM_L2/ABC_Road_BLD/ibd.part_delete.\n", + "Total Observed scale h2: 0.5683 (0.0203)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 
PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0739 (0.0127)\n", + "Ratio: 0.0722 (0.0124)\n", + "Reading annot matrix from 
/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/ABC_Road_BLD/CD8_TEM_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/CD8_TEM_L2/ABC_Road_BLD/ibd.results\n", + "Analysis finished at Wed Apr 8 17:39:53 2026\n", + "Total time elapsed: 1.0m:53.01s\n", + "\n", + "[2026-04-08 17:39:53,365] INFO:cellink.tl.external._sclinker_utils: S-LDSC: CD8 TEM_L2/100kb/ibd\n", + "[2026-04-08 17:39:53,366] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/CD8_TEM_L2/100kb/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:39:53,367] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/CD8_TEM_L2/100kb/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:41:47,904] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2. \\\n", + "--out /data/sldsc_results/CD8_TEM_L2/100kb/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. \\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:39:54 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/CD8_TEM_L2/100kb/ibd.delete.\n", + "Printing partitioned block jackknife delete values to 
/data/sldsc_results/CD8_TEM_L2/100kb/ibd.part_delete.\n", + "Total Observed scale h2: 0.57 (0.02)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 
Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0741 (0.0127)\n", + "Ratio: 0.0723 (0.0124)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/CD8_TEM_L2/100kb/CD8_TEM_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/CD8_TEM_L2/100kb/ibd.results\n", + "Analysis finished at Wed Apr 8 17:41:47 2026\n", + "Total time elapsed: 1.0m:53.5s\n", + "\n", + "[2026-04-08 17:41:48,152] INFO:cellink.tl.external._sclinker_utils: S-LDSC: NK_L2/ABC_Road_BLD/ibd\n", + "[2026-04-08 17:41:48,152] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/ABC_Road_BLD/NK_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/NK_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:41:48,170] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/NK_L2/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:43:42,124] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2. \\\n", + "--out /data/sldsc_results/NK_L2/ABC_Road_BLD/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:41:49 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/NK_L2/ABC_Road_BLD/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/NK_L2/ABC_Road_BLD/ibd.part_delete.\n", + "Total Observed scale h2: 0.568 (0.0202)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 
PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.074 (0.0127)\n", + "Ratio: 0.0722 (0.0124)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/ABC_Road_BLD/NK_L2.[1-22] ...\n", + "Results printed to 
/data/sldsc_results/NK_L2/ABC_Road_BLD/ibd.results\n", + "Analysis finished at Wed Apr 8 17:43:41 2026\n", + "Total time elapsed: 1.0m:52.92s\n", + "\n", + "[2026-04-08 17:43:42,249] INFO:cellink.tl.external._sclinker_utils: S-LDSC: NK_L2/100kb/ibd\n", + "[2026-04-08 17:43:42,250] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/NK_L2/100kb/NK_L2. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/NK_L2/100kb/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:43:42,252] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/100kb/NK_L2. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/NK_L2/100kb/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:45:39,188] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/100kb/NK_L2. \\\n", + "--out /data/sldsc_results/NK_L2/100kb/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. \\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:43:45 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/100kb/NK_L2.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/NK_L2/100kb/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/NK_L2/100kb/ibd.part_delete.\n", + "Total Observed scale h2: 
0.5704 (0.0201)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 
Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0741 (0.0127)\n", + "Ratio: 0.0724 (0.0124)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/NK_L2/100kb/NK_L2.[1-22] ...\n", + "Results printed to /data/sldsc_results/NK_L2/100kb/ibd.results\n", + "Analysis finished at Wed Apr 8 17:45:39 2026\n", + "Total time elapsed: 1.0m:53.72s\n", + "\n", + "[2026-04-08 17:45:39,395] INFO:cellink.tl.external._sclinker_utils: S-LDSC: AllCoding/ABC_Road_BLD/ibd\n", + "[2026-04-08 17:45:39,395] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/ABC_Road_BLD/AllCoding. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/AllCoding/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:45:39,399] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/AllCoding/ABC_Road_BLD/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:47:33,182] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding. \\\n", + "--out /data/sldsc_results/AllCoding/ABC_Road_BLD/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. 
\\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:45:40 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/AllCoding/ABC_Road_BLD/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/AllCoding/ABC_Road_BLD/ibd.part_delete.\n", + "Total Observed scale h2: 0.561 (0.0219)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 
PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0764 (0.0141)\n", + "Ratio: 0.0746 (0.0138)\n", + "Reading annot matrix from 
/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/ABC_Road_BLD/AllCoding.[1-22] ...\n", + "Results printed to /data/sldsc_results/AllCoding/ABC_Road_BLD/ibd.results\n", + "Analysis finished at Wed Apr 8 17:47:33 2026\n", + "Total time elapsed: 1.0m:52.77s\n", + "\n", + "[2026-04-08 17:47:33,630] INFO:cellink.tl.external._sclinker_utils: S-LDSC: AllCoding/100kb/ibd\n", + "[2026-04-08 17:47:33,631] INFO:cellink.tl.external._ldsc: Estimating heritability: /ldsc/ldsc.py --h2 /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ibd.sumstats.gz --ref-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/ldscores/AllCoding/100kb/AllCoding. --w-ld-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials/sldsc_results/AllCoding/100kb/ibd --overlap-annot --frqfile-chr /home/icb/lucas.arnoldt/cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. --print-coefficients --print-delete-vals\n", + "[2026-04-08 17:47:33,632] INFO:cellink.tl._runner: Executing: singularity exec -B /ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials:/data -B /home/icb/lucas.arnoldt/cellink_data:/cellink_data /home/icb/lucas.arnoldt/workspace/projects/ldsc.sif /ldsc/ldsc.py --h2 /data/ibd.sumstats.gz --ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/100kb/AllCoding. --w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. --out /data/sldsc_results/AllCoding/100kb/ibd --overlap-annot --frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. 
--print-coefficients --print-delete-vals\n", + "[2026-04-08 17:49:30,122] INFO:cellink.tl._runner: *********************************************************************\n", + "* LD Score Regression (LDSC)\n", + "* Version 1.0.1\n", + "* (C) 2014-2019 Brendan Bulik-Sullivan and Hilary Finucane\n", + "* Broad Institute of MIT and Harvard / MIT Department of Mathematics\n", + "* GNU General Public License v3\n", + "*********************************************************************\n", + "Call: \n", + "./ldsc.py \\\n", + "--h2 /data/ibd.sumstats.gz \\\n", + "--ref-ld-chr /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/100kb/AllCoding. \\\n", + "--out /data/sldsc_results/AllCoding/100kb/ibd \\\n", + "--overlap-annot \\\n", + "--frqfile-chr /cellink_data/1000genomes_frq_EUR/1000G.EUR.QC. \\\n", + "--w-ld-chr /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC. \\\n", + "--print-coefficients \\\n", + "--print-delete-vals \n", + "\n", + "Beginning analysis at Wed Apr 8 17:47:34 2026\n", + "Reading summary statistics from /data/ibd.sumstats.gz ...\n", + "Read summary statistics for 7988872 SNPs.\n", + "Reading reference panel LD Score from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/100kb/AllCoding.[1-22] ...\n", + "Read reference panel LD Scores for 1190321 SNPs.\n", + "Removing partitioned LD Scores with zero variance.\n", + "Reading regression weight LD Score from /cellink_data/1000genomes_ld_weights_EUR/weights.hm3_noMHC.[1-22] ...\n", + "Read regression weight LD Scores for 1187349 SNPs.\n", + "After merging with reference panel LD, 1177317 SNPs remain.\n", + "After merging with regression SNP LD, 1174367 SNPs remain.\n", + "Removed 145 SNPs with chi^2 > 86.64 (1174222 SNPs remain)\n", + "Printing block jackknife delete values to /data/sldsc_results/AllCoding/100kb/ibd.delete.\n", + "Printing partitioned block jackknife delete values to /data/sldsc_results/AllCoding/100kb/ibd.part_delete.\n", + 
"Total Observed scale h2: 0.5654 (0.0231)\n", + "Categories: baseL2_0 Coding_UCSCL2_0 Coding_UCSC.flanking.500L2_0 Conserved_LindbladTohL2_0 Conserved_LindbladToh.flanking.500L2_0 CTCF_HoffmanL2_0 CTCF_Hoffman.flanking.500L2_0 DGF_ENCODEL2_0 DGF_ENCODE.flanking.500L2_0 DHS_peaks_TrynkaL2_0 DHS_TrynkaL2_0 DHS_Trynka.flanking.500L2_0 Enhancer_AnderssonL2_0 Enhancer_Andersson.flanking.500L2_0 Enhancer_HoffmanL2_0 Enhancer_Hoffman.flanking.500L2_0 FetalDHS_TrynkaL2_0 FetalDHS_Trynka.flanking.500L2_0 H3K27ac_HniszL2_0 H3K27ac_Hnisz.flanking.500L2_0 H3K27ac_PGC2L2_0 H3K27ac_PGC2.flanking.500L2_0 H3K4me1_peaks_TrynkaL2_0 H3K4me1_TrynkaL2_0 H3K4me1_Trynka.flanking.500L2_0 H3K4me3_peaks_TrynkaL2_0 H3K4me3_TrynkaL2_0 H3K4me3_Trynka.flanking.500L2_0 H3K9ac_peaks_TrynkaL2_0 H3K9ac_TrynkaL2_0 H3K9ac_Trynka.flanking.500L2_0 Intron_UCSCL2_0 Intron_UCSC.flanking.500L2_0 PromoterFlanking_HoffmanL2_0 PromoterFlanking_Hoffman.flanking.500L2_0 Promoter_UCSCL2_0 Promoter_UCSC.flanking.500L2_0 Repressed_HoffmanL2_0 Repressed_Hoffman.flanking.500L2_0 SuperEnhancer_HniszL2_0 SuperEnhancer_Hnisz.flanking.500L2_0 TFBS_ENCODEL2_0 TFBS_ENCODE.flanking.500L2_0 Transcr_HoffmanL2_0 Transcr_Hoffman.flanking.500L2_0 TSS_HoffmanL2_0 TSS_Hoffman.flanking.500L2_0 UTR_3_UCSCL2_0 UTR_3_UCSC.flanking.500L2_0 UTR_5_UCSCL2_0 UTR_5_UCSC.flanking.500L2_0 WeakEnhancer_HoffmanL2_0 WeakEnhancer_Hoffman.flanking.500L2_0 GERP.NSL2_0 GERP.RSsup4L2_0 MAFbin1L2_0 MAFbin2L2_0 MAFbin3L2_0 MAFbin4L2_0 MAFbin5L2_0 MAFbin6L2_0 MAFbin7L2_0 MAFbin8L2_0 MAFbin9L2_0 MAFbin10L2_0 MAF_Adj_Predicted_Allele_AgeL2_0 MAF_Adj_LLD_AFRL2_0 Recomb_Rate_10kbL2_0 Nucleotide_Diversity_10kbL2_0 Backgrd_Selection_StatL2_0 CpG_Content_50kbL2_0 MAF_Adj_ASMCL2_0 GTEx_eQTL_MaxCPPL2_0 BLUEPRINT_H3K27acQTL_MaxCPPL2_0 BLUEPRINT_H3K4me1QTL_MaxCPPL2_0 BLUEPRINT_DNA_methylation_MaxCPPL2_0 synonymousL2_0 non_synonymousL2_0 Conserved_Vertebrate_phastCons46wayL2_0 Conserved_Vertebrate_phastCons46way.flanking.500L2_0 Conserved_Mammal_phastCons46wayL2_0 
Conserved_Mammal_phastCons46way.flanking.500L2_0 Conserved_Primate_phastCons46wayL2_0 Conserved_Primate_phastCons46way.flanking.500L2_0 BivFlnkL2_0 BivFlnk.flanking.500L2_0 Human_Promoter_VillarL2_0 Human_Promoter_Villar.flanking.500L2_0 Human_Enhancer_VillarL2_0 Human_Enhancer_Villar.flanking.500L2_0 Ancient_Sequence_Age_Human_PromoterL2_0 Ancient_Sequence_Age_Human_Promoter.flanking.500L2_0 Ancient_Sequence_Age_Human_EnhancerL2_0 Ancient_Sequence_Age_Human_Enhancer.flanking.500L2_0 Human_Enhancer_Villar_Species_Enhancer_CountL2_0 Human_Promoter_Villar_ExACL2_0 Human_Promoter_Villar_ExAC.flanking.500L2_0 L2_1\n", + "Lambda GC: 1.7218\n", + "Mean Chi^2: 2.0244\n", + "Intercept: 1.0773 (0.0157)\n", + "Ratio: 0.0754 (0.0153)\n", + "Reading annot matrix from /cellink_data/1000genomes_ld_scores_EUR/baselineLD.,/data/ldscores/AllCoding/100kb/AllCoding.[1-22] ...\n", + "Results printed to /data/sldsc_results/AllCoding/100kb/ibd.results\n", + "Analysis finished at Wed Apr 8 17:49:29 2026\n", + "Total time elapsed: 1.0m:55.35s\n", + "\n" + ] + } + ], + "source": [ + "# annotation_prefixes is required so LDSC --overlap-annot can find the\n", + "# .annot.gz files alongside the LD scores (cellink symlinks them automatically).\n", + "# sumstats path must be absolute so it survives Singularity bind-mount remapping.\n", + "import os\n", + "sumstats_abs = os.path.abspath(\"ibd.sumstats.gz\")\n", + "\n", + "sldsc_log_paths = run_sclinker_heritability(\n", + " ld_prefixes=ld_prefixes,\n", + " sumstats_files=[sumstats_abs],\n", + " ref_ld_chr=ref_ld_chr,\n", + " w_ld_chr=w_ld_chr,\n", + " out_dir=os.path.abspath(\"sldsc_results\"),\n", + " annotation_prefixes=annotation_prefixes, # for --overlap-annot symlink\n", + " frqfile_chr=frq_chr,\n", + " runner=runner,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "02093864", + "metadata": {}, + "outputs": [], + "source": [ + "os.makedirs(\"sldsc_results\", exist_ok=True)" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 20, + "id": "020e5758", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "43222434", + "metadata": {}, + "outputs": [], + "source": [ + "#/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot[./gz/bz2]" + ] + }, + { + "cell_type": "markdown", + "id": "e9e6d35a", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 7: Load results and compute E-scores" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c5a00f1d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-04-08 20:25:20,894] INFO:cellink.tl.external._sclinker_utils: Found 12 log files in sldsc_results\n", + "[2026-04-08 20:25:21,880] INFO:cellink.tl.external._sclinker_utils: Loaded 12 results: 6 programs, 1 traits\n", + "Shape: (12, 11)\n", + "Columns: ['h2_obs', 'h2_obs_se', 'Enrichment', 'Enrichment_std_error', 'Coefficient', 'Coefficient_std_error', 'Coefficient_z_score', 'Enrichment_z_score', 'program', 'strategy', 'trait']\n", + " program strategy trait Enrichment Enrichment_std_error h2_obs\n", + "0 AllCoding 100kb ibd 1.009800 0.030181 0.5654\n", + "1 AllCoding ABC_Road_BLD ibd 0.368499 1.026320 0.5610\n", + "2 B_naive_L2 100kb ibd 0.550202 0.254807 0.5698\n", + "3 B_naive_L2 ABC_Road_BLD ibd 1.263520 21.239855 0.5702\n", + "4 CD4_Naive_L2 100kb ibd 0.885100 0.139707 0.5699\n", + " program strategy trait Enrichment E_score E_score_z\n", + "0 AllCoding 100kb ibd 1.009800 0.000000 0.000000\n", + "1 AllCoding ABC_Road_BLD ibd 0.368499 0.000000 0.000000\n", + "2 B_naive_L2 100kb ibd 0.550202 -0.459598 -1.791188\n", + "3 B_naive_L2 ABC_Road_BLD ibd 1.263520 0.895021 0.042090\n", + "4 CD4_Naive_L2 100kb ibd 
0.885100 -0.124700 -0.872450\n", + "5 CD4_Naive_L2 ABC_Road_BLD ibd 17.018680 16.650182 0.728061\n", + "6 CD4_TCM_L2 100kb ibd 0.887840 -0.121960 -0.932623\n", + "7 CD4_TCM_L2 ABC_Road_BLD ibd 9.233740 8.865241 0.605904\n", + "8 CD8_TEM_L2 100kb ibd 0.289665 -0.720134 -2.882152\n", + "9 CD8_TEM_L2 ABC_Road_BLD ibd -16.555558 -16.924056 -0.599301\n" + ] + } + ], + "source": [ + "results_df = load_sclinker_heritability_results(\"sldsc_results\")\n", + "\n", + "if results_df.empty:\n", + " print(\"No results parsed yet. Check that run_sclinker_heritability completed \"\n", + " \"and that annotation_prefixes= was passed (needed for --overlap-annot).\")\n", + "else:\n", + " print(f\"Shape: {results_df.shape}\")\n", + " print(f\"Columns: {results_df.columns.tolist()}\")\n", + " cols = [c for c in [\"program\",\"strategy\",\"trait\",\"Enrichment\",\"Enrichment_std_error\",\"h2_obs\"]\n", + " if c in results_df.columns]\n", + " print(results_df[cols].head())\n", + "\n", + " results_df = compute_escore(results_df, control_program=\"AllCoding\")\n", + " e_cols = [c for c in [\"program\",\"strategy\",\"trait\",\"Enrichment\",\"E_score\",\"E_score_z\"]\n", + " if c in results_df.columns]\n", + " print(results_df[e_cols].head(10))" + ] + }, + { + "cell_type": "markdown", + "id": "760d5d76", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 8: Visualise results\n", + "\n", + "Bubble plot replicating sc-linker Fig. 
2d style:\n", + "- dot **size** ∝ E-score\n", + "- dot **opacity** ∝ significance (|E_score_z|)\n", + "- dashed threshold at E-score = 2 (paper significance cutoff)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d13ed3ee", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqUAAAGGCAYAAABCCPg3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAdjFJREFUeJzt3XdYU1cDBvD3hhGU6QIcKLgnUjeOghNxolXR2qJW3Ftr1davbq1b2zprW7Vq3do66kKte49qBawbFXCxN8n5/qBJjawEE8J4f33uU3Nz7r0n15i8OfeccyUhhAARERERkRHJjF0BIiIiIiKGUiIiIiIyOoZSIiIiIjI6hlIiIiIiMjqGUiIiIiIyOoZSIiIiIjI6hlIiIiIiMjqGUiIiIiIyOoZSIiIiIjI6hlJ6b/3794ezs7PGOkmSMH369BztT5IkjBw58v0rRkZz6dIlmJub4/Hjxzpt9z7vGyr4Hj16BEmSsH79emNXRe9Ur+3kyZM6b+vp6Yn+/ftrrOvduzd69eqln8oR5RKGUqK3rF+/HpIkZbpcuHDB2FXMF7766iv06dMHFSpUMHZVckVW75mhQ4cau3pUCE2aNAm7du3CzZs3jV0VIq2ZGrsCVDAlJCTA1DT/vr1mzpwJFxeXdOsrV65shNrkLzdu3MCxY8dw7tw5nbfNz++btm3bws/PL936qlWrGqE2VNh98MEHaNCgARYvXoyNGzcauzpEWsmfn/6U51lYWBi7CpmKi4uDpaVllmW8vb3RoEGDXKqRfiUmJsLc3BwymXEuhPz8888oX748mjRpovO2efl9k52qVavik08+MXY1ckQIgcTERBQpUsTYVcmTlEolkpOT8937s1evXpg2bRpWrlwJKysrY1eHKFu8fF8IxMTEYOzYsXB2doZcLoe9vT3atm2La9euaZS7ePEiOnTogGLFisHS0hKurq5Yvnx5jo75bt/A6dOnQ5Ik3Lt3D/3794ednR1sbW0xYMAAxMfHZ7u/2bNnQyaT4bvvvlOv++OPP9CiRQtYWlrC2toaHTt2xN9//62xXf/+/WFlZYX79++jQ4cOsLa2Rt++fXP0mrT13XffoVatWihatCiKFSuGBg0aYMuWLRplnj17hoEDB6JMmTKQy+VwcXHBsGHDkJycrC7z4MED9OzZE8WLF0fRokXRpEkTHDhwQGM/J0+ehCRJ2Lp1K6ZOnYqyZcuiaNGiiI6OBpD2d9q+fXvY2tqiaNGi8PDwwNmzZzX2oe37Q1t79+5Fq1atIEmSxvorV67Ay8sLJUuWRJEiReDi4oLPPvtMo8zb7xtVH7vMlrdp8zrzMm3OjVKpxPLly1GnTh1YWFigVKlSaN++Pa5cuaIuk5qailmzZqFSpUqQy+VwdnbGl19+iaSkJI19OTs7o1OnTjh8+DAaNGiAIkWKYM2aNQCAyMhIjB07Fk5OTpDL5ahcuTLmz58PpVKZ49enr/d7Zo4fP67+LLCzs0PXrl0RGBioUSajvu/Af59Nb1P1a9+8eTNq1aoFuVyOQ4cO6f7C35Onp2em739t+tW2bdsWcXFxOHr0qOErS6QHbCktBIYOHYqdO3di5MiRqFmzJl6/fo0zZ84gMDAQ9erVAwAcPXoUnTp1QunSpTFmzBg4OjoiMDAQ+/fvx5gxY/RWl169esHFxQXz5s3DtWvXsG7dO
tjb22P+/PmZbjN16lTMnTsXa9aswaBBgwAAv/zyC/r16wcvLy/Mnz8f8fHxWLVqFZo3b47r169rfPmkpqbCy8sLzZs3x6JFi1C0aNFs6xkVFYVXr15prJMkCSVKlMhyux9++AGjR49Gjx49MGbMGCQmJuKvv/7CxYsX8fHHHwMAnj9/jkaNGiEyMhKDBw9G9erV8ezZM+zcuRPx8fEwNzdHeHg4mjZtivj4eIwePRolSpTAhg0b0KVLF+zcuRPdunXTOO6sWbNgbm6Ozz//HElJSTA3N8fx48fh7e2N+vXrY9q0aZDJZPj555/RqlUrnD59Go0aNQKg3ftDW8+ePcOTJ0/SbffixQu0a9cOpUqVwuTJk2FnZ4dHjx5h9+7dme6rVKlS+OWXXzTWpaSkYNy4cTA3N1ev0/Z1Zic2NhZFixbVaGFOSkqCTCaDmZlZttsnJiame88AgI2NjUZ936XtuRk4cCDWr18Pb29v+Pv7IzU1FadPn8aFCxfUrfr+/v7YsGEDevTogQkTJuDixYuYN28eAgMDsWfPHo39BQcHo0+fPhgyZAgGDRqEatWqIT4+Hh4eHnj27BmGDBmC8uXL49y5c5gyZQpCQ0OxbNmybM/Duwzxfn/bsWPH4O3tjYoVK2L69OlISEjAd999h2bNmuHatWsZBlFtHD9+HNu3b8fIkSNRsmRJrfaTkpKC1NRUjRZnIQTi4+OzvTqTka+++gr+/v4a6zZt2oTDhw/D3t4+2+1r1qyJIkWK4OzZs1meQ6I8Q1CBZ2trK0aMGJHp86mpqcLFxUVUqFBBREREaDynVCqz3X+/fv1EhQoVNNYBENOmTVM/njZtmgAgPvvsM41y3bp1EyVKlEi3raq+EyZMEDKZTKxfv179fExMjLCzsxODBg3S2C4sLEzY2tpqrO/Xr58AICZPnpzt6xBCiJ9//lkAyHCRy+XZbt+1a1dRq1atLMv4+fkJmUwmLl++nO451fkeO3asACBOnz6tfi4mJka4uLgIZ2dnoVAohBBCnDhxQgAQFStWFPHx8Rr7qVKlivDy8tL4O4yPjxcuLi6ibdu26nXZvT90cezYMQFA7Nu3T2P9nj17BIAMX/Pb3n3fvGv48OHCxMREHD9+XAih2+vMTHh4uGjevLkAIJydncX169dFXFyc6NSpk5DJZOKXX37Jdh+ZvWcAiF9//TXLbbU5N8ePHxcAxOjRo9M9p3rdN27cEACEv7+/xvOff/65AKA+Z0IIUaFCBQFAHDp0SKPsrFmzhKWlpbh7967G+smTJwsTExPx5MmTLF9LRvT5fn/48KEAIH7++Wd1OTc3N2Fvby9ev36tXnfz5k0hk8mEn5+fel1Gn1NC/PfZ9DYAQiaTib///lvr1/n1118LuVwuzM3Nxfz584UQQqxatUpYW1uL1q1bZ7u96rWdOHEi0zJnz54VZmZm6T5HPTw8RL9+/TLcpmrVqsLb21vr10FkTLx8XwjY2dnh4sWLeP78eYbPX79+HQ8fPsTYsWNhZ2en8dy7l7Xe17sjkVu0aIHXr1+rLzerCCEwcuRILF++HJs2bUK/fv3Uzx09ehSRkZHo06cPXr16pV5MTEzQuHFjnDhxIt1xhw0bplM9V6xYgaNHj2osf/zxR7bb2dnZ4enTp7h8+XKGzyuVSuzduxedO3fOsM+q6nwfPHgQjRo1QvPmzdXPWVlZYfDgwXj06BHu3LmjsV2/fv00Wmdu3LiBf/75Bx9//DFev36tPkdxcXFo3bo1Tp06pb4cm937QxevX78GABQrVkxjvep9tX//fqSkpORo3xs3bsTKlSuxYMECtGzZEoBurzMzX331Fby9vREVFYXhw4ejc+fOGDZsGJRKJR49eqRu4c5O165d071njh49qq5rZrQ5N7t27YIkSZg2bVq6595+zwDA+PHjNZ6fMGECAKS7FO7i4gIvLy+NdTt27ECLFi1QrFgxjX9bbdq0gUKhwKlTp7J8Le8y1PtdJTQ0F
Ddu3ED//v1RvHhx9XpXV1e0bdtWfU5ywsPDAzVr1tSq7NmzZ3H48GE8fPgQZ8+exbJlyzBlyhRMmzYNe/fufa96qISFhaFHjx5wc3PDypUrtd5O9XdJlB/w8n0hsGDBAvTr1w9OTk6oX78+OnToAD8/P1SsWBEAcP/+fQBA7dq1M91HQkICoqKiNNY5OjrqXJfy5ctrPFaFl4iICNjY2KjXb9y4EbGxsVi1ahX69Omjsc0///wDAGjVqlWGx3h7PwBgamqKcuXK6VTPRo0aZTnQ6eXLl1AoFOrHVlZWsLKywqRJk3Ds2DE0atQIlStXRrt27fDxxx+jWbNm6u2io6OzPNcA8PjxYzRu3Djd+ho1aqiff3sf784UoDpHb4f5d0VFRaFYsWLZvj9yQgih8djDwwMfffQRZsyYgaVLl8LT0xM+Pj74+OOPIZfLs93fjRs3MHToUPTp00cjdOnyOjNz8eJFrFmzBjKZDBMnTsT169cREBCA4OBgnS65litXDm3atMn0+djYWMTGxqofm5iYoFSpUlqdm/v376NMmTIawetdjx8/hkwmSzdDhKOjI+zs7NLNGZvR7BL//PMP/vrrL5QqVSrDY7x48SLT42fEUO/3t7cDgGrVqmW47eHDh7Ua2JiRjM5PZo4ePYrPP/8cpUuXRunSpbF+/Xp4eXlhz549mX5O6SI1NRW9evWCQqHA7t27tfo3oyKE0HvjApGhMJQWAr169UKLFi2wZ88eHDlyBAsXLsT8+fOxe/dueHt7a7WPbdu2YcCAARrr3g0e2jAxMclw/bv7atasGW7cuIHvv/8evXr10vgyVrV8/fLLLxkG43enFJLL5Xofid6wYUONL/lp06Zh+vTpqFGjBoKDg7F//34cOnQIu3btwsqVK/H1119jxowZeq3D294dNa06RwsXLoSbm1uG26hG4+rj/aGi6nMbERGhsV6SJOzcuRMXLlzAvn37cPjwYXz22WdYvHgxLly4kOXI4IiICHz00UeoWrUq1q1bl+PXmZW3v7TLlSuHuLi4HPcDzMyiRYs03gMVKlRQD+bK6bnJ7rVkJaOR9kqlEm3btsUXX3yR4Tb5eXqrzM7L2z8u36bLTATvBr+yZctCkiS8fPlSt0pmYuLEiTh//jyOHTum8w/siIgIVKlSRS/1IDI0htJConTp0hg+fDiGDx+OFy9eoF69epgzZw68vb1RqVIlAMDt27czbenx8vLK1RGclStXxoIFC+Dp6Yn27dsjICAA1tbWAKCur729fZYtU4a0efNmJCQkqB+/3apoaWkJX19f+Pr6Ijk5Gd27d8ecOXMwZcoUlCpVCjY2Nrh9+3aW+69QoQKCg4PTrQ8KClI/nxXVObKxsdHqHGX1/tBF9erVAQAPHz7M8PkmTZqgSZMmmDNnDrZs2YK+ffti69at6QZzqCiVSvTt2xeRkZE4duxYukFqur7OjDRs2BDffPMNhg0bhv3792Pr1q3o3LkzevToga1bt6J06dI52u+7/Pz8NC5Pvxt6sjo3lSpVwuHDh/HmzZtMW0srVKgApVKJf/75R93CCADh4eGIjIzU6kYGlSpVQmxsrN7+XRn6/a5an9m2JUuWVP+wKFasGCIjI9OV0/WuYxlp06YNJk2ahMaNGyM6Ohq9e/fG+PHjMXHiRDg7O6NNmzY5bq3cunUrli1bhmXLlsHDw0OnbVNTUxESEoIuXbrk6NhEuY19Sgs4hUKR7rK7vb09ypQpo54mpl69enBxccGyZcvSfWirWjBLly6NNm3aaCyG5urqioMHDyIwMBCdO3dWh0AvLy/Y2Nhg7ty5GfbB01frRFaaNWumcS5UoVTVp1LF3NwcNWvWhBACKSkpkMlk8PHxwb59+zSm8lFRne8OHTrg0qVLOH/+vPq5uLg4rF27Fs7Oztn2datfvz4qVaqERYsWaVwyVlGdI23eH7ooW7YsnJyc0r22iIiIdK3hqpbNrI4zY8YMHD58GL/++muGl1O1f
Z1Z+eabb3D06FEUL14ckydPxubNm7FmzRoUL14cZcqUwaZNm7LdhzYqVqyo8Z5RdenQ5tx89NFHEEJk2Nr+9nsGQLoR8kuWLAEAdOzYMds69urVC+fPn8fhw4fTPRcZGYnU1NRs9/E2Q7/fS5cuDTc3N2zYsEHjs+v27ds4cuSI+pwAaYE7KioKf/31l3pdaGhoulkJcqJFixbo2LEjqlatCjc3N3Tq1AmLFi3C4sWL0a1bN7Rt2zZH+719+zb8/f3xySef5GgWlDt37iAxMRFNmzbN0fGJchtbSgu4mJgYlCtXDj169EDdunVhZWWFY8eO4fLly1i8eDGAtC+OVatWoXPnznBzc8OAAQNQunRpBAUF4e+//87wCyq3NGnSBL/99hs6dOiAHj16YO/evbCxscGqVavw6aefol69eujduzdKlSqFJ0+e4MCBA2jWrBm+//779zruH3/8oW6leVvTpk2z7GvZrl07ODo6olmzZnBwcEBgYCC+//57dOzYUd3SO3fuXBw5cgQeHh4YPHgwatSogdDQUOzYsQNnzpyBnZ0dJk+ejF9//RXe3t4YPXo0ihcvjg0bNuDhw4fYtWtXtt0RZDIZ1q1bB29vb9SqVQsDBgxA2bJl8ezZM5w4cQI2NjbYt2+fVu8PXXXt2hV79uzRuKS5YcMGrFy5Et26dUOlSpUQExODH374ATY2NhrB4W23bt3CrFmz8OGHH+LFixfpwuEnn3yi9evMSqlSpXD8+HEkJiZqTI6+Z88eREVFad1/7+7duxkGWAcHhyxDiTbnpmXLlvj000/x7bff4p9//kH79u2hVCpx+vRptGzZEiNHjkTdunXRr18/rF27FpGRkfDw8MClS5ewYcMG+Pj4ZDvgCki7TPz777+jU6dO6N+/P+rXr4+4uDjcunULO3fuxKNHj1CyZEkAafN+qt6TWU2XZOj3+8KFC+Ht7Q13d3cMHDhQPSWUra2txlzJvXv3xqRJk9CtWzeMHj1aPY1c1apVczwn79u++uorTJ48GUqlUj2F2MCBA/Hpp58iLi4uR/tUdZn68MMP0723svssAtL6uhYtWjTHoZgo1xlhxD/loqSkJDFx4kRRt25dYW1tLSwtLUXdunXFypUr05U9c+aMaNu2rbqcq6ur+O6777I9hi5TQr18+VKjnGoKpocPH2ps++4URb/99pswNTUVvr6+GtMheXl5CVtbW2FhYSEqVaok+vfvL65cuaJRN0tLy2xfw7v1yWx5eyqajKxZs0Z8+OGHokSJEkIul4tKlSqJiRMniqioKI1yjx8/Fn5+fqJUqVJCLpeLihUrihEjRoikpCR1mfv374sePXoIOzs7YWFhIRo1aiT279+vsR/VlFA7duzIsD7Xr18X3bt3V9enQoUKolevXiIgIEAIodv7Q1vXrl1LN73PtWvXRJ8+fUT58uWFXC4X9vb2olOnThp/V0Jovm9Ury2zRZfXaWhZ1dPDwyPLbbU9N6mpqWLhwoWievXqwtzcXJQqVUp4e3uLq1evqsukpKSIGTNmCBcXF2FmZiacnJzElClTRGJiosa+KlSoIDp27JhhfWJiYsSUKVNE5cqVhbm5uShZsqRo2rSpWLRokUhOTlaX++ijj0SRIkXSTSOXEX293zOaEkqItKnImjVrJooUKSJsbGxE586dxZ07d9LV48iRI6J27drC3NxcVKtWTWzatCnTKaH0NU2atjKaEko1dVd2n0WZTQnVuHFj8cknnxi+8kR6IgmRg9EqRERZaN26NcqUKZNu8nsqOBwcHODn54eFCxcauyoFwqNHj+Di4oITJ07A09NTp209PT3h7OyscZenGzduoF69erh27VqmgwCJ8hr2KSUivZs7dy62bduml0EklPf8/fffSEhIwKRJk4xdFcrEN998o57XlCi/YJ9SItK7xo0ba9zXnAqWWrVqpbvhBeUtW7duNXYViHTGllIiIiIiMjqGUiIiIiNzdnaGEELn/qQAcPLkS
Y3+pETffPMNJEnC2LFjsyy3Y8cOVK9eHRYWFqhTp45ebon7PhhKiYiIiAqIy5cvY82aNXB1dc2y3Llz59CnTx8MHDgQ169fh4+PD3x8fLK92YUhcfQ9ERERUQEQGxuLevXqYeXKlZg9ezbc3NzS3VBDxdfXF3Fxcdi/f796XZMmTeDm5obVq1fnUo01FfiBTkqlEs+fP4e1tXWOb/NGRERE+ZMQAjExMShTpky2Nx4xlMTERJ0Hf4q3bkCiIpfLs7yhx4gRI9CxY0e0adMGs2fPznL/58+fx/jx4zXWeXl5Ye/evTrVU58KfCh9/vw5nJycjF0NIiIiMqKQkBCUK1cu14+bmJiIUlbWiFXodpteKyurdLdPnjZtmsadyt62detWXLt2DZcvX9Zq/2FhYXBwcNBY5+DggLCwMJ3qqU8FPpSqbu0YEhICGxsbI9eGiIiIclN0dDScnJzUeSC3JScnI1aRinEutSCXmWi1TZJSgaUP/06XXTJrJQ0JCcGYMWNw9OhRjdsl5zcFPpSqmr5tbGwYSomIiAopY3fhszAxhYWJdqFUUuiWXa5evYoXL16gXr166nUKhQKnTp3C999/j6SkJJi8c2xHR0eEh4drrAsPD4ejo6NWdTQEjr4nIiIiMjCZjosuWrdujVu3buHGjRvqpUGDBujbty9u3LiRLpACgLu7OwICAjTWHT16FO7u7rq+NL0p8C2lRERERMYmSWmLtmV1YW1tjdq1a2uss7S0RIkSJdTr/fz8ULZsWcybNw8AMGbMGHh4eGDx4sXo2LEjtm7diitXrmDt2rW6HVyP2FJKREREZGCGbCnVxpMnTxAaGqp+3LRpU2zZsgVr165F3bp1sXPnTuzduzdduM1NbCklIiIiMjCZJEGmZROotuWycvLkySwfA0DPnj3Rs2fP9z6WvjCUEhERERmY9O+ibdnCiKGUiIiIyMBkUtqibdnCiKGUiIiIyMB06StaWAf8MJQSERERGZj073/ali2MGEqJiIiIDIx9SrPHUEpERERkYOxTmj2GUiIiIiIDk6B9X9FCmkkZSomIiIgMTZIkSFrOP6ptuYKGoZSIiIjIwDj6PnsMpUREREQGxj6l2WMoJSIiIjIwtpRmj6GUiIiIyMA4JVT2GEqJiIiIDIyX77PHUEpERERkYLx8nz2GUiIiIiIDk6S0RduyhRFDKREREZGBySBBpmVvUW3LFTQMpUREREQGJunQp5QtpURERERkEBx9nz2GUiIiIiID40Cn7DGUEhERERlYWijVtk9p4cRQSkRERGRgnKc0e4U1jBMREREVCKtWrYKrqytsbGxgY2MDd3d3/PHHH5mWX79+PSRJ0lgsLCxyscYZY0spERERkYEZsqW0XLly+Oabb1ClShUIIbBhwwZ07doV169fR61atTLcxsbGBsHBwerHUh4Y8s9QSkRERGRghpyntHPnzhqP58yZg1WrVuHChQuZhlJJkuDo6KjTcQyNl++JiIiIDEx1RydtFwCIjo7WWJKSkrI9jkKhwNatWxEXFwd3d/dMy8XGxqJChQpwcnJC165d8ffff+vrpeYYQykRERGRgcl0XADAyckJtra26mXevHmZ7v/WrVuwsrKCXC7H0KFDsWfPHtSsWTPDstWqVcNPP/2E3377DZs2bYJSqUTTpk3x9OlTfb3cHOHleyIiIiIDy8nk+SEhIbCxsVGvl8vlmW5TrVo13LhxA1FRUdi5cyf69euHP//8M8Ng6u7urtGK2rRpU9SoUQNr1qzBrFmztKyl/jGUEhERERmYTJIg03IwkaqcajS9NszNzVG5cmUAQP369XH58mUsX74ca9asyXZbMzMzfPDBB7h3755WxzIUXr4nIiIiMjBJx+V9KZVKrfqgAmn9UG/duoXSpUvr4cg5x5ZSIiIionxsypQp8Pb2Rvny5RETE4MtW7bg5MmTOHz4MADAz88PZcuWVfdJnTlzJpo0aYLKlSsjMjISCxcuxOPHj+Hv7
2/Ml8FQSkRERJQbDDUT6IsXL+Dn54fQ0FDY2trC1dUVhw8fRtu2bQEAT548gUz238XxiIgIDBo0CGFhYShWrBjq16+Pc+fOZTowKrdIQghh1BoYWHR0NGxtbREVFaV1vwwiIiIqGIydA1TH31WvESxNtGsLjFOk4qNrlwpddmFLKREREZGB5WT0fWHDUEpERERkYG/PP6pN2cKosL5uIiIiIspD2FJKREREZGDSv/9pW7YwYiglIiIiMjD2Kc0eQykRERGRgTGUZo+hlIiIiMjAZFLaom3ZwoihlIiIiMjA2FKaPY6+JyIiIiKjY0spERERkYFJACQtm0ALa0spQykRERGRgfHyffYYSomIiIgMTAYJMi3jprblChqGUiIiIiIDY0tp9hhKiYiIiAxMknToU1pIUylH3xMRERGR0bGllIiIiMjApH//07ZsYcRQSkRERGRgkg53dCqsl+8ZSomIiIgMjAOdssdQSkRERGRgDKXZYyglIiIiMjjG0uxw9D0RERGRgammhNJ20cWqVavg6uoKGxsb2NjYwN3dHX/88UeW2+zYsQPVq1eHhYUF6tSpg4MHD77Hq9MPhlIiIiIiA5PpuOiiXLly+Oabb3D16lVcuXIFrVq1QteuXfH3339nWP7cuXPo06cPBg4ciOvXr8PHxwc+Pj64fft2Dl+dfkhCCGHUGhhYdHQ0bG1tERUVBRsbG2NXh4iIiHKRsXOA6vhnGjWFlal2vSZjU1PR/NK596pz8eLFsXDhQgwcODDdc76+voiLi8P+/fvV65o0aQI3NzesXr06R8fTB7aUEhERERmYJEk6LTmlUCiwdetWxMXFwd3dPcMy58+fR5s2bTTWeXl54fz58zk+rj5woBMRERGRgeVkmFN0dLTGerlcDrlcnuE2t27dgru7OxITE2FlZYU9e/agZs2aGZYNCwuDg4ODxjoHBweEhYVpWUPDYEspERERUR7k5OQEW1tb9TJv3rxMy1arVg03btzAxYsXMWzYMPTr1w937tzJxdq+P7aUEhERERlYTm4zGhISotGnNLNWUgAwNzdH5cqVAQD169fH5cuXsXz5cqxZsyZdWUdHR4SHh2usCw8Ph6Ojo1b1MxS2lBIREREZmEzSbQGgnuJJtWQVSt+lVCqRlJSU4XPu7u4ICAjQWHf06NFM+6DmFraUEhERERmYJJMgybRsKdWynMqUKVPg7e2N8uXLIyYmBlu2bMHJkydx+PBhAICfnx/Kli2rvvw/ZswYeHh4YPHixejYsSO2bt2KK1euYO3atbq9KD1jKCUiIiIyMF0mxdd18P2LFy/g5+eH0NBQ2NrawtXVFYcPH0bbtm0BAE+ePIFM9t/F8aZNm2LLli2YOnUqvvzyS1SpUgV79+5F7dq1dTuwnnGeUiIiIiqwjJ0DVMe/3LyFTvOUNjxzutBlF7aUEhERERmYIVtKCwqGUiIiIiIDy8k8pYUNQykRERGRgelyp6b3uaNTfsZQSkRERGRoOly+L6xNpQylRERERAYmkyTItEyl2pYraBhKiYiIiAyNnUqzxVBKREREZGDsU5o9hlIiIiIiA+OUUNljKCUiIiIyMLaUZo+hlIiIiMjAJOjQUmrQmuRdDKVEREREBsaW0uwxlBIREREZGPuUZo+hlIiIiMjAGEqzx1BKREREZGCcpjR7DKVEREREBsY+pdljKCUiIiIyMF6+zx5DKREREZGBMZRmj6GUiIiIyNBkUtqibdlCiKGUiIiIyMA40Cl7DKVEREREBpbWUKpd3CykDaUMpURERESGxj6l2ZMZuwJEREREBd6/U0Jps+iaSufNm4eGDRvC2toa9vb28PHxQXBwcJbbrF+/Pt1xLSws3ucVvjeGUiIiIiIDU2VNbRdd/PnnnxgxYgQuXLiAo0ePIiUlBe3atUNcXFyW29nY2CA0NFS9PH78+D1e4fvj5XsiIiIiQzPg9ftDhw5pPF6/fj3s7e1x9epVfPjhh1kcRoKjo6PWx
3ny5IlO9VKxs7ODjY1NtuUYSomIiIgMLDf7lEZFRQEAihcvnmW52NhYVKhQAUqlEvXq1cPcuXNRq1atTMs7OztDkiQIIbSuiyRJmDZtGr7++utsyzKUEhERERmY9O9/2pYFgOjoaI31crkccrk8y22VSiXGjh2LZs2aoXbt2pmWq1atGn766Se4uroiKioKixYtQtOmTfH333+jXLlyme7bkNinlIiIiMjAJJluCwA4OTnB1tZWvcybNy/b44wYMQK3b9/G1q1bsyzn7u4OPz8/uLm5wcPDA7t370apUqWwZs0afbzcHGEoJSIiIjKwnAx0CgkJQVRUlHqZMmVKlscYOXIk9u/fjxMnTmTa2pkZMzMzfPDBB7h3755W5T/77DNs2LAh3fro6Gh89tlnOh1bhaGUiIiIyNAk6b9bjWa3/JtKbWxsNJbMLt0LITBy5Ejs2bMHx48fh4uLi87VUygUuHXrFkqXLq1V+fXr12P48OEYPXq0xmX9hISEDMOqNhhKiYiIiAzNgHNCjRgxAps2bcKWLVtgbW2NsLAwhIWFISEhQV3Gz89Po6V15syZOHLkCB48eIBr167hk08+wePHj+Hv76/1cQ8cOICDBw/Cy8sLEREROtU5IwylRERERAYm6fifLlatWoWoqCh4enqidOnS6mXbtm3qMk+ePEFoaKj6cUREBAYNGoQaNWqgQ4cOiI6Oxrlz51CzZk2tj1uzZk1cvHgRKSkpaNSoEQIDA3Wq97s4+p6IiIjI0FSX5rUtqwNtpmg6efKkxuOlS5di6dKlOh3nbdK/rbklSpTAsWPHMHToULi7u2PhwoU53idDKREREZGh5eZEpbng7SBsamqKdevWoWbNmhg+fHiO98lQSkRERGRoMmjfaTIfdK48ceJEusn5x48fD1dXV5w9ezZH+2QoJSIiIjI46d9F27J5m4eHR4br27RpgzZt2uRonwylRJSOUCqBpDggMQ5ITQZU031IEmBiBlhYAhaWkEz4EUJEpA1JJkHSsq+otuUKGn6jEBEAQCTFA5HhEPHRQFK8dtuYyYEi1pBs7SFZ2hq4hkRE+VgB61NqCAylRIWYEAKIeQMRGQbER2e/wbtSkoCUJIjoVxDmRSDZOQC2pdiCSkT0LgOOvi8o+M1BVEiJxFiI5/eA5ITsC2sjOQHixSPg9VPAwQWSTUn97JeIqACQJEk9jZI2ZQujfDC+i4j0SQglxMsnEI9u6S+Qvk2RCvH8HyifBUOkpuh//0RE+ZEB7+hkLKdPn8Ynn3wCd3d3PHv2DADwyy+/4MyZMznaH0MpUSEiUpIgHt2CeP3M8AeLeQPx8AZEXJThj0VElNdJOi553K5du+Dl5YUiRYrg+vXrSEpKAgBERUVh7ty5OdonQylRISGSEiAe39Z6EJNeKFIhngZCxLzJvWPmY0KRChEfAxEZDvEqJK1F++UTiNfPIWIjIJITtbpzCxHlPZJMtyWvmz17NlavXo0ffvgBZmZm6vXNmjXDtWvXcrRP9iklKgREciJEyN+AMS6nCwHx7C5Qrhokq2K5f/w8TgglEBcFxLwGErX4wWBiAmFVHLAuAcnM3PAVJCL9KGCj74ODg/Hhhx+mW29ra4vIyMgc7TMfZHEieh9CkQoREmicQPpfLSCe3YVIjDViHfIeEfMGCAkCXoZoF0gBQKEAol4Cz4LSWlEVqYatJBHphQRJPdgp2yUfXL93dHTEvXv30q0/c+YMKlasmKN96hxKK1asiNevX6dbHxkZmeNKEJHhiBePgJREY1cDEEqI5/fSJuYv5ERqCkT4Q+DVUyCnoVIAiI0Ent1lv12i/KCADXQaNGgQxowZg4sXL0KSJDx//hybN2/G559/jmHDhuVonzpfvn/06BEUCkW69UlJSeqRV0SUN4jYiLRWtbwiOSFtyqhS5Y1dE6MRyYlA2IOch9F3KVKBF48h7BwgFXPQzz6JSP8K2DylkydPhlKpROvWrREfH48PP/wQcrkcn
3/+OUaNGpWjfWodSn///Xf1nw8fPgxb2//u3qJQKBAQEABnZ+ccVYKI9E8oUiHCHhi7GumI188Aq+KQilgZuyq5Li2Q3k+7BK9vkeEQEJCKOep/30T03tIaQLWdp9TAlXlPCoUCZ8+exYgRIzBx4kTcu3cPsbGxqFmzJqyscv7ZrnUo9fHxAZB2Qvv166fxnJmZGZydnbF48eIcV4SI9OzN87T71udB4uVjSOVrGbsauUooFUD4Q8MEUpXIFxBmcg4oI8qLCtBAJxMTE7Rr1w6BgYGws7NDzZo19bJfrUOp8t9+YC4uLrh8+TJKluTdWojyKiGUEJEvjF2NzMVHQyTFQ5IXNXZNcs+b57kz2Oz1cwgLK0imZtmXJaJcI8kkSFpelte2nDHVrl0bDx48gIuLi972qfNAp4cPHzKQEuV1MW8ARR6/m1JkuLFrkGtEQgwQE5E7B1Mq0vrtElHeoupTqu2Sx82ePRuff/459u/fj9DQUERHR2ssOaFVS+m3336LwYMHw8LCAt9++22WZUePHp2jihCR/oiIvB/4RNRLoFR5SDITY1fF8HL77yM+pvC1RBPldQXo8j0AdOjQAQDQpUsXjb6yQghIkpThoPjsaBVKly5dir59+8LCwgJLly7NtJwkSQylREYmFKlAQs5+peYqpQKIjwKsihu7JgYlkhJy9y5aKjGvAYZSoryjgIXSEydO6H2fWoXShw8fZvhnolwjBKBITuuTJ0TaP1gTM8DUPF/8481V+WmC+sS4Ah9KEZN+XudcERsJUbxM4WiJJsoPClgo9fDw0Ps+jXpHJzc3N8TExAAAnJ2dcePGDa22a9CgAU6ePGm4ilHeolSkBa3kxLQ/C2Xa/1MSgcSYtD/TfxLjjF0DrYl8VNccM9aPBCG0v0sUERmcLve9l3RMZ/PmzUPDhg1hbW0Ne3t7+Pj4IDg4ONvtduzYgerVq8PCwgJ16tTBwYMHdTpuZGQkFi9eDH9/f/j7+2Pp0qWIisr5zTx0njwfAJ4+fYrff/8dT548QXKy5pQzS5Ys0Xo/2oZQKsSESLv0KTK5C5DqeQurfPHLMjfkq1t5JuSjuuaAUCiAFCNOy5UcDxS1Nt7xieg/Bmwp/fPPPzFixAg0bNgQqamp+PLLL9GuXTvcuXMHlpaWGW5z7tw59OnTB/PmzUOnTp2wZcsW+Pj44Nq1a6hdu3a2x7xy5Qq8vLxQpEgRNGrUCEBaBpwzZw6OHDmCevXq6fQagByE0oCAAHTp0gUVK1ZEUFAQateujUePHkEIoXMFJElCREQE7OzsAACbN2/GwIEDERUVhSFDhmDixIkA0k7c8OHDkZqaqj7heYIQaf9nGDIcRUrmgVRFKNPKmZrnTp3yuuQ8cEtRbSlSIBQKSCYF9BJzcoJxj59k5OPnQ0IIQKmAZJKjNhvKIZGaDEiygn3eDRhKDx06pPF4/fr1sLe3x9WrV/Hhhx9muM3y5cvRvn17ddaaNWsWjh49iu+//x6rV6/O9pjjxo1Dly5d8MMPP8DUNO3vLTU1Ff7+/hg7dixOnTql02sAcnD5fsqUKfj8889x69YtWFhYYNeuXQgJCYGHhwd69uypcwXeFh4ejitXruDChQv47rvvcO7cOSQnJ8PX1xeLFi3C7du30adPH9y8eTPTfSQlJellWoJsKVLTLsslxfHysSFpO61Rbsz/mF/kt3vLiwL878fY03Ip88gP+HxCKBXA87vAkzsQhWjKMmMTr55CeXo3lOd+g4jL+aXfvE6SJJ0WAOnyTFJSklbHUl1CL1488z7758+fR5s2bTTWeXl54fz581od48qVK5g0aZI6kAKAqakpvvjiC1y5ckWrfbxL51AaGBgIPz8/9cETEhJgZWWFmTNnYv78+TmqhMrAgQMhSRJKliyJ7t2749ixYwgKCoKpqan6xLVr1w4VK1bMdB/z5s2Dra2tenFycnqvOmVKdd/qf39Vk4GoWqOzL2jQauQr2bUs5zVa/x2Tz
nhudZOSBCT/+6UfF2nUqhQm4uWztM+tlCSIN2HGro7hSDrMUfpvKHVyctLINPPmzcv2MEqlEmPHjkWzZs2yvAwfFhYGBwcHjXUODg4IC9Pu78DGxgZPnjxJtz4kJATW1jnrNqRzO7mlpaW6H2np0qVx//591KqVdrvAV69e5agSmcnsHrFZ3Tt2ypQpGD9+vPpxdHS0YYKpqdm/rRASICvAlxuMTZIB0CL0swvFf3TtIW9s+a2+OjHy+5L/LnRjbpHWBzcxDrApZezaFBpSmUoQr58DZnJIpQzUkJQX5ODyfUhICGxsbNSr5XJ5tpuOGDECt2/fxpkzZ3JUTW35+vpi4MCBWLRoEZo2bQoAOHv2LCZOnIg+ffrkaJ86p6kmTZrgzJkzqFGjBjp06IAJEybg1q1b2L17N5o0aZKjSqisX78eHh4eePPmDfbs2YNff/0V1atXR2pqKk6cOIGWLVvi2LFjuH//fqb7kMvlWv2lvTeZSdrgGjIsUzPtLoGa8JaKavltCiBZAQ6lxr7VJ/9d6ESSZICD/m6ZSNqRbEvCpHk3Y1fD8HIQSm1sbDRCaXZGjhyJ/fv349SpUyhXrlyWZR0dHREertlNJTw8HI6Ojloda9GiRZAkCX5+fuqxPmZmZhg2bBi++eYbrev8Np1D6ZIlSxAbmzZidsaMGYiNjcW2bdtQpUoVnUbeZ6RUqVKoX78+oqKiMHLkSHXy3rZtG4YPHw6FQoGGDRuibt2673UcykdkpmkhK6suEjITfvm+TV4kra9zfmAmL9jzaJoXSWssNdZVdHkRIx2YiNKRoP3FEx0vcgghMGrUKOzZswcnT57U6n707u7uCAgIwNixY9Xrjh49Cnd3d62OaW5ujuXLl2PevHnqxsJKlSqhaNGc37RDp1CqUCjw9OlTuLq6Aki7lK/NCK3MiLf6Oz169CjTck2bNuX0UYWVJKXdlSYpPuNgKjNJe56XKdUkCyuIaP12pTEYi4ynKikoJJkMwkz+Xz/F3GbOOzoR5Rm6TECqY7emESNGYMuWLfjtt99gbW2t7hdqa2uLIkXSfpz6+fmhbNmy6n6pY8aMgYeHBxYvXoyOHTti69atuHLlCtauXavVMaOioqBQKFC8eHHUqVNHvf7NmzcwNTXVqYVXRadXbWJignbt2iEiIkLnAxHlmCQD5JZpi4kZYGKa9n950bR1BbpPYg7ko6AnFYYuMEWMNE+o6gcbEeUNqsv32i46WLVqFaKiouDp6YnSpUurl23btqnLPHnyBKGhoerHTZs2xZYtW7B27VrUrVsXO3fuxN69e7WaoxQAevfuja1bt6Zbv337dvTu3Vun+qvofPm+du3aePDggVZNw0R6I0n/hlEOKstWPgql+aquOWVdAogyQsu1VTFIBbm/LlF+Y8B5SoUWM21kdCfMnj175ng6z4sXL2bYbdPT0xNfffVVjvap8yfW7Nmz8fnnn2P//v0IDQ3NnTlBiUhrkswEsCpm7Gpkz8QMKKr75Z38RjKTA0WM0CJsUyL3j0lEmTNgS6kxJCUlZXgzo5SUFCQk5OzGHVqH0pkzZyIuLg4dOnTAzZs30aVLF5QrVw7FihVDsWLFYGdnh2LF8sEXIVEhINk5ZF/IyCQ7+7TRzoVBMcfcnR3KulhaGCaivEOCDqHU2JXNXqNGjTLsf7p69WrUr18/R/vU+lrojBkzMHToUJw4cSJHByKiXGRpB5hZACl5+Jaj+SA464skLwphaw9EvjD8wUzNgOJlDH8cItKNAS/fG8Ps2bPRpk0b3Lx5E61btwaQdiv6y5cv48iRIznap9ahVNVfwcPDI0cHIqLcI0kSUMwB4sVjY1clY1aFsCXPzh6IjwGSDXg/eglAyXIFe5otovyqgIXSZs2a4fz581i4cCG2b9+OIkWKwNXVFT/++COqVKmSo33qNGokqzspEVEeY+eY1jJnyBCUE5IMkr2zsWuR6yRJBuHoDITeB1KSDXOQEuUgGWu0PxFlr
YCFUgBwc3PD5s2b9bY/nUJp1apVsw2mb968ea8KEZF+SDIZULoyxONbxq6KBqlUeUjmFsauhlFIJmYQjpWA8Af6nbtUAlDSCVJ+GOBGVFjJZNrfwS4fzJxx7do1mJmZqeco/e233/Dzzz+jZs2amD59OszNzXXep06hdMaMGbC1tdX5IERkHFIRK6B4GYg3z41dlTRFbNIG/RRikqkZROkqQEQYoI+bHJhbpF2y55ykRHmbAe/oZAxDhgzB5MmTUadOHTx48AC+vr7o3r07duzYgfj4eCxbtkznfeoUSnv37g17e3udD0JERlTSCYiPBhJjjVsPE1NIpSuxGxD+bcUuUQbC0gZ4Ewok5aCLhUwG2JQECtMsBkT5WsFKpXfv3oWbmxsAYMeOHfDw8MCWLVtw9uxZ9O7d27ChlF8kRPmTJJMBTjUgHt82Xv9SmQkkpxqF9rJ9ZiQLK6BMFYik+LRW0/hoQKnMYgMA5kUA6+KAJSfHJ8pXClifUiEElP9+Xh07dgydOnUCADg5OeHVq5xdBdJ59D0R5T+SiSlQvhZEyB0gKT53D25iCqlcjcJxS9EckuRFgVLl0z5nU5PTWk5TkwGhTPtykpmkhVHzIgyiRPmUJMm0/vebH65+NGjQQD0t1J9//olVq1YBAB4+fAgHh5xN+ad1KFVm9eudiPI8ydQsLZg+/weIi8ydg5oXgVS2Kvs7akmSJMBMnrYQUcFSwFpKly1bhr59+2Lv3r346quvULlyZQDAzp070bRp0xztkzcSJypEJBNTSE41ICJfQLx4BCgVhjtW8TJpI8LZskdEVNC6lMLV1RW3bqWf3WXhwoUwMcnZXMn8tiAqhCQ7e0guddPu/KRv5kUgVagNyb4CAykRkZqk45J/DB8+XN2P1MLCAmZmZjnaD78xiAopyUwOmVMNSM6ugK098L59mKyKQSpXHZJLXU7gTkT0LtU8pdou+cimTZsQHR393vvh5XuiQk6ysIRUuhKEfQUg+iVE3L/TR6Vmc9chE1PAwjItgNraF77bhhIR6aKA9Sl9m74GwzOUEhGAf0foFysNqVhpAIBQpAAJcf+NAodIa001MUsLowyhRETaK8ChVF8YSokoQ5KJGWBlZ+xqEBEVDAU4lMbExOhlP/mr0wIRERFRfqQKpdouedT27duRnPxf966nT59qTBsaHx+PBQsW5GjfDKVEREREhiZBh1Bq7Mpmrk+fPoiMjFQ/rlmzJh49eqR+HBMTgylTpuRo37x8T0RERGRoBeTy/buDmvR5x0+GUiIiIiJD02Wqp3w2JZS+FM5XTURERJSbDNin9NSpU+jcuTPKlCkDSZKwd+/eLMufPHkSkiSlW8LCwt7jBb4/tpQSERERGZoBbzMaFxeHunXr4rPPPkP37t213i44OBg2Njbqx/b29lptd/jwYdja2gIAlEolAgICcPv2bQDQ6G+qK4ZSIiIiIoMzXCr19vaGt7e3zjWyt7eHnZ2dztv169dP4/GQIUM0Hks57BPLy/dEREREhpaD24xGR0drLElJSXqtkpubG0qXLo22bdvi7NmzWm2jVCqzXRQKRY7qw1BKREREZGg56FPq5OQEW1tb9TJv3jy9VKV06dJYvXo1du3ahV27dsHJyQmenp64du2aXvafU7x8T0RERGRwugxgSisXEhKi0edTLtfP7Z2rVauGatWqqR83bdoU9+/fx9KlS/HLL7/o5Rg5wVBKREREZGg5mKfUxsZGI5QaUqNGjXDmzJksy7i4uOSov+jYsWMxevTobMsxlBIREREZWh6fPP/GjRsoXbp0lmXWr1+fo307OztrVY6hlIiIiMjQJFnaom1ZHcTGxuLevXvqxw8fPsSNGzdQvHhxlC9fHlOmTMGzZ8+wceNGAMCyZcvg4uKCWrVqITExEevWrcPx48dx5MiRLI/j4eGhU710xVBKREREZGgGbCm9cuUKWrZsqX48fvx4AGlTN61fvx6hoaF48uSJ+vnk5GRMm
DABz549Q9GiReHq6opjx45p7MMYJKHPm5bmQdHR0bC1tUVUVFSu9csgIiKivMHYOUB1/Dfr58GmqIV228Qnonj/KYUuu7CllIiIiMjQ8nif0ryAoZSIiIjI4CRoPz08QykRERERGQJbSrPFUEpERERkaDIpbdG2bCHEUEpERERkaGwpzRZDKREREZGhGXCe0oKCoZSIiIjI0NhSmi2GUiIiIiJDY0tpthhKiYiIiAxNgvYzPRXOhlKGUiIiIiKDY0tpthhKiYiIiAyNU0Jli6GUiIiIyOB4/T47DKVEREREBqfD5Xutb0dasDCUEhERERmaDNpnzcKZSRlKiYiIiAyOA52yxVBKREREZGicPD9bDKVEREREBidB++vyDKVEREREZAjsU5othlIiIiIiQ2Of0mwxlBIREREZGvuUZqtwRnEiIiKi3CST6bbo4NSpU+jcuTPKlCkDSZKwd+/ebLc5efIk6tWrB7lcjsqVK2P9+vU5e116xFBKREREZHCSjov24uLiULduXaxYsUKr8g8fPkTHjh3RsmVL3LhxA2PHjoW/vz8OHz6s03H1jZfviYiIiAzNgJfvvb294e3trXX51atXw8XFBYsXLwYA1KhRA2fOnMHSpUvh5eWl07H1iS2lRERERIamCqXaLgZ0/vx5tGnTRmOdl5cXzp8/b9DjZoctpURERESGpktf0X/LRUdHa6yWy+WQy+XvXZWwsDA4ODhorHNwcEB0dDQSEhJQpEiR9z5GTrCllIiIiMjQctBS6uTkBFtbW/Uyb948I78Iw2JLKREREZGh5aBPaUhICGxsbNSr9dFKCgCOjo4IDw/XWBceHg4bGxujtZICDKVEREREuUCXUfVp5WxsbDRCqb64u7vj4MGDGuuOHj0Kd3d3vR9LF7x8T0RERGRoqjs6abvoIDY2Fjdu3MCNGzcApE35dOPGDTx58gQAMGXKFPj5+anLDx06FA8ePMAXX3yBoKAgrFy5Etu3b8e4ceP09nJzgi2lRERERIYmSYDMMFNCXblyBS1btlQ/Hj9+PACgX79+WL9+PUJDQ9UBFQBcXFxw4MABjBs3DsuXL0e5cuWwbt06o04HBTCUEhERERmeAecp9fT0hBAi0+czuluTp6cnrl+/rtNxDI2hlIiIiMjQJOgQSg1akzyLoZSIiIjI4GTQfihP4Rzyw1BKREREZGCSJEHSsqVU23IFDUMpERERkaEZsE9pQcFQSkRERGRoukz1pOOUUAUFQykRERGRocl0mBJK23IFDEMpERERkaGxpTRbDKVEREREBqf7bUYLG4ZSIiIiIkPjQKdsMZQSERERGRpDabYYSomIiIgMjX1Ks8VQSkRERGRoHH2fLYZSIiIiIoPjQKfsMJQSERERGRr7lGaLoZSIiIjI0CRJhz6lDKVEREREZAhsKc0WQykRERGRwekQStmnlIiIiIgMQiZLW7QtWwgxlBIREREZmPTvf9qWLYwYSomIiIgMjZPnZ4uhlIiIiMjQONApW4UzihMRERHlJlUo1XbJgRUrVsDZ2RkWFhZo3LgxLl26lGnZ9evXQ5IkjcXCwiKnr04vGEqJiIiIDM3AoXTbtm0YP348pk2bhmvXrqFu3brw8vLCixcvMt3GxsYGoaGh6uXx48fv8wrfG0MpERERkaGpRt9ru+hoyZIlGDRoEAYMGICaNWti9erVKFq0KH766adMt5EkCY6OjurFwcHhfV7he2MoJSIiIjI4ScdFe8nJybh69SratGmjXieTydCmTRucP38+0+1iY2NRoUIFODk5oWvXrvj77791e0l6xlBKREREZGg5uHwfHR2tsSQlJWW461evXkGhUKRr6XRwcEBYWFiG21SrVg0//fQTfvvtN2zatAlKpRJNmzbF06dP9fu6dcBQSkRERGRoOQilTk5OsLW1VS/z5s3TW3Xc3d3h5+cHNzc3eHh4YPfu3ShVqhTWrFmjt2PoilNCERERERmcLpfl08qFhITAxsZGv
VYul2dYumTJkjAxMUF4eLjG+vDwcDg6Omp1RDMzM3zwwQe4d++elnXUP7aUEhERERmaBB1aStM2sbGx0VgyC6Xm5uaoX78+AgIC1OuUSiUCAgLg7u6uVfUUCgVu3bqF0qVLv+8rzTG2lBIREREZmoEnzx8/fjz69euHBg0aoFGjRli2bBni4uIwYMAAAICfnx/Kli2r7gIwc+ZMNGnSBJUrV0ZkZCQWLlyIx48fw9/fX+dj6wtDKREREZHB6X75Xhe+vr54+fIlvv76a4SFhcHNzQ2HDh1SD3568uQJZG9NNRUREYFBgwYhLCwMxYoVQ/369XHu3DnUrFlT52PriySEEEY7ei6Ijo6Gra0toqKiNPplEBERUcFn7BygOn7kPzdgY22t3TYxMbCr4lbosgv7lBIRERGR0fHyPREREZHBGfbyfUHAUEpERERkaAYe6FQQMJQSERERGRpDabYYSomIiIgMjpfvs8NQSkRERGRobCnNFkMpERERkcGxpTQ7DKVEREREhsZMmi2GUiIiIiKD0+HyfSFNpQylRERERAbHptLsMJQSERERGZgkSZC0bCnVtlxBw1BKREREZHBsKc0OQykRERGRwTGUZoehlIiIiMjQmEmzxVBKREREZGicPD9bDKVEREREBsem0uwwlBIREREZGltKs8VQSkRERGRobCjNFkMpERERkcExlWaHoZSIiIjI0CTocPneoDXJsxhKiYiIiAyOLaXZkRm7AkRERET0/lasWAFnZ2dYWFigcePGuHTpUpbld+zYgerVq8PCwgJ16tTBwYMHc6mmGWMoJSIiIjI01eh7bRcdbdu2DePHj8e0adNw7do11K1bF15eXnjx4kWG5c+dO4c+ffpg4MCBuH79Onx8fODj44Pbt2+/7yvNMUkIIYx29FwQHR0NW1tbREVFwcbGxtjVISIiolxk7BygPn7oE62PHx0dDdvS5XWqc+PGjdGwYUN8//33AAClUgknJyeMGjUKkydPTlfe19cXcXFx2L9/v3pdkyZN4ObmhtWrV2t1TH1jSykRERGRwUk6LtpLTk7G1atX0aZNG/U6mUyGNm3a4Pz58xluc/78eY3yAODl5ZVp+dxQ4Ac6qRqCo6OjjVwTIiIiym2q739jXxiOjonV+rJ8dExs2v/fyS5yuRxyuTxd+VevXkGhUMDBwUFjvYODA4KCgjI8RlhYWIblw8LCtKqjIRT4UBoTEwMAcHJyMnJNiIiIyFhiYmJga2ub68c1NzeHo6MjnKrW0Gk7KyurdNll2rRpmD59uh5rl7cU+FBapkwZhISEwNraGpKeb9sVHR0NJycnhISEsL9qLuJ5z30858bB8577eM6Nw5DnXQiBmJgYlClTRq/71ZaFhQUePnyI5ORknbYTQqTLLRm1kgJAyZIlYWJigvDwcI314eHhcHR0zHAbR0dHncrnhgIfSmUyGcqVK2fQY9jY2PDDywh43nMfz7lx8LznPp5z4zDUeTdGC+nbLCwsYGFhYbD9m5ubo379+ggICICPjw+AtIFOAQEBGDlyZIbbuLu7IyAgAGPHjlWvO3r0KNzd3Q1Wz+wU+FBKREREVNCNHz8e/fr1Q4MGDdCoUSMsW7YMcXFxGDBgAADAz88PZcuWxbx58wAAY8aMgYeHBxYvXoyOHTti69atuHLlCtauXWu018BQSkRERJTP+fr64uXLl/j6668RFhYGNzc3HDp0SD2Y6cmTJ5DJ/pt0qWnTptiyZQumTp2KL7/8ElWqVMHevXtRu3ZtY70EhtL3IZfLMW3atEz7eJBh8LznPp5z4+B5z30858bB864fI0eOzPRy/cmTJ9Ot69mzJ3r27GngWmmvwE+eT0RERER5HyfPJyIiIiKjYyglIiIiIqNjKCUiIiIio2MoJSIiIiKjYyjVEseD5R6e69yVkpKi8ZjnnwoqIQSUSqX6z0SUtzCUZiMoKAgAIEkSP8QMLCIiAvHx8YiIiDB2VQqNu3fvYsiQIZg0a
RKWL18OAHq/HS9lLCUlJd0PAjKcoKAgjBo1Cr6+vjh69CgkSVIHVDKsqKgohIWF8f1O2WIozYAqfD58+BCdOnXC6NGjATCYGlJgYCB69uyJPn36YOzYsTh79qyxq1TgBQYGokePHqhTpw6sra1x4cIFBAQEGLtahcKdO3cwcOBAdOnSBRs3bkRcXJyxq1SgBQYG4tNPP0W1atVQr149+Pr64sqVKxoTiZNh3LlzBz4+PujRoweGDx+Oy5cvG7tKlIfxX2QGJEnCb7/9hi+++AINGjTAvn37MHz4cPVzDKb6FRwcDF9fXwwaNAhffvklHB0d8eeffwIAWzIMJCoqCiNHjsTgwYMxbtw4jBs3DomJibh06ZKxq1bgqQKSh4cHPvroIyxYsIA/wgwoIiICY8eOxbBhwzBq1ChMmTIFfn5+OHLkiLGrVuAFBwfj448/xpAhQ7B161ZERUVh165dxq4W5WEMpRn4559/MGXKFMyYMQMbN27E7t27ceXKFYwfPx4Ag6k+paSk4IcffkCfPn3g6+uLxo0bo3bt2jhy5AiUSiVbMgzE1NQU48aNw4ABAyCEgKWlJTw9PfHixQtjV61Ai4mJwbRp0zB48GAMHDgQ/v7+6NmzJ3bv3m3sqhVYcrkcPXv2hK+vr/pHrrW1Ne7fv2/kmhVsycnJ2LRpEwYMGIDevXujXLlyGDduHM6fP4+kpCSN71B+n5IKbzOagaioKFhbW6N69eqQyWRwdXVFr169sGDBAhQtWhSzZ89mvzs9MTMzw2effYaSJUsCSPtwcnV1hampqfocx8TEwNra2pjVLHAsLS3RokULWFpaqteZmpoiNjYWAHD+/HncvHkTQ4cONVYVCyRzc3P06NED7du3hxACkiTBwcEBf/31l7pMamoqTE350awPQggULVoU/fr1g5mZmXp9zZo1kZiYCAC4cOEC3rx5A29vb36u65G5uTm6dOmCsmXLAkhrgJDL5YiMjERycjLkcjmio6NhY2PD805qbIZC+l9pVapUQenSpdV9vUxMTFCpUiX4+fnh3LlzuH79upFqWjBVrVoV9vb2ANJaoW1sbJCSkgJJknDq1CmMHj2ag5/0SKFQAABsbW01HicmJsLZ2Rn//PMPhg0bhipVqhitjgWREAJyuRzdunXT+CKuUqUKihUrBiDtx8DGjRuRnJxszKoWGKpzrAqkqs/6pKQkFC9eHDdv3sSnn34KExMTBiM9UrVIN2zYEGXKlAGQ9nfg6OgIW1tbWFtb4+zZs5g6dSpev35tzKpSHsNQirQPrj///BMbNmzAwYMHYWtrCy8vL5w4cQKjR4/G5s2bMWHCBHTu3BmOjo7Grm6+p/rCVX1BvNsqlJCQgLJly+LUqVMYNWoUevXqpf7Sppx5/vw5VqxYAQAwMTHR6KtrYmICAHB0dMTOnTvRr18/zJw5E61btzZKXQsa1fs8s4CUkJCAEiVK4OrVq+jfvz/Kli0Lc3Nz41S2AHj06BFOnz6d4XOqvwNJkrB69WoMGDAAy5cvh5eXV25WscBKSkoCAMhksgwvyZuZmcHZ2RnHjx/HsGHD0K5dO5QoUSK3q0l5GK8RATh58iT8/f3RpEkTvHnzBpcvX8a0adNQrVo1/P777zh+/Dh27NgBIQQCAwPVl5pJd69evcKECRMwatQoNGjQQH0JU0UIASEEjhw5gqCgIHzzzTfw9vY2Yo3zPyEEzp07h507dyIpKQnjx4+HTCZT99lV/R2YmJjg5s2bOHLkCNq0aWPsahcIISEhOHfuHNq1a5fuh5Xqfa9QKPDjjz/iyJEjWLp0KQPSe9q9ezfmz5+Pbdu2wdPTU+M51XteLpfj6dOnWL9+PVq2bGmcihYwYWFh+P777+Hl5YUWLVqox16o3udKpRIRERHYvHkzLly4gO+++w5eXl7pvgOocCv0ofTGjRtYtWoVDh48iKpVq+LEiRNYvHgxvv76a8ycOROtWrWCQqHAgQMHMHPmTGzYsAFOT
k7Grna+9fLlS1hbW2P+/Pn46quv4ObmpvGLWpIk2Nvbo3z58pgxYwYDqR5IkoQ2bdogJSUF27Ztg1KpxOeffw6ZTIaUlBSYmZnhzp07MDc3x82bN1GnTh1+UejJyZMnsXjxYqSkpKBTp06ws7MDoNl6amdnByEEFi5cyB8DejB+/HgkJSVh2LBhWLFiBVq1agXgv0D66NEjNGrUCAEBAfD09OR7XU8iIiIQGBiIxMREmJmZoUmTJuq5YCVJgkwmg5WVFRo0aICZM2eqf3zx3JMGUcg8e/ZMbNu2TQghRFRUlBg5cqQoVqyYuH37thBCiLi4OHH8+HHRunVrMWbMGPV2+/fvF4GBgcaocoFz+/ZtMWnSJNGtWzdx/fp1IYQQKSkpQgghLly4IJYsWSIeP34shBBCqVQaq5oFTlRUlNi0aZPo2rWrWLBggXr90aNHRY0aNcSlS5fU63je9WfdunXCw8NDbNiwQURERAghhFAoFEIIIYKCgsT58+fF+fPnhRA87+9LdV6FEGLWrFmievXq4tixY+rzeuLECWFubi7++usvdTmec/25ffu26N+/vxg7dqz6Pa1y8eJFsW3bNvX3KM87ZaTQhdLt27eLv/76S7x580YIkfaPqGfPnqJ3797iyZMnQggh4uPjRUBAgLhy5Yoxq1qg3bp1S0yaNEn4+Pioz/Px48eFg4ODOHr0qJFrV3C9HUx/+uknceHCBeHi4iJ27Nhh7KoVOG8HpLVr1woPDw+xfv168erVKyFEWkAqWrQoP2f07O3zPmfOHFGtWjVx69YtcfLkSeHs7Cx27dplxNoVfKpgOmbMGHHmzBkhhBCnT58W1tbW4siRI0auHeV1khCFY4Kw8PBwhISEoEGDBoiKisLgwYPRrFkzjB49Grdu3cKKFSsQHx+PWbNmoUKFCpwjMxfcvn0bmzZtwvPnz+Hu7o4lS5Zg/vz56N69u7GrVqBFR0fjwIEDWLlyJc6ePYtdu3ahW7duvIxpAG+f03Xr1mHTpk2YOHEiFAoFxo4di0WLFvH9bgBvf37PnTsXS5cuhRACP/zwg/q9DvDSsaH8/fffWLRoEZycnFCiRAksW7YMS5YsQbdu3YxdNcrjCkUoTU5OxqxZs/Do0SMMHDgQzZs3x7fffovr16+jRYsWGDx4MG7duoVFixYhKSkJ69evh1wu5wdWLrh9+zbWrl2LNWvWYMuWLfjoo48YjnJBVFQU9u3bBycnJ3h4ePCc69G75/LdYPrdd9/h6dOn+OGHH9C9e3cGpBx6+fIllEolHBwcMnz+7WD6/fffo1KlSvD29uZ7XQ8SEhJQpEiRLMv8/fffmDVrFk6ePImVK1fyvU5aKRShFEibEmfJkiWIj4/HsGHDUKNGDaxduxanT59G69at4e/vj5s3b8LExAS1a9c2dnULlcDAQAghULNmTX5h6FlW5/PtSdoVCoV6aijS3fPnzxEREYFatWoByDqYbt26FY6Ojhxk8x5CQ0NRp04dDBw4EEOGDEHFihUzLPfuFS+GovcXEhKCPn36YNeuXZn+IFD5559/kJCQAFdXV77XSSsFfvS96kPp4cOHCA4OxvXr1/HixQt88cUXGDx4MGQyGfbv34/U1FTevUaPdPkAqlGjhoFrUziozvmtW7cQEREBZ2dnlC9fPsOyCoUCpqamSElJgampKQPpe3j16hXq1KmDtm3bYvTo0WjatGm66XBUo5BlMhl69+4NQLd/I6Tp+fPnSE1NxYULF1CqVCl89NFHcHFxSVdOJpOpf3yxS5Z+qGZQyS6QAtC4AQff66SNAv8vVCaT4c8//8SgQYMwZ84cbNy4Efb29li3bh0CAwMxcOBAtGnTBk2aNDF2VQsM1ZftsWPH1H2JMrtrh+puQgkJCXj48CE/uN6DJEn4448/0Lt3b+zbtw916tTBoUOH0pVTtYpGRkZizJgxePnypRFqW3DExcWhfPnyiIuLw6FDh3DmzBkAUAdTFVVAAtJaqflez
7n69etjyJAhkMvlOHjwIDZs2IDHjx+nK6f68RUREYEuXbogJibGCLUtGFQ3PXn58iUSEhIAZH3P+rc/258/f274ClKBUOBDKZB2ebh169ZwdXVFq1atMHLkSFy5cgWTJ0/G1atXMXLkSLi5uRm7mgWGJEk4ePAgxo8fj9q1a2Py5MlYunSp+gtZ5e1w1KNHD6SkpBipxgXDX3/9ha+//hr79u1D69atUb58eTRo0ED9vBBC45x369YNvr6+6lu8Us5UqFABnTt3RmhoKJ49e4Zdu3bh3LlzADSD6bsB6dWrV8asdr7z4sULjXDTvXt3tG3bFrNnz8apU6fw448/4smTJ+rn336v+/r6Yvz48bC2tjZG1fO9Fy9eYMaMGXjy5AnMzc3VgTMzb5/7tm3bIj4+PpdqSvldoQilZmZmePDgAQCo+y526tQJRYsWVd//m/RDCIFXr15h2bJl2LdvHwDAzc0Nw4cP17idaGpqqkYg/eKLL1C1alVjVbtAEELA398fgYGB+Prrr7Fnzx6ULFkS27dvx6tXr9R3bVKd85kzZ8LDw8PY1c6XQkJC8OjRI/Xj0aNHo0OHDujTpw+ioqLw66+/4sKFCwDSgunb73dfX19MnDiRd4bTQUhICCpWrAgvLy8cOHAAZ86cQePGjXHgwAHExMRg5cqVOH/+PFatWoXHjx9DCKHxXv/f//6nnkSfdBMXF4erV6/iyZMnWL16NS5cuID69esDSJswPz4+HgkJCXj06JH6ipjq3Pfs2RNz5sxB5cqVjfkSKD8x5HxTxqCakDc4OFjcvHlTPH78WCQlJQk3Nzfh5+cn/vrrL3Hs2DHRvHnzdJP70vtTKBQiISFBDBgwQKxZs0Y0a9ZMBAUFCSHSJhHfvHmzuuybN29E27ZtxalTp4xV3QIhMDBQBAUFiVu3bonixYuLKlWqiPj4eCGEEOfOnRPVq1dXz4UZGxsr6tevz3P+Hp4/fy4kSRKVKlUSP/74o9i6dasQQoju3buLHTt2iNDQUOHv7y8GDx4szp49q94uIiJCtG7dmuc+ByIiIkTLli2FmZmZWLhwoejevbuYPn26WLhwoejVq5cQQoizZ8+KFi1aiLt37wohhIiJiRFt2rQRf/75pzGrnq9FRkaK9u3bi+3bt4u9e/eKcePGiWrVqglJkkS7du2Ek5OT+OCDD0SjRo1EhQoVxJ07d4QQaX9frVq14nuddFbgQqkQQvz222+iQYMGolWrVqJdu3Zi6dKlIjExUXTp0kX07NlT1KtXT/z+++/GrmaBc+vWLbFlyxYhhBBdunQRxYoVU9+Q4OLFi6JGjRrqyZOTk5NF+/btxfHjx41W34IgKipK9OzZUx2M5syZI5o0aSK2bNkitm/fLtzc3DTe6w8fPtS4mw3lTOfOnYUkSWLt2rWibdu2YurUqWLy5MmiSZMmIjExUdy+fVv4+fmpv6Tj4uJE+/btxcmTJ41c8/zlzp07YsOGDUIIIV68eCHat28v2rVrJxISEsTXX38tunTpIjw9PUV4eLgQIi1EqVy9elVcu3bNKPXO796+29KSJUtEixYtxG+//SYOHDggPvvsM9G9e3dx7Ngx8eTJE/H06VPx8uVLdeNDYmKi6Nu3rzhx4oSRak/5WYGbEiooKAh9+/bF1q1bUbp0aVy5cgXz58/H4MGD0aVLFyQnJyM6OhoODg4c/apnM2bMwMWLF3Hw4EFcunQJ33zzDV6/fo2OHTti06ZNmDt3Ljp16gQASEpKQkREBBwdHY1c6/xHpP2YVI8k/vbbb/HDDz/g1KlTUCgU2L9/PzZu3IgaNWqgU6dOnJtRTx49eoQzZ87Ay8sLpUqVQvv27REbG4s//vgD27Ztw9WrVxEUFIQNGzaoBz5ZWloCAIKDg6FUKjnThA6Cg4Px8ccfY/DgwRgyZAgA4M2bN/D29oazszO2bdsGALh16xZq166dbsYDyrnExERYWFioH69YsQKbNm3CpEmTIITA8ePHU
aRIEfTr1y/dNGjR0dGIiYlB2bJljVV9ysfyfSh98uQJzp8/jy5duqBIkSK4dOkSJk+ejICAAEiShPj4eMydOxcpKSmYP3++satboLw7xUpSUhLc3d0xefJk9OrVCy9evMAPP/ygnpqoRYsW/NJ4D2/PJRoSEgInJyf1cwMGDEDv3r3h5eWVriy9v6CgIAwYMAAdO3aEt7e3uk9d8+bN1TNNyOVy3Lt3D5UqVWJAek/BwcH47LPP0L9/fwwaNAgKhQKnTp1Cy5Yt8fr1a3Tu3BnFixfH/v371dvwfOvH3bt30bNnT/Tq1QulSpWCv78/ZDIZdu/ejUWLFuGrr75CcnIy9u3bBysrK8ybN0/944vofeXrgU7BwcH46KOPcO3aNVy5cgUAYGtrC2trawQEBCAhIQFFixZFpUqV8ObNG6SmpmY5hQVpJz4+HikpKZDJZDhz5gzCwsKQmJgIuVyOvn374vbt2wAAe3t7fPXVV+jbty9atGgBgHPV5dTjx4/xyy+/ICkpCa9fv0aTJk0wevRodWuRi4sLNmzYoC7PQKo/d+/eRffu3TFy5EhMnTpVHUgB4MyZM5DJZGjevDlSUlJQuXJlBtL39PLlS7i6uqoDaVJSErp27YrTp08DAEqUKIF9+/bh2bNnaNu2rXo7nm/9iI2Nxa1btxAQEIDt27ejbdu26NixI+zs7FClShUsX74ckiTBx8cHQ4YMYSAlvcq3LaV3795Ft27dMHnyZHz66acaz02cOBGhoaEoX7483Nzc8L///Q/ff/+9xgcY5cyLFy8wb948fP3117C1tcXIkSNx6tQpdO7cGW3btkXZsmXh5eWlnieT9OPy5cswMTGBk5MTzMzM8PLlS2zduhVXr15FXFwchg0bhkGDBmHHjh0cZaxn//vf/2BtbY0vvvgCwH9zM77dhcLd3R1xcXG4efMmw9F7ePbsGYQQGDJkCBQKBX7//Xf07dsX5cqVw9KlSzXKvnnzBkFBQWjatKmRaltwXbhwAYMGDcLatWthYWGBK1eu4Pjx44iKisKhQ4dga2uLoKAgrSbQJ9JFvm0p/fHHH/Hxxx+rA6kQQj0P5sKFC+Hp6YmEhAQcOnQI3377LQOpntjb22P48OGIjo5W39N49erVKF26ND7++GPs3r0bSqUSW7duzXYuO9Jew4YNUa1aNXTr1g1LlixB2bJl8b///Q979+5Fw4YNcezYMURERMDGxsbYVS0wnj9/jpSUFMTGxqqDqGrSe0mSIJPJEBwcjPDwcJw/fx5r165lIH0PwcHB6NatGx48eIDdu3fD3NwcFhYW6QLpn3/+iRUrVqB48eIMpAbSpEkTLF++HEOHDkVUVBQGDRqEDRs2YNeuXdi+fTsOHjzIQEqGkYuDqvRq4MCBYt26dUKItJHcb7t7966Ii4sTQqSNBCT9UCgU6j+PGjVKeHh4iJMnT6rX3717V3z77beiadOmYv/+/caqZoG2f/9+0apVKzF37lz1zAZCCBEfHy8ePXpkxJoVLEFBQaJBgwbiyJEjYsiQIWLo0KHq51JTU0VqaqoQIm2aM9XnEOVcUFCQaNSokdixY4d6XWxsrOjZs6do1qyZet2pU6dEnTp1xJ49e4xQy8LnxIkTonr16uLPP/9Uv+dVlEqlxih9In3IV6E0IiJC/efevXur56cTQoiUlBR1OFq4cKH46aefhBCaQYpyLqMPn/Hjx4uOHTuKo0ePqn8ECJH2d5HZNqQ91fm7c+eOuHTpkoiKihJCCBEQECA+/PBDsWDBAvH48eNMt6OcUQWk7du3CyGEuHnzprC2thbLli3TKHfhwgVRu3ZtzsX4noKCgkS5cuVEmTJl1OtUjQlxcXGiU6dOomXLluLUqVOicePG4uDBg0IIvs9zy8mTJ0X58uU5xRPlinwTSmNiYkTLli3FnDlzhBBp8162aNFCLF68WKPcuXPnRI0aNcSZM2eMUc0CSfXhHxAQINauXStWrVqlfm78+PHCx8dHH
Dp0iK3SeqQ65wcOHBDVq1cXgwYNEk5OTuobPpw4cULUr19fzJkzh+ddjzIKSAqFQmzcuFHY2dmJsWPHig0bNogdO3aIatWqcb7j93T79m3h7u4uVqxYITp06CBatWqlfk714zY+Pl60a9dOSJLEKzBGcvz4cd6EgHJFvgilSUlJQgghNm3aJBo2bChWrFghhEib1Ldly5aif//+Yv/+/WLLli2iSpUqYt++fcasboF04MABUaNGDbFjxw5hYmIiRo8erW6FHj58uPD29havXr0yci0Llj///FPUrVtX3L9/X+zcuVPY2tqKChUqqFvmjh07xruS6VFWASk1NVVcvnxZ+Pj4iJ49e4oRI0awxe49KZVK0adPH/WP3Ddv3ggPDw/RunVrdRlVMI2JiRE3btwwSj3pP3yvk6Hl+VD68uVL8emnn6pvk7hjxw7h5uamvjx/8uRJ0bFjR9GpUyfh7+/PLwoDuHXrlmjSpIm4d++eOHjwoKhXr55wcXERfn5+6n5GqjvXUM49fvxY7N69W8TGxgohhNi2bZu4deuWOHDggGjQoIFITU0Vn3zyiShevDgvGetZZgHp7WAqxH/919lFRT9U51ElIiJCeHp6agTTd8cM8JwTFVymxh5olZ0XL17A1tYW8+bNw//+9z/06NEDQgjMnTsXSUlJGDp0KDw8PAD8N5m74ByBeqE6j0IIrF27FqGhoZg0aRKuX7+O69evo1GjRrCxscG3337LO9XowenTpzF9+nSkpKSgR48e6NatG5RKJRYvXow5c+bAxMQErVq1wv3792Fqmuf/6eYrkiRh48aN6vNarFgx7N27F926dUPr1q0REBCgUV41Dyw/Z96P6jyqPmvs7OywZ88e9OzZE+7u7jh//jzMzMw0tuE5Jyq48uyUUImJiQCAmjVrYujQoahUqRKmTZuGmzdvomfPnvjyyy/x008/adylSfVhxQ+t9yP+nf4mNjYWAFCnTh3UqVMHV69exbBhw2BiYoKIiAgMGTIEPXr04PnWk759++KLL77AwoULsW3bNvX0Q/fu3cPVq1cREBCAH3/8EWvXroW7u7uxq1vgvB2QAKgDkkwmU59vVUDie14/MvrMtrOzw/bt22FqaoqLFy8aq2pEZAxGbKXNVHBwsGjfvr2YN2+eSEhIEEqlUjx//lxMnDhR+Pj4iGvXrgkhhNiyZYtwdXUVjx494iUdPTtw4IDw9vYWgwcPFleuXBFKpVLMmzdPlClTRqxbt05UqFBBnDt3TgjBy2nv4/79+yIgIEC8efNGvW7jxo3igw8+EFu2bBFCCHHkyBFRv3590aJFC/WIcMo9b968Ec2bNxcXLlwwdlUKlXcv7RNRwZcn7+j022+/oXfv3rC1tUXr1q1hZWWFr776CtHR0di9ezdu3bqFSZMmoUGDBggLC4Ojo6Oxq1wgiH8voV25cgUDBgzAzJkzsXnzZlhaWmLAgAHw9PTE9OnTER0djZYtW6Jz587GrnK+FhUVhRo1aiAsLAzdunVDfHw8hg8fjsaNG+PYsWNYvXo1Ro8ejR49eiA2NhYKhQK2trbsnmIEqamp7DLxHt68eYOUlBRYW1ujaNGixq4OEeVReTKURkREYMaMGejQoQPMzc0REBCAn3/+GR9//DHkcjmKFCmCCxcuYPPmzbC2tjZ2dQuUy5cvY/PmzXB1dcVnn32GhIQETJ06FS9evED//v3RunVrdVmGo/e3ZMkSLFy4EN988w0CAwMRFRWFvXv3YvDgwfjuu+8gl8uxYMGCdLfSpZx7+fIllEol70iTS+7cuYMhQ4bAxMQETZo0waxZs9L1E32bQqFI19eUiAqHPNmntFixYgCAxYsXw9PTE7NmzUJMTAySk5Oxbt06BAUFYfHixQykBnDp0iX8/vvvuHbtGl6/fo0iRYpg7ty5sLa2xo8//ohXr16py/LLIueUSiUAYPz48Rg+fDiWL1+OLl26YNWqVdi/fz+aN2+O9u3bw87ODqVKl
TJybQuO0NBQ1KlTB19//TUePHiQbfm3b5WbB3+/53n3799H//79MWrUKCxbtgz79u1DaGioRpm3z6sqkEZFRSEoKIifMUSFTJ5rKVX9Mo6Pj8eAAQPg4eGBVatW4ZNPPsGkSZMQEhKC8PBwNGjQwNhVLbB+/PFHbN68GUOHDkXbtm1RrFgxJCUl4cGDBxxlr0eq2SIAYPr06di2bRtWrFiBVq1aqctERUXxkr0excTE4MMPP0Tr1q2hVCoxbNgwVKlSJcOybwek0NBQVK9ePZdrm//99NNPuHbtGr7//nsAQKNGjeDq6gp7e3t4enqiXbt26rJvn+/WrVvj22+/5b3tiQqZPNdSqvriNTExgYODA8aPH4/PP/8ckyZNglKphJOTEwOpnrz7e0TVejdw4ED4+Phg3bp12LdvH968eQO5XM5AqmcymUx9zqdPn44+ffpg7NixOHbsmPrvxtbWFgBbpfXFzMwMpUuXRoUKFRATE4ONGzfi/Pnz2Lt3r0a5dwPSmzdvjFPhfM7ExAT37t1TzxhRvXp19O7dG3Fxcdi+fTtev34N4L/zHRkZie7du2PZsmUMpESFUJ4LpSpyuRwjR45EmTJl4OrqauzqFCjx8fEA0ofSt0PS6NGj0a5dO6xbtw5JSUm5XsfC4u1z/vXXX6Nz584YPnw4IiIijFyzgiclJQUWFhaoVasWvL29MWHCBNy/fx+dO3dGcHCwuhwDkv74+vqiXr16ePr0KczMzLBx40a0adMGo0aNws2bNxETEwMA6h8APj4+mD59Opo3b27kmhORMeTp4aRVq1aFl5cXDh48iFq1asHc3NzYVcr3goKC8MUXX6BcuXJo3Lgx+vbtCxMTE3VLnCokyWQyfP755+jRowdKly5t5Frnb9kNrHn7nM+ZMwcDBgxA8eLFc7mWBZ9qcI2FhQWCg4PRqFEjnDt3DlWrVsXr16/x4MEDVKxYUSMgzZo1iwFJBw8ePMC+fftw7do1VKlSBT4+Ppg7dy7evHmDx48fIyUlBWZmZkhOTgbw3w9jhUKBSZMmYdasWWjRooUxXwIRGVGe61P6rhs3biAuLg7NmjUzdlXyveDgYHz88ccYOHAgQkJCcP/+fWzfvl39/NujXpVKJSRJUt/RiZePcyY0NBQffPABunbtikmTJqFixYqZln37nAOafx+ku7cDUs2aNdGsWTM0b94cu3btwo4dO3Dv3j307dsXTZo0wU8//YRx48ahZs2aUCgUGDFiBPr27cuApIOgoCD06NEDXbp0gRACFy9ehK2tLTp27Ah/f380adIEtra28PDwwPbt2zFjxgx07dpVvX1ERIR6kCsRFU55PpSSfty9excfffQRxo4di4EDByIiIgLNmzdHr169EB8fj/Hjx8PBwUEdQN/uU3fy5El07NiR8zTmQE4H1oSFhaFatWq5XNuCI6OAZGdnh27dusHb2xutWrXCJ598gi+++AJA2u2M7e3t1dszIOkmMDAQQ4YMwYABAzBgwAAAaVcIVq5ciWvXrmH27NmoVKkSpk+fDhsbGzRu3Bht27aFEAJCCPWAPyIq3BhKC4krV67Aw8MDQUFBcHR0RMeOHVGqVCm0aNECZ86cQVhYGH777TdYWlpqhCMvLy8sWbKEfepyKDExEd27d4e3tzf++usvODo6okOHDggPD4ePj4+6HEce6092AWncuHGoXr16hjfdeHtGBNJOTEwMqlSpgk6dOmHdunVQKBRQKBQwNzdHWFgY/P39UadOHcybN8/YVSWiPI6fvoVEgwYNsH//fnh5eaFFixZwc3NTT/s0evRoWFlZISUlBUIIdTjq3r07Fi1axHCUQxxYk/tiYmLQsmVLVK1aFQMGDIBCoUBycjJKlSqFIUOGQKFQ4MSJE+pAmtFgP9Le06dP8ddff2Hu3LnYv38/fv/9d5iYmMDU1BSpqalwdHREy5YtcfHiRSiVSs71SkRZ4idwIdKyZUv8+OOPePjwIXx9fdXrExMT8
ezZM8TFxUGSJERFRaFjx44cBfue3h1YU6JEiXQDawCOPNYXbQPS6dOn1eGIfaVzLigoCD179sS+ffvg4+OD2bNno1+/fvj9998hk8nU57ho0aJo0aIFZDIZzzcRZYmdBAsZd3d3bN26FX5+fti2bRuioqIwduxYzJw5E2XLloVSqcSGDRswf/58Di7TUWYDa9zc3PDLL79g2rRpGDNmjHpgTWJiIgCOPNaHoKAg9c02vvjiCyiVSvTr1w8bNmxAly5d1HdmUgUkhqP3ExgYCF9fX0yePBkff/wxAMDf3x8A0K9fP/z888/w8fHBmTNnsGzZMqxatcqY1SWi/EJQoXTixAlhb28vKlasKA4cOKDxXFxcnJFqlX8FBgaKWrVqiSlTpojJkyeLli1bim7duomNGzeKly9fijp16oj58+ery4eHh2ts/+bNm9yucoFx584dUadOHbF582aN9T/88IOws7MTe/bsEUIIcfr0aVG1alUREBBghFoWHCkpKWLQoEHiu+++01ivUCiEEGnn3cHBQUyfPl00a9ZM/P7778aoJhHlQ2wpLaQ8PT2xd+9eREVFoX379gD+G+RRtGhRI9cuf1ENrJkwYUK6gTU7d+6Ek5MTjhw5ojGwRjXSW3XOOdI7Z1JTU7F06VIMHjxY3WIHpJ1XVcvd0KFDcfPmTRw9ehSLFi3SuI0r6c7U1BQxMTEoW7YsgP/6RKv643788ccwNTXFZ599hj179qBz586cVo6ItMJQWoi5u7sDgPoLg4M8dKcaWNOpUyf1wBqFQqEeWOPv748TJ07A09MTANJ9OfOcvx8GpNwlhEBycjJCQ0M1+kSrBjGZmJhg5cqVaNiwIZ4+fYoyZcrwfBOR1viNSPzCyCEOrDEuIQSSkpIyDEiqPqQrV66Ei4sLnj59iq5duzIgvSdJkiCXyzFixAh8//332Ldvn3q9iYkJLl26hI0bN6Jo0aIoU6aMkWtLRPkNW0qJcoADa4zv7YD0xRdfoGrVqujcubO61V8VkDw8PBiQ9KxTp074+++/MXHiRDx79gxubm6IiorCuHHj8M0336Bhw4bqsnzvE5G2GEqJdMSRx3kLA1LuK1KkCCZOnIhKlSph0aJFKFOmDKytrTF//nx2kSCiHOMdnYh0kJqaiuHDh8PV1RUjR45Ur1cNWFq3bh2mTp2KYcOG4ejRo5g0aRI6d+5sxBoXDnFxcdi9e7dGQPr0008ZkHJBTEwMrKysEB8fD0tLS55vIsoxtpQS6YADa/ImS0tLfPrpp/Dx8WFAymXW1tYA0lpPAbZGE1HOMZQSaYkjj/M+BiTj4UwSRPS++ClCpCWOPM4/GJCIiPIftpQS6YgDa4iIiPSPA52IcoADa4iIiPSLoZToPXDkMRERkX4wlBLpgWpKKCIiIsoZhlIiIiIiMjo27RARERGR0TGUEhEREZHRMZQSERERkdExlBIRERGR0TGUEhEREZHRMZQSERERkdExlBJRtqZPnw43Nzf14/79+8PHx8do9SEiooKHoZSogAsLC8OoUaNQsWJFyOVyODk5oXPnzggICDDYMdevXw87OzuD7T+nWrZsiXXr1hm7GkRElAFTY1eAiAzn0aNHaNasGezs7LBw4ULUqVMHKSkpOHz4MEaMGIGgoCBjVzHXvHnzBmfPnsXWrVv1ts+UlBSYmZnpbX+5tW8ioryILaVEBdjw4cMhSRIuXbqEjz76CFWrVkWtWrUwfvx4XLhwQV0uMjIS/v7+KFWqFGxsbNCqVSvcvHkzR8c8efIkBgwYgKioKEiSBEmSMH36dMycORO1a9dOV97NzQ3/+9//APzXLWDGjBnqugwdOhTJycnq8kqlEvPmzYOLiwuKFCmCunXrYufOndnW68CBA6hXrx4cHBwyfN7Z2RmzZs1Cnz59YGlpibJly2LFihUaZSRJwqpVq9ClSxdYWlpizpw5AIBVq1ahUqVKMDc3R7Vq1fDLL79obBcUFITmzZvDwsICNWvWx
LFjxyBJEvbu3Qsg7ceDJEnYtm0bPDw8YGFhgc2bN+P169fo06cPypYti6JFi6JOnTr49ddfNfbt6emJUaNGYezYsShWrBgcHBzwww8/IC4uDgMGDIC1tTUqV66MP/74I9tzRERkVIKICqTXr18LSZLE3Llzsy3bpk0b0blzZ3H58mVx9+5dMWHCBFGiRAnx+vVrIYQQ06ZNE3Xr1lWX79evn+jatWuG+0pKShLLli0TNjY2IjQ0VISGhoqYmBgREhIiZDKZuHTpkrrstWvXhCRJ4v79++r9WllZCV9fX3H79m2xf/9+UapUKfHll1+qt5k9e7aoXr26OHTokLh//774+eefhVwuFydPnszyNfbo0SPLc1GhQgVhbW0t5s2bJ4KDg8W3334rTExMxJEjR9RlAAh7e3vx008/ifv374vHjx+L3bt3CzMzM7FixQoRHBwsFi9eLExMTMTx48eFEEKkpqaKatWqibZt24obN26I06dPi0aNGgkAYs+ePUIIIR4+fCgACGdnZ7Fr1y7x4MED8fz5c/H06VOxcOFCcf36dXH//n11nS5evKiuk4eHh7C2thazZs0Sd+/eFbNmzRImJibC29tbrF27Vty9e1cMGzZMlChRQsTFxWV5joiIjImhlKiAunjxogAgdu/enWW506dPCxsbG5GYmKixvlKlSmLNmjVCCN1CqRBC/Pzzz8LW1jbdem9vbzFs2DD141GjRglPT0+N/RYvXlwjPK1atUpYWVkJhUIhEhMTRdGiRcW5c+c09jtw4EDRp0+fTOuTmJgorKysxO3btzMtU6FCBdG+fXuNdb6+vsLb21v9GIAYO3asRpmmTZuKQYMGaazr2bOn6NChgxBCiD/++EOYmpqK0NBQ9fNHjx7NMJQuW7Ys0/qpdOzYUUyYMEH92MPDQzRv3lz9ODU1VVhaWopPP/1UvS40NFQAEOfPn892/0RExsLL90QFlBBCq3I3b95EbGwsSpQoASsrK/Xy8OFD3L9/X691GjRoEH799VckJiYiOTkZW7ZswWeffaZRpm7duihatKj6sbu7O2JjYxESEoJ79+4hPj4ebdu21ajrxo0bs6zr8ePHYW9vj1q1amVZP3d393SPAwMDNdY1aNBA43FgYCCaNWumsa5Zs2bq7YKDg+Hk5ARHR0f1840aNcrw+O/uW6FQYNasWahTpw6KFy8OKysrHD58GE+ePNEo5+rqqv6ziYkJSpQogTp16qjXqbosvHjxIsPjEhHlBRzoRFRAValSBZIkZTuYKTY2FqVLl8bJkyfTPafvEfSdO3eGXC7Hnj17YG5ujpSUFPTo0UPr7WNjYwGk9Q8tW7asxnNyuTzT7X7//Xd06dIlZ5V+h6WlpV72o82+Fy5ciOXLl2PZsmWoU6cOLC0tMXbsWI0+tgDSDYiSJEljnSRJANL64xIR5VUMpUQFVPHixeHl5YUVK1Zg9OjR6QJPZGQk7OzsUK9ePYSFhcHU1BTOzs56Oba5uTkUCkW69aampujXrx9+/vlnmJubo3fv3ihSpIhGmZs3byIhIUG9/sKFC7CysoKTkxOKFy8OuVyOJ0+ewMPDQ6u6CCGwb98+bNq0Kduybw/+Uj2uUaNGltvUqFEDZ8+eRb9+/dTrzp49i5o1awIAqlWrhpCQEISHh6tbLC9fvqxV3c+ePYuuXbvik08+AZAWKu/evaveNxFRQcJQSlSArVixAs2aNUOjRo0wc+ZMuLq6IjU1FUePHsWqVasQGBiINm3awN3dHT4+PliwYAGqVq2K58+f48CBA+jWrVu6S8racHZ2RmxsLAICAtSX41WX5P39/dVB7+zZs+m2TU5OxsCBAzF16lQ8evQI06ZNw8iRIyGTyWBtbY3PP/8c48aNg1KpRPPmzREVFYWzZ8/CxsZGIxiqXL16FfHx8WjevHm29T579iwWLFgAHx8fHD16FDt27MCBAwey3GbixIno1asXPvjgA7Rp0wb79u3D7t27cezYMQBA2
7ZtUalSJfTr1w8LFixATEwMpk6dCuC/FszMVKlSBTt37sS5c+dQrFgxLFmyBOHh4QylRFQgsU8pUQFWsWJFXLt2DS1btsSECRNQu3ZttG3bFgEBAVi1ahWAtGB08OBBfPjhhxgwYACqVq2K3r174/Hjx5lOn5Sdpk2bYujQofD19UWpUqWwYMEC9XNVqlRB06ZNUb16dTRu3Djdtq1bt0aVKlXw4YcfwtfXF126dMH06dPVz8+aNQv/+9//MG/ePNSoUQPt27fHgQMH4OLikmFdfvvtN3To0AGmptn/Bp8wYQKuXLmCDz74ALNnz8aSJUvg5eWV5TY+Pj5Yvnw5Fi1ahFq1amHNmjX4+eef4enpCSCtj+fevXsRGxuLhg0bwt/fH1999RUAwMLCIst9T506FfXq1YOXlxc8PT3h6OjIO2kRUYElCW1HQxAR6YEQAlWqVMHw4cMxfvx4jef69++PyMhI9fyd+uDq6oqpU6eiV69eWZZzdnbG2LFjMXbsWL0dOzNnz55F8+bNce/ePVSqVMngxyMiyg94+Z6Ics3Lly+xdetWhIWFYcCAAQY/XnJyMj766CN4e3sb/FhZ2bNnD6ysrFClShXcu3cPY8aMQbNmzRhIiYjewlBKRLnG3t4eJUuWxNq1a1GsWDGDH8/c3BzTpk0z+HGyExMTg0mTJuHJkycoWbIk2rRpg8WLFxu7WkREeQov3xMRERGR0XGgExEREREZHUMpERERERkdQykRERERGR1DKREREREZHUMpERERERkdQykRERERGR1DKREREREZHUMpERERERkdQykRERERGd3/ASJrd0e6+a/eAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Top enrichments:\n", + " program trait E_score E_score_z\n", + "CD4_Naive_L2 ibd 16.650182 0.728061\n", + " CD4_TCM_L2 ibd 8.865241 0.605904\n", + " B_naive_L2 ibd 0.895021 0.042090\n", + " AllCoding ibd 0.000000 0.000000\n", + " CD8_TEM_L2 ibd -16.924056 -0.599301\n", + " NK_L2 ibd -35.671857 -1.117085\n" + ] + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "abc_road = results_df[results_df[\"strategy\"].str.startswith(\"ABC_Road\")].copy()\n", + "\n", + "if abc_road.empty:\n", + " print(\"No ABC_Road results yet — run heritability first.\")\n", + "else:\n", + " programs = sorted(abc_road[\"program\"].unique())\n", + " traits = sorted(abc_road[\"trait\"].unique())\n", + " prog2x = {p: i for i, p in enumerate(programs)}\n", + " trait2y = {t: i for i, t in enumerate(traits)}\n", + "\n", + " fig, ax = plt.subplots(figsize=(max(6, len(programs) * 1.2), max(4, len(traits) * 0.8)))\n", + "\n", + " e_vals = abc_road[\"E_score\"].clip(lower=0).to_numpy()\n", + " z_vals = abc_road[\"E_score_z\"].abs().to_numpy()\n", + " x_vals = abc_road[\"program\"].map(prog2x).to_numpy()\n", + " y_vals = abc_road[\"trait\"].map(trait2y).to_numpy()\n", + "\n", + " sc = ax.scatter(\n", + " x_vals, y_vals,\n", + " s=e_vals * 60 + 5,\n", + " c=z_vals,\n", + " cmap=\"Reds\",\n", + " vmin=0, vmax=max(4, z_vals.max()),\n", + " alpha=0.85,\n", + " edgecolors=\"none\",\n", + " )\n", + " plt.colorbar(sc, ax=ax, label=\"|E-score z|\")\n", + "\n", + " ax.set_xticks(range(len(programs)))\n", + " ax.set_xticklabels(programs, rotation=45, ha=\"right\", fontsize=8)\n", + " ax.set_yticks(range(len(traits)))\n", + " ax.set_yticklabels(traits, fontsize=8)\n", + " ax.set_xlabel(\"Cell type / program\")\n", + " ax.set_ylabel(\"Trait\")\n", + " ax.set_title(\"sc-linker E-scores (size ∝ E-score, colour ∝ |z|)\")\n", + " 
plt.tight_layout()\n", + " plt.savefig(\"sclinker_escore_bubble.png\", dpi=150, bbox_inches=\"tight\")\n", + " plt.show()\n", + "\n", + " top = abc_road.nlargest(10, \"E_score_z\")\n", + " print(\"Top enrichments:\")\n", + " print(top[[\"program\",\"trait\",\"E_score\",\"E_score_z\"]].to_string(index=False))" + ] + }, + { + "cell_type": "markdown", + "id": "11399517", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Part 9: PPI gene scores (optional)\n", + "\n", + "The GSSG pipeline supports propagating gene scores through a protein-protein\n", + "interaction network (Random Walk with Restart) to create PPI-informed programs.\n", + "This ports `ppi_RWR.R` / `ppi_string_RWR.R` from `code/calc_PPI_scores/`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cd69a0dc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PPI network: 473,860 edges, 16,201 unique genes\n", + "Seed genes for 'B naive_L2': 892\n", + "\n", + "Top PPI-propagated genes:\n", + "HLA-DRB1 0.000964\n", + "IGHD 0.000964\n", + "FCGR2B 0.000963\n", + "FAU_ENSG00000149806 0.000961\n", + "DUSP1_ENSG00000120129 0.000961\n", + "PNOC_ENSG00000168081 0.000961\n", + "AFF3_ENSG00000144218 0.000961\n", + "CAMK1D_ENSG00000183049 0.000961\n", + "COBLL1_ENSG00000082438 0.000961\n", + "LSM10_ENSG00000181817 0.000961\n", + "APPL1_ENSG00000157500 0.000961\n", + "CAMK2D_ENSG00000145349 0.000961\n", + "LIMD2_ENSG00000136490 0.000961\n", + "SPI1_ENSG00000066336 0.000961\n", + "HLA-DOB_ENSG00000241106 0.000961\n", + "QRSL1_ENSG00000130348 0.000961\n", + "CALHM6_ENSG00000188820 0.000961\n", + "CAPG_ENSG00000042493 0.000961\n", + "HLA-DPA1_ENSG00000231389 0.000961\n", + "SPIB_ENSG00000269404 0.000961\n" + ] + } + ], + "source": [ + "import os, pandas as pd\n", + "\n", + "ppi_links_file = \"9606.protein.links.v12.0.txt.gz\"\n", + "ppi_info_file = \"9606.protein.info.v12.0.txt.gz\"\n", + "\n", + "if not os.path.exists(ppi_links_file):\n", + " 
print(f\"PPI file not found: {ppi_links_file}\")\n", + " print(\"Download from https://string-db.org/cgi/download?species_text=Homo+sapiens\")\n", + "else:\n", + " ppi_raw = pd.read_csv(ppi_links_file, sep=\" \")\n", + " ppi_info = pd.read_csv(ppi_info_file, sep=\"\\t\")\n", + "\n", + " # STRING v12 info file: first column may have a '#' prefix in the header\n", + " ppi_info.columns = [c.lstrip(\"#\").strip() for c in ppi_info.columns]\n", + "\n", + " # Map STRING protein ID → HGNC gene name\n", + " id_col = \"string_protein_id\" # e.g. \"9606.ENSP00000000233\"\n", + " name_col = \"preferred_name\"\n", + " id2gene = ppi_info.set_index(id_col)[name_col].to_dict()\n", + "\n", + " ppi = pd.DataFrame({\n", + " \"gene1\": ppi_raw[\"protein1\"].map(id2gene),\n", + " \"gene2\": ppi_raw[\"protein2\"].map(id2gene),\n", + " \"score\": ppi_raw[\"combined_score\"],\n", + " }).dropna(subset=[\"gene1\", \"gene2\"])\n", + " ppi = ppi[ppi[\"score\"] >= 700]\n", + " print(f\"PPI network: {len(ppi):,} edges, {ppi['gene1'].nunique():,} unique genes\")\n", + "\n", + " top_program = genescores.columns[0]\n", + " seed = genescores[top_program]\n", + " seed = seed[seed > 0.3]\n", + " print(f\"Seed genes for '{top_program}': {len(seed)}\")\n", + "\n", + " ppi_scores = compute_ppi_gene_scores(\n", + " seed_genes=seed,\n", + " ppi_network=ppi,\n", + " restart_prob=0.7,\n", + " source_col=\"gene1\",\n", + " target_col=\"gene2\",\n", + " weight_col=\"score\",\n", + " )\n", + " print(\"\\nTop PPI-propagated genes:\")\n", + " print(ppi_scores.sort_values(ascending=False).head(20).to_string())" + ] + }, + { + "cell_type": "markdown", + "id": "e77ca733", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Comparison: sc-linker vs scDRS vs Seismic\n", + "\n", + "| | **sc-linker** | **scDRS** | **Seismic** |\n", + "|---|---|---|---|\n", + "| Resolution | Cell-type programs | Individual cells | Cell types |\n", + "| GWAS input | Sumstats + baseline-LD | MAGMA gene scores | MAGMA gene scores 
|\n", + "| SNP-to-gene | Roadmap + ABC enhancers | Top-N MAGMA genes | Specificity scores |\n", + "| S-LDSC mode | `--h2 --overlap-annot` | — | — |\n", + "| Key output | E-score per (program, trait) | Norm score per cell | p-value per (cell type, trait) |\n", + "| Extra deps | `bedtools`, LDSC binary | `scdrs` package | R + `seismicGWAS` |\n", + "| Ref data | ~10–50 GB from Broad | None extra | None extra |\n", + "| Best for | Heritability-based causal program ID | Cell-level scoring & heterogeneity | Fast cell-type associations |\n", + "\n", + "---\n", + "\n", + "## Notes on interpretation\n", + "\n", + "**E-score threshold** — the paper uses E-score > 2 as its significance threshold. The E-score is the excess enrichment of the program annotation over all protein-coding genes linked via the same enhancer strategy.\n", + "\n", + "**Two strategies** — always report both ABC\\_Road and 100 kb. ABC\\_Road is primary; 100 kb is a less biologically-informed comparison. Concordance between the two supports the result.\n", + "\n", + "**Disease-dependent vs healthy programs** — for epithelial diseases (UC, asthma), disease-dependent programs are typically more enriched; for immune diseases, both healthy and disease programs tend to be enriched.\n", + "\n", + "**Sample size** — use > 50 donors for the scRNA-seq and a well-powered GWAS (heritability z-score > 5)."
+ ] + }, + { + "cell_type": "markdown", + "id": "0d75e73b", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Troubleshooting: IndexError in `_check_variance` during `--h2 --overlap-annot`\n", + "\n", + "If you see an error like:\n", + "\n", + "```\n", + "IndexError: boolean index did not match indexed array along dimension 1;\n", + "dimension is 11 but corresponding boolean dimension is 10\n", + "```\n", + "\n", + "this is a known pandas compatibility bug in `ldscore/parse.py` (ldsc\n", + "[issue #342](https://github.com/bulik/ldsc/issues/342),\n", + "[PR #341](https://github.com/bulik/ldsc/pull/341/files)).\n", + "\n", + "**What causes it:** `pd.concat(chr_ld)` reorders annotation columns\n", + "alphabetically across chromosomes in newer pandas versions. When your\n", + "annotation column name sorts before the baseline columns, `_check_variance()`\n", + "receives a mismatched array and crashes.\n", + "\n", + "**The fix** (one line in `ldscore/parse.py` ~line 147):\n", + "```python\n", + "# before\n", + "x = pd.concat(chr_ld)\n", + "# after\n", + "x = pd.concat(chr_ld)[chr_ld[0].columns]\n", + "```\n", + "\n", + "Run the cell below to apply it automatically. It is safe to call repeatedly\n", + "(idempotent). Only apply it if you actually hit the error." 
def get_1000genomes_frq(
    config_path: str | Path = "./cellink/resources/config/1000genomes.yaml",
    population: str = "EUR",
    data_home: str | Path | None = None,
    return_path: bool = False,
    refresh: bool = False,
) -> tuple[Path, str]:
    """
    Download and extract 1000 Genomes allele frequency files.

    Required for ``ldsc.py --overlap-annot --frqfile-chr``.

    Parameters
    ----------
    config_path : str or pathlib.Path
        Path to YAML configuration file. Its ``frq`` section must contain a
        ``prefix`` entry plus one entry per population, each with
        ``filename`` and ``url``.
    population : str, default='EUR'
        Population code; must be one of the population entries of the
        ``frq`` section of the configuration.
    data_home : str or pathlib.Path, optional
        Root directory where data will be stored.
    return_path : bool, default=False
        Accepted for symmetry with the other ``get_1000genomes_*`` helpers.
        It is currently ignored: the function always returns the directory
        and file prefix, because frq files are passed to ``ldsc.py`` as a
        prefix rather than loaded into memory.
    refresh : bool, default=False
        Forwarded to ``_extract_or_refresh`` for the downloaded archive.

    Returns
    -------
    tuple
        ``(DATA, prefix)`` where ``DATA`` is the ``Path`` of the directory
        the archive is extracted into and ``prefix`` is the ``frq.prefix``
        value from the configuration (e.g. ``"1000G.EUR.QC."``).

    Raises
    ------
    ValueError
        If ``population`` is not a population entry of the ``frq`` section.

    Examples
    --------
    >>> frq_dir, frq_prefix = get_1000genomes_frq(population="EUR", return_path=True)
    >>> frqfile_chr = str(frq_dir / frq_prefix)  # passed as --frqfile-chr
    """
    config = _load_config(config_path)
    frq_config = config["frq"]

    # "prefix" lives next to the population entries in the YAML but is not a
    # population itself; exclude it from both the check and the error message.
    populations = [key for key in frq_config if key != "prefix"]
    if population not in populations:
        raise ValueError(f"population must be one of {populations}")

    # Validate first so a typo in `population` does not leave an empty
    # directory behind.
    data_home = get_data_home(data_home)
    DATA = data_home / f"1000genomes_frq_{population}"
    DATA.mkdir(parents=True, exist_ok=True)

    prefix = frq_config["prefix"]
    tgz_path = DATA / frq_config[population]["filename"]

    _download_file(frq_config[population]["url"], tgz_path, checksum=None)
    _extract_or_refresh(tgz_path, DATA, refresh=refresh)

    return DATA, prefix


def get_1000genomes_hapmap3(
    config_path: str | Path = "./cellink/resources/config/1000genomes.yaml",
    data_home: str | Path | None = None,
    refresh: bool = False,
) -> Path:
    """
    Download the HapMap3 SNP list (no MHC region).

    Used as ``--print-snps`` when computing per-annotation LD scores to
    restrict output to HapMap3 SNPs, and as ``--merge-alleles`` during
    sumstats munging.

    Parameters
    ----------
    config_path : str or pathlib.Path
        Path to YAML configuration file; its ``hapmap3`` section provides
        ``filename`` and ``url``.
    data_home : str or pathlib.Path, optional
        Root directory where data will be stored.
    refresh : bool, default=False
        If True, any previously downloaded copy is removed first so the file
        is fetched again.

    Returns
    -------
    pathlib.Path
        Path to the downloaded SNP list inside ``<data_home>/1000genomes_hapmap3``.

    Examples
    --------
    >>> hapmap3_snps = get_1000genomes_hapmap3()
    >>> print_snps = str(hapmap3_snps)  # passed as --print-snps to ldsc.py
    """
    data_home = get_data_home(data_home)
    DATA = data_home / "1000genomes_hapmap3"
    DATA.mkdir(parents=True, exist_ok=True)

    config = _load_config(config_path)
    dest = DATA / config["hapmap3"]["filename"]

    if refresh:
        # NOTE(review): assumes `_download_file` skips files that already
        # exist; removing the stale copy forces a re-download either way.
        dest.unlink(missing_ok=True)

    _download_file(config["hapmap3"]["url"], dest, checksum=None)

    return dest
+ EUR: + filename: "1000G_Phase3_frq.tgz" + url: "https://zenodo.org/records/10515792/files/1000G_Phase3_frq.tgz?download=1" +hapmap3: + filename: "hm3_no_MHC.list.txt" + url: "https://zenodo.org/records/10515792/files/hm3_no_MHC.list.txt?download=1" + \ No newline at end of file diff --git a/src/cellink/tl/_runner.py b/src/cellink/tl/_runner.py index 619f010..3d68668 100644 --- a/src/cellink/tl/_runner.py +++ b/src/cellink/tl/_runner.py @@ -133,7 +133,13 @@ def _build_container_command(self, base_command: str, file_paths: list[str] = No for host_path, container_path in volumes.items(): bind_args.extend(["-B", f"{host_path}:{container_path}"]) - cmd = ["singularity", "exec", *bind_args, self.config["singularity_image"], container_command] + # Inject overlay if the overlay patch strategy was used + overlay_args = [] + if self.config.get("_ldsc_overlay_path"): + overlay_args = ["--overlay", f"{self.config['_ldsc_overlay_path']}:ro"] + image = self.config.get("singularity_image", "") + cmd = ["singularity", "exec", *bind_args, *overlay_args, image, container_command] + return " ".join(cmd) return base_command @@ -161,6 +167,17 @@ def _rewrite_paths_in_command(self, command: str, volumes: dict[str, str]) -> st def run_command(self, base_command: str, file_paths: list[str] = None, check: bool = True): """ Execute command with automatic path inference + + For Singularity, three patch modes are handled transparently: + + - **overlay**: if ``_ldsc_overlay_path`` is in config (set by + ``check_and_patch_ldsc_parse_bug`` with ``singularity_patch_strategy="overlay"``), + ``--overlay :ro`` is injected into the ``singularity exec`` call + so the patched ``parse.py`` is always active. + - **sandbox / rebuild**: ``singularity_image`` is updated in the runner + config to point at the sandbox directory or rebuilt SIF, so + ``_build_container_command`` picks it up automatically — no special + handling needed here. 
Parameters ---------- diff --git a/src/cellink/tl/external/__init__.py b/src/cellink/tl/external/__init__.py index 293489d..fae51e6 100644 --- a/src/cellink/tl/external/__init__.py +++ b/src/cellink/tl/external/__init__.py @@ -16,7 +16,7 @@ make_annot_from_donor_data, munge_sumstats, ) -from ._ldsc2magma import genesets_dir_to_entrez_gmt, load_ensembl_to_entrez_map +from ._ldsc2magma import genesets_dir_to_entrez_gmt from ._magma import run_magma_pipeline from ._pc import calculate_pcs from ._saigeqtl import ( @@ -28,8 +28,43 @@ ) from ._scdrs import run_scdrs from ._seismic import run_seismic +from ._gsmap import load_gsmap_results, format_gsmap_sumstats +from ._magma import run_magma_pipeline from ._sldsc_utils import generate_gene_coord_file, generate_sldsc_genesets, preprocess_for_sldsc from ._tensorqtl import read_tensorqtl_results, run_tensorqtl +from ._sclinker import ( + compute_celltype_programs, + compute_diseaseprogression_programs, + compute_nmf_programs, + compute_joint_nmf_programs, + #geneprogram_to_bedgraph, + #bedgraph_to_snp_annotation, + #run_sclinker_sldsc, + #load_sclinker_results, + #compute_escore, + #download_sclinker_references, + #load_enhancer_links, + #run_sclinker_pipeline, +) +from ._sclinker_utils import ( + run_sclinker_heritability, + download_sclinker_enhancer_links, + download_sclinker_references, + load_roadmap_links, + load_abc_links, + load_gene_annotation, + genescores_to_abc_road_bedgraph, + genescores_to_100kb_bedgraph, + bedgraph_to_snp_annotation, + genescores_to_annotations, + compute_ld_scores_for_sclinker, + #run_sclinker_sldsc, + load_sclinker_heritability_results, + compute_escore, + compute_ppi_gene_scores, + check_and_patch_ldsc_parse_bug, +) +from ._joint_nmf import JointNMFWrapper __all__ = [ "read_jaxqtl_results", @@ -49,6 +84,35 @@ "read_saigeqtl_results", "make_group_file", "run_saigeqtl", + ### + "compute_celltype_programs", + "compute_diseaseprogression_programs", + "compute_nmf_programs", + 
"compute_joint_nmf_programs", + "geneprogram_to_bedgraph", + "bedgraph_to_snp_annotation", + "run_sclinker_sldsc", + "load_sclinker_results", + "compute_escore", + "download_sclinker_references", + "load_enhancer_links", + "run_sclinker_pipeline", + "download_sclinker_references", + "load_roadmap_links", + "load_abc_links", + "load_gene_annotation", + "genescores_to_abc_road_bedgraph", + "genescores_to_100kb_bedgraph", + "bedgraph_to_snp_annotation", + "genescores_to_annotations", + "compute_ld_scores_for_sclinker", + "run_sclinker_sldsc", + "load_sclinker_heritability_results", + "compute_escore", + "compute_ppi_gene_scores", + "check_and_patch_ldsc_parse_bug", + ### + "JointNMFWrapper", ] diff --git a/src/cellink/tl/external/_joint_nmf.py b/src/cellink/tl/external/_joint_nmf.py new file mode 100644 index 0000000..8cfe5e8 --- /dev/null +++ b/src/cellink/tl/external/_joint_nmf.py @@ -0,0 +1,294 @@ +""" +Pure-Python port of the Joint NMF implementation from the sc-linker paper. + +Reference: Jagadeesh*, Dey* et al., Nature Genetics 2022. +Original code: https://github.com/karthikj89/scgenetics/blob/master/jointNMF.py + +The JointNMFWrapper class provides a clean interface over the original +multiplicative update rules. It decomposes two matrices (healthy H and +disease D) simultaneously: + + H ≈ [L_shared_H | L_unique_H] × F_H + D ≈ [L_shared_D | L_unique_D] × F_D + +with a coupling term γ/2 ||L_shared_H − L_shared_D||² that encourages +shared programs to align across conditions. +""" + +from __future__ import annotations + +import logging +from time import time +from typing import Optional, Tuple + +import numpy as np +from scipy import sparse +from scipy.sparse import issparse +from sklearn.decomposition import NMF +from sklearn.utils.extmath import safe_sparse_dot + +logger = logging.getLogger(__name__) + +_LARGE = 1e100 +_SMALL = 1e-10 + + +class JointNMFWrapper: + """ + Joint NMF decomposition for paired healthy / disease single-cell data. 
+ + Parameters + ---------- + Xh + Dense matrix (cells × genes) for the healthy condition. + Xd + Dense matrix (cells × genes) for the disease condition. + Must share the same gene dimension as ``Xh``. + n_shared + Number of programs shared between healthy and disease (KC in paper). + n_healthy_specific + Number of programs unique to the healthy condition (KH in paper). + n_disease_specific + Number of programs unique to the disease condition (KD in paper). + gamma + Coupling strength. Higher values force shared programs to align more. + mu + L2 regularisation on loading matrices. If None, estimated from data. + n_init + Number of random NMF initialisations; best reconstruction is kept. + max_iters + Maximum multiplicative-update iterations. + tol + Convergence tolerance on relative change in cost. + random_state + Random seed for reproducibility. + """ + + def __init__( + self, + Xh: np.ndarray, + Xd: np.ndarray, + *, + n_shared: int = 10, + n_healthy_specific: int = 5, + n_disease_specific: int = 5, + gamma: float = 1.0, + mu: Optional[float] = None, + n_init: int = 5, + max_iters: int = 1000, + tol: float = _SMALL, + random_state: int = 0, + ): + self.n_shared = n_shared + self.n_healthy_specific = n_healthy_specific + self.n_disease_specific = n_disease_specific + self.gamma = gamma + self.max_iters = max_iters + self.tol = tol + + # Normalise to [0, 1] + self.Xh = sparse.csr_matrix(Xh, dtype=np.float64) + self.Xd = sparse.csr_matrix(Xd, dtype=np.float64) + _max_h = self.Xh.max() + _max_d = self.Xd.max() + if _max_h > 0: + self.Xh = self.Xh / _max_h + if _max_d > 0: + self.Xd = self.Xd / _max_d + + # Initialise with NMF (best of n_init starts) + nh_total = n_shared + n_healthy_specific + nd_total = n_shared + n_disease_specific + + self.Wh, self.Hh = self._best_nmf(self.Xh, nh_total, n_init, random_state) + self.Wd, self.Hd = self._best_nmf(self.Xd, nd_total, n_init, random_state + 1) + + # Align shared columns between Wh and Wd + self._align() + + # Estimate 
mu if not provided + if mu is None: + diff_h = 0.5 * self._frob(self.Xh - self.Wh.dot(self.Hh)) ** 2 + diff_d = 0.5 * self._frob(self.Xd - self.Wd.dot(self.Hd)) ** 2 + denom = self._frob(self.Wh) ** 2 + self._frob(self.Wd) ** 2 + self.mu = (diff_h + diff_d) / (denom + _SMALL) + else: + self.mu = mu + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def fit(self) -> "JointNMFWrapper": + """Run multiplicative updates until convergence.""" + t0 = time() + chi2 = self._cost() + old_chi2 = _LARGE + niter = 0 + + while niter < self.max_iters and abs((old_chi2 - chi2) / old_chi2) > self.tol: + self._update_Wh() + self._update_Hh() + self._update_Wd() + self._update_Hd() + + old_chi2 = chi2 + chi2 = self._cost() + + if not np.isfinite(chi2): + raise ValueError("Joint NMF diverged (NaN/Inf cost). Check input data.") + + if niter % 50 == 0: + logger.debug( + f"JointNMF iter {niter}: cost={chi2:.4f}, " + f"Δ={100*(old_chi2-chi2)/old_chi2:.3f}%" + ) + niter += 1 + + elapsed = (time() - t0) / 60.0 + logger.info(f"Joint NMF converged after {niter} iterations ({elapsed:.2f} min)") + return self + + # ------------------------------------------------------------------ + # Properties: dense factor matrices + # ------------------------------------------------------------------ + + @property + def Wh(self) -> np.ndarray: + """Healthy cell × factor loadings (dense).""" + return self._Wh.toarray() if issparse(self._Wh) else self._Wh + + @Wh.setter + def Wh(self, value): + self._Wh = sparse.csr_matrix(value) + + @property + def Wd(self) -> np.ndarray: + return self._Wd.toarray() if issparse(self._Wd) else self._Wd + + @Wd.setter + def Wd(self, value): + self._Wd = sparse.csr_matrix(value) + + @property + def Hh(self) -> np.ndarray: + return self._Hh.toarray() if issparse(self._Hh) else self._Hh + + @Hh.setter + def Hh(self, value): + self._Hh = sparse.csr_matrix(value) + + 
@property + def Hd(self) -> np.ndarray: + return self._Hd.toarray() if issparse(self._Hd) else self._Hd + + @Hd.setter + def Hd(self, value): + self._Hd = sparse.csr_matrix(value) + + # ------------------------------------------------------------------ + # Private: cost and updates + # ------------------------------------------------------------------ + + def _cost(self) -> float: + Wshh = self._Wh[:, : self.n_shared] + Wshd = self._Wd[:, : self.n_shared] + d1 = 0.5 * self._frob(self.Xh - safe_sparse_dot(self._Wh, self._Hh)) ** 2 + d2 = 0.5 * self._frob(self.Xd - safe_sparse_dot(self._Wd, self._Hd)) ** 2 + d3 = (self.mu / 2) * (self._frob(self._Wh) ** 2 + self._frob(self._Wd) ** 2) + d4 = (self.gamma / 2) * self._frob(Wshh - Wshd) ** 2 + return float(d1 + d2 + d3 + d4) + + def _update_Wh(self): + scale = np.append( + (self.gamma + self.mu) * np.ones(self.n_shared), + self.mu * np.ones(self.n_healthy_specific), + ) + Wshd = self._Wd[:, : self.n_shared] + num1 = safe_sparse_dot(self.Xh, self._Hh.T) + zeros = sparse.csr_matrix( + np.zeros((self.Xh.shape[0], self.n_healthy_specific)) + ) + num2 = sparse.hstack([Wshd.multiply(self.gamma), zeros]) + den = ( + safe_sparse_dot(self._Wh, safe_sparse_dot(self._Hh, self._Hh.T)) + + safe_sparse_dot(self._Wh, np.diag(scale)) + ) + self._Wh = self._Wh.multiply((num1 + num2) / (den + _SMALL)).tocsr() + + def _update_Hh(self): + num = safe_sparse_dot(self._Wh.T, self.Xh) + den = safe_sparse_dot(safe_sparse_dot(self._Wh.T, self._Wh), self._Hh) + self._Hh = self._Hh.multiply(num / (den + _SMALL)).tocsr() + + def _update_Wd(self): + scale = np.append( + (self.gamma + self.mu) * np.ones(self.n_shared), + self.mu * np.ones(self.n_disease_specific), + ) + Wshh = self._Wh[:, : self.n_shared] + num1 = safe_sparse_dot(self.Xd, self._Hd.T) + zeros = np.zeros((self.Xd.shape[0], self.n_disease_specific)) + num2 = sparse.hstack([Wshh.multiply(self.gamma), sparse.csr_matrix(zeros)]) + den = ( + safe_sparse_dot(self._Wd, 
safe_sparse_dot(self._Hd, self._Hd.T)) + + safe_sparse_dot(self._Wd, np.diag(scale)) + ) + self._Wd = self._Wd.multiply((num1 + num2) / (den + _SMALL)).tocsr() + + def _update_Hd(self): + num = safe_sparse_dot(self._Wd.T, self.Xd) + den = safe_sparse_dot(safe_sparse_dot(self._Wd.T, self._Wd), self._Hd) + self._Hd = self._Hd.multiply(num / (den + _SMALL)).tocsr() + + # ------------------------------------------------------------------ + # Private: initialisation helpers + # ------------------------------------------------------------------ + + @staticmethod + def _best_nmf( + X: sparse.spmatrix, n_components: int, n_init: int, seed: int + ) -> Tuple[sparse.spmatrix, sparse.spmatrix]: + best_err = _LARGE + best_W = best_H = None + for i in range(n_init): + model = NMF(n_components=n_components, random_state=seed + i) + W = model.fit_transform(X) + if model.reconstruction_err_ < best_err: + best_err = model.reconstruction_err_ + best_W = sparse.csr_matrix(W) + best_H = sparse.csr_matrix(model.components_) + return best_W, best_H + + def _align(self): + """Reorder columns of Wh and Wd so shared programs are best-matched.""" + Wh_dense = self._Wh.toarray() + Wd_dense = self._Wd.toarray() + + corr = np.corrcoef(Wh_dense.T, Wd_dense.T) + n_h = Wh_dense.shape[1] + n_d = Wd_dense.shape[1] + corr = corr[:n_h, n_h:] # shape (n_h, n_d) + + reorder_h, reorder_d = [], [] + corr_copy = corr.copy() + for _ in range(min(n_h, n_d)): + argmax = np.argmax([corr_copy[i, j] for i, j in enumerate(corr_copy.argmax(axis=1))]) + i = argmax + j = corr_copy.argmax(axis=1)[argmax] + reorder_h.append(i) + reorder_d.append(j) + corr_copy[i, :] = -2 + corr_copy[:, j] = -2 + + reorder_h += [k for k in range(n_h) if k not in reorder_h] + reorder_d += [k for k in range(n_d) if k not in reorder_d] + + self._Wh = sparse.csr_matrix(Wh_dense[:, reorder_h]) + self._Hh = sparse.csr_matrix(self._Hh.toarray()[reorder_h, :]) + self._Wd = sparse.csr_matrix(Wd_dense[:, reorder_d]) + self._Hd = 
sparse.csr_matrix(self._Hd.toarray()[reorder_d, :]) + + @staticmethod + def _frob(M) -> float: + return sparse.linalg.norm(M, ord="fro") if issparse(M) else np.linalg.norm(M, "fro") \ No newline at end of file diff --git a/src/cellink/tl/external/_ldsc.py b/src/cellink/tl/external/_ldsc.py index dcc152f..ea626c1 100644 --- a/src/cellink/tl/external/_ldsc.py +++ b/src/cellink/tl/external/_ldsc.py @@ -12,7 +12,67 @@ class LDSCRunner(BaseToolRunner): - """LDSC Runner with support for local, docker, and singularity""" + """ + LDSC Runner with support for local, Docker, and Singularity. + + Configuration keys + ------------------ + execution_mode : str + One of ``"local"``, ``"docker"``, ``"singularity"``. + ldsc_command : str + Command name / path for ``ldsc.py``. + make_annot_command : str + Command name / path for ``make_annot.py``. + munge_command : str + Command name / path for ``munge_sumstats.py``. + parse_script : str, optional + **Explicit path to** ``ldscore/parse.py`` **inside the container or + local install.** Setting this is strongly recommended for Docker / + Singularity setups where auto-discovery via ``PATH`` is unreliable. + + Examples:: + + # Singularity (common default layout) + "parse_script": "/ldsc/ldscore/parse.py" + + # Docker (zijingliu/ldsc image) + "parse_script": "/ldsc/ldscore/parse.py" + + # Local conda env + "parse_script": "/opt/conda/envs/ldsc/lib/python3.8/site-packages/ldsc/ldscore/parse.py" + + Used by :func:`~cellink.tl.external._sldsc_utils.check_and_patch_ldsc_parse_bug` + to locate and patch the pandas column-sort bug (ldsc issue #342). + docker_image : str, optional + Docker image name (required when ``execution_mode="docker"``). + singularity_image : str, optional + Path to Singularity SIF image (required when + ``execution_mode="singularity"``). + singularity_patch_strategy : str, optional + How to apply the parse.py bug fix for Singularity images. 
+ One of: + + ``"overlay"`` *(default)* + Creates a persistent ext3 overlay image at + ``singularity_overlay_path`` and mounts it read-only on every + ``ldsc.py`` call. No root required; HPC-friendly. + ``"sandbox"`` + Converts the SIF to a writable sandbox directory once; uses the + sandbox for all subsequent calls. No rebuild needed. + ``"rebuild"`` + Converts to sandbox, patches, rebuilds a new SIF. Original SIF + is backed up as ``.bak.sif``. Requires build privileges or + ``--fakeroot``. + + singularity_overlay_path : str, optional + Path for the ext3 overlay image (``"overlay"`` strategy). + Defaults to ``~/.cellink/ldsc_overlay.img``. + singularity_overlay_size_mb : int, optional + Size in MB for a newly created overlay image. Default 256. + singularity_sandbox_path : str, optional + Path for the sandbox directory (``"sandbox"`` / ``"rebuild"`` + strategies). Defaults to ``.sandbox/``. + """ def __init__(self, config_path: str | None = None, config_dict: dict | None = None): required_fields = ["execution_mode", "ldsc_command", "make_annot_command", "munge_command"] @@ -32,6 +92,11 @@ def _load_config(self, config_path: str | None, config_dict: dict | None) -> dic "ldsc_command": "ldsc.py", "make_annot_command": "make_annot.py", "munge_command": "munge_sumstats.py", + "parse_script": "ldscore/parse.py", + "singularity_patch_strategy": "overlay", + "singularity_overlay_path": None, + "singularity_overlay_size_mb": 256, + "singularity_sandbox_path": None, } @property @@ -49,6 +114,10 @@ def munge_command(self) -> str: @property def execution_mode(self) -> str: return self.config["execution_mode"] + + @property + def parse_script(self) -> str | None: + return self.config.get("parse_script") _ldsc_runner = None diff --git a/src/cellink/tl/external/_sclinker.py b/src/cellink/tl/external/_sclinker.py new file mode 100644 index 0000000..8be39f6 --- /dev/null +++ b/src/cellink/tl/external/_sclinker.py @@ -0,0 +1,793 @@ +""" +sc-linker: Integrating single-cell 
RNA-seq, epigenomic maps and GWAS summary statistics +to infer disease-critical cell types and cellular processes. + +Based on: Jagadeesh*, Dey* et al., Nature Genetics 2022. +https://doi.org/10.1038/s41588-022-01187-9 + +Pipeline overview +----------------- +Step 1 – Gene programs + compute_celltype_programs() : Wilcoxon DE per cell type vs rest → gene scores + compute_diseaseprogression_programs(): Disease vs healthy DE per cell type → gene scores + compute_nmf_programs() : NMF latent factors → gene scores + compute_joint_nmf_programs() : Joint healthy/disease NMF → gene scores + +Step 2 – Gene programs → SNP annotations + geneprogram_to_bedgraph() : Gene scores × enhancer-gene links → bedgraph + bedgraph_to_snp_annotation() : bedgraph × BIM file → .annot.gz + +Step 3 – S-LDSC heritability enrichment + Delegates to cellink.tl.external._ldsc and _sldsc_utils, which already exist. + run_sclinker_sldsc() : Run S-LDSC for all programs/traits + load_sclinker_results() : Parse postprocess files → DataFrame + compute_escore() : Compute E-score (program − all-protein-coding) +""" + +from __future__ import annotations + +import logging +import os +import re +import subprocess +import tempfile +from pathlib import Path +from typing import Dict, List, Literal, Optional, Tuple, Union + +import numpy as np +import pandas as pd +import scanpy as sc +import scipy.stats +from anndata import AnnData +from collections import Counter +from scipy import sparse + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Public constants +# --------------------------------------------------------------------------- + +ENHANCER_TISSUES = Literal[ + "BLD", "BRN", "GI", "LNG", "LIV", "KID", "SKIN", "FAT", "HRT", "ALL" +] + +ENHANCER_TISSUE_MAP = { + "BLD": "Blood", + "BRN": "Brain", + "GI": "Colon/Intestine", + "LNG": "Lung", + "LIV": "Liver", + "KID": "Kidney", + "SKIN": "Skin", + "FAT": "Adipose", + "HRT": "Heart", + 
"ALL": "All tissues (non-specific)", +} + + +# --------------------------------------------------------------------------- +# Step 1a: Cell-type gene programs +# --------------------------------------------------------------------------- + +def compute_celltype_programs( + adata: AnnData, + celltype_col: str, + *, + min_cells_per_type: int = 10, + use_raw: bool = False, + method: str = "wilcoxon", + prefix: str = "celltype", + out_dir: str | Path | None = None, + save: bool = True, +) -> Dict[str, pd.DataFrame]: + """ + Compute cell-type gene programs by differential expression (one vs rest). + + Each gene receives a probabilistic score in [0, 1] derived from the + Wilcoxon rank-sum test p-value, following the sc-linker paper: + + X = -2 * log(p_adj) [chi2_2 distributed] + score = (X - min(X)) / (max(X) - min(X)) + + Parameters + ---------- + adata + AnnData with log-normalised expression in ``adata.X``. + celltype_col + Column in ``adata.obs`` with cell-type labels. + min_cells_per_type + Cell types with fewer cells are skipped. + use_raw + Passed to ``sc.tl.rank_genes_groups``. + method + DE method; ``"wilcoxon"`` is recommended. + prefix + Prefix for the DE key stored in ``adata.uns``. + out_dir + If provided and ``save=True``, saves CSV files here. + save + Whether to write output CSV files. + + Returns + ------- + dict + ``{"pval": DataFrame, "logfold": DataFrame, "score": DataFrame, + "genescores": DataFrame}`` + All DataFrames are (genes × cell_types). 
+ """ + if celltype_col not in adata.obs.columns: + raise ValueError(f"'{celltype_col}' not found in adata.obs") + + de_key = f"{celltype_col}_DE" + + # Filter rare cell types + counts = Counter(adata.obs[celltype_col]) + adata.obs[f"{celltype_col}_counts"] = [counts[ct] for ct in adata.obs[celltype_col]] + adata_filtered = adata[adata.obs[f"{celltype_col}_counts"] > min_cells_per_type].copy() + + logger.info( + f"Running rank_genes_groups for {celltype_col} " + f"({adata_filtered.n_obs} cells after filtering)" + ) + sc.tl.rank_genes_groups( + adata_filtered, + celltype_col, + key_added=de_key, + use_raw=use_raw, + method=method, + n_genes=adata_filtered.n_vars, + ) + # Propagate back to full adata + adata.uns[de_key] = adata_filtered.uns[de_key] + + results = _extract_de_matrices(adata, de_key, label_col=celltype_col) + genescores = _compute_genescores(results["score"]) + + # If var_names look like ENSG IDs, try to map to HGNC via adata.var + if (genescores.index.str.startswith("ENSG").mean() > 0.5 + and "gene_name" in adata.var.columns): + gene_name_map = adata.var["gene_name"].dropna().to_dict() + # Map ENSG → HGNC for all DataFrames + for key in list(results.keys()): + results[key].index = results[key].index.map( + lambda g: gene_name_map.get(g, g) + ) + genescores = results["score"].copy() + genescores = _compute_genescores(genescores) + logger.info( + "Mapped var_names from ENSG IDs to HGNC gene names using adata.var['gene_name']. " + "This is required for matching against Roadmap/ABC TargetGene columns." + ) + elif genescores.index.str.startswith("ENSG").mean() > 0.5: + logger.warning( + "var_names appear to be ENSG IDs but no 'gene_name' column found in adata.var. " + "The Roadmap/ABC TargetGene column uses HGNC gene names, so annotation will " + "produce empty results unless var_names are HGNC symbols. " + "Add HGNC names to adata.var['gene_name'] before calling this function, " + "or ensure adata.var_names are already HGNC symbols." 
+ ) + + results["genescores"] = genescores + + if save and out_dir is not None: + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + for key, df in results.items(): + df.to_csv(out_dir / f"{prefix}_{key}.csv") + logger.info(f"Saved cell-type program matrices to {out_dir}") + + return results + + +# --------------------------------------------------------------------------- +# Step 1b: Disease-progression gene programs +# --------------------------------------------------------------------------- + +def compute_diseaseprogression_programs( + adata: AnnData, + celltype_col: str, + diagnosis_col: str, + healthy_label: str, + disease_label: str, + *, + min_cells_per_group: int = 5, + use_raw: bool = False, + method: str = "wilcoxon", + prefix: str = "disease", + out_dir: str | Path | None = None, + save: bool = True, +) -> Dict[str, pd.DataFrame]: + """ + Compute disease-progression gene programs. + + For each cell type present in both healthy and disease tissue, performs + DE (disease cells of that type vs healthy cells of that type). + Contamination genes (outlier low-score genes from global disease DE) are + zeroed out before normalisation, following the sc-linker approach. + + Parameters + ---------- + adata + AnnData with both healthy and disease cells. + celltype_col + Column in ``adata.obs`` with cell-type labels. + diagnosis_col + Column in ``adata.obs`` with diagnosis / disease status. + healthy_label + Value in ``diagnosis_col`` that denotes healthy samples. + disease_label + Value in ``diagnosis_col`` that denotes disease samples. + min_cells_per_group + Minimum cells in each (healthy/disease × cell type) group. + use_raw + Passed to ``sc.tl.rank_genes_groups``. + method + DE method. + prefix + Prefix for output file names. + out_dir + Directory to write CSV output files. + save + Whether to write output CSV files. + + Returns + ------- + dict + Same structure as :func:`compute_celltype_programs`. 
+ """ + if celltype_col not in adata.obs.columns: + raise ValueError(f"'{celltype_col}' not found in adata.obs") + if diagnosis_col not in adata.obs.columns: + raise ValueError(f"'{diagnosis_col}' not found in adata.obs") + + disease_label_mapping = {healthy_label: "Healthy", disease_label: "Disease"} + + # Compute contamination from global disease-only DE + disease_subset = adata[adata.obs[diagnosis_col] == disease_label].copy() + sc.tl.rank_genes_groups( + disease_subset, + groupby=celltype_col, + reference="rest", + n_genes=disease_subset.n_vars, + method=method, + use_raw=use_raw, + ) + contamination = _compute_contamination(disease_subset, celltype_col) + adata.uns[f"contamination_{celltype_col}"] = contamination + + # Build DEstatus column: "Healthy_" or "Disease_" + adata.obs["_DEstatus"] = [ + disease_label_mapping.get(diag, "Unknown") + "_" + ct + for diag, ct in zip(adata.obs[diagnosis_col], adata.obs[celltype_col]) + ] + destatus_counts = Counter(adata.obs["_DEstatus"]) + + cell_types = sorted(set(adata.obs[celltype_col])) + processed_cell_types = [] + + for ct in cell_types: + healthy_key = f"Healthy_{ct}" + disease_key = f"Disease_{ct}" + n_h = destatus_counts.get(healthy_key, 0) + n_d = destatus_counts.get(disease_key, 0) + if n_h < min_cells_per_group or n_d < min_cells_per_group: + logger.debug(f"Skipping {ct}: healthy={n_h}, disease={n_d}") + continue + + de_key = f"{ct}_DE" + logger.info(f"Computing disease-progression DE for {ct} (H={n_h}, D={n_d})") + sc.tl.rank_genes_groups( + adata, + groupby="_DEstatus", + reference=healthy_key, + groups=[disease_key], + key_added=de_key, + n_genes=adata.n_vars, + method=method, + use_raw=use_raw, + ) + processed_cell_types.append(ct) + + # Collect all DE results + all_de_keys = [f"{ct}_DE" for ct in processed_cell_types] + results = _extract_de_matrices_disease(adata, all_de_keys, contamination, celltype_col) + genescores = _compute_genescores(results["score"]) + results["genescores"] = genescores + + # 
Clean up temporary column + adata.obs.drop(columns=["_DEstatus"], inplace=True, errors="ignore") + + if save and out_dir is not None: + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + for key, df in results.items(): + df.to_csv(out_dir / f"{prefix}_{key}.csv") + logger.info(f"Saved disease-progression program matrices to {out_dir}") + + return results + + +# --------------------------------------------------------------------------- +# Step 1c: NMF cellular process programs (healthy) +# --------------------------------------------------------------------------- + +def compute_nmf_programs( + adata: AnnData, + *, + n_components: Optional[int] = None, + n_extra: int = 10, + celltype_col: str = "cell_type", + layer: Optional[str] = "counts", + normalize: bool = True, + random_state: int = 0, + device: str = "cuda", + prefix: str = "nmf", + out_dir: str | Path | None = None, + save: bool = True, +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Compute NMF cellular process programs (single healthy tissue). + + Parameters + ---------- + adata + AnnData. Raw counts preferred (from ``layer`` or ``adata.X``). + n_components + Number of NMF factors. Defaults to ``n_cell_types + n_extra``. + n_extra + Added to the number of annotated cell types when auto-setting components. + celltype_col + Column in ``adata.obs`` for auto-setting ``n_components``. + layer + Layer to use. If None, uses ``adata.X``. + normalize + Divide each matrix by its global maximum (as in original sc-linker). + random_state + NMF random seed. + device : str, default ``"cuda"`` + Device for torchnmf backend: ``"cuda"`` or ``"cpu"``. + + - ``"cuda"`` — uses GPU if available, raises a clear warning if CUDA is + not found and falls back to CPU. + - ``"cpu"`` — forces CPU even if a GPU is present. + + If ``torchnmf`` is not installed at all, cellink logs an install hint + and falls back to sklearn NMF (which is slower but always available). 
+ prefix + File name prefix for output CSVs. + out_dir + Directory for output CSVs. + save + Whether to save CSVs. + + Returns + ------- + W : DataFrame + Cell × factor (cell programs), index = obs_names. + H : DataFrame + Gene × factor (gene programs), index = var_names. + corr : DataFrame + Gene × factor (Pearson correlation between gene expression and W scores). + + Notes + ----- + Backend priority: + + 1. **torchnmf** (GPU or CPU) — install with ``pip install torchnmf``. + On large matrices (>50k cells) this is 5–20× faster than sklearn. + 2. **sklearn NMF** with ``init='nndsvda'`` + ``solver='cd'`` — always + available. Significantly faster than ``init='random'`` but still slow + on very large matrices. + """ + from sklearn.decomposition import NMF + + if n_components is None: + n_ct = len(set(adata.obs.get(celltype_col, []))) if celltype_col in adata.obs else 10 + n_components = n_ct + n_extra + logger.info(f"Setting n_components = {n_ct} cell types + {n_extra} = {n_components}") + + X = adata.layers[layer] if layer and layer in adata.layers else adata.X + if sparse.issparse(X): + X = X.toarray() + X = X.astype(np.float64) + if normalize: + X = X / (np.max(X) + 1e-12) + + logger.info(f"Fitting NMF with {n_components} components on {X.shape} matrix") + + # ── Backend 1: torchnmf ──────────────────────────────────────────────── + W_arr = None + H_arr = None + try: + import torch + from torchnmf.nmf import NMF as TorchNMF + except ImportError: + logger.warning( + "torchnmf is not installed — falling back to sklearn NMF, which is " + "significantly slower on large matrices (>50k cells).\n" + "Install the faster backend with:\n" + " pip install torchnmf" + ) + torch = None # type: ignore + + if torch is not None: + # Resolve the requested device + if device == "cuda": + if torch.cuda.is_available(): + _device = "cuda" + logger.info( + f"torchnmf: using GPU ({torch.cuda.get_device_name(0)})" + ) + else: + logger.warning( + "device='cuda' requested but no CUDA GPU 
found. " + "Falling back to CPU for torchnmf. " + "Pass device='cpu' explicitly to suppress this warning." + ) + _device = "cpu" + else: + _device = "cpu" + logger.info("torchnmf: using CPU (device='cpu' requested)") + + X_t = torch.tensor(X, dtype=torch.float32, device=_device) + model_t = TorchNMF(X_t.shape, rank=n_components).to(_device) + model_t.fit(X_t, beta=2, max_iter=200, tol=1e-4) + W_arr = model_t.H.T.detach().cpu().numpy() # (n_cells, n_components) + H_arr = model_t.W.detach().cpu().numpy() # (n_features, n_components) + del X_t, model_t + + # ── Backend 2: sklearn NMF (always available, slower) ───────────────── + if W_arr is None: + logger.info( + "Using sklearn NMF with init='nndsvda' + solver='cd'. " + "This is slower than torchnmf on large matrices. " + "Install torchnmf for GPU-accelerated NMF: pip install torchnmf" + ) + model = NMF( + n_components=n_components, + init="nndsvda", # truncated-SVD warm start — ~5-10x faster than "random" + solver="cd", # coordinate descent — faster than multiplicative update + max_iter=500, + tol=1e-4, + random_state=random_state, + ) + W_arr = model.fit_transform(X) + H_arr = model.components_.T # (n_features, n_components) + + W = pd.DataFrame(W_arr, index=adata.obs_names, columns=[f"NMF_{i}" for i in range(n_components)]) + H = pd.DataFrame(H_arr, index=adata.var_names, columns=[f"NMF_{i}" for i in range(n_components)]) + corr = _compute_nmf_gene_correlations(X, W_arr, adata.var_names, W.columns) + + if save and out_dir is not None: + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + W.to_csv(out_dir / f"{prefix}_cellprograms.csv") + H.to_csv(out_dir / f"{prefix}_geneprograms.csv") + corr.to_csv(out_dir / f"{prefix}_correlation_cellprograms.csv") + + return W, H, corr + + +# --------------------------------------------------------------------------- +# Step 1d: Joint NMF (healthy + disease cellular processes) +# --------------------------------------------------------------------------- + 
def compute_joint_nmf_programs(
    adata_healthy: AnnData,
    adata_disease: AnnData,
    *,
    n_shared: int = 10,
    n_healthy_specific: int = 5,
    n_disease_specific: int = 5,
    gamma: float = 1.0,
    layer: Optional[str] = None,
    random_state: int = 0,
    prefix: str = "joint_nmf",
    out_dir: str | Path | None = None,
    save: bool = True,
) -> Dict[str, pd.DataFrame]:
    """
    Compute joint NMF programs across healthy and disease tissue.

    Decomposes healthy (H) and disease (D) matrices jointly:

        H ≈ [L_shared_H | L_unique_H] × F_H
        D ≈ [L_shared_D | L_unique_D] × F_D

    with a coupling term γ/2 ||L_shared_H − L_shared_D||² that forces
    the shared programs to be similar.  The factorisation itself is delegated
    to ``JointNMFWrapper``; this function only aligns the inputs, labels the
    outputs and optionally writes them to disk.

    Parameters
    ----------
    adata_healthy
        AnnData for healthy tissue.
    adata_disease
        AnnData for disease tissue.  Only genes present in both objects
        (intersection of ``var_names``) are used.
    n_shared
        Number of shared programs between healthy and disease.
    n_healthy_specific
        Number of healthy-specific programs.
    n_disease_specific
        Number of disease-specific programs.
    gamma
        Coupling strength (higher → shared programs more similar).
    layer
        Layer to use as expression matrix.  If None (or not present in
        ``adata.layers``), ``adata.X`` is used.
    random_state
        Random seed for NMF initialisation.
    prefix
        File name prefix for output CSVs.
    out_dir
        Directory for output CSVs.  Nothing is written when this is None.
    save
        Whether to save CSVs.

    Returns
    -------
    dict with keys:
        ``"Wh"``  : healthy cell × factor loadings (shared + healthy-specific)
        ``"Wd"``  : disease cell × factor loadings (shared + disease-specific)
        ``"Hh"``  : gene × factor weights (healthy)
        ``"Hd"``  : gene × factor weights (disease)
        ``"shared_Wh"`` : healthy cell × shared-factor loadings
        ``"shared_Wd"`` : disease cell × shared-factor loadings
        ``"unique_Hh"`` : gene × healthy-specific-factor weights
        ``"unique_Hd"`` : gene × disease-specific-factor weights

    Raises
    ------
    ValueError
        If the two AnnData objects have no gene in common.
    """
    from ._joint_nmf import JointNMFWrapper

    # Both matrices must have identical gene columns, in identical order.
    common_genes = adata_healthy.var_names.intersection(adata_disease.var_names)
    if len(common_genes) == 0:
        raise ValueError("No overlapping genes between healthy and disease AnnData objects.")

    Xh = _get_dense(adata_healthy[:, common_genes], layer)
    Xd = _get_dense(adata_disease[:, common_genes], layer)

    logger.info(
        f"Joint NMF: healthy={Xh.shape}, disease={Xd.shape}, "
        f"shared={n_shared}, H-specific={n_healthy_specific}, D-specific={n_disease_specific}"
    )

    jnmf = JointNMFWrapper(
        Xh=Xh,
        Xd=Xd,
        n_shared=n_shared,
        n_healthy_specific=n_healthy_specific,
        n_disease_specific=n_disease_specific,
        gamma=gamma,
        random_state=random_state,
    )
    jnmf.fit()

    # Shared factors come first, condition-specific factors after them; the
    # ``iloc`` slices below rely on that column layout.
    shared_cols = [f"Shared_{i}" for i in range(n_shared)]
    h_cols = shared_cols + [f"Healthy_{i}" for i in range(n_healthy_specific)]
    d_cols = shared_cols + [f"Disease_{i}" for i in range(n_disease_specific)]

    Wh_df = pd.DataFrame(jnmf.Wh, index=adata_healthy.obs_names, columns=h_cols)
    Wd_df = pd.DataFrame(jnmf.Wd, index=adata_disease.obs_names, columns=d_cols)
    Hh_df = pd.DataFrame(jnmf.Hh.T, index=common_genes, columns=h_cols)
    Hd_df = pd.DataFrame(jnmf.Hd.T, index=common_genes, columns=d_cols)

    results = {
        "Wh": Wh_df,
        "Wd": Wd_df,
        "Hh": Hh_df,
        "Hd": Hd_df,
        "shared_Wh": Wh_df.iloc[:, :n_shared],
        "shared_Wd": Wd_df.iloc[:, :n_shared],
        "unique_Hh": Hh_df.iloc[:, n_shared:],
        "unique_Hd": Hd_df.iloc[:, n_shared:],
    }

    if save and out_dir is not None:
        out_dir = Path(out_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        for key, df in results.items():
            df.to_csv(out_dir / f"{prefix}_{key}.csv")

    return results


# ---------------------------------------------------------------------------
# Step 2a: Gene program → bedgraph (SNP-to-gene linking)
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Step 2b: Bedgraph → SNP annotation (.annot.gz)
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Step 3: S-LDSC heritability enrichment (delegates to existing cellink code)
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Loading / postprocessing results
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Reference data download helpers
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Full pipeline convenience function
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------

def _get_dense(adata: AnnData, layer: Optional[str]) -> np.ndarray:
    """
    Return a dense float64 expression matrix from an AnnData layer or X.

    ``adata.layers[layer]`` is used when ``layer`` is truthy and present in
    ``adata.layers``; otherwise the function silently falls back to
    ``adata.X``.  Sparse input is densified.
    """
    X = adata.layers[layer] if (layer and layer in adata.layers) else adata.X
    if sparse.issparse(X):
        return X.toarray().astype(np.float64)
    return np.array(X, dtype=np.float64)


def _compute_contamination(
    adata: AnnData, celltype_col: str, *, n_std: float = 6.0
) -> Dict[str, list]:
    """
    Identify outlier genes from global disease DE (sc-linker paper method).

    For every cell type in ``adata.obs[celltype_col]`` the genes whose DE score
    in ``adata.uns["rank_genes_groups"]`` lies more than ``n_std`` population
    standard deviations below that cell type's mean score are reported.

    Parameters
    ----------
    adata
        Object on which ``rank_genes_groups`` was run with the default key.
    celltype_col
        Column in ``adata.obs`` holding the cell-type labels.
    n_std
        Width of the outlier cut-off in standard deviations (default 6).

    Returns
    -------
    dict
        ``{cell_type: [gene, ...]}``.  A cell type without usable DE results
        maps to an empty list (deliberate best-effort behaviour).
    """
    # First-seen order keeps the resulting dict reproducible between runs.
    cell_types = list(dict.fromkeys(adata.obs[celltype_col]))

    # Build the per-group tables once instead of once per cell type.
    try:
        de = adata.uns["rank_genes_groups"]
        score_table = pd.DataFrame(de["scores"])
        name_table = pd.DataFrame(de["names"])
    except (KeyError, TypeError):
        return {ct: [] for ct in cell_types}

    contamination: Dict[str, list] = {}
    for ct in cell_types:
        try:
            scores = score_table[ct].to_numpy(dtype=float)
            # ndarray.std() is the population SD (ddof=0).
            threshold = scores.mean() - n_std * scores.std()
            contamination[ct] = name_table[ct][scores < threshold].tolist()
        except (KeyError, TypeError):
            contamination[ct] = []
    return contamination


def _de_to_matrices(de, genes: list, masked_genes=()):
    """
    Scatter one ``rank_genes_groups`` result into gene × group matrices.

    Parameters
    ----------
    de
        Mapping with structured arrays ``names``, ``pvals_adj``,
        ``logfoldchanges`` and ``scores`` (one field per group).
    genes
        Unique gene names defining the row order of the output.
    masked_genes
        Genes whose entries are neutralised: p-value 1.0, log fold change and
        score 0.

    Returns
    -------
    tuple
        ``(groups, pval, logfold, score)`` where the three arrays have shape
        ``(len(genes), len(groups))``.
    """
    gene_index = pd.Index(genes)
    groups = list(de["names"].dtype.fields.keys())
    shape = (len(genes), len(groups))
    # NOTE(review): genes absent from the DE result keep 0 in all three
    # matrices, including the p-value matrix — confirm that this is intended.
    pval_mtx = np.zeros(shape)
    logfold_mtx = np.zeros(shape)
    score_mtx = np.zeros(shape)
    masked = list(masked_genes)

    for j, group in enumerate(groups):
        ranked = pd.Index(np.asarray(de["names"][group]))
        rows = gene_index.get_indexer(ranked)  # -1 for genes not in `genes`
        present = rows >= 0
        is_masked = present & ranked.isin(masked)
        keep = present & ~is_masked

        pval_mtx[rows[keep], j] = np.asarray(de["pvals_adj"][group], dtype=float)[keep]
        logfold_mtx[rows[keep], j] = np.asarray(de["logfoldchanges"][group], dtype=float)[keep]
        score_mtx[rows[keep], j] = np.asarray(de["scores"][group], dtype=float)[keep]
        pval_mtx[rows[is_masked], j] = 1.0

    return groups, pval_mtx, logfold_mtx, score_mtx


def _extract_de_matrices(
    adata: AnnData, de_key: str, label_col: str
) -> Dict[str, pd.DataFrame]:
    """
    Extract pval / logfold / score matrices from adata.uns DE results.

    Rows follow the order of ``adata.var_names`` (duplicates collapsed), so
    the output is identical from run to run.  Columns are the DE groups with
    an ``_L<level>`` suffix, where ``<level>`` is the text after the last
    underscore of ``label_col`` or ``"2"`` when ``label_col`` has no
    underscore.

    Returns
    -------
    dict
        ``{"pval", "logfold", "score"}`` → gene × group DataFrames.
    """
    genes = list(dict.fromkeys(adata.var_names))
    cellsubsets, pval_mtx, logfold_mtx, score_mtx = _de_to_matrices(adata.uns[de_key], genes)

    # Append level suffix so column names match sc-linker convention
    level = label_col.split("_")[-1] if "_" in label_col else "2"
    col_names = [f"{cs}_L{level}" for cs in cellsubsets]

    return {
        "pval": pd.DataFrame(pval_mtx, index=genes, columns=col_names),
        "logfold": pd.DataFrame(logfold_mtx, index=genes, columns=col_names),
        "score": pd.DataFrame(score_mtx, index=genes, columns=col_names),
    }


def _extract_de_matrices_disease(
    adata: AnnData,
    de_keys: List[str],
    contamination: Dict[str, list],
    celltype_col: str,
) -> Dict[str, pd.DataFrame]:
    """
    Extract DE matrices from disease-progression analysis.

    Each entry of ``de_keys`` is expected to be ``"<cell type>_DE"`` and to
    name a ``rank_genes_groups`` result in ``adata.uns``.  Genes listed in
    ``contamination[<cell type>]`` get p-value 1.0 and log fold change / score
    0 for that cell type.  The per-key matrices are concatenated column-wise.

    ``celltype_col`` is accepted for signature compatibility and is not used.

    Returns
    -------
    dict
        ``{"pval", "logfold", "score"}`` → gene × group DataFrames; three empty
        DataFrames when ``de_keys`` is empty.
    """
    genes = list(dict.fromkeys(adata.var_names))
    suffix = "_DE"

    all_pval, all_lf, all_sc = [], [], []
    for de_key in de_keys:
        # Strip only the trailing suffix: a cell type such as "T_DEF" contains
        # "_DE" itself and must not be altered.
        ct = de_key[: -len(suffix)] if de_key.endswith(suffix) else de_key
        cellsubsets, pval_mtx, logfold_mtx, score_mtx = _de_to_matrices(
            adata.uns[de_key], genes, contamination.get(ct, [])
        )
        all_pval.append(pd.DataFrame(pval_mtx, index=genes, columns=cellsubsets))
        all_lf.append(pd.DataFrame(logfold_mtx, index=genes, columns=cellsubsets))
        all_sc.append(pd.DataFrame(score_mtx, index=genes, columns=cellsubsets))

    if not all_pval:
        return {"pval": pd.DataFrame(), "logfold": pd.DataFrame(), "score": pd.DataFrame()}

    return {
        "pval": pd.concat(all_pval, axis=1),
        "logfold": pd.concat(all_lf, axis=1),
        "score": pd.concat(all_sc, axis=1),
    }


def _compute_genescores(score_mtx: pd.DataFrame) -> pd.DataFrame:
    """
    Transform score matrix to probabilistic gene scores in [0, 1].

    Follows sc-linker: clip negative scores to 0, apply survival function of
    normal distribution (converts z-score to one-sided p-value), add epsilon,
    compute X = -2*log(p), then min-max normalize each column to [0, 1].

    A column whose values are all equal maps to 0 throughout.  An empty input
    is returned unchanged.
    """
    if score_mtx.empty:
        return score_mtx

    clipped = score_mtx.clip(lower=0)
    pvals = pd.DataFrame(
        scipy.stats.norm.sf(clipped.values),
        index=clipped.index,
        columns=clipped.columns,
    )
    # Epsilon keeps log() finite when the survival function underflows to 0.
    X = -2 * np.log(pvals + 1e-8)

    col_min = X.min(axis=0)
    # Constant columns have zero range; dividing by 1 leaves them at 0.
    col_range = (X.max(axis=0) - col_min).replace(0, 1.0)
    return (X - col_min) / col_range


def _compute_nmf_gene_correlations(
    X: np.ndarray, W: np.ndarray, var_names, nmf_cols
) -> pd.DataFrame:
    """
    Pearson correlation between gene expression and NMF cell program weights.

    Parameters
    ----------
    X
        Cell × gene expression matrix (dense or scipy-sparse).
    W
        Cell × factor matrix of NMF cell loadings.
    var_names
        Gene labels used as the index of the result.
    nmf_cols
        Factor labels used as the columns of the result.

    Returns
    -------
    DataFrame
        Gene × factor correlations.  Genes with (near-)zero variance, including
        genes that are zero in every cell, get a correlation of 0.
    """
    Xsp = sparse.csc_matrix(X)
    n_cells = Xsp.shape[0]

    # Centre and unit-normalise every factor column at once.
    centered = np.asarray(W, dtype=float)
    centered = centered - centered.mean(axis=0)
    unit = centered / (np.sqrt((centered ** 2).sum(axis=0)) + 1e-12)

    # Per-gene sum of squared deviations, computed once for all factors.
    # Sparse reductions handle all-zero genes; np.add.reduceat does not.
    gene_mean = np.asarray(Xsp.mean(axis=0)).ravel()
    gene_sq = np.asarray(Xsp.multiply(Xsp).sum(axis=0)).ravel()
    gene_norm = np.sqrt(np.maximum(gene_sq - n_cells * gene_mean * gene_mean, 1e-12))

    # `unit` columns sum to zero, so X^T·unit equals the centred covariance
    # term without densifying a centred copy of X.
    numerator = np.asarray(Xsp.T @ unit)
    correlations = numerator / gene_norm[:, None]

    return pd.DataFrame(correlations, columns=nmf_cols, index=var_names)


# \ No newline at end of file
# diff --git a/src/cellink/tl/external/_sclinker_utils.py b/src/cellink/tl/external/_sclinker_utils.py
# new file mode 100644
# index 0000000..f4a71f9
# --- /dev/null
# +++ b/src/cellink/tl/external/_sclinker_utils.py
# @@ -0,0 +1,2511 @@
# """
# sc-linker Step 2: Gene programs → SNP annotations
# ==================================================
#
# This module ports the GSSG `code/GeneSet_toS2G/` pipeline to Python.
#
# The original pipeline:
#   1. geneset_to_bed_sclinker.R  → bedgraphs (Roadmap ∪ ABC + 100kb)
#   2. clean_bed.sh               → bedtools sort + merge to remove overlaps
#   3.
bedgraph_to_annot.py → SNP annotation (.annot.gz) from .bim + bedgraph + +sc-linker runs TWO strategies per program: + a) ABC_Road_{tissue}: weighted by enhancer activity (Roadmap ∪ ABC links) + b) 100kb: gene body ± 100kb window (unweighted, binary-ish) + +The E-score is: + E_score(program, trait) = Enrichment(program) - Enrichment(AllCoding_control) + +**S-LDSC call used by sc-linker** (from GSSG code/ldsc/run_ldsc_reg.sh): + ldsc.py --h2 {trait}.sumstats + --ref-ld-chr {annotations}/{program}/merged. + --frqfile-chr 1000G_Phase3_frq/1000G.EUR.QC. + --w-ld-chr weights.hm3_noMHC. + --overlap-annot --print-coefficients --print-delete-vals + --out heritability/{program}_h2/{trait}_merged + + NOTE: --h2 (partitioned heritability), NOT --h2-cts (cell-type-specific). + +**LDSC bug note** (ldsc issue #342, PR #341): + When using --overlap-annot with multiple chromosomes, old pandas versions + sort columns alphabetically during pd.concat in parse.py, causing an IndexError. + The sclinker-skg author (yyoshiaki) submitted a fix (PR #341) that is not yet + merged upstream. If your LDSC container uses unpatched ldsc with pandas ≥ 1.0, + this will crash. See ``LDSCPatchChecker`` below. 
+ +Reference data files (download from Broad LDSCORE server): + Roadmap links: LDSCORE/Jagadeesh_Dey_sclinker/extras/Roadmap_{tissue}_E.txt.gz + ABC links: LDSCORE/Jagadeesh_Dey_sclinker/extras/ABCpaper_NasserFulcoEngreitz2020_{tissue}_AvgHiC.txt.gz + Gene coords: LDSCORE/Jagadeesh_Dey_sclinker/extras/gene_annotation.txt (TSS positions for 100kb) +""" + +from __future__ import annotations + +import logging +import os +import re +import shutil +import subprocess +import tempfile +from pathlib import Path +from typing import Dict, List, Literal, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Tissue codes +# --------------------------------------------------------------------------- + +# Tissue codes used by sc-linker (Jagadeesh*, Dey* et al. Nat. Genet. 2022). +# These correspond to the ABC strategy tissue labels. +TISSUE_CODES = { + "BLD": "Blood", + "BRN": "Brain", + "GI": "Colon/Intestine", + "LNG": "Lung", + "LIV": "Liver", + "KID": "Kidney", + "SKIN": "Skin", + "FAT": "Adipose", + "HRT": "Heart", +} + +# GCS base URL for sc-linker reference files (public, no auth needed). +# Note: the old alkesgroup.broadinstitute.org FTP mirror returns 404. +_SCLINKER_GCS_BASE = ( + "https://storage.googleapis.com/broad-alkesgroup-public" + "/LDSCORE/Jagadeesh_Dey_sclinker/extras" +) + +# --------------------------------------------------------------------------- +# What is actually in the GCS bucket +# (confirmed from directory listing 2024-04): +# +# extras/ +# AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz +# ← all-tissue ABC links (373 MB) +# RoadmapUABCannot_regions_to_genes.txt.gz +# ← all-tissue Roadmap links (207 MB) +# Roadmap_map_EID_names.txt ← EID → tissue name mapping +# RoadmapLinks/links_E001_*.txt, ... 
← per-EID raw Roadmap link files +# all_genes/ALL/100kb/, 10kb/, ABC_Road_GI_*/ +# ← precomputed annots (outputs, not inputs) +# ldsc_results_examples/ ← example LDSC results +# +# Per-tissue files like Roadmap_BLD_E.txt.gz are NOT in the bucket. +# They are produced by the GSSG R script (geneset_to_bed_sclinker.R) which +# reads the two large combined files and filters by tissue. +# cellink reproduces this filtering in Python via load_roadmap_links() / +# load_abc_links() after downloading the combined files. +# +# gene_annotation.txt is also NOT in the bucket. +# It is an LDSC gene coordinate file generated by cellink via pybiomart. +# --------------------------------------------------------------------------- + +def _http_download(url: str, dest: Path) -> None: + """Download a file via HTTPS, using cellink helper if available.""" + if dest.exists(): + logger.info(f"Already exists, skipping: {dest}") + return + logger.info(f"Downloading {url}") + try: + from cellink.resources._utils import _download_file + _download_file(url, dest, checksum=None) + except Exception: + import urllib.request + urllib.request.urlretrieve(url, str(dest)) + + +def download_sclinker_enhancer_links( + out_dir: Union[str, Path] = "sclinker_refs", + *, + tissue: Optional[str] = None, + chromosomes: List[int] = list(range(1, 23)), +) -> Dict[str, Path]: + """ + Download the sc-linker enhancer-gene reference files from GCS. + + Downloads the two large combined files that cellink then filters by tissue: + + - ``RoadmapUABCannot_regions_to_genes.txt.gz`` (207 MB) — all-tissue + Roadmap enhancer-gene activity scores. + - ``AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz`` + (373 MB) — all-tissue ABC model predictions. + - ``Roadmap_map_EID_names.txt`` — Roadmap EID → tissue name mapping. + + The per-tissue files (``Roadmap_BLD_E.txt.gz`` etc.) do **not** exist in + the bucket. 
They are produced on-the-fly by :func:`load_roadmap_links` and + :func:`load_abc_links` when you pass the downloaded combined files. + + Parameters + ---------- + out_dir + Directory to download files into. + tissue + Accepted for API compatibility but ignored — the combined files cover + all tissues. Pass ``tissue`` to :func:`load_roadmap_links` / + :func:`load_abc_links` to filter after downloading. + chromosomes + Unused — kept for API consistency. + + Returns + ------- + dict with keys: + ``"roadmap"`` → ``Path`` to ``RoadmapUABCannot_regions_to_genes.txt.gz`` + ``"abc"`` → ``Path`` to ``AllPredictions.AvgHiC.ABC0.015...txt.gz`` + ``"roadmap_eid"`` → ``Path`` to ``Roadmap_map_EID_names.txt`` + """ + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + files = { + "roadmap": ( + "RoadmapUABCannot_regions_to_genes.txt.gz", + out_dir / "RoadmapUABCannot_regions_to_genes.txt.gz", + ), + "abc": ( + "AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz", + out_dir / "AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz", + ), + "roadmap_eid": ( + "Roadmap_map_EID_names.txt", + out_dir / "Roadmap_map_EID_names.txt", + ), + } + + downloaded: Dict[str, Path] = {} + for key, (fname, dest) in files.items(): + _http_download(f"{_SCLINKER_GCS_BASE}/{fname}", dest) + downloaded[key] = dest + + logger.info(f"sc-linker enhancer files in {out_dir}: {list(downloaded.keys())}") + return downloaded + + +def _symlink_annots_into_ld_dir(annot_prefix: str, ld_prefix: str) -> None: + """ + Symlink .annot.gz files from the annotations directory into the LD scores + directory. LDSC --overlap-annot looks for {ref-ld-chr}{chrom}.annot.gz + at the same prefix as the LD score files. Cellink writes them separately, + so we symlink (or copy if cross-filesystem) them into place. + """ + annot_dir = Path(annot_prefix).parent + ld_dir = Path(ld_prefix).parent + stem = Path(annot_prefix).name # e.g. "B_naive_L2." 
+ + if not annot_dir.exists(): + logger.warning(f"Annotation directory not found: {annot_dir}") + return + + linked = 0 + for annot_file in sorted(annot_dir.glob(f"{stem}*.annot.gz")): + link = ld_dir / annot_file.name + if not link.exists(): + try: + link.symlink_to(annot_file.resolve()) + linked += 1 + except OSError: + import shutil as _shutil + _shutil.copy2(annot_file, link) + linked += 1 + if linked: + logger.debug(f"Linked {linked} .annot.gz files into {ld_dir}") + + +def run_sclinker_heritability( + ld_prefixes: Dict[str, Dict[str, str]], + sumstats_files: List[str], + ref_ld_chr: str, + w_ld_chr: str, + out_dir: Union[str, Path], + *, + annotation_prefixes: Optional[Dict[str, Dict[str, str]]] = None, + frqfile_chr: Optional[str] = None, + runner=None, +) -> Dict[str, Dict[str, Dict[str, str]]]: + """ + Run S-LDSC ``--h2 --overlap-annot`` for every (program, strategy, trait). + + **Important**: sc-linker uses ``--h2`` (standard partitioned heritability), + not ``--h2-cts``. The ``--ref-ld-chr`` is ``baseline,{program_ldscore}`` + and ``--overlap-annot`` is always used. + + ``--overlap-annot`` requires that ``.annot.gz`` files exist at the same + prefix as the LD score files. Cellink writes them to a separate + ``annotations/`` directory, so this function symlinks them into the LD + score directory automatically via ``annotation_prefixes``. + + Parameters + ---------- + ld_prefixes + Output of ``compute_ld_scores_for_sclinker``: + ``{program: {strategy: ld_score_prefix}}``. + sumstats_files + List of munged ``.sumstats.gz`` files (use absolute paths so the path + survives Singularity bind-mount remapping). + ref_ld_chr + Baseline-LD prefix (e.g. from ``get_1000genomes_ld_scores()``). + w_ld_chr + Regression weights prefix. + out_dir + Directory to write ``.log`` output files. + annotation_prefixes + Output of ``genescores_to_annotations``: + ``{program: {strategy: annot_prefix}}``. 
+ Pass this so LDSC can find the ``.annot.gz`` files for + ``--overlap-annot``. If None, the function infers the location by + replacing ``ldscores`` with ``annotations`` in ``ld_prefix``. + frqfile_chr + Allele frequency file prefix (required with ``--overlap-annot``). + runner + ``LDSCRunner`` instance. If None, uses the global runner. + + Returns + ------- + dict + ``{program: {strategy: {trait: log_path}}}``. + """ + try: + from cellink.tl.external._ldsc import estimate_heritability, get_ldsc_runner + except ImportError: + raise ImportError("cellink._ldsc is required for run_sclinker_heritability") + + if runner is None: + runner = get_ldsc_runner() + + if frqfile_chr is None: + logger.warning( + "frqfile_chr not provided. --overlap-annot requires allele frequency " + "files. Consider passing the 1000G frq prefix from " + "get_1000genomes_frq()." + ) + + out_dir = Path(out_dir).resolve() + out_dir.mkdir(parents=True, exist_ok=True) + results: Dict[str, Dict[str, Dict[str, str]]] = {} + + for program, strategies in ld_prefixes.items(): + results[program] = {} + safe_prog = _safe_filename(program) + + for strategy_name, ld_prefix in strategies.items(): + results[program][strategy_name] = {} + + # ── Symlink .annot.gz files into the LD score directory ────────── + # LDSC --overlap-annot needs .annot.gz at the same prefix as .l2.ldscore.gz + if annotation_prefixes and program in annotation_prefixes: + annot_prefix = annotation_prefixes[program].get(strategy_name) + else: + # Infer: ldscores/.../B_naive_L2. → annotations/.../B_naive_L2. 
+ annot_prefix = ( + ld_prefix + .replace("/ldscores/", "/annotations/") + .replace(os.sep + "ldscores" + os.sep, os.sep + "annotations" + os.sep) + ) + if annot_prefix: + _symlink_annots_into_ld_dir(annot_prefix, ld_prefix) + + for sumstats_file in sumstats_files: + trait = Path(sumstats_file).stem.replace(".sumstats", "") + prog_out = out_dir / safe_prog / _safe_filename(strategy_name) + prog_out.mkdir(parents=True, exist_ok=True) + out_prefix = str(prog_out / trait) + + # sc-linker ref-ld = baseline + program LD scores + ref_ld = f"{ref_ld_chr},{ld_prefix}" + + logger.info(f"S-LDSC: {program}/{strategy_name}/{trait}") + estimate_heritability( + sumstats_file=str(Path(sumstats_file).resolve()), + ref_ld_chr=ref_ld, + w_ld_chr=w_ld_chr, + out_prefix=out_prefix, + overlap_annot=True, + frqfile_chr=frqfile_chr, + print_coefficients=True, + print_delete_vals=True, + run=True, + runner=runner, + ) + results[program][strategy_name][trait] = f"{out_prefix}.log" + + return results + + +def download_sclinker_references( + out_dir: Union[str, Path] = "sclinker_references", + tissue: Optional[str] = None, + download_roadmap: bool = True, + download_abc: bool = True, + download_gene_coords: bool = True, + download_bims: bool = False, # use get_1000genomes_plink_files() instead + download_frq: bool = False, # use get_1000genomes_frq() instead + download_weights: bool = False, # use get_1000genomes_ld_weights() instead + download_hapmap3: bool = False, # use get_1000genomes_hapmap3() instead + download_baseline: bool = False, + chromosomes: List[int] = list(range(1, 23)), +) -> Dict[str, Path]: + """ + Download sc-linker reference files. + + **For 1000G files (PLINK, LD scores, weights, frq, HapMap3), use the + cellink resource helpers instead** — they download from Zenodo + (https://zenodo.org/records/10515792) which is reliable and fast: + + .. 
code-block:: python + + from cellink.resources import ( + get_1000genomes_ld_scores, + get_1000genomes_ld_weights, + get_1000genomes_plink_files, + get_1000genomes_frq, + get_1000genomes_hapmap3, + ) + + This function handles only the sc-linker-specific extras (enhancer-gene + links, gene coordinates) which are only available via Google Cloud Storage: + + ``gs://broad-alkesgroup-public/LDSCORE/Jagadeesh_Dey_sclinker/extras/`` + + Parameters + ---------- + out_dir + Root directory for downloaded files. + tissue + Tissue code (e.g. ``"BLD"``). None downloads all tissues. + download_roadmap, download_abc, download_gene_coords + Which enhancer-gene link files to download (default: all True). + download_bims, download_frq, download_weights, download_hapmap3, download_baseline + Kept for API compatibility but ignored — use the cellink resource + helpers listed above instead. + chromosomes + Ignored (kept for API compatibility). + """ + if any([download_bims, download_frq, download_weights, + download_hapmap3, download_baseline]): + logger.warning( + "download_bims / download_frq / download_weights / download_hapmap3 / " + "download_baseline are ignored. Use the cellink resource helpers " + "get_1000genomes_plink_files(), get_1000genomes_frq(), " + "get_1000genomes_ld_weights(), get_1000genomes_hapmap3() instead — " + "they download from Zenodo (https://zenodo.org/records/10515792)." + ) + + # Delegate to the focused helper + return download_sclinker_enhancer_links( + out_dir=out_dir, + tissue=tissue, + ) + + +# --------------------------------------------------------------------------- +# Step 2a: Load enhancer-gene links +# --------------------------------------------------------------------------- + +def load_roadmap_eid_map(eid_file: Union[str, Path]) -> Dict[str, str]: + """ + Load the Roadmap EID → tissue-name mapping from ``Roadmap_map_EID_names.txt``. + + Returns a dict mapping EID (e.g. ``"E062"``) to tissue label + (e.g. 
# Keywords matched case-insensitively (as substrings) against Roadmap tissue
# names, which are full descriptive strings such as
# "Primary mononuclear cells from peripheral blood" or "Fetal Brain Male".
_ROADMAP_TISSUE_KEYWORDS: Dict[str, List[str]] = {
    "BLD": ["blood", "mononuclear", "t cell", "t-cell", "b cell", "b-cell",
            "nk cell", "cd4", "cd8", "erythro", "hsc", "monocyte",
            "neutrophil", "lymph"],
    "BRN": ["brain", "neuron", "cortex", "cerebellum", "hippocampus",
            "neural", "glia"],
    "GI": ["colon", "intestin", "sigmoid", "rectum", "duodenum",
           "stomach", "bowel", "gastrointestinal"],
    "LNG": ["lung", "bronchial", "alveolar", "pulmonary"],
    "LIV": ["liver", "hepat"],
    "KID": ["kidney", "renal"],
    "SKIN": ["skin", "keratinocyte", "fibroblast", "melanocyte", "dermis",
             "epiderm"],
    "FAT": ["adipos", "fat", "adipocyte"],
    "HRT": ["heart", "cardiac", "cardiomyo", "ventricle", "aorta"],
}

# Lower-cased column names accepted as the tissue-name column.
_ROADMAP_TISSUE_COLUMNS = (
    "tissuename", "tissue_name", "tissue", "celltype", "cell_type", "biosample",
)


def _read_roadmap_table(roadmap_file: Union[str, Path]) -> pd.DataFrame:
    """Read the links table as CSV, falling back to TSV, and strip header whitespace."""
    table = pd.read_csv(roadmap_file, sep=",", compression="infer")
    if len(table.columns) == 1:
        # A single column means the comma split failed → probably tab-separated.
        table = pd.read_csv(roadmap_file, sep="\t", compression="infer")
    table.columns = [c.strip() for c in table.columns]
    return table


def load_roadmap_links(
    roadmap_file: Union[str, Path],
    tissue: Optional[str] = None,
    eid_map_file: Optional[Union[str, Path]] = None,
) -> pd.DataFrame:
    """
    Load Roadmap enhancer-gene links, optionally filtered to one tissue.

    The file is read once (comma-separated first, tab-separated as a
    fallback). When ``tissue`` is given, rows are filtered by keyword:

    1. If a tissue-name column exists (``tissuename``, ``tissue_name``,
       ``tissue``, ``celltype``, ``cell_type`` or ``biosample``,
       case-insensitive), rows whose value contains any keyword are kept.
    2. Otherwise, if ``eid_map_file`` is given, the EID → tissue-name map is
       loaded with :func:`load_roadmap_eid_map`, the EIDs whose name contains
       a keyword are selected, and rows are filtered on the first column whose
       name contains ``eid``.
    3. Otherwise a warning is logged and nothing is filtered.

    If filtering leaves zero rows, a warning is logged and the complete,
    unfiltered table is returned instead.

    Parameters
    ----------
    roadmap_file
        Path to the combined links file (e.g.
        ``RoadmapUABCannot_regions_to_genes.txt.gz``).
    tissue
        Tissue code (e.g. ``"BLD"``). Known codes map to a keyword list; any
        other value is used, lower-cased, as the single keyword. ``None``
        returns all rows.
    eid_map_file
        Path to ``Roadmap_map_EID_names.txt``. Only consulted when ``tissue``
        is given and no tissue-name column is present.

    Returns
    -------
    pd.DataFrame
        The (possibly filtered) links table with whitespace-stripped column
        names. Columns are whatever the file contains.
    """
    unfiltered = _read_roadmap_table(roadmap_file)
    logger.info(
        f"Loaded {len(unfiltered):,} Roadmap links from {Path(roadmap_file).name}. "
        f"Columns: {unfiltered.columns.tolist()}"
    )

    if tissue is None:
        return unfiltered

    tissue_upper = tissue.upper()
    keywords = _ROADMAP_TISSUE_KEYWORDS.get(tissue_upper, [tissue_upper.lower()])
    filtered = unfiltered
    filter_col: Optional[str] = None

    # 1. Try a direct tissue-name column (RoadmapUABCannot has 'tissuename').
    tissue_col = next(
        (c for c in unfiltered.columns if c.lower() in _ROADMAP_TISSUE_COLUMNS),
        None,
    )
    if tissue_col is not None:
        pattern = "|".join(re.escape(kw) for kw in keywords)
        mask = unfiltered[tissue_col].str.lower().str.contains(pattern, regex=True, na=False)
        filtered = unfiltered[mask]
        filter_col = tissue_col
        logger.info(
            f"Filtered to {len(filtered):,} Roadmap rows for tissue={tissue} "
            f"via column '{tissue_col}' (keywords: {keywords})"
        )

    # 2. Fall back to the EID map if provided and no tissue-name column exists.
    elif eid_map_file is not None:
        eid_map = load_roadmap_eid_map(eid_map_file)
        matching_eids = {
            eid for eid, name in eid_map.items()
            if any(kw in name.lower() for kw in keywords)
        }
        eid_col = next((c for c in unfiltered.columns if "eid" in c.lower()), None)
        if eid_col is not None and matching_eids:
            filtered = unfiltered[unfiltered[eid_col].isin(matching_eids)]
            filter_col = eid_col
            logger.info(
                f"Filtered to {len(filtered):,} Roadmap rows for tissue={tissue} "
                f"via EID column '{eid_col}' ({len(matching_eids)} EIDs)"
            )
        elif eid_col is None:
            logger.warning(
                f"Could not filter Roadmap for tissue={tissue}: "
                f"no tissue-name or EID column found. "
                f"Columns present: {unfiltered.columns.tolist()}. "
                "Returning unfiltered DataFrame."
            )
        else:
            logger.warning(
                f"Could not filter Roadmap for tissue={tissue}: "
                f"no EID in {Path(eid_map_file).name} matches keywords {keywords}. "
                "Returning unfiltered DataFrame."
            )
    else:
        logger.warning(
            f"tissue={tissue} specified but no tissue-name column found "
            "and no eid_map_file provided. Returning unfiltered DataFrame. "
            f"Columns present: {unfiltered.columns.tolist()}"
        )

    # Safety net: an over-strict filter must not silently yield an empty table.
    # The table read above is reused, so the file is not read a second time.
    if len(filtered) == 0:
        where = f"column '{filter_col}' of " if filter_col is not None else ""
        logger.warning(
            f"Roadmap tissue filter for tissue='{tissue}' returned 0 rows. "
            f"Using ALL rows instead. "
            f"This usually means the values in {where}{roadmap_file} "
            f"don't match the keywords {keywords}; inspect its unique values."
        )
        return unfiltered

    return filtered
+ + The actual file in the GCS bucket is + ``AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz`` + — a single combined file for all tissues. Per-tissue files do not exist. + + Parameters + ---------- + abc_file + Path to the combined ABC predictions file, downloaded by + :func:`download_sclinker_enhancer_links`. + tissue + Tissue code to filter to (e.g. ``"BLD"``). If None, returns all rows. + Matched against the ``CellType`` or equivalent column in the file. + + Returns + ------- + pd.DataFrame with columns: chr, start, end, TargetGene, CellType, + ABC.Score (and others from the predictions file). + """ + df = pd.read_csv(abc_file, sep="\t", compression="infer") + df.columns = [c.strip() for c in df.columns] + logger.info( + f"Loaded {len(df):,} ABC predictions from {Path(abc_file).name}. " + f"Columns: {df.columns.tolist()}" + ) + + if tissue is not None: + tissue_keywords = { + "BLD": ["blood", "k562", "gm12878", "cd4", "cd8", "nk-cell", "monocyte"], + "BRN": ["brain", "neuron", "astrocyte", "microglia"], + "GI": ["colon", "intestin", "sigmoid", "rectum", "transverse"], + "LNG": ["lung", "bronchial", "alveolar", "imr90"], + "LIV": ["liver", "hepg2", "hepat"], + "KID": ["kidney"], + "SKIN": ["skin", "keratinocyte", "fibroblast", "melanocyte", "dermis"], + "FAT": ["adipos", "fat"], + "HRT": ["heart", "cardiac", "cardiomyo"], + } + keywords = tissue_keywords.get(tissue.upper(), [tissue.lower()]) + # Find the cell type column + cell_col = next( + (c for c in df.columns + if c.lower() in ("celltype", "cell_type", "tissue", "biosample")), + None + ) + if cell_col: + pattern = "|".join(re.escape(kw) for kw in keywords) + mask = df[cell_col].str.lower().str.contains(pattern, regex=True, na=False) + df = df[mask] + logger.info( + f"Filtered to {len(df):,} ABC rows for tissue={tissue} " + f"(keywords: {keywords})" + ) + else: + logger.warning( + f"Could not find CellType column for tissue={tissue} filtering. 
" + f"Available columns: {df.columns.tolist()[:10]}. " + "Returning unfiltered DataFrame." + ) + + return df + + +# --------------------------------------------------------------------------- +# Gene coordinate file for the 100 kb window strategy +# --------------------------------------------------------------------------- +# LDSC make_annot.py --gene-coord-file expects a space-separated file: +# GENE CHR START END +# +# The GENE column is matched by plain string equality against the gene-set +# file passed to make_annot. It must use the SAME identifiers: +# +# gene_id_type="ensembl" → GENE = ENSG IDs (e.g. ENSG00000099338) +# Use this when your AnnData var_names are Ensembl IDs — which is +# the typical case for sc-linker gene programs. +# +# gene_id_type="hgnc" → GENE = HGNC symbols (e.g. CD19, FOXP3) +# Use this when your gene-set files contain gene names. +# +# The file does NOT ship with LDSC. cellink generates both variants from +# Ensembl BioMart (pybiomart) in a single query and caches them. +# --------------------------------------------------------------------------- + +_GENE_COORD_CACHE = { + "ensembl": "gene_coord_ensembl.txt", + "hgnc": "gene_coord_hgnc.txt", +} +_VALID_CHRS = {str(c) for c in range(1, 23)} | {"X", "Y", "MT"} + + +def _query_biomart_and_write_gene_coords(data_dir: Path) -> None: + """ + Query Ensembl BioMart once and write both ENSG and HGNC gene coord files. + + A single BioMart query retrieves both ``ensembl_gene_id`` and + ``external_gene_name`` alongside coordinates, so both cache files are + populated in one network round-trip. + + Requires ``pybiomart`` (``pip install pybiomart``). 
+ """ + try: + from pybiomart import Server + except ImportError as exc: + raise ImportError( + "pybiomart is required to generate the gene coordinate file.\n" + "Install it with: pip install pybiomart" + ) from exc + + logger.info("Querying Ensembl BioMart for human gene coordinates ...") + server = Server(host="http://www.ensembl.org") + dataset = server.marts["ENSEMBL_MART_ENSEMBL"].datasets["hsapiens_gene_ensembl"] + + df = dataset.query(attributes=[ + "ensembl_gene_id", # → ENSG IDs + "external_gene_name", # → HGNC names + "chromosome_name", + "start_position", + "end_position", + ]) + df.columns = ["ensembl_gene_id", "hgnc_name", "CHR", "START", "END"] + + # Standard chromosomes only + df = df[df["CHR"].astype(str).isin(_VALID_CHRS)] + df = df.dropna(subset=["ensembl_gene_id"]) + df = df[df["ensembl_gene_id"].str.strip() != ""] + + logger.info(f"BioMart returned {len(df):,} gene entries") + data_dir.mkdir(parents=True, exist_ok=True) + + def _dedup(frame: pd.DataFrame) -> pd.DataFrame: + """ + Deduplicate on GENE, keeping the widest genomic span per gene. + BioMart returns one row per transcript, so a single gene can appear + many times. We collapse to one row per unique GENE name: smallest + START and largest END across all transcripts, which gives the full + gene body extent. This is what LDSC uses for window-based annotations. 
def get_gene_annotation(
    path: Optional[Union[str, Path]] = None,
    gene_id_type: str = "hgnc",
    data_home: Optional[Union[str, Path]] = None,
    refresh: bool = False,
) -> Path:
    """
    Locate or generate the LDSC gene coordinate file.

    Resolution order:

    1. ``path``, if given and the file exists.
    2. The cached file for ``gene_id_type`` in the cellink data directory,
       unless ``refresh`` is True.
    3. A fresh BioMart query via
       :func:`_query_biomart_and_write_gene_coords`, after which the cache
       path for ``gene_id_type`` is returned.

    The ``GENE`` column of the file must match the identifiers used in your
    gene-program index / gene-set files, so pick ``gene_id_type`` accordingly.

    Parameters
    ----------
    path
        Explicit path to an existing ``GENE CHR START END`` file. Returned
        as-is if it exists. If it does not exist a warning is logged and the
        cached/BioMart file is used instead.
    gene_id_type : ``"ensembl"`` | ``"hgnc"``
        Which identifier to put in the ``GENE`` column. It is validated even
        when ``path`` is given.

        ``"hgnc"`` *(default)*
            HGNC gene symbols (e.g. ``CD19``, ``FOXP3``). Use this when
            your gene-set files contain gene names.

        ``"ensembl"``
            Ensembl stable IDs (e.g. ``ENSG00000099338``). Use this when
            your AnnData ``var_names`` are ENSG IDs.

    data_home
        Override for the cellink data directory.
    refresh
        If True, re-query BioMart even if cached files already exist.

    Returns
    -------
    pathlib.Path
        Path to the gene coordinate file.

    Raises
    ------
    ValueError
        If ``gene_id_type`` is not ``"ensembl"`` or ``"hgnc"``.
    """
    if gene_id_type not in _GENE_COORD_CACHE:
        raise ValueError(
            f"gene_id_type must be 'ensembl' or 'hgnc', got {gene_id_type!r}"
        )

    # 1. Explicit path
    if path is not None:
        explicit = Path(path)
        if explicit.exists():
            return explicit
        # Do not swallow a wrong path silently: the caller asked for a
        # specific file and is about to get a different one.
        logger.warning(
            f"Gene coordinate file not found: {explicit}. "
            f"Falling back to cached/BioMart coordinates (gene_id_type='{gene_id_type}')."
        )

    # Imported lazily so that the explicit-path case needs no cellink resources.
    from cellink.resources._utils import get_data_home

    data_dir = Path(get_data_home(data_home))
    cache = data_dir / _GENE_COORD_CACHE[gene_id_type]

    # 2. Cached file
    if cache.exists() and not refresh:
        logger.info(f"Using cached gene coordinates ({gene_id_type}): {cache}")
        return cache

    # 3. Generate from BioMart (writes both variants in one query)
    _query_biomart_and_write_gene_coords(data_dir)
    return cache
def genescores_to_abc_road_bedgraph(
    genescores: pd.DataFrame,
    roadmap_links: pd.DataFrame,
    abc_links: pd.DataFrame,
    *,
    roadmap_gene_col: Optional[str] = None,
    roadmap_activity_col: Optional[str] = None,
    abc_gene_col: Optional[str] = None,
    abc_activity_col: Optional[str] = None,
    chr_col: str = "chr",
    start_col: str = "start",
    end_col: str = "end",
    use_bedtools_for_merge: bool = True,
) -> Dict[str, pd.DataFrame]:
    """
    Convert gene scores to ABC_Road bedgraphs (Roadmap ∪ ABC strategy).

    For each program (column of ``genescores``), every enhancer interval
    linked to a gene with a positive score contributes one bedgraph row with

        score(interval) = gene_score × link_activity

    Roadmap rows come first, then ABC rows. The combined table is handed to
    ``_merge_bedgraph_bedtools`` (when ``use_bedtools_for_merge`` is True and
    a ``bedtools`` binary is on PATH) or to ``_merge_bedgraph_python``.

    Parameters
    ----------
    genescores
        DataFrame (genes × programs). The index must hold string gene
        identifiers of the same kind as the link tables' gene columns; both
        sides are upper-cased before matching. Duplicate genes are collapsed
        to their per-program maximum.
    roadmap_links
        Roadmap enhancer-gene links (from ``load_roadmap_links``).
    abc_links
        ABC enhancer-gene links (from ``load_abc_links``).
    roadmap_gene_col, abc_gene_col
        Gene column of the respective link table. If None, the first match
        from a list of common names (``Gene``, ``TargetGene``, ...) is used.
    roadmap_activity_col, abc_activity_col
        Weight column of the respective link table. If None, weight 1 is
        used. If a column is named but absent, a warning is logged and
        weight 1 is used.
    chr_col, start_col, end_col
        Genomic coordinate columns, shared by both link tables.
    use_bedtools_for_merge
        Prefer ``bedtools`` for merging when it is available.

    Returns
    -------
    dict
        Program name → merged bedgraph DataFrame. Programs without any linked
        interval are omitted (a warning is logged).

    Raises
    ------
    KeyError
        If a gene column is not given and cannot be auto-detected.
    """
    # Common gene-column spellings seen in the sc-linker reference files.
    roadmap_gene_candidates = ["Gene", "gene", "GENE", "gene_name", "GeneName",
                               "target_gene", "TargetGene"]
    abc_gene_candidates = ["TargetGene", "target_gene", "Gene", "gene",
                           "GENE", "GeneName", "gene_name"]

    if roadmap_gene_col is None:
        roadmap_gene_col = next(
            (c for c in roadmap_gene_candidates if c in roadmap_links.columns), None
        )
        if roadmap_gene_col is None:
            raise KeyError(
                f"Cannot find a gene-name column in roadmap_links. "
                f"Columns present: {roadmap_links.columns.tolist()}\n"
                f"Pass roadmap_gene_col=<column name> explicitly."
            )
        logger.info(f"Auto-detected Roadmap gene column: '{roadmap_gene_col}'")

    if abc_gene_col is None:
        abc_gene_col = next(
            (c for c in abc_gene_candidates if c in abc_links.columns), None
        )
        if abc_gene_col is None:
            raise KeyError(
                f"Cannot find a gene-name column in abc_links. "
                f"Columns present: {abc_links.columns.tolist()}\n"
                f"Pass abc_gene_col=<column name> explicitly."
            )
        logger.info(f"Auto-detected ABC gene column: '{abc_gene_col}'")

    # Normalise gene names and collapse duplicates to the per-program maximum.
    genescores = genescores.copy()
    genescores.index = genescores.index.str.upper()
    if genescores.index.duplicated().any():
        genescores = genescores.groupby(level=0).max()

    def _build_gene_index(
        links: pd.DataFrame,
        gene_col: str,
        activity_col: Optional[str],
        label: str,
    ) -> Dict[str, tuple]:
        """
        Return ``{GENE_UPPER: (chr, start, end, weight)}``, four parallel
        arrays per gene, so each gene lookup is a dict access rather than a
        scan of the whole link table.
        """
        if activity_col and activity_col in links.columns:
            weights = links[activity_col].astype(np.float64).to_numpy()
        else:
            if activity_col:
                logger.warning(
                    f"Activity column '{activity_col}' not found in {label} links; "
                    "using weight=1 for every link."
                )
            weights = np.ones(len(links), dtype=np.float64)

        # Only the needed columns are gathered; the link table is not copied.
        slim = pd.DataFrame({
            "gene": links[gene_col].str.upper().to_numpy(),
            "chr": links[chr_col].to_numpy(),
            "start": links[start_col].astype(np.int64).to_numpy(),
            "end": links[end_col].astype(np.int64).to_numpy(),
            "weight": weights,
        })

        index: Dict[str, tuple] = {}
        # groupby drops rows whose gene is missing (NaN).
        for gene, grp in slim.groupby("gene", sort=False):
            index[gene] = (
                grp["chr"].to_numpy(),
                grp["start"].to_numpy(),
                grp["end"].to_numpy(),
                grp["weight"].to_numpy(),
            )
        return index

    logger.info("Building Roadmap gene index ...")
    roadmap_idx = _build_gene_index(
        roadmap_links, roadmap_gene_col, roadmap_activity_col, "Roadmap"
    )
    logger.info(f"  {len(roadmap_idx):,} unique genes in Roadmap index")

    logger.info("Building ABC gene index ...")
    abc_idx = _build_gene_index(abc_links, abc_gene_col, abc_activity_col, "ABC")
    logger.info(f"  {len(abc_idx):,} unique genes in ABC index")

    bedgraphs: Dict[str, pd.DataFrame] = {}

    for program in genescores.columns:
        scores = genescores[program]
        scores = scores[scores > 0]

        roadmap_hits: List[tuple] = []
        abc_hits: List[tuple] = []
        for gene, gene_score in scores.items():
            for gene_index, hits in ((roadmap_idx, roadmap_hits), (abc_idx, abc_hits)):
                entry = gene_index.get(gene)
                if entry is not None:
                    chrs_g, starts_g, ends_g, weights_g = entry
                    hits.append((chrs_g, starts_g, ends_g, weights_g * gene_score))

        # Roadmap rows first, then ABC rows.
        hits = roadmap_hits + abc_hits
        if not hits:
            logger.warning(f"No enhancer-linked intervals for program '{program}'")
            continue

        bg = pd.DataFrame({
            "chr": np.concatenate([h[0] for h in hits]),
            "start": np.concatenate([h[1] for h in hits]),
            "end": np.concatenate([h[2] for h in hits]),
            "score": np.concatenate([h[3] for h in hits]),
        })

        if use_bedtools_for_merge and shutil.which("bedtools"):
            bg = _merge_bedgraph_bedtools(bg)
        else:
            bg = _merge_bedgraph_python(bg)

        bedgraphs[program] = bg

    return bedgraphs
+ """ + window_bp = window_kb * 1000 + + genescores = genescores.copy() + genescores.index = genescores.index.str.upper() + + # Build gene coord lookup once — deduplication already done at load time + ga = gene_annotation.copy() + ga["_gene_upper"] = ga[gene_col].str.upper() + ga = ga.set_index("_gene_upper") + ga_chr = ga[chr_col].to_dict() + ga_start = ga[start_col].to_dict() + ga_end = ga[end_col].to_dict() + + bedgraphs: Dict[str, pd.DataFrame] = {} + + # Deduplicate genescores index — AllCoding concat or BioMart can introduce + # duplicate gene names. Keep the row with the highest max score per gene. + if genescores.index.duplicated().any(): + n_before = len(genescores) + genescores = genescores.groupby(level=0).max() + logger.debug(f"Deduplicated genescores index: {n_before} → {len(genescores)} genes") + + # Restrict to genes present in the gene annotation + common_genes = genescores.index[genescores.index.isin(ga_chr)] + if len(common_genes) == 0: + logger.warning("No overlap between genescores genes and gene_annotation. 
" + "Check gene_id_type.") + return bedgraphs + + # Use reindex (not .loc) to avoid duplicate-index row expansion + gs_sub = genescores.reindex(common_genes) + + # Build coordinate arrays once — one entry per unique gene + chrs_arr = np.array([ga_chr[g] for g in common_genes], dtype=object) + starts_arr = np.array([max(0, int(ga_start[g]) - window_bp) for g in common_genes], dtype=np.int64) + ends_arr = np.array([int(ga_end[g]) + window_bp for g in common_genes], dtype=np.int64) + + for program in gs_sub.columns: + scores = gs_sub[program].to_numpy() + nonzero = scores > 0 + if not nonzero.any(): + logger.warning(f"No gene coordinates found for program '{program}'") + continue + + bg = pd.DataFrame({ + "chr": chrs_arr[nonzero], + "start": starts_arr[nonzero], + "end": ends_arr[nonzero], + "score": scores[nonzero], + }) + + if use_bedtools_for_merge and shutil.which("bedtools"): + bg = _merge_bedgraph_bedtools(bg) + else: + bg = _merge_bedgraph_python(bg) + + bedgraphs[program] = bg + + return bedgraphs + + +# --------------------------------------------------------------------------- +# Step 2c: Bedgraph → SNP annotation (.annot.gz) +# --------------------------------------------------------------------------- + +def bedgraph_to_snp_annotation( + bedgraph: Union[pd.DataFrame, str, Path], + bim_file: Union[str, Path], + out_prefix: str, + *, + use_bedtools: bool = True, +) -> Path: + """ + Convert a bedgraph file to an S-LDSC annotation file (.annot.gz). + + Implements the logic of GSSG's ``bedgraph_to_annot.py``. Each SNP in the + BIM file is assigned the score of the bedgraph interval it falls within + (0 if not covered). + + Parameters + ---------- + bedgraph + Bedgraph DataFrame with columns (chr, start, end, score), or path to file. + bim_file + PLINK BIM file defining SNP positions. + out_prefix + Output prefix; the file ``{out_prefix}.annot.gz`` is written. + use_bedtools + If True and bedtools is available, use bedtools intersect (more robust). 
def genescores_to_annotations(
    genescores: pd.DataFrame,
    roadmap_links: pd.DataFrame,
    abc_links: pd.DataFrame,
    gene_annotation: pd.DataFrame,
    bim_prefix: str,
    out_dir: Union[str, Path],
    *,
    tissue: str = "BLD",
    chromosomes: List[int] = list(range(1, 23)),
    window_kb: int = 100,
    save_bedgraphs: bool = True,
    use_bedtools: bool = True,
    **link_kwargs,
) -> Dict[str, Dict[str, str]]:
    """
    Full Step 2: gene scores → bedgraphs → per-chromosome SNP annotations.

    Two strategies are computed per program:

    - ``ABC_Road_{tissue}``: via :func:`genescores_to_abc_road_bedgraph`
    - ``100kb``: via :func:`genescores_to_100kb_bedgraph`

    For every (program, strategy) with a bedgraph, one ``.annot.gz`` file is
    written per chromosome that has a BIM file. Chromosomes with no bedgraph
    rows get an all-zero annotation via ``_write_zero_annotation``.

    Parameters
    ----------
    genescores
        DataFrame (genes × programs).
    roadmap_links
        Roadmap enhancer-gene links (from ``load_roadmap_links``).
    abc_links
        ABC enhancer-gene links (from ``load_abc_links``).
    gene_annotation
        Gene coordinates (from ``load_gene_annotation``).
    bim_prefix
        Prefix for per-chromosome BIM files; the file for chromosome ``c`` is
        ``f"{bim_prefix}{c}.bim"``. Missing BIM files are skipped.
    out_dir
        Root output directory (resolved to an absolute path). Layout:
        ``out_dir/<program>/<strategy>/<program>.<chrom>.annot.gz``.
    tissue
        Tissue code, used only to name the ``ABC_Road_{tissue}`` strategy.
    chromosomes
        Chromosomes to process. The default list is only read, never mutated.
    window_kb
        Window for the 100kb strategy.
    save_bedgraphs
        If True, also write ``<program>.bedgraph`` (tab-separated, no header)
        into each strategy directory.
    use_bedtools
        Forwarded as ``use_bedtools_for_merge`` to the bedgraph builders and
        as ``use_bedtools`` to :func:`bedgraph_to_snp_annotation`.
    **link_kwargs
        Forwarded to :func:`genescores_to_abc_road_bedgraph`.

    Returns
    -------
    dict
        ``{program: {f"ABC_Road_{tissue}": annot_prefix, "100kb": annot_prefix}}``
        where ``annot_prefix`` looks like
        ``"out_dir/program/ABC_Road_BLD/program."`` (chromosome number not
        included). A strategy is absent when no bedgraph was produced for it.
    """
    out_dir = Path(out_dir).resolve()  # absolute path — required by LDSCRunner
    out_dir.mkdir(parents=True, exist_ok=True)

    logger.info("Computing ABC_Road bedgraphs...")
    abc_road_bgs = genescores_to_abc_road_bedgraph(
        genescores, roadmap_links, abc_links,
        use_bedtools_for_merge=use_bedtools, **link_kwargs
    )

    logger.info("Computing 100kb bedgraphs...")
    kb100_bgs = genescores_to_100kb_bedgraph(
        genescores, gene_annotation,
        window_kb=window_kb,
        use_bedtools_for_merge=use_bedtools,
    )

    strategies = [
        (f"ABC_Road_{tissue}", abc_road_bgs),
        ("100kb", kb100_bgs),
    ]
    annotation_prefixes: Dict[str, Dict[str, str]] = {}

    for program in genescores.columns:
        safe_name = _safe_filename(program)
        prog_dir = out_dir / safe_name
        prog_dir.mkdir(exist_ok=True)

        annotation_prefixes[program] = {}

        for strategy_name, bgs in strategies:
            if program not in bgs:
                logger.warning(f"No bedgraph for '{program}' / {strategy_name}, skipping")
                continue

            bg_df = bgs[program]
            strategy_dir = prog_dir / strategy_name
            strategy_dir.mkdir(parents=True, exist_ok=True)

            if save_bedgraphs:
                bg_path = strategy_dir / f"{safe_name}.bedgraph"
                bg_df.to_csv(bg_path, sep="\t", header=False, index=False)

            # Chromosome labels without a leading "chr". Computed once per
            # bedgraph rather than once per chromosome.
            bg_chr_labels = bg_df["chr"].astype(str).str.replace("^chr", "", regex=True)

            n_written = 0
            for chrom in chromosomes:
                bim_file = Path(f"{bim_prefix}{chrom}.bim")
                if not bim_file.exists():
                    logger.debug(f"BIM file not found for chr{chrom}: {bim_file}")
                    continue

                chrom_prefix = str(strategy_dir / f"{safe_name}.{chrom}")
                bg_chrom = bg_df[bg_chr_labels == str(chrom)]

                if len(bg_chrom) == 0:
                    # Write an all-zero annotation for this chromosome so LDSC doesn't fail
                    _write_zero_annotation(bim_file, chrom_prefix)
                else:
                    bedgraph_to_snp_annotation(
                        bg_chrom, bim_file,
                        out_prefix=chrom_prefix,
                        use_bedtools=use_bedtools,
                    )
                n_written += 1

            if n_written == 0:
                # Missing BIM files are only logged at debug level, so flag
                # the case where nothing at all could be written.
                logger.warning(
                    f"No BIM files found for '{program}' / {strategy_name} "
                    f"(bim_prefix={bim_prefix!r}); no annotation files were written."
                )

            annot_prefix = str(strategy_dir / f"{safe_name}.")
            annotation_prefixes[program][strategy_name] = annot_prefix
            logger.info(f"  {program}/{strategy_name}: annotations at {annot_prefix}*.annot.gz")

    return annotation_prefixes
+ + Chromosome jobs are dispatched in parallel using a thread pool — each job + is an independent Singularity/subprocess call so threads work well. + + Parameters + ---------- + annotation_prefixes + Output of ``genescores_to_annotations``. + bim_prefix + Prefix for per-chromosome BIM/PLINK files. + ld_scores_dir + Directory to write LD score files. + hapmap3_snps_file + Path to a single HapMap3 SNP list file (e.g. ``hm3_no_MHC.list.txt`` + from ``get_1000genomes_hapmap3()``). Used as ``--print-snps`` for + every chromosome. Takes precedence over ``hapmap3_snps_prefix``. + hapmap3_snps_prefix + Prefix for per-chromosome HapMap3 SNP files (e.g. ``"refs/hm."``). + Use ``hapmap3_snps_file`` instead when you have a single combined file. + chromosomes + Chromosomes to process. For a quick tutorial run use ``[21, 22]``. + n_jobs + Number of chromosomes to process in parallel. Each job is one + Singularity subprocess call; ``n_jobs=4`` is a safe default on an + HPC node. Set to 1 to disable parallelism. + runner + cellink LDSCRunner. If None, uses global runner. + + Returns + ------- + dict + ``{program: {strategy: ld_score_prefix}}`` + where ld_score_prefix ends in ``.`` (chromosome appended by LDSC). + """ + import concurrent.futures + + try: + from cellink.tl.external._ldsc import ( + compute_ld_scores_with_annotations_from_bimfile, + get_ldsc_runner, + ) + except ImportError: + raise ImportError("cellink LDSC wrappers required") + + if runner is None: + runner = get_ldsc_runner() + + ld_scores_dir = Path(ld_scores_dir).resolve() + ld_scores_dir.mkdir(parents=True, exist_ok=True) + + # Resolve hapmap3 path once + print_snps_global: Optional[str] = None + if hapmap3_snps_file and Path(hapmap3_snps_file).exists(): + print_snps_global = str(hapmap3_snps_file) + + # Build the full list of (program, strategy, chrom) jobs first + # so we can dispatch them all at once to the thread pool. 
+ Job = tuple # (program, strategy_name, annot_prefix, chrom, out_prefix, print_snps) + jobs: List[Job] = [] + ld_prefixes: Dict[str, Dict[str, str]] = {} + + for program, strategies in annotation_prefixes.items(): + ld_prefixes[program] = {} + for strategy_name, annot_prefix in strategies.items(): + safe_name = _safe_filename(program) + strategy_ld_dir = ld_scores_dir / safe_name / strategy_name + strategy_ld_dir.mkdir(parents=True, exist_ok=True) + ld_prefixes[program][strategy_name] = str(strategy_ld_dir / f"{safe_name}.") + + for chrom in chromosomes: + annot_file = f"{annot_prefix}{chrom}.annot.gz" + if not Path(annot_file).exists(): + logger.debug(f"Annotation missing: {annot_file}") + continue + + bim_file = f"{bim_prefix}{chrom}" + out_prefix = str(strategy_ld_dir / f"{safe_name}.{chrom}") + + # Per-chrom snp file fallback + print_snps = print_snps_global + if print_snps is None and hapmap3_snps_prefix: + snp_file = f"{hapmap3_snps_prefix}{chrom}.snp" + if Path(snp_file).exists(): + print_snps = snp_file + + jobs.append((program, strategy_name, chrom, annot_file, + bim_file, out_prefix, print_snps)) + + n_total = len(jobs) + logger.info( + f"Computing LD scores: {n_total} jobs " + f"({len(annotation_prefixes)} programs × " + f"{sum(len(s) for s in annotation_prefixes.values()) // max(len(annotation_prefixes),1)} strategies × " + f"{len(chromosomes)} chromosomes), n_jobs={n_jobs}" + ) + + def _run_one(job: Job) -> str: + program, strategy_name, chrom, annot_file, bim_file, out_prefix, print_snps = job + compute_ld_scores_with_annotations_from_bimfile( + bfile_prefix=bim_file, + annot_file=annot_file, + out_prefix=out_prefix, + print_snps=print_snps, + runner=runner, + ) + return f"{program}/{strategy_name}/chr{chrom}" + + errors: List[str] = [] + completed = 0 + + if n_jobs == 1: + for job in jobs: + try: + label = _run_one(job) + completed += 1 + logger.info(f" [{completed}/{n_total}] done: {label}") + except Exception as exc: + 
errors.append(f"{job[0]}/{job[1]}/chr{job[2]}: {exc}") + logger.error(f" FAILED: {errors[-1]}") + else: + with concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs) as pool: + future_to_job = {pool.submit(_run_one, job): job for job in jobs} + for future in concurrent.futures.as_completed(future_to_job): + job = future_to_job[future] + try: + label = future.result() + completed += 1 + logger.info(f" [{completed}/{n_total}] done: {label}") + except Exception as exc: + errors.append(f"{job[0]}/{job[1]}/chr{job[2]}: {exc}") + logger.error(f" FAILED: {errors[-1]}") + + if errors: + logger.warning( + f"{len(errors)} LD score job(s) failed:\n" + "\n".join(errors) + ) + + return ld_prefixes + + +# --------------------------------------------------------------------------- +# Step 3: S-LDSC heritability regression (sc-linker mode) +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Step 4: Parse results and compute E-score +# --------------------------------------------------------------------------- + +def load_sclinker_heritability_results( + results_dir: Union[str, Path], + *, + log_pattern: str = "**/*.log", +) -> pd.DataFrame: + """ + Parse all S-LDSC .log files from a sc-linker run into a DataFrame. + + Extracts Enrichment, Enrichment_std_error, Coefficient, and tau* + for the annotation of interest (last non-baseline annotation column). + + Parameters + ---------- + results_dir + Directory containing .log files (searched recursively). + log_pattern + Glob pattern for log files. 
def compute_escore(
    results_df: pd.DataFrame,
    control_program: str = "AllCoding",
    *,
    control_strategy: Optional[str] = None,
    enrichment_col: str = "Enrichment",
    se_col: str = "Enrichment_std_error",
) -> pd.DataFrame:
    """
    Compute the E-score: enrichment of a program minus the all-coding control.

    E-score(program, trait) = Enrichment(program) - Enrichment(control)
    SE_E = sqrt(SE_program² + SE_control²)
    z_E = E-score / (SE_E + 1e-12)

    Parameters
    ----------
    results_df
        Table with at least the columns ``program``, ``strategy``, ``trait``,
        ``enrichment_col`` and ``se_col``. It is not modified.
    control_program
        Name of the all-coding control program.
    control_strategy
        If given, the control rows of this one strategy are used for *every*
        row of the same trait, whatever that row's own strategy is. If None,
        each row is compared with the control of its own strategy.
    enrichment_col, se_col
        Column names for enrichment and its standard error.

    Returns
    -------
    DataFrame
        Copy of ``results_df`` with added columns ``E_score``, ``E_score_se``
        and ``E_score_z``. Rows without a matching control get NaN. If the
        control program is absent altogether, a warning is logged and the
        copy is returned without the three columns.
    """
    df = results_df.copy()

    ctrl_mask = df["program"] == control_program
    if control_strategy:
        ctrl_mask &= df["strategy"] == control_strategy
        # A single fixed control strategy applies to all strategies, so the
        # join must not require the strategies to be equal.
        join_keys = ["trait"]
    else:
        join_keys = ["trait", "strategy"]

    # One control row per join key (first non-null value per column).
    ctrl = (
        df.loc[ctrl_mask, join_keys + [enrichment_col, se_col]]
        .groupby(join_keys, as_index=False)
        .first()
        .rename(columns={enrichment_col: "_ctrl_enr", se_col: "_ctrl_se"})
    )

    if ctrl.empty:
        logger.warning(f"Control program '{control_program}' not found; skipping E-score")
        return df

    # Left merge keeps every input row, in its original order.
    merged = df.merge(ctrl, on=join_keys, how="left")
    merged["E_score"] = merged[enrichment_col] - merged["_ctrl_enr"]
    merged["E_score_se"] = np.sqrt(merged[se_col] ** 2 + merged["_ctrl_se"] ** 2)
    # The epsilon guards against division by a zero standard error.
    merged["E_score_z"] = merged["E_score"] / (merged["E_score_se"] + 1e-12)
    return merged.drop(columns=["_ctrl_enr", "_ctrl_se"])


# ---------------------------------------------------------------------------
# PPI gene scores (code/calc_PPI_scores)
# ---------------------------------------------------------------------------

def compute_ppi_gene_scores(
    seed_genes: pd.Series,
    ppi_network: pd.DataFrame,
    *,
    restart_prob: float = 0.7,
    n_iter: int = 100,
    normalize: bool = True,
    source_col: str = "gene1",
    target_col: str = "gene2",
    weight_col: Optional[str] = None,
) -> pd.Series:
    """
    Random Walk with Restart (RWR) on a PPI network to propagate gene scores.

    Iterates ``p <- (1 - restart_prob) * A @ p + restart_prob * p0`` where
    ``A`` is the (optionally column-normalised) symmetric adjacency matrix and
    ``p0`` the seed vector scaled to sum to 1 (when its sum is positive).
    Iteration stops after ``n_iter`` steps or once the largest change between
    two iterates drops below 1e-10.

    Parameters
    ----------
    seed_genes
        Series mapping gene name → initial score.
    ppi_network
        Edge list DataFrame with columns for source gene, target gene,
        and optionally edge weight. Edges are treated as undirected.
    restart_prob
        Probability of restarting at seed genes at each step.
    n_iter
        Maximum number of RWR iterations.
    normalize
        Divide each column of the adjacency matrix by its column sum.
    source_col, target_col
        Column names for edge endpoints.
    weight_col
        Optional edge weight column. If None or not present, every edge has
        weight 1.

    Returns
    -------
    Series
        Propagated score per gene, indexed by the sorted union of all genes
        in the network and in ``seed_genes``. Empty if there are no genes.
        The scores sum to 1 when ``normalize`` is True, the seeds have a
        positive sum and no gene carrying probability mass is isolated.
    """
    import scipy.sparse as sp

    # Gene universe: every endpoint plus every seed gene.
    all_genes = sorted(set(
        ppi_network[source_col].tolist()
        + ppi_network[target_col].tolist()
        + seed_genes.index.tolist()
    ))
    n = len(all_genes)
    if n == 0:
        # Nothing to propagate; also avoids reducing over empty arrays below.
        return pd.Series(np.zeros(0), index=all_genes)
    gene2idx = {g: i for i, g in enumerate(all_genes)}

    # Vectorised adjacency build — no per-edge Python loop.
    src_genes = ppi_network[source_col].map(gene2idx)
    tgt_genes = ppi_network[target_col].map(gene2idx)
    valid = src_genes.notna() & tgt_genes.notna()
    i_arr = src_genes[valid].astype(int).to_numpy()
    j_arr = tgt_genes[valid].astype(int).to_numpy()
    if weight_col and weight_col in ppi_network.columns:
        w_arr = ppi_network.loc[valid, weight_col].astype(float).to_numpy()
    else:
        w_arr = np.ones(valid.sum())

    # Undirected: add both directions (duplicate entries are summed by scipy).
    row_arr = np.concatenate([i_arr, j_arr])
    col_arr = np.concatenate([j_arr, i_arr])
    data_arr = np.concatenate([w_arr, w_arr])
    A = sp.csr_matrix((data_arr, (row_arr, col_arr)), shape=(n, n))

    if normalize:
        # Column normalise; isolated nodes keep an all-zero column.
        deg = np.asarray(A.sum(axis=0)).ravel()
        deg[deg == 0] = 1
        A = A.dot(sp.diags(1.0 / deg))

    # Seed vector
    p0 = np.zeros(n)
    for gene, score in seed_genes.items():
        if gene in gene2idx:
            p0[gene2idx[gene]] = score
    p0_sum = p0.sum()
    if p0_sum > 0:
        p0 /= p0_sum

    # RWR: always keep the newest iterate, including the one that converged.
    p = p0.copy()
    for _ in range(n_iter):
        p_next = (1 - restart_prob) * A.dot(p) + restart_prob * p0
        delta = np.max(np.abs(p_next - p))
        p = p_next
        if delta < 1e-10:
            break

    return pd.Series(p, index=all_genes)


# ---------------------------------------------------------------------------
# LDSC parse.py bug check + patch (works for local, Docker and Singularity)
# ---------------------------------------------------------------------------

# Unique string present ONLY in the patched version (not in vanilla ldsc)
_PARSE_PY_PATCH_MARKER = "chr_ld[0].columns"

# Exact one-line fix from ldsc PR #341:
#   x = pd.concat(chr_ld)                      <- buggy (columns reordered by pandas)
#   x = pd.concat(chr_ld)[chr_ld[0].columns]   <- fixed (columns locked to first file)
# Each entry is (text to find, replacement). The entries are alternatives,
# ordered from most to least specific; only ONE of them should be applied.
# NOTE(review): the first pattern only matches if the whitespace before the
# trailing comment is identical to the installed parse.py — confirm against
# the LDSC source. The second pattern is the fallback for that case.
_PARSE_PY_BUGGY_PATTERNS = [
    (
        "x = pd.concat(chr_ld) # automatically sorted by chromosome",
        "x = pd.concat(chr_ld)[chr_ld[0].columns] # automatically sorted by chromosome",
    ),
    # Fallback without the trailing comment (in case whitespace differs)
    (
        "x = pd.concat(chr_ld)",
        "x = pd.concat(chr_ld)[chr_ld[0].columns]",
    ),
]
# ---------------------------------------------------------------------------
# Internal helpers — read / write parse.py via runner
# ---------------------------------------------------------------------------

def _get_parse_py_path(runner) -> Optional[str]:
    """
    Resolve the location of ``ldscore/parse.py`` for the given runner.

    Lookup order:

    1. An explicit setting: the ``parse_script`` attribute of the runner, or
       else ``runner.config["parse_script"]``. The value is returned as a
       string without checking that the file exists.
    2. Auto-discovery: locate ``ldsc_command`` (default ``ldsc.py``) on the
       host ``PATH`` and look for ``ldscore/parse.py`` next to it, then one
       directory above it. Only existing files are returned.

    Returns None when neither step yields a path.
    """
    configured = getattr(runner, "parse_script", None) or runner.config.get("parse_script")
    if configured:
        return str(configured)

    executable = shutil.which(runner.config.get("ldsc_command", "ldsc.py"))
    if executable is None:
        return None

    bin_dir = Path(executable).parent
    for root in (bin_dir, bin_dir.parent):
        guess = root / "ldscore" / "parse.py"
        if guess.exists():
            return str(guess)

    return None


def _read_parse_py_via_runner(runner) -> Optional[str]:
    """
    Return the source text of ``ldscore/parse.py`` as seen by the runner.

    The ``execution_mode`` entry of ``runner.config`` (default ``"local"``)
    selects how the file is read:

    - ``"local"``: read from disk.
    - ``"docker"``: ``docker run --rm <docker_image> cat <path>``.
    - ``"singularity"``: ``singularity exec <singularity_image> cat <path>``.

    Returns None if the path cannot be resolved (a warning is logged), the
    local file does not exist, the container command exits non-zero, or the
    execution mode is not one of the three above.
    """
    mode = runner.config.get("execution_mode", "local")
    parse_path = _get_parse_py_path(runner)

    if parse_path is None:
        logger.warning(
            "Cannot locate ldscore/parse.py. "
            "Set parse_script in your runner config to the explicit path, "
            "e.g. /ldsc/ldscore/parse.py for Singularity/Docker."
        )
        return None

    if mode == "local":
        local_file = Path(parse_path)
        if not local_file.exists():
            return None
        return local_file.read_text()

    # Containerised modes differ only in the command that prints the file.
    if mode == "docker":
        command = [
            "docker", "run", "--rm",
            runner.config.get("docker_image", "zijingliu/ldsc"),
            "cat", parse_path,
        ]
    elif mode == "singularity":
        command = [
            "singularity", "exec",
            runner.config.get("singularity_image", ""),
            "cat", parse_path,
        ]
    else:
        return None

    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        return None
    return proc.stdout
The + patched SIF replaces the original (original is backed up as + ``.bak.sif``). Requires Singularity build privileges (or + ``--fakeroot``). After rebuild the runner ``singularity_image`` + config is updated to point at the new image. + + Returns True on success, False otherwise. + """ + import tempfile + mode = runner.config.get("execution_mode", "local") + parse_path = _get_parse_py_path(runner) + + if parse_path is None: + return False + + with tempfile.NamedTemporaryFile( + mode="w", suffix="_parse_patched.py", delete=False + ) as tf: + tf.write(patched_source) + tmp_path = tf.name + + try: + # ── local ──────────────────────────────────────────────────────────── + if mode == "local": + p = Path(parse_path) + if not p.exists(): + logger.error(f"parse.py not found on disk at {p}") + return False + shutil.copy2(str(p), str(p) + ".bak") + shutil.copy2(tmp_path, str(p)) + logger.info(f"Patched {p} (backup: {p}.bak)") + return True + + # ── docker ─────────────────────────────────────────────────────────── + elif mode == "docker": + image = runner.config.get("docker_image", "zijingliu/ldsc") + cid_result = subprocess.run( + ["docker", "create", image], + capture_output=True, text=True, + ) + if cid_result.returncode != 0: + logger.error(f"docker create failed: {cid_result.stderr}") + return False + cid = cid_result.stdout.strip() + try: + subprocess.run( + ["docker", "cp", tmp_path, f"{cid}:{parse_path}"], + check=True, capture_output=True, + ) + subprocess.run( + ["docker", "commit", cid, image], + check=True, capture_output=True, + ) + logger.info( + f"Patched {parse_path} committed to Docker image '{image}' permanently." 
def _singularity_patch_overlay(runner, sif: str, parse_path: str, patched_tmp: str) -> bool:
    """
    Singularity overlay strategy: store the patched ``parse.py`` in a
    persistent overlay image.

    Steps:

    1. Create the overlay image with ``singularity overlay create`` if it
       does not exist yet. Its path comes from ``singularity_overlay_path``
       in the runner config (default ``~/.cellink/ldsc_overlay.img``), its
       size from ``singularity_overlay_size_mb`` (default 256).
    2. Run the container with the overlay mounted read-write and copy the
       patched source (fed on stdin) to ``parse_path``.
    3. Record the overlay path in ``runner.config["_ldsc_overlay_path"]``.

    Parameters
    ----------
    runner
        Runner whose ``config`` mapping is read and updated.
    sif
        Path to the Singularity image.
    parse_path
        Path of ``parse.py`` inside the container.
    patched_tmp
        Host path of a file holding the patched source.

    Returns
    -------
    bool
        True on success, False if either Singularity command fails.

    Notes
    -----
    NOTE(review): this function only records the overlay path. Mounting the
    overlay on later ``ldsc.py`` calls is presumably done by the runner's
    command builder from ``_ldsc_overlay_path`` — confirm in the runner.
    """
    overlay_path = str(
        runner.config.get(
            "singularity_overlay_path",
            str(Path.home() / ".cellink" / "ldsc_overlay.img"),
        )
    )
    overlay_size_mb = runner.config.get("singularity_overlay_size_mb", 256)

    # Create overlay image if it does not exist yet
    if not Path(overlay_path).exists():
        logger.info(f"Creating Singularity overlay image: {overlay_path} ({overlay_size_mb} MB)")
        Path(overlay_path).parent.mkdir(parents=True, exist_ok=True)
        result = subprocess.run(
            ["singularity", "overlay", "create",
             "--size", str(overlay_size_mb), overlay_path],
            capture_output=True, text=True,
        )
        if result.returncode != 0:
            logger.error(f"singularity overlay create failed: {result.stderr}")
            return False

    # read_text() closes the file; a bare open(...).read() would leak the handle.
    patched_source = Path(patched_tmp).read_text()

    # Write the patched parse.py into the overlay (mounted with ":rw").
    # parse_path is handed to bash as positional argument $1 rather than being
    # pasted into the script text, so spaces or shell metacharacters in the
    # path cannot change the command.
    result = subprocess.run(
        [
            "singularity", "exec",
            "--overlay", f"{overlay_path}:rw",
            sif,
            "bash", "-c",
            'mkdir -p "$(dirname "$1")" && cp /dev/stdin "$1"',
            "bash", parse_path,
        ],
        input=patched_source,
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        logger.error(f"Failed to write patched parse.py into overlay: {result.stderr}")
        return False

    # Record overlay path so later calls can mount it
    runner.config["_ldsc_overlay_path"] = overlay_path
    logger.info(
        f"Patched {parse_path} written into overlay {overlay_path}. "
        "Overlay is mounted automatically on every ldsc.py call (permanent "
        "across sessions as long as the overlay file exists)."
    )
    return True
If ``rebuild=False``: update ``runner.config["singularity_image"]`` + to point at the sandbox directory so all subsequent ``singularity exec`` + calls use it directly. + 3b. If ``rebuild=True``: rebuild a new SIF from the sandbox, back up the + original SIF, and update ``runner.config["singularity_image"]`` to the + new SIF. + """ + sandbox_path = runner.config.get( + "singularity_sandbox_path", + str(Path(sif).with_suffix("")) + ".sandbox", + ) + + # Convert SIF → sandbox (only once; skip if sandbox directory already exists) + if not Path(sandbox_path).exists(): + logger.info(f"Converting {sif} to writable sandbox at {sandbox_path} ...") + result = subprocess.run( + ["singularity", "build", "--sandbox", sandbox_path, sif], + capture_output=True, text=True, + ) + if result.returncode != 0: + # Try with --fakeroot for systems without root + logger.warning("sandbox build failed without root; retrying with --fakeroot") + result = subprocess.run( + ["singularity", "build", "--fakeroot", "--sandbox", sandbox_path, sif], + capture_output=True, text=True, + ) + if result.returncode != 0: + logger.error(f"singularity build --sandbox failed: {result.stderr}") + return False + + # Copy patched parse.py into the sandbox (sandbox is just a directory) + sandbox_parse = Path(sandbox_path) / parse_path.lstrip("/") + sandbox_parse.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(patched_tmp, str(sandbox_parse)) + logger.info(f"Patched {sandbox_parse}") + + if not rebuild: + # Point the runner at the sandbox directory for all future calls + runner.config["singularity_image"] = sandbox_path + logger.info( + f"Runner updated: singularity_image → {sandbox_path}. " + "All subsequent ldsc.py calls will use the patched sandbox." 
def _apply_first_parse_py_patch(source: str, patterns) -> Optional[str]:
    """
    Apply the first ``(old, new)`` pair whose ``old`` text occurs in ``source``.

    The pairs are alternatives for the same fix, so at most one is applied:
    a later, shorter pattern would otherwise match the text the earlier one
    just produced and apply the fix a second time.

    Returns the patched text, or None if no pattern matched.
    """
    for old, new in patterns:
        if old in source:
            return source.replace(old, new)
    return None


def check_and_patch_ldsc_parse_bug(runner) -> dict:
    """
    Check whether the LDSC installation has the pandas column-sort bug
    (ldsc issue #342 / PR #341, https://github.com/bulik/ldsc/pull/341)
    and patch it.

    The bug: ``pd.concat`` across chromosomes in ``ldscore/parse.py`` re-sorts
    the annotation columns, which makes ``_check_variance()`` raise an
    ``IndexError`` during ``--h2 --overlap-annot``.

    The fix (PR #341): reindex the concatenated DataFrame to the column order
    of the first chromosome file: ``pd.concat(chr_ld)[chr_ld[0].columns]``.

    **Configuration** — add ``parse_script`` to your runner config so cellink
    knows exactly where ``parse.py`` lives, especially for containerised setups:

    .. code-block:: python

        runner = configure_ldsc_runner(config_dict={
            "execution_mode": "singularity",
            "singularity_image": "/path/to/ldsc.sif",
            "ldsc_command": "ldsc.py",
            "parse_script": "/ldsc/ldscore/parse.py",  # <- explicit path
        })

    If ``parse_script`` is omitted, cellink tries to auto-discover the path
    from ``PATH`` (works for local installs; may fail for containers).

    How the patched file is written back is decided by
    ``_write_parse_py_via_runner``:

    - **local** — overwrites ``parse.py`` on disk, keeping ``parse.py.bak``.
    - **docker** — ``docker cp`` + ``docker commit`` into the same image tag.
    - **singularity** — one of the strategies ``"overlay"`` (default),
      ``"sandbox"`` or ``"rebuild"``, selected with
      ``singularity_patch_strategy`` in the runner config.

    Parameters
    ----------
    runner
        A configured ``LDSCRunner`` instance.

    Returns
    -------
    dict with keys:
        ``"status"`` — ``"already_patched"``, ``"patched"``,
        ``"patch_failed"`` or ``"not_found"``
        ``"mode"`` — value of ``execution_mode`` in the runner config
        (default ``"local"``)
        ``"parse_path"`` — resolved path to ``parse.py`` (or None)
        ``"detail"`` — human-readable explanation
    """
    mode = runner.config.get("execution_mode", "local")
    parse_path = _get_parse_py_path(runner)
    source = _read_parse_py_via_runner(runner)

    def _report(status: str, detail: str) -> dict:
        # Every outcome shares the same envelope.
        return {
            "status": status,
            "mode": mode,
            "parse_path": parse_path,
            "detail": detail,
        }

    if source is None:
        return _report(
            "not_found",
            "Could not read ldscore/parse.py. "
            "Add parse_script to your runner config with the explicit path, "
            "e.g. '/ldsc/ldscore/parse.py' for Singularity/Docker.",
        )

    if _PARSE_PY_PATCH_MARKER in source:
        return _report(
            "already_patched",
            "parse.py already patched (chr_ld[0].columns reindex present) — no action needed.",
        )

    # Textual patch: exactly one of the alternative patterns is applied.
    patched = _apply_first_parse_py_patch(source, _PARSE_PY_BUGGY_PATTERNS)
    if patched is None:
        return _report(
            "patch_failed",
            "Found parse.py but could not locate the expected pd.concat "
            "call. The installed LDSC version may differ. "
            "Apply the fix from https://github.com/bulik/ldsc/pull/341 manually.",
        )

    if _write_parse_py_via_runner(runner, patched):
        return _report(
            "patched",
            f"Applied chr_ld[0].columns reindex fix to {parse_path} "
            f"(ldsc issue #342 / PR #341) via {mode}.",
        )
    return _report(
        "patch_failed",
        "Found and modified parse.py source but could not write it back. "
        "Check file permissions (local) or container access (Docker/Singularity).",
    )
def _merge_bedgraph_bedtools(bg: pd.DataFrame) -> pd.DataFrame:
    """
    Merge overlapping intervals with ``bedtools merge``, summing scores.

    ``bg`` must have the columns ``chr``, ``start``, ``end`` and ``score``.
    The intervals are written to a temporary BED file, sorted with
    ``bedtools sort`` and merged with ``bedtools merge -c 4 -o sum``.

    If bedtools exits with an error or is not installed, a warning is logged
    and the result of ``_merge_bedgraph_python`` is returned instead.

    Returns a DataFrame with columns ``chr`` (str), ``start``, ``end``, ``score``.
    """
    columns = ["chr", "start", "end", "score"]

    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".bed", delete=False
    ) as f_in:
        tmp_in = f_in.name
        # "-c 4" below sums the 4th column, so pin the column order here
        # instead of relying on the order of the incoming frame.
        bg.sort_values(["chr", "start"])[columns].to_csv(
            f_in, sep="\t", header=False, index=False
        )

    try:
        # bedtools sort (chromosomes may be numeric or "chrN")
        sort_result = subprocess.run(
            ["bedtools", "sort", "-i", tmp_in],
            capture_output=True, text=True, check=True,
        )
        # bedtools merge with sum of scores
        merge_result = subprocess.run(
            ["bedtools", "merge", "-i", "stdin", "-c", "4", "-o", "sum"],
            input=sort_result.stdout,
            capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError: the bedtools binary is not on PATH. It carries no
        # stderr attribute, so report the exception itself in that case.
        reason = getattr(e, "stderr", None) or e
        logger.warning(f"bedtools merge failed: {reason}. Falling back to Python merge.")
        return _merge_bedgraph_python(bg)
    finally:
        try:
            os.unlink(tmp_in)
        except OSError:
            pass

    rows = []
    for line in merge_result.stdout.splitlines():
        if not line:
            continue
        parts = line.split("\t")
        rows.append({
            "chr": parts[0],
            "start": int(parts[1]),
            "end": int(parts[2]),
            "score": float(parts[3]),
        })
    return pd.DataFrame(rows, columns=columns)


def _merge_bedgraph_python(bg: pd.DataFrame) -> pd.DataFrame:
    """
    Merge overlapping intervals per chromosome, summing their scores.

    Within each chromosome (groups keep their first-seen order) intervals are
    sorted by start. An interval is folded into the current run when its
    start is ``<=`` the run's end, so touching intervals merge as well.

    Returns a DataFrame with columns ``chr``, ``start``, ``end``, ``score``;
    it has no rows when ``bg`` has none.
    """
    out_chrs, out_starts, out_ends, out_scores = [], [], [], []

    def _emit(chrom, start, end, score) -> None:
        out_chrs.append(chrom)
        out_starts.append(start)
        out_ends.append(end)
        out_scores.append(score)

    for chrom, grp in bg.groupby("chr", sort=False):
        starts = grp["start"].to_numpy(dtype=np.int64)
        ends = grp["end"].to_numpy(dtype=np.int64)
        scores = grp["score"].to_numpy(dtype=np.float64)
        order = np.argsort(starts, kind="stable")
        starts, ends, scores = starts[order], ends[order], scores[order]

        # Sweep over the sorted intervals, growing one run at a time.
        run_start, run_end, run_score = starts[0], ends[0], scores[0]
        for s, e, sc in zip(starts[1:], ends[1:], scores[1:]):
            if s <= run_end:
                run_end = max(run_end, e)
                run_score += sc
            else:
                _emit(chrom, run_start, run_end, run_score)
                run_start, run_end, run_score = s, e, sc
        _emit(chrom, run_start, run_end, run_score)

    return pd.DataFrame({
        "chr": out_chrs, "start": out_starts,
        "end": out_ends, "score": out_scores,
    })
delete=False) + bg_out = bg.copy() + bg_out["chr"] = bg_out["chr"].astype(str).str.replace("chr", "", regex=False) + bg_out[["chr", "start", "end", "score"]].to_csv(fb, sep="\t", header=False, index=False) + fb.close() + bg_file = fb.name + + try: + result = subprocess.run( + [ + "bedtools", "intersect", + "-a", snp_bed_file, + "-b", bg_file, + "-wa", "-wb", + ], + capture_output=True, text=True, check=True, + ) + annot_values = np.zeros(len(bim)) + for line in result.stdout.strip().split("\n"): + if not line: + continue + parts = line.split("\t") + snp_idx = int(parts[3]) + score = float(parts[7]) + annot_values[snp_idx] += score + return annot_values + except subprocess.CalledProcessError: + logger.warning("bedtools intersect failed; falling back to Python annotation") + return _annotate_python(bg, bim) + finally: + for f in [snp_bed_file, bg_file]: + try: + os.unlink(f) + except Exception: + pass + + +def _annotate_python(bg: pd.DataFrame, bim: pd.DataFrame) -> np.ndarray: + """Pure-Python SNP annotation from bedgraph.""" + # Normalise chromosome labels + bg = bg.copy() + bg["_chr"] = bg["chr"].astype(str).str.replace("^chr", "", regex=True) + bim = bim.copy() + bim["_chr"] = bim["CHR"].astype(str) + + annot_values = np.zeros(len(bim)) + + for chrom, bg_chrom in bg.groupby("_chr"): + snp_mask = bim["_chr"] == str(chrom) + if not snp_mask.any(): + continue + snp_indices = np.where(snp_mask)[0] + snp_pos = bim.loc[snp_mask, "BP"].values + starts = bg_chrom["start"].values + ends = bg_chrom["end"].values + scores = bg_chrom["score"].values + + # Vectorised lookup + for idx, pos in zip(snp_indices, snp_pos): + hits = np.where((starts <= pos) & (pos < ends))[0] + if hits.size > 0: + annot_values[idx] = scores[hits].sum() + + return annot_values + + +def _write_zero_annotation(bim_file: Union[str, Path], out_prefix: str) -> Path: + """Write an all-zero annotation file for a chromosome with no coverage.""" + bim = pd.read_csv( + bim_file, sep=r"\s+", header=None, + 
names=["CHR", "SNP", "CM", "BP", "A1", "A2"], + ) + out_path = Path(f"{out_prefix}.annot.gz") + annot = pd.DataFrame({"ANNOT": np.zeros(len(bim), dtype=np.float64)}) + annot.to_csv(out_path, sep="\t", index=False, compression="gzip") + return out_path + + +def _parse_ldsc_results_file(results_file: Path) -> Optional[dict]: + """ + Parse an LDSC ``.results`` file written by ``--h2 --overlap-annot``. + + The file is tab-separated with one row per annotation category. The + **last row** is always the program-specific annotation (``L2_1``). + + Columns used: + ``Enrichment``, ``Enrichment_std_error``, ``Coefficient``, + ``Coefficient_std_error``, ``Coefficient_z-score``. + """ + if not results_file.exists(): + return None + try: + df = pd.read_csv(results_file, sep="\t") + except Exception: + return None + + if df.empty: + return None + + row = df.iloc[-1] + + result: dict = {} + for src_col, dst_col in [ + ("Enrichment", "Enrichment"), + ("Enrichment_std_error", "Enrichment_std_error"), + ("Coefficient", "Coefficient"), + ("Coefficient_std_error", "Coefficient_std_error"), + ("Coefficient_z-score", "Coefficient_z_score"), + ]: + if src_col in df.columns: + try: + result[dst_col] = float(row[src_col]) + except (ValueError, TypeError): + pass + + if "Enrichment" in result and "Enrichment_std_error" in result: + e = result["Enrichment"] + se = result["Enrichment_std_error"] + result["Enrichment_z_score"] = e / (se + 1e-12) + + return result if result else None + + +def _parse_ldsc_log(log_file: Path) -> Optional[dict]: + """ + Parse an LDSC ``.log`` file and its companion ``.results`` file. + + LDSC writes enrichment/coefficient statistics to ``.results`` + (tab-separated, one row per annotation) when ``--overlap-annot`` is used, + and runtime metadata to ``.log``. This function reads both. + + Always extracted from ``.log`` (if present): + ``h2_obs``, ``h2_obs_se`` — observed-scale heritability. 
+ + Extracted from ``.results`` (when ``--overlap-annot`` succeeded): + ``Enrichment``, ``Enrichment_std_error``, ``Enrichment_z_score``, + ``Coefficient``, ``Coefficient_std_error``, ``Coefficient_z_score``. + + Returns ``None`` only if the file is missing or entirely unparseable. + """ + if not log_file.exists(): + return None + + import re as _re + text = log_file.read_text() + + result: dict = {} + + h2_match = _re.search( + r"Total Observed scale h2:\s*([\-\d.eE+]+)\s*\(([\d.eE+]+)\)", text + ) + if h2_match: + result["h2_obs"] = float(h2_match.group(1)) + result["h2_obs_se"] = float(h2_match.group(2)) + + # .results file lives next to the .log file + results_file = log_file.with_suffix(".results") + stats = _parse_ldsc_results_file(results_file) + if stats: + result.update(stats) + + if not result: + logger.warning(f"Could not parse any statistics from {log_file}") + return None + + return result + + +def _is_float(s: str) -> bool: + """Return True if *s* can be converted to a float.""" + try: + float(s) + return True + except ValueError: + return False + + +def _safe_filename(s: str) -> str: + """Convert a string to a filesystem-safe filename (spaces → underscores, special chars stripped).""" + s = str(s).strip().replace(" ", "_") + return re.sub(r"[^\w.\-+]+", "_", s) \ No newline at end of file From 2516c91653357765b1153e4d3cc8da49571ad300 Mon Sep 17 00:00:00 2001 From: Lucas Arnoldt Date: Tue, 19 May 2026 03:50:09 +0200 Subject: [PATCH 5/6] scDRS fix --- src/cellink/tl/external/_scdrs.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/cellink/tl/external/_scdrs.py b/src/cellink/tl/external/_scdrs.py index 6d0504a..dc6699a 100644 --- a/src/cellink/tl/external/_scdrs.py +++ b/src/cellink/tl/external/_scdrs.py @@ -149,17 +149,27 @@ def run_scdrs( logger.info("Filtering cells and genes") sc.pp.filter_cells(adata, min_genes=min_genes) sc.pp.filter_genes(adata, min_cells=min_cells) - + if "log1p" not in adata.uns_keys(): - 
logger.info("Log-normalizing data") + adata.layers["counts"] = adata.X + logger.info("Normalizing and log-transforming data") sc.pp.normalize_total(adata, target_sum=1e4) sc.pp.log1p(adata) + else: + if not "counts" in adata.layers: + raise ValueError( + "adata appears already normalized (log1p in uns) but adata.layers['counts'] does not exist. " + "Please pass raw counts, or set adata.layers['counts'] before normalizing." + ) if "X_pca" not in adata.obsm_keys(): logger.info(f"Computing PCA with {n_pcs} components") sc.pp.highly_variable_genes(adata, n_top_genes=2000) + adata = adata[:, adata.var.highly_variable] sc.pp.scale(adata, max_value=10) sc.tl.pca(adata, n_comps=n_pcs) + + adata.X = adata.layers["counts"] covariate_list = [] From b09313232e0dcffe84c14605a6fb4c85054f8c34 Mon Sep 17 00:00:00 2001 From: Lucas Arnoldt Date: Wed, 27 May 2026 01:58:14 +0200 Subject: [PATCH 6/6] sclinker implementation --- ...tutorial_sclinker.ipynb => sclinker.ipynb} | 432 +++--------------- src/cellink/tl/external/__init__.py | 11 - src/cellink/tl/external/_joint_nmf.py | 43 +- src/cellink/tl/external/_sclinker.py | 132 +----- src/cellink/tl/external/_sclinker_utils.py | 377 +-------------- 5 files changed, 102 insertions(+), 893 deletions(-) rename docs/tutorials/{tutorial_sclinker.ipynb => sclinker.ipynb} (98%) diff --git a/docs/tutorials/tutorial_sclinker.ipynb b/docs/tutorials/sclinker.ipynb similarity index 98% rename from docs/tutorials/tutorial_sclinker.ipynb rename to docs/tutorials/sclinker.ipynb index b56edfd..6448d56 100644 --- a/docs/tutorials/tutorial_sclinker.ipynb +++ b/docs/tutorials/sclinker.ipynb @@ -8,48 +8,23 @@ "# Tutorial: sc-linker via cellink\n", "**Integrating single-cell RNA-seq, epigenomics and GWAS to identify disease-critical cell types**\n", "\n", - "Based on: Jagadeesh\\*, Dey\\* et al., *Nature Genetics* 2022.\n", - "\n", - "Please read the **Pipeline architecture** section before running — sc-linker has several details that differ from simpler 
methods like scDRS or Seismic.\n", - "\n", - "---\n", - "\n", - "## Pipeline architecture\n", - "\n", - "```\n", - "scRNA-seq\n", - " │\n", - " ▼ Step 1 (cellink)\n", - "Gene programs (genes × cell_types, scores in [0,1])\n", - " │\n", - " ▼ Step 2 (cellink → bedtools)\n", - "Bedgraphs — two strategies per program:\n", - " a) ABC_Road_{tissue} : gene_score × enhancer_activity (Roadmap ∪ ABC links)\n", - " b) 100kb : gene_score × 1 (gene body ± 100 kb window)\n", - " │\n", - " ▼ Step 2c (cellink → bedtools intersect)\n", - "SNP annotations (.annot.gz, one per chromosome × program × strategy)\n", - " │\n", - " ▼ Step 2d (cellink → ldsc.py --l2)\n", - "LD scores (.l2.ldscore.gz, one per chromosome × program × strategy)\n", - " │\n", - " ▼ Step 3 (cellink → ldsc.py --h2 --overlap-annot)\n", - "Heritability enrichment (.log, one per program × strategy × trait)\n", - " │\n", - " ▼ Step 4 (cellink)\n", - "E-score = Enrichment(program) − Enrichment(AllCoding_control)\n", - "```\n", - "\n", - "**Key differences from scDRS / Seismic**\n", - "- Uses S-LDSC `--h2` (partitioned heritability), **not** `--h2-cts` (cell-type-specific mode)\n", - "- Requires `bedtools` binary for interval merging\n", - "- Always runs two SNP-to-gene strategies: ABC\\_Road (primary) and 100 kb (secondary)\n", - "- E-score is computed in postprocessing, not directly from LDSC output\n", - "- Requires ~5–50 GB of reference data from the Broad server\n", - "\n", - "**Known LDSC bug affecting sc-linker** (ldsc issue [#342](https://github.com/bulik/ldsc/issues/342), PR [#341](https://github.com/bulik/ldsc/pull/341)): \n", - "When running `--h2 --overlap-annot` with multiple chromosomes, newer pandas versions cause an `IndexError` in `ldsc/ldscore/parse.py`. \n", - "If you hit this crash, see the **Troubleshooting** section at the end of the notebook." + "sc-linker was developed by [Jagadeesh et al. 2022](https://www.nature.com/articles/s41588-022-01187-9). 
sc-linker first generates gene programs (genes × cell_types, scores in [0,1]) from scRNA-seq data. It then generates bedgraphs using enhancer activity and a 100kb window. These bedgraphs are intersected with SNP annotations. LDSC is then used for heritability estimation. The interpretable result is the E-score, which is the difference between the enrichment of the gene program and the enrichment of a control. Use > 50 donors for the scRNA-seq and a well-powered GWAS." + ] + }, + { + "cell_type": "markdown", + "id": "84da74fa", + "metadata": {}, + "source": [ + "## Environment" + ] + }, + { + "cell_type": "markdown", + "id": "f81d24a0", + "metadata": {}, + "source": [ + "sc-linker requires an installation of bedtools, e.g. via [conda](https://anaconda.org/channels/bioconda/packages/bedtools/overview) (conda install bioconda::bedtools)." ] }, { @@ -119,7 +94,6 @@ " compute_ld_scores_for_sclinker,\n", " load_sclinker_heritability_results,\n", " compute_escore,\n", - " compute_ppi_gene_scores,\n", " run_sclinker_heritability,\n", ")\n", "\n", @@ -136,9 +110,7 @@ "id": "dc117b77", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 0: Configure the LDSC runner\n", + "## Configure the LDSC runner\n", "\n", "cellink runs LDSC through a **Singularity** container via `LDSCRunner`."
] @@ -158,8 +130,6 @@ " \"make_annot_command\": \"/ldsc/make_annot.py\",\n", " \"munge_command\": \"/ldsc/munge_sumstats.py\",\n", "\n", - " # Path to parse.py inside the container — used by the troubleshooting\n", - " # cell at the end of this notebook if you hit an IndexError.\n", " \"parse_script\": \"/ldsc/ldscore/parse.py\",\n", "\n", " \"singularity_patch_strategy\": \"overlay\",\n", @@ -171,9 +141,7 @@ "id": "9a720dae", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 1: Download reference files\n", + "## Download reference files\n", "\n", "### 1a: 1000 Genomes LD scores, weights and PLINK files\n", "\n", @@ -216,35 +184,27 @@ } ], "source": [ - "# All 1000G files come from Zenodo (https://zenodo.org/records/10515792)\n", - "\n", "_cfg = \"../../src/cellink/resources/config/1000genomes.yaml\"\n", "\n", - "# Baseline LD scores (--ref-ld-chr)\n", "ld_scores_dir, ld_prefix = get_1000genomes_ld_scores(population=\"EUR\", return_path=True, config_path=_cfg)\n", "print(f\"LD scores : {ld_scores_dir} (prefix: {ld_prefix})\")\n", "\n", - "# Regression weights (--w-ld-chr)\n", "ld_weights_dir, weights_prefix = get_1000genomes_ld_weights(population=\"EUR\", return_path=True, config_path=_cfg)\n", "print(f\"LD weights : {ld_weights_dir} (prefix: {weights_prefix})\")\n", "\n", - "# PLINK files (BIM/BED/FAM — for computing per-annotation LD scores)\n", "plink_dir, plink_prefix = get_1000genomes_plink_files(population=\"EUR\", config_path=_cfg)\n", "print(f\"PLINK : {plink_dir} (prefix: {plink_prefix})\")\n", "\n", - "# Allele frequency files (--frqfile-chr, required with --overlap-annot)\n", "frq_dir, frq_prefix = get_1000genomes_frq(population=\"EUR\", config_path=_cfg)\n", "print(f\"Frq files : {frq_dir} (prefix: {frq_prefix})\")\n", "\n", - "# HapMap3 SNP list (--print-snps, restricts LD score computation)\n", "hapmap3_snps = get_1000genomes_hapmap3(config_path=_cfg)\n", "print(f\"HapMap3 : {hapmap3_snps}\")\n", "\n", - "# Prefix strings for ldsc.py\n", - 
"ref_ld_chr = str(ld_scores_dir / ld_prefix) # e.g. .../baselineLD.\n", - "w_ld_chr = str(ld_weights_dir / weights_prefix) # e.g. .../weights.hm3_noMHC.\n", - "bim_prefix = str(plink_dir / plink_prefix) # e.g. .../1000G.EUR.QC.\n", - "frq_chr = str(frq_dir / frq_prefix) # e.g. .../1000G.EUR.QC.\n", + "ref_ld_chr = str(ld_scores_dir / ld_prefix) \n", + "w_ld_chr = str(ld_weights_dir / weights_prefix) \n", + "bim_prefix = str(plink_dir / plink_prefix) \n", + "frq_chr = str(frq_dir / frq_prefix) \n", "\n", "print(f\"\\nref_ld_chr : {ref_ld_chr}\")\n", "print(f\"w_ld_chr : {w_ld_chr}\")\n", @@ -303,9 +263,7 @@ "id": "40640439", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 2: Step 1 — Gene programs\n", + "## Gene programs\n", "\n", "### 2a: Cell-type programs" ] @@ -357,34 +315,26 @@ "print(f\"Cells: {adata.n_obs:,}\")\n", "\n", "# If var_names are ENSG IDs (e.g. ENSG00000...), the Roadmap/ABC TargetGene\n", - "# column uses HGNC names — they won't match. Add gene_name to adata.var if\n", - "# not already present. 
The dummy OneK1K dataset uses ENSG IDs as var_names.\n", + "# column uses HGNC names, so there will be a mismatch.\n", "if adata.var_names.str.startswith(\"ENSG\").mean() > 0.5:\n", " if \"gene_name\" not in adata.var.columns:\n", - " # Try common column names\n", " for col in [\"gene_names\", \"feature_name\", \"gene_symbol\", \"Symbol\"]:\n", " if col in adata.var.columns:\n", " adata.var[\"gene_name\"] = adata.var[col]\n", " print(f\"Using adata.var['{col}'] as gene_name\")\n", " break\n", " else:\n", - " # Last resort: query BioMart (slow, cached after first call)\n", " print(\"var_names are ENSG IDs — querying BioMart for HGNC mapping ...\")\n", " from cellink.tl.external import get_gene_annotation\n", " gene_coord = get_gene_annotation(gene_id_type=\"ensembl\")\n", " import pandas as _pd\n", " ensg2hgnc = _pd.read_csv(gene_coord, sep=\" \")[[\"GENE\"]].copy()\n", - " # get_gene_annotation with ensembl gives ENSG→coord, we need ENSG→HGNC\n", - " # Use the HGNC file instead\n", " hgnc_coord = get_gene_annotation(gene_id_type=\"hgnc\")\n", - " # Build map from ensembl cache that was written alongside hgnc\n", " from pathlib import Path\n", " from cellink.resources._utils import get_data_home\n", " ensembl_f = get_data_home() / \"gene_coord_ensembl.txt\"\n", " hgnc_f = get_data_home() / \"gene_coord_hgnc.txt\"\n", " if ensembl_f.exists() and hgnc_f.exists():\n", - " # Both files are 1:1 gene lists in the same order from BioMart\n", - " # Re-query to get the proper ENSG→HGNC map\n", " from cellink.tl.external._sldsc_utils import _query_biomart_and_write_gene_coords\n", " _query_biomart_and_write_gene_coords(get_data_home())\n", " print(\" Done. HGNC names will be applied automatically in compute_celltype_programs.\")\n", @@ -440,6 +390,14 @@ "### 2c: NMF cellular process programs" ] }, + { + "cell_type": "markdown", + "id": "775ed242", + "metadata": {}, + "source": [ + "If you have a GPU available, you may install `torchnmf` via `pip install torchnmf`. 
Alternatively the scikit-learn NMF implementation is used, which is substantially slower. " + ] + }, { "cell_type": "code", "execution_count": 7, @@ -478,14 +436,6 @@ } ], "source": [ - "# Backend priority:\n", - "# 1. torchnmf (GPU) — install with: pip install torchnmf\n", - "# device='cuda' → uses GPU if available, warns and falls back to CPU if not\n", - "# device='cpu' → forces CPU even if GPU is present\n", - "# 2. sklearn NMF (init='nndsvda', solver='cd') — always available, slower\n", - "#\n", - "# On a (125k × 34k) matrix torchnmf on GPU is ~10-20x faster than sklearn.\n", - "# If you are on CPU only, set device='cpu' to suppress the CUDA warning.\n", "W, H, corr = compute_nmf_programs(\n", " adata,\n", " celltype_col=\"predicted.celltype.l2\",\n", @@ -544,12 +494,9 @@ " index=gene_annot[\"GENE\"].str.upper(),\n", " columns=[\"AllCoding\"],\n", ")\n", - "# Drop any duplicate gene names in the AllCoding index before concat\n", "all_coding = all_coding[~all_coding.index.duplicated(keep=\"first\")]\n", "\n", - "# Combine cell-type programs + AllCoding; deduplicate index\n", "all_genescores = pd.concat([genescores, all_coding], axis=1).fillna(0)\n", - "# Drop duplicate rows (same gene appearing twice in either source)\n", "all_genescores = all_genescores[~all_genescores.index.duplicated(keep=\"first\")]\n", "print(f\"Programs including AllCoding: {all_genescores.shape[1]} total\")\n", "print(f\"Unique genes: {all_genescores.shape[0]:,}\")" @@ -560,13 +507,11 @@ "id": "77e12974", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 3: Step 2 — Gene programs → SNP annotations\n", + "## Step 2: Gene programs -> SNP annotations\n", "\n", - "This creates **two strategies** per program, matching the sc-linker paper:\n", - "- `ABC_Road_BLD/` — Roadmap ∪ ABC enhancer-gene links, weighted by activity (primary)\n", - "- `100kb/` — gene body ± 100 kb window, uniform weight (secondary)\n", + "This creates two strategies per program:\n", + "- `ABC_Road_BLD/`: Roadmap ∪ 
ABC enhancer-gene links, weighted by activity (primary)\n", + "- `100kb/`: gene body ± 100 kb window, uniform weight (secondary)\n", "\n", "Requires `bedtools` in `PATH` for correct interval merging." ] @@ -884,8 +829,6 @@ } ], "source": [ - "# Pass tissue= to filter the combined file to the relevant tissue.\n", - "# Pass eid_map_file= so Roadmap EIDs are mapped correctly.\n", "roadmap = load_roadmap_links(\n", " enhancer_refs[\"roadmap\"],\n", " tissue=\"BLD\",\n", @@ -895,7 +838,6 @@ " enhancer_refs[\"abc\"],\n", " tissue=\"BLD\",\n", ")\n", - "# cellink auto-locates ENSG_coord.txt (container, cache, or downloads from GitHub)\n", "gene_annot = load_gene_annotation()\n", "\n", "annotation_prefixes = genescores_to_annotations(\n", @@ -950,9 +892,7 @@ "id": "cdfc05a9", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 4: Step 3a — Compute LD scores\n", + "## Step 3a: Compute LD scores\n", "\n", "One `.l2.ldscore.gz` file per chromosome per program per strategy.\n", "This loops over all (program, strategy, chromosome) combinations and calls\n", @@ -18405,20 +18345,13 @@ } ], "source": [ - "# ── LD score computation ──────────────────────────────────────────────────────\n", - "# LDSC --h2 always reads chromosomes 1-22; you cannot pass a subset.\n", - "# n_jobs parallelises across chromosomes — use as many as your node has cores.\n", - "# With n_jobs=4: AllCoding × 2 strategies × 22 chr ≈ 11 parallel batches ≈ ~15 min.\n", - "# For a full analysis with many programs, increase n_jobs or submit as a batch job.\n", - "# ─────────────────────────────────────────────────────────────────────────────\n", - "\n", "ld_prefixes = compute_ld_scores_for_sclinker(\n", " annotation_prefixes=annotation_prefixes,\n", " bim_prefix=bim_prefix,\n", " ld_scores_dir=os.path.abspath(\"ldscores\"),\n", " hapmap3_snps_file=str(hapmap3_snps),\n", - " chromosomes=list(range(1, 23)), # all 22 chromosomes — required by LDSC\n", - " n_jobs=28, # increase if more cores are available\n", + " 
chromosomes=list(range(1, 23)), \n", + " n_jobs=28, \n", " runner=runner,\n", ")\n", "print(list(ld_prefixes.keys()))" @@ -18461,12 +18394,10 @@ "id": "9427c051", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 5: Munge GWAS summary statistics\n", + "## Munge GWAS summary statistics\n", "\n", "Converts raw GWAS sumstats into the `.sumstats.gz` format expected by LDSC.\n", - "Uses cellink's existing `munge_sumstats` wrapper from `_ldsc.py`." + "Uses cellink's existing `munge_sumstats` wrapper from `_ldsc.py`. We use IBD (Inflammatory Bowel Disease, GCST006250), a large immune-mediated trait with N=~86k, harmonised summary stats from the GWAS catalog." ] }, { @@ -18494,34 +18425,16 @@ } ], "source": [ - "# ── Download a real GWAS for the tutorial ────────────────────────────────────\n", - "# We use IBD (Inflammatory Bowel Disease, GCST006250, de Lange et al. 2017)\n", - "# — a large immune-mediated trait with N~86k, harmonised summary stats with\n", - "# rsIDs, and strong signal in blood cell types (relevant to OneK1K dataset).\n", - "#\n", - "# The GWAS catalog harmonised file has these key columns:\n", - "# rsid → SNP identifier (passed to munge as --snp rsid)\n", - "# effect_allele → A1\n", - "# other_allele → A2\n", - "# beta → effect size\n", - "# standard_error → SE\n", - "# p_value → p-value\n", - "# n → per-variant N (or use n_samples below if absent)\n", - "#\n", - "# get_gwas_catalog_study_summary_stats() downloads the harmonised .tsv.gz\n", - "# and returns a DataFrame; return_path=True gives just the file path.\n", "from cellink.resources import get_gwas_catalog_study_summary_stats\n", "\n", - "GWAS_ACCESSION = \"GCST006250\" # IBD GWAS — change for a different trait\n", - "N_SAMPLES = 86640 # approximate total N for this study\n", + "GWAS_ACCESSION = \"GCST006250\"\n", + "N_SAMPLES = 86640 \n", "\n", "gwas_path = get_gwas_catalog_study_summary_stats(\n", " GWAS_ACCESSION,\n", " return_path=True,\n", ")\n", - "print(f\"Downloaded GWAS to: 
{gwas_path}\")\n", - "\n", - "# Peek at the columns to confirm rsid is present\n" + "print(f\"Downloaded GWAS to: {gwas_path}\")" ] }, { @@ -18666,12 +18579,6 @@ "import os\n", "os.makedirs(\"sumstats\", exist_ok=True)\n", "\n", - "# merge_alleles is OPTIONAL — it restricts the munged sumstats to SNPs in a\n", - "# reference panel file with columns SNP, A1, A2. The hm3_no_MHC.list.txt\n", - "# from get_1000genomes_hapmap3() is a bare rsID list (no alleles), so it\n", - "# cannot be used for --merge-alleles. For sc-linker this is fine: the\n", - "# --print-snps flag already restricted LD scores to HapMap3 SNPs during\n", - "# compute_ld_scores_for_sclinker, so no further filtering is needed here.\n", "munge_sumstats(\n", " sumstats_file=\"gwas_sumstat_ibd_filt.tsv.gz\",\n", " out_prefix=\"ibd\",\n", @@ -18679,7 +18586,7 @@ " snp_col=\"hm_rsid\",\n", " a1_col=\"hm_effect_allele\",\n", " a2_col=\"hm_other_allele\",\n", - " signed_sumstats=(\"hm_beta\", 0), # string format: \"colname,null_value\"\n", + " signed_sumstats=(\"hm_beta\", 0), \n", " p_col=\"p_value\",\n", " runner=runner,\n", ")" @@ -19239,10 +19146,6 @@ } ], "source": [ - "# annotation_prefixes is required so LDSC --overlap-annot can find the\n", - "# .annot.gz files alongside the LD scores (cellink symlinks them automatically).\n", - "# sumstats path must be absolute so it survives Singularity bind-mount remapping.\n", - "import os\n", "sumstats_abs = os.path.abspath(\"ibd.sumstats.gz\")\n", "\n", "sldsc_log_paths = run_sclinker_heritability(\n", @@ -19251,61 +19154,28 @@ " ref_ld_chr=ref_ld_chr,\n", " w_ld_chr=w_ld_chr,\n", " out_dir=os.path.abspath(\"sldsc_results\"),\n", - " annotation_prefixes=annotation_prefixes, # for --overlap-annot symlink\n", + " annotation_prefixes=annotation_prefixes, \n", " frqfile_chr=frq_chr,\n", " runner=runner,\n", ")" ] }, { - "cell_type": "code", - "execution_count": 19, - "id": "02093864", - "metadata": {}, - "outputs": [], - "source": [ - 
"os.makedirs(\"sldsc_results\", exist_ok=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "020e5758", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/ictstr01/groups/ml01/workspace/lucas.arnoldt/projects/cellink/docs/tutorials'" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.getcwd()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "43222434", + "cell_type": "markdown", + "id": "e9e6d35a", "metadata": {}, - "outputs": [], "source": [ - "#/data/ldscores/B_naive_L2/ABC_Road_BLD/B_naive_L2.1.annot[./gz/bz2]" + "## Load results and compute E-scores" ] }, { "cell_type": "markdown", - "id": "e9e6d35a", + "id": "40646704", "metadata": {}, "source": [ - "---\n", + "[Jagadeesh et al. 2022](https://www.nature.com/articles/s41588-022-01187-9) interpret scores with E-score > 2 as significant. This is the excess enrichment of the program annotation over all-protein-coding genes linked via the same enhancer strategy. Both ABC\\_Road and 100 kb results should be reported since ABC\\_Road is primary while 100 kb is a less biologically-informed comparison. Concordance between the two supports the result.\n", "\n", - "## Part 7: Load results and compute E-scores" + "Please note that the presented results are for demonstration purposes only since we are working with a subsampled OneK1K dataset." ] }, { @@ -19366,14 +19236,7 @@ "id": "760d5d76", "metadata": {}, "source": [ - "---\n", - "\n", - "## Part 8: Visualise results\n", - "\n", - "Bubble plot replicating sc-linker Fig. 
2d style:\n", - "- dot **size** ∝ E-score\n", - "- dot **opacity** ∝ significance (|E_score_z|)\n", - "- dashed threshold at E-score = 2 (paper significance cutoff)" + "## Visualise results" ] }, { @@ -19454,193 +19317,6 @@ " print(\"Top enrichments:\")\n", " print(top[[\"program\",\"trait\",\"E_score\",\"E_score_z\"]].to_string(index=False))" ] - }, - { - "cell_type": "markdown", - "id": "11399517", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Part 9: PPI gene scores (optional)\n", - "\n", - "The GSSG pipeline supports propagating gene scores through a protein-protein\n", - "interaction network (Random Walk with Restart) to create PPI-informed programs.\n", - "This ports `ppi_RWR.R` / `ppi_string_RWR.R` from `code/calc_PPI_scores/`." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "cd69a0dc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PPI network: 473,860 edges, 16,201 unique genes\n", - "Seed genes for 'B naive_L2': 892\n", - "\n", - "Top PPI-propagated genes:\n", - "HLA-DRB1 0.000964\n", - "IGHD 0.000964\n", - "FCGR2B 0.000963\n", - "FAU_ENSG00000149806 0.000961\n", - "DUSP1_ENSG00000120129 0.000961\n", - "PNOC_ENSG00000168081 0.000961\n", - "AFF3_ENSG00000144218 0.000961\n", - "CAMK1D_ENSG00000183049 0.000961\n", - "COBLL1_ENSG00000082438 0.000961\n", - "LSM10_ENSG00000181817 0.000961\n", - "APPL1_ENSG00000157500 0.000961\n", - "CAMK2D_ENSG00000145349 0.000961\n", - "LIMD2_ENSG00000136490 0.000961\n", - "SPI1_ENSG00000066336 0.000961\n", - "HLA-DOB_ENSG00000241106 0.000961\n", - "QRSL1_ENSG00000130348 0.000961\n", - "CALHM6_ENSG00000188820 0.000961\n", - "CAPG_ENSG00000042493 0.000961\n", - "HLA-DPA1_ENSG00000231389 0.000961\n", - "SPIB_ENSG00000269404 0.000961\n" - ] - } - ], - "source": [ - "import os, pandas as pd\n", - "\n", - "ppi_links_file = \"9606.protein.links.v12.0.txt.gz\"\n", - "ppi_info_file = \"9606.protein.info.v12.0.txt.gz\"\n", - "\n", - "if not 
os.path.exists(ppi_links_file):\n", - " print(f\"PPI file not found: {ppi_links_file}\")\n", - " print(\"Download from https://string-db.org/cgi/download?species_text=Homo+sapiens\")\n", - "else:\n", - " ppi_raw = pd.read_csv(ppi_links_file, sep=\" \")\n", - " ppi_info = pd.read_csv(ppi_info_file, sep=\"\\t\")\n", - "\n", - " # STRING v12 info file: first column may have a '#' prefix in the header\n", - " ppi_info.columns = [c.lstrip(\"#\").strip() for c in ppi_info.columns]\n", - "\n", - " # Map STRING protein ID → HGNC gene name\n", - " id_col = \"string_protein_id\" # e.g. \"9606.ENSP00000000233\"\n", - " name_col = \"preferred_name\"\n", - " id2gene = ppi_info.set_index(id_col)[name_col].to_dict()\n", - "\n", - " ppi = pd.DataFrame({\n", - " \"gene1\": ppi_raw[\"protein1\"].map(id2gene),\n", - " \"gene2\": ppi_raw[\"protein2\"].map(id2gene),\n", - " \"score\": ppi_raw[\"combined_score\"],\n", - " }).dropna(subset=[\"gene1\", \"gene2\"])\n", - " ppi = ppi[ppi[\"score\"] >= 700]\n", - " print(f\"PPI network: {len(ppi):,} edges, {ppi['gene1'].nunique():,} unique genes\")\n", - "\n", - " top_program = genescores.columns[0]\n", - " seed = genescores[top_program]\n", - " seed = seed[seed > 0.3]\n", - " print(f\"Seed genes for '{top_program}': {len(seed)}\")\n", - "\n", - " ppi_scores = compute_ppi_gene_scores(\n", - " seed_genes=seed,\n", - " ppi_network=ppi,\n", - " restart_prob=0.7,\n", - " source_col=\"gene1\",\n", - " target_col=\"gene2\",\n", - " weight_col=\"score\",\n", - " )\n", - " print(\"\\nTop PPI-propagated genes:\")\n", - " print(ppi_scores.sort_values(ascending=False).head(20).to_string())" - ] - }, - { - "cell_type": "markdown", - "id": "e77ca733", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Comparison: sc-linker vs scDRS vs Seismic\n", - "\n", - "| | **sc-linker** | **scDRS** | **Seismic** |\n", - "|---|---|---|---|\n", - "| Resolution | Cell-type programs | Individual cells | Cell types |\n", - "| GWAS input | Sumstats + baseline-LD | 
MAGMA gene scores | MAGMA gene scores |\n", - "| SNP-to-gene | Roadmap + ABC enhancers | Top-N MAGMA genes | Specificity scores |\n", - "| S-LDSC mode | `--h2 --overlap-annot` | — | — |\n", - "| Key output | E-score per (program, trait) | Norm score per cell | p-value per (cell type, trait) |\n", - "| Extra deps | `bedtools`, LDSC binary | `scdrs` package | R + `seismicGWAS` |\n", - "| Ref data | ~10–50 GB from Broad | None extra | None extra |\n", - "| Best for | Heritability-based causal program ID | Cell-level scoring & heterogeneity | Fast cell-type associations |\n", - "\n", - "---\n", - "\n", - "## Notes on interpretation\n", - "\n", - "**E-score threshold** — the paper uses E-score > 2 as significance. This is the excess enrichment of the program annotation over all-protein-coding genes linked via the same enhancer strategy.\n", - "\n", - "**Two strategies** — always report both ABC\\_Road and 100 kb. ABC\\_Road is primary; 100 kb is a less biologically-informed comparison. Concordance between the two supports the result.\n", - "\n", - "**Disease-dependent vs healthy programs** — for epithelial diseases (UC, asthma), disease-dependent programs are typically more enriched; for immune diseases, both healthy and disease programs tend to be enriched.\n", - "\n", - "**Sample size** — use > 50 donors for the scRNA-seq and a well-powered GWAS (z-score for h² > 5)." 
- ] - }, - { - "cell_type": "markdown", - "id": "0d75e73b", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## Troubleshooting: IndexError in `_check_variance` during `--h2 --overlap-annot`\n", - "\n", - "If you see an error like:\n", - "\n", - "```\n", - "IndexError: boolean index did not match indexed array along dimension 1;\n", - "dimension is 11 but corresponding boolean dimension is 10\n", - "```\n", - "\n", - "this is a known pandas compatibility bug in `ldscore/parse.py` (ldsc\n", - "[issue #342](https://github.com/bulik/ldsc/issues/342),\n", - "[PR #341](https://github.com/bulik/ldsc/pull/341/files)).\n", - "\n", - "**What causes it:** `pd.concat(chr_ld)` reorders annotation columns\n", - "alphabetically across chromosomes in newer pandas versions. When your\n", - "annotation column name sorts before the baseline columns, `_check_variance()`\n", - "receives a mismatched array and crashes.\n", - "\n", - "**The fix** (one line in `ldscore/parse.py` ~line 147):\n", - "```python\n", - "# before\n", - "x = pd.concat(chr_ld)\n", - "# after\n", - "x = pd.concat(chr_ld)[chr_ld[0].columns]\n", - "```\n", - "\n", - "Run the cell below to apply it automatically. It is safe to call repeatedly\n", - "(idempotent). Only apply it if you actually hit the error." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1461742", - "metadata": {}, - "outputs": [], - "source": [ - "from cellink.tl.external import check_and_patch_ldsc_parse_bug\n", - "\n", - "result = check_and_patch_ldsc_parse_bug(runner)\n", - "print(f\"status : {result['status']}\")\n", - "print(f\"mode : {result['mode']}\")\n", - "print(f\"parse_path : {result['parse_path']}\")\n", - "print(f\"detail : {result['detail']}\")\n", - "# status: already_patched → already fixed, nothing to do\n", - "# status: patched → fix applied successfully\n", - "# status: patch_failed → check parse_script path and Singularity permissions\n", - "# status: not_found → parse_script path wrong or image not accessible" - ] } ], "metadata": { diff --git a/src/cellink/tl/external/__init__.py b/src/cellink/tl/external/__init__.py index fae51e6..4e91d3e 100644 --- a/src/cellink/tl/external/__init__.py +++ b/src/cellink/tl/external/__init__.py @@ -37,14 +37,6 @@ compute_diseaseprogression_programs, compute_nmf_programs, compute_joint_nmf_programs, - #geneprogram_to_bedgraph, - #bedgraph_to_snp_annotation, - #run_sclinker_sldsc, - #load_sclinker_results, - #compute_escore, - #download_sclinker_references, - #load_enhancer_links, - #run_sclinker_pipeline, ) from ._sclinker_utils import ( run_sclinker_heritability, @@ -58,10 +50,8 @@ bedgraph_to_snp_annotation, genescores_to_annotations, compute_ld_scores_for_sclinker, - #run_sclinker_sldsc, load_sclinker_heritability_results, compute_escore, - compute_ppi_gene_scores, check_and_patch_ldsc_parse_bug, ) from ._joint_nmf import JointNMFWrapper @@ -109,7 +99,6 @@ "run_sclinker_sldsc", "load_sclinker_heritability_results", "compute_escore", - "compute_ppi_gene_scores", "check_and_patch_ldsc_parse_bug", ### "JointNMFWrapper", diff --git a/src/cellink/tl/external/_joint_nmf.py b/src/cellink/tl/external/_joint_nmf.py index 8cfe5e8..c1bf248 100644 --- a/src/cellink/tl/external/_joint_nmf.py +++ 
b/src/cellink/tl/external/_joint_nmf.py @@ -1,20 +1,3 @@ -""" -Pure-Python port of the Joint NMF implementation from the sc-linker paper. - -Reference: Jagadeesh*, Dey* et al., Nature Genetics 2022. -Original code: https://github.com/karthikj89/scgenetics/blob/master/jointNMF.py - -The JointNMFWrapper class provides a clean interface over the original -multiplicative update rules. It decomposes two matrices (healthy H and -disease D) simultaneously: - - H ≈ [L_shared_H | L_unique_H] × F_H - D ≈ [L_shared_D | L_unique_D] × F_D - -with a coupling term γ/2 ||L_shared_H − L_shared_D||² that encourages -shared programs to align across conditions. -""" - from __future__ import annotations import logging @@ -40,9 +23,9 @@ class JointNMFWrapper: Parameters ---------- Xh - Dense matrix (cells × genes) for the healthy condition. + Dense matrix (cells x genes) for the healthy condition. Xd - Dense matrix (cells × genes) for the disease condition. + Dense matrix (cells x genes) for the disease condition. Must share the same gene dimension as ``Xh``. n_shared Number of programs shared between healthy and disease (KC in paper). 
@@ -86,7 +69,6 @@ def __init__( self.max_iters = max_iters self.tol = tol - # Normalise to [0, 1] self.Xh = sparse.csr_matrix(Xh, dtype=np.float64) self.Xd = sparse.csr_matrix(Xd, dtype=np.float64) _max_h = self.Xh.max() @@ -96,17 +78,14 @@ def __init__( if _max_d > 0: self.Xd = self.Xd / _max_d - # Initialise with NMF (best of n_init starts) nh_total = n_shared + n_healthy_specific nd_total = n_shared + n_disease_specific self.Wh, self.Hh = self._best_nmf(self.Xh, nh_total, n_init, random_state) self.Wd, self.Hd = self._best_nmf(self.Xd, nd_total, n_init, random_state + 1) - # Align shared columns between Wh and Wd self._align() - # Estimate mu if not provided if mu is None: diff_h = 0.5 * self._frob(self.Xh - self.Wh.dot(self.Hh)) ** 2 diff_d = 0.5 * self._frob(self.Xd - self.Wd.dot(self.Hd)) ** 2 @@ -115,10 +94,6 @@ def __init__( else: self.mu = mu - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - def fit(self) -> "JointNMFWrapper": """Run multiplicative updates until convergence.""" t0 = time() @@ -149,13 +124,9 @@ def fit(self) -> "JointNMFWrapper": logger.info(f"Joint NMF converged after {niter} iterations ({elapsed:.2f} min)") return self - # ------------------------------------------------------------------ - # Properties: dense factor matrices - # ------------------------------------------------------------------ - @property def Wh(self) -> np.ndarray: - """Healthy cell × factor loadings (dense).""" + """Healthy cell x factor loadings (dense).""" return self._Wh.toarray() if issparse(self._Wh) else self._Wh @Wh.setter @@ -186,10 +157,6 @@ def Hd(self) -> np.ndarray: def Hd(self, value): self._Hd = sparse.csr_matrix(value) - # ------------------------------------------------------------------ - # Private: cost and updates - # ------------------------------------------------------------------ - def _cost(self) -> float: Wshh = self._Wh[:, : 
self.n_shared] Wshd = self._Wd[:, : self.n_shared] @@ -241,10 +208,6 @@ def _update_Hd(self): den = safe_sparse_dot(safe_sparse_dot(self._Wd.T, self._Wd), self._Hd) self._Hd = self._Hd.multiply(num / (den + _SMALL)).tocsr() - # ------------------------------------------------------------------ - # Private: initialisation helpers - # ------------------------------------------------------------------ - @staticmethod def _best_nmf( X: sparse.spmatrix, n_components: int, n_init: int, seed: int diff --git a/src/cellink/tl/external/_sclinker.py b/src/cellink/tl/external/_sclinker.py index 8be39f6..69d8b0a 100644 --- a/src/cellink/tl/external/_sclinker.py +++ b/src/cellink/tl/external/_sclinker.py @@ -1,29 +1,3 @@ -""" -sc-linker: Integrating single-cell RNA-seq, epigenomic maps and GWAS summary statistics -to infer disease-critical cell types and cellular processes. - -Based on: Jagadeesh*, Dey* et al., Nature Genetics 2022. -https://doi.org/10.1038/s41588-022-01187-9 - -Pipeline overview ------------------ -Step 1 – Gene programs - compute_celltype_programs() : Wilcoxon DE per cell type vs rest → gene scores - compute_diseaseprogression_programs(): Disease vs healthy DE per cell type → gene scores - compute_nmf_programs() : NMF latent factors → gene scores - compute_joint_nmf_programs() : Joint healthy/disease NMF → gene scores - -Step 2 – Gene programs → SNP annotations - geneprogram_to_bedgraph() : Gene scores × enhancer-gene links → bedgraph - bedgraph_to_snp_annotation() : bedgraph × BIM file → .annot.gz - -Step 3 – S-LDSC heritability enrichment - Delegates to cellink.tl.external._ldsc and _sldsc_utils, which already exist. 
- run_sclinker_sldsc() : Run S-LDSC for all programs/traits - load_sclinker_results() : Parse postprocess files → DataFrame - compute_escore() : Compute E-score (program − all-protein-coding) -""" - from __future__ import annotations import logging @@ -44,9 +18,6 @@ logger = logging.getLogger(__name__) -# --------------------------------------------------------------------------- -# Public constants -# --------------------------------------------------------------------------- ENHANCER_TISSUES = Literal[ "BLD", "BRN", "GI", "LNG", "LIV", "KID", "SKIN", "FAT", "HRT", "ALL" @@ -66,9 +37,6 @@ } -# --------------------------------------------------------------------------- -# Step 1a: Cell-type gene programs -# --------------------------------------------------------------------------- def compute_celltype_programs( adata: AnnData, @@ -114,14 +82,13 @@ def compute_celltype_programs( dict ``{"pval": DataFrame, "logfold": DataFrame, "score": DataFrame, "genescores": DataFrame}`` - All DataFrames are (genes × cell_types). + All DataFrames are (genes x cell_types). 
""" if celltype_col not in adata.obs.columns: raise ValueError(f"'{celltype_col}' not found in adata.obs") de_key = f"{celltype_col}_DE" - # Filter rare cell types counts = Counter(adata.obs[celltype_col]) adata.obs[f"{celltype_col}_counts"] = [counts[ct] for ct in adata.obs[celltype_col]] adata_filtered = adata[adata.obs[f"{celltype_col}_counts"] > min_cells_per_type].copy() @@ -138,17 +105,14 @@ def compute_celltype_programs( method=method, n_genes=adata_filtered.n_vars, ) - # Propagate back to full adata adata.uns[de_key] = adata_filtered.uns[de_key] results = _extract_de_matrices(adata, de_key, label_col=celltype_col) genescores = _compute_genescores(results["score"]) - # If var_names look like ENSG IDs, try to map to HGNC via adata.var if (genescores.index.str.startswith("ENSG").mean() > 0.5 and "gene_name" in adata.var.columns): gene_name_map = adata.var["gene_name"].dropna().to_dict() - # Map ENSG → HGNC for all DataFrames for key in list(results.keys()): results[key].index = results[key].index.map( lambda g: gene_name_map.get(g, g) @@ -180,10 +144,6 @@ def compute_celltype_programs( return results -# --------------------------------------------------------------------------- -# Step 1b: Disease-progression gene programs -# --------------------------------------------------------------------------- - def compute_diseaseprogression_programs( adata: AnnData, celltype_col: str, @@ -219,7 +179,7 @@ def compute_diseaseprogression_programs( disease_label Value in ``diagnosis_col`` that denotes disease samples. min_cells_per_group - Minimum cells in each (healthy/disease × cell type) group. + Minimum cells in each (healthy/disease x cell type) group. use_raw Passed to ``sc.tl.rank_genes_groups``. 
method @@ -243,7 +203,6 @@ def compute_diseaseprogression_programs( disease_label_mapping = {healthy_label: "Healthy", disease_label: "Disease"} - # Compute contamination from global disease-only DE disease_subset = adata[adata.obs[diagnosis_col] == disease_label].copy() sc.tl.rank_genes_groups( disease_subset, @@ -289,13 +248,11 @@ def compute_diseaseprogression_programs( ) processed_cell_types.append(ct) - # Collect all DE results all_de_keys = [f"{ct}_DE" for ct in processed_cell_types] results = _extract_de_matrices_disease(adata, all_de_keys, contamination, celltype_col) genescores = _compute_genescores(results["score"]) results["genescores"] = genescores - # Clean up temporary column adata.obs.drop(columns=["_DEstatus"], inplace=True, errors="ignore") if save and out_dir is not None: @@ -308,10 +265,6 @@ def compute_diseaseprogression_programs( return results -# --------------------------------------------------------------------------- -# Step 1c: NMF cellular process programs (healthy) -# --------------------------------------------------------------------------- - def compute_nmf_programs( adata: AnnData, *, @@ -364,20 +317,19 @@ def compute_nmf_programs( Returns ------- W : DataFrame - Cell × factor (cell programs), index = obs_names. + Cell x factor (cell programs), index = obs_names. H : DataFrame - Gene × factor (gene programs), index = var_names. + Gene x factor (gene programs), index = var_names. corr : DataFrame - Gene × factor (Pearson correlation between gene expression and W scores). + Gene x factor (Pearson correlation between gene expression and W scores). Notes ----- Backend priority: - 1. **torchnmf** (GPU or CPU) — install with ``pip install torchnmf``. - On large matrices (>50k cells) this is 5–20× faster than sklearn. - 2. **sklearn NMF** with ``init='nndsvda'`` + ``solver='cd'`` — always - available. Significantly faster than ``init='random'`` but still slow + 1. **torchnmf** (GPU or CPU), install with ``pip install torchnmf``. 
+ On large matrices (>50k cells) this is 5-20x faster than sklearn. + 2. **sklearn NMF** with ``init='nndsvda'`` + ``solver='cd'``. Significantly faster than ``init='random'`` but still slow on very large matrices. """ from sklearn.decomposition import NMF @@ -396,7 +348,6 @@ def compute_nmf_programs( logger.info(f"Fitting NMF with {n_components} components on {X.shape} matrix") - # ── Backend 1: torchnmf ──────────────────────────────────────────────── W_arr = None H_arr = None try: @@ -409,10 +360,9 @@ def compute_nmf_programs( "Install the faster backend with:\n" " pip install torchnmf" ) - torch = None # type: ignore + torch = None if torch is not None: - # Resolve the requested device if device == "cuda": if torch.cuda.is_available(): _device = "cuda" @@ -437,7 +387,6 @@ def compute_nmf_programs( H_arr = model_t.W.detach().cpu().numpy() # (n_features, n_components) del X_t, model_t - # ── Backend 2: sklearn NMF (always available, slower) ───────────────── if W_arr is None: logger.info( "Using sklearn NMF with init='nndsvda' + solver='cd'. " @@ -469,10 +418,6 @@ def compute_nmf_programs( return W, H, corr -# --------------------------------------------------------------------------- -# Step 1d: Joint NMF (healthy + disease cellular processes) -# --------------------------------------------------------------------------- - def compute_joint_nmf_programs( adata_healthy: AnnData, adata_disease: AnnData, @@ -492,10 +437,10 @@ def compute_joint_nmf_programs( Decomposes healthy (H) and disease (D) matrices jointly: - H ≈ [L_shared_H | L_unique_H] × F_H - D ≈ [L_shared_D | L_unique_D] × F_D + H ≈ [L_shared_H | L_unique_H] x F_H + D ≈ [L_shared_D | L_unique_D] x F_D - with a coupling term γ/2 ||L_shared_H − L_shared_D||² that forces + with a coupling term gamma/2 ||L_shared_H - L_shared_D||² that forces the shared programs to be similar.
Parameters @@ -526,18 +471,17 @@ def compute_joint_nmf_programs( Returns ------- dict with keys: - ``"Wh"`` : healthy cell × factor loadings (shared + healthy-specific) - ``"Wd"`` : disease cell × factor loadings (shared + disease-specific) - ``"Hh"`` : gene × factor weights (healthy) - ``"Hd"`` : gene × factor weights (disease) - ``"shared_Wh"`` : healthy cell × shared-factor loadings - ``"shared_Wd"`` : disease cell × shared-factor loadings - ``"unique_Hh"`` : gene × healthy-specific-factor weights - ``"unique_Hd"`` : gene × disease-specific-factor weights + ``"Wh"`` : healthy cell x factor loadings (shared + healthy-specific) + ``"Wd"`` : disease cell x factor loadings (shared + disease-specific) + ``"Hh"`` : gene x factor weights (healthy) + ``"Hd"`` : gene x factor weights (disease) + ``"shared_Wh"`` : healthy cell x shared-factor loadings + ``"shared_Wd"`` : disease cell x shared-factor loadings + ``"unique_Hh"`` : gene x healthy-specific-factor weights + ``"unique_Hd"`` : gene x disease-specific-factor weights """ from ._joint_nmf import JointNMFWrapper - # Align genes common_genes = adata_healthy.var_names.intersection(adata_disease.var_names) if len(common_genes) == 0: raise ValueError("No overlapping genes between healthy and disease AnnData objects.") @@ -592,40 +536,6 @@ def compute_joint_nmf_programs( return results -# --------------------------------------------------------------------------- -# Step 2a: Gene program → bedgraph (SNP-to-gene linking) -# --------------------------------------------------------------------------- - - -# --------------------------------------------------------------------------- -# Step 2b: Bedgraph → SNP annotation (.annot.gz) -# --------------------------------------------------------------------------- - - -# --------------------------------------------------------------------------- -# Step 3: S-LDSC heritability enrichment (delegates to existing cellink code) -# 
--------------------------------------------------------------------------- - - -# --------------------------------------------------------------------------- -# Loading / postprocessing results -# --------------------------------------------------------------------------- - - -# --------------------------------------------------------------------------- -# Reference data download helpers -# --------------------------------------------------------------------------- - - -# --------------------------------------------------------------------------- -# Full pipeline convenience function -# --------------------------------------------------------------------------- - - -# --------------------------------------------------------------------------- -# Private helpers -# --------------------------------------------------------------------------- - def _get_dense(adata: AnnData, layer: Optional[str]) -> np.ndarray: """Return a dense float32 expression matrix from an AnnData layer or X.""" X = adata.layers[layer] if (layer and layer in adata.layers) else adata.X @@ -674,7 +584,6 @@ def _extract_de_matrices( logfold_mtx[idx, j] = lf_row[cs] score_mtx[idx, j] = sc_row[cs] - # Append level suffix so column names match sc-linker convention level = label_col.split("_")[-1] if "_" in label_col else "2" col_names = [f"{cs}_L{level}" for cs in cellsubsets] @@ -720,7 +629,6 @@ def _extract_de_matrices_disease( idx = gene2idx[g] if g in ct_contamination: pval_mtx[idx, j] = 1.0 - # logfold and score remain 0 else: pval_mtx[idx, j] = pval_row[cs] logfold_mtx[idx, j] = lf_row[cs] diff --git a/src/cellink/tl/external/_sclinker_utils.py b/src/cellink/tl/external/_sclinker_utils.py index f4a71f9..5dea773 100644 --- a/src/cellink/tl/external/_sclinker_utils.py +++ b/src/cellink/tl/external/_sclinker_utils.py @@ -1,44 +1,3 @@ -""" -sc-linker Step 2: Gene programs → SNP annotations -================================================== - -This module ports the GSSG `code/GeneSet_toS2G/` 
pipeline to Python. - -The original pipeline: - 1. geneset_to_bed_sclinker.R → bedgraphs (Roadmap ∪ ABC + 100kb) - 2. clean_bed.sh → bedtools sort + merge to remove overlaps - 3. bedgraph_to_annot.py → SNP annotation (.annot.gz) from .bim + bedgraph - -sc-linker runs TWO strategies per program: - a) ABC_Road_{tissue}: weighted by enhancer activity (Roadmap ∪ ABC links) - b) 100kb: gene body ± 100kb window (unweighted, binary-ish) - -The E-score is: - E_score(program, trait) = Enrichment(program) - Enrichment(AllCoding_control) - -**S-LDSC call used by sc-linker** (from GSSG code/ldsc/run_ldsc_reg.sh): - ldsc.py --h2 {trait}.sumstats - --ref-ld-chr {annotations}/{program}/merged. - --frqfile-chr 1000G_Phase3_frq/1000G.EUR.QC. - --w-ld-chr weights.hm3_noMHC. - --overlap-annot --print-coefficients --print-delete-vals - --out heritability/{program}_h2/{trait}_merged - - NOTE: --h2 (partitioned heritability), NOT --h2-cts (cell-type-specific). - -**LDSC bug note** (ldsc issue #342, PR #341): - When using --overlap-annot with multiple chromosomes, old pandas versions - sort columns alphabetically during pd.concat in parse.py, causing an IndexError. - The sclinker-skg author (yyoshiaki) submitted a fix (PR #341) that is not yet - merged upstream. If your LDSC container uses unpatched ldsc with pandas ≥ 1.0, - this will crash. See ``LDSCPatchChecker`` below. 
- -Reference data files (download from Broad LDSCORE server): - Roadmap links: LDSCORE/Jagadeesh_Dey_sclinker/extras/Roadmap_{tissue}_E.txt.gz - ABC links: LDSCORE/Jagadeesh_Dey_sclinker/extras/ABCpaper_NasserFulcoEngreitz2020_{tissue}_AvgHiC.txt.gz - Gene coords: LDSCORE/Jagadeesh_Dey_sclinker/extras/gene_annotation.txt (TSS positions for 100kb) -""" - from __future__ import annotations import logging @@ -55,12 +14,7 @@ logger = logging.getLogger(__name__) -# --------------------------------------------------------------------------- -# Tissue codes -# --------------------------------------------------------------------------- -# Tissue codes used by sc-linker (Jagadeesh*, Dey* et al. Nat. Genet. 2022). -# These correspond to the ABC strategy tissue labels. TISSUE_CODES = { "BLD": "Blood", "BRN": "Brain", @@ -73,37 +27,12 @@ "HRT": "Heart", } -# GCS base URL for sc-linker reference files (public, no auth needed). -# Note: the old alkesgroup.broadinstitute.org FTP mirror returns 404. + _SCLINKER_GCS_BASE = ( "https://storage.googleapis.com/broad-alkesgroup-public" "/LDSCORE/Jagadeesh_Dey_sclinker/extras" ) -# --------------------------------------------------------------------------- -# What is actually in the GCS bucket -# (confirmed from directory listing 2024-04): -# -# extras/ -# AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz -# ← all-tissue ABC links (373 MB) -# RoadmapUABCannot_regions_to_genes.txt.gz -# ← all-tissue Roadmap links (207 MB) -# Roadmap_map_EID_names.txt ← EID → tissue name mapping -# RoadmapLinks/links_E001_*.txt, ... ← per-EID raw Roadmap link files -# all_genes/ALL/100kb/, 10kb/, ABC_Road_GI_*/ -# ← precomputed annots (outputs, not inputs) -# ldsc_results_examples/ ← example LDSC results -# -# Per-tissue files like Roadmap_BLD_E.txt.gz are NOT in the bucket. -# They are produced by the GSSG R script (geneset_to_bed_sclinker.R) which -# reads the two large combined files and filters by tissue. 
-# cellink reproduces this filtering in Python via load_roadmap_links() / -# load_abc_links() after downloading the combined files. -# -# gene_annotation.txt is also NOT in the bucket. -# It is an LDSC gene coordinate file generated by cellink via pybiomart. -# --------------------------------------------------------------------------- def _http_download(url: str, dest: Path) -> None: """Download a file via HTTPS, using cellink helper if available.""" @@ -130,15 +59,13 @@ def download_sclinker_enhancer_links( Downloads the two large combined files that cellink then filters by tissue: - - ``RoadmapUABCannot_regions_to_genes.txt.gz`` (207 MB) — all-tissue - Roadmap enhancer-gene activity scores. - - ``AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz`` - (373 MB) — all-tissue ABC model predictions. - - ``Roadmap_map_EID_names.txt`` — Roadmap EID → tissue name mapping. + - ``RoadmapUABCannot_regions_to_genes.txt.gz``: all-tissue Roadmap enhancer-gene activity scores. + - ``AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz``: all-tissue ABC model predictions. + - ``Roadmap_map_EID_names.txt``: Roadmap EID -> tissue name mapping. - The per-tissue files (``Roadmap_BLD_E.txt.gz`` etc.) do **not** exist in + The per-tissue files (``Roadmap_BLD_E.txt.gz`` etc.) do not exist in the bucket. They are produced on-the-fly by :func:`load_roadmap_links` and - :func:`load_abc_links` when you pass the downloaded combined files. + :func:`load_abc_links` when the downloaded combined files are passed. Parameters ---------- @@ -194,7 +121,7 @@ def _symlink_annots_into_ld_dir(annot_prefix: str, ld_prefix: str) -> None: """ annot_dir = Path(annot_prefix).parent ld_dir = Path(ld_prefix).parent - stem = Path(annot_prefix).name # e.g. "B_naive_L2."
+ stem = Path(annot_prefix).name if not annot_dir.exists(): logger.warning(f"Annotation directory not found: {annot_dir}") @@ -294,12 +221,9 @@ def run_sclinker_heritability( for strategy_name, ld_prefix in strategies.items(): results[program][strategy_name] = {} - # ── Symlink .annot.gz files into the LD score directory ────────── - # LDSC --overlap-annot needs .annot.gz at the same prefix as .l2.ldscore.gz if annotation_prefixes and program in annotation_prefixes: annot_prefix = annotation_prefixes[program].get(strategy_name) else: - # Infer: ldscores/.../B_naive_L2. → annotations/.../B_naive_L2. annot_prefix = ( ld_prefix .replace("/ldscores/", "/annotations/") @@ -314,7 +238,6 @@ def run_sclinker_heritability( prog_out.mkdir(parents=True, exist_ok=True) out_prefix = str(prog_out / trait) - # sc-linker ref-ld = baseline + program LD scores ref_ld = f"{ref_ld_chr},{ld_prefix}" logger.info(f"S-LDSC: {program}/{strategy_name}/{trait}") @@ -338,15 +261,6 @@ def run_sclinker_heritability( def download_sclinker_references( out_dir: Union[str, Path] = "sclinker_references", tissue: Optional[str] = None, - download_roadmap: bool = True, - download_abc: bool = True, - download_gene_coords: bool = True, - download_bims: bool = False, # use get_1000genomes_plink_files() instead - download_frq: bool = False, # use get_1000genomes_frq() instead - download_weights: bool = False, # use get_1000genomes_ld_weights() instead - download_hapmap3: bool = False, # use get_1000genomes_hapmap3() instead - download_baseline: bool = False, - chromosomes: List[int] = list(range(1, 23)), ) -> Dict[str, Path]: """ Download sc-linker reference files. @@ -384,27 +298,13 @@ def download_sclinker_references( chromosomes Ignored (kept for API compatibility). """ - if any([download_bims, download_frq, download_weights, - download_hapmap3, download_baseline]): - logger.warning( - "download_bims / download_frq / download_weights / download_hapmap3 / " - "download_baseline are ignored. 
Use the cellink resource helpers " - "get_1000genomes_plink_files(), get_1000genomes_frq(), " - "get_1000genomes_ld_weights(), get_1000genomes_hapmap3() instead — " - "they download from Zenodo (https://zenodo.org/records/10515792)." - ) - # Delegate to the focused helper return download_sclinker_enhancer_links( out_dir=out_dir, tissue=tissue, ) -# --------------------------------------------------------------------------- -# Step 2a: Load enhancer-gene links -# --------------------------------------------------------------------------- - def load_roadmap_eid_map(eid_file: Union[str, Path]) -> Dict[str, str]: """ Load the Roadmap EID → tissue-name mapping from ``Roadmap_map_EID_names.txt``. @@ -425,7 +325,7 @@ def load_roadmap_links( Load Roadmap enhancer-gene links from the combined all-tissue file. The actual file in the GCS bucket is - ``RoadmapUABCannot_regions_to_genes.txt.gz`` — a single combined file + ``RoadmapUABCannot_regions_to_genes.txt.gz``, a single combined file for all tissues. Per-tissue files (``Roadmap_BLD_E.txt.gz`` etc.) do not exist in the bucket; cellink filters the combined file here instead. @@ -447,11 +347,9 @@ def load_roadmap_links( ------- pd.DataFrame with columns: chr, start, end, Gene, EID, activity (score). """ - # Try comma-separated first (RoadmapUABCannot file is CSV, not TSV) - # then fall back to tab-separated for other formats + df = pd.read_csv(roadmap_file, sep=",", compression="infer") if len(df.columns) == 1: - # Only one column → probably tab-separated df = pd.read_csv(roadmap_file, sep="\t", compression="infer") df.columns = [c.strip() for c in df.columns] logger.info( @@ -461,15 +359,6 @@ def load_roadmap_links( if tissue is not None: tissue_upper = tissue.upper() - # Keyword mapping for matching against tissue name columns - # Roadmap tissue name keywords — matched case-insensitively against - # the 'tissuename' column in RoadmapUABCannot_regions_to_genes.txt.gz. 
- # Tissue names in that file are full descriptive strings from the - # Roadmap Epigenomics project, e.g.: - # "Primary mononuclear cells from peripheral blood" - # "Primary T cells from peripheral blood" - # "Fetal Brain Male" - # Keywords are ordered from most to least specific. tissue_keywords = { "BLD": ["blood", "mononuclear", "t cell", "t-cell", "b cell", "b-cell", "nk cell", "cd4", "cd8", "erythro", "hsc", "monocyte", @@ -488,7 +377,6 @@ def load_roadmap_links( } keywords = tissue_keywords.get(tissue_upper, [tissue_upper.lower()]) - # 1. Try a direct tissue-name column (RoadmapUABCannot has 'tissuename') tissue_col = next( (c for c in df.columns if c.lower() in ("tissuename", "tissue_name", "tissue", "celltype", @@ -504,7 +392,6 @@ def load_roadmap_links( f"via column '{tissue_col}' (keywords: {keywords})" ) - # 2. Fall back to EID map if provided and tissue_col not found elif eid_map_file is not None: eid_map = load_roadmap_eid_map(eid_map_file) matching_eids = { @@ -535,7 +422,6 @@ def load_roadmap_links( f"Columns present: {df.columns.tolist()}" ) - # Safety: if filtering removed all rows, fall back to unfiltered and warn if tissue is not None and len(df) == 0: logger.warning( f"Roadmap tissue filter for tissue='{tissue}' returned 0 rows. " @@ -544,7 +430,6 @@ def load_roadmap_links( f"Check unique tissuename values in the file:\n" f" import pandas as pd; pd.read_csv('sclinker_refs/RoadmapUABCannot_regions_to_genes.txt.gz', sep=',')[['tissuename']].drop_duplicates().head(30)" ) - # Reload unfiltered df = pd.read_csv(roadmap_file, sep=",", compression="infer") if len(df.columns) == 1: df = pd.read_csv(roadmap_file, sep="\t", compression="infer") @@ -561,8 +446,7 @@ def load_abc_links( Load ABC model enhancer-gene predictions from the combined all-tissue file. The actual file in the GCS bucket is - ``AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz`` - — a single combined file for all tissues. Per-tissue files do not exist. 
+ ``AllPredictions.AvgHiC.ABC0.015.minus150.withcolnames.ForABCPaper.txt.gz``, a single combined file for all tissues. Per-tissue files do not exist. Parameters ---------- @@ -598,7 +482,7 @@ def load_abc_links( "HRT": ["heart", "cardiac", "cardiomyo"], } keywords = tissue_keywords.get(tissue.upper(), [tissue.lower()]) - # Find the cell type column + cell_col = next( (c for c in df.columns if c.lower() in ("celltype", "cell_type", "tissue", "biosample")), @@ -622,26 +506,6 @@ def load_abc_links( return df -# --------------------------------------------------------------------------- -# Gene coordinate file for the 100 kb window strategy -# --------------------------------------------------------------------------- -# LDSC make_annot.py --gene-coord-file expects a space-separated file: -# GENE CHR START END -# -# The GENE column is matched by plain string equality against the gene-set -# file passed to make_annot. It must use the SAME identifiers: -# -# gene_id_type="ensembl" → GENE = ENSG IDs (e.g. ENSG00000099338) -# Use this when your AnnData var_names are Ensembl IDs — which is -# the typical case for sc-linker gene programs. -# -# gene_id_type="hgnc" → GENE = HGNC symbols (e.g. CD19, FOXP3) -# Use this when your gene-set files contain gene names. -# -# The file does NOT ship with LDSC. cellink generates both variants from -# Ensembl BioMart (pybiomart) in a single query and caches them. 
-# --------------------------------------------------------------------------- - _GENE_COORD_CACHE = { "ensembl": "gene_coord_ensembl.txt", "hgnc": "gene_coord_hgnc.txt", @@ -672,15 +536,14 @@ def _query_biomart_and_write_gene_coords(data_dir: Path) -> None: dataset = server.marts["ENSEMBL_MART_ENSEMBL"].datasets["hsapiens_gene_ensembl"] df = dataset.query(attributes=[ - "ensembl_gene_id", # → ENSG IDs - "external_gene_name", # → HGNC names + "ensembl_gene_id", + "external_gene_name", "chromosome_name", "start_position", "end_position", ]) df.columns = ["ensembl_gene_id", "hgnc_name", "CHR", "START", "END"] - # Standard chromosomes only df = df[df["CHR"].astype(str).isin(_VALID_CHRS)] df = df.dropna(subset=["ensembl_gene_id"]) df = df[df["ensembl_gene_id"].str.strip() != ""] @@ -700,17 +563,15 @@ def _dedup(frame: pd.DataFrame) -> pd.DataFrame: frame .groupby("GENE", as_index=False) .agg(CHR=("CHR", "first"), START=("START", "min"), END=("END", "max")) - [["GENE", "CHR", "START", "END"]] # keep column order + [["GENE", "CHR", "START", "END"]] ) - # Write ENSG variant ensg = df[["ensembl_gene_id", "CHR", "START", "END"]].copy() ensg.columns = ["GENE", "CHR", "START", "END"] ensg = _dedup(ensg) ensg.to_csv(data_dir / _GENE_COORD_CACHE["ensembl"], sep=" ", index=False) logger.info(f"Wrote {len(ensg):,} unique ENSG entries → {_GENE_COORD_CACHE['ensembl']}") - # Write HGNC variant (only rows with a non-empty gene name) hgnc = df[df["hgnc_name"].notna() & (df["hgnc_name"].str.strip() != "")] hgnc = hgnc[["hgnc_name", "CHR", "START", "END"]].copy() hgnc.columns = ["GENE", "CHR", "START", "END"] @@ -771,7 +632,6 @@ def get_gene_annotation( f"gene_id_type must be 'ensembl' or 'hgnc', got {gene_id_type!r}" ) - # 1. Explicit path if path is not None and Path(path).exists(): return Path(path) @@ -779,12 +639,10 @@ def get_gene_annotation( data_dir = Path(get_data_home(data_home)) cache = data_dir / _GENE_COORD_CACHE[gene_id_type] - # 2. 
Cached file if cache.exists() and not refresh: logger.info(f"Using cached gene coordinates ({gene_id_type}): {cache}") return cache - # 3. Generate from BioMart (writes both variants in one query) _query_biomart_and_write_gene_coords(data_dir) return cache @@ -830,10 +688,6 @@ def load_gene_annotation( return df -# --------------------------------------------------------------------------- -# Step 2b: Gene scores → bedgraphs -# --------------------------------------------------------------------------- - def genescores_to_abc_road_bedgraph( genescores: pd.DataFrame, roadmap_links: pd.DataFrame, @@ -855,7 +709,7 @@ def genescores_to_abc_road_bedgraph( genescores), creates a bedgraph where each interval's score is the weighted sum of gene scores linked to that interval: - score(interval) = gene_score × link_activity + score(interval) = gene_score x link_activity The Roadmap and ABC bedgraphs are merged (union) with scores summed where they overlap. @@ -863,7 +717,7 @@ def genescores_to_abc_road_bedgraph( Parameters ---------- genescores - DataFrame (genes × programs) with probabilistic scores in [0, 1]. + DataFrame (genes x programs) with probabilistic scores in [0, 1]. Index must be HGNC gene symbols. roadmap_links Roadmap enhancer-gene links DataFrame (from ``load_roadmap_links``). @@ -889,8 +743,7 @@ def genescores_to_abc_road_bedgraph( dict Program name → bedgraph DataFrame (chr, start, end, score).
""" - # ── Auto-detect gene columns if not specified ──────────────────────────── - # Common column name variants seen in the sc-linker reference files: + _ROADMAP_GENE_CANDIDATES = ["Gene", "gene", "GENE", "gene_name", "GeneName", "target_gene", "TargetGene"] _ABC_GENE_CANDIDATES = ["TargetGene", "target_gene", "Gene", "gene", @@ -920,7 +773,6 @@ def genescores_to_abc_road_bedgraph( ) logger.info(f"Auto-detected ABC gene column: '{abc_gene_col}'") - # ── Deduplicate + normalise gene names ─────────────────────────────────── genescores = genescores.copy() genescores.index = genescores.index.str.upper() if genescores.index.duplicated().any(): @@ -932,10 +784,7 @@ def genescores_to_abc_road_bedgraph( abc_links = abc_links.copy() abc_links["_gene_upper"] = abc_links[abc_gene_col].str.upper() - # ── Pre-index link tables by gene — O(1) lookup per gene per program ──── - # The naive loop does a full table scan (39M rows) per gene per program. - # Building a dict once reduces this to a single groupby + dict lookup. - roadmap_activity_col_r = roadmap_activity_col # local alias for clarity + roadmap_activity_col_r = roadmap_activity_col abc_activity_col_r = abc_activity_col def _build_gene_index( @@ -980,7 +829,6 @@ def _build_gene_index( abc_idx = _build_gene_index(abc_links, abc_activity_col_r) logger.info(f" {len(abc_idx):,} unique genes in ABC index") - # ── Per-program bedgraph computation ───────────────────────────────────── bedgraphs: Dict[str, pd.DataFrame] = {} for program in genescores.columns: @@ -1055,7 +903,7 @@ def genescores_to_100kb_bedgraph( Parameters ---------- genescores - DataFrame (genes × programs) with probabilistic scores in [0, 1]. + DataFrame (genes X programs) with probabilistic scores in [0, 1]. gene_annotation Gene coordinate DataFrame from ``load_gene_annotation``. 
window_kb @@ -1075,7 +923,6 @@ def genescores_to_100kb_bedgraph( genescores = genescores.copy() genescores.index = genescores.index.str.upper() - # Build gene coord lookup once — deduplication already done at load time ga = gene_annotation.copy() ga["_gene_upper"] = ga[gene_col].str.upper() ga = ga.set_index("_gene_upper") @@ -1085,24 +932,19 @@ def genescores_to_100kb_bedgraph( bedgraphs: Dict[str, pd.DataFrame] = {} - # Deduplicate genescores index — AllCoding concat or BioMart can introduce - # duplicate gene names. Keep the row with the highest max score per gene. if genescores.index.duplicated().any(): n_before = len(genescores) genescores = genescores.groupby(level=0).max() logger.debug(f"Deduplicated genescores index: {n_before} → {len(genescores)} genes") - # Restrict to genes present in the gene annotation common_genes = genescores.index[genescores.index.isin(ga_chr)] if len(common_genes) == 0: logger.warning("No overlap between genescores genes and gene_annotation. " "Check gene_id_type.") return bedgraphs - # Use reindex (not .loc) to avoid duplicate-index row expansion gs_sub = genescores.reindex(common_genes) - # Build coordinate arrays once — one entry per unique gene chrs_arr = np.array([ga_chr[g] for g in common_genes], dtype=object) starts_arr = np.array([max(0, int(ga_start[g]) - window_bp) for g in common_genes], dtype=np.int64) ends_arr = np.array([int(ga_end[g]) + window_bp for g in common_genes], dtype=np.int64) @@ -1131,10 +973,6 @@ def genescores_to_100kb_bedgraph( return bedgraphs -# --------------------------------------------------------------------------- -# Step 2c: Bedgraph → SNP annotation (.annot.gz) -# --------------------------------------------------------------------------- - def bedgraph_to_snp_annotation( bedgraph: Union[pd.DataFrame, str, Path], bim_file: Union[str, Path], @@ -1184,10 +1022,6 @@ def bedgraph_to_snp_annotation( else: annot_values = _annotate_python(bg, bim) - # cellink passes --thin-annot to ldsc, which means 
the .annot.gz file - # must contain ONLY the annotation column(s), no CHR/BP/SNP/CM header. - # Writing those 4 string columns causes ldsc to crash with: - # TypeError: can't multiply sequence by non-int of type 'float' annot_df = pd.DataFrame({"ANNOT": annot_values.astype(np.float64)}) annot_df.to_csv(out_path, sep="\t", index=False, compression="gzip") n_nonzero = int((annot_values > 0).sum()) @@ -1220,7 +1054,7 @@ def genescores_to_annotations( Parameters ---------- genescores - DataFrame (genes × programs) with probabilistic scores in [0, 1]. + DataFrame (genes X programs) with probabilistic scores in [0, 1]. roadmap_links Roadmap enhancer-gene links (from ``load_roadmap_links``). abc_links @@ -1250,7 +1084,7 @@ def genescores_to_annotations( where annot_prefix is a string like ``"out_dir/program/ABC_Road_BLD/program."`` (without the chromosome number, to be passed to LDSC). """ - out_dir = Path(out_dir).resolve() # absolute path — required by LDSCRunner + out_dir = Path(out_dir).resolve() out_dir.mkdir(parents=True, exist_ok=True) logger.info("Computing ABC_Road bedgraphs...") @@ -1291,21 +1125,18 @@ def genescores_to_annotations( bg_path = strategy_dir / f"{safe_name}.bedgraph" bg_df.to_csv(bg_path, sep="\t", header=False, index=False) - # Per-chromosome annotations for chrom in chromosomes: bim_file = Path(f"{bim_prefix}{chrom}.bim") if not bim_file.exists(): logger.debug(f"BIM file not found for chr{chrom}: {bim_file}") continue - # Filter bedgraph to this chromosome chrom_str = str(chrom) bg_chrom = bg_df[ bg_df["chr"].astype(str).str.replace("^chr", "", regex=True) == chrom_str ] if len(bg_chrom) == 0: - # Write an all-zero annotation for this chromosome so LDSC doesn't fail _write_zero_annotation(bim_file, str(strategy_dir / f"{safe_name}.{chrom}")) continue @@ -1322,10 +1153,6 @@ def genescores_to_annotations( return annotation_prefixes -# --------------------------------------------------------------------------- -# Step 3: LD score computation -# 
--------------------------------------------------------------------------- - def compute_ld_scores_for_sclinker( annotation_prefixes: Dict[str, Dict[str, str]], bim_prefix: str, @@ -1343,7 +1170,7 @@ def compute_ld_scores_for_sclinker( Calls the existing cellink ``compute_ld_scores_with_annotations_from_bimfile`` for each (program, strategy, chromosome) combination. - Chromosome jobs are dispatched in parallel using a thread pool — each job + Chromosome jobs are dispatched in parallel using a thread pool, each job is an independent Singularity/subprocess call so threads work well. Parameters @@ -1392,13 +1219,10 @@ def compute_ld_scores_for_sclinker( ld_scores_dir = Path(ld_scores_dir).resolve() ld_scores_dir.mkdir(parents=True, exist_ok=True) - # Resolve hapmap3 path once print_snps_global: Optional[str] = None if hapmap3_snps_file and Path(hapmap3_snps_file).exists(): print_snps_global = str(hapmap3_snps_file) - # Build the full list of (program, strategy, chrom) jobs first - # so we can dispatch them all at once to the thread pool. 
Job = tuple # (program, strategy_name, annot_prefix, chrom, out_prefix, print_snps) jobs: List[Job] = [] ld_prefixes: Dict[str, Dict[str, str]] = {} @@ -1420,7 +1244,6 @@ def compute_ld_scores_for_sclinker( bim_file = f"{bim_prefix}{chrom}" out_prefix = str(strategy_ld_dir / f"{safe_name}.{chrom}") - # Per-chrom snp file fallback print_snps = print_snps_global if print_snps is None and hapmap3_snps_prefix: snp_file = f"{hapmap3_snps_prefix}{chrom}.snp" @@ -1433,8 +1256,8 @@ def compute_ld_scores_for_sclinker( n_total = len(jobs) logger.info( f"Computing LD scores: {n_total} jobs " - f"({len(annotation_prefixes)} programs × " - f"{sum(len(s) for s in annotation_prefixes.values()) // max(len(annotation_prefixes),1)} strategies × " + f"({len(annotation_prefixes)} programs X " + f"{sum(len(s) for s in annotation_prefixes.values()) // max(len(annotation_prefixes),1)} strategies X " f"{len(chromosomes)} chromosomes), n_jobs={n_jobs}" ) @@ -1482,14 +1305,6 @@ def _run_one(job: Job) -> str: return ld_prefixes -# --------------------------------------------------------------------------- -# Step 3: S-LDSC heritability regression (sc-linker mode) -# --------------------------------------------------------------------------- - -# --------------------------------------------------------------------------- -# Step 4: Parse results and compute E-score -# --------------------------------------------------------------------------- - def load_sclinker_heritability_results( results_dir: Union[str, Path], *, @@ -1628,121 +1443,13 @@ def compute_escore( return merged.drop(columns=["_ctrl_enr", "_ctrl_se"]) -# --------------------------------------------------------------------------- -# PPI gene scores (code/calc_PPI_scores) -# --------------------------------------------------------------------------- -def compute_ppi_gene_scores( - seed_genes: pd.Series, - ppi_network: pd.DataFrame, - *, - restart_prob: float = 0.7, - n_iter: int = 100, - normalize: bool = True, - source_col: str = 
"gene1", - target_col: str = "gene2", - weight_col: Optional[str] = None, -) -> pd.Series: - """ - Random Walk with Restart (RWR) on a PPI network to propagate gene scores. - - This ports the R scripts `ppi_RWR.R` and `ppi_string_RWR.R` from - `code/calc_PPI_scores/`. It propagates seed gene scores through the PPI - network to identify closely connected genes. - - Parameters - ---------- - seed_genes - Series mapping gene name → initial score (typically 0 or gene score). - ppi_network - Edge list DataFrame with columns for source gene, target gene, - and optionally edge weight. - restart_prob - Probability of restarting at seed genes at each step. - n_iter - Number of RWR iterations. - normalize - Normalize the adjacency matrix by node degree. - source_col, target_col - Column names for edge endpoints. - weight_col - Optional edge weight column. If None, uniform weights are used. - - Returns - ------- - Series - Gene → propagated score (sums to 1 if normalized). - """ - # Build gene universe - all_genes = sorted(set( - ppi_network[source_col].tolist() + - ppi_network[target_col].tolist() + - seed_genes.index.tolist() - )) - n = len(all_genes) - gene2idx = {g: i for i, g in enumerate(all_genes)} - - # Build adjacency matrix - import scipy.sparse as sp - # Vectorised adjacency matrix build — no per-edge Python loop - src_genes = ppi_network[source_col].map(gene2idx) - tgt_genes = ppi_network[target_col].map(gene2idx) - valid = src_genes.notna() & tgt_genes.notna() - i_arr = src_genes[valid].astype(int).to_numpy() - j_arr = tgt_genes[valid].astype(int).to_numpy() - if weight_col and weight_col in ppi_network.columns: - w_arr = ppi_network.loc[valid, weight_col].astype(float).to_numpy() - else: - w_arr = np.ones(valid.sum()) - # Undirected: add both directions - row_arr = np.concatenate([i_arr, j_arr]) - col_arr = np.concatenate([j_arr, i_arr]) - data_arr = np.concatenate([w_arr, w_arr]) - A = sp.csr_matrix((data_arr, (row_arr, col_arr)), shape=(n, n)) - - if 
normalize: - # Column normalize - deg = np.asarray(A.sum(axis=0)).ravel() - deg[deg == 0] = 1 - D_inv = sp.diags(1.0 / deg) - A = A.dot(D_inv) - - # Seed vector - p0 = np.zeros(n) - for gene, score in seed_genes.items(): - if gene in gene2idx: - p0[gene2idx[gene]] = score - p0_sum = p0.sum() - if p0_sum > 0: - p0 /= p0_sum - - # RWR - p = p0.copy() - for _ in range(n_iter): - p_new = (1 - restart_prob) * A.dot(p) + restart_prob * p0 - if np.max(np.abs(p_new - p)) < 1e-10: - break - p = p_new - - return pd.Series(p, index=all_genes) - - -# --------------------------------------------------------------------------- -# LDSC parse.py bug check + patch (works for local, Docker and Singularity) -# --------------------------------------------------------------------------- - -# Unique string present ONLY in the patched version (not in vanilla ldsc) _PARSE_PY_PATCH_MARKER = "chr_ld[0].columns" - -# Exact one-line fix from ldsc PR #341: -# x = pd.concat(chr_ld) ← buggy (columns reordered by pandas) -# x = pd.concat(chr_ld)[chr_ld[0].columns] ← fixed (columns locked to first file) _PARSE_PY_BUGGY_PATTERNS = [ ( "x = pd.concat(chr_ld) # automatically sorted by chromosome", "x = pd.concat(chr_ld)[chr_ld[0].columns] # automatically sorted by chromosome", ), - # Fallback without the trailing comment (in case whitespace differs) ( "x = pd.concat(chr_ld)", "x = pd.concat(chr_ld)[chr_ld[0].columns]", @@ -1750,10 +1457,6 @@ def compute_ppi_gene_scores( ] -# --------------------------------------------------------------------------- -# Internal helpers — read / write parse.py via runner -# --------------------------------------------------------------------------- - def _get_parse_py_path(runner) -> Optional[str]: """ Return the path to ``ldscore/parse.py`` to be used for reading/writing. @@ -1770,12 +1473,10 @@ def _get_parse_py_path(runner) -> Optional[str]: Returns None if the path cannot be determined. """ - # 1. 
Explicit parse_script from runner (preferred) explicit = getattr(runner, "parse_script", None) or runner.config.get("parse_script") if explicit: return str(explicit) - - # 2. Auto-discover from host PATH (local fallback) + ldsc_cmd = runner.config.get("ldsc_command", "ldsc.py") ldsc_bin = shutil.which(ldsc_cmd) if ldsc_bin is None: @@ -1888,7 +1589,6 @@ def _write_parse_py_via_runner(runner, patched_source: str) -> bool: tmp_path = tf.name try: - # ── local ──────────────────────────────────────────────────────────── if mode == "local": p = Path(parse_path) if not p.exists(): @@ -1899,7 +1599,6 @@ def _write_parse_py_via_runner(runner, patched_source: str) -> bool: logger.info(f"Patched {p} (backup: {p}.bak)") return True - # ── docker ─────────────────────────────────────────────────────────── elif mode == "docker": image = runner.config.get("docker_image", "zijingliu/ldsc") cid_result = subprocess.run( @@ -1929,7 +1628,6 @@ def _write_parse_py_via_runner(runner, patched_source: str) -> bool: finally: subprocess.run(["docker", "rm", cid], capture_output=True) - # ── singularity ─────────────────────────────────────────────────────── elif mode == "singularity": strategy = runner.config.get("singularity_patch_strategy", "overlay") sif = runner.config.get("singularity_image", "") @@ -1976,7 +1674,6 @@ def _singularity_patch_overlay(runner, sif: str, parse_path: str, patched_tmp: s overlay_path = str(overlay_path) overlay_size_mb = runner.config.get("singularity_overlay_size_mb", 256) - # Create overlay image if it does not exist yet if not Path(overlay_path).exists(): logger.info(f"Creating Singularity overlay image: {overlay_path} ({overlay_size_mb} MB)") Path(overlay_path).parent.mkdir(parents=True, exist_ok=True) @@ -1989,8 +1686,6 @@ def _singularity_patch_overlay(runner, sif: str, parse_path: str, patched_tmp: s logger.error(f"singularity overlay create failed: {result.stderr}") return False - # Write the patched parse.py into the overlay - # We create a tiny 
shell script that does the copy using --overlay --writable result = subprocess.run( [ "singularity", "exec", @@ -2006,7 +1701,6 @@ def _singularity_patch_overlay(runner, sif: str, parse_path: str, patched_tmp: s logger.error(f"Failed to write patched parse.py into overlay: {result.stderr}") return False - # Record overlay path so run_command mounts it on every subsequent call runner.config["_ldsc_overlay_path"] = overlay_path logger.info( f"Patched {parse_path} written into overlay {overlay_path}. " @@ -2036,7 +1730,6 @@ def _singularity_patch_sandbox( str(Path(sif).with_suffix("")) + ".sandbox", ) - # Convert SIF → sandbox (only once; skip if sandbox directory already exists) if not Path(sandbox_path).exists(): logger.info(f"Converting {sif} to writable sandbox at {sandbox_path} ...") result = subprocess.run( @@ -2044,7 +1737,6 @@ def _singularity_patch_sandbox( capture_output=True, text=True, ) if result.returncode != 0: - # Try with --fakeroot for systems without root logger.warning("sandbox build failed without root; retrying with --fakeroot") result = subprocess.run( ["singularity", "build", "--fakeroot", "--sandbox", sandbox_path, sif], @@ -2054,14 +1746,12 @@ def _singularity_patch_sandbox( logger.error(f"singularity build --sandbox failed: {result.stderr}") return False - # Copy patched parse.py into the sandbox (sandbox is just a directory) sandbox_parse = Path(sandbox_path) / parse_path.lstrip("/") sandbox_parse.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(patched_tmp, str(sandbox_parse)) logger.info(f"Patched {sandbox_parse}") if not rebuild: - # Point the runner at the sandbox directory for all future calls runner.config["singularity_image"] = sandbox_path logger.info( f"Runner updated: singularity_image → {sandbox_path}. 
" @@ -2069,7 +1759,6 @@ def _singularity_patch_sandbox( ) return True else: - # Rebuild a new SIF from the patched sandbox new_sif = str(Path(sif).with_suffix("")) + ".patched.sif" backup_sif = sif + ".bak.sif" logger.info(f"Rebuilding SIF from sandbox: {sandbox_path} → {new_sif} ...") @@ -2086,7 +1775,6 @@ def _singularity_patch_sandbox( logger.error(f"singularity build from sandbox failed: {result.stderr}") return False - # Swap: backup original, promote new SIF shutil.move(sif, backup_sif) shutil.move(new_sif, sif) runner.config["singularity_image"] = sif @@ -2180,7 +1868,6 @@ def check_and_patch_ldsc_parse_bug(runner) -> dict: "detail": "parse.py already patched (chr_ld[0].columns reindex present) — no action needed.", } - # Apply the textual patch patched = source applied = False for old, new in _PARSE_PY_BUGGY_PATTERNS: @@ -2222,11 +1909,6 @@ def check_and_patch_ldsc_parse_bug(runner) -> dict: } - -# --------------------------------------------------------------------------- -# Private helpers -# --------------------------------------------------------------------------- - def _merge_bedgraph_bedtools(bg: pd.DataFrame) -> pd.DataFrame: """Merge overlapping intervals using bedtools merge, summing scores.""" with tempfile.NamedTemporaryFile( @@ -2243,12 +1925,10 @@ def _merge_bedgraph_bedtools(bg: pd.DataFrame) -> pd.DataFrame: tmp_out = f_out.name try: - # bedtools sort (chromosomes may be numeric or "chrN") sort_result = subprocess.run( ["bedtools", "sort", "-i", tmp_in], capture_output=True, text=True, check=True, ) - # bedtools merge with sum of scores merge_result = subprocess.run( ["bedtools", "merge", "-i", "stdin", "-c", "4", "-o", "sum"], input=sort_result.stdout, @@ -2287,7 +1967,6 @@ def _merge_bedgraph_python(bg: pd.DataFrame) -> pd.DataFrame: order = np.argsort(starts, kind="stable") starts, ends, scores = starts[order], ends[order], scores[order] - # Sweep-line merge ms, me, msc = starts[0], ends[0], scores[0] for s, e, sc in zip(starts[1:], 
ends[1:], scores[1:]): if s <= me: @@ -2308,14 +1987,12 @@ def _merge_bedgraph_python(bg: pd.DataFrame) -> pd.DataFrame: def _annotate_with_bedtools(bg: pd.DataFrame, bim: pd.DataFrame) -> np.ndarray: """Annotate BIM SNPs using bedtools intersect.""" - # Write BIM as BED (SNPs are 1-bp intervals) bim_bed = bim.copy() bim_bed["_start"] = bim_bed["BP"] - 1 # 0-based bim_bed["_end"] = bim_bed["BP"] bim_bed["_chr"] = bim_bed["CHR"].astype(str) import tempfile as _tf - # Write BIM as BED using pandas — no per-row Python loop fa = _tf.NamedTemporaryFile(mode="w", suffix=".bed", delete=False) bim_out = pd.DataFrame({ "chr": bim_bed["_chr"], @@ -2327,7 +2004,6 @@ def _annotate_with_bedtools(bg: pd.DataFrame, bim: pd.DataFrame) -> np.ndarray: fa.close() snp_bed_file = fa.name - # Write bedgraph using pandas fb = _tf.NamedTemporaryFile(mode="w", suffix=".bedgraph", delete=False) bg_out = bg.copy() bg_out["chr"] = bg_out["chr"].astype(str).str.replace("chr", "", regex=False) @@ -2367,7 +2043,6 @@ def _annotate_with_bedtools(bg: pd.DataFrame, bim: pd.DataFrame) -> np.ndarray: def _annotate_python(bg: pd.DataFrame, bim: pd.DataFrame) -> np.ndarray: """Pure-Python SNP annotation from bedgraph.""" - # Normalise chromosome labels bg = bg.copy() bg["_chr"] = bg["chr"].astype(str).str.replace("^chr", "", regex=True) bim = bim.copy() @@ -2385,7 +2060,6 @@ def _annotate_python(bg: pd.DataFrame, bim: pd.DataFrame) -> np.ndarray: ends = bg_chrom["end"].values scores = bg_chrom["score"].values - # Vectorised lookup for idx, pos in zip(snp_indices, snp_pos): hits = np.where((starts <= pos) & (pos < ends))[0] if hits.size > 0: @@ -2483,7 +2157,6 @@ def _parse_ldsc_log(log_file: Path) -> Optional[dict]: result["h2_obs"] = float(h2_match.group(1)) result["h2_obs_se"] = float(h2_match.group(2)) - # .results file lives next to the .log file results_file = log_file.with_suffix(".results") stats = _parse_ldsc_results_file(results_file) if stats: