-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxenium_gene_counter.py
More file actions
100 lines (85 loc) · 3.4 KB
/
xenium_gene_counter.py
File metadata and controls
100 lines (85 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
Module for counting Xenium transcripts for each gene in each cell of a segmentation mask
Since a segmentation mask is used and not polygons, transcripts are bound to the closest pixel.
"""
import argparse
import numpy as np
import pandas as pd
from skimage import measure, io
from PIL import Image
Image.MAX_IMAGE_PIXELS = 9999999999
def get_arguments():
"""
Parses and checks command line arguments, and provides an help text.
Assumes 3 and returns 3 positional command line arguments:
mask_file = Path to the input mask file
transcript_file = Path to the input transcript file
output_file = Path to the output single cell data file
"""
parser = argparse.ArgumentParser(description="Assigns transcripts to cells.")
parser.add_argument("mask_file", help="path to the input cell mask file")
parser.add_argument("transcript_file", help="path to the input transcript file")
parser.add_argument("output_file", help="path to the cell data output")
args = parser.parse_args()
return args.mask_file, args.transcript_file, args.output_file
def gene_counter(regionmask, intensity_image):
"""Adds all values in a boolean array"""
return np.sum(intensity_image[regionmask])
if __name__ == "__main__":
mask_file_name, transcript_file_name, output_file = get_arguments()
print("Processing Mask")
mask = io.imread(mask_file_name, "tiff")
w = mask.shape[1]
h = mask.shape[0]
print("Processing Transcripts")
transcripts = pd.read_csv(
transcript_file_name,
sep=",",
header=0,
usecols=["x_location_px_ROI", "y_location_px_ROI", "feature_name"],
)
transcripts = transcripts.rename(
columns={
"x_location_px_ROI": "x",
"y_location_px_ROI": "y",
"feature_name": "gene",
}
)
transcripts["x"] = transcripts["x"].astype(np.int32)
transcripts["y"] = transcripts["y"].astype(np.int32)
transcripts = transcripts.loc[(transcripts.x < w) & (transcripts.y < h)]
transcripts = (
transcripts.groupby(["x", "y", "gene"]).size().reset_index(name="freq")
)
genes = transcripts.gene
geneset = sorted(list(set(genes))) # Sorted is not necessary
print(geneset)
print("Measuring cell area and position")
measures = [
pd.DataFrame(
measure.regionprops_table(mask, None, ["area", "centroid", "label"])
)
]
print("Counting transcripts in each cell")
for gene in geneset:
genemask = np.zeros(mask.shape, dtype="int")
points = transcripts.loc[transcripts.gene == gene][["x", "y"]].to_numpy()
counts = transcripts.loc[transcripts.gene == gene]["freq"]
idx = points[:, 0] + points[:, 1] * w # 1D index of each point
np.put(
genemask, idx, counts, mode="raise"
) # Add transcript count at every point
geneint = pd.DataFrame(
measure.regionprops_table(
mask, genemask, [], extra_properties=[gene_counter]
)
)
geneint.rename(columns={"gene_counter": gene}, inplace=True)
measures.append(geneint)
print(gene)
cells = pd.concat(measures, axis=1, ignore_index=False)
cells.rename(
columns={"centroid-0": "centroid.y", "centroid-1": "centroid.x"}, inplace=True
)
print("Prepare and write csv output")
cells.to_csv(output_file, header=True, index=True, index_label="cell")