diff --git a/anndata-zarr/package.json b/anndata-zarr/package.json
index bdd7650b..76e01e7d 100644
--- a/anndata-zarr/package.json
+++ b/anndata-zarr/package.json
@@ -5,7 +5,9 @@
   "type": "module",
   "main": "dist/biongff-anndata-zarr.cjs.js",
   "module": "dist/biongff-anndata-zarr.es.js",
-  "files": ["dist"],
+  "files": [
+    "dist"
+  ],
   "exports": {
     ".": {
       "import": "./dist/biongff-anndata-zarr.es.js",
@@ -23,17 +25,18 @@
   },
   "dependencies": {
     "@tanstack/react-query": "^5.85.3",
+    "hyparquet": "^1.25.6",
     "lodash": "^4.17.21",
     "react-window": "^2.0.2",
     "zarrita": "0.5.0"
   },
   "peerDependencies": {
-    "react": "^18.2.0",
-    "react-dom": "^18.2.0",
+    "@emotion/react": "^11.14.0",
+    "@emotion/styled": "^11.14.1",
     "@mui/icons-material": "^7.2.0",
     "@mui/material": "^7.2.0",
-    "@emotion/react": "^11.14.0",
-    "@emotion/styled": "^11.14.1"
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0"
   },
   "devDependencies": {
     "@vitejs/plugin-react": "^4.3.3",
diff --git a/anndata-zarr/src/hooks.js b/anndata-zarr/src/hooks.js
index 73f9b702..038f0379 100644
--- a/anndata-zarr/src/hooks.js
+++ b/anndata-zarr/src/hooks.js
@@ -5,27 +5,65 @@ import _ from "lodash";
 
 import { COLORSCALES } from "./constants/colorscales";
 import { fetchDataFromZarr, getColors, getObs, getVarNames, getZarrPath } from "./utils";
+import {
+  getTableData,
+  getFeatureNamesFromTable,
+  getObsFromTable,
+  getFeatureDataFromTable,
+  getColumnDataFromTable,
+} from "./table-loader";
 
-const getAnndataColors = async (url, matrixProps, colorProps) => {
-  let zarrData;
-  try {
-    const zarrPath = await getZarrPath(url, matrixProps);
-    zarrData = await fetchDataFromZarr(zarrPath.url, zarrPath.path, zarrPath.s);
-  } catch (error) {
-    console.error(error);
-    return null;
+// ---------------------------------------------------------------------------
+// Backend-aware data fetching functions
+// ---------------------------------------------------------------------------
+
+/**
+ * Resolve the feature names for the table at `url`.
+ * AnnData tables go through `getVarNames`; any other backend derives them
+ * from the loaded flat table.
+ */
+async function getFeatureNames(url, namesCol) {
+  const { meta, data } = await getTableData(url);
+  if (meta.backend === "anndata") {
+    return getVarNames(url, namesCol);
   }
-  if (!zarrData) return null;
+  return getFeatureNamesFromTable(data, meta.indexKey, meta);
+}
 
-  const { categories } = zarrData;
+/**
+ * Resolve the observation columns for the table at `url`.
+ * AnnData tables go through `getObs`; any other backend derives them
+ * from the loaded flat table.
+ */
+async function getObsColumns(url) {
+  const { meta, data } = await getTableData(url);
+  if (meta.backend === "anndata") {
+    return getObs(url);
+  }
+  return getObsFromTable(data, meta.indexKey, meta);
+}
 
-  const max = categories ? categories.length - 1 : colorProps?.max || _.max(zarrData.data);
-  const min = categories ? 0 : colorProps?.min || _.min(zarrData.data);
+/**
+ * Turn one column of values into the colour payload returned by the hooks.
+ * Categorical columns span 0..categories.length - 1 and use COLORSCALES.Accent;
+ * other columns use the bounds in `colorProps`, falling back to the data range.
+ * Returns null when `columnData` is missing.
+ */
+function computeColorResult(columnData, colorProps) {
+  if (!columnData) return null;
+  const { categories } = columnData;
+  // Convert BigInt data (from int64 zarr arrays or parquet) to Number
+  let data = columnData.data;
+  if (data?.length > 0 && typeof data[0] === "bigint") {
+    data = Array.from(data, Number);
+  }
+  // `??` rather than `||`, so an explicit bound of 0 is not replaced by the data range.
+  const max = categories ? categories.length - 1 : Number(colorProps?.max ?? _.max(data));
+  const min = categories ? 0 : Number(colorProps?.min ?? _.min(data));
   const colorscale = categories ? COLORSCALES.Accent : colorProps?.colorscale;
-
   return {
     colors: getColors({
-      data: zarrData.data,
+      data,
       max,
       min,
       colorProps: { ...colorProps, colorscale },
@@ -36,7 +74,48 @@ const getAnndataColors = async (url, matrixProps, colorProps) => {
     ...(categories ? { categories } : {}),
     colorscale,
   };
-};
+}
+
+/**
+ * Fetch the values selected by `matrixProps` and convert them to colours.
+ * AnnData tables are read from zarr; other backends pick a column out of the
+ * loaded flat table (feature index, then feature name, then obs column).
+ * Errors raised while reading the column are logged and yield null.
+ */
+async function getColorData(url, matrixProps, colorProps) {
+  const { meta, data } = await getTableData(url);
+
+  let columnData;
+  if (meta.backend === "anndata") {
+    try {
+      const zarrPath = await getZarrPath(url, matrixProps);
+      columnData = await fetchDataFromZarr(zarrPath.url, zarrPath.path, zarrPath.s);
+    } catch (error) {
+      console.error(error);
+      return null;
+    }
+  } else {
+    const { feature, obs } = matrixProps || {};
+    try {
+      if (feature?.index !== undefined && feature?.index !== null) {
+        columnData = getFeatureDataFromTable(data, feature.index, meta.indexKey, meta);
+      } else if (feature?.name) {
+        columnData = getColumnDataFromTable(data, feature.name);
+      } else if (obs?.col) {
+        columnData = getColumnDataFromTable(data, obs.col);
+      }
+    } catch (error) {
+      console.error(error);
+      return null;
+    }
+  }
+
+  return computeColorResult(columnData, colorProps);
+}
+
+// ---------------------------------------------------------------------------
+// Hooks
+// ---------------------------------------------------------------------------
 
 export const useAnndataColors = (adata = { url: null }, opts = {}) => {
   const {
@@ -44,8 +123,8 @@ export const useAnndataColors = (adata = { url: null }, opts = {}) => {
     isLoading = false,
     serverError = null,
   } = useQuery({
-    queryKey: ["anndataColor", adata.url, adata.matrixProps, adata.colorProps],
-    queryFn: () => getAnndataColors(adata.url, adata.matrixProps, adata.colorProps),
+    queryKey: ["tableColor", adata.url, adata.matrixProps, adata.colorProps],
+    queryFn: () => getColorData(adata.url, adata.matrixProps, adata.colorProps),
     ...opts,
   });
 
@@ -67,8 +146,8 @@ export const useAnndatasColors = (adatas = [], opts = {}) => {
     serverError = null,
   } = useQueries({
     queries: adatas.map(({ url, matrixProps, colorProps }) => ({
-      queryKey: ["anndataColor", url, matrixProps, colorProps],
-      queryFn: () => getAnndataColors(url, matrixProps, colorProps),
+      queryKey: ["tableColor", url, matrixProps, colorProps],
+      queryFn: () => getColorData(url, matrixProps, colorProps),
     })),
     ...opts,
     combine,
@@ -83,8 +162,8 @@ export const useAnndataFeatures = (adata = { url: null, namesCol: null }) => {
     isLoading = false,
     serverError = null,
   } = useQuery({
-    queryKey: ["anndataFeatures", adata.url, adata.namesCol],
-    queryFn: () => getVarNames(adata.url, adata.namesCol),
+    queryKey: ["tableFeatures", adata.url, adata.namesCol],
+    queryFn: () => getFeatureNames(adata.url, adata.namesCol),
   });
 
   return { data, isLoading, serverError };
@@ -96,8 +175,8 @@ export const useAnndataObs = (adata = { url: null }) => {
     isLoading = false,
     serverError = null,
   } = useQuery({
-    queryKey: ["anndataObs", adata.url],
-    queryFn: () => getObs(adata.url),
+    queryKey: ["tableObs", adata.url],
+    queryFn: () => getObsColumns(adata.url),
   });
 
   return { data, isLoading, serverError };
diff --git a/anndata-zarr/src/table-loader.js b/anndata-zarr/src/table-loader.js
new file mode 100644
index 00000000..bd28c02c
--- /dev/null
+++ b/anndata-zarr/src/table-loader.js
@@ -0,0 +1,297 @@
+/**
+ * Table loader for non-anndata backends (CSV, JSON, Parquet).
+ * Detects backend type from .zattrs and normalizes data into
+ * a column-oriented format compatible with the anndata-zarr hooks.
+ */
+
+// Promise-based caches to deduplicate concurrent requests
+const _metaPromises = new Map();
+const _tablePromises = new Map();
+
+/**
+ * Detect table backend and metadata from .zattrs.
+ * Result is cached per URL; a failed lookup is evicted so it can be retried.
+ */
+export function getTableMeta(url) {
+  if (!_metaPromises.has(url)) {
+    const pending = _fetchTableMeta(url);
+    // Drop rejected promises, otherwise one transient failure sticks forever.
+    pending.catch(() => _metaPromises.delete(url));
+    _metaPromises.set(url, pending);
+  }
+  return _metaPromises.get(url);
+}
+
+/**
+ * Fetch `${url}/.zattrs` and derive the backend name and column metadata.
+ * Throws when the response is not OK.
+ */
+async function _fetchTableMeta(url) {
+  const res = await fetch(`${url}/.zattrs`);
+  if (!res.ok) throw new Error(`Failed to load table metadata from ${url}`);
+  const attrs = await res.json();
+  const backend = attrs.backend || (attrs["encoding-type"] === "anndata" ? "anndata" : "unknown");
+  if (backend === "unknown") {
+    console.warn(`[anndata-zarr] Unrecognized table backend at ${url}. Expected one of: anndata, csv, json, parquet.`);
+  } else {
+    console.log(`[anndata-zarr] Detected table backend: ${backend} (${url})`);
+  }
+  return {
+    backend,
+    indexKey: attrs.index_key || attrs.instance_key || null,
+    categoricalColumns: attrs.categorical_columns || [],
+    measurementColumns: attrs.measurement_columns || [],
+    metadataColumns: attrs.metadata_columns || [],
+  };
+}
+
+/**
+ * Load and cache table metadata + data.
+ * For anndata backend, returns { meta, data: null }.
+ * For flat backends, returns { meta, data: { col: values[] } }.
+ * A failed load is evicted from the cache so it can be retried.
+ */
+export function getTableData(url) {
+  if (!_tablePromises.has(url)) {
+    const pending = _fetchTableData(url);
+    // Drop rejected promises, otherwise one transient failure sticks forever.
+    pending.catch(() => _tablePromises.delete(url));
+    _tablePromises.set(url, pending);
+  }
+  return _tablePromises.get(url);
+}
+
+/**
+ * Resolve metadata, then load the flat table unless the backend is anndata.
+ * Failures are logged with context and rethrown.
+ */
+async function _fetchTableData(url) {
+  let meta;
+  try {
+    meta = await getTableMeta(url);
+  } catch (error) {
+    console.error(`[anndata-zarr] Failed to load table metadata from ${url}:`, error);
+    throw error;
+  }
+  if (meta.backend === "anndata") {
+    return { meta, data: null };
+  }
+  try {
+    const data = await _loadFlatTable(url, meta.backend);
+    return { meta, data };
+  } catch (error) {
+    console.error(`[anndata-zarr] Failed to load ${meta.backend} table from ${url}:`, error);
+    throw error;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// CSV parser
+// ---------------------------------------------------------------------------
+
+/**
+ * Split one CSV record into fields.
+ * Double quotes delimit a field and `""` inside quotes is a literal quote.
+ */
+function _parseCsvLine(line) {
+  const values = [];
+  let current = "";
+  let inQuotes = false;
+
+  for (let i = 0; i < line.length; i++) {
+    const char = line[i];
+    if (inQuotes) {
+      if (char === '"') {
+        if (i + 1 < line.length && line[i + 1] === '"') {
+          current += '"';
+          i++;
+        } else {
+          inQuotes = false;
+        }
+      } else {
+        current += char;
+      }
+    } else if (char === '"') {
+      inQuotes = true;
+    } else if (char === ",") {
+      values.push(current);
+      current = "";
+    } else {
+      current += char;
+    }
+  }
+  values.push(current);
+  return values;
+}
+
+/**
+ * Parse CSV text into { header: values[] }.
+ * The first record is the header; columns whose values are all numeric are
+ * converted to numbers. Records are split on line breaks before quotes are
+ * examined, so quoted fields containing a line break are not supported.
+ */
+function _parseCsv(text) {
+  const trimmed = text.trim();
+  // "".split() yields [""], so test the text itself rather than the line count.
+  if (!trimmed) return {};
+  // Accept CRLF as well as LF so the last field never keeps a stray "\r".
+  const lines = trimmed.split(/\r?\n/);
+
+  const headers = _parseCsvLine(lines[0]);
+  const columns = {};
+  for (const h of headers) columns[h] = [];
+
+  for (let i = 1; i < lines.length; i++) {
+    if (!lines[i].trim()) continue;
+    const values = _parseCsvLine(lines[i]);
+    for (let j = 0; j < headers.length; j++) {
+      columns[headers[j]].push(values[j]);
+    }
+  }
+
+  // Auto-detect and convert numeric columns
+  for (const key of Object.keys(columns)) {
+    const col = columns[key];
+    if (col.length > 0 && col.every((v) => v !== "" && !Number.isNaN(Number(v)))) {
+      columns[key] = col.map(Number);
+    }
+  }
+
+  return columns;
+}
+
+// ---------------------------------------------------------------------------
+// Backend-specific loaders
+// ---------------------------------------------------------------------------
+
+/**
+ * Fetch and decode the table file for a flat backend ("csv", "json" or
+ * "parquet") into column-oriented data. Throws for any other backend or
+ * when the fetch response is not OK.
+ */
+async function _loadFlatTable(url, backend) {
+  switch (backend) {
+    case "csv": {
+      const res = await fetch(`${url}/table.csv`);
+      if (!res.ok) throw new Error(`Failed to load CSV from ${url}/table.csv`);
+      return _parseCsv(await res.text());
+    }
+    case "json": {
+      const res = await fetch(`${url}/table/.zattrs`);
+      if (!res.ok) throw new Error(`Failed to load JSON from ${url}/table/.zattrs`);
+      return await res.json();
+    }
+    case "parquet": {
+      const { parquetMetadata, parquetRead } = await import("hyparquet");
+      const res = await fetch(`${url}/table.parquet`);
+      if (!res.ok) throw new Error(`Failed to load parquet from ${url}/table.parquet`);
+      const buffer = await res.arrayBuffer();
+
+      const metadata = parquetMetadata(buffer);
+      const columnNames = metadata.schema.slice(1).map((s) => s.name);
+      const columns = {};
+      for (const name of columnNames) columns[name] = [];
+
+      await parquetRead({
+        file: buffer,
+        metadata,
+        onComplete: (rows) => {
+          for (const row of rows) {
+            for (let i = 0; i < columnNames.length; i++) {
+              columns[columnNames[i]].push(row[i]);
+            }
+          }
+        },
+      });
+
+      return columns;
+    }
+    default:
+      console.error(`[anndata-zarr] Unsupported table backend: "${backend}". Supported: csv, json, parquet.`);
+      throw new Error(`Unsupported table backend: ${backend}`);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Data extraction from flat (column-oriented) tables
+// ---------------------------------------------------------------------------
+
+/**
+ * Get feature (numeric column) names from flat table data.
+ * Uses measurement_columns from metadata if available, otherwise infers
+ * from column data types.
+ */
+export function getFeatureNamesFromTable(tableData, indexKey, meta = {}) {
+  if (meta.measurementColumns?.length > 0) {
+    return meta.measurementColumns.filter((key) => key in tableData);
+  }
+  return Object.keys(tableData).filter((key) => {
+    if (key === indexKey) return false;
+    const col = tableData[key];
+    return col.length > 0 && (typeof col[0] === "number" || typeof col[0] === "bigint");
+  });
+}
+
+/**
+ * Get observation metadata columns from flat table data.
+ * Returns { categorical: [{name, categories}], numerical: [] } matching
+ * the shape returned by getObs() for anndata.
+ */
+export function getObsFromTable(tableData, indexKey, meta = {}) {
+  const featureNames = new Set(getFeatureNamesFromTable(tableData, indexKey, meta));
+  const obs = { categorical: [], numerical: [] };
+  for (const [key, values] of Object.entries(tableData)) {
+    if (key === indexKey) continue;
+    if (featureNames.has(key)) continue;
+    const isCategorical =
+      meta.categoricalColumns?.length > 0
+        ? meta.categoricalColumns.includes(key)
+        : values.length > 0 && typeof values[0] === "string";
+    if (isCategorical) {
+      const categories = [...new Set(values)].map(String);
+      obs.categorical.push({ name: key, categories });
+    } else if (values.length > 0 && (typeof values[0] === "number" || typeof values[0] === "bigint")) {
+      obs.numerical.push({ name: key });
+    }
+  }
+  return obs;
+}
+
+/**
+ * Get a single column's data, applying categorical encoding for string columns.
+ * Returns { data: number[] } or { data: number[], categories: string[] }.
+ */
+export function getColumnDataFromTable(tableData, colName) {
+  const values = tableData[colName];
+  if (!values) throw new Error(`Column "${colName}" not found in table`);
+
+  if (values.length > 0 && typeof values[0] === "string") {
+    const categories = [...new Set(values)];
+    const categoryMap = new Map(categories.map((c, i) => [c, i]));
+    return { data: values.map((v) => categoryMap.get(v)), categories };
+  }
+  // Convert BigInt values (e.g. from parquet int64) to Number
+  if (values.length > 0 && typeof values[0] === "bigint") {
+    return { data: values.map(Number) };
+  }
+  return { data: values };
+}
+
+/**
+ * Get feature data by matrix index from a flat table.
+ * Throws when the index does not select one of the table's feature columns.
+ */
+export function getFeatureDataFromTable(tableData, featureIndex, indexKey, meta = {}) {
+  const featureNames = getFeatureNamesFromTable(tableData, indexKey, meta);
+  if (featureIndex < 0 || featureIndex >= featureNames.length) {
+    throw new Error(`Feature index ${featureIndex} out of range (0-${featureNames.length - 1})`);
+  }
+  const values = tableData[featureNames[featureIndex]];
+  // A fractional or NaN index passes the range check above but selects no column.
+  if (!values) throw new Error(`Feature index ${featureIndex} out of range (0-${featureNames.length - 1})`);
+  // Convert BigInt values (e.g. from parquet int64) to Number
+  if (values.length > 0 && typeof values[0] === "bigint") {
+    return { data: values.map(Number) };
+  }
+  return { data: values };
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index cc5ddfd4..9efb559d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -82,6 +82,9 @@ importers:
       '@tanstack/react-query':
         specifier: ^5.85.3
         version: 5.90.21(react@18.3.1)
+      hyparquet:
+        specifier: ^1.25.6
+        version: 1.25.6
       lodash:
         specifier: ^4.17.21
         version: 4.17.21
@@ -2375,6 +2378,9 @@ packages:
     resolution: {integrity: sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ==}
     engines: {node: '>=18.18.0'}
 
+  hyparquet@1.25.6:
+    resolution: {integrity: sha512-Q9W5IjkVch3ZMnYd4qFv2q8suu5Jc36yt7J+zUNM9grwnP1S189icp0jdEQKM5HJvQkTVy8NMiQ8n/dM5QAt1A==}
+
   ieee754@1.2.1:
     resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
 
@@ -6269,6 +6275,8 @@ snapshots:
 
   human-signals@8.0.1: {}
 
+  hyparquet@1.25.6: {}
+
   ieee754@1.2.1: {}
 
   image-size@0.7.5: {}