From 4970573fa3925b903a7aae3daa84cb16153c9e77 Mon Sep 17 00:00:00 2001 From: Fran McDade <18710366+frano-m@users.noreply.github.com> Date: Wed, 13 May 2026 17:24:45 +1000 Subject: [PATCH 1/5] feat: [hca dcp] add hca projects to google datasets catalog (#4806) --- .../utils/schemaOrg/hcaProjectDataset.test.ts | 247 ++++++++++++++++++ .../Detail/components/JsonLd/jsonLd.tsx | 33 +++ app/utils/schemaOrg/common.ts | 138 ++++++++++ app/utils/schemaOrg/hcaProjectDataset.ts | 188 +++++++++++++ .../common/accessionMapper/accessionMapper.ts | 2 +- pages/[entityListType]/[...params].tsx | 34 ++- 6 files changed, 638 insertions(+), 4 deletions(-) create mode 100644 __tests__/utils/schemaOrg/hcaProjectDataset.test.ts create mode 100644 app/components/Detail/components/JsonLd/jsonLd.tsx create mode 100644 app/utils/schemaOrg/common.ts create mode 100644 app/utils/schemaOrg/hcaProjectDataset.ts diff --git a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts new file mode 100644 index 000000000..4345f8bb8 --- /dev/null +++ b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts @@ -0,0 +1,247 @@ +import { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses"; +import { MAX_DESCRIPTION_LENGTH } from "../../../app/utils/schemaOrg/common"; +import { buildHcaProjectJsonLd } from "../../../app/utils/schemaOrg/hcaProjectDataset"; + +const BROWSER_URL = "https://explore.data.humancellatlas.org"; + +/** + * Builds a minimal valid HCA project response with optional overrides for + * top-level (project) and aggregated (donor/sample/specimen/protocol) fields. + * @param overrides - Partial overrides applied to the base response. + * @returns A `ProjectsResponse` shape suitable for builder tests. 
+ */ +function makeProjectsResponse( + overrides: Partial = {} +): ProjectsResponse { + return { + dates: [], + donorOrganisms: [], + entryId: "abc", + fileTypeSummaries: [], + projects: [ + { + accessible: true, + accessions: [], + bionetworkName: [], + contributedAnalyses: {}, + contributors: [], + dataUseRestriction: null, + duosId: null, + estimatedCellCount: null, + laboratory: [], + matrices: {}, + projectDescription: "A study of cells.", + projectId: "uuid-1", + projectShortname: "Cell Study", + projectTitle: "Cells of the body", + publications: [], + supplementaryLinks: [], + tissueAtlas: [], + }, + ], + protocols: [], + samples: [], + specimens: [], + status: 200, + ...overrides, + } as unknown as ProjectsResponse; +} + +describe("buildHcaProjectJsonLd", () => { + it("returns undefined when no project is present", () => { + const response = makeProjectsResponse({ projects: [] }); + expect(buildHcaProjectJsonLd(response, BROWSER_URL)).toBeUndefined(); + }); + + it("populates required Schema.org Dataset fields", () => { + const result = buildHcaProjectJsonLd(makeProjectsResponse(), BROWSER_URL); + expect(result).toBeDefined(); + expect(result!["@context"]).toBe("https://schema.org"); + expect(result!["@type"]).toBe("Dataset"); + expect(result!.name).toBe("Cells of the body"); + expect(result!.description).toBe("A study of cells."); + expect(result!.url).toBe(`${BROWSER_URL}/projects/uuid-1`); + expect(result!.identifier).toEqual(["uuid-1"]); + expect(result!.isAccessibleForFree).toBe(true); + expect(result!.includedInDataCatalog).toEqual({ + "@type": "DataCatalog", + name: "Human Cell Atlas Data Coordination Platform", + url: BROWSER_URL, + }); + }); + + it("falls back to projectShortname when projectTitle is empty", () => { + const response = makeProjectsResponse(); + response.projects[0].projectTitle = ""; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.name).toBe("Cell Study"); + }); + + it("strips HTML tags from 
description", () => { + const response = makeProjectsResponse(); + response.projects[0].projectDescription = + "

Single-cell RNA-seq data.

"; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.description).toBe("Single-cell RNA-seq data."); + }); + + it("truncates descriptions over 5000 characters and appends an ellipsis", () => { + const longDescription = "a".repeat(MAX_DESCRIPTION_LENGTH + 200); + const response = makeProjectsResponse(); + response.projects[0].projectDescription = longDescription; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.description).toHaveLength(MAX_DESCRIPTION_LENGTH); + expect(result!.description.endsWith("…")).toBe(true); + }); + + it("includes accession ids in identifier and identifiers.org URLs in sameAs", () => { + const response = makeProjectsResponse(); + response.projects[0].accessions = [ + { accession: "GSE12345", namespace: "geo_series" }, + { accession: "PRJNA9999", namespace: "insdc_project" }, + { accession: "X", namespace: "unknown_namespace" }, + ]; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.identifier).toEqual([ + "uuid-1", + "GSE12345", + "PRJNA9999", + "X", + ]); + expect(result!.sameAs).toEqual([ + "https://identifiers.org/geo:GSE12345", + "https://identifiers.org/ena.embl:PRJNA9999", + ]); + }); + + it("omits sameAs when no accessions map to a known namespace", () => { + const result = buildHcaProjectJsonLd(makeProjectsResponse(), BROWSER_URL); + expect(result!.sameAs).toBeUndefined(); + }); + + it("builds creators from contributors with affiliation", () => { + const response = makeProjectsResponse(); + response.projects[0].contributors = [ + { + contactName: "Smith,Alice,B", + email: null, + institution: "Example University", + }, + { + contactName: "", + email: null, + institution: "Should be skipped (no name)", + }, + { + contactName: "Jones,Bob", + email: null, + institution: "", + }, + ]; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.creator).toEqual([ + { + "@type": "Person", + affiliation: { "@type": "Organization", 
name: "Example University" }, + name: "Alice B Smith", + }, + { "@type": "Person", name: "Bob Jones" }, + ]); + }); + + it("builds citations from publications using DOI then publicationUrl as sameAs", () => { + const response = makeProjectsResponse(); + response.projects[0].publications = [ + { + doi: "10.1000/example", + officialHcaPublication: true, + publicationTitle: "Cell Paper", + publicationUrl: "https://example.org/cell-paper", + }, + { + doi: null, + officialHcaPublication: false, + publicationTitle: "Other Paper", + publicationUrl: "https://example.org/other", + }, + { + doi: null, + officialHcaPublication: false, + publicationTitle: "", + publicationUrl: "https://example.org/no-title", + }, + ]; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.citation).toEqual([ + { + "@type": "ScholarlyArticle", + headline: "Cell Paper", + name: "Cell Paper", + sameAs: "https://doi.org/10.1000/example", + }, + { + "@type": "ScholarlyArticle", + headline: "Other Paper", + name: "Other Paper", + sameAs: "https://example.org/other", + }, + ]); + }); + + it("builds deduplicated keywords from donor, sample, specimen, and protocol fields", () => { + const response = makeProjectsResponse({ + donorOrganisms: [ + { + biologicalSex: null, + developmentStage: [], + disease: ["normal"], + donorCount: 1, + genusSpecies: ["Homo sapiens"], + organismAge: null, + }, + ], + protocols: [ + { + libraryConstructionApproach: ["10x v2", "10x v3"], + }, + ], + samples: [ + { + disease: ["normal"], + id: ["s1"], + organ: ["brain"], + organPart: ["cortex"], + sampleEntityType: ["specimens"], + }, + ], + specimens: [ + { + disease: ["normal"], + id: ["s1"], + organ: ["brain"], + organPart: ["cortex"], + preservationMethod: [], + source: [], + }, + ], + } as unknown as Partial); + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.keywords).toEqual([ + "Homo sapiens", + "normal", + "brain", + "cortex", + "specimens", + "10x v2", + "10x 
v3", + ]); + }); + + it("omits keywords, creator, citation, sameAs when sources are empty", () => { + const result = buildHcaProjectJsonLd(makeProjectsResponse(), BROWSER_URL); + expect(result!.keywords).toBeUndefined(); + expect(result!.creator).toBeUndefined(); + expect(result!.citation).toBeUndefined(); + expect(result!.sameAs).toBeUndefined(); + }); +}); diff --git a/app/components/Detail/components/JsonLd/jsonLd.tsx b/app/components/Detail/components/JsonLd/jsonLd.tsx new file mode 100644 index 000000000..1150b742f --- /dev/null +++ b/app/components/Detail/components/JsonLd/jsonLd.tsx @@ -0,0 +1,33 @@ +import Head from "next/head"; +import { JSX } from "react"; +import { + escapeJsonForHtml, + SchemaDataset, +} from "../../../../utils/schemaOrg/common"; + +interface JsonLdProps { + jsonLd: SchemaDataset; +} + +/** + * Renders a Schema.org Dataset JSON-LD `` sequences in entity descriptions). + * @param props - Component props. + * @param props.jsonLd - Schema.org Dataset payload built by a consumer-specific builder. + * @returns Head element with the JSON-LD script tag. + */ +export const JsonLd = ({ jsonLd }: JsonLdProps): JSX.Element => { + return ( + + ` or HTML entity injection. + * @param json - Serialised JSON to embed. + * @returns Escaped JSON safe for `dangerouslySetInnerHTML`. + */ +export function escapeJsonForHtml(json: string): string { + return json + .replace(//g, "\\u003e") + .replace(/&/g, "\\u0026"); +} + +/** + * De-duplicates and removes empty/null/undefined entries from a string array. + * @param values - Source array (may contain null, undefined, or duplicates). + * @returns Deduplicated array of non-empty strings, preserving first-seen order. 
+ */ +export function uniqueNonEmpty( + values: (string | null | undefined)[] +): string[] { + const seen = new Set(); + const result: string[] = []; + for (const value of values) { + if (!value) continue; + if (seen.has(value)) continue; + seen.add(value); + result.push(value); + } + return result; +} diff --git a/app/utils/schemaOrg/hcaProjectDataset.ts b/app/utils/schemaOrg/hcaProjectDataset.ts new file mode 100644 index 000000000..a8be960ee --- /dev/null +++ b/app/utils/schemaOrg/hcaProjectDataset.ts @@ -0,0 +1,188 @@ +import { + AccessionResponse, + ContributorResponse, + PublicationResponse, +} from "../../apis/azul/hca-dcp/common/entities"; +import { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; +import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper"; +import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants"; +import { + SchemaDataset, + SchemaOrganization, + SchemaPerson, + SchemaScholarlyArticle, + stripHtmlTags, + truncateDescription, + uniqueNonEmpty, +} from "./common"; + +const CATALOG_NAME = "Human Cell Atlas Data Coordination Platform"; + +/** + * Builds a Schema.org Dataset JSON-LD object for an HCA DCP project. + * + * Returns `undefined` when the response does not carry a project we can + * describe (i.e. no project entity), so the caller can skip rendering. + * @param data - HCA DCP project detail response from Azul. + * @param browserURL - Site base URL used for canonical and catalog URLs. + * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable. 
+ */ +export function buildHcaProjectJsonLd( + data: ProjectsResponse, + browserURL: string +): SchemaDataset | undefined { + const project = data.projects?.[0]; + if (!project) return undefined; + + const description = truncateDescription( + stripHtmlTags(project.projectDescription || "") + ); + const name = project.projectTitle || project.projectShortname; + const identifier = uniqueNonEmpty([ + project.projectId, + ...project.accessions.map((accession) => accession.accession), + ]); + + const jsonLd: SchemaDataset = { + "@context": "https://schema.org", + "@type": "Dataset", + description, + identifier, + includedInDataCatalog: { + "@type": "DataCatalog", + name: CATALOG_NAME, + url: browserURL, + }, + isAccessibleForFree: true, + name, + url: `${browserURL}/projects/${project.projectId}`, + }; + + const sameAs = buildSameAs(project.accessions); + if (sameAs.length > 0) jsonLd.sameAs = sameAs; + + const keywords = buildKeywords(data); + if (keywords.length > 0) jsonLd.keywords = keywords; + + const creator = buildCreators(project.contributors); + if (creator.length > 0) jsonLd.creator = creator; + + const citation = buildCitations(project.publications); + if (citation.length > 0) jsonLd.citation = citation; + + return jsonLd; +} + +/** + * Builds the sameAs array of external accession URLs via identifiers.org. + * Only includes accessions whose namespace maps to a known identifier prefix. + * @param accessions - Project accessions from the Azul response. + * @returns Array of canonical accession URLs. 
+ */ +function buildSameAs(accessions: AccessionResponse[]): string[] { + const urls: string[] = []; + for (const { accession, namespace } of accessions) { + const prefix = + ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix; + if (!prefix) continue; + const url = transformAccessionURL(accession, prefix); + if (url) urls.push(url); + } + return uniqueNonEmpty(urls); +} + +/** + * Builds a keywords array by unioning biologically-meaningful fields from the + * project's aggregated donor/sample/specimen/protocol responses. + * @param data - HCA project detail response. + * @returns Deduplicated keywords array. + */ +function buildKeywords(data: ProjectsResponse): string[] { + const values: (string | null | undefined)[] = []; + for (const donor of data.donorOrganisms ?? []) { + values.push(...(donor.genusSpecies ?? [])); + values.push(...(donor.disease ?? [])); + } + for (const sample of data.samples ?? []) { + values.push(...(sample.organ ?? [])); + values.push(...(sample.organPart ?? [])); + values.push(...(sample.disease ?? [])); + values.push(...(sample.sampleEntityType ?? [])); + } + for (const specimen of data.specimens ?? []) { + values.push(...(specimen.organ ?? [])); + values.push(...(specimen.organPart ?? [])); + values.push(...(specimen.disease ?? [])); + } + for (const protocol of data.protocols ?? []) { + values.push(...(protocol.libraryConstructionApproach ?? [])); + values.push(...(protocol.instrumentManufacturerModel ?? [])); + } + return uniqueNonEmpty(values); +} + +/** + * Builds the creator array from project contributors. Skips entries without a + * name. When the contributor has an institution, attaches it as an affiliation. + * @param contributors - HCA project contributors. + * @returns Array of schema.org Person objects. + */ +function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] { + const creators: SchemaPerson[] = []; + for (const contributor of contributors ?? 
[]) { + if (!contributor.contactName) continue; + const person: SchemaPerson = { + "@type": "Person", + name: normaliseContactName(contributor.contactName), + }; + if (contributor.institution) { + const affiliation: SchemaOrganization = { + "@type": "Organization", + name: contributor.institution, + }; + person.affiliation = affiliation; + } + creators.push(person); + } + return creators; +} + +/** + * Builds the citation array from project publications. Skips entries without a + * title. Prefers DOI for `sameAs`, falling back to the publication URL. + * @param publications - HCA project publications. + * @returns Array of schema.org ScholarlyArticle objects. + */ +function buildCitations( + publications: PublicationResponse[] +): SchemaScholarlyArticle[] { + const citations: SchemaScholarlyArticle[] = []; + for (const publication of publications ?? []) { + if (!publication.publicationTitle) continue; + const article: SchemaScholarlyArticle = { + "@type": "ScholarlyArticle", + headline: publication.publicationTitle, + name: publication.publicationTitle, + }; + if (publication.doi) { + article.sameAs = `https://doi.org/${publication.doi}`; + } else if (publication.publicationUrl) { + article.sameAs = publication.publicationUrl; + } + citations.push(article); + } + return citations; +} + +/** + * Normalises an HCA contributor's contactName from "Last,First,Middle" to + * "First Middle Last" for use as a Schema.org Person.name value. + * @param contactName - Raw contactName from the Azul response. + * @returns Human-readable contributor name. 
+ */ +function normaliseContactName(contactName: string): string { + const parts = contactName.split(",").map((part) => part.trim()); + if (parts.length < 2) return contactName; + const [last, ...rest] = parts; + return [...rest, last].filter(Boolean).join(" "); +} diff --git a/app/viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper.ts b/app/viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper.ts index d6f50837c..7f5c4925e 100644 --- a/app/viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper.ts +++ b/app/viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper.ts @@ -72,7 +72,7 @@ export function mapAccessions( * @param identifierOrgPrefix - Identifier org prefix. * @returns formatted accession URL. */ -function transformAccessionURL( +export function transformAccessionURL( accessionId: string, identifierOrgPrefix: string ): string { diff --git a/pages/[entityListType]/[...params].tsx b/pages/[entityListType]/[...params].tsx index 34249eddb..3046a171d 100644 --- a/pages/[entityListType]/[...params].tsx +++ b/pages/[entityListType]/[...params].tsx @@ -30,6 +30,8 @@ import { useRouter } from "next/router"; import { ParsedUrlQuery } from "querystring"; import { JSX } from "react"; import { EntityGuard } from "../../app/components/Detail/components/EntityGuard/entityGuard"; +import { JsonLd } from "../../app/components/Detail/components/JsonLd/jsonLd"; +import { buildHcaProjectJsonLd } from "../../app/utils/schemaOrg/hcaProjectDataset"; import { readFile } from "../../app/utils/tsvParser"; import { ROUTES } from "../../site-config/anvil-cmg/dev/export/routes"; @@ -38,6 +40,7 @@ import { getConsentGroup, isNRESOrUnrestrictedAccess, } from "../../app/apis/azul/anvil-cmg/common/transformers"; +import { ProjectsResponse } from "../../app/apis/azul/hca-dcp/common/responses"; import { isProductionEnvironment } from "../../app/config/utils"; const setOfProcessedIds = new Set(); @@ -54,6 +57,7 @@ interface 
PageUrl extends ParsedUrlQuery { } export interface EntityDetailPageProps extends AzulEntityStaticResponse { + browserURL?: string; entityListType: string; override?: Override; } @@ -67,6 +71,7 @@ export interface EntityDetailPageProps extends AzulEntityStaticResponse { const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => { const { config: siteConfig } = useConfig(); const isAnVIL = siteConfig.appTitle?.includes("AnVIL"); + const isHcaDcp = siteConfig.appTitle?.includes("HCA"); const { query } = useRouter(); if (!props.entityListType) return <>; if (props.override) return ; @@ -81,9 +86,32 @@ const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => { } if (isChooseExportView(query)) return ; if (isExportMethodView(query)) return ; - return ; + return ( + <> + {isHcaDcp && renderHcaProjectJsonLd(props)} + + + ); }; +/** + * Renders the HCA project JSON-LD when the page is a project detail route with + * data and a browser URL available. Returns null otherwise. + * @param props - Entity detail page props. + * @returns JsonLd element, or null when the page can't be described. + */ +function renderHcaProjectJsonLd( + props: EntityDetailPageProps +): JSX.Element | null { + if (props.entityListType !== "projects") return null; + if (!props.browserURL || !props.data) return null; + const jsonLd = buildHcaProjectJsonLd( + props.data as ProjectsResponse, + props.browserURL + ); + return jsonLd ? : null; +} + /** * Returns the override for the given entity ID. * @param overrides - Overrides. 
@@ -254,7 +282,7 @@ export const getStaticProps: GetStaticProps = async ({ params, }: GetStaticPropsContext) => { const appConfig = config(); - const { entities } = appConfig; + const { browserURL, entities } = appConfig; const entityListType = (params as PageUrl).entityListType; const slug = (params as PageUrl).params; const entityConfig = getEntityConfig(entities, entityListType); @@ -264,7 +292,7 @@ export const getStaticProps: GetStaticProps = async ({ if (!entityConfig || !entityId) return { notFound: true }; - const props: EntityDetailPageProps = { entityListType }; + const props: EntityDetailPageProps = { browserURL, entityListType }; // Process entity override props. processEntityOverrideProps(entityConfig, entityListType, entityId, props); From 0b74934747da31e0fdf373f6a826619cd695e965 Mon Sep 17 00:00:00 2001 From: Fran McDade <18710366+frano-m@users.noreply.github.com> Date: Wed, 13 May 2026 17:41:24 +1000 Subject: [PATCH 2/5] refactor: apply coding standards to schemaorg files (#4806) --- .../utils/schemaOrg/hcaProjectDataset.test.ts | 4 +- app/utils/schemaOrg/hcaProjectDataset.ts | 144 +++++++++--------- app/utils/schemaOrg/types.ts | 79 ++++++++++ app/utils/schemaOrg/{common.ts => utils.ts} | 99 ++---------- .../components/JsonLd/jsonLd.tsx | 6 +- pages/[entityListType]/[...params].tsx | 40 ++--- 6 files changed, 184 insertions(+), 188 deletions(-) create mode 100644 app/utils/schemaOrg/types.ts rename app/utils/schemaOrg/{common.ts => utils.ts} (52%) rename app/{components/Detail => views/EntityDetailView}/components/JsonLd/jsonLd.tsx (85%) diff --git a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts index 4345f8bb8..2a12df566 100644 --- a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts +++ b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts @@ -1,6 +1,6 @@ -import { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses"; -import { MAX_DESCRIPTION_LENGTH } from 
"../../../app/utils/schemaOrg/common"; +import type { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses"; import { buildHcaProjectJsonLd } from "../../../app/utils/schemaOrg/hcaProjectDataset"; +import { MAX_DESCRIPTION_LENGTH } from "../../../app/utils/schemaOrg/types"; const BROWSER_URL = "https://explore.data.humancellatlas.org"; diff --git a/app/utils/schemaOrg/hcaProjectDataset.ts b/app/utils/schemaOrg/hcaProjectDataset.ts index a8be960ee..9575b272c 100644 --- a/app/utils/schemaOrg/hcaProjectDataset.ts +++ b/app/utils/schemaOrg/hcaProjectDataset.ts @@ -1,23 +1,74 @@ -import { +import type { AccessionResponse, ContributorResponse, PublicationResponse, } from "../../apis/azul/hca-dcp/common/entities"; -import { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; +import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper"; import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants"; -import { +import type { SchemaDataset, SchemaOrganization, SchemaPerson, SchemaScholarlyArticle, - stripHtmlTags, - truncateDescription, - uniqueNonEmpty, -} from "./common"; +} from "./types"; +import { stripHtmlTags, truncateDescription, uniqueNonEmpty } from "./utils"; const CATALOG_NAME = "Human Cell Atlas Data Coordination Platform"; +/** + * Builds the citation array from project publications. Skips entries without a + * title. Prefers DOI for `sameAs`, falling back to the publication URL. + * @param publications - HCA project publications. + * @returns Array of schema.org ScholarlyArticle objects. + */ +function buildCitations( + publications: PublicationResponse[] +): SchemaScholarlyArticle[] { + const citations: SchemaScholarlyArticle[] = []; + for (const publication of publications ?? 
[]) { + if (!publication.publicationTitle) continue; + const article: SchemaScholarlyArticle = { + "@type": "ScholarlyArticle", + headline: publication.publicationTitle, + name: publication.publicationTitle, + }; + if (publication.doi) { + article.sameAs = `https://doi.org/${publication.doi}`; + } else if (publication.publicationUrl) { + article.sameAs = publication.publicationUrl; + } + citations.push(article); + } + return citations; +} + +/** + * Builds the creator array from project contributors. Skips entries without a + * name. When the contributor has an institution, attaches it as an affiliation. + * @param contributors - HCA project contributors. + * @returns Array of schema.org Person objects. + */ +function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] { + const creators: SchemaPerson[] = []; + for (const contributor of contributors ?? []) { + if (!contributor.contactName) continue; + const person: SchemaPerson = { + "@type": "Person", + name: normaliseContactName(contributor.contactName), + }; + if (contributor.institution) { + const affiliation: SchemaOrganization = { + "@type": "Organization", + name: contributor.institution, + }; + person.affiliation = affiliation; + } + creators.push(person); + } + return creators; +} + /** * Builds a Schema.org Dataset JSON-LD object for an HCA DCP project. * @@ -73,24 +124,6 @@ export function buildHcaProjectJsonLd( return jsonLd; } -/** - * Builds the sameAs array of external accession URLs via identifiers.org. - * Only includes accessions whose namespace maps to a known identifier prefix. - * @param accessions - Project accessions from the Azul response. - * @returns Array of canonical accession URLs. 
- */ -function buildSameAs(accessions: AccessionResponse[]): string[] { - const urls: string[] = []; - for (const { accession, namespace } of accessions) { - const prefix = - ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix; - if (!prefix) continue; - const url = transformAccessionURL(accession, prefix); - if (url) urls.push(url); - } - return uniqueNonEmpty(urls); -} - /** * Builds a keywords array by unioning biologically-meaningful fields from the * project's aggregated donor/sample/specimen/protocol responses. @@ -122,56 +155,21 @@ function buildKeywords(data: ProjectsResponse): string[] { } /** - * Builds the creator array from project contributors. Skips entries without a - * name. When the contributor has an institution, attaches it as an affiliation. - * @param contributors - HCA project contributors. - * @returns Array of schema.org Person objects. - */ -function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] { - const creators: SchemaPerson[] = []; - for (const contributor of contributors ?? []) { - if (!contributor.contactName) continue; - const person: SchemaPerson = { - "@type": "Person", - name: normaliseContactName(contributor.contactName), - }; - if (contributor.institution) { - const affiliation: SchemaOrganization = { - "@type": "Organization", - name: contributor.institution, - }; - person.affiliation = affiliation; - } - creators.push(person); - } - return creators; -} - -/** - * Builds the citation array from project publications. Skips entries without a - * title. Prefers DOI for `sameAs`, falling back to the publication URL. - * @param publications - HCA project publications. - * @returns Array of schema.org ScholarlyArticle objects. + * Builds the sameAs array of external accession URLs via identifiers.org. + * Only includes accessions whose namespace maps to a known identifier prefix. + * @param accessions - Project accessions from the Azul response. + * @returns Array of canonical accession URLs. 
*/ -function buildCitations( - publications: PublicationResponse[] -): SchemaScholarlyArticle[] { - const citations: SchemaScholarlyArticle[] = []; - for (const publication of publications ?? []) { - if (!publication.publicationTitle) continue; - const article: SchemaScholarlyArticle = { - "@type": "ScholarlyArticle", - headline: publication.publicationTitle, - name: publication.publicationTitle, - }; - if (publication.doi) { - article.sameAs = `https://doi.org/${publication.doi}`; - } else if (publication.publicationUrl) { - article.sameAs = publication.publicationUrl; - } - citations.push(article); +function buildSameAs(accessions: AccessionResponse[]): string[] { + const urls: string[] = []; + for (const { accession, namespace } of accessions) { + const prefix = + ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix; + if (!prefix) continue; + const url = transformAccessionURL(accession, prefix); + if (url) urls.push(url); } - return citations; + return uniqueNonEmpty(urls); } /** diff --git a/app/utils/schemaOrg/types.ts b/app/utils/schemaOrg/types.ts new file mode 100644 index 000000000..c19473653 --- /dev/null +++ b/app/utils/schemaOrg/types.ts @@ -0,0 +1,79 @@ +/** + * Shared Schema.org Dataset types used by per-consumer JSON-LD builders + * (HCA DCP, AnVIL, LungMAP). Each consumer composes its own `SchemaDataset` + * from its source entity and renders it via the shared `JsonLd` component. + * + * See https://developers.google.com/search/docs/appearance/structured-data/dataset + * for Google's Dataset structured data guidelines. + */ + +/** + * Google Dataset Search caps description at 5000 characters. + */ +export const MAX_DESCRIPTION_LENGTH = 5000; + +/** + * Schema.org DataCatalog type. + */ +export interface SchemaDataCatalog { + "@type": "DataCatalog"; + name: string; + url: string; +} + +/** + * Schema.org DataDownload type. 
+ */ +export interface SchemaDataDownload { + "@type": "DataDownload"; + contentUrl: string; + encodingFormat?: string; +} + +/** + * Schema.org Dataset JSON-LD structure. + */ +export interface SchemaDataset { + "@context": "https://schema.org"; + "@type": "Dataset"; + citation?: SchemaScholarlyArticle[]; + creator?: (SchemaPerson | SchemaOrganization)[]; + description: string; + distribution?: SchemaDataDownload[]; + identifier: string[]; + includedInDataCatalog: SchemaDataCatalog; + isAccessibleForFree: boolean; + keywords?: string[]; + measurementTechnique?: string[]; + name: string; + sameAs?: string[]; + url: string; +} + +/** + * Schema.org Organization type. + */ +export interface SchemaOrganization { + "@type": "Organization"; + name: string; +} + +/** + * Schema.org Person type. + */ +export interface SchemaPerson { + "@type": "Person"; + affiliation?: SchemaOrganization; + name: string; +} + +/** + * Schema.org ScholarlyArticle type. + */ +export interface SchemaScholarlyArticle { + "@type": "ScholarlyArticle"; + author?: SchemaPerson[]; + headline: string; + name: string; + sameAs?: string; +} diff --git a/app/utils/schemaOrg/common.ts b/app/utils/schemaOrg/utils.ts similarity index 52% rename from app/utils/schemaOrg/common.ts rename to app/utils/schemaOrg/utils.ts index be063e146..2e98a1cfd 100644 --- a/app/utils/schemaOrg/common.ts +++ b/app/utils/schemaOrg/utils.ts @@ -1,82 +1,16 @@ -/** - * Shared Schema.org Dataset types and helpers used by per-consumer JSON-LD - * builders (HCA DCP, AnVIL, LungMAP). Each consumer composes its own - * `SchemaDataset` from its source entity and renders it via the shared - * `JsonLd` component. - * - * See https://developers.google.com/search/docs/appearance/structured-data/dataset - * for Google's Dataset structured data guidelines. - */ - -/** - * Google Dataset Search caps description at 5000 characters. - */ -export const MAX_DESCRIPTION_LENGTH = 5000; - -/** - * Schema.org DataCatalog type. 
- */ -export interface SchemaDataCatalog { - "@type": "DataCatalog"; - name: string; - url: string; -} - -/** - * Schema.org DataDownload type. - */ -export interface SchemaDataDownload { - "@type": "DataDownload"; - contentUrl: string; - encodingFormat?: string; -} - -/** - * Schema.org Organization type. - */ -export interface SchemaOrganization { - "@type": "Organization"; - name: string; -} - -/** - * Schema.org Person type. - */ -export interface SchemaPerson { - "@type": "Person"; - affiliation?: SchemaOrganization; - name: string; -} +import { MAX_DESCRIPTION_LENGTH } from "./types"; /** - * Schema.org ScholarlyArticle type. - */ -export interface SchemaScholarlyArticle { - "@type": "ScholarlyArticle"; - author?: SchemaPerson[]; - headline: string; - name: string; - sameAs?: string; -} - -/** - * Schema.org Dataset JSON-LD structure. + * Escapes a JSON string for safe embedding inside an HTML `` or HTML entity injection. + * @param json - Serialised JSON to embed. + * @returns Escaped JSON safe for `dangerouslySetInnerHTML`. */ -export interface SchemaDataset { - "@context": "https://schema.org"; - "@type": "Dataset"; - citation?: SchemaScholarlyArticle[]; - creator?: (SchemaPerson | SchemaOrganization)[]; - description: string; - distribution?: SchemaDataDownload[]; - identifier: string[]; - includedInDataCatalog: SchemaDataCatalog; - isAccessibleForFree: boolean; - keywords?: string[]; - measurementTechnique?: string[]; - name: string; - sameAs?: string[]; - url: string; +export function escapeJsonForHtml(json: string): string { + return json + .replace(//g, "\\u003e") + .replace(/&/g, "\\u0026"); } /** @@ -105,19 +39,6 @@ export function truncateDescription(description: string): string { return description.slice(0, MAX_DESCRIPTION_LENGTH - 1) + "…"; } -/** - * Escapes a JSON string for safe embedding inside an HTML `` or HTML entity injection. - * @param json - Serialised JSON to embed. - * @returns Escaped JSON safe for `dangerouslySetInnerHTML`. 
- */ -export function escapeJsonForHtml(json: string): string { - return json - .replace(//g, "\\u003e") - .replace(/&/g, "\\u0026"); -} - /** * De-duplicates and removes empty/null/undefined entries from a string array. * @param values - Source array (may contain null, undefined, or duplicates). diff --git a/app/components/Detail/components/JsonLd/jsonLd.tsx b/app/views/EntityDetailView/components/JsonLd/jsonLd.tsx similarity index 85% rename from app/components/Detail/components/JsonLd/jsonLd.tsx rename to app/views/EntityDetailView/components/JsonLd/jsonLd.tsx index 1150b742f..adae845ac 100644 --- a/app/components/Detail/components/JsonLd/jsonLd.tsx +++ b/app/views/EntityDetailView/components/JsonLd/jsonLd.tsx @@ -1,9 +1,7 @@ import Head from "next/head"; import { JSX } from "react"; -import { - escapeJsonForHtml, - SchemaDataset, -} from "../../../../utils/schemaOrg/common"; +import type { SchemaDataset } from "../../../../utils/schemaOrg/types"; +import { escapeJsonForHtml } from "../../../../utils/schemaOrg/utils"; interface JsonLdProps { jsonLd: SchemaDataset; diff --git a/pages/[entityListType]/[...params].tsx b/pages/[entityListType]/[...params].tsx index 3046a171d..dfc80f34b 100644 --- a/pages/[entityListType]/[...params].tsx +++ b/pages/[entityListType]/[...params].tsx @@ -30,9 +30,9 @@ import { useRouter } from "next/router"; import { ParsedUrlQuery } from "querystring"; import { JSX } from "react"; import { EntityGuard } from "../../app/components/Detail/components/EntityGuard/entityGuard"; -import { JsonLd } from "../../app/components/Detail/components/JsonLd/jsonLd"; import { buildHcaProjectJsonLd } from "../../app/utils/schemaOrg/hcaProjectDataset"; import { readFile } from "../../app/utils/tsvParser"; +import { JsonLd } from "../../app/views/EntityDetailView/components/JsonLd/jsonLd"; import { ROUTES } from "../../site-config/anvil-cmg/dev/export/routes"; import { DatasetsResponse } from "../../app/apis/azul/anvil-cmg/common/responses"; @@ -40,7 
+40,7 @@ import { getConsentGroup, isNRESOrUnrestrictedAccess, } from "../../app/apis/azul/anvil-cmg/common/transformers"; -import { ProjectsResponse } from "../../app/apis/azul/hca-dcp/common/responses"; +import type { ProjectsResponse } from "../../app/apis/azul/hca-dcp/common/responses"; import { isProductionEnvironment } from "../../app/config/utils"; const setOfProcessedIds = new Set(); @@ -94,24 +94,6 @@ const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => { ); }; -/** - * Renders the HCA project JSON-LD when the page is a project detail route with - * data and a browser URL available. Returns null otherwise. - * @param props - Entity detail page props. - * @returns JsonLd element, or null when the page can't be described. - */ -function renderHcaProjectJsonLd( - props: EntityDetailPageProps -): JSX.Element | null { - if (props.entityListType !== "projects") return null; - if (!props.browserURL || !props.data) return null; - const jsonLd = buildHcaProjectJsonLd( - props.data as ProjectsResponse, - props.browserURL - ); - return jsonLd ? : null; -} - /** * Returns the override for the given entity ID. * @param overrides - Overrides. @@ -547,3 +529,21 @@ async function processEntityProps( props.data = entityResponse; } } + +/** + * Renders the HCA project JSON-LD when the page is a project detail route with + * data and a browser URL available. Returns null otherwise. + * @param props - Entity detail page props. + * @returns JsonLd element, or null when the page can't be described. + */ +function renderHcaProjectJsonLd( + props: EntityDetailPageProps +): JSX.Element | null { + if (props.entityListType !== "projects") return null; + if (!props.browserURL || !props.data) return null; + const jsonLd = buildHcaProjectJsonLd( + props.data as ProjectsResponse, + props.browserURL + ); + return jsonLd ? 
: null; +} From d5dba6f452692c71c9321e25ae200213883d15c5 Mon Sep 17 00:00:00 2001 From: Fran McDade <18710366+frano-m@users.noreply.github.com> Date: Wed, 13 May 2026 17:56:08 +1000 Subject: [PATCH 3/5] fix: address copilot feedback on jsonld builder (#4806) --- .../utils/schemaOrg/hcaProjectDataset.test.ts | 56 ++++++++++++++++--- app/utils/schemaOrg/constants.ts | 14 +++++ app/utils/schemaOrg/hcaProjectDataset.ts | 50 +++++++++++++++-- app/utils/schemaOrg/types.ts | 5 -- app/utils/schemaOrg/utils.ts | 6 +- pages/[entityListType]/[...params].tsx | 2 +- 6 files changed, 111 insertions(+), 22 deletions(-) create mode 100644 app/utils/schemaOrg/constants.ts diff --git a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts index 2a12df566..64d5dde9a 100644 --- a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts +++ b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts @@ -1,6 +1,6 @@ import type { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses"; +import { DESCRIPTION_LENGTH } from "../../../app/utils/schemaOrg/constants"; import { buildHcaProjectJsonLd } from "../../../app/utils/schemaOrg/hcaProjectDataset"; -import { MAX_DESCRIPTION_LENGTH } from "../../../app/utils/schemaOrg/types"; const BROWSER_URL = "https://explore.data.humancellatlas.org"; @@ -30,7 +30,8 @@ function makeProjectsResponse( estimatedCellCount: null, laboratory: [], matrices: {}, - projectDescription: "A study of cells.", + projectDescription: + "A study of cells across multiple human individuals examining inter-individual variation in gene expression.", projectId: "uuid-1", projectShortname: "Cell Study", projectTitle: "Cells of the body", @@ -59,7 +60,9 @@ describe("buildHcaProjectJsonLd", () => { expect(result!["@context"]).toBe("https://schema.org"); expect(result!["@type"]).toBe("Dataset"); expect(result!.name).toBe("Cells of the body"); - expect(result!.description).toBe("A study of cells."); + 
expect(result!.description).toBe( + "A study of cells across multiple human individuals examining inter-individual variation in gene expression." + ); expect(result!.url).toBe(`${BROWSER_URL}/projects/uuid-1`); expect(result!.identifier).toEqual(["uuid-1"]); expect(result!.isAccessibleForFree).toBe(true); @@ -80,17 +83,43 @@ describe("buildHcaProjectJsonLd", () => { it("strips HTML tags from description", () => { const response = makeProjectsResponse(); response.projects[0].projectDescription = - "

Single-cell RNA-seq data.

"; + "

Single-cell RNA-seq data across many cells and donors and tissues.

"; const result = buildHcaProjectJsonLd(response, BROWSER_URL); - expect(result!.description).toBe("Single-cell RNA-seq data."); + expect(result!.description).toBe( + "Single-cell RNA-seq data across many cells and donors and tissues." + ); + }); + + it("pads short descriptions with name and catalog context to meet the 50-char minimum", () => { + const response = makeProjectsResponse(); + response.projects[0].projectDescription = "Short."; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.description).toBe( + "Cells of the body — Short. — Human Cell Atlas Data Coordination Platform project." + ); + expect(result!.description.length).toBeGreaterThanOrEqual( + DESCRIPTION_LENGTH.MIN + ); + }); + + it("falls back to project name plus catalog context when description is empty", () => { + const response = makeProjectsResponse(); + response.projects[0].projectDescription = ""; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.description).toBe( + "Cells of the body — Human Cell Atlas Data Coordination Platform project." 
+ ); + expect(result!.description.length).toBeGreaterThanOrEqual( + DESCRIPTION_LENGTH.MIN + ); }); it("truncates descriptions over 5000 characters and appends an ellipsis", () => { - const longDescription = "a".repeat(MAX_DESCRIPTION_LENGTH + 200); + const longDescription = "a".repeat(DESCRIPTION_LENGTH.MAX + 200); const response = makeProjectsResponse(); response.projects[0].projectDescription = longDescription; const result = buildHcaProjectJsonLd(response, BROWSER_URL); - expect(result!.description).toHaveLength(MAX_DESCRIPTION_LENGTH); + expect(result!.description).toHaveLength(DESCRIPTION_LENGTH.MAX); expect(result!.description.endsWith("…")).toBe(true); }); @@ -114,6 +143,19 @@ describe("buildHcaProjectJsonLd", () => { ]); }); + it("splits semicolon-separated accession ids in both identifier and sameAs", () => { + const response = makeProjectsResponse(); + response.projects[0].accessions = [ + { accession: "GSE12345; GSE67890", namespace: "geo_series" }, + ]; + const result = buildHcaProjectJsonLd(response, BROWSER_URL); + expect(result!.identifier).toEqual(["uuid-1", "GSE12345", "GSE67890"]); + expect(result!.sameAs).toEqual([ + "https://identifiers.org/geo:GSE12345", + "https://identifiers.org/geo:GSE67890", + ]); + }); + it("omits sameAs when no accessions map to a known namespace", () => { const result = buildHcaProjectJsonLd(makeProjectsResponse(), BROWSER_URL); expect(result!.sameAs).toBeUndefined(); diff --git a/app/utils/schemaOrg/constants.ts b/app/utils/schemaOrg/constants.ts new file mode 100644 index 000000000..d3bfb45a7 --- /dev/null +++ b/app/utils/schemaOrg/constants.ts @@ -0,0 +1,14 @@ +/** + * Schema.org Dataset constants shared by consumer-specific JSON-LD builders. + */ + +/** + * Google Dataset Search description-length bounds. Descriptions outside this + * range may be rejected or downranked by Google's structured-data validator. 
+ * + * See https://developers.google.com/search/docs/appearance/structured-data/dataset + */ +export const DESCRIPTION_LENGTH = { + MAX: 5000, + MIN: 50, +} as const; diff --git a/app/utils/schemaOrg/hcaProjectDataset.ts b/app/utils/schemaOrg/hcaProjectDataset.ts index 9575b272c..e4c7ea7a0 100644 --- a/app/utils/schemaOrg/hcaProjectDataset.ts +++ b/app/utils/schemaOrg/hcaProjectDataset.ts @@ -6,6 +6,7 @@ import type { import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper"; import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants"; +import { DESCRIPTION_LENGTH } from "./constants"; import type { SchemaDataset, SchemaOrganization, @@ -43,6 +44,27 @@ function buildCitations( return citations; } +/** + * Builds the Schema.org description for a project, padding short or empty + * source descriptions with the project name and catalog context so the result + * satisfies Google's minimum description-length requirement (50 chars). + * @param sourceDescription - Raw projectDescription from the Azul response. + * @param name - Project name used as a padding fallback. + * @returns HTML-stripped description, padded if short, truncated if long. + */ +function buildDescription(sourceDescription: string, name: string): string { + const stripped = stripHtmlTags(sourceDescription || ""); + if (stripped.length >= DESCRIPTION_LENGTH.MIN) { + return truncateDescription(stripped); + } + // Padding includes the catalog name (~43 chars) to reliably push the + // result past the 50-char minimum even when name + stripped are short. + const padded = stripped + ? `${name} — ${stripped} — ${CATALOG_NAME} project.` + : `${name} — ${CATALOG_NAME} project.`; + return truncateDescription(padded); +} + /** * Builds the creator array from project contributors. Skips entries without a * name. 
When the contributor has an institution, attaches it as an affiliation. @@ -85,13 +107,13 @@ export function buildHcaProjectJsonLd( const project = data.projects?.[0]; if (!project) return undefined; - const description = truncateDescription( - stripHtmlTags(project.projectDescription || "") - ); const name = project.projectTitle || project.projectShortname; + const description = buildDescription(project.projectDescription, name); const identifier = uniqueNonEmpty([ project.projectId, - ...project.accessions.map((accession) => accession.accession), + ...project.accessions.flatMap((accession) => + splitAccessionIds(accession.accession) + ), ]); const jsonLd: SchemaDataset = { @@ -166,8 +188,10 @@ function buildSameAs(accessions: AccessionResponse[]): string[] { const prefix = ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix; if (!prefix) continue; - const url = transformAccessionURL(accession, prefix); - if (url) urls.push(url); + for (const id of splitAccessionIds(accession)) { + const url = transformAccessionURL(id, prefix); + if (url) urls.push(url); + } } return uniqueNonEmpty(urls); } @@ -184,3 +208,17 @@ function normaliseContactName(contactName: string): string { const [last, ...rest] = parts; return [...rest, last].filter(Boolean).join(" "); } + +/** + * Splits an Azul accession string into individual accession IDs. Azul returns + * accessions as a semicolon-separated string when a project carries multiple + * IDs under the same namespace (mirrors the split done by `mapAccessions`). + * @param accession - Raw accession value from the Azul response. + * @returns Trimmed, non-empty accession IDs. 
+ */ +function splitAccessionIds(accession: string): string[] { + return accession + .split(";") + .map((id) => id.trim()) + .filter(Boolean); +} diff --git a/app/utils/schemaOrg/types.ts b/app/utils/schemaOrg/types.ts index c19473653..19acb58e7 100644 --- a/app/utils/schemaOrg/types.ts +++ b/app/utils/schemaOrg/types.ts @@ -7,11 +7,6 @@ * for Google's Dataset structured data guidelines. */ -/** - * Google Dataset Search caps description at 5000 characters. - */ -export const MAX_DESCRIPTION_LENGTH = 5000; - /** * Schema.org DataCatalog type. */ diff --git a/app/utils/schemaOrg/utils.ts b/app/utils/schemaOrg/utils.ts index 2e98a1cfd..c7d0849dc 100644 --- a/app/utils/schemaOrg/utils.ts +++ b/app/utils/schemaOrg/utils.ts @@ -1,4 +1,4 @@ -import { MAX_DESCRIPTION_LENGTH } from "./types"; +import { DESCRIPTION_LENGTH } from "./constants"; /** * Escapes a JSON string for safe embedding inside an HTML `` or HTML entity injection. diff --git a/pages/[entityListType]/[...params].tsx b/pages/[entityListType]/[...params].tsx index d95bbe166..a8bc9df1a 100644 --- a/pages/[entityListType]/[...params].tsx +++ b/pages/[entityListType]/[...params].tsx @@ -30,17 +30,18 @@ import { useRouter } from "next/router"; import { ParsedUrlQuery } from "querystring"; import { JSX } from "react"; import { EntityGuard } from "../../app/components/Detail/components/EntityGuard/entityGuard"; +import { buildAnvilDatasetJsonLd } from "../../app/utils/schemaOrg/anvilDataset"; import { buildHcaProjectJsonLd } from "../../app/utils/schemaOrg/hcaProjectDataset"; +import type { SchemaDataset } from "../../app/utils/schemaOrg/types"; import { readFile } from "../../app/utils/tsvParser"; import { JsonLd } from "../../app/views/EntityDetailView/components/JsonLd/jsonLd"; import { ROUTES } from "../../site-config/anvil-cmg/dev/export/routes"; -import { DatasetsResponse } from "../../app/apis/azul/anvil-cmg/common/responses"; +import type { DatasetsResponse } from 
"../../app/apis/azul/anvil-cmg/common/responses"; import { getConsentGroup, isNRESOrUnrestrictedAccess, } from "../../app/apis/azul/anvil-cmg/common/transformers"; -import type { ProjectsResponse } from "../../app/apis/azul/hca-dcp/common/responses"; import { isProductionEnvironment } from "../../app/config/utils"; const setOfProcessedIds = new Set(); @@ -88,7 +89,8 @@ const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => { if (isExportMethodView(query)) return ; return ( <> - {isHcaDcp && renderHcaProjectJsonLd(props)} + {isAnVIL && renderJsonLd(props, "datasets", buildAnvilDatasetJsonLd)} + {isHcaDcp && renderJsonLd(props, "projects", buildHcaProjectJsonLd)} ); @@ -531,19 +533,21 @@ async function processEntityProps( } /** - * Renders the HCA project JSON-LD when the page is a project detail route with - * data and a browser URL available. Returns null otherwise. + * Renders a consumer-specific Schema.org Dataset JSON-LD script when the page + * matches the given entity list type and carries the data needed by the + * builder. Returns null otherwise. * @param props - Entity detail page props. + * @param entityListType - The entity list type this builder applies to. + * @param build - Consumer-specific builder that maps detail data to a Dataset. * @returns JsonLd element, or null when the page can't be described. */ -function renderHcaProjectJsonLd( - props: EntityDetailPageProps +function renderJsonLd( + props: EntityDetailPageProps, + entityListType: string, + build: (data: T, browserURL: string) => SchemaDataset | undefined ): JSX.Element | null { - if (props.entityListType !== "projects") return null; + if (props.entityListType !== entityListType) return null; if (!props.browserURL || !props.data) return null; - const jsonLd = buildHcaProjectJsonLd( - props.data as ProjectsResponse, - props.browserURL - ); + const jsonLd = build(props.data as T, props.browserURL); return jsonLd ? 
: null; } From 73625f0fd97798c705353e1abfe7fe5d96dff947 Mon Sep 17 00:00:00 2001 From: Fran McDade <18710366+frano-m@users.noreply.github.com> Date: Wed, 13 May 2026 22:27:46 +1000 Subject: [PATCH 5/5] feat: [lungmap] add lungmap projects to google datasets catalog (#4808) --- .../schemaOrg/lungmapProjectDataset.test.ts | 74 ++++++ app/utils/schemaOrg/hcaProjectDataset.ts | 199 +--------------- app/utils/schemaOrg/lungmapProjectDataset.ts | 27 +++ app/utils/schemaOrg/projectDataset.ts | 224 ++++++++++++++++++ pages/[entityListType]/[...params].tsx | 3 + 5 files changed, 336 insertions(+), 191 deletions(-) create mode 100644 __tests__/utils/schemaOrg/lungmapProjectDataset.test.ts create mode 100644 app/utils/schemaOrg/lungmapProjectDataset.ts create mode 100644 app/utils/schemaOrg/projectDataset.ts diff --git a/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts b/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts new file mode 100644 index 000000000..67688c89b --- /dev/null +++ b/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts @@ -0,0 +1,74 @@ +import type { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses"; +import { buildLungmapProjectJsonLd } from "../../../app/utils/schemaOrg/lungmapProjectDataset"; + +const BROWSER_URL = "https://data-browser.lungmap.net"; + +/** + * Builds a minimal valid project response for the LungMAP wrapper. The full + * mapping is covered by `hcaProjectDataset.test.ts` (same shared core); this + * file only verifies the LungMAP-specific catalog identity surfaces correctly. + * @returns A `ProjectsResponse` shape sufficient for catalog-identity checks. 
+ */ +function makeProjectsResponse(): ProjectsResponse { + return { + dates: [], + donorOrganisms: [], + entryId: "abc", + fileTypeSummaries: [], + projects: [ + { + accessible: true, + accessions: [], + bionetworkName: [], + contributedAnalyses: {}, + contributors: [], + dataUseRestriction: null, + duosId: null, + estimatedCellCount: null, + laboratory: [], + matrices: {}, + projectDescription: + "A study of lung development and disease across many donors.", + projectId: "uuid-1", + projectShortname: "Lung Study", + projectTitle: "Lung development atlas", + }, + ], + protocols: [], + samples: [], + specimens: [], + status: 200, + } as unknown as ProjectsResponse; +} + +describe("buildLungmapProjectJsonLd", () => { + it("returns undefined when no project is present", () => { + const response = { ...makeProjectsResponse(), projects: [] }; + expect( + buildLungmapProjectJsonLd(response as ProjectsResponse, BROWSER_URL) + ).toBeUndefined(); + }); + + it("surfaces LungMAP as the catalog identity and uses the projects URL pattern", () => { + const result = buildLungmapProjectJsonLd( + makeProjectsResponse(), + BROWSER_URL + ); + expect(result).toBeDefined(); + expect(result!.includedInDataCatalog).toEqual({ + "@type": "DataCatalog", + name: "LungMAP Data Explorer", + url: BROWSER_URL, + }); + expect(result!.url).toBe(`${BROWSER_URL}/projects/uuid-1`); + }); + + it("pads short descriptions with the LungMAP catalog suffix", () => { + const response = makeProjectsResponse(); + response.projects[0].projectDescription = "Short."; + const result = buildLungmapProjectJsonLd(response, BROWSER_URL); + expect(result!.description).toBe( + "Lung development atlas — Short. — LungMAP Data Explorer project." 
+ ); + }); +}); diff --git a/app/utils/schemaOrg/hcaProjectDataset.ts b/app/utils/schemaOrg/hcaProjectDataset.ts index d70fcd2f5..aa2bcd084 100644 --- a/app/utils/schemaOrg/hcaProjectDataset.ts +++ b/app/utils/schemaOrg/hcaProjectDataset.ts @@ -1,80 +1,17 @@ -import type { - AccessionResponse, - ContributorResponse, - PublicationResponse, -} from "../../apis/azul/hca-dcp/common/entities"; import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; -import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper"; -import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants"; -import type { - SchemaDataset, - SchemaOrganization, - SchemaPerson, - SchemaScholarlyArticle, -} from "./types"; -import { buildDescription, uniqueNonEmpty } from "./utils"; +import type { ProjectCatalogOptions } from "./projectDataset"; +import { buildProjectJsonLd } from "./projectDataset"; +import type { SchemaDataset } from "./types"; const CATALOG_NAME = "Human Cell Atlas Data Coordination Platform"; -const DESCRIPTION_FALLBACK_SUFFIX = `${CATALOG_NAME} project.`; -/** - * Builds the citation array from project publications. Skips entries without a - * title. Prefers DOI for `sameAs`, falling back to the publication URL. - * @param publications - HCA project publications. - * @returns Array of schema.org ScholarlyArticle objects. - */ -function buildCitations( - publications: PublicationResponse[] -): SchemaScholarlyArticle[] { - const citations: SchemaScholarlyArticle[] = []; - for (const publication of publications ?? 
[]) { - if (!publication.publicationTitle) continue; - const article: SchemaScholarlyArticle = { - "@type": "ScholarlyArticle", - headline: publication.publicationTitle, - name: publication.publicationTitle, - }; - if (publication.doi) { - article.sameAs = `https://doi.org/${publication.doi}`; - } else if (publication.publicationUrl) { - article.sameAs = publication.publicationUrl; - } - citations.push(article); - } - return citations; -} - -/** - * Builds the creator array from project contributors. Skips entries without a - * name. When the contributor has an institution, attaches it as an affiliation. - * @param contributors - HCA project contributors. - * @returns Array of schema.org Person objects. - */ -function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] { - const creators: SchemaPerson[] = []; - for (const contributor of contributors ?? []) { - if (!contributor.contactName) continue; - const person: SchemaPerson = { - "@type": "Person", - name: normaliseContactName(contributor.contactName), - }; - if (contributor.institution) { - const affiliation: SchemaOrganization = { - "@type": "Organization", - name: contributor.institution, - }; - person.affiliation = affiliation; - } - creators.push(person); - } - return creators; -} +const OPTIONS: ProjectCatalogOptions = { + catalogName: CATALOG_NAME, + descriptionFallbackSuffix: `${CATALOG_NAME} project.`, +}; /** * Builds a Schema.org Dataset JSON-LD object for an HCA DCP project. - * - * Returns `undefined` when the response does not carry a project we can - * describe (i.e. no project entity), so the caller can skip rendering. * @param data - HCA DCP project detail response from Azul. * @param browserURL - Site base URL used for canonical and catalog URLs. * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable. 
@@ -83,125 +20,5 @@ export function buildHcaProjectJsonLd( data: ProjectsResponse, browserURL: string ): SchemaDataset | undefined { - const project = data.projects?.[0]; - if (!project) return undefined; - - const name = project.projectTitle || project.projectShortname; - const description = buildDescription( - project.projectDescription, - name, - DESCRIPTION_FALLBACK_SUFFIX - ); - const identifier = uniqueNonEmpty([ - project.projectId, - ...project.accessions.flatMap((accession) => - splitAccessionIds(accession.accession) - ), - ]); - - const jsonLd: SchemaDataset = { - "@context": "https://schema.org", - "@type": "Dataset", - description, - identifier, - includedInDataCatalog: { - "@type": "DataCatalog", - name: CATALOG_NAME, - url: browserURL, - }, - isAccessibleForFree: true, - name, - url: `${browserURL}/projects/${project.projectId}`, - }; - - const sameAs = buildSameAs(project.accessions); - if (sameAs.length > 0) jsonLd.sameAs = sameAs; - - const keywords = buildKeywords(data); - if (keywords.length > 0) jsonLd.keywords = keywords; - - const creator = buildCreators(project.contributors); - if (creator.length > 0) jsonLd.creator = creator; - - const citation = buildCitations(project.publications); - if (citation.length > 0) jsonLd.citation = citation; - - return jsonLd; -} - -/** - * Builds a keywords array by unioning biologically-meaningful fields from the - * project's aggregated donor/sample/specimen/protocol responses. - * @param data - HCA project detail response. - * @returns Deduplicated keywords array. - */ -function buildKeywords(data: ProjectsResponse): string[] { - const values: (string | null | undefined)[] = []; - for (const donor of data.donorOrganisms ?? []) { - values.push(...(donor.genusSpecies ?? [])); - values.push(...(donor.disease ?? [])); - } - for (const sample of data.samples ?? []) { - values.push(...(sample.organ ?? [])); - values.push(...(sample.organPart ?? [])); - values.push(...(sample.disease ?? 
[])); - values.push(...(sample.sampleEntityType ?? [])); - } - for (const specimen of data.specimens ?? []) { - values.push(...(specimen.organ ?? [])); - values.push(...(specimen.organPart ?? [])); - values.push(...(specimen.disease ?? [])); - } - for (const protocol of data.protocols ?? []) { - values.push(...(protocol.libraryConstructionApproach ?? [])); - values.push(...(protocol.instrumentManufacturerModel ?? [])); - } - return uniqueNonEmpty(values); -} - -/** - * Builds the sameAs array of external accession URLs via identifiers.org. - * Only includes accessions whose namespace maps to a known identifier prefix. - * @param accessions - Project accessions from the Azul response. - * @returns Array of canonical accession URLs. - */ -function buildSameAs(accessions: AccessionResponse[]): string[] { - const urls: string[] = []; - for (const { accession, namespace } of accessions) { - const prefix = - ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix; - if (!prefix) continue; - for (const id of splitAccessionIds(accession)) { - const url = transformAccessionURL(id, prefix); - if (url) urls.push(url); - } - } - return uniqueNonEmpty(urls); -} - -/** - * Normalises an HCA contributor's contactName from "Last,First,Middle" to - * "First Middle Last" for use as a Schema.org Person.name value. - * @param contactName - Raw contactName from the Azul response. - * @returns Human-readable contributor name. - */ -function normaliseContactName(contactName: string): string { - const parts = contactName.split(",").map((part) => part.trim()); - if (parts.length < 2) return contactName; - const [last, ...rest] = parts; - return [...rest, last].filter(Boolean).join(" "); -} - -/** - * Splits an Azul accession string into individual accession IDs. Azul returns - * accessions as a semicolon-separated string when a project carries multiple - * IDs under the same namespace (mirrors the split done by `mapAccessions`). 
- * @param accession - Raw accession value from the Azul response. - * @returns Trimmed, non-empty accession IDs. - */ -function splitAccessionIds(accession: string): string[] { - return accession - .split(";") - .map((id) => id.trim()) - .filter(Boolean); + return buildProjectJsonLd(data, browserURL, OPTIONS); } diff --git a/app/utils/schemaOrg/lungmapProjectDataset.ts b/app/utils/schemaOrg/lungmapProjectDataset.ts new file mode 100644 index 000000000..d07cba9d3 --- /dev/null +++ b/app/utils/schemaOrg/lungmapProjectDataset.ts @@ -0,0 +1,27 @@ +import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; +import type { ProjectCatalogOptions } from "./projectDataset"; +import { buildProjectJsonLd } from "./projectDataset"; +import type { SchemaDataset } from "./types"; + +const CATALOG_NAME = "LungMAP Data Explorer"; + +const OPTIONS: ProjectCatalogOptions = { + catalogName: CATALOG_NAME, + descriptionFallbackSuffix: `${CATALOG_NAME} project.`, +}; + +/** + * Builds a Schema.org Dataset JSON-LD object for a LungMAP project. LungMAP + * shares the HCA Azul backend, so the response shape matches HCA's + * `ProjectsResponse` and the shared `buildProjectJsonLd` core does the + * mapping; this wrapper just supplies LungMAP-specific catalog identity. + * @param data - LungMAP project detail response from Azul. + * @param browserURL - Site base URL used for canonical and catalog URLs. + * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable. 
+ */ +export function buildLungmapProjectJsonLd( + data: ProjectsResponse, + browserURL: string +): SchemaDataset | undefined { + return buildProjectJsonLd(data, browserURL, OPTIONS); +} diff --git a/app/utils/schemaOrg/projectDataset.ts b/app/utils/schemaOrg/projectDataset.ts new file mode 100644 index 000000000..8703a298e --- /dev/null +++ b/app/utils/schemaOrg/projectDataset.ts @@ -0,0 +1,224 @@ +/** + * Shared Schema.org Dataset builder for consumers that surface HCA-style + * `ProjectResponse` data (HCA DCP, LungMAP). Per-consumer files (e.g. + * `hcaProjectDataset.ts`, `lungmapProjectDataset.ts`) supply a + * `ProjectCatalogOptions` describing catalog identity and call + * `buildProjectJsonLd` to produce the JSON-LD payload. + */ + +import type { + AccessionResponse, + ContributorResponse, + PublicationResponse, +} from "../../apis/azul/hca-dcp/common/entities"; +import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses"; +import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper"; +import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants"; +import type { + SchemaDataset, + SchemaOrganization, + SchemaPerson, + SchemaScholarlyArticle, +} from "./types"; +import { buildDescription, uniqueNonEmpty } from "./utils"; + +/** + * Per-consumer catalog identity used to populate `includedInDataCatalog` and + * the description-padding fallback. Callers (e.g. HCA, LungMAP) supply this + * via thin wrappers so the shared builder stays consumer-agnostic. + */ +export interface ProjectCatalogOptions { + catalogName: string; + descriptionFallbackSuffix: string; +} + +/** + * Builds the citation array from project publications. Skips entries without a + * title. Prefers DOI for `sameAs`, falling back to the publication URL. + * @param publications - Project publications. + * @returns Array of schema.org ScholarlyArticle objects. 
+ */
+function buildCitations(
+  publications: PublicationResponse[]
+): SchemaScholarlyArticle[] {
+  // A nullish publication list is treated as empty.
+  return (publications ?? []).flatMap((publication) => {
+    const { doi, publicationTitle, publicationUrl } = publication;
+    // Untitled publications contribute nothing to the citation list.
+    if (!publicationTitle) return [];
+    const article: SchemaScholarlyArticle = {
+      "@type": "ScholarlyArticle",
+      headline: publicationTitle,
+      name: publicationTitle,
+    };
+    // DOI takes precedence; the publication URL is only a fallback.
+    const link = doi ? `https://doi.org/${doi}` : publicationUrl;
+    if (link) article.sameAs = link;
+    return [article];
+  });
+}
+
+/**
+ * Maps project contributors to schema.org Person objects. Contributors with no
+ * contact name are dropped; an institution, when present, is attached as the
+ * person's Organization affiliation.
+ * @param contributors - Project contributors.
+ * @returns Array of schema.org Person objects.
+ */
+function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] {
+  return (contributors ?? []).reduce<SchemaPerson[]>((people, contributor) => {
+    const { contactName, institution } = contributor;
+    if (!contactName) return people;
+    const person: SchemaPerson = {
+      "@type": "Person",
+      name: normaliseContactName(contactName),
+    };
+    if (institution) {
+      const organization: SchemaOrganization = {
+        "@type": "Organization",
+        name: institution,
+      };
+      person.affiliation = organization;
+    }
+    people.push(person);
+    return people;
+  }, []);
+}
+
+/**
+ * Collects keyword terms from the donor, sample, specimen and protocol
+ * aggregates of a project response and returns them via `uniqueNonEmpty`.
+ * Terms are gathered donors first, then samples, specimens and protocols.
+ * @param data - Project detail response.
+ * @returns Deduplicated keywords array.
+ */
+function buildKeywords(data: ProjectsResponse): string[] {
+  const donorTerms = (data.donorOrganisms ?? []).flatMap((donor) => [
+    ...(donor.genusSpecies ?? []),
+    ...(donor.disease ?? []),
+  ]);
+  const sampleTerms = (data.samples ?? []).flatMap((sample) => [
+    ...(sample.organ ?? []),
+    ...(sample.organPart ?? []),
+    ...(sample.disease ?? []),
+    ...(sample.sampleEntityType ?? []),
+  ]);
+  const specimenTerms = (data.specimens ?? []).flatMap((specimen) => [
+    ...(specimen.organ ?? []),
+    ...(specimen.organPart ?? []),
+    ...(specimen.disease ?? []),
+  ]);
+  const protocolTerms = (data.protocols ?? []).flatMap((protocol) => [
+    ...(protocol.libraryConstructionApproach ?? []),
+    ...(protocol.instrumentManufacturerModel ?? []),
+  ]);
+  const terms: (string | null | undefined)[] = [
+    ...donorTerms,
+    ...sampleTerms,
+    ...specimenTerms,
+    ...protocolTerms,
+  ];
+  return uniqueNonEmpty(terms);
+}
+
+/**
+ * Builds a Schema.org Dataset JSON-LD object from a project detail response.
+ *
+ * Returns `undefined` when the response does not carry a project we can
+ * describe, so the caller can skip rendering.
+ * @param data - Project detail response from Azul.
+ * @param browserURL - Site base URL used for canonical and catalog URLs.
+ * @param options - Consumer-specific catalog identity.
+ * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable.
+ */
+export function buildProjectJsonLd(
+  data: ProjectsResponse,
+  browserURL: string,
+  options: ProjectCatalogOptions
+): SchemaDataset | undefined {
+  const project = data.projects?.[0];
+  if (!project) return undefined;
+
+  // Publications and contributors are tolerated as nullish by their builders;
+  // give accessions the same treatment so a project without them still yields
+  // a Dataset instead of throwing and dropping the whole JSON-LD payload.
+  const accessions = project.accessions ?? [];
+
+  // An empty title falls through to the short name.
+  const name = project.projectTitle || project.projectShortname;
+  const description = buildDescription(
+    project.projectDescription,
+    name,
+    options.descriptionFallbackSuffix
+  );
+  // The project UUID comes first, followed by each individual accession ID.
+  const identifier = uniqueNonEmpty([
+    project.projectId,
+    ...accessions.flatMap((accession) =>
+      splitAccessionIds(accession.accession)
+    ),
+  ]);
+
+  const jsonLd: SchemaDataset = {
+    "@context": "https://schema.org",
+    "@type": "Dataset",
+    description,
+    identifier,
+    includedInDataCatalog: {
+      "@type": "DataCatalog",
+      name: options.catalogName,
+      url: browserURL,
+    },
+    isAccessibleForFree: true,
+    name,
+    url: `${browserURL}/projects/${project.projectId}`,
+  };
+
+  // Optional properties are only attached when they carry at least one value,
+  // so the payload never contains empty arrays.
+  const sameAs = buildSameAs(accessions);
+  if (sameAs.length > 0) jsonLd.sameAs = sameAs;
+
+  const keywords = buildKeywords(data);
+  if (keywords.length > 0) jsonLd.keywords = keywords;
+
+  const creator = buildCreators(project.contributors);
+  if (creator.length > 0) jsonLd.creator = creator;
+
+  const citation = buildCitations(project.publications);
+  if (citation.length > 0) jsonLd.citation = citation;
+
+  return jsonLd;
+}
+
+/**
+ * Builds the sameAs array of external accession URLs via identifiers.org.
+ * Only includes accessions whose namespace maps to a known identifier prefix.
+ * @param accessions - Project accessions from the Azul response.
+ * @returns Array of canonical accession URLs.
+ */
+function buildSameAs(accessions: AccessionResponse[]): string[] {
+  const urls: string[] = [];
+  // A nullish accession list is treated as empty, in line with how
+  // `buildCitations` and `buildCreators` handle their inputs.
+  for (const { accession, namespace } of accessions ?? []) {
+    const prefix =
+      ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix;
+    // Namespaces without a configured identifier prefix are skipped.
+    if (!prefix) continue;
+    // One accession value may hold several IDs; each gets its own URL.
+    for (const id of splitAccessionIds(accession)) {
+      const url = transformAccessionURL(id, prefix);
+      if (url) urls.push(url);
+    }
+  }
+  return uniqueNonEmpty(urls);
+}
+
+/**
+ * Normalises an Azul contributor's contactName from "Last,First,Middle" to
+ * "First Middle Last" for use as a Schema.org Person.name value. Each part is
+ * trimmed and empty parts are dropped. A name without a comma is returned
+ * trimmed but otherwise unchanged.
+ * @param contactName - Raw contactName from the Azul response.
+ * @returns Human-readable contributor name.
+ */
+function normaliseContactName(contactName: string): string {
+  const parts = contactName.split(",").map((part) => part.trim());
+  // No comma means there is nothing to reorder; trim so this path agrees with
+  // the comma-separated path, which trims every part.
+  if (parts.length < 2) return contactName.trim();
+  const [last, ...rest] = parts;
+  // Move the family name to the end and drop empty parts (e.g. "Doe,,Jane").
+  return [...rest, last].filter(Boolean).join(" ");
+}
+
+/**
+ * Splits an Azul accession string into individual accession IDs. Azul returns
+ * accessions as a semicolon-separated string when a project carries multiple
+ * IDs under the same namespace (mirrors the split done by `mapAccessions`).
+ * @param accession - Raw accession value from the Azul response.
+ * @returns Trimmed, non-empty accession IDs.
+ */
+function splitAccessionIds(accession: string): string[] {
+  const ids: string[] = [];
+  for (const segment of accession.split(";")) {
+    const id = segment.trim();
+    // Blank segments (e.g. from a trailing ";") are not accession IDs.
+    if (id) ids.push(id);
+  }
+  return ids;
+}
diff --git a/pages/[entityListType]/[...params].tsx b/pages/[entityListType]/[...params].tsx index a8bc9df1a..b15c2c78b 100644 --- a/pages/[entityListType]/[...params].tsx +++ b/pages/[entityListType]/[...params].tsx @@ -32,6 +32,7 @@ import { JSX } from "react"; import { EntityGuard } from "../../app/components/Detail/components/EntityGuard/entityGuard"; import { buildAnvilDatasetJsonLd } from "../../app/utils/schemaOrg/anvilDataset"; import { buildHcaProjectJsonLd } from "../../app/utils/schemaOrg/hcaProjectDataset"; +import { buildLungmapProjectJsonLd } from "../../app/utils/schemaOrg/lungmapProjectDataset"; import type { SchemaDataset } from "../../app/utils/schemaOrg/types"; import { readFile } from "../../app/utils/tsvParser"; import { JsonLd } from "../../app/views/EntityDetailView/components/JsonLd/jsonLd"; @@ -73,6 +74,7 @@ const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => { const { config: siteConfig } = useConfig(); const isAnVIL = siteConfig.appTitle?.includes("AnVIL"); const isHcaDcp = siteConfig.appTitle?.includes("HCA"); + const isLungMap = siteConfig.appTitle?.includes("LungMAP"); const { query } = useRouter(); if (!props.entityListType) return <>; if (props.override) return ; @@ -91,6 +93,7 @@ const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => { <> {isAnVIL && renderJsonLd(props, "datasets", buildAnvilDatasetJsonLd)} {isHcaDcp && renderJsonLd(props, "projects", buildHcaProjectJsonLd)} + {isLungMap && renderJsonLd(props, "projects", buildLungmapProjectJsonLd)} );