From bed37f5fba5519ff79e4791fa9098cb500d1a3c5 Mon Sep 17 00:00:00 2001 From: Petros Stavropoulos Date: Fri, 9 May 2025 16:04:13 +0300 Subject: [PATCH 1/9] Add files via upload --- .../thematic_persistence.qmd | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 sections/2_academic_impact/thematic_persistence.qmd diff --git a/sections/2_academic_impact/thematic_persistence.qmd b/sections/2_academic_impact/thematic_persistence.qmd new file mode 100644 index 0000000..69d02da --- /dev/null +++ b/sections/2_academic_impact/thematic_persistence.qmd @@ -0,0 +1,92 @@ +--- +title: Thematic Persistence Score (TPS) +author: + - name: P. Stavropoulos + orcid: 0000-0003-1664-6554 + affiliations: + - ref: arc +affiliations: + - id: arc + name: Athena Research Center + city: Athena + country: Greece +--- + +::: {.callout collapse="true"} + +# History + +| Version | Revision date | Revision | Author | +|---------|---------------|-------------|---------------------| +| 1.0 | 2025-05-09 | First Draft | Petros Stavropoulos | + +::: + +# Description + +The *Thematic Persistence Score (TPS)* quantifies the continuity and growing relevance of research topics over time. Unlike single-moment impact metrics, TPS focuses on the sustained presence and development of a topic within the scientific literature. This allows the detection of emerging themes that gain traction and helps distinguish persistent research agendas from fleeting trends. + +Persistent topics can signal strategic areas for funding, guide curriculum development, or highlight shifts in disciplinary focus. They provide insight into the structural evolution of research fields and identify which topics have lasting influence in the academic community. + +TPS is especially relevant in the context of Open Science, where transparent tracking of trends contributes to reproducibility, research evaluation, and strategic foresight. 
+ +## Thematic Persistence Score (TPS) + +TPS is a composite metric based on: + +- **Sequence Length**: Number of consecutive years a topic appears, with exponential weighting. +- **Growth Pattern**: Increase in the number of publications over time. +- **Volume**: Total number of papers on the topic. +- **Citation Impact**: Field-Weighted Citation Impact (FWCI) for publications in the topic. +- **Recency**: Preference for topics that remain active in recent years. + +Together, these factors help identify which research themes demonstrate durable engagement by the academic community. + +### Measurement + +#### Mathematical Formula + +For each topic, sequences of consecutive years are identified where the topic appears in publications. For each such sequence `s`, the score is computed as: + +$$ +\text{Score}_s = (\text{Length}_s)^{1.5} \times \text{Count}_s \times \text{Growth}_s \times \text{FWCI}_s \times \text{Recency}_s +$$ + +Where: +- $\text{Length}_s$: Length of the sequence (in years) +- $\text{Count}_s$: Number of publications in the sequence +- $\text{Growth}_s = \frac{\text{LastYearCount}}{\text{FirstYearCount}}$ (capped at 3) +- $\text{FWCI}_s$: Mean Field-Weighted Citation Impact for publications in the sequence +- $\text{Recency}_s = 1 + \frac{w (\text{LastYear}_s - \text{MaxYear} + 10)}{10}$, with $w$ as a recency weight (e.g. 0.2) + +The final TPS is the sum of the scores for all sequences of the topic: + +$$ +\text{TPS}_{\text{topic}} = \sum_s \text{Score}_s +$$ + +#### Datasources + +##### OpenAIRE Research Graph + +The [OpenAIRE Research Graph](https://graph.openaire.eu/) offers extensive metadata on publications, including: +- **Publication year**, which is crucial for identifying consecutive topic appearances. +- **Citation metadata**, enabling FWCI computation via connected sources. 
+ +##### Semantic Scholar + +[Semantic Scholar](https://www.semanticscholar.org/) offers full-text access and machine-readable metadata, including: +- Year of publication +- Citation counts + +#### Existing Methodologies + +##### SciNoBo Toolkit + +The [SciNoBo Toolkit](https://scinobo.ilsp.gr/toolkit) provides essential functionalities for TPS: + +- **Field of Science (FoS) classification**: Uses a hierarchical taxonomy (6 levels) to assign topics to publications, capturing both broad disciplines and fine-grained emerging themes. This allows robust tracking of how topics evolve across scientific fields. + +- **Citation Analysis**: Aggregates citation metrics across publications, facilitating computation of Field-Weighted Citation Impact (FWCI) for each topic-year combination. + +These tools make it feasible to apply the TPS metric across large bibliographic datasets with a rich contextual understanding of scientific domains. From 9a36813182a064f33393df4f4b4aee414c368a52 Mon Sep 17 00:00:00 2001 From: Petros Stavropoulos Date: Mon, 25 Aug 2025 16:54:45 +0300 Subject: [PATCH 2/9] Revise Thematic Persistence and include other literature --- .../thematic_persistence.qmd | 105 +++++++++++++----- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/sections/2_academic_impact/thematic_persistence.qmd b/sections/2_academic_impact/thematic_persistence.qmd index 69d02da..5d6d5c4 100644 --- a/sections/2_academic_impact/thematic_persistence.qmd +++ b/sections/2_academic_impact/thematic_persistence.qmd @@ -1,5 +1,5 @@ --- -title: Thematic Persistence Score (TPS) +title: Thematic Persistence author: - name: P. 
Stavropoulos orcid: 0000-0003-1664-6554 @@ -18,33 +18,64 @@ affiliations: | Version | Revision date | Revision | Author | |---------|---------------|-------------|---------------------| +| 1.1 | 2025-08-25 | Additions | Petros Stavropoulos | | 1.0 | 2025-05-09 | First Draft | Petros Stavropoulos | ::: # Description -The *Thematic Persistence Score (TPS)* quantifies the continuity and growing relevance of research topics over time. Unlike single-moment impact metrics, TPS focuses on the sustained presence and development of a topic within the scientific literature. This allows the detection of emerging themes that gain traction and helps distinguish persistent research agendas from fleeting trends. +*Thematic Persistence* captures the ability of a research topic to remain present and influential in the scientific record over extended periods of time. It reflects the **continuity, longevity, and stability** of themes, distinguishing enduring areas of inquiry from those that are short-lived or sporadic. -Persistent topics can signal strategic areas for funding, guide curriculum development, or highlight shifts in disciplinary focus. They provide insight into the structural evolution of research fields and identify which topics have lasting influence in the academic community. +Persistent topics often indicate fields with strong conceptual foundations, long-term societal or technological relevance, or strategic importance for research policy. Conversely, non-persistent topics may point to fleeting interests or speculative research directions. -TPS is especially relevant in the context of Open Science, where transparent tracking of trends contributes to reproducibility, research evaluation, and strategic foresight. +Measuring thematic persistence helps assess the structural evolution of science, identify durable agendas, and guide funding, policy, and curriculum decisions. 
## Thematic Persistence Score (TPS) -TPS is a composite metric based on: +One way to operationalize persistence is through composite indicators such as the *Thematic Persistence Score (TPS)*. TPS combines multiple aspects of a topic’s evolution (continuity across years, growth, impact, and recency) into a single measure. -- **Sequence Length**: Number of consecutive years a topic appears, with exponential weighting. -- **Growth Pattern**: Increase in the number of publications over time. -- **Volume**: Total number of papers on the topic. -- **Citation Impact**: Field-Weighted Citation Impact (FWCI) for publications in the topic. -- **Recency**: Preference for topics that remain active in recent years. - -Together, these factors help identify which research themes demonstrate durable engagement by the academic community. +Other methodologies apply different approaches, such as linking clusters across time periods, defining continuity typologies, or evaluating the survival of citation-based topics. ### Measurement -#### Mathematical Formula +Thematic persistence can be measured through a combination of: + +- **Temporal continuity**: duration and uninterrupted presence of topics across consecutive years or periods. +- **Growth dynamics**: how the volume of publications on a topic changes over time. +- **Impact measures**: the influence of topic publications relative to their fields. +- **Structural stability**: whether a topic maintains coherence in its conceptual or citation network. +- **Recency**: whether a topic remains active in the most recent period. + +The precise operationalization depends on the chosen methodology, as outlined below. + +#### Datasources + +##### OpenAIRE Research Graph + +The [OpenAIRE Research Graph](https://graph.openaire.eu/) offers extensive metadata on publications, including: +- **Publication year**, which is crucial for identifying consecutive topic appearances. 
+- **Citation metadata**, enabling FWCI computation via connected sources. + +##### Semantic Scholar + +[Semantic Scholar](https://www.semanticscholar.org/) offers full-text access and machine-readable metadata, including: +- Year of publication +- Citation counts + +#### Existing Methodologies + +##### SciNoBo Toolkit + +The [SciNoBo Toolkit](https://scinobo.ilsp.gr/toolkit) provides essential functionalities for TPS: + +- **Field of Science (FoS) classification**: Uses a hierarchical taxonomy (6 levels) to assign topics to publications, capturing both broad disciplines and fine-grained emerging themes. This allows robust tracking of how topics evolve across scientific fields. + +- **Citation Analysis**: Aggregates citation metrics across publications, facilitating computation of Field-Weighted Citation Impact (FWCI) for each topic-year combination. + +These tools make it feasible to apply the TPS metric across large bibliographic datasets with a rich contextual understanding of scientific domains. + +**TPS Formula:** For each topic, sequences of consecutive years are identified where the topic appears in publications. For each such sequence `s`, the score is computed as: @@ -65,28 +96,44 @@ $$ \text{TPS}_{\text{topic}} = \sum_s \text{Score}_s $$ -#### Datasources +This approach emphasizes **continuity**, while integrating **growth, impact, and recency**. -##### OpenAIRE Research Graph +##### Longitudinal Co-word Analysis (SciMAT) -The [OpenAIRE Research Graph](https://graph.openaire.eu/) offers extensive metadata on publications, including: -- **Publication year**, which is crucial for identifying consecutive topic appearances. -- **Citation metadata**, enabling FWCI computation via connected sources. +The [SciMAT framework](https://doi.org/10.1016/j.joi.2010.09.002) (Cobo et al., 2011) measures persistence by detecting **continuing themes** across consecutive time periods. -##### Semantic Scholar +- Topics identified via keyword co-occurrence networks. 
+- Continuity measured using the **Inclusion Index**: -[Semantic Scholar](https://www.semanticscholar.org/) offers full-text access and machine-readable metadata, including: -- Year of publication -- Citation counts +$$ +\text{Inclusion}(U,V) = \frac{|U \cap V|}{\min(|U|,|V|)} +$$ -#### Existing Methodologies +- Topics linked across periods with high inclusion are *continuing*; absence of links indicates *new* or *discontinued* themes. -##### SciNoBo Toolkit +This approach emphasizes **structural continuity of thematic vocabularies**. -The [SciNoBo Toolkit](https://scinobo.ilsp.gr/toolkit) provides essential functionalities for TPS: +##### Direct-Citation Topic Survival -- **Field of Science (FoS) classification**: Uses a hierarchical taxonomy (6 levels) to assign topics to publications, capturing both broad disciplines and fine-grained emerging themes. This allows robust tracking of how topics evolve across scientific fields. - -- **Citation Analysis**: Aggregates citation metrics across publications, facilitating computation of Field-Weighted Citation Impact (FWCI) for each topic-year combination. +The [CWTS publication-level classification system](https://doi.org/10.1002/asi.22748) enables persistence analysis based on citation-linked topic clusters. + +- Topics are defined via **direct citation clustering**. +- Persistence is measured through indicators such as: + - **Survival length**: number of years a cluster remains active. + - **Activity stability**: whether publication volume is maintained or growing. + +This approach measures persistence at the **topic-cluster level**, grounded in citation networks. 
+ +##### Continuity Typologies + +The [continuity framework](https://doi.org/10.1016/j.joi.2013.11.006) quantifies persistence by categorizing topics into distinct evolutionary types: + +- **Steady**: stable over time +- **Concentrating**: narrowing focus while persisting +- **Diluting**: broadening and dispersing +- **Sporadic**: intermittent appearance +- **Emerging**: new and growing + +Continuity is evaluated by the **strength of inter-year linkages** among topic clusters. +This allows distinguishing different **modes of persistence** and topic evolution. -These tools make it feasible to apply the TPS metric across large bibliographic datasets with a rich contextual understanding of scientific domains. From 3b81b610530e969876415ab08d3d519a85cc024e Mon Sep 17 00:00:00 2001 From: Petros Stavropoulos Date: Mon, 25 Aug 2025 21:40:46 +0300 Subject: [PATCH 3/9] Create reproducibility_composite_confidence_index.qmd Added a comprehensive overview of the Reproducibility Composite Confidence Index (RCCI), including its metrics, methodologies, and data sources. --- ...oducibility_composite_confidence_index.qmd | 249 ++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 sections/5_reproducibility/reproducibility_composite_confidence_index.qmd diff --git a/sections/5_reproducibility/reproducibility_composite_confidence_index.qmd b/sections/5_reproducibility/reproducibility_composite_confidence_index.qmd new file mode 100644 index 0000000..042c7c1 --- /dev/null +++ b/sections/5_reproducibility/reproducibility_composite_confidence_index.qmd @@ -0,0 +1,249 @@ +--- +author: + - name: P. 
Stavropoulos + orcid: 0000-0003-1664-6554 + affiliations: + - ref: arc + +affiliations: +- id: arc + name: Athena Research Center + city: Athens + country: Greece + +title: Reproducibility Composite Confidence Index (RCCI) +--- + + +::: {.callout collapse="true"} + + +# History + +| Version | Revision date | Revision | Author | +|---------|---------------|-------------|---------------------| +| 1.0 | 2025-08-25 | First draft | Petros Stavropoulos | + +::: + +# Description + +The **Reproducibility Composite Confidence Index (RCCI)** is a comprehensive indicator that assesses the quality, reusability, and trustworthiness of **research artefacts** (datasets, data collections, code, or software). + +A high RCCI score indicates that an artefact is: + +- **Highly cited** in its field (scholarly impact). +- **Frequently reused** by others in the scientific community. +- **Accepted and trusted** by peers, as reflected in citation sentiment. +- **Well-documented and FAIR-aligned**, with metadata enabling discoverability and reuse. + +This indicator was first introduced and tested in the **TIER2 project** ([tier2-project.eu](https://tier2-project.eu/)), where it was implemented in a pilot **Reproducibility Dashboard** for funders and research-performing organisations (RPOs). The RCCI was presented and reviewed in **two stakeholder webinars** and in discussions with funders and RPOs, where feedback confirmed its value for monitoring research reproducibility. + +# Metrics + +## RCCI + +The RCCI integrates four dimensions into a **single score**: + +1. **Field-Weighted Citation Impact (FWCI)** → measures academic impact (see [Citation Impact](../2_academic_impact/citation_impact.qmd)). +2. **Field-Weighted Reusability Index (FWRI)** → measures how often artefacts are reused relative to others in the same field (based on [Reuse of Code in Research](../5_reproducibility/reuse_of_code_in_research.qmd) and [Reuse of Data in Research](../5_reproducibility/reuse_of_data_in_research.qmd)). +3. 
**FAIR Index (FI)** → measures metadata completeness and alignment with [FAIR data practices](../1_open_science/prevalence_open_fair_data_practices.qmd). +4. **Reproducibility Confidence Index (RCI)** → measures community sentiment using polarity of publications (see [Polarity of Publications](../5_reproducibility/polarity_of_publications.qmd)). + +The RCCI is calculated as: + +$$ +RCCI = FWCI \times FWRI \times FI \times RCI +$$ + +A value greater than 1 (after scaling) suggests that artefacts are impactful, widely reused, FAIR-compliant, and positively regarded in the scientific community. + +--- + +### Measurement + +#### 1. Field-Weighted Citation Impact (FWCI) + +**Definition:** +The Field-Weighted Citation Impact (FWCI) measures how often a publication or research artefact (dataset, code, software) is cited compared to the average citation rate of publications in the **same Field of Science** and **same publication year**. By controlling for disciplinary citation intensity and publication age, FWCI allows comparisons of citation performance across different fields and timeframes. + +**Formula:** +$$ +FWCI = \frac{Citations_{i}}{\overline{Citations}_{f,y}} +$$ + +Where: +- $Citations_{i}$ = the number of citations received by publication or artefact *i*. +- $\overline{Citations}_{f,y}$ = the mean number of citations for all publications in the same field $f$ and year $y$. + +**Interpretation:** +- FWCI = 1 → the publication/artefact is cited at the world average for its field and year. +- FWCI > 1 → cited more frequently than the average in its field. +- FWCI < 1 → cited less frequently than the average in its field. + +**Connections to other indicators in the Handbook:** +- Discussed extensively in [Citation Impact](../2_academic_impact/citation_impact.qmd), where normalised citation indicators are introduced and their methodological challenges explained. 
+- Used in the [Impact of Open Code in Research](../5_reproducibility/impact_of_open_code_in_research.qmd) and [Impact of Open Data in Research](../5_reproducibility/impact_of_open_data_in_research.qmd) indicators to assess the citation performance of publications that make research outputs openly available. + +--- + +#### 2. Field-Weighted Reusability Index (FWRI) + +**Definition:** +The Field-Weighted Reusability Index (FWRI) measures how often a research artefact (dataset, code, software) is **reused** compared to the average reuse rate of artefacts in the **same Field of Science (FoS Level 3)** and within a **comparable publication window (e.g. 3 years after release)**. + +Reuse is operationalised through **citation statements (citances)** in publications that have been validated to explicitly indicate that the artefact was reused (e.g. “we used dataset X” or “software Y was applied in our analysis”). This ensures that FWRI captures **practical adoption** rather than generic mentions. + +**Formula:** +$$ +FWRI = \frac{Reuse_{i}}{\overline{Reuse}_{f,y}} +$$ + +Where: +- $Reuse_{i}$ = the number of validated reuse citances to artefact *i*. +- $\overline{Reuse}_{f,y}$ = the mean number of validated reuse citances for artefacts in the same field $f$ and publication year $y$. + +**Interpretation:** +- FWRI = 1 → the artefact is reused at the world average for its field and year. +- FWRI > 1 → the artefact is reused more frequently than similar artefacts. +- FWRI < 1 → the artefact is reused less frequently than similar artefacts. + +**Connections to other indicators in the Handbook:** +- Builds upon [Reuse of Code in Research](../5_reproducibility/reuse_of_code_in_research.qmd) and [Reuse of Data in Research](../5_reproducibility/reuse_of_data_in_research.qmd), which measure the raw adoption of code and data in subsequent studies. +- Extends these indicators by adding **field-normalisation**, analogous to the way FWCI normalises citation impact across fields. 
+- Complements [Impact of Open Data in Research](../5_reproducibility/impact_of_open_data_in_research.qmd), which uses Normalised Citation Impact (NCI) to evaluate the influence of Open Data publications. + +**Relation to methodologies and tools:** +- Reuse detection requires analysing **citation statements** with Natural Language Processing and machine learning, as implemented in platforms such as the **SciNoBo toolkit** [@gialitsis2022; @kotitsas2023]. +- The SciNoBo toolkit in particular can identify and classify citances by intent (reuse, comparison, generic), polarity (supporting, refuting, neutral), and semantics (claim, method, results, artefact/output), making it possible to operationalise FWRI. + +--- + +#### 3. FAIR Index (FI) + +**Definition:** +The FAIR Index (FI) measures the extent to which a research artefact (dataset, code, software) complies with the **FAIR principles**: *Findable, Accessible, Interoperable, and Reusable* [@wilkinson2016]. +The indicator provides a simple, computational way of assessing FAIRness by checking for the presence and completeness of key metadata elements that are essential for discovery, access, licensing, and reuse. + +**Formula:** +$$ +FI = \frac{\# Valid \; Metadata \; Elements}{4} +$$ + +Metadata elements: + +1. **Name** — a clear and unique name for the artefact. +2. **Version** — a version number or persistent identifier that distinguishes releases. +3. **License** — explicit usage rights (e.g., open license, restricted license). +4. **URL** — a persistent and resolvable web link providing access to the artefact. + +Each element is scored as present/valid (1) or missing/invalid (0). +- FI = 1.0 → all four metadata elements are valid, indicating full FAIR compliance. +- FI = 0.5 → two elements are valid, indicating partial FAIRness. +- FI = 0 → no FAIR metadata elements available. 
+ +**Interpretation:** +- A high FI indicates that an artefact is **well-documented and accessible**, increasing its chances of being reused reliably by others. +- A low FI signals **poor metadata practices**, limiting discoverability and trust in the artefact. + +**Connections to other indicators in the Handbook:** +- Directly linked to [Prevalence of Open/FAIR Data Practices](../1_open_science/prevalence_open_fair_data_practices.qmd), which measures the general status of FAIR adoption across publications and datasets. +- Complements the **Reuse of Data in Research** and **Reuse of Code in Research** indicators, since proper FAIR metadata often enables practical reuse. + +**Relation to methodologies and tools:** +- The **SciNoBo toolkit** can extract and validate metadata from publications and associated artefacts, supporting automated FI scoring at scale. + +--- + +#### 4. Reproducibility Confidence Index (RCI) + +**Definition:** +The Reproducibility Confidence Index (RCI) measures how the scientific community perceives the **reliability and reproducibility** of a research artefact (dataset, code, software) based on the polarity of its citations. +It incorporates **supporting, neutral, and refuting citances** to determine whether the artefact is generally validated, questioned, or disputed in follow-up research. + +RCI therefore reflects not only the *quantity* of citations, but their *quality* in terms of endorsement or criticism. + +**Formula:** +$$ +RCI = \frac{(1 \times Positive \; Citations) + (0.5 \times Neutral \; Citations) - (1 \times Negative \; Citations)}{Total \; Citations} +$$ + +**Interpretation:** +- RCI = 1 → all citations are positive, strong reproducibility confidence. +- RCI ≈ 0 → balanced or neutral sentiment, no clear consensus on reproducibility. +- RCI < 0 → predominantly negative citations, low reproducibility confidence. 
+ +**Connections to other indicators in the Handbook:** +- Directly based on [Polarity of Publications](../5_reproducibility/polarity_of_publications.qmd), which provides the methodological basis for classifying citances. +- Complements **FWCI** and **FWRI** by adding a qualitative perception dimension to quantitative measures of citation and reuse. + +**Relation to methodologies and tools:** +- **OpenAIRE Research Graph** supports linkage of citations, which can be enriched with polarity classification. +- The **SciNoBo toolkit** [@gialitsis2022; @kotitsas2023] includes functionality for automated citance classification by intent (reuse, comparison, generic), polarity (supporting, refuting, neutral), and semantics (claim, method, results, artefact/output). + +--- + +# Datasources + +To calculate the RCCI, different types of metadata are required — including citation counts, reuse information, citation polarity, and FAIR metadata. +The following datasources provide alternative ways to obtain this information. Not all of them are strictly required for every calculation, but together they offer complementary coverage for retrieving the inputs needed for RCCI and its component indicators. + +- **OpenAIRE Research Graph** + [OpenAIRE](https://graph.openaire.eu/) aggregates metadata on publications, datasets, and software. It supports linking artefacts to publications and can be used to identify reuse cases and citances that indicate how artefacts are cited, which is essential for FWRI and RCI. + +- **OpenAlex** + [OpenAlex](https://openalex.org/) is an openly accessible bibliometric database that provides citation counts, references, and links to associated datasets and software. It can be used to calculate citation-based metrics such as FWCI and to identify citation links needed for FWRI and RCI. + +- **Dimensions** + [Dimensions](https://app.dimensions.ai/) offers citation data and normalised indicators such as the Field Citation Ratio (FCR). 
It provides expected citation baselines by field and year, which are useful for calculating FWCI. + +- **Scopus** + [Scopus](https://www.scopus.com/) is a large citation database that includes the Field-Weighted Citation Impact (FWCI) indicator. It can serve as a source for citation data and normalised impact values used in RCCI. + +- **Web of Science / InCites** + [Web of Science](https://webofscience.com/) provides citation data and normalised citation metrics through InCites, where the Category Normalised Citation Impact (CNCI) is implemented. This can be used as an alternative to Scopus FWCI or Dimensions FCR. + +- **DataCite** + [DataCite](https://datacite.org/) is a registry that provides persistent identifiers (DOIs) and metadata for research datasets and software. It is especially useful for retrieving metadata elements (Name, Version, License, URL) needed for calculating the FAIR Index. + +- **Crossref** + [Crossref](https://www.crossref.org/) maintains extensive metadata for scholarly publications and related outputs, including references and links to datasets and software. It is valuable both for reuse tracking (FWRI) and FAIR metadata extraction (FI). + +- **Zenodo / Figshare / Institutional Repositories** + These repositories host datasets, software, and other artefacts. They expose metadata via APIs, which can be used to evaluate FAIRness and retrieve usage information for reuse analysis. + +- **scite.ai** + [scite.ai](https://scite.ai/) provides classification of citation statements into supporting, refuting, or mentioning. It can be used to measure polarity of publications and calculate the RCI. + +--- + +# Existing Methodologies + +## SciNoBo Toolkit + +The **SciNoBo toolkit** [@gialitsis2022a; @kotitsas2023a] has implemented and operationalised the RCCI and its component indicators into a **working monitoring dashboard**. 
+ +- In the **TIER2 project**, SciNoBo was used to extract artefacts from project deliverables and publications, link them to citation and reuse data, and compute FWCI, FWRI, FI, RCI, and RCCI. +- The RCCI results were presented in **pilot dashboards** for funders and RPOs. +- The approach was validated and refined through **stakeholder feedback** in webinars and presentations. + +This makes RCCI not only a conceptual indicator, but also one that has been **implemented and tested in practice**. + +--- + +## Other methodologies + +While SciNoBo currently offers the most complete implementation, other methodologies and tools can be used to compute individual RCCI components: + +- **Citation normalisation** + FWCI can be derived using normalisation approaches described in the [Citation Impact](../2_academic_impact/citation_impact.qmd) indicator, based on expected citation counts per field and year. This methodology is implemented in bibliometric databases such as Web of Science/InCites (CNCI), Scopus (FWCI), and Dimensions (FCR). + +- **Reuse detection** + FWRI requires identifying reuse through validated citances. Platforms such as **scite.ai** classify citations as supporting, refuting, or mentioning, while **OpenAIRE Research Graph** can link publications to datasets and software. + These can be complemented with **DataCite** and **Crossref** metadata, which record relationships between publications and artefacts. + +- **FAIR assessment** + The **F-UJI tool** provides an automated method for evaluating dataset FAIRness by checking metadata completeness and quality. This can be used to score the four FAIR Index elements (Name, Version, License, URL). + +- **Polarity classification** + RCI can be measured using **scite.ai**, which classifies citations into supporting, refuting, or mentioning. 
From f94653b6806a8e99e30ff9085b283c0e8aa1263f Mon Sep 17 00:00:00 2001 From: Vincent Traag Date: Tue, 26 Aug 2025 13:53:05 +0200 Subject: [PATCH 4/9] Fixed style (especially some enums) --- .../thematic_persistence.qmd | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/sections/2_academic_impact/thematic_persistence.qmd b/sections/2_academic_impact/thematic_persistence.qmd index 5d6d5c4..523f172 100644 --- a/sections/2_academic_impact/thematic_persistence.qmd +++ b/sections/2_academic_impact/thematic_persistence.qmd @@ -13,33 +13,31 @@ affiliations: --- ::: {.callout collapse="true"} - # History | Version | Revision date | Revision | Author | |---------|---------------|-------------|---------------------| -| 1.1 | 2025-08-25 | Additions | Petros Stavropoulos | +| 1.1 | 2025-08-25 | Additions | Petros Stavropoulos | | 1.0 | 2025-05-09 | First Draft | Petros Stavropoulos | - ::: # Description -*Thematic Persistence* captures the ability of a research topic to remain present and influential in the scientific record over extended periods of time. It reflects the **continuity, longevity, and stability** of themes, distinguishing enduring areas of inquiry from those that are short-lived or sporadic. +*Thematic Persistence* captures the ability of a research topic to remain present and influential in the scientific record over extended periods of time. It reflects the **continuity, longevity, and stability** of themes, distinguishing enduring areas of inquiry from those that are short-lived or sporadic. -Persistent topics often indicate fields with strong conceptual foundations, long-term societal or technological relevance, or strategic importance for research policy. Conversely, non-persistent topics may point to fleeting interests or speculative research directions. +Persistent topics often indicate fields with strong conceptual foundations, long-term societal or technological relevance, or strategic importance for research policy. 
Conversely, non-persistent topics may point to fleeting interests or speculative research directions. Measuring thematic persistence helps assess the structural evolution of science, identify durable agendas, and guide funding, policy, and curriculum decisions. ## Thematic Persistence Score (TPS) -One way to operationalize persistence is through composite indicators such as the *Thematic Persistence Score (TPS)*. TPS combines multiple aspects of a topic’s evolution (continuity across years, growth, impact, and recency) into a single measure. +One way to operationalize persistence is through composite indicators such as the *Thematic Persistence Score (TPS)*. TPS combines multiple aspects of a topic’s evolution (continuity across years, growth, impact, and recency) into a single measure. -Other methodologies apply different approaches, such as linking clusters across time periods, defining continuity typologies, or evaluating the survival of citation-based topics. +Other methodologies apply different approaches, such as linking clusters across time periods, defining continuity typologies, or evaluating the survival of citation-based topics. ### Measurement -Thematic persistence can be measured through a combination of: +Thematic persistence can be measured through a combination of: - **Temporal continuity**: duration and uninterrupted presence of topics across consecutive years or periods. - **Growth dynamics**: how the volume of publications on a topic changes over time. @@ -54,12 +52,14 @@ The precise operationalization depends on the chosen methodology, as outlined be ##### OpenAIRE Research Graph The [OpenAIRE Research Graph](https://graph.openaire.eu/) offers extensive metadata on publications, including: + - **Publication year**, which is crucial for identifying consecutive topic appearances. - **Citation metadata**, enabling FWCI computation via connected sources. 
##### Semantic Scholar [Semantic Scholar](https://www.semanticscholar.org/) offers full-text access and machine-readable metadata, including: + - Year of publication - Citation counts @@ -69,9 +69,8 @@ The [OpenAIRE Research Graph](https://graph.openaire.eu/) offers extensive metad The [SciNoBo Toolkit](https://scinobo.ilsp.gr/toolkit) provides essential functionalities for TPS: -- **Field of Science (FoS) classification**: Uses a hierarchical taxonomy (6 levels) to assign topics to publications, capturing both broad disciplines and fine-grained emerging themes. This allows robust tracking of how topics evolve across scientific fields. - -- **Citation Analysis**: Aggregates citation metrics across publications, facilitating computation of Field-Weighted Citation Impact (FWCI) for each topic-year combination. +- **Field of Science (FoS) classification**: Uses a hierarchical taxonomy (6 levels) to assign topics to publications, capturing both broad disciplines and fine-grained emerging themes. This allows robust tracking of how topics evolve across scientific fields. +- **Citation Analysis**: Aggregates citation metrics across publications, facilitating computation of Field-Weighted Citation Impact (FWCI) for each topic-year combination. These tools make it feasible to apply the TPS metric across large bibliographic datasets with a rich contextual understanding of scientific domains. 
@@ -83,11 +82,12 @@ $$ \text{Score}_s = (\text{Length}_s)^{1.5} \times \text{Count}_s \times \text{Growth}_s \times \text{FWCI}_s \times \text{Recency}_s $$ -Where: -- $\text{Length}_s$: Length of the sequence (in years) -- $\text{Count}_s$: Number of publications in the sequence +Where: + +- $\text{Length}_s$: Length of the sequence (in years) +- $\text{Count}_s$: Number of publications in the sequence - - $\text{Growth}_s = \frac{\text{LastYearCount}}{\text{FirstYearCount}}$ (capped at 3) -- $\text{FWCI}_s$: Mean Field-Weighted Citation Impact for publications in the sequence +- $\text{FWCI}_s$: Mean Field-Weighted Citation Impact for publications in the sequence - $\text{Recency}_s = 1 + \frac{w (\text{LastYear}_s - \text{MaxYear} + 10)}{10}$, with $w$ as a recency weight (e.g. 0.2) The final TPS is the sum of the scores for all sequences of the topic: @@ -100,7 +100,8 @@ This approach emphasizes **continuity**, while integrating **growth, impact, and ##### Longitudinal Co-word Analysis (SciMAT) -The [SciMAT framework](https://doi.org/10.1016/j.joi.2010.09.002) (Cobo et al., 2011) measures persistence by detecting **continuing themes** across consecutive time periods. +The [SciMAT framework](https://doi.org/10.1016/j.joi.2010.09.002) (Cobo et al., 2011) measures persistence by detecting **continuing themes** across consecutive time periods. + - Topics identified via keyword co-occurrence networks. - Continuity measured using the **Inclusion Index**: @@ -115,25 +116,24 @@ This approach emphasizes **structural continuity of thematic vocabularies**. ##### Direct-Citation Topic Survival -The [CWTS publication-level classification system](https://doi.org/10.1002/asi.22748) enables persistence analysis based on citation-linked topic clusters. +The [CWTS publication-level classification system](https://doi.org/10.1002/asi.22748) enables persistence analysis based on citation-linked topic clusters. -- Topics are defined via **direct citation clustering**. 
-- Persistence is measured through indicators such as: - - **Survival length**: number of years a cluster remains active. - - **Activity stability**: whether publication volume is maintained or growing. +- Topics are defined via **direct citation clustering**.\ +- Persistence is measured through indicators such as: + - **Survival length**: number of years a cluster remains active.\ + - **Activity stability**: whether publication volume is maintained or growing. This approach measures persistence at the **topic-cluster level**, grounded in citation networks. ##### Continuity Typologies -The [continuity framework](https://doi.org/10.1016/j.joi.2013.11.006) quantifies persistence by categorizing topics into distinct evolutionary types: - -- **Steady**: stable over time -- **Concentrating**: narrowing focus while persisting -- **Diluting**: broadening and dispersing -- **Sporadic**: intermittent appearance -- **Emerging**: new and growing +The [continuity framework](https://doi.org/10.1016/j.joi.2013.11.006) quantifies persistence by categorizing topics into distinct evolutionary types: -Continuity is evaluated by the **strength of inter-year linkages** among topic clusters. -This allows distinguishing different **modes of persistence** and topic evolution. +- **Steady**: stable over time\ +- **Concentrating**: narrowing focus while persisting\ +- **Diluting**: broadening and dispersing\ +- **Sporadic**: intermittent appearance\ +- **Emerging**: new and growing +Continuity is evaluated by the **strength of inter-year linkages** among topic clusters.\ +This allows distinguishing different **modes of persistence** and topic evolution. 
\ No newline at end of file From 4c6c5a71ceaa5fdb9c23c6c16bd87c73ec2811f8 Mon Sep 17 00:00:00 2001 From: Petros Stavropoulos Date: Tue, 26 Aug 2025 16:12:40 +0300 Subject: [PATCH 5/9] Delete sections/5_reproducibility/reproducibility_composite_confidence_index.qmd --- ...oducibility_composite_confidence_index.qmd | 249 ------------------ 1 file changed, 249 deletions(-) delete mode 100644 sections/5_reproducibility/reproducibility_composite_confidence_index.qmd diff --git a/sections/5_reproducibility/reproducibility_composite_confidence_index.qmd b/sections/5_reproducibility/reproducibility_composite_confidence_index.qmd deleted file mode 100644 index 042c7c1..0000000 --- a/sections/5_reproducibility/reproducibility_composite_confidence_index.qmd +++ /dev/null @@ -1,249 +0,0 @@ ---- -author: - - name: P. Stavropoulos - orcid: 0000-0003-1664-6554 - affiliations: - - ref: arc - -affiliations: -- id: arc - name: Athena Research Center - city: Athena - country: Greece - -title: Reuse of data in research ---- - - -::: {.callout collapse="true"} - - -# History - -| Version | Revision date | Revision | Author | -|---------|---------------|-------------|---------------------| -| 1.0 | 2023-08-25 | First draft | Petros Stavropoulos | - -::: - -# Description - -The **Reproducibility Composite Confidence Index (RCCI)** is a comprehensive indicator that assesses the quality, reusability, and trustworthiness of **research artefacts** (datasets, data collections, code, or software). - -A high RCCI score indicates that an artefact is: - -- **Highly cited** in its field (scholarly impact). -- **Frequently reused** by others in the scientific community. -- **Accepted and trusted** by peers, as reflected in citation sentiment. -- **Well-documented and FAIR-aligned**, with metadata enabling discoverability and reuse. 
- -This indicator was first introduced and tested in the **TIER2 project** ([tier2-project.eu](https://tier2-project.eu/)), where it was implemented in a pilot **Reproducibility Dashboard** for funders and research-performing organisations (RPOs). The RCCI was presented and reviewed in **two stakeholder webinars** and in discussions with funders and RPOs, where feedback confirmed its value for monitoring research reproducibility. - -# Metrics - -## RCCI - -The RCCI integrates four dimensions into a **single score**: - -1. **Field-Weighted Citation Impact (FWCI)** → measures academic impact (see [Citation Impact](../2_academic_impact/citation_impact.qmd)). -2. **Field-Weighted Reusability Index (FWRI)** → measures how often artefacts are reused relative to others in the same field (based on [Reuse of Code in Research](../5_reproducibility/reuse_of_code_in_research.qmd) and [Reuse of Data in Research](../5_reproducibility/reuse_of_data_in_research.qmd)). -3. **FAIR Index (FI)** → measures metadata completeness and alignment with [FAIR data practices](../1_open_science/prevalence_open_fair_data_practices.qmd). -4. **Reproducibility Confidence Index (RCI)** → measures community sentiment using polarity of publications (see [Polarity of Publications](../5_reproducibility/polarity_of_publications.qmd)). - -The RCCI is calculated as: - -$$ -RCCI = FWCI \times FWRI \times FI \times RCI -$$ - -A value greater than 1 (after scaling) suggests that artefacts are impactful, widely reused, FAIR-compliant, and positively regarded in the scientific community. - ---- - -### Measurement - -#### 1. Field-Weighted Citation Impact (FWCI) - -**Definition:** -The Field-Weighted Citation Impact (FWCI) measures how often a publication or research artefact (dataset, code, software) is cited compared to the average citation rate of publications in the **same Field of Science** and **same publication year**. 
By controlling for disciplinary citation intensity and publication age, FWCI allows comparisons of citation performance across different fields and timeframes. - -**Formula:** -$$ -FWCI = \frac{Citations_{i}}{\overline{Citations}_{f,y}} -$$ - -Where: -- $Citations_{i}$ = the number of citations received by publication or artefact *i*. -- $\overline{Citations}_{f,y}$ = the mean number of citations for all publications in the same field $f$ and year $y$. - -**Interpretation:** -- FWCI = 1 → the publication/artefact is cited at the world average for its field and year. -- FWCI > 1 → cited more frequently than the average in its field. -- FWCI < 1 → cited less frequently than the average in its field. - -**Connections to other indicators in the Handbook:** -- Discussed extensively in [Citation Impact](../2_academic_impact/citation_impact.qmd), where normalised citation indicators are introduced and their methodological challenges explained. -- Used in the [Impact of Open Code in Research](../5_reproducibility/impact_of_open_code_in_research.qmd) and [Impact of Open Data in Research](../5_reproducibility/impact_of_open_data_in_research.qmd) indicators to assess the citation performance of publications that make research outputs openly available. - ---- - -#### 2. Field-Weighted Reusability Index (FWRI) - -**Definition:** -The Field-Weighted Reusability Index (FWRI) measures how often a research artefact (dataset, code, software) is **reused** compared to the average reuse rate of artefacts in the **same Field of Science (FoS Level 3)** and within a **comparable publication window (e.g. 3 years after release)**. - -Reuse is operationalised through **citation statements (citances)** in publications that have been validated to explicitly indicate that the artefact was reused (e.g. “we used dataset X” or “software Y was applied in our analysis”). This ensures that FWRI captures **practical adoption** rather than generic mentions. 
- -**Formula:** -$$ -FWRI = \frac{Reuse_{i}}{\overline{Reuse}_{f,y}} -$$ - -Where: -- $Reuse_{i}$ = the number of validated reuse citances to artefact *i*. -- $\overline{Reuse}_{f,y}$ = the mean number of validated reuse citances for artefacts in the same field $f$ and publication year $y$. - -**Interpretation:** -- FWRI = 1 → the artefact is reused at the world average for its field and year. -- FWRI > 1 → the artefact is reused more frequently than similar artefacts. -- FWRI < 1 → the artefact is reused less frequently than similar artefacts. - -**Connections to other indicators in the Handbook:** -- Builds upon [Reuse of Code in Research](../5_reproducibility/reuse_of_code_in_research.qmd) and [Reuse of Data in Research](../5_reproducibility/reuse_of_data_in_research.qmd), which measure the raw adoption of code and data in subsequent studies. -- Extends these indicators by adding **field-normalisation**, analogous to the way FWCI normalises citation impact across fields. -- Complements [Impact of Open Data in Research](../5_reproducibility/impact_of_open_data_in_research.qmd), which uses Normalised Citation Impact (NCI) to evaluate the influence of Open Data publications. - -**Relation to methodologies and tools:** -- Reuse detection requires analysing **citation statements** with Natural Language Processing and machine learning, as implemented in platforms such the **SciNoBo toolkit** [@gialitsis2022; @kotitsas2023]. -- The SciNoBo toolkit in particular can identify and classify citances by intent (reuse, comparison, generic), polarity (supporting, refuting, neutral), and semantics (claim, method, results, artefact/output), making it possible to operationalise FWRI. - ---- - -#### 3. FAIR Index (FI) - -**Definition:** -The FAIR Index (FI) measures the extent to which a research artefact (dataset, code, software) complies with the **FAIR principles**: *Findable, Accessible, Interoperable, and Reusable* [@wilkinson2016]. 
-The indicator provides a simple, computational way of assessing FAIRness by checking for the presence and completeness of key metadata elements that are essential for discovery, access, licensing, and reuse. - -**Formula:** -$$ -FI = \frac{\# Valid \; Metadata \; Elements}{4} -$$ - -Metadata elements: - -1. **Name** — a clear and unique name for the artefact. -2. **Version** — a version number or persistent identifier that distinguishes releases. -3. **License** — explicit usage rights (e.g., open license, restricted license). -4. **URL** — a persistent and resolvable web link providing access to the artefact. - -Each element is scored as present/valid (1) or missing/invalid (0). -- FI = 1.0 → all four metadata elements are valid, indicating full FAIR compliance. -- FI = 0.5 → two elements are valid, indicating partial FAIRness. -- FI = 0 → no FAIR metadata elements available. - -**Interpretation:** -- A high FI indicates that an artefact is **well-documented and accessible**, increasing its chances of being reused reliably by others. -- A low FI signals **poor metadata practices**, limiting discoverability and trust in the artefact. - -**Connections to other indicators in the Handbook:** -- Directly linked to [Prevalence of Open/FAIR Data Practices](../1_open_science/prevalence_open_fair_data_practices.qmd), which measures the general status of FAIR adoption across publications and datasets. -- Complements the **Reuse of Data in Research** and **Reuse of Code in Research** indicators, since proper FAIR metadata often enables practical reuse. - -**Relation to methodologies and tools:** -- The **SciNoBo toolkit** can extract and validate metadata from publications and associated artefacts, supporting automated FI scoring at scale. - ---- - -#### 4. 
Reproducibility Confidence Index (RCI) - -**Definition:** -The Reproducibility Confidence Index (RCI) measures how the scientific community perceives the **reliability and reproducibility** of a research artefact (dataset, code, software) based on the polarity of its citations. -It incorporates **supporting, neutral, and refuting citances** to determine whether the artefact is generally validated, questioned, or disputed in follow-up research. - -RCI therefore reflects not only the *quantity* of citations, but their *quality* in terms of endorsement or criticism. - -**Formula:** -$$ -RCI = \frac{(1 \times Positive \; Citations) + (0.5 \times Neutral \; Citations) - (1 \times Negative \; Citations)}{Total \; Citations} -$$ - -**Interpretation:** -- RCI = 1 → all citations are positive, strong reproducibility confidence. -- RCI ≈ 0 → balanced or neutral sentiment, no clear consensus on reproducibility. -- RCI < 0 → predominantly negative citations, low reproducibility confidence. - -**Connections to other indicators in the Handbook:** -- Directly based on [Polarity of Publications](../5_reproducibility/polarity_of_publications.qmd), which provides the methodological basis for classifying citances. -- Complements **FWCI** and **FWRI** by adding a qualitative perception dimension to quantitative measures of citation and reuse. - -**Relation to methodologies and tools:** -- **OpenAIRE Research Graph** supports linkage of citations, which can be enriched with polarity classification. -- The **SciNoBo toolkit** [@gialitsis2022; @kotitsas2023] includes functionality for automated citance classification by intent (reuse, comparison, generic), polarity (supporting, refuting, neutral), and semantics (claim, method, results, artefact/output). - ---- - -# Datasources - -To calculate the RCCI, different types of metadata are required — including citation counts, reuse information, citation polarity, and FAIR metadata. 
-The following datasources provide alternative ways to obtain this information. Not all of them are strictly required for every calculation, but together they offer complementary coverage for retrieving the inputs needed for RCCI and its component indicators. - -- **OpenAIRE Research Graph** - [OpenAIRE](https://graph.openaire.eu/) aggregates metadata on publications, datasets, and software. It supports linking artefacts to publications and can be used to identify reuse cases and citances that indicate how artefacts are cited, which is essential for FWRI and RCI. - -- **OpenAlex** - [OpenAlex](https://openalex.org/) is an openly accessible bibliometric database that provides citation counts, references, and links to associated datasets and software. It can be used to calculate citation-based metrics such as FWCI and to identify citation links needed for FWRI and RCI. - -- **Dimensions** - [Dimensions](https://app.dimensions.ai/) offers citation data and normalised indicators such as the Field Citation Ratio (FCR). It provides expected citation baselines by field and year, which are useful for calculating FWCI. - -- **Scopus** - [Scopus](https://www.scopus.com/) is a large citation database that includes the Field-Weighted Citation Impact (FWCI) indicator. It can serve as a source for citation data and normalised impact values used in RCCI. - -- **Web of Science / InCites** - [Web of Science](https://webofscience.com/) provides citation data and normalised citation metrics through InCites, where the Category Normalised Citation Impact (CNCI) is implemented. This can be used as an alternative to Scopus FWCI or Dimensions FCR. - -- **DataCite** - [DataCite](https://datacite.org/) is a registry that provides persistent identifiers (DOIs) and metadata for research datasets and software. It is especially useful for retrieving metadata elements (Name, Version, License, URL) needed for calculating the FAIR Index. 
- -- **Crossref** - [Crossref](https://www.crossref.org/) maintains extensive metadata for scholarly publications and related outputs, including references and links to datasets and software. It is valuable both for reuse tracking (FWRI) and FAIR metadata extraction (FI). - -- **Zenodo / Figshare / Institutional Repositories** - These repositories host datasets, software, and other artefacts. They expose metadata via APIs, which can be used to evaluate FAIRness and retrieve usage information for reuse analysis. - -- **scite.ai** - [scite.ai](https://scite.ai/) provides classification of citation statements into supporting, refuting, or mentioning. It can be used to measure polarity of publications and calculate the RCI. - ---- - -# Existing Methodologies - -## SciNoBo Toolkit - -The **SciNoBo toolkit** [@gialitsis2022a; @kotitsas2023a] has implemented and operationalised the RCCI and its component indicators into a **working monitoring dashboard**. - -- In the **TIER2 project**, SciNoBo was used to extract artefacts from project deliverables and publications, link them to citation and reuse data, and compute FWCI, FWRI, FI, RCI, and RCCI. -- The RCCI results were presented in **pilot dashboards** for funders and RPOs. -- The approach was validated and refined through **stakeholder feedback** in webinars and presentations. - -This makes RCCI not only a conceptual indicator, but also one that has been **implemented and tested in practice**. - ---- - -## Other methodologies - -While SciNoBo currently offers the most complete implementation, other methodologies and tools can be used to compute individual RCCI components: - -- **Citation normalisation** - FWCI can be derived using normalisation approaches described in the [Citation Impact](../2_academic_impact/citation_impact.qmd) indicator, based on expected citation counts per field and year. 
This methodology is implemented in bibliometric databases such as Web of Science/InCites (CNCI), Scopus (FWCI), and Dimensions (FCR). - -- **Reuse detection** - FWRI requires identifying reuse through validated citances. Platforms such as **scite.ai** classify citations as supporting, refuting, or mentioning, while **OpenAIRE Research Graph** can link publications to datasets and software. - These can be complemented with **DataCite** and **Crossref** metadata, which record relationships between publications and artefacts. - -- **FAIR assessment** - The **F-UJI tool** provides an automated method for evaluating dataset FAIRness by checking metadata completeness and quality. This can be used to score the four FAIR Index elements (Name, Version, License, URL). - -- **Polarity classification** - RCI can be measured using **scite.ai**, which classifies citations into supporting, refuting, or mentioning. From 1985b39f777c1666842bd7c69b411a065cd3c375 Mon Sep 17 00:00:00 2001 From: Petros Stavropoulos Date: Tue, 26 Aug 2025 16:15:39 +0300 Subject: [PATCH 6/9] Add new references to the bibliography --- references.bib | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/references.bib b/references.bib index bb0d78c..b027e48 100644 --- a/references.bib +++ b/references.bib @@ -3386,3 +3386,33 @@ @inproceedings{zeleti2014 doi = {10.1007/978-3-662-44745-1_50}, address = {Berlin, Heidelberg} } + +@article{Cobo2011AnAF, + title={An approach for detecting, quantifying, and visualizing the evolution of a research field: A practical application to the Fuzzy Sets Theory field}, + author={M.J. Cobo and Antonio Gabriel L{\'o}pez-Herrera and Enrique Herrera-Viedma and Francisco Herrera}, + journal={J. 
Informetrics},
+  year={2011},
+  volume={5},
+  pages={146-166},
+  url={https://api.semanticscholar.org/CorpusID:9814348}
+}
+
+@article{Waltman2012ANM,
+  title={A new methodology for constructing a publication-level classification system of science},
+  author={Waltman, Ludo and van Eck, Nees Jan},
+  journal={Journal of the American Society for Information Science and Technology},
+  year={2012},
+  volume={63},
+  pages={2378--2392},
+  doi={10.1002/asi.22748}
+}
+
+@article{Yan2014ResearchDM,
+  title={Research dynamics: Measuring the continuity and popularity of research topics},
+  author={Yan, Erjia},
+  journal={Journal of Informetrics},
+  year={2014},
+  volume={8},
+  pages={98--110},
+  doi={10.1016/j.joi.2013.11.006}
+}

From acfc7847a6e43fd5cdf2b625a45c87865d0f3e2c Mon Sep 17 00:00:00 2001
From: Petros Stavropoulos
Date: Tue, 26 Aug 2025 16:18:37 +0300
Subject: [PATCH 7/9] Fix references

---
 sections/2_academic_impact/thematic_persistence.qmd | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sections/2_academic_impact/thematic_persistence.qmd b/sections/2_academic_impact/thematic_persistence.qmd
index 523f172..0377cfe 100644
--- a/sections/2_academic_impact/thematic_persistence.qmd
+++ b/sections/2_academic_impact/thematic_persistence.qmd
@@ -100,7 +100,7 @@ This approach emphasizes **continuity**, while integrating **growth, impact, and
 ##### Longitudinal Co-word Analysis (SciMAT)
 
-The [SciMAT framework](https://doi.org/10.1016/j.joi.2010.09.002) (Cobo et al., 2011) measures persistence by detecting **continuing themes** across consecutive time periods.
+The SciMAT framework [@Cobo2011AnAF] measures persistence by detecting **continuing themes** across consecutive time periods.
 
 - Topics identified via keyword co-occurrence networks.
 
 - Continuity measured using the **Inclusion Index**:
@@ -116,7 +116,7 @@ This approach emphasizes **structural continuity of thematic vocabularies**.
##### Direct-Citation Topic Survival -The [CWTS publication-level classification system](https://doi.org/10.1002/asi.22748) enables persistence analysis based on citation-linked topic clusters. +The CWTS publication-level classification system [@Waltman2012ANM] enables persistence analysis based on citation-linked topic clusters. - Topics are defined via **direct citation clustering**.\ - Persistence is measured through indicators such as: @@ -127,7 +127,7 @@ This approach measures persistence at the **topic-cluster level**, grounded in c ##### Continuity Typologies -The [continuity framework](https://doi.org/10.1016/j.joi.2013.11.006) quantifies persistence by categorizing topics into distinct evolutionary types: +The continuity framework [@Yan2014ResearchDM] quantifies persistence by categorizing topics into distinct evolutionary types: - **Steady**: stable over time\ - **Concentrating**: narrowing focus while persisting\ @@ -136,4 +136,4 @@ The [continuity framework](https://doi.org/10.1016/j.joi.2013.11.006) quantifies - **Emerging**: new and growing Continuity is evaluated by the **strength of inter-year linkages** among topic clusters.\ -This allows distinguishing different **modes of persistence** and topic evolution. \ No newline at end of file +This allows distinguishing different **modes of persistence** and topic evolution. 
From 8d81e4d1f1b30272c5297baeeb3754301e0b10a3 Mon Sep 17 00:00:00 2001 From: Vincent Traag Date: Tue, 26 Aug 2025 16:08:58 +0200 Subject: [PATCH 8/9] Add indicator to quarto project --- _quarto.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_quarto.yml b/_quarto.yml index 946361f..1932778 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -86,6 +86,7 @@ website: - sections/2_academic_impact/quality.qmd - sections/2_academic_impact/use_of_code_in_research.qmd - sections/2_academic_impact/use_of_data_in_research.qmd + - sections/2_academic_impact/thematic_persistence.qmd - title: Societal Impact contents: From 172f3048a6215e9bfce6cda0f33f1ecc41be2e5a Mon Sep 17 00:00:00 2001 From: Vincent Traag Date: Tue, 26 Aug 2025 16:45:51 +0200 Subject: [PATCH 9/9] Sort and align references --- references.bib | 90 +++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/references.bib b/references.bib index b027e48..d310541 100644 --- a/references.bib +++ b/references.bib @@ -512,6 +512,16 @@ @article{civan2010 doi = {10.1002/hec.1494} } +@article{Cobo2011AnAF, + title = {An approach for detecting, quantifying, and visualizing the evolution of a research field: A practical application to the Fuzzy Sets Theory field}, + author = {M.J. Cobo and Antonio Gabriel L{\'o}pez-Herrera and Enrique Herrera-Viedma and Francisco Herrera}, + journal = {J. 
Informetrics}, + year = {2011}, + volume = {5}, + pages = {146-166}, + url = {https://api.semanticscholar.org/CorpusID:9814348} +} + @misc{codeof, title = {Code of Practice}, url = {https://www.countermetrics.org/code-of-practice/}, @@ -1113,6 +1123,7 @@ @inproceedings{gialitsis2022 langid = {en} } + @inproceedings{gialitsis2022a, title = {WWW '22: The ACM Web Conference 2022}, author = {Gialitsis, Nikolaos and Kotitsas, Sotiris and Papageorgiou, Haris}, @@ -1142,7 +1153,6 @@ @inproceedings{gialitsis2022b langid = {en} } - @article{giovani2017, author = {Giovani, B.}, title = {Open Data for Research and Strategic Monitoring in the Pharmaceutical and Biotech Industry}, @@ -1153,6 +1163,7 @@ @article{giovani2017 doi = {10.5334/dsj-2017-018} } + @article{glaser_governing_2016, title = {Governing {Science}: {How} {Science} {Policy} {Shapes} {Research} {Content}}, volume = {57}, @@ -1170,7 +1181,6 @@ @article{glaser_governing_2016 pages = {117--168} } - @article{goben2020, title = {Open data repositories: Current risks and opportunities | Goben | College & Research Libraries News}, author = {Goben, Abigail and Sandusky, Robert J.}, @@ -1240,6 +1250,7 @@ @article{gordon2021 langid = {en} } + @article{gormally2012, title = {Developing a Test of Scientific Literacy Skills (TOSLS): Measuring Undergraduates{\textquoteright} Evaluation of Scientific Information and Arguments}, author = {Gormally, Cara and Brickman, Peggy and Lutz, Mary}, @@ -1263,7 +1274,6 @@ @misc{goyal_causal_2024 year = {2024} } - @article{grimme, title = {The State of Open Monographs}, author = {Grimme, Sara and Holland, Cathy and Potter, Peter and Taylor, Mike and Watkinson, Charles}, @@ -1599,6 +1609,7 @@ @article{keller2014 langid = {en} } + @article{khan2022, title = {Open science failed to penetrate academic hiring practices: a cross-sectional study}, author = {Khan, Hassan and Almoli, Elham and Franco, Marina Christ and Moher, David}, @@ -1613,7 +1624,6 @@ @article{khan2022 langid = {en} } - 
@article{klebel_academic_2025, title = {The academic impact of {Open} {Science}: a scoping review}, volume = {12}, @@ -2081,6 +2091,7 @@ @book{monitori2021 langid = {eng} } + @article{munafò2017, title = {A manifesto for reproducible science}, author = {{Munafò}, Marcus R. and Nosek, Brian A. and Bishop, Dorothy V. M. and Button, Katherine S. and Chambers, Christopher D. and Percie Du Sert, Nathalie and Simonsohn, Uri and Wagenmakers, Eric-Jan and Ware, Jennifer J. and Ioannidis, John P. A.}, @@ -2096,7 +2107,6 @@ @article{munafò2017 langid = {en} } - @article{munafò2018, title = {Robust research needs many lines of evidence}, author = {{Munafò}, Marcus R. and Smith, George Davey}, @@ -2142,6 +2152,7 @@ @article{nielsen2023 doi = {10.1038/s41598-023-33102-5} } + @inbook{norris2014, title = {Conceptions of Scientific Literacy: Identifying and Evaluating Their Programmatic Elements}, author = {Norris, Stephen P. and Phillips, Linda M. and Burns, David P.}, @@ -2172,7 +2183,6 @@ @article{nosek2015 note = {Publisher: American Association for the Advancement of Science} } - @article{nosek2022, title = {Replicability, Robustness, and Reproducibility in Psychological Science}, author = {Nosek, Brian A. and Hardwicke, Tom E. and Moshontz, Hannah and Allard, {Aurélien} and Corker, Katherine S. and Dreber, Anna and Fidler, Fiona and Hilgard, Joe and Kline Struhl, Melissa and Nuijten, {Michèle B.} and Rohrer, Julia M. and Romero, Felipe and Scheel, Anne M. and Scherer, Laura D. and {Schönbrodt}, Felix D. 
and Vazire, Simine}, @@ -2611,6 +2621,7 @@ @inbook{roberts2013 url = {https://api.taylorfrancis.com/content/chapters/edit/download?identifierName=doi&identifierValue=10.4324/9780203824696-29&type=chapterpdf} } + @inbook{roberts2013a, title = {Scientific literacy/science literacy}, author = {Roberts, Douglas A.}, @@ -2711,6 +2722,7 @@ @article{ross-hellauer2022 } + @techreport{ruiter2023, title = {Automatically Finding and Categorizing Replication Studies}, author = {Ruiter, Bob de}, @@ -2722,7 +2734,6 @@ @techreport{ruiter2023 } - @article{schmidt2009, title = {Shall we Really do it Again? The Powerful Concept of Replication is Neglected in the Social Sciences}, author = {Schmidt, Stefan}, @@ -2739,6 +2750,7 @@ @article{schmidt2009 } + @article{schnog2021, author = {Schnog, J.-J. B. and Samson, M. J. and Gans, R. O. B. and Duits, A. J.}, title = {An urgent call to raise the bar in oncology}, @@ -2751,7 +2763,6 @@ @article{schnog2021 } - @article{schoenmakers2010, title = {The technological origins of radical inventions}, author = {Schoenmakers, Wilfred and Duysters, Geert}, @@ -2765,7 +2776,6 @@ @article{schoenmakers2010 note = {Publisher: Elsevier} } - @article{schulz, title = {A network-based citation indicator of scientific performance}, author = {Schulz, Christian and Uzzi, Brian and Helbing, Dirk and Woolley-Meza, Olivia}, @@ -2773,6 +2783,7 @@ @article{schulz doi = {10.48550/arXiv.1807.04712} } + @article{shirk2012, title = {Public Participation in Scientific Research: a Framework for Deliberate Design}, author = {Shirk, Jennifer L. and Ballard, Heidi L. and Wilderman, Candie C. and Phillips, Tina and Wiggins, Andrea and Jordan, Rebecca and McCallie, Ellen and Minarchek, Matthew and Lewenstein, Bruce V. and Krasny, Marianne E. 
and Bonney, Rick}, @@ -2785,7 +2796,6 @@ @article{shirk2012 note = {Publisher: Resilience Alliance Inc.} } - @book{smaldino2023, title = {Modeling social behavior: mathematical and agent-based models of social dynamics and cultural evolution}, author = {Smaldino, Paul E.}, @@ -2795,6 +2805,7 @@ @book{smaldino2023 address = {Princeton} } + @techreport{soyer_what_2021, title = {What is societal impact of research? {A} literature review}, shorttitle = {What is societal impact of research?}, @@ -2807,7 +2818,6 @@ @techreport{soyer_what_2021 file = {PDF:/home/vtraag/Zotero/storage/ACU7XAP5/Literature-Review_Societal-Research-Impact.pdf:application/pdf} } - @techreport{sparceurope2019, title = {Using open and FAIR data to increase research efficiency}, author = {SPARC Europe}, @@ -2871,6 +2881,8 @@ @article{sugimoto2011 url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.21568} } + + @techreport{sveinsdottir2021, title = {An Analysis of Open Science Policies in Europe, v7}, author = {Sveinsdottir, Thordis and Davidson, Joy and Proudman, Vanessa}, @@ -2882,7 +2894,6 @@ @techreport{sveinsdottir2021 } - @article{szomszor2022, title = {Overton: A bibliometric database of policy document citations}, author = {Szomszor, Martin and Adie, Euan}, @@ -2898,6 +2909,7 @@ @article{szomszor2022 } + @book{tashakkori2021, title = {Foundations of mixed methods research: integrating quantitative and qualitative approaches in the social and behavioral sciences}, author = {Tashakkori, Abbas and Johnson, R. Burke and Teddlie, Charles}, @@ -2909,7 +2921,6 @@ @book{tashakkori2021 } - @article{tattersall2018, title = {What Can Altmetric.com Tell Us About Policy Citations of Research? 
An Analysis of Altmetric.com Data for Research Articles from the University of Sheffield}, author = {Tattersall, Andy and Carroll, Christopher}, @@ -2924,7 +2935,6 @@ @article{tattersall2018 langid = {English} } - @article{tennant2016, title = {The academic, economic and societal impacts of Open Access: an evidence-based review}, author = {Tennant, Jonathan P. and Waldner, {François} and Jacques, Damien C. and Masuzzo, Paola and Collister, Lauren B. and Hartgerink, Chris H. J.}, @@ -2935,6 +2945,7 @@ @article{tennant2016 url = {https://f1000research.com/articles/5-632} } + @article{tiokhin_shifting_2023, title = {Shifting the {Level} of {Selection} in {Science}}, issn = {1745-6916}, @@ -2949,7 +2960,6 @@ @article{tiokhin_shifting_2023 pages = {17456916231182568} } - @article{tomkins_reviewer_2017, title = {Reviewer bias in single- versus double-blind peer review}, volume = {114}, @@ -3123,6 +3133,16 @@ @article{waltman2011 note = {PMID: 21654898} } +@article{Waltman2012ANM, + title = {A new methodology for constructing a publication-level classification system of science}, + author = {Ludo Waltman and Nees Jan van Eck}, + journal = {J. Assoc. Inf. Sci. Technol.}, + year = {2012}, + volume = {63}, + pages = {2378-2392}, + url = {https://api.semanticscholar.org/CorpusID:15589099} +} + @article{waltman2013, title = {On the calculation of percentile-based bibliometric indicators}, author = {Waltman, Ludo and Schreiber, Michael}, @@ -3322,6 +3342,16 @@ @article{wuchty2007 langid = {en} } +@article{Yan2014ResearchDM, + title = {Research dynamics: Measuring the continuity and popularity of research topics}, + author = {Erjia Yan}, + journal = {J. 
Informetrics},
+  year = {2014},
+  volume = {8},
+  pages = {98--110},
+  url = {https://api.semanticscholar.org/CorpusID:35965754}
+}
+
 @article{yarkoni2019,
   title = {The Generalizability Crisis},
   author = {Yarkoni, Tal},
@@ -3386,33 +3416,3 @@ @inproceedings{zeleti2014
   doi = {10.1007/978-3-662-44745-1_50},
   address = {Berlin, Heidelberg}
 }
-
-@article{Cobo2011AnAF,
-  title={An approach for detecting, quantifying, and visualizing the evolution of a research field: A practical application to the Fuzzy Sets Theory field},
-  author={M.J. Cobo and Antonio Gabriel L{\'o}pez-Herrera and Enrique Herrera-Viedma and Francisco Herrera},
-  journal={J. Informetrics},
-  year={2011},
-  volume={5},
-  pages={146-166},
-  url={https://api.semanticscholar.org/CorpusID:9814348}
-}
-
-@article{Waltman2012ANM,
-  title={A new methodology for constructing a publication-level classification system of science},
-  author={Ludo Waltman and Nees Jan van Eck},
-  journal={J. Assoc. Inf. Sci. Technol.},
-  year={2012},
-  volume={63},
-  pages={2378-2392},
-  url={https://api.semanticscholar.org/CorpusID:15589099}
-}
-
-@article{Yan2014ResearchDM,
-  title={Research dynamics: Measuring the continuity and popularity of research topics},
-  author={Erjia Yan},
-  journal={J. Informetrics},
-  year={2014},
-  volume={8},
-  pages={98-110},
-  url={https://api.semanticscholar.org/CorpusID:35965754}
-}