diff --git a/assets/less/cds-rdm/administration/harvester-reports.less b/assets/less/cds-rdm/administration/harvester-reports.less index 12a16ecb..b6d3ee1f 100644 --- a/assets/less/cds-rdm/administration/harvester-reports.less +++ b/assets/less/cds-rdm/administration/harvester-reports.less @@ -53,4 +53,58 @@ color: #999; margin-top: 0.2em; } -} \ No newline at end of file +} + +.harvester-report-actions { + display: flex; + flex-direction: row; + flex-wrap: nowrap; + align-items: center; + justify-content: flex-end; + gap: 0.5em; + width: 100%; + box-sizing: border-box; + padding-right: 0.25rem; + + > .ui.button { + flex: 0 0 auto; + margin: 0 !important; + } +} + +.ui.grid .column.harvester-report-actions-col { + padding-right: 1.25rem; +} + +.harvester-run-report-meta { + margin-top: 0.15rem !important; +} + +.harvester-run-log-grid { + margin-top: 1rem; +} + +.harvester-run-success-message { + white-space: normal; + text-align: left; +} + +// Job run report: wrap long lines; scroll region height (matches admin log-table intent) +.harvester-run-log-report { + width: 100%; + + .harvester-run-log-table.log-table { + max-height: calc(100vh - 11rem); + overflow-x: hidden; + } + + .harvester-run-log-segment { + overflow-x: hidden; + } + + .log-line .log-message { + white-space: pre-wrap; + overflow-wrap: anywhere; + word-break: break-word; + } +} diff --git a/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/DownloadButton.js b/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/DownloadButton.js index 8ca4c67a..0b981fbd 100644 --- a/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/DownloadButton.js +++ b/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/DownloadButton.js @@ -5,39 +5,39 @@ // under the terms of the GPL-2.0 License; see LICENSE file for more details. import React from "react"; -import { withState } from "react-searchkit"; import { Button, Icon } from "semantic-ui-react"; import { i18next } from "@translations/invenio_administration/i18next"; -import { extractRunIdFromQuery } from "./utils"; -const DownloadButtonComponent = ({ currentQueryState }) => { - const domContainer = document.getElementById("invenio-search-config"); - const runs = JSON.parse(domContainer?.dataset.harvesterRuns || "[]"); - - const runId = extractRunIdFromQuery( - currentQueryState.queryString || "", - runs - ); - - const handleDownload = () => { - if (!runId) return; - const params = new URLSearchParams({ run_id: runId }); - window.location.href = `/harvester-reports/download?${params.toString()}`; - }; - - return ( +export const DownloadButton = ({ runId }) => ( +
- ); -}; - -export const DownloadButton = withState(DownloadButtonComponent); + +
+); diff --git a/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/SearchBar.js b/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/SearchBar.js index ad37da32..38b549f6 100644 --- a/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/SearchBar.js +++ b/site/cds_rdm/assets/semantic-ui/js/cds_rdm/administration/harvesterReports/SearchBar.js @@ -28,18 +28,15 @@ const SearchBarComponent = ({ updateQueryState, currentQueryState }) => { const { sortOptions, sortOrderDisabled } = useContext(SearchConfigurationContext); - // Derive selected run from the timestamp in the current query — null if user typed a custom range - const runIdFromQuery = extractRunIdFromQuery(currentQueryState.queryString, runs); - const selectedRun = runs.find((r) => r.id === runIdFromQuery) || null; + const [activeRunId, setActiveRunId] = React.useState(() => defaultRun?.id ?? null); - const [inputValue, setInputValue] = React.useState(currentQueryState.queryString || ""); + const runIdFromQuery = extractRunIdFromQuery(currentQueryState.queryString, runs); + const effectiveRunId = runIdFromQuery || activeRunId; + const selectedRun = runs.find((r) => r.id === effectiveRunId) || null; - // Auto-select default run on mount only if there is no existing query - React.useEffect(() => { - if (!currentQueryState.queryString && defaultRun) { - executeSearch(defaultRun, ""); - } - }, []); + const [inputValue, setInputValue] = React.useState( + currentQueryState.queryString || "" + ); const executeSearch = (run, userInput) => { const timestampFilter = buildTimestampFilter(run); @@ -57,9 +54,25 @@ const SearchBarComponent = ({ updateQueryState, currentQueryState }) => { }); }; + React.useEffect(() => { + const q = currentQueryState.queryString || ""; + const fromQuery = extractRunIdFromQuery(q, runs); + if (fromQuery) { + setActiveRunId(fromQuery); + return; + } + if (!q && defaultRun) { + setActiveRunId(defaultRun.id); + executeSearch(defaultRun, ""); + } + }, []); + const onRunChange = (e, { value }) => { + setActiveRunId(value || null); const run = runs.find((r) => r.id === value); - executeSearch(run, ""); + if (run) { + executeSearch(run, ""); + } }; const onBtnSearchClick = () => { @@ -68,6 +81,10 @@ const SearchBarComponent = ({ updateQueryState, currentQueryState }) => { queryString: inputValue, hiddenParams, }); + const id = extractRunIdFromQuery(inputValue, runs); + if (id) { + setActiveRunId(id); + } }; const formatDate = (dateStr) => { @@ -114,7 +131,7 @@ const SearchBarComponent = ({ updateQueryState, currentQueryState }) => { selection placeholder={i18next.t("Select a harvest run...")} options={runOptions} - value={selectedRun?.id || ""} + value={activeRunId || ""} onChange={onRunChange} /> @@ -162,7 +179,7 @@ const SearchBarComponent = ({ updateQueryState, currentQueryState }) => { - +
{i18next.t("Search Logs")}
{ }} />
- + - - + +
diff --git a/site/cds_rdm/harvester_download/resources/resource.py b/site/cds_rdm/harvester_download/resources/resource.py index 52f30fbb..31281b7b 100644 --- a/site/cds_rdm/harvester_download/resources/resource.py +++ b/site/cds_rdm/harvester_download/resources/resource.py @@ -9,11 +9,13 @@ import re import uuid -from datetime import datetime +from datetime import datetime, timezone from flask import Response, current_app, request +from flask_babel import format_datetime, get_timezone from flask_resources import Resource, route from invenio_access.permissions import system_identity +from invenio_i18n import gettext as _ from invenio_jobs.models import Run from invenio_jobs.proxies import current_jobs_logs_service @@ -22,6 +24,45 @@ INSPIRE_HARVESTER_TASK = "process_inspire" +def _format_timestamp(value): + """``Run.started_at`` / ``finished_at``: naive DB datetimes are UTC → user locale.""" + if value is None or value == "": + return "N/A" + if isinstance(value, datetime): + dt = ( + value.replace(tzinfo=timezone.utc) + if value.tzinfo is None + else value + ) + else: + try: + dt = datetime.fromisoformat(str(value).replace("Z", "+00:00")) + except (ValueError, TypeError): + return str(value) + return format_datetime(dt, "yyyy-MM-dd HH:mm", rebase=True) + + +def _format_log_hit_timestamp(value): + """Job log ``timestamp`` from search: zone-less ISO is local wall (like admin RunsLogs).""" + if value is None or value == "": + return "N/A" + if isinstance(value, datetime): + dt = value + else: + try: + dt = datetime.fromisoformat(str(value).replace("Z", "+00:00")) + except (ValueError, TypeError): + return str(value) + if dt.tzinfo is None: + tz = get_timezone() + if hasattr(tz, "localize"): + dt = tz.localize(dt) + else: + dt = dt.replace(tzinfo=tz) + return format_datetime(dt, "yyyy-MM-dd HH:mm", rebase=False) + return format_datetime(dt, "yyyy-MM-dd HH:mm", rebase=True) + + class HarvesterDownloadResource(Resource): """Harvester download resource.""" @@ -32,38 +73,33 @@ def create_url_rules(self): route("GET", routes["download"], self.download), ] - def download(self): - """Download a harvester run's logs as a plain-text ``.log`` file. - - Mirrors the admin job-run page: status header, failure banner, - truncation warning, and task-grouped entries formatted as - ``[yyyy-MM-dd HH:mm] LEVEL message``. - """ - permission = curators_permission - if not permission.can(): - return {"message": "Permission denied"}, 403 - - run_id = request.args.get("run_id", "").strip() + @staticmethod + def _resolve_harvester_run(run_id): + """Return ``(run, None)`` or ``(None, (error_dict, status))``.""" + run_id = (run_id or "").strip() if not run_id: - return {"message": "Missing run_id"}, 400 + return None, ({"message": "Missing run_id"}, 400) try: uuid.UUID(run_id) except ValueError: - return {"message": "Invalid run_id"}, 400 + return None, ({"message": "Invalid run_id"}, 400) run = Run.query.filter_by(id=run_id, parent_run_id=None).one_or_none() if not run: - return {"message": "Run not found"}, 404 - + return None, ({"message": "Run not found"}, 404) if not run.job or run.job.task != INSPIRE_HARVESTER_TASK: - return {"message": "Run is not a harvester run"}, 404 - + return None, ({"message": "Run is not a harvester run"}, 404) + return run, None - max_results = current_app.config.get("JOBS_LOGS_MAX_RESULTS", 2000) + def _fetch_hits(self, run): + """Return ``(hits, total)`` from structured job logs.""" try: result = current_jobs_logs_service.search( system_identity, - params={"q": str(run.id), "sort": "timestamp"}, + params={ + "q": f'"{run.id}"', + "sort": "timestamp", + }, ) hits = list(result.hits) total = result.total or len(hits) @@ -73,19 +109,10 @@ def download(self): ) hits = [] total = 0 + return hits, total - def _format_timestamp(raw): - # Admin UI (RunsLogs.js) format. - if not raw: - return "N/A" - try: - return datetime.fromisoformat( - raw.replace("Z", "+00:00") - ).strftime("%Y-%m-%d %H:%M") - except (ValueError, TypeError): - return raw - - # Group by context.task_id in first-seen order (RunsLogs.js buildLogTree). + def _lines_from_hits(self, hits): + """De-duplicated lines and counts (task-group order, same as before).""" task_groups = {} seen = set() error_count = 0 @@ -93,8 +120,6 @@ def _format_timestamp(raw): for hit in hits: raw_ts = hit.get("timestamp") level = hit.get("level", "INFO") - # Collapse whitespace so multi-line errors render on one line - # (admin UI does the same via ``white-space: normal``). message = re.sub(r"\s+", " ", (hit.get("message") or "")).strip() key = (raw_ts, level, message) if key in seen: @@ -106,35 +131,41 @@ def _format_timestamp(raw): warning_count += 1 task_id = (hit.get("context") or {}).get("task_id") or "unknown" task_groups.setdefault(task_id, []).append( - f"[{_format_timestamp(raw_ts)}] {level} {message}" + f"[{_format_log_hit_timestamp(raw_ts)}] {level} {message}" ) - lines = [line for group in task_groups.values() for line in group] + return lines, error_count, warning_count - header = [] + def _plain_text_log(self, run, lines, total, error_count, warning_count): + """Same plain-text shape as the original download endpoint.""" + max_results = current_app.config.get("JOBS_LOGS_MAX_RESULTS", 2000) status = getattr(run.status, "name", str(run.status)) - header.append(f"Status: {status}") - header.append(f"Started: {_format_timestamp(run.started_at.isoformat())}") + header = [ + f"Status: {status}", + f"Started: {_format_timestamp(run.started_at)}", + ] if run.finished_at: - header.append( - f"Finished: {_format_timestamp(run.finished_at.isoformat())}" - ) + header.append(f"Finished: {_format_timestamp(run.finished_at)}") summary = [] if status in ("FAILED", "PARTIAL_SUCCESS", "SUCCESS"): summary.append( { - "FAILED": "Job failed", - "PARTIAL_SUCCESS": "Job partially succeeded", - "SUCCESS": "Job completed successfully", + "FAILED": _("Job failed"), + "PARTIAL_SUCCESS": _("Job partially succeeded"), + "SUCCESS": _("Job completed successfully"), }[status] ) if run.message: summary.append(run.message) if error_count: - summary.append(f"{error_count} error(s) found in logs below") + summary.append( + _("%(count)s error(s) found in logs below", count=error_count) + ) if warning_count: - summary.append(f"{warning_count} warning(s) found in logs below") + summary.append( + _("%(count)s warning(s) found in logs below", count=warning_count) + ) if summary: header.append("") header.extend(summary) @@ -147,9 +178,53 @@ def _format_timestamp(raw): header.append("=" * 80) logs = "\n".join(header + lines) - if not lines: logs += "\n" + (run.message or "No logs available for this run.\n") + return logs + + def report_template_context(self, run_id): + """Context for the colored HTML report page; errors like ``download``.""" + run, err = self._resolve_harvester_run(run_id) + if err: + return None, err + hits, total = self._fetch_hits(run) + lines, error_count, _unused_warnings = self._lines_from_hits(hits) + status = getattr(run.status, "name", str(run.status)) + + truncation_message = None + if total and total > len(lines): + truncation_message = ( + f"Log results truncated. Too many log results returned ({total}). " + f"Only the most recent {len(lines)} results are shown." + ) + + display_title = (getattr(run, "title", None) or "").strip() or f"Run {run.id}" + ctx = { + "run": run, + "title": display_title, + "status": status, + "started_at": _format_timestamp(run.started_at), + "finished_at": ( + _format_timestamp(run.finished_at) if run.finished_at else None + ), + "truncation_message": truncation_message, + "lines": lines, + "error_count": error_count, + } + return ctx, None + + def download(self): + """Download a harvester run's logs as a plain-text ``.log`` file.""" + if not curators_permission.can(): + return {"message": "Permission denied"}, 403 + + run, err = self._resolve_harvester_run(request.args.get("run_id", "")) + if err: + return err + + hits, total = self._fetch_hits(run) + lines, error_count, warning_count = self._lines_from_hits(hits) + logs = self._plain_text_log(run, lines, total, error_count, warning_count) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"harvester_logs_{run.id}_{timestamp}.log" diff --git a/site/cds_rdm/templates/semantic-ui/cds_rdm/administration/harvester_run_report.html b/site/cds_rdm/templates/semantic-ui/cds_rdm/administration/harvester_run_report.html new file mode 100644 index 00000000..035acae7 --- /dev/null +++ b/site/cds_rdm/templates/semantic-ui/cds_rdm/administration/harvester_run_report.html @@ -0,0 +1,23 @@ +{# +Copyright (C) 2026 CERN. + +CDS-RDM is free software; you can redistribute it and/or modify it +under the terms of the GPL-2.0 License; see LICENSE file for more details. + +Job run report: same admin shell as other administration pages (no custom width wrapper). +#} +{% extends "cds_rdm/administration/admin_base_template.html" %} + +{% block page_title %} +

{{ title }}

+

+ {{ _("Status") }}: {{ status }} + · {{ _("Started") }}: {{ started_at }} + {% if finished_at %}· {{ _("Finished") }}: {{ finished_at }}{% endif %} +

+ +{% endblock page_title %} + +{% block admin_page_content %} +{% include "cds_rdm/harvester_download/report_body.html" %} +{% endblock admin_page_content %} diff --git a/site/cds_rdm/templates/semantic-ui/cds_rdm/harvester_download/report_body.html b/site/cds_rdm/templates/semantic-ui/cds_rdm/harvester_download/report_body.html new file mode 100644 index 00000000..ff89e457 --- /dev/null +++ b/site/cds_rdm/templates/semantic-ui/cds_rdm/harvester_download/report_body.html @@ -0,0 +1,91 @@ +{# +Copyright (C) 2026 CERN. + +CDS-RDM is free software; you can redistribute it and/or modify it +under the terms of the GPL-2.0 License; see LICENSE file for more details. + +Job run report body: layout aligned with invenio_jobs RunsLogs. Presentation-only +logic lives here; resource.py only supplies run, lines, and counts. +#} +
+ {% if truncation_message %} +
+ +
+
{{ _("Log results truncated") }}
+

{{ truncation_message }}

+
+
+ {% endif %} + +
+
+

{{ _("Job run") }}

+
+
+ {% if status == "SUCCESS" %} + + {% elif status == "FAILED" %} + + {% elif status == "RUNNING" %} + + {% elif status == "PARTIAL_SUCCESS" %} + + {% elif status == "CANCELLED" %} + + {% elif status == "QUEUED" %} + + {% else %} + + {% endif %} +
+ {% if started_at and started_at != "N/A" %} +

{{ started_at }}

+ {% if run.started_at and run.finished_at %} +

+ {{ ((run.finished_at - run.started_at).total_seconds() // 60) | int }} {{ _("mins") }} +

+ {% endif %} + {% else %} +

{{ _("Not yet started") }}

+ {% endif %} + {% if run.message and status not in ("FAILED", "PARTIAL_SUCCESS") %} +
+ {{ run.message }} +
+ {% endif %} +
+
+
+
+
+ {% if status in ("FAILED", "PARTIAL_SUCCESS") %} +
+ +
+
+ {% if status == "FAILED" %}{{ _("Job failed") }}{% else %}{{ _("Job partially succeeded") }}{% endif %} +
+ {% if run.message %} +
{{ run.message }}
+ {% endif %} + {% if error_count %} +

{{ _("%(count)s error(s) found in logs below", count=error_count) }}

+ {% endif %} +
+
+ {% endif %} + +
+ {% for line in lines %} +
+ {{ line }} +
+ {% endfor %} + {% if not lines %} +

{{ _("No log lines in this view.") }}

+ {% endif %} +
+
+
+
diff --git a/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.html b/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.html index dcb53f11..29c701d0 100644 --- a/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.html +++ b/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.html @@ -46,7 +46,7 @@

Harvester Actions

- Download Harvester Logs + Download error log

Download all record.publish audit logs from this harvest run @@ -57,7 +57,7 @@

Harvester Actions

- View Harvester Reports + View list of changes

View detailed audit reports for all harvester runs diff --git a/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.txt b/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.txt index 07f278f8..fbabaaba 100644 --- a/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.txt +++ b/site/cds_rdm/templates/semantic-ui/invenio_jobs/emails/run_notification.txt @@ -25,10 +25,10 @@ Harvester Actions: {% set end_time = (run.finished_at | string | replace(' ', 'T')) if run.finished_at else '*' %} {% set timestamp_range = "@timestamp:[" ~ start_time ~ " TO " ~ end_time ~ "]" %} -Download Harvester Logs: +Download error log: {{ config.SITE_UI_URL }}/harvester-reports/download?q={{ timestamp_range | urlencode }}&action=record.publish -View Harvester Reports: +View list of changes: {{ config.SITE_UI_URL }}/administration/harvester-reports?q={{ timestamp_range | urlencode }}&l=list&p=1&s=20&sort=newest {% endif %} diff --git a/site/cds_rdm/views.py b/site/cds_rdm/views.py index fb931943..a2172a71 100644 --- a/site/cds_rdm/views.py +++ b/site/cds_rdm/views.py @@ -8,7 +8,7 @@ """CDS views.""" -from flask import Blueprint, current_app, render_template, url_for +from flask import Blueprint, current_app, jsonify, render_template, url_for from flask_principal import AnonymousIdentity from invenio_access.permissions import any_user from invenio_app_rdm.records_ui.utils import dump_external_resource @@ -45,9 +45,28 @@ def create_cds_clc_sync_bp(app): def create_harvester_download_bp(app): - """Create harvester download blueprint.""" + """Harvester log download blueprint and colored HTML report page.""" + from cds_rdm.administration.permissions import curators_permission + ext = app.extensions["cds-rdm"] - return ext.harvester_download_resource.as_blueprint() + bp = ext.harvester_download_resource.as_blueprint() + + report_bp = Blueprint("cds_rdm_harvester_report_page", __name__) + + @report_bp.route("/administration/harvester-reports//report") + @curators_permission.require(http_exception=403) + def harvester_run_report(run_id): + ctx, err = ext.harvester_download_resource.report_template_context(str(run_id)) + if err: + body, code = err + return jsonify(body), code + return render_template( + "cds_rdm/administration/harvester_run_report.html", + **ctx, + ) + + app.register_blueprint(report_bp) + return bp def inspire_link_render(record):