From 0982cde6c19ade55e2d122151cd8a33c10c9e269 Mon Sep 17 00:00:00 2001 From: orin Date: Fri, 22 May 2026 11:49:34 +0800 Subject: [PATCH] add benchmark thresholds and positioning docs --- .gitignore | 8 ++ README.md | 36 ++++++--- benchmarks/README.md | 27 +++++++ benchmarks/manifest.schema.json | 96 +++++++++++++++++++++++ benchmarks/sample_manifest.json | 41 ++++++++++ benchmarks/thresholds/golden_minimum.json | 12 +++ docs/financial_fact_platform_roadmap.md | 22 +++--- scripts/eval.py | 57 ++++++++++++++ tests/test_benchmark_manifest.py | 14 ++++ tests/test_eval_script.py | 69 +++++++++++++++- 10 files changed, 357 insertions(+), 25 deletions(-) create mode 100644 benchmarks/README.md create mode 100644 benchmarks/manifest.schema.json create mode 100644 benchmarks/sample_manifest.json create mode 100644 benchmarks/thresholds/golden_minimum.json create mode 100644 tests/test_benchmark_manifest.py diff --git a/.gitignore b/.gitignore index b02013e..eb00336 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,14 @@ __pycache__/ data/ tmpclaude-* +# Benchmark data policy: commit manifests/schemas/thresholds, keep raw/private data local. +benchmarks/raw/ +benchmarks/private/ +benchmarks/**/*.pdf +benchmarks/**/*.xlsx +benchmarks/**/*.xls +benchmarks/**/labels.private.json + # Example runtime artifacts examples/**/output/ examples/**/fixtures/*.pdf diff --git a/README.md b/README.md index ab0d696..7c515a1 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,14 @@ # Jetbot -Jetbot is a financial report analysis platform that turns PDF filings into structured financial statements, key notes, risk signals, event-study outputs, and trader-style summaries. It combines PDF extraction, validation, LLM orchestration, a FastAPI backend, and a Vue dashboard in one repository. +Jetbot is a Filing-to-Model Copilot and Financial Fact Platform for evidence-backed financial report extraction. It turns PDF filings into canonical financial facts, structured statements, key notes, risk signals, event-study outputs, and analyst-ready summaries. -It is designed for teams that need a single workflow to ingest reports, inspect extracted evidence, and ship the results through an API, a CLI, or a browser UI. +It is designed for teams that need a single workflow to ingest reports, inspect source evidence, review and correct extracted facts, and ship the results through an API, a CLI, exports, or a browser UI. ## Highlights -- End-to-end PDF pipeline for raw text, tables, statements, notes, and report generation. +- End-to-end PDF pipeline for raw text, tables, statements, notes, facts, and report generation. +- Canonical financial fact layer with page/table/cell evidence metadata for review and downstream exports. +- Evaluation runner with machine-readable reports and configurable quality thresholds. - Works in mock mode out of the box, with optional OpenAI and Anthropic model routing. - Vue 3 dashboard for reviewing original PDFs alongside extraction and analysis outputs. - Docker-first local stack with API, worker, Redis, PostgreSQL, and MinIO. @@ -17,13 +19,13 @@ It is designed for teams that need a single workflow to ingest reports, inspect ```mermaid flowchart LR - A[Financial PDF] --> B[PDF extraction and OCR] - B --> C[Normalization and validation] - C --> D[LLM enrichment and report generation] - C --> E[Risk signals and event study] - D --> F[FastAPI and CLI] - E --> F - F --> G[Vue dashboard at /ui] + A[Financial Filing PDF] --> B[PDF extraction and OCR] + B --> C[Statements and canonical facts] + C --> D[Evidence and validation] + D --> E[Review, API, and exports] + D --> F[Risk signals and analyst reports] + E --> G[Vue dashboard at /ui] + F --> G ``` ## Quick Start @@ -81,7 +83,7 @@ After startup, the main entry points are: | Surface | URL / Command | Notes | | --- | --- | --- | | Web UI | `http://127.0.0.1:18000/ui/` | Review uploaded PDFs, tables, statements, signals, and generated reports | -| API | `http://127.0.0.1:18000/v1` | Programmatic ingestion and retrieval | +| API | `http://127.0.0.1:18000/v1` | Programmatic ingestion and retrieval, including canonical facts | | OpenAPI docs | `http://127.0.0.1:18000/docs` | Interactive API explorer | | Health | `http://127.0.0.1:18000/health` | Liveness probe | | Metrics | `http://127.0.0.1:18000/metrics` | Prometheus endpoint | @@ -153,6 +155,7 @@ pip install -e ".[all]" ```bash make test make eval +python scripts/eval.py --thresholds benchmarks/thresholds/golden_minimum.json make fmt make lint make typecheck @@ -164,12 +167,19 @@ The repository is organized around a small number of clear surfaces: - `src/api/` for HTTP entry points and application wiring - `src/pdf/` for extraction, rendering, tables, and OCR -- `src/finance/` for schemas, normalization, validation, and signal logic +- `src/finance/` for facts, normalization, validation, and signal logic - `src/agent/` for pipeline orchestration and state handling - `src/market/` for event-study analysis and market providers - `web/` for the Vue 3 dashboard - `tests/` for API, storage, pipeline, frontend-adjacent, and integration coverage -- `docs/` for architecture, branch protection, and project notes +- `benchmarks/` for benchmark manifest schemas, threshold configs, and non-sensitive sample manifests +- `docs/` for architecture, branch protection, roadmap, and project notes + +## Benchmark Data Policy + +Benchmark manifests, anonymized labels, synthetic fixtures, schemas, and threshold configs can be committed. Raw third-party or proprietary PDFs, private labels, customer files, and generated benchmark artifacts must stay out of git. + +Use `benchmarks/raw/` or `benchmarks/private/` for local-only datasets. Those paths are ignored by git. Store only stable metadata, expected facts, expected evidence pointers, and licensing notes in committed manifests. ## Contributing diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..33df6b6 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,27 @@ +# Benchmark Manifests + +This directory stores committed benchmark metadata for Jetbot evaluation. It is for manifests, schemas, anonymized labels, synthetic fixtures, and quality threshold configs only. + +Do commit: + +- `manifest.schema.json` +- anonymized benchmark manifests +- synthetic fixture metadata +- expected facts, expected evidence pointers, expected note/risk labels +- threshold configs under `thresholds/` + +Do not commit: + +- raw third-party or proprietary PDFs +- private customer reports +- non-anonymized analyst labels +- generated eval outputs +- files under `benchmarks/raw/` or `benchmarks/private/` + +Run the current golden evaluation gate with: + +```bash +python scripts/eval.py --thresholds benchmarks/thresholds/golden_minimum.json +``` + +Real PDF benchmark manifests should point to local-only files through relative paths such as `raw/company-2025-10k.pdf`. Those raw files are intentionally ignored by git. \ No newline at end of file diff --git a/benchmarks/manifest.schema.json b/benchmarks/manifest.schema.json new file mode 100644 index 0000000..c84d29d --- /dev/null +++ b/benchmarks/manifest.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/magic-alt/jetbot/benchmarks/manifest.schema.json", + "title": "Jetbot Benchmark Manifest", + "type": "object", + "additionalProperties": false, + "required": ["schema_version", "benchmark_id", "name", "cases"], + "properties": { + "schema_version": {"type": "integer", "const": 1}, + "benchmark_id": {"type": "string", "minLength": 1}, + "name": {"type": "string", "minLength": 1}, + "description": {"type": "string"}, + "data_policy": { + "type": "object", + "additionalProperties": false, + "required": ["raw_files_committed", "label_policy"], + "properties": { + "raw_files_committed": {"type": "boolean", "const": false}, + "label_policy": {"type": "string", "enum": ["synthetic", "anonymized", "private"]}, + "notes": {"type": "string"} + } + }, + "cases": { + "type": "array", + "minItems": 1, + "items": {"$ref": "#/$defs/case"} + } + }, + "$defs": { + "case": { + "type": "object", + "additionalProperties": false, + "required": ["case_id", "source", "expected_facts"], + "properties": { + "case_id": {"type": "string", "minLength": 1}, + "company": {"type": "string"}, + "ticker": {"type": "string"}, + "filing_type": {"type": "string"}, + "period_end": {"type": "string", "format": "date"}, + "source": { + "type": "object", + "additionalProperties": false, + "required": ["type", "path"], + "properties": { + "type": {"type": "string", "enum": ["synthetic", "pdf", "html", "xbrl"]}, + "path": {"type": "string", "minLength": 1}, + "license": {"type": "string"}, + "sha256": {"type": "string"} + } + }, + "expected_facts": { + "type": "array", + "items": {"$ref": "#/$defs/fact"} + }, + "expected_notes": { + "type": "array", + "items": {"type": "string"} + }, + "expected_risk_categories": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "fact": { + "type": "object", + "additionalProperties": false, + "required": ["statement_type", "concept", "value"], + "properties": { + "statement_type": {"type": "string", "enum": ["income", "balance", "cashflow", "note", "other"]}, + "concept": {"type": "string", "minLength": 1}, + "label": {"type": "string"}, + "value": {"type": "number"}, + "unit": {"type": "string"}, + "currency": {"type": "string"}, + "period_end": {"type": "string", "format": "date"}, + "evidence": { + "type": "array", + "items": {"$ref": "#/$defs/evidence"} + } + } + }, + "evidence": { + "type": "object", + "additionalProperties": false, + "required": ["page"], + "properties": { + "page": {"type": "integer", "minimum": 1}, + "table_id": {"type": "string"}, + "row": {"type": "integer", "minimum": 0}, + "col": {"type": "integer", "minimum": 0}, + "quote": {"type": "string"} + } + } + } +} \ No newline at end of file diff --git a/benchmarks/sample_manifest.json b/benchmarks/sample_manifest.json new file mode 100644 index 0000000..f4f44c7 --- /dev/null +++ b/benchmarks/sample_manifest.json @@ -0,0 +1,41 @@ +{ + "schema_version": 1, + "benchmark_id": "synthetic-smoke-v1", + "name": "Synthetic Smoke Benchmark", + "description": "A committed example manifest showing the expected shape for benchmark metadata. It does not reference real proprietary files.", + "data_policy": { + "raw_files_committed": false, + "label_policy": "synthetic", + "notes": "Use synthetic or anonymized labels in git. Keep real PDFs under ignored local paths." + }, + "cases": [ + { + "case_id": "synthetic-income-001", + "company": "Example Co", + "ticker": "EXM", + "filing_type": "10-Q", + "period_end": "2025-12-31", + "source": { + "type": "synthetic", + "path": "tests/golden/conftest.py", + "license": "synthetic" + }, + "expected_facts": [ + { + "statement_type": "income", + "concept": "revenue", + "label": "Revenue", + "value": 100.0, + "unit": "USD millions", + "currency": "USD", + "period_end": "2025-12-31", + "evidence": [ + {"page": 1, "quote": "Revenue 100"} + ] + } + ], + "expected_notes": ["other"], + "expected_risk_categories": [] + } + ] +} \ No newline at end of file diff --git a/benchmarks/thresholds/golden_minimum.json b/benchmarks/thresholds/golden_minimum.json new file mode 100644 index 0000000..e35854f --- /dev/null +++ b/benchmarks/thresholds/golden_minimum.json @@ -0,0 +1,12 @@ +{ + "schema_version": 1, + "description": "Initial non-regression thresholds for the synthetic golden suite. Tighten these as extraction quality improves.", + "min_metrics": { + "n_cases": 5, + "avg_source_ref_completeness": 1.0, + "avg_signal_category_recall": 0.8, + "avg_note_type_recall": 0.6, + "avg_fact_value_accuracy": 0.08, + "avg_fact_source_ref_completeness": 0.34 + } +} \ No newline at end of file diff --git a/docs/financial_fact_platform_roadmap.md b/docs/financial_fact_platform_roadmap.md index 2da881a..263a301 100644 --- a/docs/financial_fact_platform_roadmap.md +++ b/docs/financial_fact_platform_roadmap.md @@ -79,7 +79,7 @@ Jetbot 当前已经具备较完整的财报 PDF Agent MVP 能力:PDF 上传、 ## 4. 已完成的第一实现切片 -本路线图的第一切片已经在当前分支 `feat/financial-fact-foundation` 中实现,目标是为后续人工复核、导出和 benchmark 建立事实层底座。 +本路线图的第一切片已通过 PR12 合并到 `main`,目标是为后续人工复核、导出和 benchmark 建立事实层底座。 ### 4.1 Schema 与证据模型 @@ -162,7 +162,7 @@ Jetbot 当前已经具备较完整的财报 PDF Agent MVP 能力:PDF 上传、 - 文档和 README 中明确 Jetbot 的下一阶段定位。 - 每个 P0 feature 都能映射到质量指标。 -- 不把真实敏感 PDF 提交到仓库。 +- 不把真实敏感 PDF 提交到仓库;真实样本只保存在本地或私有存储,仓库只提交 manifest、匿名标签、合成 fixture、schema 和阈值配置。 ### Phase 1:Benchmark 与 Eval CI,Week 1-2 @@ -192,6 +192,7 @@ Jetbot 当前已经具备较完整的财报 PDF Agent MVP 能力:PDF 上传、 验收标准: - `python scripts/eval.py --output-dir data/eval-dev` 可生成报告。 +- `python scripts/eval.py --thresholds benchmarks/thresholds/golden_minimum.json` 可作为质量门槛,指标低于阈值时返回非 0。 - 报告包含 document-level 与 aggregate metrics。 - synthetic golden gate 可稳定在 CI 中运行。 - real PDF benchmark 可本地运行,且不会把敏感样本提交到 git。 @@ -574,13 +575,14 @@ docker compose up --build ## 10. 下一步推荐执行顺序 -1. 完成 correction API 和 effective facts。 -2. 在前端增加 facts tab 或 review panel。 -3. 给 `PdfViewer` 增加 bbox overlay。 -4. 给 `EvidenceLink` 增加 row/col/bbox payload。 -5. 增加 Excel/CSV/JSON export。 -6. 扩展 benchmark manifest 和 threshold gate。 -7. 开始 table router protocol。 -8. 再接 SEC/XBRL/HTML ingestion。 +1. 收口 Phase 0:README/路线图正式定位为 Filing-to-Model Copilot / Financial Fact Platform,并文档化 benchmark 数据政策。 +2. 完成 Phase 1 评测门槛:benchmark manifest schema、样例 manifest、threshold 配置和 eval gate。 +3. 完成 correction API 和 effective facts。 +4. 在前端增加 facts tab 或 review panel。 +5. 给 `PdfViewer` 增加 bbox overlay。 +6. 给 `EvidenceLink` 增加 row/col/bbox payload。 +7. 增加 Excel/CSV/JSON export。 +8. 开始 table router protocol。 +9. 再接 SEC/XBRL/HTML ingestion。 这一路线的判断标准很简单:每增加一个能力,都必须让 facts 更准确、证据更可审计、复核更省时间、输出更能进入真实 analyst workflow。 \ No newline at end of file diff --git a/scripts/eval.py b/scripts/eval.py index e5747f6..204e443 100644 --- a/scripts/eval.py +++ b/scripts/eval.py @@ -17,6 +17,7 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser(description="Run Jetbot financial extraction evaluation.") parser.add_argument("--output-dir", default=str(DEFAULT_OUTPUT_DIR), help="Directory for eval artifacts.") + parser.add_argument("--thresholds", help="Optional JSON file with min_metrics/max_metrics quality gates.") parser.add_argument("--skip-pytest", action="store_true", help="Skip pytest golden gate and only compute metrics.") parser.add_argument("--allow-real-llm", action="store_true", help="Do not force the mock LLM provider.") return parser.parse_args(argv) @@ -32,11 +33,13 @@ def main(argv: Sequence[str] | None = None) -> int: pytest_result = None if args.skip_pytest else _run_pytest_gate() case_results = _run_golden_cases(output_dir) metrics = _compute_metrics(case_results) + threshold_results = evaluate_thresholds(metrics, load_thresholds(args.thresholds) if args.thresholds else None) finished_at = _utc_now() report = build_eval_report( metrics=metrics, case_results=case_results, pytest_result=pytest_result, + threshold_results=threshold_results, started_at=started_at, finished_at=finished_at, ) @@ -44,6 +47,8 @@ def main(argv: Sequence[str] | None = None) -> int: print(render_markdown_report(report)) if pytest_result and pytest_result["exit_code"] != 0: return int(pytest_result["exit_code"]) + if threshold_results["status"] == "failed": + return 2 return 0 @@ -52,12 +57,16 @@ def build_eval_report( metrics: dict[str, Any], case_results: list[dict[str, Any]], pytest_result: dict[str, Any] | None, + threshold_results: dict[str, Any] | None = None, started_at: str, finished_at: str, ) -> dict[str, Any]: status = "passed" if pytest_result and pytest_result["exit_code"] != 0: status = "failed" + threshold_results = threshold_results or {"status": "skipped", "checks": []} + if threshold_results["status"] == "failed": + status = "failed" return { "schema_version": 1, "suite": "golden", @@ -66,6 +75,7 @@ def build_eval_report( "finished_at": finished_at, "metrics": metrics, "cases": [_case_summary(case) for case in case_results], + "thresholds": threshold_results, "pytest": pytest_result, } @@ -84,6 +94,14 @@ def render_markdown_report(report: dict[str, Any]) -> str: ] for key, value in metrics.items(): lines.append(f"- `{key}`: {_format_metric(value)}") + thresholds = report.get("thresholds", {"status": "skipped", "checks": []}) + lines.extend(["", "## Thresholds", "", f"Status: **{thresholds['status']}**"]) + for check in thresholds.get("checks", []): + comparator = ">=" if check["kind"] == "min" else "<=" + lines.append( + f"- `{check['metric']}`: {_format_metric(check.get('actual'))} " + f"{comparator} {_format_metric(check['threshold'])} -> {check['status']}" + ) lines.extend(["", "## Cases", ""]) for case in report["cases"]: lines.append( @@ -102,6 +120,45 @@ def write_eval_report(report: dict[str, Any], output_dir: Path) -> None: (output_dir / "eval_report.md").write_text(render_markdown_report(report), encoding="utf-8") +def load_thresholds(path: str | None) -> dict[str, Any] | None: + if not path: + return None + return json.loads(Path(path).read_text(encoding="utf-8")) + + +def evaluate_thresholds(metrics: dict[str, Any], thresholds: dict[str, Any] | None) -> dict[str, Any]: + if not thresholds: + return {"status": "skipped", "checks": []} + + checks: list[dict[str, Any]] = [] + failed = False + for metric, threshold in thresholds.get("min_metrics", {}).items(): + actual = metrics.get(metric) + passed = _is_number(actual) and float(actual) >= float(threshold) + failed = failed or not passed + checks.append(_threshold_check("min", metric, actual, threshold, passed)) + for metric, threshold in thresholds.get("max_metrics", {}).items(): + actual = metrics.get(metric) + passed = _is_number(actual) and float(actual) <= float(threshold) + failed = failed or not passed + checks.append(_threshold_check("max", metric, actual, threshold, passed)) + return {"status": "failed" if failed else "passed", "checks": checks} + + +def _threshold_check(kind: str, metric: str, actual: Any, threshold: Any, passed: bool) -> dict[str, Any]: + return { + "kind": kind, + "metric": metric, + "actual": actual, + "threshold": threshold, + "status": "passed" if passed else "failed", + } + + +def _is_number(value: Any) -> bool: + return isinstance(value, int | float) and not isinstance(value, bool) + + def _case_summary(case: dict[str, Any]) -> dict[str, Any]: return { "name": case["name"], diff --git a/tests/test_benchmark_manifest.py b/tests/test_benchmark_manifest.py new file mode 100644 index 0000000..3619cd5 --- /dev/null +++ b/tests/test_benchmark_manifest.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +import json +from pathlib import Path + + +def test_benchmark_manifest_schema_and_sample_are_valid_json() -> None: + schema = json.loads(Path("benchmarks/manifest.schema.json").read_text(encoding="utf-8")) + sample = json.loads(Path("benchmarks/sample_manifest.json").read_text(encoding="utf-8")) + + assert schema["title"] == "Jetbot Benchmark Manifest" + assert sample["schema_version"] == schema["properties"]["schema_version"]["const"] + assert sample["data_policy"]["raw_files_committed"] is False + assert sample["cases"][0]["expected_facts"][0]["concept"] == "revenue" \ No newline at end of file diff --git a/tests/test_eval_script.py b/tests/test_eval_script.py index 481c88d..28eb265 100644 --- a/tests/test_eval_script.py +++ b/tests/test_eval_script.py @@ -1,8 +1,10 @@ from __future__ import annotations +import json from pathlib import Path -from scripts.eval import build_eval_report, render_markdown_report, write_eval_report +import scripts.eval as eval_script +from scripts.eval import build_eval_report, evaluate_thresholds, render_markdown_report, write_eval_report def test_eval_report_writer_creates_json_and_markdown(tmp_path: Path) -> None: @@ -10,6 +12,7 @@ def test_eval_report_writer_creates_json_and_markdown(tmp_path: Path) -> None: metrics={"n_cases": 1, "avg_fact_value_accuracy": 1.0}, case_results=[{"name": "case-a", "fact_count": 2, "statement_types": ["income"], "errors": []}], pytest_result={"exit_code": 0, "command": ["pytest"], "stdout": "", "stderr": ""}, + threshold_results={"status": "passed", "checks": []}, started_at="2026-01-01T00:00:00+00:00", finished_at="2026-01-01T00:00:01+00:00", ) @@ -26,8 +29,70 @@ def test_eval_report_marks_pytest_failure() -> None: metrics={"n_cases": 0}, case_results=[], pytest_result={"exit_code": 1, "command": ["pytest"], "stdout": "", "stderr": "failed"}, + threshold_results={"status": "skipped", "checks": []}, started_at="2026-01-01T00:00:00+00:00", finished_at="2026-01-01T00:00:01+00:00", ) - assert report["status"] == "failed" \ No newline at end of file + assert report["status"] == "failed" + + +def test_thresholds_pass_when_metrics_meet_minimums() -> None: + result = evaluate_thresholds( + {"avg_fact_value_accuracy": 0.9, "n_cases": 5}, + {"min_metrics": {"avg_fact_value_accuracy": 0.8, "n_cases": 5}}, + ) + + assert result["status"] == "passed" + assert all(check["status"] == "passed" for check in result["checks"]) + + +def test_thresholds_fail_when_metric_is_below_minimum() -> None: + result = evaluate_thresholds( + {"avg_fact_value_accuracy": 0.7}, + {"min_metrics": {"avg_fact_value_accuracy": 0.8}}, + ) + + assert result["status"] == "failed" + assert result["checks"][0] == { + "kind": "min", + "metric": "avg_fact_value_accuracy", + "actual": 0.7, + "threshold": 0.8, + "status": "failed", + } + + +def test_eval_report_marks_threshold_failure() -> None: + report = build_eval_report( + metrics={"avg_fact_value_accuracy": 0.7}, + case_results=[], + pytest_result={"exit_code": 0, "command": ["pytest"], "stdout": "", "stderr": ""}, + threshold_results={"status": "failed", "checks": []}, + started_at="2026-01-01T00:00:00+00:00", + finished_at="2026-01-01T00:00:01+00:00", + ) + + assert report["status"] == "failed" + + +def test_main_returns_nonzero_when_thresholds_fail(tmp_path: Path, monkeypatch) -> None: + thresholds = tmp_path / "thresholds.json" + thresholds.write_text(json.dumps({"min_metrics": {"avg_fact_value_accuracy": 0.9}}), encoding="utf-8") + + monkeypatch.setattr(eval_script, "_force_mock_llm", lambda: None) + monkeypatch.setattr(eval_script, "_run_golden_cases", lambda output_dir: []) + monkeypatch.setattr(eval_script, "_compute_metrics", lambda case_results: {"avg_fact_value_accuracy": 0.5}) + + exit_code = eval_script.main([ + "--skip-pytest", + "--thresholds", + str(thresholds), + "--output-dir", + str(tmp_path / "out"), + ]) + + assert exit_code == 2 + report = json.loads((tmp_path / "out" / "eval_report.json").read_text(encoding="utf-8")) + assert report["status"] == "failed" + assert report["thresholds"]["checks"][0]["status"] == "failed" \ No newline at end of file