From 16da34c47712a0bcf465d48e57b3d8f6e75a1750 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Fri, 24 Apr 2026 16:47:27 +0100 Subject: [PATCH 01/18] Add initial plan for migration --- plan-goatPipelineMigration.prompt.md | 359 +++++++++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 plan-goatPipelineMigration.prompt.md diff --git a/plan-goatPipelineMigration.prompt.md b/plan-goatPipelineMigration.prompt.md new file mode 100644 index 0000000..8a38553 --- /dev/null +++ b/plan-goatPipelineMigration.prompt.md @@ -0,0 +1,359 @@ +# GoaT Data Import Pipeline Migration Plan + +## TL;DR +Migrate all data fetching from the legacy `goat-data` GitHub Actions workflow to scheduled Prefect-backed updaters in the `data` repo, then wire up parsers and validators to produce import-ready TSV/YAML pairs on S3. Five phases: fetch (Phase 1), parse+validate (Phase 2), switch S3 source (Phase 3), replace import (Phase 4), full pipeline (Phase 5). + +--- + +## Gap Analysis: Updater Coverage + +### Already Implemented (10 updaters) +| Updater | Legacy Equivalent | Schedule | +|---------|------------------|----------| +| `update_ncbi_datasets` | fetch-ncbi-datasets-zip | Daily | +| `update_ncbi_taxonomy` | taxdump download | Weekly | +| `update_ena_taxonomy_extra` | ENA taxonomy API | Weekly | +| `update_genomehubs_taxonomy` | fetch-genomehubs-taxonomy | Daily | +| `update_tolid_prefixes` | ToLID GitLab fetch | Weekly | +| `update_ott_taxonomy` | OTT download | Monthly | +| `update_tol_portal_status` | STS API (replaced) | Daily (orchestrated) | +| `update_tol_genome_notes` | (new source) | Daily (orchestrated) | +| `update_nhm_status_list` | NHM Data Portal API | Weekly | +| `update_boat_config` | GoaT API + Lustre | Daily | + +### Missing — Need New Updaters (8 categories, ~11 updaters) + +| # | Source | Legacy Job | Priority | Schedule | Complexity | +|---|--------|-----------|----------|----------|------------| +| 1 | **BlobToolKit** | fetch-blobtoolkit 
(Docker `genomehubs parse --btk`) | HIGH | Daily | Medium — API pagination + Docker | +| 2 | **RefSeq Organelles** | fetch-refseq-organelles (FTP + BioPython) | HIGH | Weekly | Medium — FTP + GenBank parsing | +| 3 | **VGP Status** | fetch-from-apis (GitHub YAML) | MEDIUM | Weekly | Low — simple HTTP + YAML parse | +| 4 | **JGI 1KFG** | fetch-from-apis (OAuth REST) | MEDIUM | Weekly | Medium — OAuth token exchange | +| 5 | **Ensembl Metadata** (×6) | fetch-assembly-links (6 JSON endpoints) | MEDIUM | Monthly | Low — HTTP + JSON→TSV, one parameterized updater | +| 6 | **UCSC Assembly Hubs** | fetch-assembly-links | LOW | Monthly | Low — HTTP + text parsing | +| 7 | **Google Sheets Status** (~20+ projects) | fetch-from-apis (R + Python) | HIGH | Weekly | High — rewrite R→Python, normalize tables | +| 8 | **SRA Data** | (parse_sra_data.py) | MEDIUM | Weekly | Medium — NCBI API + XML parsing | + +### Static/Semi-Static Sources (no external fetch needed) +These exist as curated YAML/TSV pairs in `goat-data/sources/` and are uploaded directly to S3: +- **Genomesize/Karyotype** — 25 FILE_ sources (genome size databases, chromosome counts) +- **Conservation** — CITES index (periodically updated manually) +- **UK Legislation** — 9 FILE_ sources (very static) +- **Regional Lists** — 7 FILE_ sources (static geographic lists) +- **Lineages** — ODB10 lineage mappings +- **OTT IDs** — OTT taxonomy mappings +- **ToLIDs** — Tree of Life ID naming + +These should be synced to S3 via a simple `sync_static_sources` utility or manually, not via updaters. + +--- + +## Phase 1: External Data Fetching + +### Goal +All external data fetching implemented as Prefect updaters with scheduled deployments, uploading raw data to S3 and emitting events for downstream parsing. 
+ +### Steps + +#### Group A: API-Based Updaters (parallel development) + +**Step 1: `update_vgp_status` — VGP Status List** +- Fetch GitHub YAML from `https://raw.githubusercontent.com/vgl-hub/genome-portal/master/_data/table_tracker.yml` +- Parse YAML, extract fields: common_name, family, order, scientific_name, status, taxon_id, vgp_phase +- Write TSV to `s3://goat/resources/status-lists/vgp.tsv` +- Schedule: Weekly +- Reuse: `safe_get()` from `flows/lib/utils.py`, `parse_args/shared_args` pattern +- Reference: `goat-data/scripts/api/api_config.py` VGL handlers + +**Step 2: `update_jgi_status` — JGI 1KFG** +- OAuth token exchange: offline_token → access_token via `https://signon.jgi.doe.gov/signon/create` +- Paginated API: `https://gold-ws.jgi.doe.gov/projects?studyGoldId=Gs0000001` +- Write TSV to `s3://goat/resources/status-lists/jgi_1kfg.tsv` +- Schedule: Weekly +- Requires: `JGI_OFFLINE_TOKEN` secret (Prefect Secret block or env var) +- Reference: `goat-data/scripts/jgi_to_tsv.py` +- Bug risk: Legacy code has fragile OAuth flow — add proper token refresh and expiry handling + +**Step 3: `update_ensembl_metadata` — Ensembl Species Metadata (6 databases)** +- Single parameterized updater deployed 6 times with different division parameters +- Divisions: Fungi, Metazoa, Plants, Protists, Vertebrates, Rapid Release +- Fetch JSON from Ensembl REST API endpoints +- Transform JSON→TSV (replace legacy `jq` one-liners with explicit Python) +- Write to `s3://goat/resources/assembly-data/species_metadata_Ensembl{Division}.tsv.gz` +- Schedule: Monthly +- Reference: `goat-data/.github/workflows/fetch-resources.yml` fetch-assembly-links job + +**Step 4: `update_ucsc_assemblies` — UCSC Genome Browser** +- Fetch assembly hub list from UCSC API +- Parse to TSV +- Write to `s3://goat/resources/assembly-data/ucsc_ids.tsv` +- Schedule: Monthly +- Reuse: `safe_get()`, standard arg parsing + +**Step 5: `update_sra_data` — SRA Metadata** +- Fetch from NCBI SRA API (Entrez or 
BigQuery) +- Parse XML/JSON responses to TSV +- Write to `s3://goat/resources/sra/sra.tsv.gz` +- Schedule: Weekly +- Reference: `goat-data/scripts/parse_sra_data.py` +- Bug risk: Legacy script has hardcoded batch sizes and silent error swallowing + +#### Group B: Complex Updaters (sequential, more effort) + +**Step 6: `update_blobtoolkit` — BlobToolKit Analysis Data** +- Approach A (preferred): Direct API fetch from `https://blobtoolkit.genomehubs.org/api/v1/search/Eukaryota` + per-assembly detail queries +- Approach B: Docker-isolated `genomehubs parse --btk` via orchestrator pattern (like tol_genome_notes) +- Outputs: `btk.tsv.gz` + `btk.files.yaml` to `s3://goat/resources/btk/` +- Schedule: Daily +- Reference: `goat-data/scripts/parse_blobtoolkit.py` +- Bug risk: Legacy has `print(plots)` debug line left in (line 66); pagination may miss entries + +**Step 7: `update_refseq_organelles` — RefSeq Organelle Data** +- Fetch from NCBI FTP: `ftp.ncbi.nlm.nih.gov/refseq/release/` +- Parse GenBank flat files for mitochondrion/plastid sequences +- Extract: accession, taxon_id, organism, sequence_length, references +- Write to `s3://goat/resources/assembly-data/refseq_organelles.tsv.gz` +- Schedule: Weekly +- Reference: `goat-data/scripts/parse_refseq_organelles.py` (uses BioPython) +- Consideration: BioPython dependency may need Docker isolation (check pydantic conflicts) + +**Step 8: `update_google_sheets_status` — Google Sheets Project Status Lists** +- Rewrite R script (`get_googlesheets.R`) entirely in Python +- Fetch TSVs from public Google Sheets URLs (no auth needed for public sheets) +- Use `import_status_lib.py` patterns for table normalization but rewrite cleanly: + - Replace pandas one-liners with explicit column mapping + - Handle encoding robustly (UTF-8 with fallback) + - Normalize species names, taxon IDs +- Projects list parameterized (deploy once per project group or batch) +- Outputs: One TSV per project to 
`s3://goat/resources/status-lists/{project}_expanded.tsv` +- Schedule: Weekly +- Sub-steps: + - 8a: Core fetcher function (reusable across all sheets) + - 8b: Table normalizer (species name cleaning, status field mapping) + - 8c: Per-project configuration (sheet URLs, field mappings, column renames) + - 8d: Deploy as single flow with project list parameter +- Reference: `goat-data/scripts/import_status_lib.py`, `goat-data/scripts/import_status.py` +- Bug risks in legacy: + - Code duplication (import_status_lib.py copied to ebp_import/) + - Silent encoding failures + - Hardcoded 24-project list + - Pandas operations that silently drop data on merge conflicts + +#### Group C: Infrastructure & Static Data + +**Step 9: `sync_static_sources` — Static YAML/TSV pairs** +- Utility to upload curated YAML/TSV pairs from goat-data/sources/ to S3 +- Not a scheduled updater — run manually or on goat-data repo changes +- Covers: genomesize-karyotype, conservation, uk-legislation, regional-lists, lineages +- Could be triggered by a webhook on goat-data repo pushes + +**Step 10: Secrets & Configuration** +- Configure Prefect Secret blocks for: `JGI_OFFLINE_TOKEN`, Google Sheets URLs +- STS_AUTHORIZATION_KEY no longer needed (replaced by tol-sdk) +- Add deployment entries to `flows/prefect.yaml` for all new updaters + +### Relevant Files (Phase 1) + +**New files to create:** +- `flows/updaters/update_vgp_status.py` +- `flows/updaters/update_jgi_status.py` +- `flows/updaters/update_ensembl_metadata.py` +- `flows/updaters/update_ucsc_assemblies.py` +- `flows/updaters/update_sra_data.py` +- `flows/updaters/update_blobtoolkit.py` +- `flows/updaters/update_refseq_organelles.py` +- `flows/updaters/update_google_sheets_status.py` +- `flows/lib/google_sheets.py` (shared Google Sheets fetching utilities) +- `flows/lib/api_helpers.py` (shared API helpers: OAuth, pagination, JSON→TSV) + +**Existing files to modify:** +- `flows/prefect.yaml` — add deployments for all new updaters +- 
`flows/lib/utils.py` — add any missing shared utilities +- `flows/lib/shared_args.py` — add new argument definitions if needed +- `requirements.txt` — add BioPython if needed for RefSeq parsing + +**Reference files (goat-data, read-only):** +- `goat-data/scripts/api/api_config.py` — API endpoint definitions +- `goat-data/scripts/api/api_tools.py` — retry/pagination patterns +- `goat-data/scripts/jgi_to_tsv.py` — JGI OAuth flow +- `goat-data/scripts/parse_blobtoolkit.py` — BTK API parsing +- `goat-data/scripts/parse_refseq_organelles.py` — GenBank parsing +- `goat-data/scripts/parse_sra_data.py` — SRA parsing +- `goat-data/scripts/import_status_lib.py` — table normalization +- `goat-data/scripts/get_googlesheets.R` — Google Sheets URLs +- `goat-data/.github/workflows/fetch-resources.yml` — complete fetch workflow + +### Verification (Phase 1) +1. Each updater runs locally with `SKIP_PREFECT=true` and produces valid output TSV +2. Output TSV format matches goat-data legacy output (diff comparison where possible) +3. S3 upload succeeds to `s3://goat/resources/` paths +4. Events emitted with correct resource types for downstream triggering +5. All tests pass: `python -m pytest tests/` +6. No secret values hardcoded; all auth via env vars or Prefect Secret blocks +7. 
`prefect deploy --prefect-file flows/prefect.yaml --all` succeeds + +### Decisions (Phase 1) +- **Google Sheets**: Rewrite in Python (not R) for consistency with the rest of the codebase +- **BlobToolKit**: Prefer direct API approach over Docker genomehubs parse (simpler, avoids Docker-in-Docker); fall back to orchestrator pattern if API is insufficient +- **RefSeq Organelles**: Use BioPython in Docker container if pydantic conflicts arise +- **Static sources**: Not updaters — sync utility or manual upload +- **STS replaced by ToL Portal**: No migration needed (already done via `update_tol_portal_status`) + +--- + +## Phase 2: YAML-Backed Parsers & Validation + +### Goal +All data sources processed by fetch-parse-validate pipeline. Parsing triggered by update events. Validated TSV/YAML pairs uploaded to new S3 directories (`s3://goat/validated/`). + +### Steps + +**Step 1: Implement `parse_sequencing_status` parser** +- Handle all status list TSV formats (VGP, JGI, Google Sheets projects, NHM, ToL Portal) +- Config-driven: read YAML to determine column mappings +- Reuse `Config` class from `flows/lib/utils.py` +- One parser handles all ~65 status list YAML configs + +**Step 2: Implement `parse_refseq_organelles` parser** +- Replace stub with working implementation +- Read YAML config, apply field mappings from `refseq_organelles.types.yaml` +- Validate organelle accessions, taxonomy + +**Step 3: Implement `parse_blobtoolkit` parser** +- Parse BTK TSV using YAML config from `btk.types.yaml` +- Handle BUSCO stats, base composition, read mapping fields + +**Step 4: Implement `parse_ensembl_metadata` parser** +- Handle all 6 Ensembl division TSVs +- Single generic parser, config-driven via YAML + +**Step 5: Implement `parse_sra_data` parser** +- Parse SRA TSV with YAML config from `sra.types.yaml` + +**Step 6: Implement `parse_genomesize_karyotype` parser** +- Handle the 25+ genomesize/karyotype FILE_ sources +- Generic parser for simple TSV→validated TSV 
transformation + +**Step 7: Implement `parse_conservation` and `parse_legislation` parsers** +- Static data validation parsers +- Check CITES categories, legislation references against YAML constraints + +**Step 8: Wire all fetch-parse-validate deployments** +- Add trigger entries in `prefect.yaml` for each parser +- Events from Phase 1 updaters trigger corresponding parse-validate flows +- `validate_file_pair()` runs `blobtk validate` on each output +- Gate S3 upload on validation success + +**Step 9: Configure S3 output paths** +- Validated outputs go to `s3://goat/validated/{directory}/` (NOT `s3://goat/resources/` or `s3://goat/sources/`) +- Both validated TSV and validated YAML uploaded +- Validation report (JSONL) uploaded alongside for audit + +### Relevant Files (Phase 2) +- `flows/parsers/parse_sequencing_status.py` — complete implementation +- `flows/parsers/parse_refseq_organelles.py` — replace stub +- `flows/parsers/parse_blobtoolkit.py` — new +- `flows/parsers/parse_ensembl_metadata.py` — new +- `flows/parsers/parse_sra_data.py` — new +- `flows/parsers/parse_genomesize_karyotype.py` — new (generic) +- `flows/parsers/parse_conservation.py` — new +- `flows/orchestration/wrapper_fetch_parse_validate.py` — existing, may need updates +- `flows/validators/validate_file_pair.py` — existing, may need S3 path updates +- `flows/prefect.yaml` — add trigger entries +- Local copies of YAML configs from `goat-data/sources/` for development + +### Verification (Phase 2) +1. Each parser produces TSV matching the YAML config headers +2. `blobtk validate -g ` passes for each output with ≥95% valid rows +3. Event chain works: updater → parse → validate → S3 upload +4. Validated files appear in `s3://goat/validated/` directories +5. Row counts comparable to legacy pipeline output +6. 
No data loss: compare parsed row counts against raw input counts + +### Decisions (Phase 2) +- **S3 validated path**: `s3://goat/validated/` (separate from `resources/` and `sources/`) +- **Parser reuse**: `parse_sequencing_status` handles ALL status list formats via YAML config +- **Parser reuse**: `parse_genomesize_karyotype` handles ALL genomesize/karyotype sources generically +- **YAML configs**: Develop with local copies, production fetches from goat-data sources/ +- **Scope boundary**: Phase 2 does NOT change the legacy import at all + +--- + +## Phase 3: Switch Legacy Import to Validated Data + +### Goal +Legacy import workflow reads from `s3://goat/validated/` instead of `s3://goat/resources/` or `s3://goat/sources/`, removing all fetch steps from the import. + +### Steps +1. Verify data parity: compare `s3://goat/validated/` against `s3://goat/sources/` for all directories +2. Update `goat-data/.github/workflows/genomehubs-index.yml` to read from `s3://goat/validated/` +3. Remove fetch jobs from `goat-data/.github/workflows/fetch-resources.yml` (or disable) +4. Update `goat-data/.github/workflows/s3_release.yml` to skip fetch-resources +5. Run test release with validated data; compare with latest production release +6. Staged rollout: switch one directory at a time, verify, proceed + +### Verification (Phase 3) +1. Test release produces identical (or improved) Elasticsearch indices +2. API test suite passes +3. UI test suite passes +4. Row counts match or exceed previous release +5. Rollback path confirmed: can revert to `s3://goat/sources/` if issues + +### Risk Mitigation +- Keep `s3://goat/sources/` and `s3://goat/resources/` intact as rollback +- Phase 3 changes only S3 paths in workflow config, easily reversible +- Switch one source directory at a time (assembly-data first, then status-lists, etc.) 
+ +--- + +## Phase 4: Replace Legacy Import (Future) + +### Goal +Replace `genomehubs index` with updated import code that reads validated TSV/YAML pairs directly. + +### Scope +- Requires new import code not yet available +- Skip validation/lookup steps (already done in Phase 2) +- Direct TSV→Elasticsearch indexing + +--- + +## Phase 5: Full Pipeline Migration (Future) + +### Goal +Remove all GitHub Actions workflow dependencies; full pipeline runs in Prefect. + +### Scope +- Yet to be defined +- Includes: ES init, indexing, fill, test, release promotion +- Replaces: s3_release.yml, genomehubs-init.yml, genomehubs-index.yml, genomehubs-fill.yml, genomehubs-test.yml + +--- + +## Conventions Reference + +### YAML/TSV Pair Convention (goat-data) +- **Prefix patterns**: `ATTR_` (attribute defs), `TAXON_` (taxonomy), `FILE_` (data sources), unprefixed (primary) +- **YAML structure**: `file:` metadata, `attributes:` field mappings, `taxonomy:` taxon matching, `identifiers:` ID columns +- **`needs:`** directive: lists dependent YAML files that must be co-located +- **TSV naming**: matches `file.name` in YAML config, often `.gz` compressed + +### Data Repo Code Conventions +- Absolute imports: `from flows.lib import utils` +- Google-style docstrings with type hints +- `SKIP_PREFECT=true` for local testing +- `run_quoted()` for subprocess (never `shell=True`) +- `safe_get()` for HTTP requests +- `parse_args()` with `shared_args` constants for CLI +- Tasks: focused, idempotent, with `@task(retries=N, log_prints=True)` +- Events: `emit_event()` with `prefect.resource.id/type/matches.previous` +- Black formatter, 88-char line length + +### Legacy Code Bug Risks to Avoid +1. `parse_blobtoolkit.py` line 66: debug `print(plots)` left in production +2. `import_status_lib.py`: duplicated across directories, encoding silently fails +3. `fetch-or-fallback.sh`: `|| exit 0` masks real errors +4. Google Sheets: hardcoded `gid` parameters break on URL changes +5. 
JGI OAuth: no token refresh/expiry handling +6. NCBI API: hardcoded 30s timeouts, silent failure on rate limit +7. Pandas merge conflicts silently drop data in status list processing From 4b267060291dc25d9db07d86a2a93c68cc384d57 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Wed, 29 Apr 2026 10:39:03 +0100 Subject: [PATCH 02/18] update migration plan after second audit --- plan-goatPipelineMigration.prompt.md | 268 ++++++++++++++++++++++++--- 1 file changed, 240 insertions(+), 28 deletions(-) diff --git a/plan-goatPipelineMigration.prompt.md b/plan-goatPipelineMigration.prompt.md index 8a38553..7b04500 100644 --- a/plan-goatPipelineMigration.prompt.md +++ b/plan-goatPipelineMigration.prompt.md @@ -1,48 +1,62 @@ # GoaT Data Import Pipeline Migration Plan ## TL;DR + Migrate all data fetching from the legacy `goat-data` GitHub Actions workflow to scheduled Prefect-backed updaters in the `data` repo, then wire up parsers and validators to produce import-ready TSV/YAML pairs on S3. Five phases: fetch (Phase 1), parse+validate (Phase 2), switch S3 source (Phase 3), replace import (Phase 4), full pipeline (Phase 5). 
--- ## Gap Analysis: Updater Coverage -### Already Implemented (10 updaters) -| Updater | Legacy Equivalent | Schedule | -|---------|------------------|----------| -| `update_ncbi_datasets` | fetch-ncbi-datasets-zip | Daily | -| `update_ncbi_taxonomy` | taxdump download | Weekly | -| `update_ena_taxonomy_extra` | ENA taxonomy API | Weekly | -| `update_genomehubs_taxonomy` | fetch-genomehubs-taxonomy | Daily | -| `update_tolid_prefixes` | ToLID GitLab fetch | Weekly | -| `update_ott_taxonomy` | OTT download | Monthly | -| `update_tol_portal_status` | STS API (replaced) | Daily (orchestrated) | -| `update_tol_genome_notes` | (new source) | Daily (orchestrated) | -| `update_nhm_status_list` | NHM Data Portal API | Weekly | -| `update_boat_config` | GoaT API + Lustre | Daily | +### Already Implemented (10 updaters + 1 shared helper) + +| Updater | Legacy Equivalent | Schedule | Notes | +| ---------------------------- | -------------------------------------------------- | -------------------- | -------------------------------------------------- | +| `update_ncbi_datasets` | fetch-ncbi-datasets-zip | Daily | NCBI Datasets CLI → JSONL | +| `update_ncbi_taxonomy` | fetch-ncbi-taxdump (commented out in legacy) | Weekly | FTP taxdump with MD5 verification | +| `update_ena_taxonomy_extra` | fetch-ena-taxonomy-extra (commented out in legacy) | Weekly | ENA REST API | +| `update_genomehubs_taxonomy` | fetch-genomehubs-taxonomy | Daily | blobtk collation from NCBI+ENA+OTT | +| `update_tolid_prefixes` | fetch-tolids (commented out in legacy) | Weekly | GitLab WTSI; 400k line minimum validation | +| `update_ott_taxonomy` | (no legacy equivalent) | Monthly | Open Tree of Life .tgz download | +| `update_tol_portal_status` | STS API (fully replaced via tol-sdk) | Daily (orchestrated) | Docker-isolated; replaces `STS_AUTHORIZATION_KEY` | +| `update_tol_genome_notes` | (new source — no legacy equivalent) | Daily (orchestrated) | Docker-isolated; tol-sdk | +| `update_nhm_status_list` | fetch-from-apis 
NHM (commented out in legacy) | Weekly | POST API with cursor pagination | +| `update_boat_config` | GoaT API + Lustre | Daily | Assembly QC config builder | +| `tol_utils` (shared) | — | — | Shared ToL Portal helper, not a standalone updater | ### Missing — Need New Updaters (8 categories, ~11 updaters) -| # | Source | Legacy Job | Priority | Schedule | Complexity | -|---|--------|-----------|----------|----------|------------| -| 1 | **BlobToolKit** | fetch-blobtoolkit (Docker `genomehubs parse --btk`) | HIGH | Daily | Medium — API pagination + Docker | -| 2 | **RefSeq Organelles** | fetch-refseq-organelles (FTP + BioPython) | HIGH | Weekly | Medium — FTP + GenBank parsing | -| 3 | **VGP Status** | fetch-from-apis (GitHub YAML) | MEDIUM | Weekly | Low — simple HTTP + YAML parse | -| 4 | **JGI 1KFG** | fetch-from-apis (OAuth REST) | MEDIUM | Weekly | Medium — OAuth token exchange | -| 5 | **Ensembl Metadata** (×6) | fetch-assembly-links (6 JSON endpoints) | MEDIUM | Monthly | Low — HTTP + JSON→TSV, one parameterized updater | -| 6 | **UCSC Assembly Hubs** | fetch-assembly-links | LOW | Monthly | Low — HTTP + text parsing | -| 7 | **Google Sheets Status** (~20+ projects) | fetch-from-apis (R + Python) | HIGH | Weekly | High — rewrite R→Python, normalize tables | -| 8 | **SRA Data** | (parse_sra_data.py) | MEDIUM | Weekly | Medium — NCBI API + XML parsing | +| # | Source | Legacy Job | Priority | Schedule | Complexity | +| --- | ---------------------------------------- | --------------------------------------------------- | -------- | -------- | ------------------------------------------------ | +| 1 | **BlobToolKit** | fetch-blobtoolkit (Docker `genomehubs parse --btk`) | HIGH | Daily | Medium — API pagination + Docker | +| 2 | **RefSeq Organelles** | fetch-refseq-organelles (FTP + BioPython) | HIGH | Weekly | Medium — FTP + GenBank parsing | +| 3 | **VGP Status** | fetch-from-apis (GitHub YAML) | MEDIUM | Weekly | Low — simple HTTP + YAML parse | +| 4 | **JGI 
1KFG** | fetch-from-apis (OAuth REST) | MEDIUM | Weekly | Medium — OAuth token exchange | +| 5 | **Ensembl Metadata** (×6) | fetch-assembly-links (6 JSON endpoints) | MEDIUM | Monthly | Low — HTTP + JSON→TSV, one parameterized updater | +| 6 | **UCSC Assembly Hubs** | fetch-assembly-links | LOW | Monthly | Low — HTTP + text parsing | +| 7 | **Google Sheets Status** (~20+ projects) | fetch-from-apis (R + Python) | HIGH | Weekly | High — rewrite R→Python, normalize tables | +| 8 | **SRA Data** | (parse_sra_data.py) | MEDIUM | Weekly | Medium — NCBI API + XML parsing | + +### Legacy-Only Binary Fetches (no migration needed) + +These legacy jobs fetch tool binaries, not data. None needs an updater: each is either already a package dependency of the data repo or not needed at all: + +- **fetch-ncbi-datasets** — downloads `datasets` CLI executable → already a pip dependency (`ncbi-datasets-cli`) +- **fetch-genomehubs-api** — downloads GenomeHubs API binary → already a pip dependency (`genomehubs`) +- **fetch-genomehubs-ui** — downloads GenomeHubs UI binary → not needed for the data pipeline ### Static/Semi-Static Sources (no external fetch needed) + These exist as curated YAML/TSV pairs in `goat-data/sources/` and are uploaded directly to S3: -- **Genomesize/Karyotype** — 25 FILE_ sources (genome size databases, chromosome counts) + +- **Genomesize/Karyotype** — 25 FILE\_ sources (genome size databases, chromosome counts) - **Conservation** — CITES index (periodically updated manually) -- **UK Legislation** — 9 FILE_ sources (very static) -- **Regional Lists** — 7 FILE_ sources (static geographic lists) +- **UK Legislation** — 9 FILE\_ sources (very static) +- **Regional Lists** — 7 FILE\_ sources (static geographic lists) - **Lineages** — ODB10 lineage mappings - **OTT IDs** — OTT taxonomy mappings -- **ToLIDs** — Tree of Life ID naming + +Note: **ToLIDs** are NOT static — the prefix list is actively fetched by `update_tolid_prefixes`. 
The `tolids.names.yaml` config in `goat-data/sources/tolids/` is a naming convention file that ships with the YAML configs, not a separate data source. These should be synced to S3 via a simple `sync_static_sources` utility or manually, not via updaters. @@ -51,6 +65,7 @@ These should be synced to S3 via a simple `sync_static_sources` utility or manua ## Phase 1: External Data Fetching ### Goal + All external data fetching implemented as Prefect updaters with scheduled deployments, uploading raw data to S3 and emitting events for downstream parsing. ### Steps @@ -58,6 +73,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy #### Group A: API-Based Updaters (parallel development) **Step 1: `update_vgp_status` — VGP Status List** + - Fetch GitHub YAML from `https://raw.githubusercontent.com/vgl-hub/genome-portal/master/_data/table_tracker.yml` - Parse YAML, extract fields: common_name, family, order, scientific_name, status, taxon_id, vgp_phase - Write TSV to `s3://goat/resources/status-lists/vgp.tsv` @@ -66,6 +82,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - Reference: `goat-data/scripts/api/api_config.py` VGL handlers **Step 2: `update_jgi_status` — JGI 1KFG** + - OAuth token exchange: offline_token → access_token via `https://signon.jgi.doe.gov/signon/create` - Paginated API: `https://gold-ws.jgi.doe.gov/projects?studyGoldId=Gs0000001` - Write TSV to `s3://goat/resources/status-lists/jgi_1kfg.tsv` @@ -75,6 +92,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - Bug risk: Legacy code has fragile OAuth flow — add proper token refresh and expiry handling **Step 3: `update_ensembl_metadata` — Ensembl Species Metadata (6 databases)** + - Single parameterized updater deployed 6 times with different division parameters - Divisions: Fungi, Metazoa, Plants, Protists, Vertebrates, Rapid Release - Fetch JSON from Ensembl REST API endpoints @@ -84,6 +102,7 @@ All 
external data fetching implemented as Prefect updaters with scheduled deploy - Reference: `goat-data/.github/workflows/fetch-resources.yml` fetch-assembly-links job **Step 4: `update_ucsc_assemblies` — UCSC Genome Browser** + - Fetch assembly hub list from UCSC API - Parse to TSV - Write to `s3://goat/resources/assembly-data/ucsc_ids.tsv` @@ -91,6 +110,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - Reuse: `safe_get()`, standard arg parsing **Step 5: `update_sra_data` — SRA Metadata** + - Fetch from NCBI SRA API (Entrez or BigQuery) - Parse XML/JSON responses to TSV - Write to `s3://goat/resources/sra/sra.tsv.gz` @@ -101,6 +121,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy #### Group B: Complex Updaters (sequential, more effort) **Step 6: `update_blobtoolkit` — BlobToolKit Analysis Data** + - Approach A (preferred): Direct API fetch from `https://blobtoolkit.genomehubs.org/api/v1/search/Eukaryota` + per-assembly detail queries - Approach B: Docker-isolated `genomehubs parse --btk` via orchestrator pattern (like tol_genome_notes) - Outputs: `btk.tsv.gz` + `btk.files.yaml` to `s3://goat/resources/btk/` @@ -109,6 +130,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - Bug risk: Legacy has `print(plots)` debug line left in (line 66); pagination may miss entries **Step 7: `update_refseq_organelles` — RefSeq Organelle Data** + - Fetch from NCBI FTP: `ftp.ncbi.nlm.nih.gov/refseq/release/` - Parse GenBank flat files for mitochondrion/plastid sequences - Extract: accession, taxon_id, organism, sequence_length, references @@ -118,6 +140,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - Consideration: BioPython dependency may need Docker isolation (check pydantic conflicts) **Step 8: `update_google_sheets_status` — Google Sheets Project Status Lists** + - Rewrite R script (`get_googlesheets.R`) entirely in Python - Fetch 
TSVs from public Google Sheets URLs (no auth needed for public sheets) - Use `import_status_lib.py` patterns for table normalization but rewrite cleanly: @@ -142,12 +165,14 @@ All external data fetching implemented as Prefect updaters with scheduled deploy #### Group C: Infrastructure & Static Data **Step 9: `sync_static_sources` — Static YAML/TSV pairs** + - Utility to upload curated YAML/TSV pairs from goat-data/sources/ to S3 - Not a scheduled updater — run manually or on goat-data repo changes - Covers: genomesize-karyotype, conservation, uk-legislation, regional-lists, lineages - Could be triggered by a webhook on goat-data repo pushes **Step 10: Secrets & Configuration** + - Configure Prefect Secret blocks for: `JGI_OFFLINE_TOKEN`, Google Sheets URLs - STS_AUTHORIZATION_KEY no longer needed (replaced by tol-sdk) - Add deployment entries to `flows/prefect.yaml` for all new updaters @@ -155,6 +180,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy ### Relevant Files (Phase 1) **New files to create:** + - `flows/updaters/update_vgp_status.py` - `flows/updaters/update_jgi_status.py` - `flows/updaters/update_ensembl_metadata.py` @@ -167,12 +193,14 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - `flows/lib/api_helpers.py` (shared API helpers: OAuth, pagination, JSON→TSV) **Existing files to modify:** + - `flows/prefect.yaml` — add deployments for all new updaters - `flows/lib/utils.py` — add any missing shared utilities - `flows/lib/shared_args.py` — add new argument definitions if needed - `requirements.txt` — add BioPython if needed for RefSeq parsing **Reference files (goat-data, read-only):** + - `goat-data/scripts/api/api_config.py` — API endpoint definitions - `goat-data/scripts/api/api_tools.py` — retry/pagination patterns - `goat-data/scripts/jgi_to_tsv.py` — JGI OAuth flow @@ -184,6 +212,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy - 
`goat-data/.github/workflows/fetch-resources.yml` — complete fetch workflow ### Verification (Phase 1) + 1. Each updater runs locally with `SKIP_PREFECT=true` and produces valid output TSV 2. Output TSV format matches goat-data legacy output (diff comparison where possible) 3. S3 upload succeeds to `s3://goat/resources/` paths @@ -193,6 +222,7 @@ All external data fetching implemented as Prefect updaters with scheduled deploy 7. `prefect deploy --prefect-file flows/prefect.yaml --all` succeeds ### Decisions (Phase 1) + - **Google Sheets**: Rewrite in Python (not R) for consistency with the rest of the codebase - **BlobToolKit**: Prefer direct API approach over Docker genomehubs parse (simpler, avoids Docker-in-Docker); fall back to orchestrator pattern if API is insufficient - **RefSeq Organelles**: Use BioPython in Docker container if pydantic conflicts arise @@ -204,52 +234,63 @@ All external data fetching implemented as Prefect updaters with scheduled deploy ## Phase 2: YAML-Backed Parsers & Validation ### Goal + All data sources processed by fetch-parse-validate pipeline. Parsing triggered by update events. Validated TSV/YAML pairs uploaded to new S3 directories (`s3://goat/validated/`). 
### Steps **Step 1: Implement `parse_sequencing_status` parser** + - Handle all status list TSV formats (VGP, JGI, Google Sheets projects, NHM, ToL Portal) - Config-driven: read YAML to determine column mappings - Reuse `Config` class from `flows/lib/utils.py` - One parser handles all ~65 status list YAML configs **Step 2: Implement `parse_refseq_organelles` parser** + - Replace stub with working implementation - Read YAML config, apply field mappings from `refseq_organelles.types.yaml` - Validate organelle accessions, taxonomy **Step 3: Implement `parse_blobtoolkit` parser** + - Parse BTK TSV using YAML config from `btk.types.yaml` - Handle BUSCO stats, base composition, read mapping fields **Step 4: Implement `parse_ensembl_metadata` parser** + - Handle all 6 Ensembl division TSVs - Single generic parser, config-driven via YAML **Step 5: Implement `parse_sra_data` parser** + - Parse SRA TSV with YAML config from `sra.types.yaml` **Step 6: Implement `parse_genomesize_karyotype` parser** -- Handle the 25+ genomesize/karyotype FILE_ sources + +- Handle the 25+ genomesize/karyotype FILE\_ sources - Generic parser for simple TSV→validated TSV transformation **Step 7: Implement `parse_conservation` and `parse_legislation` parsers** + - Static data validation parsers - Check CITES categories, legislation references against YAML constraints **Step 8: Wire all fetch-parse-validate deployments** + - Add trigger entries in `prefect.yaml` for each parser - Events from Phase 1 updaters trigger corresponding parse-validate flows - `validate_file_pair()` runs `blobtk validate` on each output - Gate S3 upload on validation success **Step 9: Configure S3 output paths** + - Validated outputs go to `s3://goat/validated/{directory}/` (NOT `s3://goat/resources/` or `s3://goat/sources/`) - Both validated TSV and validated YAML uploaded - Validation report (JSONL) uploaded alongside for audit ### Relevant Files (Phase 2) + - `flows/parsers/parse_sequencing_status.py` — complete 
implementation - `flows/parsers/parse_refseq_organelles.py` — replace stub - `flows/parsers/parse_blobtoolkit.py` — new @@ -263,6 +304,7 @@ All data sources processed by fetch-parse-validate pipeline. Parsing triggered b - Local copies of YAML configs from `goat-data/sources/` for development ### Verification (Phase 2) + 1. Each parser produces TSV matching the YAML config headers 2. `blobtk validate -g ` passes for each output with ≥95% valid rows 3. Event chain works: updater → parse → validate → S3 upload @@ -271,6 +313,7 @@ All data sources processed by fetch-parse-validate pipeline. Parsing triggered b 6. No data loss: compare parsed row counts against raw input counts ### Decisions (Phase 2) + - **S3 validated path**: `s3://goat/validated/` (separate from `resources/` and `sources/`) - **Parser reuse**: `parse_sequencing_status` handles ALL status list formats via YAML config - **Parser reuse**: `parse_genomesize_karyotype` handles ALL genomesize/karyotype sources generically @@ -282,9 +325,11 @@ All data sources processed by fetch-parse-validate pipeline. Parsing triggered b ## Phase 3: Switch Legacy Import to Validated Data ### Goal + Legacy import workflow reads from `s3://goat/validated/` instead of `s3://goat/resources/` or `s3://goat/sources/`, removing all fetch steps from the import. ### Steps + 1. Verify data parity: compare `s3://goat/validated/` against `s3://goat/sources/` for all directories 2. Update `goat-data/.github/workflows/genomehubs-index.yml` to read from `s3://goat/validated/` 3. Remove fetch jobs from `goat-data/.github/workflows/fetch-resources.yml` (or disable) @@ -293,6 +338,7 @@ Legacy import workflow reads from `s3://goat/validated/` instead of `s3://goat/r 6. Staged rollout: switch one directory at a time, verify, proceed ### Verification (Phase 3) + 1. Test release produces identical (or improved) Elasticsearch indices 2. API test suite passes 3. 
UI test suite passes @@ -300,6 +346,7 @@ Legacy import workflow reads from `s3://goat/validated/` instead of `s3://goat/r 5. Rollback path confirmed: can revert to `s3://goat/sources/` if issues ### Risk Mitigation + - Keep `s3://goat/sources/` and `s3://goat/resources/` intact as rollback - Phase 3 changes only S3 paths in workflow config, easily reversible - Switch one source directory at a time (assembly-data first, then status-lists, etc.) @@ -309,9 +356,11 @@ Legacy import workflow reads from `s3://goat/validated/` instead of `s3://goat/r ## Phase 4: Replace Legacy Import (Future) ### Goal + Replace `genomehubs index` with updated import code that reads validated TSV/YAML pairs directly. ### Scope + - Requires new import code not yet available - Skip validation/lookup steps (already done in Phase 2) - Direct TSV→Elasticsearch indexing @@ -321,24 +370,186 @@ Replace `genomehubs index` with updated import code that reads validated TSV/YAM ## Phase 5: Full Pipeline Migration (Future) ### Goal + Remove all GitHub Actions workflow dependencies; full pipeline runs in Prefect. 
### Scope + - Yet to be defined - Includes: ES init, indexing, fill, test, release promotion - Replaces: s3_release.yml, genomehubs-init.yml, genomehubs-index.yml, genomehubs-fill.yml, genomehubs-test.yml --- +## Network Robustness Review + +### Current `safe_get()` Implementation + +The existing `safe_get()` in `flows/lib/utils.py` provides: + +- Configurable timeout (default 300s) +- Supports GET/POST/HEAD methods +- Raises on HTTP errors via `response.raise_for_status()` +- **No built-in retry logic** — relies entirely on Prefect task-level retries + +### Current Retry Patterns Across Updaters + +| Updater | Task Retries | Delay | Notes | +| -------------------------- | ------------ | ----- | --------------------------------------------------------- | +| `update_tolid_prefixes` | 2 | 2s | Reasonable for a single file download | +| `update_ncbi_datasets` | 2 | 2s | Too few for NCBI rate-limited API; batches of 50 | +| `update_nhm_status_list` | 100 | 60s | Very aggressive — NHM API known to be unstable | +| `update_tol_portal_status` | 3 | 60s | Orchestrator-level retries re-run entire Docker container | +| `update_tol_genome_notes` | 3 | 60s | Same orchestrator pattern | +| `update_ncbi_taxonomy` | 2 | 2s | FTP download — may need longer delay | +| `update_ott_taxonomy` | 2 | 2s | HTTP download of .tgz — adequate | +| `update_boat_config` | (not set) | — | SSH-based, different failure modes | + +### Gaps & Recommendations for New Updaters + +**1. Add HTTP-level retry to `safe_get()`** (or create `resilient_get()`) + +- Use `urllib3.util.Retry` with `requests.adapters.HTTPAdapter` for transport-level retries +- Retry on: 429 (rate limit), 500, 502, 503, 504 +- Exponential backoff: 1s, 2s, 4s (3 attempts) +- This separates transient HTTP failures from task-level Prefect retries (which re-run the entire task) +- Existing updaters benefit automatically when `safe_get()` is hardened + +**2. 
Per-source timeout tuning** +| Source | Recommended Timeout | Rationale | +|--------|-------------------|-----------| +| VGP (GitHub raw) | 30s | Small YAML file, fast CDN | +| JGI API | 120s per page | Paginated, can be slow | +| Ensembl FTP | 300s | Large JSON files (>100MB for Vertebrates) | +| UCSC | 60s | Small text file | +| SRA API | 300s | Potentially large responses | +| BlobToolKit API | 120s per request | Many per-assembly detail calls | +| RefSeq FTP | 600s | Large GenBank files | +| Google Sheets | 60s per sheet | Can be slow on large sheets | + +**3. Partial failure handling for paginated APIs** + +- JGI, BlobToolKit, and SRA all paginate — a failure mid-pagination should not discard pages already fetched +- Write each page to a temp file; only assemble final TSV after all pages succeed +- If a page fails after retries, emit a warning event with partial count and halt gracefully + +**4. Idempotency and freshness checks** + +- `update_tolid_prefixes` already uses HTTP HEAD timestamp comparison — reuse this pattern +- New updaters should check `Last-Modified` or `ETag` before downloading, using `is_local_file_current_http()` +- For APIs without timestamp headers (JGI, BlobToolKit), compare MD5 of output against previous S3 version using `generate_md5()` + +**5. S3 upload atomicity** + +- Current `upload_to_s3()` uses `s3cmd put` — if interrupted, leaves partial file on S3 +- Recommendation: upload to a `.tmp` key first, then copy to final key and delete `.tmp` +- Or use boto3 multipart upload with automatic cleanup on failure + +**6. Connection pooling for high-volume API calls** + +- BlobToolKit updater will make ~10,000+ individual API calls (one per assembly) +- Use a `requests.Session()` to reuse TCP connections and benefit from connection pooling +- Add rate limiting (e.g., 10 req/s) to avoid overwhelming the BlobToolKit API + +**7. 
DNS and TLS failure handling** + +- `requests.exceptions.ConnectionError` and `requests.exceptions.SSLError` are not HTTP status codes — they won't be caught by status-code retry logic +- Ensure transport-level retries cover these cases +- Add explicit handling in updaters: log the error clearly, distinguish transient DNS vs permanent config errors + +--- + +## Logging Review + +### Current Logging Patterns in the Data Repo + +**Primary mechanism:** `print()` with `@task(log_prints=True)` + +- When running under Prefect, print statements are captured as INFO-level logs +- When `SKIP_PREFECT=true`, print goes to stdout (useful for local testing) +- No structured logging (no JSON, no log levels beyond print) + +**What's logged today (by updater):** + +| Updater | Logging Pattern | Gaps | +| ---------------------------- | ------------------------------------------------ | --------------------------------- | +| `update_ncbi_datasets` | Prints batch progress, line counts, match status | No timing info | +| `update_ncbi_taxonomy` | Prints MD5 comparison, extraction status | Good — includes checksums | +| `update_tolid_prefixes` | Prints line count, timestamp comparison result | Good — includes freshness check | +| `update_nhm_status_list` | Prints page count, record counts per page | Missing total elapsed time | +| `update_tol_portal_status` | Minimal — Docker output captured | Docker stdout mixed with app logs | +| `update_tol_genome_notes` | Minimal — Docker output captured | Same Docker stdout issue | +| `update_ena_taxonomy_extra` | Prints taxon counts | Missing API call timing | +| `update_genomehubs_taxonomy` | Prints blobtk command and result | Good — includes command | +| `update_ott_taxonomy` | Prints download size, extraction | Good | +| `update_boat_config` | Prints API queries, SSH commands | Good — verbose | + +### Recommendations for New Updaters + +**1. 
Standardize a logging helper** +Create a `log_progress()` utility in `flows/lib/utils.py` that: + +- Prints a timestamped message (ISO 8601) +- Includes the updater/task name as a prefix +- Works identically with and without Prefect (`print()`-based, not the `logging` module) +- Example: `[2026-04-24T12:00:00Z] update_vgp_status: Fetched 1,234 records in 3.2s` + +**2. Log network call summaries** +Every HTTP request should log: + +- URL (redacted if it contains secrets) +- Method (GET/POST/HEAD) +- Response status code +- Response size (bytes) +- Elapsed time (seconds) +- Whether the response was served from a cache or obtained after a retry + +**3. Log output file summaries** +After writing each output file, log: + +- File path (local and S3) +- Row count +- File size +- MD5 hash +- Whether it matches the previous version (changed/unchanged) + +**4. Log events emitted** +Print a summary when emitting Prefect events: + +- Event name +- `matches.previous` value +- Key payload fields (row count, etc.) +- This aids debugging when running with `SKIP_PREFECT=true` (since `emit_event` is a no-op) + +**5. Error context in exceptions** +Every caught exception should include: + +- The URL or resource that failed +- The HTTP status code (if applicable) +- The attempt number (if retrying) +- A hint about whether the error is transient or permanent +- Example: `RuntimeError("JGI OAuth token exchange failed (attempt 2/3): 401 Unauthorized — check JGI_OFFLINE_TOKEN is valid")` + +**6.
Docker orchestrator logging** +For Docker-isolated flows (`tol_portal_status`, `tol_genome_notes`): + +- Capture and prefix Docker stdout/stderr separately +- Log Docker exit code explicitly +- Log the full Docker command (with secrets redacted) for reproducibility + +--- + ## Conventions Reference ### YAML/TSV Pair Convention (goat-data) + - **Prefix patterns**: `ATTR_` (attribute defs), `TAXON_` (taxonomy), `FILE_` (data sources), unprefixed (primary) - **YAML structure**: `file:` metadata, `attributes:` field mappings, `taxonomy:` taxon matching, `identifiers:` ID columns - **`needs:`** directive: lists dependent YAML files that must be co-located - **TSV naming**: matches `file.name` in YAML config, often `.gz` compressed ### Data Repo Code Conventions + - Absolute imports: `from flows.lib import utils` - Google-style docstrings with type hints - `SKIP_PREFECT=true` for local testing @@ -350,6 +561,7 @@ Remove all GitHub Actions workflow dependencies; full pipeline runs in Prefect. - Black formatter, 88-char line length ### Legacy Code Bug Risks to Avoid + 1. `parse_blobtoolkit.py` line 66: debug `print(plots)` left in production 2. `import_status_lib.py`: duplicated across directories, encoding silently fails 3. 
`fetch-or-fallback.sh`: `|| exit 0` masks real errors From bc1db9a6da0347c820178db1bd566b59b5c3f24a Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Wed, 29 Apr 2026 12:11:00 +0100 Subject: [PATCH 03/18] implement phase 1 of migration plan - updaters --- flows/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 143 bytes flows/__pycache__/__init__.cpython-313.pyc | Bin 0 -> 143 bytes .../lib/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 147 bytes .../lib/__pycache__/__init__.cpython-313.pyc | Bin 0 -> 147 bytes .../conditional_import.cpython-312.pyc | Bin 0 -> 1537 bytes .../conditional_import.cpython-313.pyc | Bin 0 -> 1533 bytes .../__pycache__/shared_args.cpython-312.pyc | Bin 0 -> 5104 bytes .../__pycache__/shared_args.cpython-313.pyc | Bin 0 -> 5108 bytes flows/lib/__pycache__/utils.cpython-312.pyc | Bin 0 -> 39924 bytes flows/lib/__pycache__/utils.cpython-313.pyc | Bin 0 -> 39393 bytes flows/lib/utils.py | 51 +- flows/prefect.yaml | 139 ++++++ .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 152 bytes .../__pycache__/__init__.cpython-313.pyc | Bin 0 -> 152 bytes .../__pycache__/tol_utils.cpython-312.pyc | Bin 0 -> 2118 bytes .../__pycache__/tol_utils.cpython-313.pyc | Bin 0 -> 2057 bytes .../update_ensembl_metadata.cpython-313.pyc | Bin 0 -> 9050 bytes ...pdate_google_sheets_status.cpython-312.pyc | Bin 0 -> 20275 bytes ...pdate_google_sheets_status.cpython-313.pyc | Bin 0 -> 19984 bytes .../update_ncbi_datasets.cpython-312.pyc | Bin 0 -> 9028 bytes .../update_refseq_organelles.cpython-312.pyc | Bin 0 -> 13537 bytes .../update_sra_data.cpython-313.pyc | Bin 0 -> 17088 bytes .../update_tol_portal_status.cpython-312.pyc | Bin 0 -> 10444 bytes .../update_tol_portal_status.cpython-313.pyc | Bin 0 -> 10455 bytes .../update_ucsc_assemblies.cpython-313.pyc | Bin 0 -> 4285 bytes .../update_vgp_status.cpython-312.pyc | Bin 0 -> 3882 bytes .../update_vgp_status.cpython-313.pyc | Bin 0 -> 3827 bytes .../api/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 156 
bytes .../api/__pycache__/__init__.cpython-313.pyc | Bin 0 -> 156 bytes .../__pycache__/api_config.cpython-312.pyc | Bin 0 -> 9456 bytes .../__pycache__/api_config.cpython-313.pyc | Bin 0 -> 9619 bytes .../api/__pycache__/api_tools.cpython-312.pyc | Bin 0 -> 1607 bytes .../api/__pycache__/api_tools.cpython-313.pyc | Bin 0 -> 1595 bytes flows/updaters/update_blobtoolkit.py | 280 +++++++++++ flows/updaters/update_ensembl_metadata.py | 217 +++++++++ flows/updaters/update_google_sheets_status.py | 441 ++++++++++++++++++ flows/updaters/update_jgi_status.py | 204 ++++++++ flows/updaters/update_refseq_organelles.py | 318 +++++++++++++ flows/updaters/update_sra_data.py | 379 +++++++++++++++ flows/updaters/update_ucsc_assemblies.py | 94 ++++ flows/updaters/update_vgp_status.py | 87 ++++ 41 files changed, 2207 insertions(+), 3 deletions(-) create mode 100644 flows/__pycache__/__init__.cpython-312.pyc create mode 100644 flows/__pycache__/__init__.cpython-313.pyc create mode 100644 flows/lib/__pycache__/__init__.cpython-312.pyc create mode 100644 flows/lib/__pycache__/__init__.cpython-313.pyc create mode 100644 flows/lib/__pycache__/conditional_import.cpython-312.pyc create mode 100644 flows/lib/__pycache__/conditional_import.cpython-313.pyc create mode 100644 flows/lib/__pycache__/shared_args.cpython-312.pyc create mode 100644 flows/lib/__pycache__/shared_args.cpython-313.pyc create mode 100644 flows/lib/__pycache__/utils.cpython-312.pyc create mode 100644 flows/lib/__pycache__/utils.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/__init__.cpython-312.pyc create mode 100644 flows/updaters/__pycache__/__init__.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/tol_utils.cpython-312.pyc create mode 100644 flows/updaters/__pycache__/tol_utils.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/update_ensembl_metadata.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/update_google_sheets_status.cpython-312.pyc create mode 100644 
flows/updaters/__pycache__/update_google_sheets_status.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/update_ncbi_datasets.cpython-312.pyc create mode 100644 flows/updaters/__pycache__/update_refseq_organelles.cpython-312.pyc create mode 100644 flows/updaters/__pycache__/update_sra_data.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/update_tol_portal_status.cpython-312.pyc create mode 100644 flows/updaters/__pycache__/update_tol_portal_status.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/update_ucsc_assemblies.cpython-313.pyc create mode 100644 flows/updaters/__pycache__/update_vgp_status.cpython-312.pyc create mode 100644 flows/updaters/__pycache__/update_vgp_status.cpython-313.pyc create mode 100644 flows/updaters/api/__pycache__/__init__.cpython-312.pyc create mode 100644 flows/updaters/api/__pycache__/__init__.cpython-313.pyc create mode 100644 flows/updaters/api/__pycache__/api_config.cpython-312.pyc create mode 100644 flows/updaters/api/__pycache__/api_config.cpython-313.pyc create mode 100644 flows/updaters/api/__pycache__/api_tools.cpython-312.pyc create mode 100644 flows/updaters/api/__pycache__/api_tools.cpython-313.pyc create mode 100644 flows/updaters/update_blobtoolkit.py create mode 100644 flows/updaters/update_ensembl_metadata.py create mode 100644 flows/updaters/update_google_sheets_status.py create mode 100644 flows/updaters/update_jgi_status.py create mode 100644 flows/updaters/update_refseq_organelles.py create mode 100644 flows/updaters/update_sra_data.py create mode 100644 flows/updaters/update_ucsc_assemblies.py create mode 100644 flows/updaters/update_vgp_status.py diff --git a/flows/__pycache__/__init__.cpython-312.pyc b/flows/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a80046ecf22b58188e954633536b7947ba11b8f3 GIT binary patch literal 143 zcmX@j%ge<81kbxKW`O9&AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdrKcZSoLW?@pImHW 
zte>2jl$w*OTbx;vs-KcrlBl1SlV4t}A0MBYmst`YuUAlci^C>2KczG$)vkyYsGSjr Qi$RQ!%#4hTMa)1J0J_m3E&u=k literal 0 HcmV?d00001 diff --git a/flows/__pycache__/__init__.cpython-313.pyc b/flows/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..435d80e340db468fd2e1db3e6a157fc847ee4b49 GIT binary patch literal 143 zcmey&%ge<81kbxKW`O9&AOZ#$p^VQgK*m&tbOudEzm*I{OhDdekkl2jl$w*OTbx;vs-KcrlBl1SlV4t}pOcxSA0MBYmst`YuUAlci^C>2KczG$)vkyY UXapk=7lRldnHd=wi976rYi_+E3eYKHLz8qUO@*&|Ro=GG?I|hnT|F*l7>C2(2`>6_2E4X4Xwi z4-OcbkVB46uRYb|PwB;>y>yD9&_l>A@%GeH-<$nP$RRysV4mK4dhhA?zVz$NOpU;K z`Q*j!eVdTKQ5YYP%^5s`$1zDrLOBUYVjYoyT7=Q(BLgv;*r2l#2dJHtfI5jAloA@a z`3+0>|)mUe8KFiIN?XG=8Q*;{o+& zUSXh4Jo>hIAX2=8+dz(qGD;-(tO+||?v*C&gh}WaUt;auBxH;D_Q8)e56;$J+VeI9 z+h(!y(yUhqCB1apQ@hM#`F<*jJY%`?_9Kx-UCz8zdXlMT&3JDz)fc&3HUft076o7S z?utGb{*NH&q8nRZuWy7Kn`^h%R_|&@fwOOGtB|@pLZIy(rcTHJ6Z&_h^{r$=wqki} zsTHSP#@QD#RjidnDr#->;(OWRX}1*@d6KHM$Ri#O6KnP!=!S%Z!`?7q(SU#sTzMVH zA^B&PG-jVJKVE*?e%yZ6{H^Z)^4*L2(y6l~aE{(-3O0NW?w|q$#)lqZS*=byngq}i zg8U`80~g3K`2kE$Xh&2*I-#20muTRLI#69%;ZfF2qV~Z8Ced8sMI3S224<+g2v!3` zdq{qoH-LtLEKNggg`vc0@J7)bj3GV&%{;mx$V2kS+_lrnwU3vJzycT$;sS7QWg`&m z!Gen9-usDMK`$)JGzoDT{ohFdeZ?ZE@s+qbHvcG1_z>{xEK60$_F?khhi{;K2!3rE zL3|3>u@loIUSZ!47T3+brTNZe1GoEmj58~J%QVjvz^A&>`6gWbYW2>VPfa*bRYgA! 
zZ@ZJ@TfFnO>A5Uf-Z+)#CaGFCj>WWJn31H^f_w__!pTSksV-O+mzBj zN%h<<(_3#Y6X(KF``o3a#VL91lKG2=>wnE%dU)rJOG>rV+ULJJH(xq8&YaenyD9&_l>AvHAh}-e@H!MiZhxeBrxHt5mRsTDv4lF7ZiF z|CWmSF7;+!VW7>pcwHaJ8TaTez!CXgD-pWJRGbRk(o~!Z0iEy*jOIEaH;89Gee48} z_`ux~Y=_0U%lm^&?tUbA)JvGlr7Kz9aGWm7ha#0t$A#6) zvZU$W7eg@kAI_>CbT+oat?t_0wblE|%E8eym61tRE?!mUF3XR}2oq2%{sYN`^kaE@ z*^haTCG0E7bLMYHdF1aT*<j~$c~pl3RaFer zFwTZ)?%C(I`m4Bsuij2)2!1)_1GFfY&Ca z^e<99v&;1Mo6E$y@MG(+b!OAj)pK}lllhAW8-LGTI$VEalah1d-2B73_0n28wfs}d RJGH((vq`N>56gPG{sV^?LDK*L literal 0 HcmV?d00001 diff --git a/flows/lib/__pycache__/shared_args.cpython-312.pyc b/flows/lib/__pycache__/shared_args.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9aed2997b21779fb499472961bd63d3c53738244 GIT binary patch literal 5104 zcmai1OKcm*8Qxv4C_XJ(55MF{D@v9{C8W~$QAf6v(k5+75lKa&Qd_pOU2#Uz+I(eq z8Cx3}V1OPJAZT@qf~J52)TjgZpi2QCa}3Z6RR^%LDOv$(54|aHPd)YjXP44CZPEp} zJM;bXzyE(`_@{6KMBLC1Fls8(gsMx zg#^h#K-yhMh;)!nkksXp6efqrVZe^K)*T>6f%BLPX(Qc$9CslR@)GF*zFrr0kVvEt zu&4`bC;h-VKzQ0gPJHTN9(9sIz)rq^b&(;!VlQBa$S`>s14Zwcp_|QX= z*NQphJ27lorx5@dpUX$S425T!+tA(L+UqL;k+OL4v* zxCY219at2}6wFV!<{6j|k}N$*a-e6D^0%j(MYsP2In%H%wj#vdQvShnTp+0})k?H% z=#svyXp~3`)VND&S<(xdS~c`UAdprH%MwuwhFU2r+KQx>C1aUNMO8N>jaD_PGk&Ek zDULA7sDK}ul2C%wxfB-=6Ry49_A?RZ!Rux_UZyN*YdrGNVq@w`| zp4n6KbTWTc8k63SGBwdr=|(iJM_-Ym@wk3Ut}4cI^cKueHxS>CF4JNa2GMDtGy{;I zeVd6RszwV&1p-+qgX{!~(dfH1fC=#AwHgJ)SgBH&f#dh@2N<6dKnSYb^m=I6vF(Oq z7#K0Uk1=y@#RC7t0?5WZmM2+WvBVUl(rTG2Lpn`=Jg!t#xuzA(wz-)p2||U*N={EC zVuB^2AC`!=z^sKiZ;2?@6yl2ML*^nis6Iyb7x?GYm2zC8dSpimVbdlyoN2L@Hsn$KymX6nRmj)TAu# zW`8rrNRe!cT{jSLC&Z?gF;a6dr5Lp7q*^BQUP8KD(MpOjDqV*sp2@^hph$=l3=Pp% zWUW><&o$kvQf(2IONs;}2}sgi)mX0745?NH{pjTEsgWhsV7}@osAdM^@nyrP+U9mQ z&BZWZ&F80~P@Q^6m;qQ>Tae32iJDRt-c3`l&~l|juhtfH=sw+0$_1K0xhjzhm13<_ zHskK)HYPc})$A2egDo(TVs_m#L%@$AkTJjJ;XJ%iAHy)0i_|DAqYJ9Nv|2N49}c^H zP|JWbcbP>cwJbySA$5ZpwXBwECCGmj3Ld(wTq`Y54Rx>;l2*B^Ct@DE09pxMRBA;- zYlU(!55(gr2}(g-DpO*{enePvkz^?Mlmu1~vfAG2X8(_HI&09!3i3fB_nbW9cG4y| 
zUJCJd&9->_-5Q1Rs8n%V)XhUVdmur5uohxul&oR&v`WR0!9Iw~&x)(YfQEkHgP z4~sUY3hP*>u=4`1j#cRFFy=thB8`?RP{!C!BYV};EOn?MU0=2nHTdHk@2QFeYf+Jb z5eJ({t>hGHZ<7}~ID6^h1R4o_f`;QFR;Xn*SpwjhZX39>k<$=AG*<^V&YSgrH0WNBL4_@Cq@O!=vFSx$( z_XPt^QTw^=Ug=<~0ufd@;=v>Rc8deLK13wKV~6iCN`~jYXVpVmR{7s>#QR&18Ae3q zR=J08F~qMe?IFMFS>^HS^pIEihx{xT^FeJfnH}&D0`_W4b z-;7zD<_GmG8D`KGT8oc4+cb*|93+B9EstJXusloDh>2Po@?j>efTIWcj|tL6K=Bm) z_nDD7xB%!Q1^x7?k%GEFi!`pQ29z)q{K#SvHiMC(x-eqjL-35#6V(;FK2Y?R@Y8<* zuR6CI;Mz~tC;t)*|N88QXV-@}79R9JSp4ke-;aDYvN`x9`ZPNBG&o*QZHvCOI~ylA zjy~vXga)?6fqH5um{}_{f|>QRjqa(9!h^w2?!aL4^cPqD+w%rwNzbI`#R_R1@ykQ#w_i0Dd2i}~vRNQSWtEu<6*6s}fM3AQ^Gt{Kj8nPB0! zy!4&{XR5^yL(qdD+-iYq*OLA@_c_l3t)pp@c7&0b3{8UZSMbwMz^l&vfRoq0^z1pQ zq1i7uiPg!R9Dug)&p|2UU<12(4q+9>uV%5 z%^$-YjbV;1G;?&}(deeJ)qP^?C zVeWkdgO9oY)3*E{_{UBK+=m8Eakq|P8w%X2a2qT7dk-`$yT?OCc`8DrhVz1HxC zE9AZ)S^>6*g$CNRA0?nH%gb=ht|xQ|Ta=ZQ7Q;pO`YZhONqE({zlXb?g$K661DoAn zg~#fbzipG&JlnqRHSaTD_m;1FWA?$}Pi{7RuRogIJoNdEr@q&BJzS)3*UN?4p9PO^ z1&==s_B6yEmbTci)hf%RQjld08%pbhm*s_92<+_|=14<}G%QHiWem+~h5M)s*Hg=z zoSsf+Q&uoJJDbj2oVqSgq^w9PnNP}>XVU5UG`p$86*rkrTdk>?>+;N8*7D;$%cl6Q z&RvwV$xPbvPo(5axv9BK)(T8yQ|W7R{`zzp$J29pd$C6?8{VM! z2q>w!Ow&M^<+A8jn2mCo>$bhVpXX-Ylv5KkmLHERn-{L+6~(Z`g-WGp@4q7c?_v7^ z>ncmcziE~qPhY*N6sVnE){3|T*t>}DLa9P(MLMoc0fu$2_dy`p&mp^eC>_?nD%@B7r`=vdE!ecN4$C!Vd)1@P*C zP!H^e(Agg#Fj42WJ33KXtB4V5Wzssiwq6#W^Dh!3LOrw#L_0)hUL>+80kLB}*9Z@6 zca1#h*b0q-9gKi>G&)Z-!h_pg@lAazbRH-Ng?f13!kx&d%PnR-_}j5=2k@+m)(jvE zk~`|nAc3lZM)<^b*V!*17%5Q2Oa~(vOSel_lSRTbbsXJ}oODHl8OVH^YJ~fCBAM-u zLoPDoWn@g`cGu}Ag{{y<(8i(|^7C1K+aF#lte;w2{;+4)!+_|?N1jJ>o2e)LjiIrv z=(r8jMamZ=6PKd^ga__dWm! 
WKAW@b^W@sIvE0^J?q7(p?d(7KJ;DG0 literal 0 HcmV?d00001 diff --git a/flows/lib/__pycache__/shared_args.cpython-313.pyc b/flows/lib/__pycache__/shared_args.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f8009e31d21507e45477e39487d98c44eeb118c GIT binary patch literal 5108 zcmai1O>7(25#C*{DE?Zse*7m}UQx0vDk0^>vTMthQ`)2>$s(!nk*OUsS**yFv^BY8 z-Y#Qng8(ehgIW}*E&-gIbEX_GF%{rTq2 z|IB;Cm*H@LgX_=tzg_=sfaAW$kM8B`yuAFaz;T~)grgkciExLfJbMaMfT!n?jF>)NR6Rvef$VuQl=>CMuYtVRUGk2TI7t9D?ZQrw3*-&J-gIFn z$we{)*d-Tsid+UAzi=VLtu#U3Vd8$Q&Ry$XkH@vg5-DNs^nud5fd~ z?Rs{Wq{(eS-*$0JWWEIXEQvEh50nI1D0xc*rO^^ZX+~toqMJVBC3jA7zZU5j@Qstj z()f%>mKbu*g=82qL9(UuBnR3SOMHI0TY862khaWL#@2+`JIX(KjteB|TC=9=rXd;Y z3RNr8ifTSoRb4X5RBM<*>iH5k95OG-zW zWY$5Bq*N*(5Z-ZlL!pLB6M+J}yM&eNx~XWoA(;>B(gStFm<`0G)vB^)%t}c|M}1W? zOseT?#-Td+Vp0z@GL{7JG8w??(g6YheORZJ0}DYPGqIr<#(&!wNIz0)jjB2ukl-R0 z<>h2yUYe6WjxsgTS?PW>ZbV;~qVc$KQEn*adh~txLft_8IJ&M@8}Jfc21+*o1@bNv zSJ9|iHtP_`x(>1vD2A#ZHUX{xPk}a7K+KJX3SZ#(C!YiuUnhVNRJrT*?-TK;%kX=rkjR;|8beo2B*VX~6T3yGLu zi|B_fqAl>%!<@H86l)1_#quF@L)9x*{5E6^9d*8?kCbXtWif`TP#;01V8T|lsyb=J z5_Z4<|5e!pS7QS8f_1i6VfAH&K_;Z6wo=`YwTcx=c0QzqRKn_y$194d$g5OUEtSRH z8tcXwDUw5R>IMSd39;p6jH)$}QcSh$q^4KYM+xb6oz@g{R=NjQJd=s1K#>ryU}!3I zL#9pLy3%#8q0&`YE-4a_Bp^u-HFLe*G^J((^rMsH;`EwkGG7f8)U<-}__}E}9CL@e z=3O|A2}(g*)76R<`x#-~MUtsJQW97}$m)1&SYtoK*;#`=){zeqdEn#;x04Q` z%#p5V&1*7gGRZcK!hSkJb1Y0@%E#Oh(x&5;k<;h;nF=t59ulLM>yiW=!u6Bxxf{< zG>nM&bfl&EyKmqKmF6$-X?~oG`JlF#&64{p#vKYbET_Gktr@n`k}*{}j9y}@6tg+% z2jwgoR?rn%n~!z2X%-ncNCc|d9;3Npd)8DlCel9S!%W(NjvnYgCdd#0#Z%P2G+l%N zz?d!@GZ&}J+KO6LrZF5&7I-vJHffOILFMlkCyATnxa?as-~-~UhH5+b>>#slV53EJXDx_e}I=y zx&Pa=@UQGDGBxwIU>i*Gn zsq+m@D_XE4!DtMpHS;Ep-W?SqP7l-TuB$ z+Vt%Djy(4bf8`r~Or9M7tkm{heM+{EeR2OA-_;i$E;73BgQs_br@s!4w8asY zwAhs0E6bI7S(Yib6CH$`?S%;h_H~LGqG%7ra)cemq;@aNqB4x8wl}%FoX)1~V3Lq@ z=H}8pc_C#-QprM6zMW5}-%GP89md;aA#L}j^7rI?F>CvA#&RgW`QlAEo6Mwb|3XT> zm0K!ivUXr0n@Zo63-^}O_`X~$IEy`+Zrc9LLRNk^xde(r_zIdA-p;1M2K>sWZ{_kS 
zVh3}@!U13SonjhP=avf#xh$~*`CP6b7m{~DL~mzUw|#`nJ6b|ONfk3)1K}%|MYqE2 zEtk3H*z5aME`LW(E#z%K9#sx6jN^61w8fQrz3S|}BL3%Ly8-JdTg1O;wjWPkqoI^l zC%LQ46=4s7Mx;ql#}>1P8wp*gUF5zvnI z;JJ2qVs|LMZR~`u0p+OB3Ljdy7nyar#jFQ^2ezC5UUt#C0c1gPN4*;)P&M8TpW7X} z{1*sD3RE%E!3f4O?2^@GkuXgICwC*~UD03$GM|>(;nBTFW_RG2i_CZ#856lXH1n*y z6S@i7SQJBkKFja=!<*%;i<|4e8`<|TAbS3(=V@^}^=zy?IkywN;lO)6{>`yXV{`I1 zAN-J9waNjidIeb1Ae_+r3fWY6_8b|QP;~B(m><4=Jvrhnm$2MV)XLp~^<#y(B K|3-{$Xa5018H$kr literal 0 HcmV?d00001 diff --git a/flows/lib/__pycache__/utils.cpython-312.pyc b/flows/lib/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7f957d4594a1316c0523ae01e4752d551fd4cd4 GIT binary patch literal 39924 zcmdVD33MFSc`jJh`-;ZWjh#djJ3uT10g4n?Vc|}Kv`A4FBn4`s8eo&yT-_iE1e76< zl7O-D1Uk0aisYzed*bGiGKSvCOyGCUyojLB;XG5QXosE4)b~g2y*xB4?W@k&E1!vu`b;QY0Q(Dpk#-3^NV|onQLjqakG})- zelZ&s;UL~`5AIMd-A&y&ggg6%!^m^s4H|?aX!U@VqkqCP!cpWO!{2fI_2TaY{;1au z;UwxE6u!iM7x?r2Rmjy5BrrHM2#=$kLu{lDz}t2u_Y`su-A7c+dyM7MEYe#r2~Q&LxbS6+zIT#uc=p!>^1U~B$<#Rz3XF|5=p=bbv|Yu0c0-{FFcB9XKbBjf@4yM*QA$tlz;Fo6U;@4?$DGo$Lo?45ODj zy{FIl*_C%}FjawW->^z{Uohw&IXgVr9}pya(EpVQ|LB09ei(xxpBR$#{!u|P1w*2b z&cpuEq0l+WG7=ER#{#1vsX)Ez9~Z~Q$3&VBDF^MO{l3A88)!86O`HvSpXxk*%uBBmpwYsnym!DrPf`vO&g?CI z1Ml-X7vj|$=M}`O{-*OZ|E6+(n(No@r`4g>)*RE^`Sj;0chQnkA1!J3(^_JLF|(=c zEv5BHdwNKvlC#}2hu+c-XbPD|DM zsWW<`Juj6j@Qf(waF>8*&G9C^F&yc=6ZG^?omHNjM%{jG^wYW*wudU@S{(PLa)A=L zYM%9B{nQBt^v0`}Po2|xlc(lR>L>M8TpJhUFX?#hDa-=Ty>#TV{weMfU&W>7shShP zh_HFq>ORFy>MwC}*#@JJ2gotNC1h}TG!P2-h67W6ZwLrufUSm%1)yLVD%>d!rJ)(h zk<0fF28R9KdZ3Gjo!*na&^d2tj5Ws=OJ+qOQ z=%4hB3`-`tVaYKB6gA=x`3Q$e2D&0#8dA}fWLEH&R6r#e3Fsf37-b|VC|Q+^(V*lS z7yTClV-rESkRaLch>-ts2q;mq1SiIEKZqLpFAw+`?U9_UJ8Ct_kv@kEO-e2`OYKCS zT3SV`!3Kk52>OQyCA&N#_8P?w3|(wT5+pQ2lPVINOP2or0Dz^xf6CR(prl33V1-S9 zsu}KXAy-tfTv#73tdEv07H*q0ykuW-moK{;;_ilM^$$1S*gXIHi|%gZTUSuBXmh-1 zbF}A&M{gWmEZTuw$4bej<&tgjl5O+CUtf6p!eU7ma&reo`k#j`r&Jb!x!T2##qzdcMLyq+;pt!b@lmkhNax zmZXK-&_L~!B)<9)R~ z?c95H#-1(a_u6y!oAiHcVl 
z`-QQA{(kXMw8|(dBX;bah?SH_NeliMReX`Vt#k2W(O72-t2&a?vTlG zM*J~?RgQ~vf;A2DG_6yS5f~BPSD~VgV^|GY5JuV(&BPUH0QOtq2^E;X7>*#Ir6XgJ zce}$qT3(N@lEMBV$fbd2FQh>)X3lw#<`{jF7n7ZJ zO`a9;G2A(YzhD6ptf6wwkw3RNJRN;@(Y`BY*mXa5qY~y-dZlZ)8=W8gjo&C3{04Bs zMsUI=!Q?j!X1^KSyam54lhy{CWID-s(y3;KQRN?LEDuctL%EsEj$~jc(?EDkvJ3v; zfEZvXDT4_jR)oP6!R2*=Bd#A;^1%qH!uEjV>T!?Cvjg0^&R3{H8NI_b1d1U|QSRph zDHC;51vO%Y*>AAIMCIg0QpA9sLj&^G5BP?MeP@UL028W21;OXBo5T`kR!JLu`jgVL z*F5mS#7lk$sV4(Q*HEoqfK@jpO6;LzKavK6xSg&ub2CvlLmg8txp@t12T(0Y{L>7V zEap5Vgpz?o*{Q5{*6^XFm;s(-AW#sAn;6jE6Mh)*m-dbW+!bQ3wD*d;%E7RRQj^#y z!X#7wR+|deE%sU%$O#a%Fw;J9@`10Hm93}3%BxyanN~NwJ@6XZld^IQ)I{2p3k(Bz z(|bYCK?jM11RrO80~hcf*t8$S7$rMKRU7J{qvE9rF@%fi)34yQ=RT{_7P zQd^S#f`3wM#8n0zA*P$b!!f`>CLIZBaHF!*L3@bk4m+l~Hx;Nk1(A1JcaaYkqy&y> z$ZW*AhX~%-p|&Az7If#$YHsR0&Gm3E>j!j0fY)cij$hGF>u30vOleQlt53Y9dnw<5 za#ysRQ-DzUiRU#zC9Q2<(5cV=f8*H$VD70mmyoJg4SL^ROdtc}17qTdFBJ6pyo8s) zbVKLBIDBNS@5&D^;iw(iA8QB$HmtW$K?8#Zr74 z(ALC?KC7%X`7T!2SwD%#=uyZoG~p3}fpgvw-z4R$J=9hoEuo9P;R(MN0=4{TQcbWd zs50m_9e}NLWJ*6Fg^vvcpqLT7mjWbQU`(tFsanK*dsTGMnKl(9p@e*3Trn3auik|D z8W^4sD9`7u4EnJyeW5WiSgG}bdXkz?Jkb}zz{Z2ry&&Nht&$hBgND{s#^AP$UbKml z`%cqEZ-icf7wV7^j0mp;Ip?8b!B8g{<)N`^&&udtaNIu-7!05!-aZveA4po~zuIUcTX3EPm{UC(=2;>Fws%3c_FdPGQs&FMe#%)fuyNeqwRVJ`=Z;E?YLn zEgQqZNcW1A~VzK4Sk0us2eZc?J`Breg?JqAbOvGExELxsS>UBob=Xdm6 zNhkmL-?f|xl2+lS&fHaQxoLAEeU~$LZL+*;aUvC)a8ojf&`4#VHy5(?_*?a^MQ;ka zNFj!8gmU;E?pg3WR}3Ml#g1vi{YWgxzvQ0QXG0}|J}UwuNXdwR3zN%0w(0fGD(u7Xt!7mZq1MkGh5CKKYTAuSujnUn zTwbmjD-taeMq``^@Gl!N!xsZ%B5I6}QJ$Wx z*hFZ2A|%hgJj-l;ssRKN?~{0|#M^os%#vXYZ%`x&MeL<}#!Dhpx8f6&K`UN7gQUU4 zR*u+58N?h&{Gg=Alt_9EHb@c+wXO<4P9Fn{{?J%|D0opM)+B{;;I$gQ0lw8OZUT!la2SD!Xmgd&>S%Z_LZTZRFg=&<)sXW7e9_ z@7TDKCwU0??B~5;B|0-#*k#cFpcKhXzPfW??#-H9y546*`mVv)*cF|+Cb4E+Y|Vm+&S+5;2(^_S1Tzj+ zhRcRSRBa_N)YBaI40wwxdf+K-1i({Td#Sz-YK)JV&b%4azFjd0{CN!*v3X#LpnH)E zX?O{^jxgz(1A12F>FT*h)2XUCSB%pJh!RHPgmU%~nvT#&q)eb9GD4cR)ssk$ay-e^+_TU%u@}fg`htbyLJwA7?zAqO z9HzeZQSjK`=CAWW@LmHq$^WJP5~OYsV8Hc4#<30f#seXy5{BVR*Rn<81Crs=m?&h* 
zHrCxxoBA)0i+iVbc6Q$d|tBneBP`#MKi^jt$YCMaPzp$~r!0jK|liYk7(9Z?MSd=rmf9 zO4on7g0FzBMuH4qNml>p#E2j4o*$;J@!>!Sc#8>fIZ7*jS&owoREcpul9BPKk~39; z#<>QgiKFNXOa3PsA5AtB@Cu7(dRNSObJedImdqPI+S9q{>iW^?`O{0ejnR$|bGIy< zj=Q>ImaY|NQP}XBu;i>txC$T>7M5N=f9?FYhp*aZ^|Mc`*gfE*gzxu+pINduuDDyG z6N~O`v-=aprLUV`G2f<1{8d?XULA(9K!g|O@wNo?RNX9~A8FSU-^+TS5nyE5l&MIlrDwSEQY@%PiW22c} z3k)(z%}<7JS;AllM5=g5mnG-%^w%RfGoCD&m8bT?L=R<4&_6gfEJ&84{z+zHzrl+a z0YpIpDn*l;PZk$#SWKTTeigT4RPM`2unJ2pMmGq5ZoypJ)r+yh?Qz$RWmjk1g|*n7 zaBW<6)x}+Pi>`({dOo)iD9Bl!)ElifESmhXq!m}6-F0vdH+20*_eZV*sQHcVdqJoc zxUbbWgJEvw{?huWet~~fpMBu~DQ8(Yf}R!t1AN?jH=PB=l$sdcMesC)8^h zZG%tH6BDK78VH#qm$6n_u1U!?v$D9$e(MAaJL@SGtjaxemV0tNr5DPRGi%$Z_M~&t zDcH}`h}Im8B{nE5F&Il?FsBU|4;VqYJ+Q-0#tv&tD0Fw}{d34qPy|-2qlO&c{|QEi z!3Laaf?l5>zyMC(JV0{_9cdQ*!%W*G>!wuwinhMis+v`@!dj)3m;uHu-ox_x%*g!$ zz#Ry&24MykeFNw!wG2m;E?BC)KGjs+1kMr6TI!LquTGPf1ki)s-f_P;08!O9kjG@`lu~2Go=tdTl1DY3t z-c0R9YBXLjXJ_Fj1Tk8h%)qD+fYJaKQEza-H%J5_sJ@af(p_2q|2tCYl%`RGlI&z& zQw}iYBC#I|P^DVPkSNyERch}+VKoSu(V?}+5JS4AG~K|Zm6D$-sv^EOYUB3$#FqB?&bLjm?MLEUj?NyA6*k2k z&572n^KCaS&Yz37?xVcTaYs|4raqeY#+m4`c+I1;hvzQD9hHgWXA-$JxAnSSK4-Qk zme&w>G)DKv9Xl7090A91lt0ZUa?4QS=-Lvy;*Nu?L@)n=@29ni+^VdlZlRQib2KE& zxr6)>{wL>dp1W=0Hf~xq8k{-5+~A$tpD3t}*2D{1lSVGDFjm;GnA@1x;Jr=vSM{cY zylvJHbJoNywUOuJmPh7)|CXiu^E*1+WfIKIt+`zVmJGulp<+g09>E11r9nzA@}iM^ ziUeLp_;f{3f_$r-&jTD*}^+`Jh+D=J_l%%1S+Smn<6hFx*$Wa+=-?iV8TP z=-)Kp5jYg%u`vZ(jD%}%_qV0nGPo$1jsY34IGkG7m+~@z;h&4b!3Sig9cf^c!7HpM z-^t3Q^&Wr!v@n2xu@3Bx!X3YiS!I+!B-tL!uq!w=0fr$(3kVOx*i8}@gC&Mk;dHOZPc3A!Q zcqqA}1o=sRh%vx2K6{Xs0$S24(#OJ|li-~lfJ#jp3R{n=2_YrQngh_QKs?lKHKcBX zo`cp7J0Ri}K=Y~@ng#u|A+-{w4PWC0{)*vw!zEr50S!Jc8e~w~DS#6q!yAkh%uRV< ztXUIOQk(+Nu55JB&?y7o6#mL80sc-Po_Y`eJV?K96l|P~=BM*M>l#}%HASx&#sp89 z_dovliN|+($lZ@jq;D*3>Fqo!#DHa5d@S?bu@$i7@NW-(r8xa2- zH4hPJ6GoijZWqJ&nXu#}O3G$>X3OJ-!i1}2+2xJ9ygzeQ&g$WDXf0eRZi=3sZ(l6l zK6^Of$ek;XJ2oymYT}NXNZ>=qmfL17x9T%2gWQrkoDRhud20hPk@04c5!dkT)b~mQ8BSq|H_KMKgBxyni6O%f#=rVA3DV 
zY?B$LOl_&sLSK(c(Au%7ONGu^hqWFp(N>Q5I;K_fXgo}(vQvZ@da(xg#92xPk^Gu4 z8fu;5`)u4& zwrr`4TPi=PsESqZidXE8?Ku&zIGL!ZeErfZmtu807rGWJ_9aS66Gf%r9oMcTO3M>$H z_}c|`XU;$BomSgzJ(nxHO=V~WX{y+|%g05EaT0X0HV~{cb_m|5=vEYDLojTOri6V4 zbDGErZAQAZotm($2>xG;4f$==c&Yw<5k=k$m@V;ZlrZE#TzL2JVAyV z7o?oy9In}mS9irqTjKVXn4v{xXi-8t9{ygii}SaAFVKpNHjhpW4Euee_WJ(yyq2up z`vPwzwbJ`1Ac9QU(}tP-J>}p%p-{gv7vfu(|CiRjsISKD6_Gu0`<9qt%L87NET5`G zN?t&hc@fJ8d5ZjTkwcD|V{(0pbtQ)SnR$?Z*WZDy)v#6LCv< z%)2#i*?NaFSuhbxxb7I$n->humx)I9EX!Gi?X2* zyJJXiMbu1wD*epR#cM+Fzr)irb@8$|oRurmrM8u+{7Q!Vg(6C4tqb3NRY1_{!aeH> zxGM-XCZ>#$HScgiBd+L*X^6XGhQ{1-;Ta&$!fB&mWmkFA=JQzu7Qv>ri$!A+?5u3B zRzH88T8>n$(d~GKzAOO`B1k6LJ|>n8y@R z*gMaD!i1SKlaO>^L7Qspqr=7xhKUj;t?C5ogy!(fV!O|Qv>~lxJB)`aMKJyF%=jf z5>F1F;KkAfdFnU;r?;M%DcRJk*aaw8*@d7smr_NiR(Vi%qsw4BQ+L_!i8LqOj1}?V zhpIRy6G^6;exNKzBLR9 z|47UBq5t$B>G+3z^s74%`9mK0+giFQc_gJ9C&pT`!{p?Lxx0rPQ(XSD<7jYH@QDJ; zC`ilbR-j5Eb0$Me061j}HrVLDK=&DY%@}R?$uOaIo^ly;{WcAcZ7&QA^^b1bB01Ed z(J%4e(d}GJ3VXQvte^$iY~GN_EP4I_t&@IqtY3w5k+dss6B@2U0)4xe1Cx9@+#2^Z zE_+(yo|fqEFLW+?_Ach_n>n&lP&gL~pNJPVEf;Nx7j2o}@D9IN)U{a9J##EkvLR+D z{KyW8*`27Vo9T&JH^mK=iIUo=J6_U^dx@eFXnhj!t9kO4xja%nUwmuRqX|p#vZW$! 
zsfZYtEVT)9N%+()vo~S)go|$3y$MJ8vZE^QsEX|W(9xK1mV^)9a#lrc^MkkQci(^W z&@E?mv~>RBt%f~mH!wW;@U~-I)Sv2JvN1Djx-M%c#A|G_&^9R; ze6;W5iE%7S^6*z(atKwCI-k}mQ<=*y{LF6!3mYK~$RRf?kHo4x%%`8waOTtQLyY8> z`$d{j`UKoDA&(-XZ4G^-AzwBYrSJE-{*greTjDPT6_F}xFDX7GW5{>;umGFc#98>u zfU3|E9vQ(-WjHU2e@#UkY!(8+5z3PC&j!YVzLD|alqy#$K%PRJH2p4ZC}YC7NglIg zAlEikze}LW^t%#&VU2dzBbQH8gRyz}DQbL7D8P>kCU?P)?aSA`93GB#{mj!kYfV`4 zmMz6`OEFeS>4w*fUnzdI^p4I_kbg&SEzMswI-EJT4Y-=?gxw)8f3AJ*E7u;mW=-ml zcbo41qdtd;)`eR>zhl5}rkq=FcDigCQZ{imOv)x#QAL(ASAcB(|@549?QLxX1cWk^@!TK#vQp!U$)tRX-ZMk=6e~s&dfl4Ru1IG&V zbd~7yw{+LJtEM!33ylWh+w*KhnMpPwK*inDdSK=X0~}*uP19KUtHx>WPaMpl2ktTf z*wDVu{~I1rGc+s1Rnwb*aXP@_BL+pkR(BE}=o&`XPyLlP3pA*dAy;O8wO6M=hC1*} zMpw;k1bhc~de8X45o6P$SN2oUpkCRErj}OEjg1#B$rU@rvjI55K-2_=B5!1^Y8qck z)g&{VKLc=L>4j@=HwGzABZ0T?1UwhyUG%T9j1b%CYi1icB?CQ0GRaSqOft-iF}$;R29KRNRG9T(^6l^uNlu(C79Frd9{Je&kr(#)XmJ=Y7 zP8A%xn{&G74u?apPrfpVXdR6=8t2c%8+U)`*u!|$8$7lHW!sS~FRPDP|CH4-0FUhc ze&~6YkSl(Ogt;s{CNCDrmO{4Tf5c4_^@23wpR>)$R>wbDT-J(DD{a=jGMoIrQ8Pr* z(ImAdQU+`CIYlI~r!7747r6TwF(SouF~j{+x!1V&n`iEFI3?km;wX$6JhE<)8dVQk z)bx9mWEt?@x*3BUhbzO20T$$GY%dmc--RpXTZT846_w%BpnuD#Vp%A6v^960R@|CH z#pl@JV$8N*2L`irt8n%*Q1Php0I)jz=^HFQ5fjig5f}!pgi9WRd
oRVYQC|hX} z;0qhm#8C6FA6sTninbFEj|~L|veJ9HPx7~_XJEdEAOvvQBQ1k^8<;SBIUD0S8y9oDGY1m}`yU;D;rLu*cxcH`tNv;TKe=S6N!ase zdS#sPOFS*mMDwmAl1@`5nJKy-c0LZ^9y`!276XT{?dt!4B%KUuOA~C8z_Sf2ZXKAW ztfUeA<}?q6OHT|JZN8H|{6)EjfjuoFfpvRr4MW;9vY?8!H71P>rilX>!g1dPh!CS* z2KCBYU4MyV38z4xEok2agcYd5h$c+Z7MYH%kG5{Rd^vLkk@68T#VGWfv;x3SVb?rk z64_!%U$zP?d+Xb~mQgsO2;`LTU=D$ILHVo?T6T*niSA8SbA6&YMq z2U|9+O4i@aeA8QrRg9f6m9&oK9Ws@)m@8BD6);mV<>NB?^{Uz6bb@|NrI5zGrZKxI zuk`!4U4%7RJCfA9%nSsnj-U!8{2 zqH9ldJCgaqxNA=gTDFH5fm*>PM^}-ogc1>QjY3<})@kCjGBj~Swe|R0eH=+TTA1cu z1v;4KU**1|>BQhJqsxXCWV@sYBSfkWcD(69p$${-Gk`w;A9uD5TXy3jU=#rykZ}(8 zWCe7s<0^cn2i;?^Dqx`ynFWbcSzmRCYMovTS2Yc=o%T=x45BCNK@+&s%;=>{l{40oBENptAI{cKp&s`%efmZCxhcjE$J zQpn{NhaU}Zj(jO{IdVQ$+_vc2I&&ydzh$|8cf5Z0JNjGo-Lrw&r^9@>K3p2JSH}#; zKY(N4aYhIz+-h)QI0P4TaN2>h6QRuaOsJW?=Rd^7`Wn}Org6=xk<~rqzWyQi)10LM z8>XrM#NOONMQD;G5Aax|lTt?^qfr@NXn zkxYI31A*g=9NtAXooVgLAR8SpKYVu4x=Fr%a!y#Z(q=!^_G=EdGt`iQ5Qri(K$%kj1*-Q>QoW~KoU@{UG`DA{ zc{E`FjVK2ko%y#?E~U^|(M~4>2ibxkp*kQA+Vdp4G_ZYHvON>%2m7OVI5a>RZ)vbj z{V%^AI#GtIvChYcj|Qj_YLwY#V5C#+-m?<}7hwDR{|2S#g6+h%b+Kn6S{-o11ng8-LSDy)2FFGq` zb%_Gcasl>a)FcYZ69r}1nc=R*Mr^lj)`C5yj>2&9E&HZ~r&Psg^R{^1u7%Ec-Cl&O zsoNi`JrFB6NIO4VXqK_zEQ_lcG4_E~4H& zCzhc4b$V-0vbUDZ=L#xjjwXsW!-}|Kw*JxO7cS4)!>5?^y^fR`eX|{Np_siaW;pWB6G&uj0v@CtWHzAEgra5V5bMi@k$#Q;eJij(_X)(WbIe%L` zf7|@PLfc~go)?bJ9GLA+*z@MvXAwiSQ04<|%-Kw4`E^5683DdDv-=i2Af4IIhTeoU zik`MiHGgzAAqFV)xbxcbM3Z8)=O0)ih-s$HC(}A{k^~Mx_Z=O)y|M=f zGa$g12?|N#QE&jE6Z&CCwy@{$@6!D95wy&l7N&<6Oe(6!9ZQI`BXT>r>Q(AIZJxHM zrSbI<#F`b%>;Dds!GRs;$vbz=F`Z_dnlwLPoP7E?C(Ua^k-fBA4UWDhX!>JneyC7= zG`8WCU9pD9WsDtyg$Zj`Hs7{sEBLImUQJuEEysMt`n)x5TaMkg7q8StO!WajNS0R# zp3C$cr`oeJ<{H}t$qIVVlOXO)N6Ey$5#UfJ6Ou9}hz3-CJq5z5AeUZ|-DUMsHHN0V zSz;d#O^i+j#u;kltJG zJp;9SPlw$;PLW)-MHeN6G9(K{pd%HPNAb(J z6|^By-=ykG8IHNrW5y}oxdg|$KwLBZ!u?cytzi0 zi8*K4TogAKg^f$*jghJ4x}9lcP%j`AhPfnVi$u9sXZ32%?|Eaf#sPybcNg6*B2rS`)gJh75nxSU%V z&#jDfL_JHnt%#S_afDyaYxtET=jFp!56=yK=-Bv4Mx*D^=!cHOpIWJ8to`skl#b;! 
z{08PpM-jF%8FN=G&KO$U9%+bn%-f>V3tJa_3#Vc^olBOkM2>6LOq-jGx%Yy-n6aDI zw(d6W=1ymKjs9J8QFn>)-HjHyuHAtA9~<11mKeJ$tv}vqM*fd$jNL8fA2*sQzs=g? z(f@d7VNb69y<8)%v!ng3po!^dKNXfXn3LN1N4uec4m1S*we-kt8d1oo0nskAMs|;^ zR17o_1umPAm?{CHU{Hv{uNkV+gDB`zoFG)s>4-gThhaAR)3iPV*^?DYBO91}<$%(g zYzAu%RY9&2w1=aAM3l4$R>4O8WkYaWPLURnBRFfkWmm};+?LJfvydCXaRK&G@sSiE z5u9umau~vPp;i{cl8MAaMe0C=+;z*Ry=CnFBugSY&X=lJsq=ZVpsqPmdq2gmq=?J; zjcbbGXR0J@=VoPoz_tJXXMQHlSxE4IRY`3o37+F$(yMoVr&%rh;)o2BmDV3!uyOmKQYd}C!kP%5TkYI{y(3B|8|_hI&?pS z>Yqh~Etpg$Sl54dqA#~2ll&AbLWG&-f(YMK!4sn-m$OJ~!%W4r(>s-`WCq)n9I+L% zdRlBlD$;)OUP!9#kT)egeAcEM&CR}n;pXv)v%`S_h!#1BS;)Q=C5JSX4D|67rWKmX zl{p1%fL^g3%}qIF;}~lb&-btkWg1|K2Kg6udc_VDZOCKV1Ifm;o2)f)8+D7g25~!G zJ%=lV42DgF3B8gH7A*R@7nI+OVW2N*;Zl?wkHfYK`=i{-PO8fk8zvTaA}H>noULdL zf!`T-!8BEj#i0F4;vPzf(GV&%}QB?P6WKf1Co7OU%BEIko(pMd_&-FP>ja~EFUer@~pz1Q|e>Xr%+ezA~!YG&v| zcP;dJdBqq;B6njVw=|JkgdP^;%yy%Ph2DrBK?*`ko}O8RxArWvz2A|}TdtawT+b(^ zjnSRarg>pu`$7Y{*2C8&>Y9IW<&7(mF?6kDJGz#akFFI}#cFmgJn}bteza$yHCA?<_sA zgL^Nh13&&&-&u;&`xV9m_15>R%_#N$X5)cv=J&Ty+4pyBKiI1ONvj^$KiO(MRH*;S zuEIlk`VaDqxR!Fs`5PRu-#_J-)$o8{?I2E^sp0=@ji|nsJA(kMh{9Y}zcrP6frtxh zUN+tc`ZdjOGCk@8ePat8m&PZNVO|w^(VAh}0Mo!t==g}6dH~;aUK6af(ZM#Ta#PS3 znv?)&`Dn{DZF*2!&07H3gvCnwg>f*#XEms35)`9GVl=u8?L%A z9{UyR6-?oEm50gXZoZ1P3C#B6Rb1+h*}Rj?saKHIe+FWfYtU*bjaSpb?GZ037(^<5;rBIzGN z>`1JZEQq)}PLj8jPqt~{G=7ee%P-0QvMVpi#0!HXaEQl<##urm zO+9nQSn>WuVOhBI+POJBZK<00)149i8+#L#)pG~J z?bnVb+~puq-WnLrAAe=?u2El+|I3Q{upVv@4NX6^-muPBy_K`*?zxc@X$pQZJJ2{+ZKmYlv9xZ;F zhe9!D<-PQR_kzS*eXnjmr@v{d>*+AwY%A)iw!Leq!u7kGi+freKh}GC8uEW!l27So zBjs$_guCzMTTtu0Y9n1YQ2u)@Cc5r0_Uy5~_ox|n|CTfEx0&$)3<~_o0_jQ?eA|-+ z2a}xGYBL5K^Q8QfK7QoK#vAn;AsOD&nz!gT&|TPf^fT2DwxFr>CaZL>X%DXGNAsch z%&Ha!Z2<13gT68GDqPX<2LmxXLo>n7ddMwi(!%>*2V}Aps`_5DF*672tSZUWlxUcR zVk*NH%FuaRGZm4QqXTCH zX^vL(g6?mphjP(vLaTDuGqjncuuSzyd%O3yqd2U=CzbqCwbtYr%xJQ*xf#7;lYJku zWv(;nLra^@DND^$Ej1%8HG;%9OMDC5t?125`cZs4!GfBDBr%XXCfnNz^Yznssp2z~ 
zJWI*%QPPiOt>GFU($MB4&dl1P`{w@cEW4OcLj%;p?!IF&|hP0p;tfiW{5V%|?hqXeGUh@Gw&pXJ>LVMW5!J_#zLTKFSYRJQ zWF%D4XToEdHl4EI&g5Xtd`nsNOcCYbzNy4KuZI~=D6GKKz{Ws{$FlZ_kRUapeVTy6 zbx_>tL5e6QqsFNM>IiLpK%enV&lYU9ZJnw+?i&Unhj8IN7ziEnoh7et`eX$5F#x;D zz6r3!_D&gEP>=a9{F?CUR73Wf^i;ScP7IwRj2{Z3?v%b}>jCk5sPteT>r2{I^fv&@n%4W%CHOhRD9b!Fx`c-R9$RObuabtE?#{ z!bNkDHb2DukcpDAk~yjH^tF6B~%MD83%Si9yxZLwp?h;uGj4 z^AHyQZ|bk%d~j@34p%`dRk8$vV~lXJI2MwXI3Kh>GA@5x>L}G_>QM)CDV7|{oG}%V z_#{2qtYpD;$v-F=2u!eHWjxRicQNMPhy9`Qj7mBogb!(B4(Qv<>{EKOjh56)v@Fy^ z7XFGeO7;UfFqLGb7ymmv_Y#rGpW*_1mAk@th)=kRmt9qH7h=vmy5!o4O)fF_u7$%3 zyJGnVmmG%@1x1M>?{ZOdyr?uGn*C zC%(Qj%>VIT*4&QuNU!bf!BhDBHPva2!fYK*oox?1P?xN9pIyqvcAy7?05B6pP}O3K5=@R>+Y z zx+Z61@oQA^1te?O@cc}4Lvtbi0g8Q}5Y0u@Bt*km+;fw0vvt>cDNPm)BWV(l#q>e*9+QaSW>{hP?7@n2dm>#tRuN9!&FRrCD{Q+ zOV2(@mG(8BsrVNLaPs0>#~l36Gw!k{-lt&r7h{{4D-*V0q`a>j$3P)fCZQJ*rPlaX z*=aAB;$JV5at+X#%M_w8Fe_%Jk5mfy@7p|@n+f?aUS0*<|A^rJGF}CfLm%J~l{6WR zZg}96xh`5be`;>;V!@*zSH<4f9j`bdzc+t+v3TdKGtss;)^_Bj;M|_&g2s43<6=S6 zqN_P(IUPHB8tG>%_Coq#PulmM$YbQc-`82k-K^m$t+RHv>u+x5k!Ik2S`2JKd>a>7 z5J=@C+k(g@y{47}#VLPC{VcL=sy@CusV$FG?0u#N`OOys-INVKD~A5!o(%s2U`r8j zd|rH#`ZC)^$@s!_34lF9<=IpbfqmtH$p*@&9jHJrlqH(+T|5Hlk}TLg5GW9TRi(u2%JvIWq8(V83S<&*#CIzNoLH3>SK{J)%rAiwQe@8G;U*E$?f7%Ae z?67rzSmLw+AgJKPcynm1nZ%@ZBAiY+j?t$p$)e?lROf2Kw?)}_<%|RHQDwxNsIu%A zYLJ~ah`39Nt2vdW=zdg@$z*HDd?~i6$g7AXm41A?RD`CF?SvHP(C{w_$_}7G24(pb z;i<*EMnuocua1DlZ=E^1VzAC03zx#V^Thnod6=C#mkeD9j2A=ryQgCtwonK?M8Lz& z0PKmZzMp(+U`M_FBYisHWy4o0uLsPc0Ud3PS9!~A+#jd;A_KHY$rZx6qf0K1A9- zB5X?qp;GU|dd3CGap1EVF0$zdwQnHy;rE+l?K)|uC_=+IA9n8q&PsZ;AX`Q0tEHm& zKJH_8v;VRX7=rwa54l?zbdlNsrY_W?XG+F_a}%Q%%IkaMR&IZEnCF{n>rWH@gVrk2~{%4*Je3;bXrVqG=5o@s$X2Qa4 z7>})jL@@WVeM8*7VbNX@GgN$P&X#*GNV3UyYdbgTZ*DSnK57uY?ro{>} z{zuBww&5#rGlFvPjeKY%@b=nU$nCrZE@R>(3(X-}2x(BJ90ACd+Vd)sH>^nT$kh7s zt|Bk}B!!@4Ae<*YM!&l#A=hc~DN2Z@$X}@aDqX!u2^lTrZ@OHkt3RjY2bA2P7QCtCN4Q53;{F8CY31C$dyziQ(6{Es>N$6U_G 
zoR$4|e9SpN=JGz~TtDZ^e!*4!oZIkouILwBU7V}?dk%Mzzxn4}!_T>D6!9_=tkNl2S#7mIlaKRGes=`z;uZ{{knaCKggp9Iz;Cviqd($M?SxE zdX@c7p4M&R^CM^hCz>&wlD#}v>P_Uj6P}XW_Hy2o$i-)CasDJHFKNUXnAlyEG}D!Z zv*#qObY;UQa+6dVA<<3NRVQ8Tb@Ih?Xd9<+PsAM=h_*-0y|EGM0~%Cu(%Xoh>608z z^Ol8zx1FnWne5~9`P@iVlEW!lKi__%d6h1cjc&m20KHsyA*_ld^QxIv1fO_VkoaAcMD|-J>DXN_>Dk>N8Q9$@8QI+=nb_Sdnc3YUS#Z}KwjQ!cHV@UM zKWsnbkQ|4cl9T;5947 z+^zFDtAu9^QbUDMC-@4g1fOfOj*o=YxK{8z=_@>2?<+d%=J|caXPZ#EvD@M+p>ofN zQZp;*M#)mGWQ(s1rOMBGk+;6v|ls1;Kh1J-8cB>Dy`Z{oT zoNYpDJK581?CA!r6&qRVcHgEdLE7Q_gs%&~MQNw6+qc=br9)#KZVdUk24JuJ1?*NgXN^X_J;yM2$bd^D@}(L8Ln{;|AX$R{#FB0{!!y^&Q5OBltUt zzd`&R!yom-;XAIB9`HTR(n`)2pRLCe9l;NSVuSAltL*?A0SBOUtCH?x=?CviKgrS$ zv3BO>_p|iFtPi>Ar&#(CmQJHgvt{xb@6pm3ydlb_zTv6xM8vC; zZT;clsS}|+yG&u;B|rCs5tvVuRq-&Gh*~Q=Z}T(!}^gbYeW@InUZ1?XcPK0#p;- zVe{Y`L+f^UPM#03m}g=noAHKKwg;lo(D=Er%R}LyY>$RMH64l!hv5TrHK1N;ecXQv;1 zj&=@5ySqAv!{9SA;tbo5Pe5Y(|r`776zaAku`pbqHP%&I&{yKpRBR1s!OD$!7p$h@c8a z$uz3>nq|}8iO5KJ^t}!gnAv3Gzow-C_02Oph7t3OOh}%APz1w%e)?S0^JL%ABOaPf zKt_jca2QlcyG)3&3H%340C3@3wc_AKD;|T#{*O6JyR2`I9oCKbx_t0hO*Sv z{-DCja}O_(kM8(tI;axyeOG%_7w zWF{(Gm4rxCc1=p53*m|BD9;p>ZFt30=+YEWplpdwPvUtLCH7q!4l(*7J6UtoVzMK5 ztQcC9U22lrh$1z&ibkVegKUU~#ztg2?-83OiNI6pMi3=jL9|06(3LGiLt#w*(2(rf z%VxbpP3U5o0by5!du2j-?NV9Gn`JHY)v2=0vxYC(m)$i>Ztt6J?|j4e*56pa@Fywv zUZh)>k+XdLo8{~0`@eVi#^F@?Hl#Y1E7vVmZho_J^Fr|Z&%XL>s&Wrf3(~dgW(~J2 zHNUi1EEiQT6*atB)bPzG-zw@{Zth%a?s>DhC+)7de(>7C*t1D@Yoe|9HN(#wHywBN zx|ZTO!>z)G_w_>YhKxm6d>AFt z^LqE@{x1EGZO!`(!jHRb`$Y3k3`P4Y^gk&V5qd3>7n%4-Z=qN_1?ws3q+lxr8xY*k z4ekdi!q5|-V(4u{L*o;{X+Z9f>=+vQ)O27>dEy$v^hr^GOav4OkIh3v!HMCaA!!FH zl1qn%K*^`V!$W~7urBAOr$SLYBZ|z3C7WZZj($==M2}I`&kMJ8I-})%%Nk=F&&cv@ zr;Hk;3@aEBiJZM-l#UQCC>`K}U#CUCPQ_JVUBsFHZaacSwF(|lR`kCH8XcOOIkRoEW=S)I)>Vq&mYWF&1({aYEz86GwGmSR>D~B7RabO5J%LmB@hGY$d`DA-A z6djhr42k9ON5r8ps3D5=f^fQU40nEkmS$mSLzWu?_pY0HdnH9;SXJUp$|FV83Y7*C zAIyG(4<<6k3n^p=?ew$^2gb$%=f*;qc*;x}z}0zF^v$#8bSAgv+~gH6$fr_#__A*R zbSdO%5I}JN-Ioqha0r3dAc2bzR;1?RXui~4yjl%Chfyp_1nr8DsSrvlzhnijL{Q!I 
zmZgG$jBKEJlL)4UX(H6ifC!tEHbhjT@JlXIaMWbaq zt1vi3ugYI+M)pK@C_~78T}F{;H3E#ETPUxbwWlqP=bcxbbHQ7dmb9xXVX0!nh8%_0NVbGrE(9!7X^irhN)i zor}T!!Wj)b)Cs5cN>|Vh9+%i=cKHRB3vYm^>euZ=O+j$zS!&>i>_yqn5RkEJWsL#Q zLZ9x0S$&?p`-KwWls>BK6r#Ek;XttHM{M!W&@<+7P20?AZ z2~Fn6d57rJsV({+*@mc&w`v4!Q)6Czpcvh;?Vpgw15;5?z(dFfEHu;!(q&ILf|M+v zw|UM(DhJ&k<)kza4S6ntQgIGlDMFQy4j^ys@nyq}mWmoas<}c19z($)(V%;fo?KgMK2N!lFJ0AOS{Zjq@Wc~iMtN8g#S1;WtO;tSh z?b3MPH_Bcri9P=B%Wjk=D;`U^`VyADcP)Ngmc_O3N8BHJD(F>J*;s?L#I28p#5pEWk_H3}+ ztZ*TGz0kC0v*qsLb>KK-hwhR~T&LuZh-BE~a7r|R2IsD=(T33tD-Q&7><=@Tkj z4Mew2Lv%(J{rL(!_;u5P(d>P75mFIq5sa8A5GwXThCG_QZ$fVI@Ui^|TN;6IvpK)Tik! zhbP9S$0J$wC7~7eI-Z}_cYXnF#fL*tb)AL+z+u{h%N{92A~s*N+PXxogufU=0u0M~ z$S}q5goNCa6O_gk8LKtwV`DCc0yOnR9bqS}O&t5GL&!&cVtQ(FdWw%SA5Ezd6E2GZ zS+vTcZ4e@)VFI%xeFDjnpPm^nN>H~-XDQ)n3Z6pXHE~HaKnWy{%HoKu$1uoxbTCT7 z1dX<;5pV|B)DKNf3{6EZNYCPN9DmWP2$(9sRxlf!+nFq=jhEcAHqIBlQ&K*=ciB}L ztAA-~$<>f@HKYrRuA8r!72*BogZ@;-ssAwgTJ)uw`0%$zUm2YrUf7gs+OnvByK>Jz zjV3EjK^!kF%b0}n>f2_0Y4vaQ#-ifeM!{VH0pHe?ur_{h$0k%hAwo{KHN9nR{4iRF z*34Dzaq9o33c*ct(;iFV&Gur%U$+=3blUb*n3_kIMhMd$E5~o%roY4>!&>tb% z6&Ez5$_T6X5=|SbrbLXV`~1dclsp>XBjXnvRNhE80y${r`;elBry-DC2#sAPHsC@? 
zB5FG}#tHcf{)pD-Fp$g`!e~CKxnKAI_TWMaE5)mRu;h`0K^S^dumfulu;7Xp%lQCC zz(uOFyF{4l0Rk^DusHzy&nB(d4->_XuL?o{AmZ1pCU>b*eN6Q0E(k9ef!o*dK^qmf z>PH|TOPGGhAtF?YfywX`)7-*lrR&%ri(%PtaY72_OF-6L(1wOCO-j9TyE3#x)Gb=& zY!ne<6nBkHK(vbXc4*n(z>A`UG~UDF&j@;t?N`;^*UWQbMlaOW#~=U8(%Js!k6k@> z%duu2I=2mSb@vC14f$L%nkNoHE5^_hnG@uy2ftduU%+1DQHH-{YbY{39s;8mg1KsP zEIb7~M$1_)P-+QrQBO8dA;t&EM#h!O&TJ0qzWeRM4L>-UbnQu4_AEQgV}_T4OV0YFvp(%A zf$&&Xb^V!Z&wOp{s%=)kY%c|875rv@?Cg@gHEC~Mc6ZEAr`(%o_oXYUUN*mIzD**|KM?^{p4^5lHuyAdyHX1d8w|cy8PH#2D}diW96dvmL8P zVX`$|MKyP-O1!Mv5lyFRAVw@ zg1Fo#1VYTf|yVfqbnv<^Pl*@ZpFBY}}&p2x` zdZX2bB~e_Ru_E@ndk(?jhML~!{<*6J8hWGq!zj?H@HuaX@Ex&3_^xfI{vXAi`qdZ4 ze&oD=Vfger4hSH>0BbzwR~Ep#bvYnz&_LW6aIFAw%mRVy>TsCHkwV_*_IUug3P5=* z^B)VSo;nF|`2Z*+{WpM5f?TgOIS=L^+0r)8eo%@m^i2@NUJMf{=AZ{5Z7lR*vDU|S zwU7JB8RK9i7`nuONcy)JYw7a{a^a{n939e%YU@k-60-dZW!;1Tpj4(emO?HiK$;u5 z`kBPK%}Lkhgk|&ZmK|j;SrYbj1SF+Dcex2jNAZy=>KBRW8t? z3~cih*`%&wNc9M&J70OGvN#%x^(0WF2&BuFUaG{)3OT;ZfDQ?&K9Qn9QO4I8b(Jdp129OP>=FZCk;dU(nPtF06mMiZ%4 zIEUhpivAtWVU*g zl2IFPg;5|uSgVI2f?U;Ru6aqz1r1)2;%qRa!30NwZq*4P8Xeah5u{2i7rBJNj8E~( zI>Mc=Y@@?N3be@9(c$A9aO5(rmB-XB5>&_z1w~%=O$H*eReQNC?v$Ce#y#!pNCj$)rgz_?24ze3AviI!*AWDZ&9Fe`l>g+8Ey<-m&24b%yiH8J0k zd0o=HE^S{Mua5^_X-e5!(i^%K`d&39wjN4uI6QkWQP!4pw5K~aEo{7TVc~qTb2p`} zPdeJtjV<#G52CyKmDN9%lV(y?O^!6C2_ zhsBd(y098K4zJ9yGwB#$IR?cy0{_&MF05ZQ*PF;yDmc8E8eu>@B>wFD&GWZS!rFCr zjRt4Iuhw|x_N7aj<{OhGop+5w(fT=KqRb14;OA>Rw<*V6y=g$S%^DKU#-ybw{+Xm@ z%fg?$Y1#Y19UY!Bl5=yrZO=N(>t-v$JX;*$Tt-)RA@TyF8#^(U0U%Uz+3|^lFHaY8 z49lKCfx(|Je9xYb>PB@iF0>FQlJ(@_xauP;RCP^jDD`Q=Cr{h5VsM*hSb~!t*S|+X z7Zr;I)XNMXs(P!S=V@(Cu$A*Pu@=BKL7=8c^OP+xhH8SBJ)ui0Z59s`*&)oS76FB` z;t2AOS+LRz7}r5&iNoR`v63x=Cb_&CKkbm*XV!rS?kbt;VLv)b&_Ic{rN71iltq#JlS53~=k=^6tK1UHQSYP0uo zi7p2R1*O@5p5ZilzW|mu*$*=QXc#YNnm+~=$v4dtNR!q%3);E%ru)0kCL1O)@FZYs zuVdgCT9>XM1}@eWotOq?&mwk0s<0RT8D%6GVHgrNeg){h8DPC|_*ZckRXrAsSfMb}OxYI>5Ty|<0}a?_QA_bftn`@AnzwGny@!7ayi5 zfQqu7RHJZzBxq*WX-yRg>x-(2AQZOZ@@vXOXuTR$trui6Y8$&Chm{Z?dPXzv`t*K- 
zs!Rn`8$zN_j2QY1Bcdj!83F=9d0!AL44K1VG+=(d!xQbAl#wO-K%LwGprJ(0VQFf_ zJTdKKgL8Hk9?7NTv#f-8b6c)yB(gQGR5~zC!Jrh!5YCwQoj7sq#10P(jiwwxlTBRn zFOm9EHcrA+It6Zl!7i1{@(pktm!y^4LMb?f96YIvvh-Idd4NC|<|%;iZ!2JSOj`=l zmDN}JXKRv%vb3vm$>mA9Ja4<|X7%vdvz9Gaw9TJPRcxI-n06G-)g&EjmmG~rM`Jwv zwqwH`vrt(7I{d_1$RS@P!OUP7AfQ%}@@fk}1#FI}q1@HLAJr+B z&yY3F`AiUxnBbO;Ws$Y^%+d}qOBFMHRyF;MS*;WAwb5Gg*(p?WQYoLK!>azqf*Mv9 zloMi?U0V>U%Wdw$$gYZ0~eIc&jwt3z85 zt9E16wIO{ALnoJNWT#KrJ3)N5)P!f!90lVD-owZVjER4bsP`!TaJ{=*b}FyC?1NKg zRQfVfCEzM(>J8Fg(hoAn$wp~n8v8Buw7m3udC|MMDvcmUW5CO3No9wG8S>F}>3hiZ zG_9$xAp){cAyluyYFd*tl)vMun$^RTY4)k4rFzLym$cNqTU(!K*qN-|mFPK^tUaEt zt$X?6ix(5kI~Mn(YImnAtJ39Fv2E8rovx}*SFMSif3bbJW*vC<^RKiotV`B)rE0p< z&8_LWMoL*OE}aX;b|=f$C))awWqVS^dvEK-`hM|tiQQT7TfNh2gDJm|yCdbP-)N|) zcXy#B$YE}LI<9u}>Sylds)`k*)F{kBk*r|N08115pfSjmaI+(ohoApwuHoaV%ChZw zn6ixQs?rxIV18krM=Vzn=7b_FTaolC9)E_WUUB^^6CAGD3s-k0sydSPj)b9ubCm?4 z>hzwuOQ}4-DOaC24 zYNqhG6ds4Wsz*ieDl>1}t1$&L&LiQ-vryN027s$4C0e2o6KRpoxThn`NQ%8O!FE-} zvO2_6ave4^TQNhc_5$hqsOswkJo^w~jJ?${dn{?GNq9CTEt}pKOx8nU+TltRZcI8h zWpsLn?Y7?Fw7svV2N{RpESxi4J)EdMm~xGc)%IPCl&MG(;;{#N>#r^qHYnqlEI z-kz_>=ECNxJc};73rgiCz<{F$erY@r)x6>YzoUtwT3I+KMZo*yi5DiO)?g~@(;JC* ziI_Tth#Bf-%Z~pIDiJISztLx9ariX*%_nGethjtO^&MD1CZC<<_93@g3y9AHN;pEIW`l0B1Cmb+URMT(%&m>>AceDoZU^%Racpu54MqSsjr;Das1ILWTf}D8w($ zDHb>5*Xc>+R|)&|wDQYMzeY{o(!eqygT3cN!_Puz%hleTl&UNW6RY83NqToutL)oz z+|#lTyF$-<_Jk*(;-+n<$E6A4?Yz+ckY7=lc0*(hsUL~U)85B6s=%;2OjMhkBEeUw zqBSjE0##27F-6=as;JZ{RdKghtK!<->{^d1_$2Qw{Oi3tvm4sPDS)y;MlAt7aOe~#L?PvK@6V;X*Jk4p$&`C3ev(QoP4I{Lr= zN4oy&Zu+%19RIwC^vxZ6DcF)#F%u&!+hL6f!4BL{ZW*r7xk*$Q2?nGfODM@n*sDN{ zOcp?fi~vl^CStI?-$nBoJIokkc!@AEbCFURGyDVU9@{V%9vzBo-XJ^FuF)^)b$VQg z0bws!-xURE6c`~u!1uQm57QbLLc@ks7?ysDT(1)HU4sB>YmWe?_GGLxS=zc(+L0{n znE#W-zEo*%s%ZC>L(3&)b5pTn$?~?P@(s!I4GU{t6I11TQYCw@97$KMNf^q0X@?B! 
zPB%7R=}%bKB@K1y%BEyxJD#M=E1`2q!>8nll({Bevrv&*w>@pCShCb6EwypuElX3{ zTp9DF%$~HpG*+Ind(w`YB}aYIQ6Jy;mZLT8tc(q$ob~gzg^^UtuKOPzNI4tks}?S# zygfM&Z#i0jX)aIidhE5e$z8|Oj!LM#do&GoqIp}=vK@NyKI}H4`}5!idNMBE-k6CJg(aFH*HK&dZQ3RB2_r8Ql=o}j_b^Y14|lP@yQ|e4@ZE6T=U&Kp)^@Zt@3eqF0z(1e#4*>sW(2ZvDsbsw5C*2{b8r^{U7&?JK8_u> z@W+#WNLd`L$KmKWCCSC-!V}TJ_~cktRVtStO(8Fu{*?AdF%j9s`zRa8eM{A!5*RZ5 zsVqLbLVH@u^XI6a*jOY{;vHJ(!-#;(Dih$Pb^6-r*x3A@w@W)`t!YcqlBFVPslaNe zTJv(nixuCfx}&p{6yMQXtBUU$9nOL~2E^w2ps_D1p8M3bEnl|Y)*lk&#AwMP$z_<;>AP>^38c+*jn zE-KAHhAeouVoiqZ*1&lsm3PdD-LnbK;<;A%@TM!*{`sK~?zn`~LGEPw`G&qi!>~46 zcri(kq}0572tlp{dD?SAT_<>=USJM1I8oC;eHqzSv7HCTCG7hB4`hXWI1V5JO-O?L zFd#4t=R1Bg)TnIb!o+Y)$rHKEpcUh5U(2JV^K2X;@jNW9x%2Vo6Ktn5w%*fjGM-_a<;yQ##(52`H(D1?C0lpB<>+A?9=5Dg>u!jHUh4z5w(>Rb zYvg0QSY@XI-^1|b9U6jqgls0#FAy**`ePitWm^`vNw@Hj@EbGEe86_OS{=W&xU9AB z*V(MScLW5S<@^__i)be5r79&#VD&z&F!uJG6(hZer@tdYUqO*8!uvHIWA9f_-4k%j z^ouY#B@CrpA4ip{7kEfcnEwG0@Uc2lmGD!T@+|B??S5=a_30jm-=5E~-I!}>_v?Me z?XWjy(cuYNA}g*sZA(PN=r^vm!2>3sTr1}4d7utad1?>TAs{K1z$tWm&vbYU=nTGJ zIOHh`%I)K4o+z6yaljKcHc3)d zJ_bTmU&Pk~ZN;7#85tS@T?0eXf|H&CB*yRI=^iDWh!^%CKu(6x3a_#tJo{+B>D0v# zbo@%~yhajrac~Ms0K6dcY7#yjx5WhEX<}Niu$WCs3XhJWQPh9*Y$e~T4oZhIC_6lR zl>=_T&_r_wiWP?_&W`iyuz8K)zro;!?@{nu3f`xHAPwtVKd|qlY&d?*hw#9@zW)8N zry7~oLN3T35hkhT^+$Fw#tT~*u%GPwL=dONNCD<(5@9ELOh>VM!ag3jG^7m6jOF~) z)MRu=XXlKWVu%2l6adO3Cj!}`b`ocs@gB*>aoCx#YeF_Fbx8j=WzSG{BkQSbR=P^K z3R3z+WaVB7xnSrVf!2`nT>3pqucsv(M1--_;GfO4*!GdIRDjZ?3rf@Fjp<6ST+x}X zSaaKMa+7}}Es_wduu>G3i^6DzM(%NTOXtcny?zGu|v zHBDEcBS!iaHhfCbudaO6Q-?K+trK;$YWWt5I$FAQysQFHD*ijck*P;b2JaCK zVaS4tHZg~@%sMXL$69*sZ~X%i)Cso#G|KddA$9I)ODgIf`yT!t0w^s)1xv#%(n4kH)JcJIUJouM-tMuS>l%Rv{0H@eG<*gUFv?JfIKP1 z0-;N<3LQ-MLXCh6$`Xa$F<{5^sN^s*t=4XRPHBJU7^!7kJaFenoWD`MMY z>*J5dFU6lpRBTMSHeET8ZrQNZvMbrL>ot9)IH+4{t?e}oO3Y$rkPi51Byt( z4~TY++XUY}MYCLL>#2|4O3R|SAm{74RN z+R^HRp3X*7Y~t!d$P~eI8#fG*S_kG#={bxowv9400WhJgk{v;w-|JeuA9z4L05f~E z*br-wL_3}$1Q$C|8}mM~x}LXPwasmdok&?c|7>+!Etx$%C#9@aJU$dVm$I(o@h9ej 
zDJ$*bQZ1I|Xct@LdB8xlk^#e46<~nEcMrYw41V)#b-L}-PS}K-p?)>BvLAHar`xW> zMi#gYQWw}pmW}Y0GWc~s97fpexZ#~`zu}fOpb~6@@xahd+K*9-*E;jvkB%CYE^18d z5#ne8N`%}v0}HI;>++nN9)1?~xc@QeK^N^J=4lUm2afy#%?@9ZEntPGNZ0qFG1>*D zQZf=u5Tcaz*ycxatnu8$)I=8!NMr}!$}Zx^hd4Eg4qey;5>rTJ7;3SitGpLdF(q0M ztRyrJhG&qGVHxsT_)~%=Qtw_7?m|bqX4bmwEPnp%7tY2SQqJ00UAm-nsRaJ_jp>q_ zbV+pvCj&HLFS6SKM_H`mE&IB3X_bnd7HrApor`_R=HA8WWb?j6)BZ#WG#~@m zN?W#uw$gILhHRh-$WxqYScov`3!owq0L!Dn?sTClm9?N8U4c3P6g*wYXFtbxql|nv z9tFgwJ!)+@I)GP)aCLys=GEQ|x~RSxbPStyy~e>A>nG2gz4+96k{^mFdbYi@W5;J2 zcQ^GswZ36S*U>?{@^ljRT7z>OJ`Z|9#xdDGEsfFggaSAs7t#;LB`7KR^uqyn2leTB zsUF3z)4V=`wgXEP3njHz4yVi4!y>k9w*J|r&t00cFPZC-<~nSkj&&zpwM(x4Z|?cl zfxkGAXzxii^d~I+%eCuXzV!7=iI#0kwY!tGyJx%SrjqvRgyGO@egs^i$BVSff*4!} zgu5{WL`CurY`~m6dME-=ie@Fezoh$ zp+wPEY*>H3_Y1u->{aiaHz7zEI_XHB;%#?{%lfz8KC>Zjo&3)<&Qc==Lu#Uc##m}b zOg0{8N2hoZUoni1NqV9Vvu%O(YBpF&#xs&+E{>5xLgBw7it(s+2;c)VL&xm4VgEN+TlOci%76>m-! zZ(bN)+?Xov`P0K!_NVPdvpB-CjB|iC<~YS!dfn(u-T_WlRL3a(X;cno9zXfDI@-&n z^EnGv^L=JRu@AauQn#*_A%A9llis&=tJh7Z1{SUiNkmve}t4=t5 zNzLtn_YnPi@b5G5SmM_S=r~qh)GBq;1?^ntOtJ(O~8c+%yU{x!f zRy)Pp2daJ^0t7K`AL=_sXHEh;ln@S~?VYk^zaoT#O*kj(E}k2lDP|k^b6e>7&_H#5 zsKf3Or&K|OD=8q@ku7wh7%7-!r`jaO1ImU`^jjLi#NulN=jmu}I=p)FGBp*9rmUM_ z-%lXiCBe)nFiV`*#`(@IRR2~A7{}R7F+yXqd31*38p8uCjwBrHH!n4V3++ogi)M8hlfzi}OS>~&Ss!!6_a@3(W(U%a zf+Yv`x75V#oj>{a-HW!@`fqg|e8az7ShiGH_hw;T zy!%#R=lgo0`;fR)-s(E|A@aUe`526X3eyj##s1al<3pVlCFM( z@Vd#lw@d$eN%`J7wr^dA)!Qdno(yTSUSHZ#(H)Me`z%|GffBmKt)TfbNT zuHa|#t!6E5%1%gBXc1-XD8o}n&1$b~*{8&59cH=MCx*x9wZMG2OtTyj8m zCE|ToTu^b7g{s>9XseKdi<$6+i!oEh!c`^8SU@17wwJG+Rm;h%lG9`3s4)ku-g zu>&MBumy8Rrek*boNNpV82Wah+$v-zAElou*UAPCaYyFGHNZT8fC{s96~)Fa`f}jMT%(^?*2NQpxdy2sZK*s(Et8>i@Sf z9}(1@p4chsQD;bD87vQgxgx(MAHMt+#xhv?EIBMgae%aBHvc3$9$w)YK;l+#41XB? 
z|HJu<2XG8|{K7dL&ji!TG;8X|XBT*R&MagZ7b5G_=a~|Jgrt1V@h}zE4$n-Xk{In$ zQl#w|wUZL5KO{Pnbr8~S7i3mh509@IM|*oy$_8L3J}uZVY2};{By8qI4bhW-t!H9UiF{S-nL}7a`l2#+212?WSfC zzajNeY!opZDhqoF6P#tTUeULjpg8AFKlnxxqLS=50gEcEj=UB)7GTpB0@=h)G>A(3 zD5VRv!H*2Tq!#fyvKj36Em*QeKS{9ezEp7~Ck?zCp- zJ)^O-3@3wCHz&Nkix+T@ee1QY*L$z^#+#Q) zT9YL>oUiQq!7m?-jlKn~qF!*X$IFW<(4BPQ+H_%6y09EAEh(7Yi~Ny+Fq&A{^!{OyM(yp5#IH5KUDW*hdz(H=qkn&| zMR1g6**Z_WD(;x?UnpO&E*3BLEp{aecHgo*mdDne2h`nctH9ZmuNx``aA<0cXa7#) z>$@z7|HxoL`j4D_6$7=xkDGSj$4~0|DDO{q+6HXapBX9F&z!b_8uQP}Deuo}dk)m- z->A_e{zk3sph5qJx9q@f{TsWDh|7iKSPk|%6q*TfjXywIyNIo4^8ed_?_3seC5s37 z89Dg1nrB(o(FU?JjDV}LPEg&^f@4?qU;_F|64)_~g(BO$DlelhgWmuv#9NR-iP3rp zOZD(GQlXGEX$NXS=AIaNSY1#!f-NGOL&OZtg4xFo3X53IU_TMR{AQK)@EO^TB`763 zh11)b@%ZSIU^LC}MKv2p5o$eUi|Uor^$(IP?YFMlR*PCA%Gk8FdePQzKdh}P!^G?< zkkb9Ohvi1k*7KghE#HP;hn0TXBi4v5r+;>}H9iwNHWjn)w;yx*?dtpJupGagkIXS_ zI#Bw)#Or&M-op<=ucDg8$(2gXro{9STg3XXW3vZaRJP|qtkDywpXY(i=s&5d2iTOk zcVfV^2?w&Wv#;5i+O*Apq;&3A4Vw@b%2({V%3VgPzTqUdAyIx+tXEIz?#SNw+EJ{)kfXGO> z^w$(4Rpg)qz$6;^P26M)j&GeLQC}`56L|0>eol~^E-4(iD=rD+Tt9#>7ve)ROm`{k z;S}I?NT-N2TBgP)xj~8OuJZZCphOssPSB5pWN{K_T3~-$lpmS@3_TGq$adBnSV-YD z$Ifcf$S@*`WC!dVpSt$ctZms+nkZ|!W$8*6mCTx!okeha8ouSMiwEARsGKvT+ctdf(2YZX zePpSvH`&&^ICZP7KUJ~s&9?sdr(Q09vHagxCfoXP`c%cfbXj$*@7nn}J?&1K{>qNH z_?6ytUBle|YlnaCu7OR=(+Dg5i5D;5GwN%Le^uKO(}S{k+rDSLVO^;IPC?4u|LubK z#veBSp!tX1uRJ;T`1L2QJ@K_CZxq~e_y4@M1qQaF8XQquOSGz@I<_-a-aNl9RlfeV zUMTham$t2N0tkG)WAOw&o>f%-75nXSp|T3pzMvk;23vEcRd5tvIr_n!TA^~k_`#h5 zp>&`4!CgJ-d%p;o-g4G`$TDJdl)N8Ci4FQ@)4odmO?+R=V!XMrvVWWHbz1}CuXj}L z(>Z=*uITS6{!zV~!mVaX>21N&A2(T0>c`uR6z?%2{U+802`euYp<#?(6kg0{jp-GRaaF)XSdMOw+jm3wuV0#tJxhI5O&u?nlY2+ zKX4I%srgc~J~M~uJQZhlC^Q5@$MM1{&7N>Qr8-7xmRuJVzD4u^eIir+Rs}DVH=L{_ z&Z<>Ept{48TZfu8U$I#i89oa5{id&z9;+>>*MT#g+Yn z5O^?cVdbj8RB8|ION5t(KFg_Pl`H^q-OS%9pQ7R)qad;XGK8&4^mz)x<^&(P2gc+< zKgpLWP!r)KN~u(V3K+g$IparA=b3&S0mZ+mgWOL&mHP#Y+;_o63`REk@&q-CU<=#) zKr!LeYYxKonf6rUi?kRVh{g0NVu{8=vQR+S1W-d`4!2zXonlF%U+|!!uNSYdS1Mne 
zq;EA`K(0{&2@*j;ncRY_?2BRe?G=beU{Z2SOBKpXEkW1Mqp$eKYj@= zMGq=r_vy1GR`veZjvw$j2^JzoUx6PO-RC0Q?kn^g5f*hp2*}mDsy%w3-nE!{H$u|^ zeW|8-WkbUPZMjL? zJM;SKQCpt9LldkWK=JYVwif+^4zo_J6FN+cXT+YfmMZ-Ad`PV&gS5TRK8lYO(>+If zSvhl4^I#mqvHI$X1nj$djD#v0P53qkAaqO-<{}qt=B1h=bv)X;m3ZToIP;i7_Rjz> z16dwfwFQJ8sjzmUJsns<2N{phIa#D}q?gmy2ec2LZ|uNk)y|pbqk%CHZpaIsf$-Fk zz&Y{(r>_ZM9|LeM_i=#jc5ueffpW|b;yuE+Gv3up(i`FQI6Zov@c&d4WoPt_oAxuY zFodlUB5Kq)dB>e(on2-o8y7nB*kJoG)o5cBdrc|*s%xs}Z$*c!q;ilZMoX=K}v z^3x`QGn=^1l*ky3Mb7vTw?k%5O6G3>$sC!0wYC_(NAyv6W}sz)nbbuMQG7W>DqyP zE(!O}#e<7G6U76!90$@R<>_+IQh9r_ygkveH&xz0>qwW^D+flD(KnDC>B1TqZ<8>$ zcBKlNXN}ADg4yXW?udzB>}A#UEQS})B#IB-avY-Sip#IJUTcl{iIQV;J#Up9Tdu5qx#sIN@$pn;_uQW4>a{WH<>y{}?i-&; zRd*!Z9qDrT5(|avGX;3h@9r6y3RJ|TG1K;**EYUZpD5VBWEn_U2GVvH?Ai42L-s|R z&o%7t7QWS2i16mNy*Au_V(4qb?Wf(gfinG1dkY4N^gk;yBA&2v)G^ z#ra5w`Y%0?Y~Lhwa{*Nmx)Chyxyv{WX4%y|Z-wNRuxwg(mB*~}O|La1T?5!{VRkK< ztCQyHSTqI4*_wu>nzm$3+fvPjWX*$xvJoD^1Lj`$}Ra1!v2S!a4zgEs75r?x^xmlDO<25X`EO{#!Uj$nUdegl5+PhaqLSdI6Zp??C`%ll!X)F%Qo1Sc+nrJ5>XO%NM zXB(OP126YhWiPw23Hom>y!FNVW6ym8#qFP zD{z*gN-A~2c&=9=cA?N`JaN#JUd;hRHq6S{6DzJpcKsSph$F+x6f>&e!#}k=;C0o{ zg{mAP5!?qEL?2aBF_vQg0yz&2E9-{cFc19bnI=SfMI3ANfRpF4#NVqM3=43M$EL^Q zs1CRDC>upoP!1@u>>3MXwEXCMP13$5Wv@*bYTq{_ z6{YWc@L`lxsekG1YtVy~?h(xr?Je@^-=kR>#UHNBZtO0_qLd9}!Wo>=wYPhZN!VMd zI9k1oc5~cbOEI=FXAi|_Pmc5?0@ThdM%+hC@sl6=e*LT%?{P(4icG5tV3adXrHb` z+bfy>8}n?FXgd>g#%KQR%&UyqvzaxC*$=qn!|*Awa2&l77Gyxj4>~4j=U{2s1%(Sf z#@Q?V9kSrR5dA&i0ZIl@ylWD~;&%l5J3_%bf|dPuydyZ@5sKasT)z;i|5>R2g|Oxq zLis-n%}Js8KMQz@^!2|GyuT0{kl`0XOU6_vnqnv8^)EhoPeAnjraH0hHom>nmN_B{ zkBg_o+xh}=SGouv_@jG6+r00Uqdb0)-`_Rbe`Cb6_v_Y))wcx%nFC@eYV~CV+~y7J zwor19eq}b4h{dt)xc2t+shy%(^MiB z#xMZ5&DXQr0>%!%GHp)rus9c`@jNWXrf5J9i+4Oaal;*(PTX|IrWea^J3J`fl@V~8 zN5!};oMgAxg1@0(xA%(!BC?=Cbe~67x-aze`)enE!+vK@>eh+HanyhtjTmkj*z~GA z=|Xq9wDPvSMl_`h@$pjJ-z_N07;y*Ab(d$%6tf8Sf{c}7Hhd^GL%DJ4x5@e&C&hZ5 zV#OTl#x2$#cgKh4yW;0xSqmKib*ducX+_KQ83DHi%VNo^&U+Nid_pW13*+?}0k`>< zg{~Xz_b8fabpw8fX>#51^YhVH#%Y4xnT`UnjFNFvlM&6-6^f12ZQK_5jX(*%?lu`k 
I52J_wFABgy761SM literal 0 HcmV?d00001 diff --git a/flows/lib/utils.py b/flows/lib/utils.py index ed8f8bd..ef6a75e 100644 --- a/flows/lib/utils.py +++ b/flows/lib/utils.py @@ -18,6 +18,8 @@ import boto3 import requests from botocore.exceptions import ClientError +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry from dateutil import parser from genomehubs import utils as gh_utils @@ -550,13 +552,56 @@ def __call__(self, parser, namespace, values, option_string=None): return EnumAction +def _build_session(retries=3, backoff_factor=1.0, status_forcelist=None): + """Build a requests Session with transport-level retry logic. + + Args: + retries (int): Total number of retries per request. + backoff_factor (float): Backoff factor for exponential delay between retries. + status_forcelist (list): HTTP status codes to trigger a retry. + + Returns: + requests.Session: Configured session with retry adapter. + """ + if status_forcelist is None: + status_forcelist = [429, 500, 502, 503, 504] + retry = Retry( + total=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + allowed_methods=["GET", "POST", "HEAD"], + raise_on_status=False, + ) + adapter = HTTPAdapter(max_retries=retry) + session = requests.Session() + session.mount("https://", adapter) + session.mount("http://", adapter) + return session + + def safe_get(*args, method="GET", timeout=300, **kwargs): + """Make an HTTP request with transport-level retries. + + Retries automatically on 429/5xx status codes and connection errors + with exponential backoff (1s, 2s, 4s). Separate from Prefect task-level + retries which re-run the entire task. + + Args: + *args: Positional arguments passed to requests (typically the URL). + method (str): HTTP method — "GET", "POST", or "HEAD". + timeout (int): Request timeout in seconds. + **kwargs: Additional keyword arguments passed to requests. + + Returns: + requests.Response: The HTTP response object. 
+ """ + session = _build_session() if method == "GET": - return requests.get(*args, timeout=timeout, **kwargs) + return session.get(*args, timeout=timeout, **kwargs) elif method == "POST": - return requests.post(*args, timeout=timeout, **kwargs) + return session.post(*args, timeout=timeout, **kwargs) elif method == "HEAD": - return requests.head(*args, timeout=timeout, **kwargs) + return session.head(*args, timeout=timeout, **kwargs) def find_http_file(http_path: str, filename: str) -> str: diff --git a/flows/prefect.yaml b/flows/prefect.yaml index 07ae523..9f797bd 100644 --- a/flows/prefect.yaml +++ b/flows/prefect.yaml @@ -264,3 +264,142 @@ deployments: schedules: - *daily work_pool: *goat_data_work_pool + + # ----------------------------------------------------------------------- + # Phase 1 updaters — external data fetching (migrated from goat-data) + # ----------------------------------------------------------------------- + + - name: update-vgp-status + # Fetch VGP status list from GitHub YAML tracker + entrypoint: flows/updaters/update_vgp_status.py:update_vgp_status + parameters: + output_path: "/home/ubuntu/tmp/test/status-lists/vgp_status.tsv" + s3_path: s3://goat/resources/status-lists/vgp_status.tsv + min_records: 100 + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ensembl-metadata-main + # Fetch species metadata from Ensembl main site + entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_metadata.tsv.gz" + division: ensembl + s3_path: s3://goat/resources/assembly-data/ensembl_metadata.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ensembl-metadata-metazoa + entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_metazoa_metadata.tsv.gz" + division: metazoa + s3_path: 
s3://goat/resources/assembly-data/ensembl_metazoa_metadata.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ensembl-metadata-plants + entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_plants_metadata.tsv.gz" + division: plants + s3_path: s3://goat/resources/assembly-data/ensembl_plants_metadata.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ensembl-metadata-fungi + entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_fungi_metadata.tsv.gz" + division: fungi + s3_path: s3://goat/resources/assembly-data/ensembl_fungi_metadata.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ensembl-metadata-protists + entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_protists_metadata.tsv.gz" + division: protists + s3_path: s3://goat/resources/assembly-data/ensembl_protists_metadata.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ensembl-metadata-rapid + entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_rapid_metadata.tsv.gz" + division: rapid + s3_path: s3://goat/resources/assembly-data/ensembl_rapid_metadata.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-ucsc-assemblies + # Fetch UCSC assembly hub accession list + entrypoint: flows/updaters/update_ucsc_assemblies.py:update_ucsc_assemblies + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/ucsc_assemblies.tsv" + s3_path: s3://goat/resources/assembly-data/ucsc_assemblies.tsv + min_records: 100 + schedules: + - *weekly + work_pool: 
*goat_data_work_pool + + - name: update-jgi-status + # Fetch JGI 1KFG project data via OAuth API + entrypoint: flows/updaters/update_jgi_status.py:update_jgi_status + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/jgi_1kfg_status.tsv" + s3_path: s3://goat/resources/assembly-data/jgi_1kfg_status.tsv + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-sra-data + # Fetch SRA data via NCBI E-utilities + entrypoint: flows/updaters/update_sra_data.py:update_sra_data + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/sra_data.tsv.gz" + s3_path: s3://goat/resources/assembly-data/sra_data.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-blobtoolkit + # Fetch BlobToolKit analysis data via API + entrypoint: flows/updaters/update_blobtoolkit.py:update_blobtoolkit + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/btk.tsv.gz" + files_output_path: "/home/ubuntu/tmp/test/assembly-data/btk.files.yaml" + s3_path: s3://goat/resources/assembly-data/btk.tsv.gz + s3_files_path: s3://goat/resources/assembly-data/btk.files.yaml + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-refseq-organelles + # Fetch and parse RefSeq organelle data from NCBI FTP + entrypoint: flows/updaters/update_refseq_organelles.py:update_refseq_organelles + parameters: + output_path: "/home/ubuntu/tmp/test/assembly-data/refseq_organelles.tsv.gz" + s3_path: s3://goat/resources/assembly-data/refseq_organelles.tsv.gz + schedules: + - *weekly + work_pool: *goat_data_work_pool + + - name: update-google-sheets-status + # Fetch project status data from Google Sheets + entrypoint: flows/updaters/update_google_sheets_status.py:update_google_sheets_status + parameters: + output_path: "/home/ubuntu/tmp/test/status-lists/google-sheets" + s3_path: s3://goat/resources/status-lists/google-sheets/ + schedules: + - *weekly + work_pool: *goat_data_work_pool diff --git 
a/flows/updaters/__pycache__/__init__.cpython-312.pyc b/flows/updaters/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee2ac0bf7df19ea7b39e8ff373a2056b91b2269b GIT binary patch literal 152 zcmX@j%ge<81kbxKW`O9&AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdWv(AuoLW?@pImHW zte>2jl$w*OTbx;vs-KcrlBl1SlV4t}Us?dWCG#URE4%P}H8OH&V5TqE85-A#uXL{{1volL~ zZ|pcTkwH1w0ts$00;C+cAamdt#3`I2aWQg4>`^!%xF9YT4iX9%s^?>GLQB(K{i&{3 z)m3kP?CL5aXqRvP={(FK^qZVyO-Y6A7eKg;Y;*Oimlt)Eqq12u2ymj z!;>>6qc&*?$9055IjQ@MyzO5gO`i$VC9S4y3d*ZdsqVOfGJ`j%W#P;gj5>TOeA_=j z2j;M?xTt|cjHRr)N<$45j6?yp<7R8CsXj&pq@X}qO?N^SW_V|&1t`!Ad4Jpjb+#*$ zcSliUiuY?|IY`MEFF^(DtyHo=q4P%%RsPF-Z~|h$95cfeiFvcpbZL#;I5h1U@_CGA zYsz)_47Cr*RXXo*!KE06VS?lm6)onqfq~c%D0;j~Tn zQp*!ggHAB!vl@A0N6SjWC4tvX#|3o>_YWEU3XaOp`U-2gB+C{C(?~nsv<&uc{#C2h zs3VRE<#4_`0cIqjJSf*pVlI;rue)obKpr4Z$X?^Z3JQ6_UK+9Rht4mz(U02tH| ziu5LPJP~Q|xJHzproxb6i3$_*7H!J+h&tO{ujH7_y7_-OheWsfV>{{rv4eiH&V=d z;a&|sJ`3F<`uhOVkN=`oe%A_X#bXb&;VmVnpU0a$&wVt%q;2jSTHiOkRN5FES}Hxr zKflq__er=Mu3Y~1#!|T6GrpD||L0L@?_UUm&ZF-AYcHO@`~F>PZSd9g?(zG*Yx#5k zZmDwb3GWAUpB)>+=pHVN9Z>HTd&UOTdjmSi7cI#IS=(WTFJR?15<9FBV3Cm&$|{%l z1qv(3`iH7lveik+53sVD>Q15ng=&CjcUU1ahJbCdQ)kfCJqU%G=*V^hHPEsxzZ~brMxnQ>w=4`WhAFn(4MEsWZPpl7u5IE>n|3!Nbrr#S8(Ov`p7Er(Bwr z$?K3eE->hn58G`=IaIqZPI|tGi@HK2d1Shz-tsJ&TUoI^%^6_6M&636QD!!s7t91g z+6A~v4RwdQc1+J?7_7Gg1frR6fWa70RXdc#lLTt)e4M4wfpg|AL@{Fm2b%AJZ^HoS z1TdL2<(UmydDgyMjP0Kweq1xA3)as{1rqPc|4Zh4=JWs9>D~ANQg5NY+nypZ7~$&0 ziWU_eco!~A&!UkEendKN!hbC4fZ{cUU==-50dZ8^4NfLYFp(Yy5-J|fa+y+b09YSL zQI8~mL@z$Yh}?&dUxjWFJwAXs_kBA1)!_%Fp7%Q6rTOF9XmfZX;a#WFm2LcSFLksA_a?73L&8+ZIQAVrAnpMWIawcY7grJ~8IoIps3OHf(~!G$;blNd(Y-8VDu&HLV) z_q{#n>dGP**FXEid6q)x7dc6W8Vd&)guBQ@44KLeWmGlQQ)pFZs%e-yND9-IwNh%M zXs!@OYq%*K*AWioxb8Fjfqw-zd?tvCn~fR~l$V2C-EjqF7H?47RuX^kHQ>?UId}*! 
zaH*K;9GX=;1xs0*Q)e|##b~huw!>lSW3fF(1E{K=L1Xcbr&X1h-8r|rkns}}nR5df6F=^D5I-1a4M|Gvt zL52ZXcFOlWY6~oUELLcHGKg2hj0)z^n-miqWdyPZDI%PTayC23mU$(MVYHomMCYH%`>W)o><9nEJEEUV4y*y)jovbxnTEVx@EPIx`2&>Utavi=x zYiHytU3a+PQrz6!%x34QXfiJb0BWLwBS(NyPO)E4?$uvfg%~+No+l2cvCVwb6V4i) zX3S?5{NACq9i>EqtP{rtv)^TAOG=Rce@w8JifU)0KhC7%Ez4vcRaLoM4$@)ll!JbE z1jdR|#y3vSfDeESHnHRXnpg0(Qb&*xzFW3^MwzUU;1xOLVRCsn0>FSBC@>n#@kF4* zynF>p$BIuZ2w`oJ>J?Lx?yOd&*MB$;)0eXW8H^|9Ljv)pLK(1@8&CD(zM3-Tl)awgchvvn98aame-WVC61aJnREiWie?zXe^eBu~qE^0+uH;l7Shp zd_hh0hQfIjCESELlAU1wko$>y1RW*O>P44(f#dpOI@w(qO-=-BZ3$3Z1r3ar)f~L5 z+7K#fx@*!;hy|gB!Pylq`m)q7$!K54A|V3>T)(v%GXMIkaZyEToBKufd-fZ zgE!zm6r_QsLp?I`GxRFCXJEr#VjJp2Z{ zw$O_K)YeqwMa>Gw@!@rfsZ*SZi-03;j%3S?^^>ObHJ_!3M)Ro_w-p!wSmd8)> z_#!h}{+t|(J?@a9Vp7=A33d8#u)LGcNascN;Z*8l+TG@tikzA|QM(2jw8$U5HV`ylJ!ghP z%CeNWkB?r7_j&G_bI(2ZoO928Qdh?lNDKe`IQgH43Hdi{Sjk(dJoyJ3A@38Bh|CnB zX30>-XxpgGDDBh^rEQ9xbWn$YF!m|uq>H*HIm%7CsoUsdr}#+^^-OxHce0MwP5P+M z=yy!{C+lhbWCLxOY^03>IYfGh=}MO>{EIp{x8Yg;wZ4gG^w zt5%9U^apEgt0sD&f7oi(O3@4buhiOBO{^Ps(heBgk0u}V??;m#`uC%$9{TsAsR8=; zqp1=4_oJx^XgVx5M?1xqXp`6)?KjTs zq5{AkjSA78lQst|5N37btW%Z6(wB@`hs7gj7*POdfE8Os2H<+0f$NQOJ;a*TO}dFV zDE0zui!~>D>?9{1!#Ot@3yvO-_KC;O&JrRI5F+-47_lGVUlIdByLcjKqy5pk9uoDz z&vS|~Ns4--{UN(Ja0+A-6AeU%Ie88jo$1|#uo==S>Zza`aHuDu8*218e z&*idIQ{qBpTv$*t*|Z|WWlf%!Rj5=Zs6pM4$Y&Okx+|?{av>}0&Rj~)Xt1&z&1y*% zY7Q#rl5yR0O97noRMr&rZ-Iecos*=DoK_@B=OrnfjptKX_e#>sLmnXGER59=rh9IT zhiAs$3Xjf=i+YPBshX@MW0I_CG&!FKY9xua!NQ*)TlI^fh^kOE6jM)*gks5gC8Z3h zNlgji;zPK-YABzB1_GHCNio?dArBBNm|LQLKxSmfQ}rdt{)GI7ef5#IX?eWlYg@il zs%u%kSn{_oUoIJajYobmMSHDTh47-L<<5pe2`v|_;5TMFsBIL2wn`R>1lW1hZIG0X&epFdpk*1?t2>=ytExgw>r?iDhYA=PWgX zJzAb-Vih$Ehky_#{LpAW8HW6Q5@F`B*GS>raFsqo@hgNxNEmI;o+L7&%~J9i+W|13 zG^;T717O%)Fzf*^j$JU0IhRGs0npAGv}JBG&P&X-hG$mHSy*bNvEr2jSL_-l)z!ET z_%^o-hKsmffOFiwFz(tKGB3ut{qg)CaPC_Eh1U8aQnEjd|Bu!hu~}YY99)3~e)gr= z@rX0z=p_M<{26qOzpGI>rU^6R4Iz?e1vQyjNGVkVZy60?QoJ@TnCCOd z^JB?GLO~l-v-mg>{SySPI#Ma0+RK 
zjSz0hsl1{N3c$1JY(^213FL=GDKILT0T5h=-uOHlbfULeE(myCao7T2j^SJB-rc;> zohEN|&o18RoP|;6z_XAuNj0r=d73g1Oa~mMD{~YSk8VeQ)DB2!4>SXAdK?PchePh_ zuIMc83LU~Byp7NhX6T~QmmyQ#kl{WUi5`QN?w1r(Qo-3nDW$JL@9PMh04ZK3FEyw|z52;&U{`_gd7^Jwo^pI_?#`YS7*fAut%{7ofq%UX2v_llnLMfSYm zd%|wxg_FLD*|R(N6&l^I@S;BhmlYRSYzpqC-h`DUDXygCB}oOto{6hEpUN&sIS_4d z|M_fQ%jGpGo}|y%ceH4RegBnV-)q)ALePL`Y;*WQBUO6rbF9@9M(Y9pK`w z0xl1MfRG>mENrlkyVurW^~37w=BlBTuwc2?%aUO4)0! zH3+;Yb1-`l?63Cyv%+-<%;0Ra&Aa9axEzQD%_|hGy6Io2#y2!26XXjG22aq?TT~aE zrX!Y|1rdi>l1odb6+l^NAAc9_@fdqC94eVodC3GVI0uD=tR@ufa2uqk8+GP3#gl{c zGs(Rtl=d%ocOeYo!_Lcv?5 z8x|Po(Cfw~ZcK<1*QTXN_zJ{Ls?ADFYkKr5gi~x;RcDQO6?-$A%;*k7>FW-|p`oZ^ z^?LBeAlwCBoDq4+!vqXQ=sbj+G7yUa&roO4BhvXB=mw2bnxzmzTgoZ)5F!8 z_yT^zaiQn%8+Pz_bWcTS(0!>k%wQaNw7D!qdC>0R-4)_naG5nDb#9uyEyc2V2>&q& z`Uil{aEo+1^g}>ULbfC%z^$SwG2NbOZu&p1hU8Y`NS55wcVI#qe(GJ=vt{zMlXwoT zaa;V+W(L43TZu9eHVsrClj?^E1=^Kfi z;R8<_3Gagsun*i_>&<_y{ms%}E^QwEIR2pT!YBC>S9jNQ$Fn+if8yT6FQy)F!q;}v zKgK+CcYVn^|8(NV6Dx}k*fwC+%Ei?)s{?D%je(7>BLCV0_H@b9TJ#((vPZw}g3o;J z?*HcNMx6V3--T~r{^#zlZ&dV={?T{tV#xk0e+TC04qa-n|GGuEL>#|9#X;u*cR|mm4g94et^h;PxkWcACZ|j(a;YOmovy9huSy)KID@N6%K4Xh3x)Igkw^&pY%8)U2@15B+!GRoa3NG|C4^}=SsCgsY0A{>7lEFtA%k(2F01T`zr@RaH zOUk~ZGogZDJ_=Yt`=Aw02quznwO&->1^*rhoe37|D;M}Gc)(t0%26eO!3VHD**uLY z!DPJ9@?0lc%0l~d-4=u46r)2<6Oh5h&0#)N!NZe@Q+MU$C465Bz?&+<&0sbKna-u< zcNAcqYD55Z2fTeK)UZi(633nJE<=;BGdP2VEWjBj+Wa1B4c!h@SZdBt6-Wh@ama&a zSTkR{R1SJ@n=zR>rtN^Y75W~4{uTVxO~^pm^bqf%HBZ%A6%U6tJsa;8+s|$L-Y9Z! 
z?0~AD`tj7NXFdKIJMbmnzSjRy@PpuX$BAuz0L*CTE1M(Ronu>U`|{+!yL>yX0~;5% zT8CFWrFP+??hm>*?Az^w#Y00IZ*Tn3M!MMg`if_VYh5io;6h)rzLIbF(Xo@8vzwD2 z%b$2Yey{k_)zY!oN`aRjyVyp)%#-fpTlH`J&d)iVn$~4 zby0>lQR9h}qDi1A9~9Lz#Q=ovuYu;ZWJ)z97u`+6aG4RSE^KyVhF4!IV1@?Dh+WYM ztQlg=Vjz%;*v7CE#YhA_37PINZUDL+ynj>dQPOo5^@AaXrhm#=uCdMY51en%p8*cg$3*P_gHg6K4D$uyzaSpt-~BJ7}xe z7J2ijlVgO{_)`K!`4Dk5KV;iMV9PefJyZr`3+=;O>|tz&D(#LfwgcP2O1pE5?ZozQ zrESd@D(xd%>=7IvDG#+b*p}mbS9v&^RP39+xUE@X_(HA=}7J} zD2qq0mg`*YwjyaMHy>l(WY)&Y1j~(UX88&G)M)?iI^*=)mOYOfiOsnpuZK6swwN0WOQ)+5^>^jEOml~SOb|@eD8p;kRVGwFK0zR&j zf9i%BV&KX)?7Pko*1J;JwzofO>|X7D)cl8~=C)E>FDQTp{;|VV?_9q8xSn`hf8%ag w9oTkvue&x56}eYR+#63EcCT}J>@i0iz{9`?@%58`adVpuln)cO!;qc-0+qDOasU7T literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_google_sheets_status.cpython-312.pyc b/flows/updaters/__pycache__/update_google_sheets_status.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17b5123c73b9517dabb38921d600ce341352dfdc GIT binary patch literal 20275 zcmdUXZEzb$me>q1`2HfmPmvTg5~?O4dorRrdoj zow_Y=DwX$oFaStFveuhQ>PBKuPxtGdH{Gw_d;Q*P{0EE0NWryt^O5({=PBx!_@G?o zbY}Z+)D(4x;;1o-qdAq29;3-!HKrnW^_ZI6nK6dkHDg+G*Ny4nuJ##bjAO&@mTRp$ymuu=~yY0X~)Vq z-Pj&ZKUU5e_zKPleVh8a7A3OO+ESc<_RO` z{H8{M9BbhZanD_^rzoL;YnY_DMu2R3Tg5fMtr}~EJ{|B^$lDYQ_*TAvw@xx#%LghM zr?EDGXyIDD6x`eSHU;wg^zYL&HLsdiIorY|0DP7Yd8XK)5SZpYAyy2zLt&BS+#xqR zAp~aF{y<>T$FsvzJRcI9jmE2d(C7B>A{(0GSsy>?_RO@pf#o7u}d@s{7p{@2Ctux|J!?<|0`Bg5BNub<;@u)%=WAM*Mq*-(J} zdfO4@tBownia~*QbLsY>DR+pS2>5(~Ir5$T0rv=e;}k#RX4{$%LzOV&4mBIwn%Ulw zp^NNgpW7c|`+0w028PNT;n_YhULRlwIC`dVMAB2KeA%Z+MT1VYWaMYOAs0W(`$G~l0Rtj2A-8zl zsg+EZ1_oVM`+A11_6|#?q0y1cqa&`%XGYFRy5aV8hT|vPVP8lx2Hk?lyWGO0DCq_M zwXj#EaEa}a-=7MFf?`)o3m5Q+%`$y8 zdjc~pxs%qyweT%%m(TRP*wi{Z96EHhb)@}LFferK$mrR@OYRZZm1C1v50AVwKRVXa zIyZUowHI7RT%L2sB9VcCqx_Vc6UNWIcyv5?Jv2Qs+vb`Nxh|Y@Pq*K2@vSYv@c5|! 
z8PpRYaW?Y)Z;a2#Xj7lxHF|aYrOTm-`RVR3e`U%a>70A%T&QX2+_f9CSLbJsg$8GP zMlbh_ym+jyFLGn(Vn^$hp3$izQrSEUhQO)!|r8ax#^qj#jg+nqjJ%3Y94JA)Fg>O_wRT?0vdCn^$rZsv1gA)6`4Utp{_|4fRXZ z9PPA3PWJ}p{KyHAorlMLUSQDN4w=NdMZ(Td%E&|lpuNCf&kF7tzS*gcXu_e1rlS$# z!0=Gh(W8ftHML4wpBuzL>!&y1_3zV?4tO;Wi)K)w4NgWfa3G|B>+yo5m8nqDaH07i zFX>(j2T*=QD$crreR3#AINlTBc$P>G6b$Sa*6P8Y?g3ZGeIwv^c{$0l<8kDDm8A0o zeBl|tDCxX@B2Fb;nnyeB0#b-nAh^B2FrmM+jKntn5OQIy{6S!kYsy+JB- z1%kXkQ=OIK!clnTF?{!bfn<^TOi$U{76-O$r7P;#@Vd>pcy5z1Z!mik%-(fo-$R3C zvHKUcvL$s&Wik|RmQ-z&)Fn#lHcFZjB~7cIWJ%jn|CYV*PQ~qttpdj#({0mAf3l!v zv%G4vw0f(ga;vQ3uJtYJ=O(Sgy43y1N?9DKVya+Is*=(h7b9ENvUtUjwYfEKe9!5m z^-NrU<{_i~-GMg-;-v>y_r*(&J?K~*SZB^{7FFE2bopytCm8dBjf z6nio(5gw3L5VrZZkhy~kJhv9g@Z#uDT92ftd`yrq)-*F+cv2q>r{T1~fOFyUFi%bg zj7N3PpuiEPlON9b7+i+g%6JppO`I9nnChM-myHcmPHW^uT6<;ZrhweUASkj;0Y7M7 z9^U5@**R}$iXC(hHiDiN2(tW4Ff`8!NR6;w#pa0S)mOylYDR!#Xas(ezabS9o&-9n z7goA^7I=bt+{a6X468aTsog%`r#Qmzt0nams!#44UQyT&B?1nyfNWZ*!US2eQzPjF zS;dxA{-C7xyZx}@0Uf_vXa#`xX)>iEcF78zkV*wE&@Fkv?Q?m2yxSiR3g@8sMfel_ zkStOU4W>8CZz24$SqqjotA3Z5+qDcrA+HHw^4IIiXbJ^g!s(u zbkEQ(520qHBjw7V9;JW)^^xO)xN`t820Ixd0$5qVh+QmjJzns*MZS>@gOVXmwl8WGUm0ePpB5m?AdYB3}TJ($ywu7rNb?RSA97iWuuj>g)fr!F;pu%@=OIuspib9pjc> zNLm`6TsC@h^jj~a43tLq&GRWUWwtI32m|nj9c+k!0-TR+q*E{}(V%zLfZjz%X<$iE zw)>i;Mf#TG>1-(qIv?sIx9UI#!VyVQ!iQk zo^fwhT#nhU++(EFHp-2|I<6+bZTxkjJ0v;eaLa^0= zy)ewQvdNX~BJ6uWAR?^1?{%ul)Cs+Svd{;KQ$-dxh;86ExFXo}65r$UfTR!cuFPk+ z(w`yV+|9tBcnFe3YMWsSv>?Qcl-agCerNjj^v5Q4tEBAC#O;Zt?j`Z}`coEwNLh2{ zC3o#*=H)ryFgf$0imHCAc~NEN1?HC{Pf;yEe@De> zCNp|1X!uAwd9>~*tp5r5onO|c7W&lX^r?AbpBhf5^hwrs_vJt^42mU~MFKFzIQ5OJ zOhPVT=>ji;_6DM27FKuwtS16cj2BE9AY;;V3EHY;$TXIU^EQ^OI~xngWP~wDq!L%& z7Q#QkN}Fp}J2#X%wYw-#xPq{Z7q*j<+86Ku0BR7j#`g;52!L)v08$jMOS(XiWJE30 z$?Bafl!{%lwE@-@SqU=53sV3#4}apnhGdcYrLBKSy=k&Ofl@V8!8sb(QfU<^L$ALv z^w3=R=B1mLzBRPiw`nbi+iMfnx(#bX!rHKDwl8(Y9rba8Gv3f0*Y|9zRW|KnKQunn zx!C=?XWuvr^+gSFQ$vDje3+;4a!cHBAYRiE*B{QdbsoyLt%;jz5=<>o$Yg$&yKYeZ 
zBK+(RAOlj42KuCdmQ*CCh8$2QlxZNRB{?1B^dx7<EpjH-WHk3nrpuFpUhfRzYwbGbdT?MSliF0d;gu$X+X_kkA{q+v28!pnfalO|#k zm~V!%C8InHK*~W8XAvk_CgVM@bh)sHcRj|u1tjBd!=D&~gh)?w;gWN?E3U7KnKn%h z$Uh&~?~7H+`NMJjo|PjcA1|zq>uX{&a>enuo{cpl1lT+XV$dys{Q?8d zFNlT0FBQXTLU_oIkumJLPyitLM%8HHj;dugdh2*vXsaU!dx*3nrsJT(Rnd|uPg9m( z8iYCMM+jpQghaCKrXwV}44K(>k)(k7vEai5DO)BFS)a-ZWr2q((6X}BqE{A;M2jK{ zOk{jAy_6_|I!#cDlwJg=e+GZzFeI=UMJc^_Zrey%OP1(OlXH5u6E0YOR zZ7do$wI`Skpsq}vFJY=!nM;`JR<&_cbAoBvZ12icPTcHTDNUGwOT|qs3Fgq|$v&kn zZrz6!{RvaU>eu3?t_1V^X4e^|c4;PIs*LFprh}_{opWK5ffVRyd6D4gMHKY`D`L**EQK_r7{MCJ|`n2FOl?qHees{s6HH}<5%cE*gE9uCQvtgeP zWt$`XBLKROfF4K|sZR|1V+U8;*NjQS^NZbEhCM3>l7_uIa|dE=Nkbi$RjmlI&ZMCM z^H3U_OBz}*zb~eW^(PIDJIizC-6Z=qh`(sZeiEk$r*!+TKr73g_^hbn=zAI40q!&N z*%y@E2Tg@ClRmA7es^8jNuD$wd~(xG9(G2cN(?GUDq_)S$AF2 z#k&gNQNcVfeCgxxXq;1s_Lc`uIfKhWr_lmRFhH4e4to{zmza(-<_RoK*o0&N4!Oz?*AZ-ZfiV?~n0h0paB;c3{ z7cjYq$t6q%A(0J9!Vo^6!~~WPC7gyt(tuepED8rO&maO4-HMJ1aBK_nXTgo37Wts0 zAs#wsq3pLPM>yBzi)RK$T>X86Lznto!vkY|;QG8M;6Mmk3eMvM}ikbp;-zY!8H z<1>rT8Zc#fgG5Uw+fCstDfI_Jez&AX|FCwL#~1)f3l3s*aj1Q~|9!RakFkpmOb81@ zevr}3s$3Ho>EPnPo$dmsiytBth)do@-08v!0RJ`oi7J>z;Aghd^|H46J?pm9I~@gE zU?Z!zS@E5#cV52za;)sVz3=W#7Ps8*_()tgTv+UWs5fR!Xq5?lWl~?2HI)soR3|Mt z&(Cg7Gs)&OE8Ng&kZYbbm_S>cyLn;5yf6o>wY_)b-6N~ce|7bTFa6-9fBAB< z`pkoCiRudp^TmYW5}0_4O4H5-+19&seI>Md7;U|+TPEw0@U70p!N)Cj84laU{C(oE z;1_1TN8i(Y09fM%yXXJ6d{)cp6h86H{1Uh<@xvko!}ZMV%+Jv7x8t)G&I*{>vYdH@fDt8IEUx@sg)#zIhO+0@ ze%Mf>6s#m8@RZpBn7v;HlRK)@$iR19M5YzqtIASF2$?&%cF44Sk&YahrtHo01wYki zW1gJ5<=Xp1Mn{D<-l`Vg^SqUIaOS-$EpwWbnIPv*^hde^cQ{-1m~f?IC(M zSfrT#o-?kqP8$4jZpk9=@5S&83PXuLCeeq%q7;J{OC}zT{t%<(ni&UsF7KZVO+gkU z@q38ZD#Bsf6J);PI8zgaa_)e}6V& zdDVnMAlzLS2jZO5?}%1V<=c)Cc8H-=X%P+^%FHHg2Rk%JPt!}D30eVAqTp~vXZXXO~EVrLhC7H1{d)Q$EGGi4pLcLCgef8DK+Dh?#j8L~yz1!DbH*T27RV zz5u#sA@*043}n0F!Vp7a+Lpk11rPS7T)5T5S=-h_noGejr@1BsR3y{0t2D*CBE(_F zGuEVhR`90>3PRs`3=3Hkf=sWWjF}-PSD;|SF>kAL6koNM0vD-zIuBJVID^vh4n@5J zqp_e_&_=Z!v9T`bAV(~s3;K{<=^e`SC}QqWMh%df-q9#9G{nE> 
ztOH$CAI-%5EEq$$VI^0-5W#%gc(Cc=d%#7eC_f9Ps4<^%88yL{+mdS_2xU~ss8PW$ z%0zXjI^}$cjl7{CA2ht#p8IXL?t;R_kWfh6G?NR$8ed&7Kh~ckYKD+ZMtLvL%2)mN z!klln-e84q>mw%2&TcaU~1pubY?YTa28Wqcg#nrEWJcbXjwQn5Mi| zN0CLZH+U7jeiApYMBvNO_Yj8 z=$OQ_2AMwOFs-y>CDRFZls1HdlaK@wH)bvJxZWGlVh?8y(w1X9htQm9o@E6J61@7N`+kLM!r{Z~nla?Y1=UQeit1;1+$4Yz>_1w%HCA$}WJu)&`d_QP=k z?Bi1;zZ~<8TC(l>EXk7b#}YPXf9$hB5ygcm>B7hXeWGmq`7`YJ-(d1@A&HnSXHCix z3tKM_3qePsoe@jgf6sHx5b`bymWYaFBdRX;3k`{i3!_21i#>a0;9_4dIp+{5+R>_c zf<(zm^c=A&4LXa6V0xkm?oDjYk}4od3^^~rOv8DOtk9AS zeK$OO5Z%Ybu_tLfzJSO(t-^i$>K|c(#v$kzPlLE1Ko#VM2 z7*^XJlc1m2>~Fq)^YuJxu~aayb)fdo)azGA6V_{>9-~7M)MJ}{v47K1zTv1&II4f< zs0HUCz0|(xD2|sLOgQS7bemTDa!bN`0Mx_cvX#R4-b0B!t;yoHCF7RfvJ6qJE5~A& z)}C9dyniKbJ+-bsol+SM`?u_hec%54QukKro;!25=k7#rM`P#LI@g@{Ps9%mBumdP zo!c~9-yFC(usjv(UDGTLtecN)?E@44cjuP+m-`du$}O{fxqEqT*}EcqzdAPl#|LA3 zR%_#qLq9dQLcDBYU5cg5g^Po~`rHJ;v!786ygJijVRd-5FYahtH@9zbDR5EeE){I^4BQ?^!bHY z4pZd6fk4dOjUaX4Kv^T%a5(cyh-l#86kBY4?KgIvQ$q>zj_DN2V5bg^OF zujDDu!62f?4HoH?!CXX(yEtfydbga-fd(({zk_n&FERNmOwf%X`~@ce9uqXbklh*t z@(}^}H&6h_@yIx%5EcF-DUeYoQq_R3m1Ub2>SVbl%QS~9)8s%x`6!0)bAWjsMcEV) zWvL<}fj)tg5~a6G?^NEdT$zqPcR6Xl0&z+zo#yD)k&#Vv353{n8kcnOg0_Ud{c{LB zw#Ex;VlTv-h7z@x<26^}SFXjaFRbfd{M<-^5hLBzrrr@R>Vvi<>Pf57IJXX-dSFZ* zyntO*C-gNS!XOjtk2ju5)SQlkj5ezP zad=&SWb;TT`JSKY+kc(X0Kl(5FG|BY32gJKxXQ<^9qanTn+HJ-1D-$A*AQ6euRkwH z!&Va5{jus8AbxONU%y#ifrx$E_R!>590cj5(o}vS_QR-upY738KdxnaTB)DZvAyNg zhk8R#v-(4OVUJV&p;LqTRzq*8`opfmUWfW4hX(Q>>-uQ*N7aSB$JHMl*FYX@kUvBk zfXGu1oFRbSFho4J%O3XJZ86F#A_b4YMCWdcv!N9Ej*37C1i<5^++mY>BkhOD$Uqqa zQ;?ErcoDp)`67hj1b_lT->S>{_w)Jbr%lJrAZ2H11}Z z10182J*$L2pyKM(2ol_cUqB*hCder-u$CYl5ME#s_?ZBNo#TUuY7pTDGBdEG&*%cJ z!25*%2DK9~FXAF(K&91Ej)7q0;dn)dhzEyg+@~z64cda?5BU|%!q6F zTwmYFuxntjx9>$X2(#V^&=o^yj+phr4+4M|sJt`H`4v=$$adQ8V~5*`IR<%-yzb$& zE!jF}o93)GwjTEHV9B}U^?PT+GtepoeWaHn_JMA-AecPJdmpo#j0Xa~E_MXYva|53 z5n`D5;N%xYibnnTGXZRR#Bf{z6x?!pa-hc*xGojv{b=GMBnn>mD3H%!AZ=2@@lXbW zSTV%wUfE!?Ytm(fCsMwHGP>9a9u97S0`B&+k%FCH+Qmj{92!O 
z{9Jtw05$TjJ;2#wxmSq!anIu+Y^O~?o3wBU6Vy8;{fzrM58NM4G6Ugx!R#jdAyy`^ zQVrfwNt^b*5|M+tqGY}b#x6K;P0kh*u_2&ZAv+s&va^vGY)~i({~eoX2&B*HB;;LjnS;jsh8CS~t~@qq;3?rqzY`o)byy$+-UH z7I-5rym4XK6ytu%G(6N5#C2sDsSPpPcLr|{#;d#TcPEQZEg2q~EpRraq-=5M=emNe z^22N12NlWkOG~y*z2!TF%dU;$hD32gvbZT}Z%*i&Z)!dPHPU)+T|e+iQC;lyWKsL# zrA@|gYj}C@1{`m(*Cy=;la~4fQ@_~vP-k9x{bODEmbHA-TEAMcYKb52d%!<4))+A|Lt9_)=*4{g>y|45^D7(t<}s)jSm zD?`b$#;wYIF-x+t`H_yX8~>-pq|v6zD06`a zl(R$9xm@t;ATF2CfK)?b%Ls+41UR=2PVL}4iRhQK_!B5@k;I`%mE^2>Gyc{LdWyVg zIt0~6{v{fjDp70;sQJKgEIgP_fW;{|k;##tGZAo)Bp~GpdQ4EY5YT)epynyrvW?)B zAedsw0z!pTK>bU26O%u|DM&jh%zk3T6`VAa5X zO8i@p>2L;}rXQ)yG_zev(WYNe2KoOlDaS9U!v8^?N>Hc%PNkz&e@9_bK+$LD1a&&4 zS7}PX8BJHD%qmSQ=1MX-L#nKjwr*MqQfj=Hmdp3&n#lbj#KFDt#w*(zs7syH(bg3Q z>W?Yt-@1C>PaEHF+{SFGNT02OtWpKpREdGM#tyvK_-!!Wp-#a ze%JSwFGWFiZJ6BdU;Xn}e)P&VmZa*f*~XB)Uznoc_Q3I%l|QcB#%$_<3HlK3M&F92 zD9EmP$nF04pZk8~+s2YqLtRW-RG5IiYBkWJMT(1mMI3%C2~ zZM>!GD(J%1>J$aHwd!rWrH-5F!WH-qxW$II@%E^mQfZgmv8!vn>-4dds-7;~v^i30 zytCEh{!nB1Oasr`YD(2bARb+#ceXkR{k5kkxUF`4hPN~<~hgM%qmQN8@?WH zs{jtSH2{Yjf|ru&M@0brJ0);rucUmluynJu?2+LpU9f3~pE!W~L%>Y~cc{18Q##D) zDMxV%VSt-yjh~qyhu!H@DlD6&DaO1M`B=T@p=00jzK2EUA@C8HpWXOKqqS=n2jE8< z%q2fJ*q0lUhN_rhwR7!4Tz`5~-?^<(o3)F*z)Cf2f@ylUCKmeR=aNiQY7fPflhyEl E0Ibr&$p8QV literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_google_sheets_status.cpython-313.pyc b/flows/updaters/__pycache__/update_google_sheets_status.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc2aa1d8e38f8cc1dc7df63bbf80ce2853cc97ce GIT binary patch literal 19984 zcmdUXYj7J$df4Fo1_AIXlA=a@LnJ}*p$EBo@g<6sNJ<<+%RS1AfZ&jX1OnI|KoYTg z3HR*8)SZ1xoo^$qedXnrud>`!s!ThnoUY1=b+>CfNu?aXP?q!@<0!dJQd^ZD&`A|v z?Nz1neLa`~NI_cNUMeY%#Gan+uX}pBzy98hCpMdzf$QMTn5sFhe*>_dCe zHfksK(IQeb>L89$CvlFth-rlJz`1+3k+W4Y*R`aEvloVVZc8m!L|Y#E3N0P9CHoaxI%&(p&uxj{ z{Jd|~b1vGt_A9n7rmgF~V(Svxy8bJ+E~Tv-lr=4T;hL6H=ti!B&PH{&mODU8n(|62 zX-RWlNfj+|U$fRSTs3!)tKkmyXgC)7I86IF(yixe5$0GHfk?|fSQ$>s)$M_6rEoP| 
zeUFN3fW8{BKTZw(HSK|GyJo3n|l0^4@VvZ)N}M%6C~md{giyfU{HFRW=x&<8QOkNH81=h8Ngag#G)S$K|g! zvn(q_3GbiFwvR3PW9)n+6pGxU-#HxdPr)}X@=JcU({l`}$dW(iF?V{{!KsN$?B$R@ z9Ak(1aAXOlDwyEeAt4q75EwU(meU4Z(?$(HAekmC9@i+N!Ow`W9goxUhU%27ciNl?VXqV2BzCOZgR1slO0oCW6{XO*m3Xq@iG6D?@G_Y5W%? z$9;hdJ&DA~$VqzVK@x_I!<2T!`-t;Y8;`+u$r*F2=@hk-APIq*7cXoDmcXV|&3b2SF z{&bAQ`Nm&qs8{%@H-c?%gd^h%nphCwweU!T}IUl*(x%?x6F1SlUooqv|*iBQ_NiWHHHc6YZ!pnD(_J;lLjTLWW0Kru!apY zM!9&B3Rv=!bgyM*WZ{aLwC}CMr&`8qko)3vUiFMahM956xyceam%DTN9$i?+G81YG z!!ll70Q#OeDA#iaFJouA)N;?f%B!0^B-iHdoRKpfRdHs{(xZl6n90MrIyooLmsiJG zo1oCEt7i@~Gi7pp-uG9?Wx4xgWiBu0y44284>Qd&hnTSk3^UK1)*NEyRc>42++gHZ z7-;|tggibQ3Ib`K+ry1mzd#8Y@)rp`0CW&2>v`f|;yrFnLKlzCx1CIwN4SZ$lP8b$ zv~`I3kRKR8$LF_U2p_3L15jxmw#&GvGP$*)X%5&E&^w-pdI|qT-CS%r%8Q0K;t}K= ziK_E{Ae?i^LFV{CWR7Pk&p?*Ieqpa0ALt+P#r(G;VP9}gwC#DE`baGr0+CRBDJ+PF zV3;ye(U7IiZYQz9phO4p2Z2;Ve_8H`ZNf36zk2D7qB=T5CJy@@K<_? zqwQXJkI)_vy1Uu~!C5}Uw+S%c?Z^?^k-Z4*@hChXkd(>F6F!MG=OhNP!WW71VWm39 zzsX5>We>jlzd*9W{LIKWJ6A?_i^|tE8(gZ$y>ekkYu(l!Olc23)E;_dvaR(0q^M$5 zlTlktr8{Lc+hvWZvc~PQwp3Z$W*}YGxjMY-EWTIuovK}j>z?I1mi6JZqkgBdW~aPv zx2$@%qUwG7d-lf`z01DZ|JcshT#ri`$AL^WV>G{=*tJ(AtB!Bo+6pEQoJ-sLlE%JA zTK#X1ygiaEZ`wSREbDpDy)u&0Uf3zAx;J)r>~|(s`d60U9^0|lRz`mQ)X6yOzTf}h z$Oj|a^*yQjo@7z)6Gmk!6|E&14cxyFkUV_YccA~2`p2j02kLbn*Qp>SxLpG(NaRZo z86<;G{t}XRag*oQW}!IMOjd$qm_kI5l2lFjd9Agaj?)9tl^_ZbOwItLM$MUeG?dH~ zhA`(rAerBHaTd-B#7fQCBw|(1xb2D4S;>^WwFn3$L_tVxi-bX{3h<$jz}^bR7TIzC zcq_L41Q9DR>gP*RKgj>ZgDv=m@{S-xfff`7oZq414al#vcfzvS>CC{pjfS5%n5Mux}GI|D(zPd~6u-`&ZtFI<)Ezd9W-tm6x)r^VJ8Gd6l zV`Z%Nl@T%mZ`eazv`|2a%j=K(-@X1HE$nVt#l88v^Q-;e8h&hp+KfGKW%75NvNA_t&hu7AJjvT79i|ijU8n1E;XlXS0vW37BHxQJne;j-z~lI%X-v9 zkA}P+bF(~o9Erw3U<5-)1eO<F#|MO*@B&C>z#wkI9*=-MgaEb# z!8ie&CA*Fwg^DJnu~=HLv1s4hnD}6N$k!kd%X|e}kiUnOMZSIQ{7~xF@54TF1!1*8 zP!L5;C=vhwTntH)yM{RmV3;R>hQbZe5Q)-^fZ~EAwn?n0)F;^wU{BFqAYnUM1h8fJ z6Sg2(VSZXPysFu;*b88(j&WR20Z}TiS$XsAiAUDrcgF6FeQRQ6XvgkIIvY~<#%+5` z%HFbLb*`RBx|)+Fce15FX&iW>Q5We~hN0=v6Tf->?ekDu(vq~aq_nM%3N&47Pnv3z 
z_1#J1v0O{{QMqM((o&z&Hc;_OqGS2H1%)X}$G!s@;AkqqNEP6Zn&vc+)6$#{a(bFG zK+Z^WCM9Q9auy|Lg`ADT*&$bS)D%|3UU1O8fE<6{4zTv%*nIwgWCIe5 z_7^y15ZFQ?&}86+jx3B)LS|t6ZWU#ZKyDR$7R}N$00Kt^Tt&cN33(5|hMkKCc;7SR znjrH2Hv9=2kWj8^Enao6^(Kuq8rTnp^@xc0VnokzjC5`nPOH#$D zq>q(M%b0=y39IcdF*tcFpNj&FTBs(={V2{YgV* zN?Y|Dghl+J2>WfwC}t3~R}BmpWQwq6`snk>ydRMLOPH?)5f8{E5`%pgprsmL?$z{y zOzPE0q;u?49;4PIng%F`qekGUOij@yDJ@ivPMXLq=!?WLi9#Y4?Z+fUI|b2-_8}!f zX_%mVMsZt05J{5CF=E0)J;+x%d@;(~MpOv`GD;+P62_Ft%sGovGG|@^sNaV_VGI&b zE|D{@TzFz;>}9K}9gFqe=-tt@rDnIJ?B4mi=U4mh4DEFGyraLPUnNObL#n88^Kh!D zW%Ek1$dfj5^Q$Oo(oGEwO4}G zeo4Q&yfJ(ktp1mP)#kxw+4FP9YsiB&=CN{9;*|lv5CDT>j!wp-xBO(TH_y0)R1+jb zFvH7Kv6Thqh)2RGidz)R5IxD8n7oAvMOj2%AV@skByo>2*nQ$&mVrJ?gT4Xn{u%rU ze+~&KbtleHZq>PZM`h&_da%f_drHv&N3T7lX!Cle)T;pz zQI$A;J{}4oHwKp&n&1@|7c~e*!r*`@;GqKjgc_-+GEdbuD)3WTit;0}0xu+p6+o_X zHn(5LQlwt{Xq5x`F^)i1SI9pEAe>s^HAq&N&rF9mnzqbo(<>|eyQTx{wQ1AAy}8$;kQAZP1Wr;>=+AC^V`#3*pFHXLesxc+5G}<=Nt#$n@HP zJ>na~YR=_VbH!9zR!`PF+XFC54OhaI;y7m-^Dd>mi5( z1RWuy0utZ=IGF_JiCo0w5+*1xka0*PV-K0Y=QEhV_F%|4NJJf&`{Dwr#k>{=AgVgh zMF7rZvV0yK2pWNpiaP2Qa~De+Al<1>rTIxsf{qg0}%3L!a9OT&>^*e`0(C#&ap^t+3CaLS@{K@;M` zA8E)x!Y;Zo!L{8hb=UajF}lDv2QF+MxI4lS9YEdSKI+0I>(JugVJij}5U5m9`NN9N z`va+>b9+tnyI{wv`exO4C-2RCXJ(_~!-F3jOqaIb@BTzcnJ%vMKQfwgX0Ga#u{v$6 z$(ggb^}4hz@A>6TUKYvZWrrJ@>+{W~Cd=xrI~TXD2UFIAY3rel&JT}&aD3DKFD8HR z+8@96&u7wgeGjgt>Mo|Nmr|xNFuazOXN~%j1$OnudTjF;T3|bNE%sINtrIKb&)U=! 
z`uPP=(Z4J76a28+D=JRcqy2wQMfIFPrVB4k6DbV^ujk8CQ7cm2=eGFOsHlyzL*GSN zDk|9qUO+`9ng{gON$G6?ziy7+W?>5Pio>u4_g$QeE9Odi)NZB?uuyqVDSgk&_E9GH z@fF*b?``i@18!AZp~_An+Ae=9aDfA${8m!|GAB3G@B2h}tYxO_aBN&9SJk7Qu0t9* z2L2kfG{dY`)31HO`~$B!_+>D;`2)_0FW7fcj#SLmP>zJ8nO=K1(xF6m9!HXO;6mX- z&ABKc_kOPQkyccX_lbt3AeB@pCla;e-byBxq*c zC$d-Mvql=>m31GGKLp_agbhB)F~znmAyxEBL8iD1%+K;o%i0C>9jA@8JFc=_yHj+v zKQ`&i`jvk03*>u&{=cxtl!>Jt$<;USOl(`(l$A|eYd2;$t3HTqH}<3&d$t<~Q;mZU zt|l*glZ}JP#;eJOt10W%l<8I4ZmlR~W4CSfDO>%vtvzLH-_qRIC2j3V+nJ>GOv-fj zmw@^!Ye&k|xotX;GMz}9deHUXvZ~p2mE3FlPTPk11H+av5DDwpITTxN=5Ibu^3i&J)+bG4_>?dsGccEv3Ho2TMJw?vJ^LdFl zSS8|M#AwNvr3Wbf=C(As2akJYTT6BW2f_=pbs)U7jpH1 z`mQdd*5}lBN$8xh%WuicIpi`<>(NlLRCAPpkU-630SuN$4B`WL zgf4lVS%xc>zWWfvm4&T+*0KK$;vmhCD{y(}o}Y%|0VVj#2|op=~uue6oIm$EJh z+5vWwH7$ZOjRwxP=4{#6b1T}qA;vyyfyKCc4{e&;=JLx2AsjnE!zkdzx4?n@{J3%H z3;+h>X1`=E7>M<<;FFA^`7<&vkBZHX3cm%czu@VKhaqJC^HYEud8QJLShVjmM4RHL zq-4qY%x8eui1Mmvh$BS{36i1ZPq5>Eg~`8$Bw@LnGs-4xY_l{CbSR?Aov>xS+5DUb zLb@eJkx;X2Lfy-Lp`(#uaWn__vgi9oE)5ORIQ~S*o>rcD8Vf3_Zcz))xVwZhn&%o_ za^j|_0pcqfG13%c&_o@j#?)#nsw0A^rSb6E>u_En$Am=F(Cq*pMYk$-cZs?{C?fD~ zJGqZv{XE?1C^%-~$50-wV_LC2HVZg{;Y<5m3U12Ld2&^L9YANZ&;bx&qQv zM7G=|`Nygx(~i7&l60Sq(j+NpR0LZ_2tu;LJgsIN73-$W&SyB{XGP9;-u&j91%z19 zF|u3R@F$vwlirm5DhRXaW&>fi$hk7S&RC1{VPfP*@wn+8MWDTc-JY} z$DOaN_V1P-xOeOBt$T0XeQRTM%YFZJ^5{sqe024~j@ACo$eoe3#f`x&-RelndVKc~ zn8<(c*6MJ|TD@y^uJy0oS_`g|@7Hb2{z22mfz5`b>*%M}4u}RVZp^TZwRmOx=T9vV z8v2CM!b_DFU*F_5hmx+&ht{s$%If!<-fMcl^}W`O#MW!cGn2_KE?qgbdU4n4SgTqO zfaJVwZBAL6A6k#>9Aa08x2@H`$e5tNFN8{%_8-^xH4Qq|pE%7!Cv=}w^*M(+b$`~W zgNHq~dE{A?sehX?bzq6A87PvjmyM-E0YE0RVKp!{gWN+-q{AqD%2*T&GN-{Z900Hp zZI|NM0VW_fXMTwh7?lxdVFZc`$Ez?Gc$;b82+Uaae{BR-Wdu4Hf#NRmDhwaqX4^Lc zJC+q1fmcOYsN&B6777*!a9QQ%56&v&$MgdegGdx@Sqqy7Ddmwy@oG#0n z8hmIi^3gM8bX-o_Am12~D5kSZi?XisUN(+eMuGCY5hRSL7ST?%m`mtU_kyMgqNK#uNsn$&Xlq1DTLeFlaBh0SCefMsfNqR`YXvRSCjTv9~!5h zni()KWV_lix{@VB(AI@9KYE?cz1wv5fjQlD5xc5O8S8;nL1tq(*?Klre=Z5mgwmva z_@VIv^aDdO+%?>@-nFiOZL>1%fU^S`wN6*PTiLJ?OjjOVHLU6H6d|+IuiaX|dG{Xx 
zXWWPimrtf#Qb{*!9+kXG|aWAWfw%_nDdkO#Zq4-f|++A;vq@}R>)gm0JRlLii_Tn5Nd zzBdjFov+O1eA0!EJ%9?TsCcr{0!rdHfa0O@i@olUUQ)0JO@^Z^;2MB z%wrv(jwxd=^D*P}9!6_UU_^-yR2412GYjb=HA{?O%&Y)@c zsc5))={4Ogb3B$A`wdC;ThO}=i6dTmZj(hNNuj2w5&2nIk(`dfb$Jq6Bo7G#rk;g_ zsA^A`rPioNB(z9c$K57TFTg=zevVQI(SSM@uC0*=Lk1dRb3_9U2}(XpN;gn!b?b-- zH}VrmMBO|+$pp3!#1Bd%EQDW*K=?I22*`^mm5_*pEqekLa0uQ<{u|WJBYl7sM}bz) zxGKJ+ik3IgmbhG+EuT<=i^9)s(F4MM=-OHo6{PU)Y%1 zY=LMT*NKPLle^Z6^-2&Xe`aA^?bKH&T9zOr4rVn%=7fM^VSxcE7Mlt{pL`_GA+dsJ zFj2WE8&0+VxZsyrZa@U2hb+P3rwXvpmoko*MOZwJA)`}R#p%OZ5Jq)gl{`W}WO8EN zv_pXf1kE8W9BWk26J{WL$$GxL729AtgtZfv8O8PkF2n3LW%Fzwxp@lf_q;{_@^6_Q z=!FmxKa478>w!RF3~@K@-CA%cQ>}b0Z0Q-)Zt0cV%rLqtxfI=x(mG+-g+~n|RGd^f zp);>cIi$+jJsQcq2Jd;RR`qwC~|WDOH@MW_kOJZ3fL9>@{P- zaBwNU1TA9FKD!CAJ-WZ5VCaBfO30HSXCskNFFOUNvsw5Z1R+j+WFB*(#2dy#?`(Ef zaP9yMTxLdkki{3dA(j^WK1f(5~-Mt%n?Q&_11&!wo(dPXUWL77psPJ&qr z4iVGyvXo5_6fY!4ph0p3Qu7QlA@bj`iH;6EXZOmYnrM?oo_!Qhra+v9D{FCi!GT!v zbAWyW8I%hWU{H+=@C(Z}9CfhN(-WxeTb9kQCl8!X+s`D8XLiAzaPb=#*DM=zf2D1C zWN@Sm6&SJ%!OQo??~W(ydhhqAOU|yE9$9U0K%=Z;W#VrPj@`;*TOebOtrqPVZQm_k z^KF;5q)J=TrEO`aCuQ{9(R~K;r2WD}*KOS)FZcC@D7;?@ps!wC;(L)zJt zwl$};%_~EX4A!)va@StDV{hK9+O#E)4L#r=yqdI*;i(OChEbc$|I5f2L6$9DyYbN2 z@|o7YWAA>{(D$I_!NFwR#7@I2k98WC8Kl~pIyi*8K9R0y-K{>fVM|wg9viey^UrJ+ zojy~+SRE3>H2UCV0Psv-!qV2J(49m#Zkp#(927ZdWTza+DgCmETPLXEGax8tibc6ba5N6Au5|aHd`VE)BJW`GPciu;OfF;c zF(jgC5I&n8%759QS852Alt4xG6SZh*rhAcUYjm%PS|qF#`9)YB=58KYWPmeDY(s*F{w>%d%@k~3v0s#W$K znYy%j#-OsVyHHHbK>zm5+COalsP!pkGbP4c6=dZq z$Y#n+D*Hz5hpiv9W*EqBb^hSgADw!N*-V8KTFt&6dM}h=AiKrU+x^Kuz4pUvPq8G^ zY|k}@?ET^l1GfjRzo`CE^;67dYAw(QdH=2V-pVkL-3rj#{nI55lMDpeOua)@yalyzyRUhQw@hP|s(7<5!@zB;?kV0fr>v^t zb@&drZE#QV_PCi*>sS37lUsu+RZm9UtSa6qa%D7lXY1(wk&h^2+w6XVw=68=9#0|cT1g646&Ak}Uk|rU0EgQafWr;J%W3uF5`g~oGB~kT zR=HDLzEfWD*mP3m*m1&{eYih@vFYFr^>$~*fH@=MD$O7aP&2*xsReS_oiU@vvYRSK zYh6vGH3uHK4y_$}R5A(?g+TnA=Epj{Q@=6-zdB$o`)iYPttD-$*)VOM*t(cBp4%~= kc%su-^(%vqjf{>>Y1=-i--!Lek+ilgbAZuS(%tZX01!O55dZ)H literal 0 HcmV?d00001 diff --git 
a/flows/updaters/__pycache__/update_ncbi_datasets.cpython-312.pyc b/flows/updaters/__pycache__/update_ncbi_datasets.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e38ba47dbb96f1694a348996bbc26826921a5b3 GIT binary patch literal 9028 zcmb7JYj7Lab-s(;1+V~!H$X`wt)*yEgdP;%4_T5;iJ~GYqF&Ssv5Nt*O9~Vo^e!lg z4B#-Tr=dD(Or>o^*Y=cl)3GvEXOw@O=^xou8)f>Vput$shMBk%&$OQYfh2eA&_CL9 zcCi2?(eY$4xO?|L&b{~CyXX7P*?+I9vJkk=z4kxR*Xs!RJwE7{t(bZI6N-=1u1wo#0+sdNK;xF6J(%_j2Ay$N&=q$F-D(?S%oFzpy>VaA2hXP0NzgBtA+0tL(IRm0v)rr|ps1h9Fdci5KAGHXs*Vzw>iwSPfW!!RqV9*K$5A^fG-znCN z8U^3$OsRD(wC40D7^hYp$6st;9t(R~gzAcM>cr=Sbr+fmk+yIIq`2Vk6z#%#eN=dM zi&YT8JL@Jt0 zgkzAm?u$jmL~6ezC8YqZSj2cV6%sE(k;06|l9v=F6_zgqOp0@$f4Dz1cw+zl;Qr9D z{^9Q^)}yC}kDVIU9xNx09vu!1_n$pDpqNf}XoV(091Ty#Qi^3FEXiUhERD&ESrT8J zj7p*qFep`8x!e&NjmAX9B26YjuTCaYq7W%68|O^S@W<_YJO`yWNScgM8ABQln5r;l z435@&P$JM7I&ByrbJQy=AsHskj2G2~Li9`D9@(BM9tqfdX!{m<$Dph23MH_DQ7{SS zG$U|h#tfSRiQ1KB1xw|D${5qeaid;az6p2FrmT7g z!M=8@@hZJV`L3_-EjZA)Cc*g*tFMY8*M6LG=xqhp+ESm?14k<6;Z`Mj(iF5|%2HD< zU1pjRy!w&sA{i5m;MVKP_ijXOkFZikcg=p8K1VK54J5TrAK`f-c{5yv zj;hmKr9RT!6@x%B+-uw=>JriV2i6TKOnXmPU%-d=?hb|R=xFV0g*D#os^&c3!T>;etC+A-7G+uqpCoT+6 zO^B=ZM3UFU@i@>bcP1<)qKUDc{IsiuKY1ZKF#%87Z1`0tA5I8-B$-G>6O&NhvRbd& zDSlK;Mb2YGeYSjYvLL7Kj1-0T&(`syNr_*z@Tnw^#s(Hkg%!&3Ihu&oW=-&=Z|Ym> z6sS^6QCWzNMZv0_4#y@%wWs6+DPX@uP(vb;6hy@+rvy=w6m~+2f-z;~loXv%Y$qmR zHF2$z!XziegxV4mrPSTv<7Rbg=^qQdRJ91&Gx9k77(WHJ>}cS_-K389H_>bzo9 zOJv1@JwuTs4502+R8UNjBhl&qaCJ36$YCrlqe&T^QCKb-;PtVD9Jk_a%abm zNOVMuiQ8qE!w%H(4uk@E$K(V&U?VLPN<>DYA$<>8CZ-gpx-+4XsgOF?s30AOQP51u z3DENl$=lb>zC1ISH@W6ce{8CGWbnH!v&KRTaeHPCt+=XZ*}TL1M&@ef+7DLTwevr? 
zIeBv|Ti2a)_hfB71tY1d&f9!>n>TOspg~UpNUgKX7|sF}3U(m^J3D zuKU*7C2Q>h^|7@+@7T03`hNV~_>!X|+jHPv_kZmE)$XO9v!6MA^V~wy``h2$et%Qf z(x$H5rk-qlZ?^ZyJ@MX|?68pai_6Z@ymQ@W?&q@gd+&7ra`%V3@9fBKJdtys%-T*C z7&CX8TCusZ-nQF~x2qQ|clz#Z&ANw{ZAVvJzPadvXW7+w)0MB?^!G>JJaWIbbE&rT zliKdNfxOpudi~HtmelopNfUR|9|}8&dtG4{vH7!% z?@I?1WnGP5e_ucd@5@@^>u;8%2l})^!`zA-9WS9 zUwnN7wd`Go8S-~)*?~sxZoLWf&D?&Lz1!y4PuPDYEaacST@D1OxO+v#-Bbxp( zF_cj>Z5S`1B=js^x@FgGc7_(H3?mpatU$j`GRBliZwctZ=$0GN!$8u;G@WMkGQftr zhz&Y6k1=WUdz8Rkr@&_8T-7TLbngj#wxliNDs9zUXm?-_h|k9XpDp7$d97%2kpKpl z(XwjoR^uh?D|gjb4Crf5n-Phhz~D&Aqqh~DYfG)foGTPyaCsgsRigW83+d=*l2Nmg82mBwOhU74S`wdb)tO8YWL`w%ZP3ay1 zC<3-031!*3V`2g*GaZw`uqG5L zD&xBX7F9Y1cf}YvKbg27PjAy~BAUt2s3eNhz=I&%%PqTN$w)XR?`hFmABCRs zc1Zr4d^JN>T>iON-)@+Xyw$wW|B0(tCXdKSZh%y7m}mXq7+17N3fJtOQpkD@Lf!XFj3WxFa^6;zO;R; z{bTzlMW7Q7C(A0nt++R4`PQ7fZRRlA{|op1O-uf!oWD8e3d|hJyWE%n)IDMhY&}?V zi+wga?^)*R7F-W}HS>pVT0Zf0%!Q-Bx8h$BTQ}flm6PJ^kE(m$_>) zz{6cHH_&FfTjv3!gt$*^g| zOjQcs3-tm6N$K?&HeoBH5}M6&O`1*97hs)|x9k}TMno*Zl%^pyr&&M&qdxW(D$TCp zpY<25J{SHeOwxkyu_(kYcx0dGMI0Pxv8WyUA@Z;C$mj`z8CyE`C;Oayp{J{ZLociWh`VuWA5hebuOE?B*34M#yd6C!LAl1z%_oy&d zDsktV^)Sr$U=FXqBq~Kn|kBAbV z9Oc!+$ww2<&PZLv3H8ucCj&4W_6#7jI8^&;wco}SbD0xAlE~)rDG|diz zd%{mU`14_zj{(-fB*F=P+N{lLCm-MxLlR;^@$dx^mM1HW>dTlQp9sfA6)F^S3gST# z^f=IqP))6h5&`2=Z=-Y`>(Rcere8)nfcvaNagAoepKw<~MVC>Mpm75J@>`J1kT13n z*7T#pKRo<5h8w2qrW=myjs-5~+;V&P*UZk(xT=!9&YQV6ELSaaZP%(aOJ-D^m`_dB zS<}W9U*p2~?Lf}A>#F55Ut6|)@127=-_WcDd>FHP?%>C!x)u9|`BMvh3tMl77SAmn zzH>QyEO;-St$8_Tev1sZC+eilwWfeU5}`;Mg#=9% z@CeN#G(kKK)%gTPLT0XV`Lmiuf=BII%Ob7CE4~mxL3RSj3 zYx3?=ONiJp-~rIU^Xalhdl@W825b)pbZo5%SDHz)w<_(F3B3r_S16G?h&&nAV&+fw zg(#C*FeCXLFEMgmD-aXp^c7{ev? 
zrpT+iqJ}XfF%EIfZ?awRy2xvqs`9q3WoR_r@4P1(_ew-^Mj`?=M4DM6@B2 zt}C#M{3Ts}yJzW9N-(@`E-M(EB^V7AJYMp+agN%RE=AD6b@Hw0AyRAc5Peq9vH%!o z4c=02U*rhPkYbQVkc^iu+&on=O_2Fp>IS*`EoTc(i(x2Az(D~}VU;6d~0HID%0{jZag|70k&?Wpcibiq;p58+E{n2+vZw~(~CcOqF zY7#IiCcw}0vFM0Gg9Hr}8HmYAicyQeDcpJSG6W+wD)|@M2UDWq<8#0(W&oAE2eZ0pQKK?!Kdb$x)wkG-S;UUz*k`U6pb7-)rixr$4M` z`}<4^vwt`|rcj##7AX!@QW6rys2UepaUt&*N~Q)O3M#09S#bE0sbmLulB#p^mL85S)VMaYe)E7-V$|&Wb z{HXw|rulW>q-*I-ka`3DG9C+{A+^M2f1~GW&l`KL?wQ}RY;9bzZv?I|e)Hu;a4`?& z?1!@EL!WW>H)^le&IviLW@hgrqk*k?XeJ);33Kk|A0D0=oZVM2(yT9Uw!C4#YM&py z*)VHgHn%*ou?Zj0#AIv$MXAZ)4xV<;(uGcM$-fozy%elJIQLULDTyizscLkPQ zftw9ESIf*Hq$R$2%aV0dmf7^x7mdVL4Ne>Dd*JYbf5!S&WjDt^+Z&)i zYNjAntQslShzTlQztFwGclGgQNGi=2x*KWwRw+&cPK-g?QW47m8?T$K$`5pi2Wof> z=tY{93<8^=@MGTFG{V1Ne%o-1(_=prLDyr-60i;dox-Ss>fTM)Y42AHQpOhK;`mLZ zWfVxZd|ni$9V($LS4>wu(NLkMz@;=Kk^y?!gD?Y!tp!5x$w?_9wnT+#-!p|N_&hRQ z|4eZl<1s)DpyFaQIVr;=)!>G+SOMN?v4U1u?XxX>U)2UuOcUX$STZaO0=OZ|RnM4& zL{MkYe}|>&K}V-n6Ad3O!l_C584~SXE9fu%1N8e9{ACQWz_GOx_x78?J1=CzuP(c! 
znL`gu_7ylhTW?>=xn7(#u9)3$e4zKio`GMSy7&A;6XVA)$MY?B zx|h6%{?}pVECn}l`e&>1R;OwZ6>}&Q4};$wlD5KV(_1xnFf7Yrd?Yr-$Hi2*=r<+f zPcp3*4BugJDnJeWoCNBnzks}IK;Zky6n=D)@C-^nfkg43$%rH+u|?EBzsQ=cs8zp% zw17nhqnQxkLlb^R4@KknnM>8Igm^6BnO1z|#`1Y!3WECmTf)$QgxD!zP+CI!CLy(# z5DFwDmx`m@b21f;$!aJ~i=34_A%zC7QK7+WQ}z|kyhP;Rk*-I^KFa^-GPQ~FKRiScgK;*zaQt@XGSyo!lg(T6KL10mk@DxAo&pW` zPaW<83wN*})f?yK>+ymKpWthTr-1EX?~U9eE94&2#L!nTKByYkn>P3b$Gac>hcy?xWn*vSHXlivpx}Qhr?SQ zSs{ng%mo9MZ6l0rcKTy_eV(a)VDxn*Q3Z~L!)ha3 j_hX0IW}F##=q2p>C8p`k&IRVJojIndu%0j*RK@>4qGQIZ literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_refseq_organelles.cpython-312.pyc b/flows/updaters/__pycache__/update_refseq_organelles.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f8cf4037aa9f1724e81530c0acb4ed895861faa GIT binary patch literal 13537 zcmbt)Yfv0nc4lT}*89!-DG~${C7|>`Qn%1!1_2UC2qZu~-AHO&R3=cU9w;-BM0GL7 z?Fk!AE38tuV}-WI+toW0VcMI%6>-Jzn~1OG2firJ=IXvQYVGIg}YjD<&!4 z`0we_O5Vhqc?)mlZM>a#@J`;vyZIu%m@oN}ZM2Fn6{`6%p?ZSl%YVR**6%g1j=TiHd|xIgo*e3K>ta0sz-N{noak1eL|~6d<)RG zV(TzY1K;)oHb>hGv=%LgtwZ|3g(CPB z3l6P6!6~=|`y|Wn`vg`UpV1be@bCu%6vVAUi$?hq`j2Runxp4v&ud>H;edx$j9ro0 zuq=okMzIK?fIKE#6~eN@P6Q*@6jt_0mpw+sdZuq+Y^bMeaHxA&u@0UeIeUI&>}=;q zuVNV*92^-N>3p-VTQLr|tA$2hnDEU8WyLb%6D48HCr(O=NffTk21J2}o@@bWO!7?# zV>3Q^N->e-q#!G{*_mL($B)U8F{wS;G9}A1QpfJy6Y`8V>>m$!!@-a@9GLP>My~D_ zg$YTx0#Q)#NrK0z=mNZAj);@Kun-Iiic$7mkA(Yp#Wg8}$9>_;oqoR{Nr6aMvH2uP z2#p8lhG%?X#XRXhD~NvlYExKCS+nhS`(#0}Ns(C~=$w_OBBEkZb8iVgQL(D&k$?<6 znr3FlLjn(7TE+vB88I>~_+`ZmX~`Fw0Ro2__ecCb8ONd9vlo8*;MWen{qQ>gzk~35 z4Suh~?~tO~y9a(iw|Ae%rZA^E2NdQ+&vAu0(>bItoo699)A<&>I1evQo$rUT^CuN% zxaX|G40equ%)sCq3e(-wrI>j5mN7{d6@w_qvtrnvY+vaImuf2%T8n5xX%TQ0$#i(H_M)OQ1kbFS#tZdL|r_EX_9|NTxDW|RZ zGA*U;8{VMxjO{@UBdMXuK8+k=p3z400re;PVaj8QHgrY8R|Qe#y1U!XoH^6>7Cf9B z;V_1Rw(f3kRDYr4V%w#!a5BEaZ&&EHuW&YCIrfNH02@Ll3W{Eu2?k_^2?}AwaAt5| zq<2_hkc;)RGc$sy=%W*XV9=uzU&qRN*b1^FVSSGK`sD8Ok|0XE{nGyS-TuHhFmam{ 
zkcHi_M!wy+_oUskGq_+tq$b8xh9Apuxa6IgQyf?v5ko$Cj8OMM`+MLgjYF_NJ+V1% z_1@@>zqxdM)z-YwvthI>9{Bxeyz|}fr=8`C&a|UA-g#qcaWsDAM`a&Y-K|RWEZ2Y3 z`=*-0!?EJm~y%>aW9}hCe&EdTcOx^lZ|4F3FyI4!ux{#UFtk z=SQ9{%u^1Szi;O$ZMA@-SZo&`SYzNJSU+Z7CnJxNC#K_>d#uI>^Gu8>#7!E<&=kBU zT2rg9#?ys(Mz4*co4UpNb(54|cd3q|=7H0uoiEqNT#VJ|uha8-(Gt^<5$kz_%JF}~ z+}7Q!XQ7pOb{ImMq^fF^;N014)kXC=^0!gX2f#FqvMWU+tBV-Qc1RzR!3#tAFTtQbVS zBh5s@lAy4`Kp3_y>=pl%gmMVyi{r#ztmM@8S9TS|i%^O?S9%|U1?o3Nl+%4{`o?s! z^3bZIW8vh6y?n8MiC)^ZRFkyqSm@etmM<}h;Wejcp*PLi*4f$=Tf4^AWeikl?VU5X z&#ag1OqJ|hX4gvEp6gkg;U7xc7Q54>HFw^={dS^zwbYZec@}#h$ml7XbFpdhO1w0F z<#t8f^3YVBE@^v;l`<5oH~jBs6eP(f{H(h1xQ1v9#XL=<=jM9ZxAQvKdAh5VxC?fh4yn4SOAW9SaJS_oe!eN5;d49kBD6Qp&g)|= zuSeV>(Koln^dPGZN1(#ZMi$T@Y1;;E6Zx%<11L;LF=w7Ldo0l-J+Q()zszM-Ip+)W zoT@{)U{;B86JjL9!NwR8{1K7oCIsLPSaL5B)ryfHAG;1BQlUEtrzv{UmRJnED8?X& z&0r)f!UhL0#3&}Mwqm(5>k9^8Rz->Mn8%>#0U?NU;w7LJd92QohMCL4oT9(#3(g7! zm`VjeTC-vR>h;4M6rX(W`{_nc+z_98=&H{csnT5;6J>I%wYRq|2U6Q# zzwb_MKfGv2I_n;qxC}#?>cmsf^-GL`!j>?~Ramxvg_zcT3rNsuq7hhzn#NwXJkvRW zo`1nJlR0Ps43{CImz_4{DYO*N5=cznERGd?UkF6-Iq@#G@P-%->`mWnkLe1jg2Jul zzDXa`i-DM4WnYbP!@rYj^=;TD1r*G`pjFe>R-ukV`|5lwIt!P>Sj@PI;H@i9t))n< z0_9N4lC4#w(dJ{(1+5h*hg#~EegyAJ^7L8?HvT^rev_;DjHTwR9$WOFw%K8~3ZfA9 z3tUK$eF%uQ?r~o@904T&YD@A)8-@XO2SDKS7p05M7x{Pg&9_~=_D=hJ8)o*;doFsS z#rP_hz3_JPrCp7RCD)OpSkH$6yo2bU62OSawJ4(4BXUEm!c;W|H4qGVtVCmpLy%Fd zn+7CyV~KGZEQYWz2*!epxbIp(o)X`{H+q@a3TD+-7z3Nd=p%N8gxW7GiE;?^m;j_i z(tz!ueI6(%#g-c-<{0<_@dA*(B6!?jS)t{X|3nd0@0&t2gbeO_ZgaJJYU;bP1PLdCX6(A6r*T_AmB);Ven-Y+XM0(RC18v4^h4 zw7q)iYRcXOjXnNW`V`PRi*C*Ra4y~4@>BE2W@uzS%w~%UM_svM!8;lRV?N#qIU^;Ev!E!u%;)edBBN8 z$ie(>eL#p!pb(Q$#FD3wD9qbMnt(Y#0d8dr;aVQhuZDezrq$0IKxC-mun2@%0qZGd z(AuS`a2Ze&y)3#xNPP=6Pvf7~A5GnyVyT!;Ea43W#amA3!v;4Jfx?KT-$0wQ&wy6d z`!Ji^sl9*~t^rbh3uXZNOI&B(qOQ>%bJVMeh~t6CS=GGW4as3*U2}dhB1vs|j`t3T zzXS8Fs(nkoiek>yQRu^BEz)d*0L(f-3#ij2v}nL7a$ODv#=$)S_8cGzD18I9e2*wb z#4Paifu$Gb6?Qrj2z!j1wykwM=K+9vkWpcaq!`pL#7XRS3IYlFi~vy$irTW9mkdSx z19&})bbkQ>KnAPp*3lmxO`9C)qLPKvX?HO|hNAL2ZSS`&8UT~7+qsmT`$^R!duzI~ 
z?!%h9H3|Rn-qp&sJZ=a+bT*`2)$6VX*d~bsk6fO#yJX4w(7iqFs#VK6+9CKoErx31t3}2Umu{iK~H#E&oe6cm@_~f zf?y#wp}L5|5B4L&n=uEmC0A;U8Dd6!pY6{M`D{tPKkNtkv_fAt>?;ql^0p?tv*Wut zhsPQHZWkC$v$DW}J8YKdO|+0CuK8qFQ!6(BKMCAi9xoV>==G7f6TYA%aDfTV7YuR( z8d9bQzT%k*PXN`z-O_ z0Ldc!q)#CL4Q6-6OYc0ruMpXhpM+j+lzp(kBapV+>1c;UoiM~W>=8*Ops-5998wAmFezEQJe zOfYwCDRbk(@d71Bjy)*)^yGspe|dCiD6wx@_t9&2-(L2oYWJ>ePt_d!Yyw(6oMOv2 zYPm$&2QZGsV=1;OZFLewqu3|F_CV{NxIjgORscvM5qXT*C;ptWfEPG51fTyo!RH{3 zbwMgb&oeZYcfC#L9BV+GeRwACNGXJ%HK9Jwg8I_SIYlsy`*q6`1818?e}FJ}L3fA& z5O#Lcg5vuEwgMP>VqHuZGZsRl(`bind3ZzK0=P;8K5vA&1+XP-0^^T#^QM1A%W(Lh zjc(o?GmEvLV3F0gJTY@lWLRPr(Hyg=n>c3Tt+?&N^v$r0yP7;PW6Y$r3M+ybu~0ec znQZyLcqVN5@oFQ2F_5hmVD{&I~wt zZU+2gTRdY>foqrv`oKBIqnj)zrEaRdI8HZaM2L)RlTvFWl+;4ZY z^@Fnw-E3my8aM|1GJHOEr@EbKML#TD=^KppHh-ss7sS9-fsX}va7zUIzM$u#1jXnL ziOCVnT)24cB7dpH&q&We&+u>uH!%xhKP!ZFB1&MuI7I41LUZ7*cEcj8 z0twx5Av_n!+CaDo6_)s!DYGjaT z5V0_ClA@cK$}1r8-ytP2l2E>Bi-(iii`AW3u%)q-_-9axtxIQsX&0z}E~E5SPu#_K zs%}@MO~V@|%PsQ_^K(5@WLP)_{uY~Kp?kw@zg2moGJZ7CxMtqDyf0l^b?3tE3yGq{ z$gfIwq198lG`*r<-_x1e)49HN!E|zHWOdK^)VA|zD%I3~*n78k!S<7JuLA)E~(kDRWJ1<4zAgDZP;p;gv1+bw$_cteJcmn+q+Zk-RteWsrKH_%97`X zSKCKc8_$DnbQIL$hOJ_$G+|h??ab&{d*w#?j>PQp#9H}*bV&{1xr&4EwRZQd(>G2p z86Mf{o_eUVmKO|VY0k7#HrK-G|MeRqRZ{zmqV1JmxGFLX#9zPInScMav=^rJXH1v9 z$Hx4v+tz)Q{oC47OdmD(n2ouzRV=wpFTJE%@l=4QRvjQc@R8CB zELvBSx_Pwfm>Atn#pz2`#D2?)NI-9Z7~w=Bhaf@dV;khjV>D2x`5OAXqK@TUP;gRU zAT8z25WwQogWCKbh#)uB2Hd!%zntSNX5dZt@=DDJ$}opw}53@NHtsXy=CCA!@O1AuDy?0@h-%@B5ej>_HA-bSx@iK=<;#QCO2z2yg|X2-YfeA_!ysg zd(1xFqNX>8c}|Eiz-a)#<{d!UwwXpv$8t9e;EPHLDA?i&fUn#AYQ6l|iBm6qUHK-y za>^(R$TOV-6>L<)yHQY$$Dx+cO& zxy1S4tTQwYM|wP$@Bih)&`l~p!)O{xB#m(Ll)$M(*ap`OP|qt%SMj(q5A<>a1e+R9 z2jNUr!)du!1UgDMCuHdP%6Xek8^JfBL=_@_6Fvl;*1-+ThQ z(ZPA^?8HRiI{32S-hwPdjRf06Qx0vR)U^g*!emsTuZbT*6L{AsEL(k z;d^2P{2$R8n131IcYal}e1q%asRj^=(14um5=R9&S{6reBqmu1&7eK56b-;2_z}oP zM4vc!0?YJ-TZn@m<{NHD;XnyeR17d8bOGuja3rl>XBvn5NJyXB5BB15i$8@zaRmbevWf}(JY$HAv#37H1>&1X@fK1ra%cvo zAT*c_Yi80tY5?TwmV_$kyXk9gd7aYbwVq`IP 
zZe1ktzeBaZfuFPuSZsmH>;QjZ(pZ@;Yf8)|OZP5X9y{F&y&I)!K0f#0~FDORxQLEFMmrUNJAflXM(jGadPLan)kihRq%CPZTHg z$)X(qYB$R363p`OTKV3^UN~)7w>70~O;G;CUAgXVO}SfFPCb}?b|%<0 zSJQ^8bjkP=Cei(4^U^!Z-oGmP>+(;_*AMlj4)uMuef7{l^7X-F+1WMMxhJk7wWCL_ z?aykdlIo0=D&sP{DDciYcfv8svVPU+g?TXB7pLHM!O$af^K$2gy97^-(>D7?c}u#i zDqUTduB`u!)lg(vG-Mo<*|~12N|~zGOf_E?S1)#@ZAG^RZwxMVJ+g668LFh_MKNXZ zWGcX+yKbyb8NpguGd674-SOt7?lpUTV&vZbpC0=7(8`4ejcY9@KRb}Nsj#2@(0teY zfpyK+2)MtsmtMEmKT++Wc=MX6`Uz`ZJn?>e{Q4gsjqgs@AH9G1{-vbl^cvg0VRbBC zfA46Lt$1-1=P%_x^MBw%1z1&Nn&Ioeeqn$yCC$xWzo^2dlPxE|hUSy@`mZHCu>QHZ zx%Z&)=N+A;eGK~xQ>U@7mifB_<$VX3U)s1nkN%gfCP;sFP~S&cKWEDOs*Io4>ic#W zKlkYS_L)ET8sY8d2lRc%j2>G256~dm3Sc#>9B!1_N#Z5HG&}R55{VcSiJ-In(h4wek0o?zC5_}{_-uh8zCT|(N zyXkIVAqVAecTW~PnacIc>+a=F!N9J`9kt`!Z@L=>wpxt2jC#y1<4q#+@cJ0@s`d3Q z4X#R?j1rAJ(2_2{igM0X$s2hSL3uj7%lso9`0CjJ4R%v@{b9v*9=q>!4oT%PQ$b9X zRr&v4(B)OY5#V0|9I7)4uf=BlQTbZg4e&~p;dTHts$WgzIs%#CGz_ly=4)?aAFdfx zEFh#afp8!+8_EI#fNJ3VM;7I( z#*4xlUc+GlDZmPoa2;^XSmZKnZB%&CZCj^V0-RUPXB);0y3RK(z83Nuusos(EzLVmK|FfIEHArg}Zt8{ng*uM{HMRm`AJ$H?S> zb3?D*y~Z1BDUHq_`}n`clv$Z9ph~y(T>(tSL=%Wh&}SewJU8Z zPFQ}!=-h^dK5#9;#ihd!joUUHRcS}-@`dFy_p2WCJvf}S4L)adPD_T;nJrI&%(-DI zi@%dNxn^p8!aCB9y^ouY-GAqPD7n2q-SqlXaLietwW8XkK+@Ix&vv8Uka2;3N)>vh zv9XX3WZhVFhic?}+t&2ZAd?5+ZrH#8al(E~dD_J9!>5b?6@wpRfD59!KYs=(#hSl@ zDUm}lrG{MS^}=DY7yVQL)k{1UK!_=mr4e!dMU=KmX`Wb`^1*$1a(!MzkB?}=0R1~6 zI<7=326$E@qV!Xo`I@tGASkIkgF=t!6LA<*e~&>I27iPBx?J&MBplIQBuVr-Z*s~0 z0V%|7Nqn*@=4B8?Bc~D7Ls&wAnMDR18&gf*Ye1HGu1$ z8jsPHa5SG`Ab#vB$><@5mr#OlfXfZ$XJ$w}XDD4qMhB^<$92$Bykw0o%jg(-$C8wx zAXEiNqdBsz+hjx)ZA%5&C&gdbA_YQXkkdq5{ zR%r%lG8W2Ud1i$aHfPG{u|;G#kKJ`wPIP||U1fIx?q#b8qx?U}bMCSL literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_sra_data.cpython-313.pyc b/flows/updaters/__pycache__/update_sra_data.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8632a26c2675c66b229e28a45e5a901c1b3343be GIT binary patch literal 17088 zcmb_@dr%zLxo7vhUogP%MnHo=Vk9&mp@+eGL;~~x3CT1g%R%<+2nK0nm?5VJJ#bQ& 
z%HG{YNo@(O;}yEemd4p^f|I>BB-yI4iSMrBI9cyscgBOoq|MFs-s{@CuHCIdMr$Xo z{bPUM>FF83D0a5eFsDzS{?4P%`TF~Q-*>)V&C7ELINp8uDEy-qLHGx{(2x8~X0^v4 z2(JjTAd9^M)6OCj`L%%=_}SQFQVf#NV`dhKC=9*U9vidqys_8b<6w>+Cv);^Q?IKh zkLB^axi`PZ&D=c&te~fm74kAmZ&8njd3uUjaZd>==_zHUJ!Pz{r<|4dRIrMkN><6~ zti4q|TiBMKYF6D-!)hd6uT?HL zw2N{hsJ0QMEbg_)+leyUy6<8^y`W#wnE1F_ceWS^)#z(vo+a$J|nmHWUGbX+XESlnWkoAqn9=VB{nrnObL z*apfydd)6zzbFZp#ml01_Xkw5&nv3V;m}BMVl*}!9*TJls%bbF3#s;4cq}v=8jS^C z6IJVF7M&Qsbkl289ig#sED*XDio{gY$Y}JsYKjGw7ri#sajNf}e;{z~=)g(UdFE7K zpufBGY=4)mI?wtCvUeQ)XU`4<2991h)umeHHjUhx8G-5;4>Bbb2(rtHYGa`E}nL+;-)oUk2*FB07|il%KaFm~QIED{19;RZn_5M`NP?k37^MFAfxG67cfFR$9cRZw z5h*B*ghxZo(r{=fIyTPe1&dskE>DKXrSOOp2|;cQ`#v%HE>Efk7E^8D-{>$%RMSLk zr7N)?RbVi`PjIDD)tbq)n|OKy(2jVh2$E6n;uK z60DxJV98pMv{uYEE?K3dRa$m?Zr}Xc&Dn~0SJK@zzdzybS~!nf!q&CybkEpl+mg zwv{3Fi?V?@=L~oS)W*-GHgW1hoZ8H(OL^Pb#mbFBw&<*YUaY*{bG)AIGwRvb)N}B9 z6TF_2*Q@3IS#$m9IoYMxJ0$ANH;iWnGRVUS@_9>t!CP`uOLXtdx1ZQrf!bL8C=FYl~JuC9w3VClydSjsDioBl5ZQVCwEI1Sczmj;9Mp$%=#BKMD@e5<4 z5@gF5^q5a|91BOsS@ddXD5g3t~b&?R-h`NY{J8=3`0^K zXf(v2G@%%c&@-%@O0Dw5*cjA|YCEUv7lBpLoubnj1vDH|hFEw!7G-Mw=~Ku0kM=(o zkk9)2JG&WPV`ylzYK#T(^29UQ{qNz89c!hLD<~7RWdMyyTF&?-vHp*mZBFNVZijD$r;TYy z#paZ=CM`97&+)2bx^u>ra#g4E3TAp!d9~@3i+3w$J%fTibBQopMQZ>r^VQnkq%^MdrSks(EUu=J3L? 
zY|TV*(>r-h>5|I3FWh+{(XcmFvTyqMZ}%UJAOGIzS5JSZ??L@^XQDt#x#~Ww7K%zg z{TkcwT>LejMTT^=Uwj%bbP&_hB7jjvVjcc+xAT`9B{XB3Nn53`>MWq@n0UB&IX6)DJw8<6;vkDeMav znGRl*@n}p@#a5E5+141Tp*|E@g5!{-!|Wuky++oH6FW`G86=uQ*7?Pu@eA+ZJnoY0 zQ>X%o-Mkq&Wi3yamQNd}kKb~>%elwqr2LYtGHI(!SJvM5-t)%0QW&sJ3ch(tESq(k6gT~78Z(4u*f=nm|e&H5G1c)?IKvIwFt{E=<8q* zBF4e&N?q8a&prhBn}RTb`s3Ye^qpC+7qQd`gQV^G(N8^YIG{VTVm4y+Tl)cYHXb>C zK!4NVDVuDBVPx_dB6hzGWqy=7{EmLzyj#;i#OZg+nxKaO01X7-#^Y1s*66p(7A^pR zXi(qE1vf@ENFa?L7hcon09<#BQ*pJxg>nxlOE#r^R^w0E?ze+gH4*+IYTIQ8y(3MT zO1zcpC>Q%l9&R}NSp|c>zoxZ#=GU2)oihG%WAlp*LfzUmaBOG{msTMN)hr3M!Wq~x zBf=qLtuP|i3C+3j;Qbyk5QT(4YEz@}x+een99bEp2;}~wQfMR;0~j8T4k;63(uFgp zIl9r1IE!AF$SeW^UIVyw7L}Tm81r^W=U|i&n2ucu@%l|sCiy@F0OaM3p>{uK<_y9f z4J)w@=`>P8^}GX#u#lu1FU`_A3QE`rW?PdP&Dv zhwA+U$9FwdFC97TP~Bb8p$XDR87;&*F?C)8!_*1v86>Ks6JjM6>LNT%rK&~KjjD-c z3)gI%u+Qt_c#DHs)kF`V8oCG6d{U{H1)w`#=HOG?%^0+qiL4r~hS&+}khBaV>A}GK z0vZHo391Qmtf&r}(<_j0A*PBWsu)m>SP<1b8jge%qKU>}r$&bYSR>M>IYGrcP+Vw= z)SG@}fVgH((p;i_2Cfw@;-{Fv8^Ga3f-8TjD_wYSs%P2mnK`*=uZ|Zkmu`(8eOS8Z z*7511%f;oho|jKecdrs6>35GiV^2eK6Ziz zPnjY6L^+DtnAy^0H~wX5r_b~Yzf86@^6|&%3FBe z?&vDFvLUpR@ezcE*V)G;-AD$neitd=a}Fe8!E$!7ELLXac0|lcZL(1+1=-kcfW&++ z1G`D+Y&f7>v7&Sk0ADAOsv(gK{hQS>w+On-LF<2sx+X||tJV%`nPdx9%eHKOLOV9u zuD63aTqZt&(z%9m^D(*BP?+UBpE+W=;Gt2-W=c2JG*)OMXfrw_Aw!*i$Z*KO>$%Ac zSBwfbIWl)G`dzlmUoPnQMn?JrM6IRhhpDsi(4je`{SPzJ{_nLj z^g~u7fbFq8`Wv-&Alk6eX*t2R5HtkW$!=FRJP{y*x# z{rE@s11IGlU8nR9)j?~VCvnc_t6S!?oim(yMVnd07_ml-Tz6(s%V1V_?lnMX0@uJa zvqsFd!U?f2tm{p38f&#j1aJA|?@o}rk1I#gJJO|_nqq2}UJQlC$<`-Lx4Tsui$a63 z&=53?1mj1eTBj;WxJyl>8n~W>+8x%~)fKCz_oy4qTgBZ@AZp^?$PVf3cnnG|2xILA z8sUBh8XPFd<3n0^EW|ErjBvZn?xN!6VdwW~Yr~N|Nv26q8ll?+Kp&7|Q{>DB{?{}h#SJY6y z$aYhIBs?|CkbMbR#*o~i8zd2?gt@vOHon$4Z&@%b?s%FH23}+1i=mr2e4tGLb;j*p z4m8*RU6KvUEncr#yG72nT&Y%ZoXYBud_erFichaY{m>mk8-A-hkZc79XB;tb2}BEJ zf!>5`!YgLT8cZBF{>1~v5fMO#Z16t58m}2tgRfP4TpFfs+1L+W4n<%Gv%_i~=e^C= zMko0;Dy|Ywe1KU=H1@r*bK#je_rq;Zy|MH6tF$IxE%bkX`EqW0NRg 
zFCqE2l=(6xUqSLG>X~~TG!}kx&)-3RN-+{X72D3e(Yx@agtz_S_Kr7tfB&Z(4LM6b za~}R3jiyK+jnwrs=lt$?$9JoL=G$npr+y)tx_nX}<1TL-D%!iC1^rkIqJa{L=5a7x}AI)gps` zPw+Y!@jZEP7-STGUsWSq^Qwcp@imNXAGkRl(z2Ft1fFwvh(<#Z)i@p5t5g`g2-Vn$9S2IE2KQu7CMdfEQ?o`)#y zBzRr9TwXQX8Mod$ohtXvm{)8SL2eaIq7PiduHCa=-Qih zd8qty%C&8-F6r8d^V*`uq^ku*%kH9?14&Oq{Cv{WoO17)H@xHClP)Mr7SzSJB@4Vu z1-p_3yB-#_tkwy|+aKA5fq|5o5}nagGQuvDnob?eOZ$rX=KQJ1ck z?q9ffA=%jXVC#dCH+H5P`%=|?PbjUayMN{0mE^W_4?5qnywRK5b}m(OE?r)A-*(TI zl)4s-3&(%vOi5j-@~(7A`P~b5E+nfv=aq&1KYl4y-I*%sOjp&+UcYxRU5O{X*ZjE9 zZg)H?#&ABQfvpHGv*mY>YlV{9@7NQb9b_a_rpqf{Hj|l9`F~eyK*Tpieo>Tve6Q)v z(qok;@+`mJTXe!<{V#SADI>tPPTr6cgcx>p3({AFL1eXKGavY7fIR2ASqtD>1#C-X zLHPzl0SZ7n$WS0N0Zt!74)XMqRk-1R^Tr^XWOKWLOa%{M&`Br>@;7VHDq_yTV3b)& zSm+YqLJ3{THgdD9qq1*8#l7fg!AbWvgUC7GdBg&`yw5>r1sw-|pV6KTbneg5o*i`D zVEK&p962Xa&h-U&ek}vfU(d`;0q=7YnC^G^or77okl(7$Xa{6AIdU$%L^a{ZH>{(x zeTweuo6=>SOWF3mxhb7vU3=sl%b7espc?vpjHFO5@@1WZfEG}NB==82o=tiYMPajV)^lJ7?Fz(FtYrriKbL(3s;|ZKpySkk3oll!h~Ei1ED&0AX#@ zbG=6@yJ@zWODc<;sWtAKiYeE~H?T|A=}StUPygL42_1 z?fTgaSM^qb@Cvoq8*6Uj&O!Bq8H$lB35Lyam_gLgp)wffG7v2VCxKw&4O1A z+Y@jEjO9d-8XI9OoMfsY3Wdv~6f9wdZ&Q*`Zz?;YAtfsU>L=2}m`N-J6BoeN{ZP4Hp*MNYDjL#4`1UFz;9A$Pr ztN|`Is;UIIn@$X?R=pkxZe88;vu3jterf?2VVJ+dPx+r01 z$=Q%}Hl*E!)Ar^3ie%;SRQ`#n6XaMs6=!eTcgzKs3ybg8{HvO{I5&_i+%s)i_Eb!B z=UB&`Kbt}{Tt=eHzx9Zq?VOrPLNItfk8i>s5nj-bDbGC;yyYDz=#C7_=HCfR5p0_pqRQv0l4{Sd_lX~jZT>1B_ z-eT$Yj@RV}sb20Q?IuzO#b{x z>OjwTJn=8S`ut*DTjJa^fG;Jb9~o_hMURa_?Vd*-p}6v6v*6gdvI7`vs_)~S$S>M= zWG+A9j?cgD+Sh&1_^X4C<4)@k)+c+ANF(-JeF4d5>Ev9@Qf^xWMz5h=+y$eoKdXbY zt-~>=%S)J-rpaxZk6XGKQozK+!JFF^GScB_SET1~w46E9($(eTb{<|w#2MXGCT)$+ zwKR^kG!A=BngiM#i{b{Op@XAiY=i=lG&zwK5o&$_+n|ALN9+fnej7h!E0QVUeOJNl zp0D+M{q(omf$^#c{Vh4M$G?AmGUYs$FdgGk;sbh}KOiYE07Kr2?Isq6v)MzLn*j)m zAfkg1;($N})iD~q92ke28~bUdJqPx+tB&YIYE*E1qE!m^H6-i!6~^fE}E4t~%M zK1QGKHz0NUO-KvcvN%B!21!iu!*79ZhIW=c?WTy$Z`*7OO)l=Z4rBotJ(zV)$|ayB z8*#&tFSKQ!J0#{d!GI}C^%)1QwC6@};YL4MXd4bbDnuf-o1e6?x9O-Vu)+74qw?GP zN$G4jP;--U=SFTmMPMlXFena#5;!Ap?+$N7#KZJw>& 
zHKX8)zRqK(qzN7m6jDM#Hgu&$+h5~YgU9nJ+4%D?o^<+`#DD_axKiF5!%p_t_4|`BIee48*{JZMjUjVLGNcb=rlUb;h1}M z^*a4{nKf6YbgGLyj9;1vv6~vT{&MTIaxBPVnv^4)Mt)IFEj8sN-luRDq}uvTLmFKS zfmQnEqtBtb);>9Rr%jrhVh}Ks?!jm#{-gtbtLMF{H8azn7j*sX4NUZ~YUZ+>kpqi>5f3z=02&vbmPVLysVFf5TWl|pS#W`Yt7gtiaL@c0 zrR*n^{0F*k%}g~kK=20F0IHdrdsk0(7~nQ|{WSlkeqwW_o50&i+}# z)iTxno~>|Z-)v*Nd~V;0(crO6^?qa#Z0@wpqlIbA_RLumRjrG*-D#71$yAy&mCici zC+Ds%G$qP97foG%UwC+`=eO4Uw5@2#R*|$-%yz`DCn{SOZLRN{?C4|OG56(#JquSB zzW5;a*2%Xz68`fE$Fqy33(NWCvsLp04@_?jCG-2IPNc2*nOMDNQr4Q~{K`c2uDQ!U zjJy^}>^hXFI-JTslCT|l-<~&pb$08by(V6_T!^sDQi=~+778KS!XPF^X}v=+G;+| z6CAaV3k0(x0bkJ7*^3Fsjz!bXw6i2(D*4!hoA20bK2b>L{Ic?BWw*=tE0?3Y()xi0 zOK7v2V|=H^+S)~4)>@%^>(oEF)VTWJj}+c(gYM!8LL{$oIc62nS+R9p{7TL_5;3iX zXE`9?&#GjoPD5K3nnCPm-6R(}$w>ki2R#NqWYd}!vgR&YFt2R^dv;mXq1S{XBo_r^ z$tg#uX8;e`-fo1_v<_zDTQ-YgfiuQBm{oqV192d(j8fmkiJ(IrC@V9Z4|sPemZrZf zHzGZ^F4_btlAG&!GX8SP5YM!UO3gh}c7?8Dh1P%!Gti?bSOAfyh3*jaAYo5&ahht^_YL9WSO*F8S5;To zYh!iR0~k%guX&+2R>@%qjflK6TBUU_zFsZ6b4k*G)RIoMsaJ>ZVsel~x+oJT1aqT9 z>YEt5guPV+3h^yNiEpW95Cfuttb9hGg9~XVHp=ucS4q%G<$1W+TC|kiz~omEwbmNCXrQkb;6w^&k|NcFf5iQ zaQsyF>8`$`XS(qj37`K=jF63AU@F)iYP5}#|4J>G$jhOrfA~tM69gJ1>Gs={q=?3* zuY?x4;>z$&fMymSkSN!OY!POcgurIQzw_5<+J4}XZ=*yvdKPu;v1c_rk796Je{a% zNjO>;O}pRs)WsW;p7yEJbpOn^wj!RWa%ZBVd9FL**t2MAd(U0D=qyKm8O`m*RL~XpY^Ta1;=bgN|PZZj^{>8qd zmhL9wFB{8`JB+_PBz3o&->kMF|EnfP_kQcI+8xKO_Fo&VDEYO+alG7$cVeD+00uq2 z1<@3@RcYgyb)icLmC#i7L1bXDW976%7Rh#kr+TAsBn8ip3!*Oa5r=MsXQar_KT^^> zrFb>uHs7Q}D8BCmA#uxiOU02xu$&_;YA| z?c?W=h^`@rq2qHgTiW!jGpS8zk01Ti>V~6U7|h+bZMccc&5%cK4BEC2Ip6is1h-=} zf4If0HqZ6u|MWLBSWdqfF`v!n%WY~S+?3lc=%+0tXvD00;leq6(GjyX%r_!%FSl_b zW0MDLgJ`X_IyEG&ahnhpq}Aug1*6^`ddXrOLROmTPH4$zkX?RDD?Wc{H-gm+-uz$z zxJ5J1NjGv~#7!d@e8Fa}E7!)*(2jtn%%FNsjbNXFTv}XVg%v2Q=(RAubof@rAMy+2zGV9&mG6el3^JY8 zJU}Km_fu*ZVZYfO5gkX+yY%_o0lJmXA83tZAR|IPrTpbKOrECywO5z>2rxVN;4h48h2n!sii21RyAce;lp}I2p2wrukemc;OQKCLPiM!$*G6h@?#L zj~{^%iv1D#dNNas4+v!N!P@vt1fnlankZly!tp0yzR60O7p)8F3cR-?A$(*)s~?dj 
ztsFgcNR#E-MwAZ<&PNOF!$&D$>mQC8Xqr}!i^^SP#VJb05fD4^Uu7%}^2l-xx^*7u9#NK)7=U0RVP z6vOTj1zFEr8?#?ij-)3end7x;@Zf(@0ZjpyyzGBdj)R94r9RZCkdL z%v^cfw*UQtt?`qof{v+E_zF(5sMGmPb5(PB^Up3kx6qq#om@2<+>R9iUt4@+gVn!m zE1h|1(boLFDIb*YHMY)I&F3Yy9!@v5JTe=L9EiUts-6ue3YtF5vzjeH0ImW}eb@ql zu^_ZYU~_O$NV+tRul|6WrcFVmvwxRc+zmm@(9(eVW> z0b}~$k`IM^-P#9R1IU9Vs7Q6LDPsY&k1s%@qoZIlgbgZM2#t-rU3?pX^n$jvU?+^p z{fvy3Ro%irE%jmJ%ts#ouoiU~2#<{;{D$)gBQu)u__UH7v2rDdPn7u=%ZzMfMllDB zGz=rtmyyO`1(ZM=2#kzf)t#$85etth8Vkdo6&Q(swoJ)RN)AwRmXh<7e3cT~%i*9J zJ1E#noW=i~a)dfLi>oFQ44gSttRB4%fq-h_Us;>rNLBdE^VQYk7005( z6QiNS%mQY{zo2-aR}dW{ith@JcLnFWg8lD=`h-ybx5C!H6B_DKXkQnNvsn+anFi?(_HamI<1@(4d`dq_*kIxBZpwH zOb6rr^Ib`?9Uh{x>U6R6$l4;7rVBhPMx5Wn*^D!8x{Frm4jA5Ie{4r?)hHM`Rt(5J z8Wy#do|)g96rTb~Wz8dNqgb8JFI+L={9a-4iWz4#Q(U$}cQVZ&N6oaa7?68(j+&V% zPNElsQQS7GtOz*G$^0}wuu4}%UdC@fa)J7v($LDP(xqF{p0adV`6K5Zu^^4^j5zbr z;|vm%SSe={FeE$W970ja3Q@1P1dHQw9&*&SZN)%kS4F|(nx0G&x_M>JI0Rkq@^*d2(XvhQ0hw7V=*T?ijC7bi`1cZO&8zI-)hs#_@& JOqHBz{vZCP6-NL7 literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_tol_portal_status.cpython-312.pyc b/flows/updaters/__pycache__/update_tol_portal_status.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b8c26241d12b69c5d63a315306e657a19b73f22 GIT binary patch literal 10444 zcmcIKTTmQVcHJ{QGd&L&28Q=T%}dhg0h0AXwvdDcLJtI3Aj`tmj+^NQTFh&^8zH7< z5|V6HP*xR5Nw$Qub_yk~GA{drt=+1SKka6>;;Q7Mhmls}j$Dz~N~K)+0pw&$TlvU2 zx2FdLlATl~4RibU?Q>t}ea`KFEGTeMa9#Snf0xcQP}EoWU|!B#<>?KYqV7-vCD37N z2=A6K9kC2qBGw^mgc)LJY_o=K5&Mum;uvy7*daFJ9CAinLoV`;3A-bnArGnB!Ud7S zp+cG(r3Cw5(?edkJK$bqp+rVt1*hQp)cuLom|O7tfchl2j3J*;@B?b7SSWBUFktBX*(c2TXp3 z+D)+409ymUN`VI0+I6sX0BbdS3UxWSEjTW~vI4gbc8kaf_2b(qO0I`qPWZV*hq)e+ z744%|q2UvYvA!XM*&;Mb6xRdr@0 z9Glb`l~=|CHr;*hbZ_u{*NMLKoddeN@4{gJg~4Ed$KWa5F|g05xZ=F5h(TT+RdiMs zzds?#q7a~U&qO>N=ctQ-Yx~-$D`}BZr?HK6RZLfC> z?meqBor8TnI@5K&yF<5Q*?(HM_YYh;ajHwVzS-R$uU?>(2iy>7MbZ@SO z4;i!TUV&Hn;HVfCWnPtHQQa*?gS?`Mk>T)^?o=j*VZ+$d8J`#qOUjrie2vWV^${or 
z0V57sRAot2^b)L01qCt8PX!e*6pIRq?h407gK=4ks!9lPXkH9daG0kvPynyfz~fYe zPEo+KH1Ia?dJ3?wnleENj7b+7=#->QPDokcDbKSht6&rC5+%_RowNpR=5$HRl>)QA zb{8B;>!+;A2Ln{V$u|H~oQOrEfGtjqaq5`J4aR!7cuZFLFvmv)&e-l%k-Qs`!m0>F zoh`JO|6aHbtN&$3z%O#^GduA;v`{dTntYk35H?PXec}(z~XZw3t}kD6RfMeI)T7|neW~KpC`{4 zufVusl#7o|DN=|J!&G@0Ayh`>Mfj*Rg2+RR5d85&6>@z;{q*QMVw%0u1S9fG}}c4eM^Cr#S&iDDvy? zx?3+OFkx#**}tzfBn^vUaknC=Vk<(oq7GDAC*tscKt=^LSvY7cA*c`pbGPq@ZPT5f7dK>zo2JiZE7}&@ z?wwz3(aKLxpS{ocvu^Lr_UrAll?$)Uv}fGMH0Ib-7>NqyY!B8c+=&BzPme=^y5`FI zx6p=HlDd+!2gb8eUZWKYT252^%E5zL2z zYvnk(1zS8w(j2TJAA+Xc@Kb7_n5G`P3vaevZ@bm~(BGE!w=MaPF1~IcO5Oti|F4MZ z`VK^Ocl2;kK7#u4TQDTQ0_zM|^v0z7n|(guu0RW*t|zDnjuj_dx&b{<-5i8%IH{t zzT2{a(zzk>mTxmMN)Q?5B1~KyhtY6E6BpdJhg|m(2e_#Dzr@9mi3_4a*W9TW-~v&A zYJv}`fD$gu4|9_-dHm&ADTn2oSlKr$Mnhv^;EkXvDawQhf-lB{Ypn*ukw%;deT2=J zDn(3zp9f4Ag!xTeoq<5LY-O@D1YX?IrGQf~%0mhcqU%nQ>SECeqOh6~mPiEbwO~ zf6V)ilm*FeFsM>^gKQbzh8Xnwd zRZMse7!3sF3Ex5EX^qSY_7EqHM$r}%_O<9%K(W3@ih^$w0mnL|piPtn!1`K?vBsb> z0d@pDXCPI;N|d~O5Z0;}0V2}0^I_AObkiBFS6pfmZycF*WZay_M6~e;l$&vBhi(Ly{{@vhl$sX{ zHSc{0^rse61UjjkmYdd)RiZ+c%OGg)094A7vL>xb%b#0rxELzM;Iq0G>EyAl;ymP9 zqr_S~wXV&*8E%wG*#v9KE-)!a($+~`u7i24N#;dSRMHBuS#I?x>Pg3VCm@KvQJu6v zZy4nU$52W80PKOLLe`WuWtpS`wnT3yh?WdoZ^Tkzpb9wKn`D@<0iXBXhj|59BNpW% z6Jb>X@#ILPth9WMxCFyt1FFg@p}pa!J9MWQorqw7E9%7%pLYRo1kauB=^7a9>+KpK z_EJ6$y=ByZGK!Mi1x0{eCyk8EN5^%$!7{o(9#a%)7-H+3kdn8o@c_Yafw7z1Tir7z zjgGh=0|MxYtZ*%JSB&sW7&Gxn^v@u7EH+PiJuzCdTZugoyuVBgx6ao1(5 zx6JfpYZ|VfTXs?<)yp1GATZmrr_EH2NGr&GKPu>Gvp?rgVk@eHr*l9WV0Ne)SBn02 zcN^J0WEn-KFGhw3`;ljGa||jU^xwAJ@Pi-#TY+nzXZkg`q}5bJDdt52kg};HDTh5$ zkUT(^Puj0i^4_HVg$!@YkH@NBbDpH#Yz1-gTnGE?c^?@Z- zkn|+2uzE|&)p1>uR;Ai43--CP`Qp1(i@P+oR^z(ODZVOcnsH={o8~TlbouV(zX@th2ehJtGq!Bo zv3qUlwzGFr+8YDVWma5bo0e^qzj>LZJoQ?`AuMVKD*Xb7I}7Wc=iYdq`gq^l6%oQD~|nrD$HYC`A*&ik>mRI2lj^&|*!%bJB)o%Aw|!jHy~utWcJ6swL)JLb<8#AWMr;D!G!BPyzY) zs!yvw$?g0AQE6Au`QC5|HCJ#68?IzNPcPKsTGnPsvY-Zk4XblMx1XK3AGO*XMy)k# zNjL1W4yQ1^lWB`k6)pTW2y-^T%WlA(2S!E~BUjRGz9n8*L8>rm1-yE5c=f|Nyvc$q 
z^(Oqg-FIo7)-o8z}i&MU&>wke$kGXQ8266fl%omwZ z2F$lW7C#5MkxouwO9Ha(Dd&wWLpkxR3jWOf(-as)dK{!u_N$F1jsz}A{g@K!lQfhK zNhk1c-kkm+y^iY|$vcZVKaQ3+XH4Fl*M=wz{{M|Tv&3cKN zgp;0Ly&xJImVzpOEfx(*f^LxnOweJHNoO!?2WKdd1J+UA6*)gtZ&JgI?$1N8R@TrJF1}9DVQU`AN z>8-BCRk;NY!2;zt7N~m@J1)XrFeJwmc)xLwlCyh87!83uYH-Yp(~}9?ggUaj%?#aG z2;$K*hXj2T3E9N*XtH$nI1?SF7gAM#{4K;HE6p!((LP5U*+79G`lD3tC(v^vwO3?nh(Qic3;+4 z|0%S$J@yB({*x;M+CdrqX;PN@8hO|yryzMc0E zbf?)<%grT)_UV(~?4T-lg3tA@gYRZ)`SjqspOtIeD+`DIw*4pVKRu?sa$>2jQyY9& zW2&@T{>wmXreM#zZwJck7v+#Q1E%naS?Wwu4O!@Zt{ps@7`|O^t z${T08!R4xJdTO&)6@FRUc&qChE9LXQ-#Wi-;lN_qPueq89a`1t+pquA?`y#kZ7iZi z)%4gzrtj*5qRDJ+BluSZ<;xUw{>DY^I!WIf&Ft(=d$!M<(JHoo+4pW{3!mjR- z1$@Pl@)d{GR}LLZ%8j>sGsUm0I2qqQARoMS+kEBUSRDn0%f+yV+4}2!*Uzl%rYf3# z>!4gmmJd>F@vYNKj@n0sRkIi74$tkJ4=!F>JbUk&)<5)FQmcL^Q+QcpFXOS2XWzn^ zg(I5l_n;TfKU;l+rs{_2XTK@f^K7*Y@bhf7km8_W)dJi3Z6!>hIbZvog2{oO zS5|d5Ge1A<>E6NoV*l>$qt?H#-_za7{?g}%`Y)R+yZ5udJZgvfub9g2!|bnWJIdhU zA6mUw9<`lvIPgpyjvNh#)uHSomFL`4bP|c1dK4<_lSBf<&0rucXeK{ccObq#=>|vR zIhPkcUVyWR-h}@G`MdyCFe(CWM2hEFwFQItu45c9Vc>*jI zl!^R4ynl!=4Ny!|1gYFy)tqO(bD?|z82i=x%xjOBqFYt77auUqBnPIuV-sq80sP3lw@92^L)?jUvwJe$G zajehIf#-{eBUh42=CgK2es267KxSc*mOX0z+XH}FN1nCh0i?xNI-bJp;4WJaQ-G> z3%(k$8K2O=S1?8o2%GsS{0OEOK6bIh;QN>eKQ0QAtmq8k83)vf59Z*96>K49f-s|u zNP^?+58y_)SH_GPnMpy;K$~NjoQjtnPs<+z+!ydu6eu9#W+|KFkI(+W*}t%0WPQhb z+xwyABgb9GN8Y>M1$Tx!k|}EcjCsJEd*a(ZU%fD%@x4BM=CQ+f>+K~6clY8Ww)EEc z+@bsI&L>P^w(#KN=41EViz%(~T(e@U8W60#`B4mkEuYT{2{DZuOa8~En#UGz6gj4;BZ0qCLDzG5pcK&f@DO6jA=Bz zbPZS;!w)IQCrL7@9T`PX#;8p~j1 zdh2WmY%9HW{sg%#bgtr44jyVNE`a_c7~V(Q=Np$PxGh@A?K9&^JOAb?wqh5SfGuyQ z7|%>%$y)ijyYzFX@7B(Yvu@5YU#+nRvTQ4O86Nv|Ck$+>O*1>*Q|CH=oX9XcmJ2AR IieUS{0UM_4e*gdg literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_tol_portal_status.cpython-313.pyc b/flows/updaters/__pycache__/update_tol_portal_status.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0671839b72dc1146e19596b09e94a0d705d426a1 GIT binary patch literal 10455 
zcmcgSTW}lKb$5XUuy_z4NbpTc%aiPh%v-c9G3L6c${G%MyoR$+N;GoQB%S{3V{O|cDf3O8t1?1K))LEo|Gor5mLMeCOH zC4+9o&674FSYKfj58Q2VFP&zah`K zoYdIaSp1sCCPnp%-=aB(&y9q}`+7#ldj-uo8k`slPK3s~Ck8Z|(4kix2~km{kf_Y5 z8mCB~UyLeJ#LsB1#Y8MFMncJWNbP`U&zzJD%|@kIBy?4bElPe)vvl|MpA$55x6rRy zUhbZF;k?H7PK*v|Y~Of)w`Rq1?3`vD6E5`(^l9b`{bPQsR;r2%iI@}$$73-moRlJ( zC*LB5_0cs?L`;gIIZ2ijF&T}^nlmbgL{*g*W@1a4U0s}k1!GTpVsR!GRp+J1r)D6= zr>CHp0E*ZYDXByyRV&BJQYa$D#HElbh2wHW)f}<-TqvPL<)j)$8XA`cDv0Lsm!JR+ zXMnTGAR`mtR0cR2I9z6UGG#~t5m;kicoJla+#s8RWXqFTun5+6($2IqL30d8p;yo} zUSiaXcfl4k3)~4)3-Q}U2e3m=T$X`Od@{}_=Oun3KEx;DN>YsRq8#D%g2%fxvfKTUQF`yBA^v{ zy4!lsq#DX^l?qic9F`Kv5bTu{T8PRA9oOcg;dwr)Ql!|7bHD*4QHjAklDyspCrEjK zVx?o{XBu!!%tL?p7kQZS^(6{%ZC(N_3ZhaZ`chspFd(PwK*L96I&Pj$`?~VbMV|8? zbRhL8Mj(pB8)HP1I6y3VMT#vU$!B0dIUHMzz@+n7BT_gfQYt6KQt93V5=)vFgdPP5 zHCcXsZ7_%re(G5$mdU^Qh-LrBtp6cvPdoPBW%qsL^!{Pza_>iFP1&-R<@34f&h^gs z#@7Sss&mWd@3Ov})AMH68(ph4w_jZ8N;^-d+0%~!6A9)RXGk#hv28yL$ORAINfh{zR`K3|DLZi*+2m2wzn>^CVLaMK#L5o=D2g%E{KEd~9Fr>I0l0Dx5GB)mdQ zJ1KM`<2!NBcY1w1>pYWY&+McSvJhpegHZW8vJc$1Uk3Atut@^b0uCCmjXBkyr@`L5 z1}LbVy?nH2D^Nj+GJmf3ARkBdcrB`e0$)fZLFIx?1-qeCzXXdh9XJb&Fd za|K-t(2Kjb?S39b!5Z!)OF5Fon1n%KkcmlzoE*j234riExK6G zpb8QL)rlBR0&)15IKy9yD_5RRpDI|0L7$EpNe<7)fL}t%sH!eXpdh3~sMxAAA#KEg z(1X}XvkIgc_mXg$)Jwx+f{#c^F&YC#0x!55}936TURC3C51X)%@ zO8_o)QcAgw>J60SDep1;skqH4?l>QvlhJxpMmA|?AhFgSmB9;I0B1X_q7{usfa=8- zeU2e@5$p|k)38;3Gc^XvF_^1Xl79~dc0o+2W@rNouG-;&*t-FEi)L5F@t4Vl zl~gz9Dw=YY&AGaLxoUr|A&~R6ZhE*1>+vtn<}0 z`|8dO#)0VuBkJcVM}eVHn3Iqxgr`?rOGDszs3s zo*lBCG6YKt!cT340)(XHz`d5iOv_+;M9Q{EUpcjE%R2cqyO6%J0Od|@I=6^xu z>tqT|551-e9t%cDCrst!g6V=;HMfJkD<=ccXAvM0&69^ireTPcoP;MbE1P8VR5A7` zP<#rW#pKZ|E0&C%kYL?BiX_ojcsgYg~F9kSXkF;X2FW{csQLuJGNu2$b(m-H>s#v9?q5fmWMtmt6j0?Jk9q!do!NBx2(6Btmp6w3oiKw&Rw~> z-77=6`lc@rZ`w(D-G&R)2aNUPaVx2p7!}#?tM2X-)~CEqY(-V@_=m75!01rbFPZsm z-E$NH)zqd7Kp7$Nrtt!+8`wx8Su|!EhUkNe1+W(c1_co)!UoNT%P+If5(n8bMN@Uy zLx$vnY@59BBGknffSs++6aY&BBRAzSMhaSuRuG@kyy$*&a`hxQspvlj*-;Sk$<$ux zFbpfhS&xXFb7@ZUSrha-AKE+ 
zNry=;nfwV|1CDE<&r_zd?!v)a|D-s+ARC4Po-zHP_hq=pbnH(9k(yLOBAf(=3Y# z7=lI$3JQnv3T=(b5$U=zM(Ys5N{ZjDM^)Z}j2eNI#w0YZ@ZiqZU{rL%l?#Z%m6zwZ z5|I?G!T?5(sjytlz93$Q79967PDH?-2JF*ftl9D?G>fjM+yI-G{rpW=0k!Ct88gBF zzc8GtL~#)Kss9cIRRuMzX|{sepMgxvz`d4{J1?hOMzWr<_gh9*#kXuXZGU9Xw2Ukd zq&;J~it4u}Z%(exXDjwDk3974S-Y0?wJi@nEN@*N0%xINcbctvVDn{dEjb^5&$lN7 zAYaV-j;`46H|$ya!iNn<-WXo#%e~Z-eQE67k@SVhw5u`e@;@?@T}L;qf1FvUiQaTuAWL3LZ#i% zqS+yf0*c;d~glzHlMosZMHV3%;s3U=mnJCF0#G*ytS-jx)ZF4$c~_696j)D zmry^B9{RQ`SV-Ut4LFM;{vZdM?_-#kbK7bTK$~@{&OkKPVALjmDD1*D8cquGgHStK z@_YzmG@uF$PO0Yt+{kM53OWr4>X7|Y&zDE^=0pq;m0L61;t%mQ%3E>hg3 zvIm{43*G_-prPyyx}S-|`xFkKD8*3*@c$F~x{MVF^+@06>_VvlrNjIh$g-y(x9^!< z*nOx3)?^?4xjtVw$bvtE7Cf1}c@k`(2$Y)wo1|X?A0%!fGzJ+cn}T-W=Yma@m@VAb zOy8M|8So@%FIY3SV9l)0sCf!)-dWN}4w8NOu_NHI%$j(z8>})R4kSs}%y@&N}J9i4rENRt-8(R#y z@)JaFR!LxidO5ZKBCG}DbE|zb1E|N?5apwG$22>0EoE6u&K~MCqB;&@{dfd= zYc^TBhFV`^>>A@xK9Be@9g(s?CST%H9yoP@69q}dLlj8Er8vmyf{{4Mr}mzW%Jis< zzh+EDxS)^84`EyqjxWl|lf1u7j}mEQfI%S5Hx```1x;_uK1zQ*my#cH49>v z^z^0~91R84x^B?>5XvGqSap9v^A;R|(7YsqSE4F&_`Wi~ zYlhEYyqu?<^(W1xA2Q%RLQcYg3x_-+3<+lr>Qy`;kaTB>aw9#Lg2TMRh^kgg{pplscKt%DvX%5ifxnBM^MpzJnaP0C*2dOnw65_`JE>Z3I;-7g`fHb z;2+3yaHOp9R!fF!%Xyp92fH)e%QlA8;A&M9zEQ?$Q1XH?YxK z?zS$U{cJy}IRMVrKTo`tZ4lp|cH|_+NE>uj~7#(}#PqyLvMducg`Abc6T{ ze_OVs{kxTGmK$fkvhZ$YrldXH(VHphP4{2Ulw4ju`>?zvTfS#`=z+IvdGJ0SmK?jw z9)D;rS&6QeuU%U=y>EYU{d$foebfDh`$qDurJGA@mD#F&w~l10+U|1gpHwxk^n(w! 
ztL3r9T&n)4XKK&geEA=KFCCgq%gN0AVs`ZEho#p( zYG~fDkdmqm0{uR7kb`HL_vW$(Ml!B_D}%oneJ#6Ne1G({_x$g;-*(?xT5tN0@5+q6 zmUh*rcZma?{Lss~(h9zU9B^PuCyJmL1x(n7jdS8q2FTZDwy3bS$sZ z-@cG7JG^OUy&bS+@YZSZR()o+mAE&`VEU_#Z;XC>C?)_pn6UB+`MSI6za+o#fwGk4kUoU;P<^vR=F z7_w`IdGgDW_9u@jfiO=Vxd{&qk4&(bU)8_>AJ|{~548ul@JBTb{Q>qzr`-J=?2nHf z8nBsv(tNnTlly5!In;l;udcs~`a^1III{+`GWNO15~*v z`24IAU*JsIgWs@uTIC-1T^-e*g1)PBIW(qx$Cj4vh=ixB6azwShT9nFdoQNy@M;{xwgU$hHs z@z(6Yvkvx}wqzH^3m&(DijZez0*apeGkbtPy!|RB%?w{dPt<|eizK7hq{$$-Y@R%P z@^8Vb0na8_ICvBRateH*JoJ|BDdr9!Z_eVzc9`%32Z$mdiPp}*#|vioM8)oRqyj;D zKugaXx7f|G{Q0Dy=f@`uGd2MUF^(QVhvHXKd-Oxy0N8lrP&cpM1F)fi*}OGO)jl6C zMGvR-L*GEYGn}86qt5~dqyb2o(u3eSn^i8Cs;P=i3(P(k*AbMQZb8|7Q&+gQVq zROAV?F1m54)|d7S>+&wZL3BaX1t=iM=7`1i2j{TCY|NP>8uHwd(yWD|?tUKpE_Mr9jd(QPQ zrkjUzttY_TlsV9H?OIKxOZPu6v01FTY;Yk+O@l;)Qmtpv0B&`QwrwvSPz2s3{$}Mn zFjuXf9^D6G(HZ!NK#qV@1*w`?2o6ZVsiLG&kO&HfLRv);Se?gjF6ehk3Mv`}1y8}K zR6!9?FdL?MitvlcC>*sSziZWct4>Tsr5vTdrv44}hcG>(`$NaE2H7w2%v39NsAxhb ziXbacXlD45Xv_BE-e^D|dKetik4V_7@RgPF7ucm8ivWc=wGfXi#-x{&@53Xg526l1 zxnX7)<{yduU&!eUIsJ2T=>u}im zDrL?vkIlq%a)VYMwVR;fk;BAv({gM}8G*WsnPML8GBGmLE0~%`?F~%l<{1WX-tc9Z z$_-OD!&I)R8wBsS^Y?Z2F@45ys&tUe0aLM$iP^In23yMPxz$5&w|gJsQyv~_n+|~f zeL(MJthbss2;A1q^ma#oN{?T7jIG#(qhK5R2Q%kNzxpJ%%6C+}AIFU!`9E!*8 z9xaPSQ6YV(0tB#(BCvwCs8AHB0^~uDeJt|xPE~TvTnxlOjTU~BssILzK6LhuCn~O+ zpiA)fZ)SIPX6KvP{ZL3mP=5BeJzWYQ^mp2Ei%@CopJ5RC0Le&ZCJ=UN1~brSC)hZL zxj2vcxCeVAI>$|T<38+*3s{Kzu|F{CD|%B4&Ovbl@Ln$yHQevzwa!Zo|dE#W96o^ ztkc77$rg~ir>- z2yX?hC#K&>Oe=4^K7G~l$pcP9%&VAaii+omC1CBR1s!W?SSP3xg{T=#$*bnPCD_e5 z&9s7reAZCYifJfhpm=HCH1lLQ7Mq_-8;dzQ6D5WgEu=^)s-+9D`NAxTB`(XCl^=~q zRYJ6d+3eEQ!t6Djm{IeNS?v0sljL@EuK1Md;3>7uGzjIxnu&FdSPitXq@=a1x}*>- zW#rPt60^pflE-?^gtk#I^955$>)7%FaRKL273ES*As_rX`0sxL8+?Ff=$KumDoS_z zw4S2-zfclr#_JBbhRQJsX3Fo5R%@AYO_pP4;OaX?W@mzKy9N!gE{%j{!tQvLLgr+C zh|O^cZmQ0mt=8_vqE|h^We;Qk>c%RS+sg4XEp9u(%3fD~7Uet%&s3W`SFIC#RXT7W z__;h1O7K%1RZ6!d3ob{_`;FcFE+_2fn>pgrPc^%(YTbpl7k5btawOIH=7699`p)2iTWywq~Qi; 
zSP30P68t8Acab}aGR(J7-1tt4w;hT|NR7kdS2QyIK{Y?Tdl+;v8BSbfH zQWktqQgdl(5$mP~RMK2g&*!zYG%ddw6~$@bCq@CMG%2HJHAyFuDw*nRKhXdzR1*kI z?T+b85)I#mfkoY%m&WDm{pZi09qK!CtSd^et{Z{HDCXNhCqku>$)4)q~0Y_wdLWj^60v4FEUA(e}2^gi# z0QX4=z~Cpt(i?zbl=}rfTM4NQ?ma{yz>MT#nDA3SAO>Xg6Q_~b5=`ImJuBQtIDkgA%-z2vy zApmP64@T2)52a!*vC95GrFSj!ThES*>@Br)-f0}XdCzISnX^oc*TGQY;{_>ZhJWx;>f z_nvQ+e?Rn$Lt^LSx~BW>_uAJ+x9eV6jysh6j=yug@sH-8mj1A`+41M}PVeQv6v{&A zZs5JZ>gfIPd*i>J*byY)?;B+v`a7TS-d~NsJH9f%!?%EtD_2&BS5L0p+UVE_mBe#9 z{Hx_ab1BeW;=7+Uz?KjEN1i_uDDQ*b%g=%Tfxq)PIR)zY#8W>S|~>hDvA;(l$kQLdB9Kdl0kGE~R6VJqi)ujEm>HgKLCe%Z z(%|&z7Xk}tPUeOnG|W^(_rWd&IBNb_f^m0Kk0S_-QwIU4x{`ble!v!dHFb<|&|5uBFR%y|NqZAVIQMi4^w2u&pZq#0!A0Le_jSRLmMic37Im z1qdiPi4^Q03Gm;T$kCM7jWTpSAOo})v^xMiit&!c#EM59xN_3nLvWSRb2^#V(nZLY zDX=*N#ju?gSLq^bu_2GO3PXyt+g) zZvb%m!m`9S)oeiW@eVKpuVGla)^zWfcJo$dLDPdob4)@I%&jXyMXOnE)dwx zM7Om9G)9=tYk<&nn3|&Eea$ZiZ9EEO=ipE7!Uw$Y2nx2Y1s(=^wgNqmYFbMjvCY87 z&r59=w`*Q239mf@^qY8hVl}Xy{wsgtN$07}j_uBiTYUR+{2#uWa+~z~t`EC5xb3!+ zrPlt98z29o6dfrwUs?$~5}H?wJ3{OUUsJA$JwASV^TcLn>DWm5__;j~S1;}gsJ?5h zSgQU0zd}BbcefS=Yw>xI$r2Q0LDh4LQtWlE_IIUC(Fim75sGwR>WhwYkJu3`n$>5c z^ckW%mtWBr@&-0-&0^~Pm@3U`suGiV6<%8Q!wXYT!1PIi1^S^FV1=us1yj!w+qQPd zx3LgYXtPadabQl%V<$Zp2MLdDW!tb8Pw%&F2zu6>x}YhF#oLcP{x&@P@D;iW)rAA3 z!i5DRUC3&$;TteU(VhGoKD!*lFkhm;m&pH5G_r+8zCzcw(Y3G8r7d)6*NcLa%(76f zKeHU(^CH%}qORZA9Nc2g@3LLYvGvQl2&#>-&10Knv+twp`?R~yA$G`~-}@m0Jgbdc zO!F?wG2dGwy9lZc*{(LH_i2xkx7gErL7@I+Gwj;fy5~8{)RyZScR8pZhiZ3eA7;Gb z*FI=bCSjMQW1|e>gDb^tuI-CJ(-;1_)f3zPu618Y=qn4S_dQ(DyF9ulAWz2@-}mA4 Sde3j(-sbyu1Bh?8weug?G#JjMN8L(8`Z}O#9jw1152{Mn&pF%i9@KrHxdn-^|{f52t-- z=d?55%zWSc`Mz&{-*y;#MPu+l(gMmJ1s3n}K`P(`DMZO!$8DJvbdJMhoyJ;A=7j z;BCDr*Ljn`E7f|iZkN$kv`0Ld_X5ge-RgB2VclZ?)L zqQ7sZ3+6kGZj;@j8+X%|7ubK`jJ!?CgG-}own7bjXD2a9 zM7E?!orOggR>Fxg*YuO2yPV#{!=CpZos-p}RPdFtM$?r|we^{eN>6IJT) z<2glIC{n&O<4f12>W-@J0Vf8jvu573i?*6C+M}4LPEIAxlndvFb5m?ox4yugx?y(^ z?pKHFGTit1l_<_{I>g2SJb`5Ez7G~=-ron`x(d}9u;48C+Yi9&uL}nHr7$aiO%)ly 
z;x`glReZ)|hkuPyq9>*gc!K7#1yuE`Z}(nuF>!md?xDWktT~D;yx#@w_8L0Xdl{a% zy~P4_7?V$ESH}vPujF)L+6xbWG2zZ+u%zag-|%_rUQG{JPFAVTwydJyEA#^BATY^Ht4tiX@9YDVrPf;#bEktw=1*DZp`wmt_6%;wZpc zF;6vE-LPP|nGJ&(>jPYl`%+#9NB3kV?@4aa_F@-6(zNkgggC?taBNG-ggsQ!4wWb5 z0QKO!{vP~}%>Bq#Hre!&vdBIdW5DD77OETQ#ZD9meJ*!AkULhD=;uoJ1Erf2J02)I zIMMS!>8VexD2W%*U?BLS1x4bw%(=GZ@YeanYI4iRpyS)|N6A-jMZT0!WLqVOl-8Nk z4?|m5bmG(RFU2mbOJZ}Rg2Zs-MGyrdw@%LV&+MC< zSm;`aE=3M4%ZFE@$t5}YWdhb)3UB?&Wsvy$ZEbJvmj19i@Mb!c5=biyF~|o8@Nqum z_Hp7RbZ02d(R~nzS+s%fhQKjSWEj_oU9=j?8M>D1=n6qmO39_GOP@DtI1Fk}k%m0A|YG8hOmz4b7|Bg&>w!%*HSP}9llmNl*V z+OHbak0~!BdV^yBCrqz8WliQ>bxZIYm1QSv$J*Wy_Xu{27JWG)R4uqUCiufD2N$-A zp3UL{Z3r;`Ejo-sUe<kRn!{yy_M~&V?w(aAO2=F9lzNP^>wVrEVXM(}2e5DjMe? zAi3k%C`B9QN|6nYlvrvj5S)Yfl!nW2cubo{DfzWQIt?LpFW40Fx`3ZuLV{QY)KEd6 zX7M}~@(9H2ziy^KXF0*P-E$i?GN@&>RT zhR>aZ3Le`lfQ%oW`SF=wh_j)OLbJ^uHP1&Mwd`A@kL7{Y#J0Kj7tTCN9J%q%N+>aN z=3!|2-BYVda%OV=o6E}HRXM&Af9>h+1B*S2O-s85S9Tv<3rMYzih^3V&6So~Ui*7f zC=jf)pjZnz2>W@8rrA2YRISwie`iH$0K_g>EF}&T7I#TE>-3PLY=c*LkWE|WXqqL; zrtkEcX|v>rD~_<2O<0EMB{!JeaUGI%p1TtU8HBxc!ZKUJUNT|9#B1K*FH+NTIm&sh z{;02N;-(>=Ck(^M4py<3Otv#=39=(kf_!DelAg>Ql-q=*cle&bKl|*}mx7&C6?U8Ts_GuveJOJ}d zP^pMORgNGzcB}MI>iQy@{34u~+50HGZ9cTnv!oneQHIt7QY?65XiY%@h~c|Fmgh!p PM;^($Dp4eNbM^ciho|rI literal 0 HcmV?d00001 diff --git a/flows/updaters/__pycache__/update_vgp_status.cpython-313.pyc b/flows/updaters/__pycache__/update_vgp_status.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdaf4fb5a1d049d160fa78a4301ea9f44cc9e505 GIT binary patch literal 3827 zcmbVP-ESMm5#ReDc|1y#D4CKa#r9f`9n+R6Cz4uAQRFCAY=^34S0_3)MBsWNkJQ;D zkJ&v+G4o=i5A~x0#E62RfSQNGeJS+V$FzTeWZ8{!l~DtIDEvlM0)iGO(AhnbViXi8 zx&n7+cV>3?W@djg+G}l95wt6R-nD)K)Zf^o-Z%#)d*cE^kCBEnVHy!Oiqk?uBw|7$ zQbHy&W^2;4FX1QtM1TYm3Q-b45=^K>oc2w(Cc>nR(|(}aNd%*m+>f+? 
zrf5M;9hHtF?Wh(?b!aWAPOUZ7IqcWMz385NPZ+7)jU3?`L9K06(Aq&V0>4&G0Lcy} z*^rB-q<&PN!M;6OXHEJD-@9vA(V~a;?oJ)my2kDxWSm4O73@bT6@GycumWo-CFLKK zwC)kno0FbYH{8DWB1-jwTu)tYO8AL@QBKGSvA$PKbS5Ubs#&lc-CQz@jw@&L_OdHG z2E8BixuF{$%;>Z4U%5FurMaP-$+=s}IsMk;+;umg4S9rGG6*$wgXE~I5c8w5Ma)c0 za9hfyylrH3$JXf(%vGag>1n%|wQ?{EvYBJsdHRao_>~X3Ik-xIm=3W_>UJ{Xfu1q* z#si(2pqQbqnzwU$iC9I4y1}fKH}#U?EV{vhRn+rV5!C#kRwl)C%?#`dkpe#netRVz zGV^SWx7lW?+FC!Fmun=N_1CF`v5Cv}J=B`G^+0WsXyUdsE!W5-8evv|hS7YRMwwTXg3x?tLtjp!Fw%TU0eXK@ z3Zo%^eGgNZk2d52jHf9~dPuHIV3d?zM<>w$I$IOWfOWGXMknC@7|p!>j&uTLg;>xa zz_@BU=|z0|+AT~S!zojox2S`ccG(Zsh+;+PA>a7=v7EZPN=;#<@=EqFap_rD;Gz1nV5KAOO z2UMe&!OO&QV9mgD+HJgSIg7Ytk_&aw+Eufbna@x-c@koLOvTU|v4_u6hs4J4Er>(x z*!)U%*(sGBz9o~3bB#ub;qWzHm>+%)oK8Jd|rE zg3=hCX~^8d^e+eHh&c zA`}8%PWY(=z7t=u9cF%qVxlX?FS>$G*)|xv?+zxZN$6mj4i62ctpzi0UZ7yh!HnS; zgA7CHV7UYnCgc(NQm&+XCXJUKxWRF78+MR;XTu~t003F6~ z#s49(A;>DKx56wJ3%R1=}PHr&J8c+@_L!%b-M(&C2o|>7t0HP zWaXlx!|s_pEVr>`@Lywr>+!%T8W7E{%=BG}mJ4pnENEH<^L;{W;`%t8#bmtA3<( ztbDK)IJOh+-41uJ23J1W=znW{bp8Av?tF9ao76_j?4BgHsZ}Hf)qOwmsgJI%jI9i; z-h0yXB($LpKa)qcL(vU6`h5iUdmcRYFUm0Rk0(0cdqeuu8|r%_ftWx5q!2@S@V^J` z0XoEq9{@UoWe(`$Ks-kCjWE{)4rwBTv}UN9XVSdgX@DsNxfGKt(Iih(9GVyiKtO0i z@k+P1Y!!124hO4>%p_)FA~^^9kq~r1#~D2cUF~_)Tvf|}@CgvscLtos=RH8pxbRIx%8~~3&ndb9~MX2{`U>PVG z4_H!;1zgdFLa<=mH!~Ii;Nulq0B8n;9IyCGAS`|X_E<{dPlpUMitT?0@ghG;A7lcg zSp7^*=-?^62B`O|=})IW7rzL67Wks=v$nO+R{OxZ^Hd(+i5y@3WGiy%VPZQF*$SNa z?X?{xx^n-Sa(+h+Z->vlJbiJ!Z@qQn)Wr7bH+Owfhgwxo$MMz5M*F#cv<7_sYCCFa zCvU+?uA=J&1Kyxs8T-ErnmE8*4{R0_X9!ECq?b+l$QV1|HJ&-+d21ogDqhPAY27NY z+J>8quop{Mq3A{rgz2IIRW~of2?GPd-Y{VWDq-)GuoU689mtm*D^EFkxgFlBXJkU- zP?!^j%!I)dVK0Lnz% zJgRI-J>P|*-vuKp=eL5#*8)%aHk6TVWopkSwfG-S?JCIEwJD$aQeK_=O5KuARYOSb H<>vVhRfWVW literal 0 HcmV?d00001 diff --git a/flows/updaters/api/__pycache__/__init__.cpython-312.pyc b/flows/updaters/api/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1514a278ebe3f1100f7c9d76b2b032378314bc20 GIT binary patch literal 156 
zcmX@j%ge<81kbxKW`O9&AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdWvw4toLW?@pImHW zte>2jl$w*OTbx;vs-KcrlBl1SlV4t}Us?dx{rphCZdb#`}8C+NU+pT$UXzB?K86Udqkg!)j@1# z^$=TFL!T8Wj6h*yO?`IG%9;mB*1}p@8*7J@gLPil@Z=!L!yoIqPW3rhcb}8>uwJ(4 zx~9*?76TtQ>x0z2b&XPvMt9h z7*B994k0ISErXwcqe)^_OBw^K6hT|QF%TBdOy^w3vN@2_2A)7Q5n7WCWU%b;Sf8AL z=qizyTR0~%C|(N^aU<6jBuuNtAYrluGHTsfVnNd*Q(`<6pk(bB6OG75Wiv$L3@cL+ zE`|kzf+BZOlnr=oF-=Y~$N`*WFPTg1b^gWMbA3}}%CaM4uwAxJTW6TN2Hz@8cxqR5gvB-4 z{oBVhVcjWUseR%*?AGzWft?tz%62{`h`g8-`FQMToaJP9TpZ%iQ8;vw_+U>m+7G8f zwj^L$92#Lnegw2iVrVRqjLLKxt*vcsO`%=w zOw;Zip%9>}A;sS^{?ooih>84ha+wLto== zmO-O@%nv$*^T!AL5q^NfhecUdL8eETNRq>+O5n#i*#ZKHCsBw4B?*-<5i~ZwOVw>y zlQiQ&GNI<=u%bkEwJ;$p)Rqxe2u5K`@WE^&2W+DV9|BQAWmt&E#@jpNOs~JAH{RtJ zLWoa@{;i=TV#GKS{8&!{qZj>yaYlS8t0CdA3e9me7zk*_EjdNnB#tGxaT8XpO$?BU zAy|;LfLBL3j_r_Ge;mcNL(7sP)=x?hF})IQYHs98L7P)Ovi6nJXL}bYSr_HRp*Sn+ zhd2gq7D3kHnZTLB)G}T#tbt`SPN*bSoJqt5QBmmuQaLpOR&xIAk+9{Oshai(RJ{W~ z0nR)=5oY%XopZ|3Rj;xde72RjB0r>L zT8zUu*U1ugw;soWyqQ&nl+C@6WDDQ ziI#NP^haxLVIA`lr&Nwmx<&0r#J8RjSgY8mrX}j8P1QXUwmc8kzK%zpvn^jL*}_dK zm;85a2iPS$>%a|JCypl^U~)Jm2T1*G$hjcrmI%nfzMb$$F4l#q!Yz4DYD)5`eO@F5 zC+s2X@1~mYN*>ld46{YFz+@}sRjIY>Dfh}Dp6|4(D>rc=# znoA|M#7;sWN5RB?9W*;hK-r&Pq+TaR$pST(pIdYKIW^>^^1?Njo;;`gCxy$iv&_~7 z%6bqskH&S)%mCPvGKpk5molPesFAfDy{Efms^>&^fI?X5D8&dXi*fI9P7Do!<>`lN zKjK2*gA4wL*j$mxCiEhv#?l;jzRC%Scue3h5eEh~n>9W3xi+#Eo0q5oXdhzdQzn^b z+)8{5pJ0}!K)6>4iBS9`QX+p~&byiUEfoVd@~_FOBpgEUlg5a@p+?C7IFbZ-)fCvv zU*_qnCAVFcnB=)gUxF+KY{@X_@4R=@jsJ9 zs`^5@_x$G_^J1#y&?mKX>?eWGUR|o`gu>qnTA=#Vk}^^zO)@z;?(2#Bqj98&M3I{q z_YYusKE#Mmpb8y^k8B0W246Ycc`SJLWZyAa69+pq!H7e$j=RVUA{hJNw<+ekNimZz ziV?ma(2`^V`!j;^dK5uJu$JR|3|+t_FjN3S zSnN1Pea%mC`b+%)2Vtc~z7PNrb_WXk}5kz<33y8}oN2u9hA=#)as!tDAP5In-` zD4Xz($kL)3Twqq*rCe+5Fsokf#&6E&-Lz%Ud(eP~pU?}IJ6QOoTTXW2bxUD{ct!ZEDV&~7!-aF8pKG6L+w{)O4wd=KI-??v%MA5cyDZwbCVOh=})^5e#9jUR5mSGql2x_!QPskC+GXvR}|wfjoi8&5)VO`niE? 
z@q3j!)0I0HtCuQw->E!x+j6h{M7sUN=gy_}Q#0m_+mrG&-ElW%YMSQz=R>KQ)|pco zN9jFBb=pxqx9zT@c@;$Ehq+i?m%FCBW=ro{>Q?Q9uXNQx7|ZV&YSM<9xz71B^A{K1 zxLurXI-IIKvTQi|mD#i6^ycMS=Eo8&x_aWuiHxuOW8*brT2UiQ)fbk07eL~VD6*vL zzbVRF^oSzLtFyvY)+RhxyXX5f61Kp%Rj}96-o~eR znC)rj-lyNfyc*KZ`lsI>R1;9LCFN{*`mGJdkn-+(`fKmLd{<8e22R&M?Xh+@QvXaH zc6C?KKld6S^YfkU-FvCe_qO(Esee0I-Q%Qw(NNS=LH&D0QBQ#SqJr+;V*H{8Q-0A% zckkAJv6t>4^kR5&|$wqw(_w7#00qQ zQ86B5Q%*z}h_?yJ{wOa3CKepTxRp5z(JQ?pdpn&#C#LIS(7lb4iWH~_7FpOr+ z737BQTQ`VoQBdG9@+KEOTf@Qv%HC(o8DUT^z=%VG0D2T+glh^h0vNgwd4GhtSg4#$ zqW(f`5Zsyth96-@V{-Ac!v`P}Bvqgy%JFbv7T^@+h2cUoP6%V6A+8WBxG8rIXH7$U zZYwaMcauDVB{B1{AZ(UFY~1^`5Y+*ok#!@mg$Kq0njA0(M-KyYVCDg^1uZtfZ@Eep ze)}mf_!@o!##3_e+kEmfZ@RuS^~!6@=5s0Sxj!-lcNTg^9XL9MqfUr6n4?}ic)(pQVZ$hc^#H9sLfX>^|tVUd; zm%uq12Wr*vVJXGQ&B3Z2#0>e?55Wo2Ivs;0&_@XBZ`;(klpT!o*qtMlyx*Y&Hefs*ZTV!rJl$QsU#EY!rtI>dRhZ1CajVb00pp+Cu~Y94?s2cO#_3hIyaDx z2J^jC3ebj`bt=%w0oEJFUeKl>t<48UW5KPy1D)eL(m63*tP{rIP{t7W4!r-MF~mxM zw?*L%ufqSY`4wL6B6YRZFG#5f&q{e)v#)zwvlm`PX$^MEzOBJa8N6*zIN&ADy=f^h zSBKgUxccO!6IvYq8LT3`_ReM>B?84g4?hQL^a?}|v#$1`1iW4x7&kZIFI-aN+yR95 z1G)*(adZnOu~xty47g|53rFwcJl%8p+_5g%e)#O!lbt=sI)c5YgI$M@pm|KOfTL?T zn!&WLXKx3Y+>Sngx`0Wh5jI|A3Yy#(=kMT$>N_<2?AOxDU|9ilc=lp-^$Llwrcl?WiDY5+RvPD1H{Ig?a(+ z`;C4TALqD z;Dym>0#nvzpBkBdeP$$Wt(?+)t+*3gUPxK$GOm)V+pcU&Rkkd-c1#`3*o&^$&W1kR znzq-Z8utEd*KOe++tUq4Gu!s39a|r4YfnMk`tnp~%F~>-v^;S7u9jUXn|*u9xqa%$ zeQz=NCi{D5^eOPI&fGe`aQ?>Y^QTh2_B)Oj-~qnK2Lo|zoj-8L-kxdRd2484Xc66r z0J*O*?bw!S+Hvc`!iB~D8*k#GK-$rm32eXRTktK`+$f*X&sy&|8XhoCm)H zgFC$Y2l1;b+TKa_o1mO9YAw6kYv>uEf-y0oSKmyNrZoNWt@b7y9@ zU@X#9(vc`b&j}7@pY}t)pq~l33krWlg z06%Rj|LvgsF952;?1Gq`@=mwDA$IFGH^czVItb(l&f=#q#phSf6}|Q9$ zGeLQB$DZ>3&N0W_@9U9XA*rX#}tEB`$4c5n6n?h8XC3;{f$$0mr zJo_IQoEeMrfu}Z8yM1n3rh3mKnrbLorD&&XmC}1`tCZ1WTBXc3-6Or-shPB{))D5H z$VvSJPeaDl1=M!Ur1`OhBwr&R879|{eF_*LnQbRC5R#*Q7`I2*|Hv#ERmGBgsg;VkR`AnVF6g=?t+ro zY09*d87j#{bZggik{L6dbSlQ~h;B2jC(RE}n$9?#)&T@YL|2?tow!Z(qcfD`sVDl? 
za~4>DqG(&`OfCm^ckey-b?!Omp7UKkvf0cOq+_8!O@6zTqJD)RYBE+{9xcPmYZObd z^jV6OpERMBZw=8XPlhncQ%khUQ%7|0)ST77Xds3F6{c9`0!@spmY7&wBSp-t9&!t7 zfZWO&i47=BKw)Ri#PJ0iYYEe=m9?>U)&Xx$))mt1qQmqq_+#B6hB#Rdaj{<3$JT{3 z#Ld=2KOWW(xtDE#+{ZSOI<|?_L%+??ub(}9LK8`8GB=(Rc@h&c5= zi3>~Ic#4logo|C~N$6TSH5#O4y(kf$OA`hL4QgclnChpx6jwQ$W6yGBzB|fb5oJa{*#BeSPA4tSyllY6m{1VBDQ1+T^uXGm`vV4Xo z#09-J!B50&hFa6orr;Vw>8u|aDXVvGVBti;63lCZkD!{0Rk;EmTKM5yAH4|KYt$Iz z!l_@YpB}McqW6o;^eK>^xl`&-W5$y=hXhVt^RO$dVxW7yt z(RRWZ0r+L~3D8Ao64a79c#wL?WCk=$)^k~qhIr6O8nDcqNb;#TDw{~`NQrs@T<67H zN|Fs?mIsv+WjZ0#aS?4&ppwcP;J^(a!j&rPJAq_7{KN=k8(CknIc83kJWcZ_Uwv-Y z@b6p0MeDA={QLraW9<6ayz5(E{NCqh>AZDU$!Nb}yKbA~?il?KbdZ(nv+@3eBhh*8qX2gCGua+ z@tIgMGZC%W9;e8qv!LQJA(i4|5+9d68%v_&*QO1b$uwG{>CRpI5{cg4efxT1d-}MZ zy}M!ud&c+gJJ7Q?78~EUj~nkz@Z+)}kxZdl%M8f0Yy*3NL&D%(3hYFd2rt7})1Ej- zt|T)7uqJ#!NCZ;J1dmFiY^o^hrnpp&N9_`m)4Xg21_(J4U0nFt0o)7FYf?UAl!`Drs6*&Ytaq!?^S zHC5HJ7m7eFtM+NEs=O>chL%c6W4N12Vl@XrF@nsHtUW)>jxI8?KFv#$LR>aX@*JEi zqO8R=fUaO`8Fvb;ux!B>mBfZ0SwWN(vmS)Q5@=o z=hcCQQ$IZW({#Zyn%9n&4Nr9yKm^iUwz5az<##eXv}#*f4Qhw7S`vUaStkjg1&bPj zeabpCts;CWx=+9Sq%p+>luwZL#`Fv{jKfbgrbg=x^ViLD`-{fLnPVl3b@tNr!*f>( zpI}VUWGKee3~%e#lu?W&oWHu-{$=zmn7jy$1_aXp&F#Ze*$&Rk9!qHYF)-OhFhwnh)EqSKt)XIy18w@NoE9( z#tn(pjy0=ABQ5G%?Fr|3wZ~mmeg@DLg+F!vPk*mlC;h}a%@H#kgnQs{v_veQwyKe~ z+AP#JR;R8mV}+x!LoKg8BUY6Hjz(Y*9;6%RI<{4%k7!lujE>c_2G+=$LQKZiLe)6R znj<>a(gRIE60Mgmmf!YNTf&~!xX-BlK`HROoW|5`pjg}34z(0q1^Yp5#$Id9I@YzW z^=Yq_M(iUv{DxGO14qQcI?)EOF3dAda7kPdC-8d$zTEKTiBRx$7QVa@H|s{Kk?@A@ z;4|V?$Lz1_V;nq4kSW`z(`S4UFY7sU33LQHkN6-^00$CEJ)7>9PaOw#Yp~+TxHhHq ztfNd;C_h1>t6b3H`qoQr&AM=Z1R7k^MVj@gvJARtj<|qJz-f>&^T_U}`NQf)K!P_W zwyu$iSUr@wAL@+RdfYVi3swpo-&8JZ^D(ggb@M^nu*odO)PblcSFDbtKA+YA(47Mt zXAisOs(N4? zt6k2=4yp4UIikMRp6wLdSmP8}4jV|6Za_1*i=x`7ornd+1 z5`r38dt!9>oXiZJ9t<)BwMQu?XsV`tr+6tg2_A6(ssn_N0X`rG?gJAkneHLDCFC#? 
zx<+_0D`Z3-iFk1Q<0ODYhDsY*i*7IJg0lM<9?LZMRN#wh{^)tgI$&#FgWwhn1H@K> zYaAU398*a(5gmp=cZzyV2Wk$%7pnOLb%06GK?rv#i2fPy=D|zXgm%eVi00Ezcwwd^ z;Q8`bv*f7Ujt<$n;3{+^CB(r0KN_mid>e)q@yxytE@r;%((Tcg-akOrq+APyoru{=gAT1DuD+=o*A&@|p-bX+x*h-GUL$(2Pqvwwe zoQ$$(K7Ufy2;eAZIcZYX^H-Ci1aSxiluBe_Ry_EtQYtwPU`{TJp_VASP9;ek5P`^Agd$`ufFE&Lhe1tN)@32Ifw;5`WI((lO0qQtd?8#a%0mPLgb-{<1eZzt znBi*3bC^Ak8M?M)3uLnH3W0{;&I`#5fddF4SpW|p+{K7CEkN9-m6Cuf>BJPzXM=SF z0Rq_sdytJnz{M+0z3h3y)sHG-SrHorX8>{40>YD(%}SNhQr2(@at?dbNC{b+N@jSO zfxVNhiR4v29#tYS0IGm7iaZiHfPpp+s~hKl@uUsE%_t6KTGEaM7A2Caw8ltY4Zoj! zR)X;j@Z`Ci>-)dcKeYc?)JpLY63=XWinpRw$w_Tmf zTlfFMbzs%kFu!9Vw(Q$+bG+2t^5(!B1K&CG=I|TC#pa&5ldIl_`K_;>`(=G|$=|)= z?=ARymj-^!uJjMy?jL-gFZPe-_e7Tc7e6*pb=@B`l)LXEE9I`6OD=epU0ZLu?>4v0 zohUW7yt(6z9pBlx(zvtGxbx;{v9WjVc*)!N`rs>r3mtd7U4R#CX~pJU0BY4Y&UgL! z<=KIITf(L0_EKx-1B2G9pEc-PlDN0(Z*E+pO%R$6u!T6QnB6IvbfKdTh ziOqfE-1T$wjd!e_4;+-gam`7YnpceN1!Mcdz|9LcuP%%+x@W!tLN zm3Qx4w)XtD!@uflUGZ(d?c2T_JoF3S;Z>N^nLj%-pIr8Chi>YQe7Jf2%gf%NQh(3t zs=!2R2H;8M{UaS^vMKRu?>FrwqvN_`*6{EG4cq^5E6CX$-;T#P$-XxbKzf78_u_c9+C%tLji4_rw3L4$c{yWxXw^Uxu~ z2mR(@tKsJk^Kgsd=WXWUu;J&u=JOiEbhnDqfY}X6LMkM3nF+v?YgoHv! 
z;#6>_T+dB$$&>;?mq95pinjL}LAMwVKOVpu1@`ilk#?w6U@`z~6qu}W)8Z(Yx687% zjO0W!f&}0p1~sxRnMn{X%2zNm2aFuOB9O}}R%S_aq*)Y{ zKRQ8#TowR2!h`8V*iON^0AB-S&sv4n0HPp99CPbBNNz)q(ejxRk8+aYLCEF`i}0qv zF6PG5NeKYE=mdglmhv5fGCILy5Y7eVRv|}{vO`LWVvdi-d4T#5NqnVa;KPKuW|TX9yr7MtWZ>k7`w-IvS$p`#)ST=!mZhu(qK?d+M9o$SDJ9lnTBkM+m z%iyyZ&X?uz6rgAQC>{DK5Z=VKpr(Qvg3=*s2>u;lZqVWhY_L+PVuL?~4nBmRh>%VN z8@!o&&sW$wkUt+OST5$Z7k}^gAPZxY6PTUEYyh$+JW(G``e{7y?|}}muTRDUFJ3=5 z_fo;KC9mC5UIX%8O{lVnu02XYsdE3O1x!L$bE8?!Se5>gwg+g{_=#nd7`lR&c>o3w+54kR)wNra^fEXIofiDq6cVLGB-#N%^h83!zG~#fq84V(6q^=J*qrqMW;i?CNlZb_NDQFFdHR~I} zFla-HR96;(^<&#-pmRPWoeSxP&%i94$}ED<)cfx=i{VD--Kz8sf!j08{nL8ieDCYj zy;q;WsXAPla{cFc;`;CLX3`U>B7e2(Ke)()3;2vPq8ssS*b?m7sg4DR0X;ZU-c5M> zR%Ksx)$2e?()Z?TOQvuZ?F4&vb2;tCutU&bne?Oe)T7FjH8V;>E56@~>UH&=KeBFX} zerjQCsbR^Iw;!2hR;}KW)v**QG#q~KBt#oF^RLbJ>z&`|S_41U3_h*hF*|krb8}M# zTg$BGLnW@*b}(=4EV&zA?|!8_-x4mmcg-FzIqJUKF(3QR_M)Rb-*w=}d)^iQv9H*5 zywrW9;M{(%yDtxU?~8eFs9+7>bNOFydZlUpi$&MY*%$8m>LElqGMb0*>cXv;7GHYn zb2rcC{e4B}K?nfq{4fdU_M81hM_(zl`_|;*uL#qGnh19@*}!MgQ7ckC^?_WfYT!Ug2bJogu&#T^CLzPxk)?6H#7 zjS<^?+lsaMqXQ5&LG0u@vUZxT+%y{~_kIx_hkxGRcxtyX*i6#!L3qp*>E!TD!VJT5 z*`fS5MhF0q5Zptj#IjSM{0BzuhF$jHb-9p}vN=gX4e;Jw`L_(^UlvFwcK0RhPPt56 z-w;FljSY_>c?|}60YA@R_BG5nF7FDi?+MWOEpE@ZD|GlZ33m|yu1^zoyY*fL zwIB|3oq|=C>l@1rYJM%SC+>D**FtV+HS`9FsxE(?3uP$(!;sDyw4e0xvVfophQT|) zfn^SYMF8nj1r>e~l;JSIVb9kHKub0GLKh@f>BTOb+ijeF=^YOkZP3M^l_oBtxMvnGEjSLs&|bz~WO993ZhBw+i2 zZmmgqyJX>%;iT4tNvs!&szCfGi}ea7GSZ!riL-4Aoj5}`H7i%p9mH;zNQ|CcGEAMo zGJz6-L6swtrkh5=z&avf)39_33Wk-#Hd-5S?1GM03~M>Hl5Ppt(e zmvYpTMquRyn^aXMzIGBq5g8UHKD($}gwH!)5Puo9-HRP*mx<&dhe-34yk5kHZ6P*7 zByy8_ov#=UcIFYX5#gPxLwtJ0z^X!mC1hrqXPT|jE;GB5vow>cA!L_51+hsb5xazo zCETuf$<^N#ApHFddZ~D(xoC*7%5jY*qg<+Z90?1Dv2!XG#O+UvW{Isc^zG$0azLiq#gVU z9QzYO@=%V~?=|J&b=c~SZpI(S*Zqfyk?qfaPfV=KN20PJ)hFtMjqq4gR1YHqt-k(N z|CLtatyVPA>K|())ty))o<0rvdxGn}(;g6xZlt%Tn#$AAH2OxiKW_9+?BxHD-~Hz(GWZfeW{Nzz z_3+llogY3u=Ba*mBqvSlmLpGfh(BFx4E`%#|Z zczf@c=0tvfXlRb-4}{2#7Pwp(cPv7(tvV*jmZdKSU~3*^E2vAk7`Z=OFke2>u09$0GO) 
O-i4?908FAWYW^4LxPdkR literal 0 HcmV?d00001 diff --git a/flows/updaters/api/__pycache__/api_tools.cpython-313.pyc b/flows/updaters/api/__pycache__/api_tools.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fcaf5f019deeadde7567d51833f4d4dc240dcad GIT binary patch literal 1595 zcmZux!B5*(7=O=>6UT892qOuo1H-7m6(dqvHL(H{6uPQNWyN@h<`B7w6Wk_t^z4k3 z)3jY!HwiYWYK|P)ZT~`HhaH0!jo`6UPrYSIr%jtS?Rx>22<=(%`@Z*n-|zjt?|n`? z92O9ag{d!$7K_kJ>Ue`WH``S(NbxD`AbNy}uYhIl z6%D;7V^_#qq@+0+y_(JX&-v)Y!w=3w?HHW$4B}NZid0U)GuZU2{xyoZG|YgC0TZ}^ z&o)dxO&K~vVN^bXrWtQfLMlVCDzCCCr}`pDVW%0zH^Z61H0MRBK83q+o+X1!A^k?- z8r64c0H2`vm4L#}ux3aNt%WWDz*X5U1);U@{~&@F2&f=Do&pIK&Wn*&Jzoyf=HVRcv2Jb! zvsT4)B*T`rxG6_3m1WNLlZ>NR9aki}T`4-54g|W*(2F@!E9o|&x9kQfJd-zyx{Ehm z9CWcDGj1@a7mbqP=){c_4O0iBWSE*`k*z`7D(TLKVHTtf&6Wz5sY{#0G7BF_TY;&( z2242@S6{*1AX#*M9T8la7^dU;tg>#pY}WqN^>306J%2V^n{LpvY&W=E&FW>xuuMW5 z*p1!>*5#^UJN7am7I9hoiS7FIs^Q3CSE!I;hBAZNa3{^ohGyo9;D(5`={baUtQ)l| zPPyWAGNxrXs{$s3b~Xu6h!28nH`E0r{dAp$Pt>LjD|HWdHl^A+u~S+5z3Eiec&Hck zcWuZmmD3z8l`jHWs!|4rg0!+he;LQJiuP3bi5o5GAck0_jBQm&Rwq~C04l@&2VS3{ zmOsHIPsDglJrdv9#?9W?^Y}OMZU0GPZ10nw5|i8FDIeYq)+TF1^~gknmrtUD&Ax%= zz}04Aq!~*z2PW!CdB49NPyH71_Xyj**F7i_+fD6FH^SrlH&6SM&mVmIU@!E&^W)M{ z|NZUTO)<8!`sHfvZbKXg`&hhkBwnelH^k9D0w^+Z93KBUJpT7<9*O;J)15FK`fLsomFk z$Au+7dmxQ2VfK)X-byfs3F+26clbW0D|49I^DNjuU=ENnz9(~pevo7kUhoYNEvrA& zvX49M*13Z3P#d-o8KVGxwq)fhMSY&g;L^^vX&{Kj_$3m4LBg*{YVqh}ypDfo2XT_n Hzo`EJONDCB literal 0 HcmV?d00001 diff --git a/flows/updaters/update_blobtoolkit.py b/flows/updaters/update_blobtoolkit.py new file mode 100644 index 0000000..56056d0 --- /dev/null +++ b/flows/updaters/update_blobtoolkit.py @@ -0,0 +1,280 @@ +import contextlib +import csv +import gzip +import json +import os +import time + +from flows.lib.conditional_import import emit_event, flow, task +from flows.lib.shared_args import MIN_RECORDS, OUTPUT_PATH, S3_PATH, parse_args, required +from flows.lib.utils import _build_session, is_safe_path, upload_to_s3 + +BTK_API 
BTK_API = "https://blobtoolkit.genomehubs.org/api/v1"
BTK_VIEW = "https://blobtoolkit.genomehubs.org/view"

# Column order of btk.tsv. _extract_stats builds its rows from this list so
# the header and the row keys cannot drift apart.
TSV_FIELDNAMES = [
    "accession",
    "taxid",
    "species",
    "taxon_name",
    "subspecies",
    "id",
    "source",
    "sourceSlug",
    "sourceStub",
    "busco_lineage",
    "busco_string",
    "busco_complete",
    "nohit",
    "target",
    "at_percent",
    "gc_percent",
    "n_percent",
]


def _stream_datasets(root: str, session) -> list:
    """Fetch the BlobToolKit dataset entries for a taxon root.

    Despite the name, nothing is streamed: a single GET is issued and the
    whole JSON body is decoded in memory.

    Args:
        root (str): Taxonomic root to query (e.g., "Eukaryota").
        session: Object exposing ``get(url, timeout=...)``; callers pass the
            session returned by ``_build_session()``.

    Returns:
        list: Decoded JSON response (expected to be a list of metadata dicts).

    Raises:
        Whatever ``response.raise_for_status()`` raises on an HTTP error.
    """
    url = f"{BTK_API}/search/{root}"
    response = session.get(url, timeout=300)
    response.raise_for_status()
    return response.json()


def _text(value) -> str:
    """Return *value* as a string, mapping JSON null to an empty string."""
    return "" if value is None else str(value)


def _is_number(value) -> bool:
    """Return True for real ints/floats (bools are deliberately excluded)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _percent(fraction) -> str:
    """Format a 0-1 fraction as a two-decimal percentage, or "" if not numeric."""
    return f"{fraction * 100:.2f}" if _is_number(fraction) else ""


def _summary_stats(meta: dict) -> dict:
    """Return meta["summaryStats"] as a dict, tolerating a missing or null value."""
    summary = meta.get("summaryStats")
    return summary if isinstance(summary, dict) else {}


def _extract_stats(meta: dict) -> dict:
    """Extract BlobToolKit summary stats into a flat dict row.

    Missing or null (JSON ``null``) values anywhere in the metadata yield
    empty strings instead of raising or writing the literal text "None".

    Args:
        meta (dict): Raw BTK dataset metadata.

    Returns:
        dict: Flat row whose keys are exactly TSV_FIELDNAMES, in that order.
    """
    summary = _summary_stats(meta)
    species = meta.get("species")
    taxon_name = meta.get("taxon_name")
    dataset_id = _text(meta.get("id"))

    row = dict.fromkeys(TSV_FIELDNAMES, "")
    row["accession"] = _text(meta.get("accession"))
    row["taxid"] = _text(meta.get("taxid"))
    # Fall back to the full taxon name when no species name is supplied.
    row["species"] = _text(species if species is not None else taxon_name)
    row["taxon_name"] = _text(taxon_name)
    row["id"] = dataset_id
    row["source"] = "BlobToolKit"
    row["sourceSlug"] = dataset_id
    row["sourceStub"] = f"{BTK_VIEW}/dataset/"

    # Heuristic kept from the original: a taxon name longer than the species
    # name is recorded as the subspecies.
    species_text = _text(species)
    taxon_text = _text(taxon_name)
    if species_text and taxon_text and len(taxon_text) > len(species_text):
        row["subspecies"] = taxon_text

    busco = summary.get("busco")
    if isinstance(busco, dict) and busco:
        # Only the first lineage reported by the API is recorded.
        lineage, stats = next(iter(busco.items()))
        stats = stats if isinstance(stats, dict) else {}
        row["busco_lineage"] = _text(lineage)
        row["busco_string"] = _text(stats.get("string"))
        total = stats.get("t", 0)
        complete = stats.get("c", 0)
        if _is_number(total) and total > 0 and _is_number(complete):
            row["busco_complete"] = f"{complete / total * 100:.2f}"

    stats = summary.get("stats")
    if isinstance(stats, dict):
        # A present stats block without noHit reports 0.00; a missing target
        # is left blank.
        row["nohit"] = _percent(stats.get("noHit", 0))
        row["target"] = _percent(stats.get("target"))

    composition = summary.get("baseComposition")
    if isinstance(composition, dict):
        row["at_percent"] = _percent(composition.get("at", 0))
        row["gc_percent"] = _percent(composition.get("gc", 0))
        row["n_percent"] = _percent(composition.get("n", 0))

    return row


def _describe_files(meta: dict) -> list:
    """Generate analysis file descriptors for a BlobToolKit dataset.

    Every dataset gets "cumulative" and "snail" plots; a "blob" plot is added
    only when the summary stats contain a truthy ``readMapping`` entry.

    Args:
        meta (dict): Raw BTK dataset metadata.

    Returns:
        list: One descriptor dict per plot.
    """
    plots = ["cumulative", "snail"]
    if _summary_stats(meta).get("readMapping"):
        plots.append("blob")

    dataset_id = _text(meta.get("id"))
    accession = _text(meta.get("accession"))
    taxid = _text(meta.get("taxid"))

    files = []
    for plot in plots:
        # The blob plot is requested in its "circle" variant.
        image_path = f"{plot}/circle" if plot == "blob" else plot
        files.append(
            {
                "name": f"{plot}.png",
                "url": f"{BTK_API}/image/{dataset_id}/{image_path}?format=png",
                "source_url": f"{BTK_VIEW}/{dataset_id}/dataset/{dataset_id}/{plot}",
                "analysis_id": f"btk-{dataset_id}",
                "description": f"a {plot} plot from BlobToolKit analysis {dataset_id}",
                "title": f"{plot} plot {dataset_id}",
                "command": "blobtoolkit pipeline",
                "assembly_id": accession,
                "taxon_id": taxid,
                "analysis": {
                    "name": "BlobToolKit",
                    "title": f"BlobToolKit analysis of {accession}",
                    "description": (
                        f"Analysis of public assembly {accession} "
                        f"using BlobToolKit"
                    ),
                    "source": "BlobToolKit",
                    "source_url": f"{BTK_VIEW}/dataset/{dataset_id}",
                },
            }
        )
    return files
@task(retries=2, retry_delay_seconds=30, log_prints=True)
def fetch_blobtoolkit(
    output_dir: str,
    root: str = "Eukaryota",
    min_records: int = 1,
) -> tuple[int, int]:
    """Fetch BlobToolKit data and write a gzipped TSV plus a files YAML.

    A single search request is issued for ``root``; every returned dataset
    becomes one TSV row and two or three plot descriptors.

    Args:
        output_dir (str): Directory to write btk.tsv.gz and btk.files.yaml.
        root (str): Taxonomic root to query.
        min_records (int): Minimum dataset count to accept.

    Returns:
        tuple[int, int]: Number of dataset rows and file entries written.

    Raises:
        RuntimeError: If the API response is not a list or contains fewer
            than ``min_records`` datasets.
    """
    session = _build_session()
    try:
        print(f"Fetching BlobToolKit datasets for {root}")
        datasets = _stream_datasets(root, session)
    finally:
        # Release pooled connections as soon as the one request is done.
        session.close()

    if not isinstance(datasets, list):
        raise RuntimeError(
            f"BlobToolKit returned {type(datasets).__name__}, expected a list"
        )
    print(f"Found {len(datasets)} datasets")

    if len(datasets) < min_records:
        raise RuntimeError(
            f"BlobToolKit returned fewer than {min_records} datasets: {len(datasets)}"
        )

    gz_path = os.path.join(output_dir, "btk.tsv.gz")
    files_path = os.path.join(output_dir, "btk.files.yaml")

    all_rows = []
    all_files = []
    skipped = 0
    for dataset in datasets:
        # Entries that are not JSON objects carry no usable metadata; writing
        # them would only add blank rows to the TSV.
        if not isinstance(dataset, dict):
            skipped += 1
            continue
        all_rows.append(_extract_stats(dataset))
        all_files.extend(_describe_files(dataset))
    if skipped:
        print(f"Skipped {skipped} malformed dataset entries")

    # Write the gzip stream directly: no intermediate btk.tsv and no need to
    # hold the whole file in memory a second time.
    with gzip.open(gz_path, "wt", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(
            handle, fieldnames=TSV_FIELDNAMES, delimiter="\t", lineterminator="\n"
        )
        writer.writeheader()
        writer.writerows(all_rows)

    # Imported here, as in the original, so PyYAML is only needed by this task.
    import yaml

    with open(files_path, "w") as handle:
        yaml.dump(all_files, handle, default_flow_style=False)

    print(f"Wrote {len(all_rows)} rows to {gz_path}")
    print(f"Wrote {len(all_files)} file entries to {files_path}")
    return len(all_rows), len(all_files)


@task(log_prints=True)
def upload_s3_files(output_dir: str, s3_path: str) -> None:
    """Upload the BTK output files that exist in ``output_dir`` to S3.

    Args:
        output_dir (str): Directory holding btk.tsv.gz and btk.files.yaml.
        s3_path (str): Remote directory; a trailing slash is ignored.
    """
    remote_dir = s3_path.rstrip("/")
    for filename in ("btk.tsv.gz", "btk.files.yaml"):
        local = os.path.join(output_dir, filename)
        remote = f"{remote_dir}/{filename}"
        if not os.path.exists(local):
            # Still best-effort, but no longer silent.
            print(f"Skipping {local}: file not found")
            continue
        print(f"Uploading {local} to {remote}")
        upload_to_s3(local, remote)


@flow()
def update_blobtoolkit(
    output_path: str,
    s3_path: str = None,
    min_records: int = 0,
) -> bool:
    """Fetch BlobToolKit analysis data and optionally upload to S3.

    Emits an ``update.blobtoolkit.finished`` event carrying the row and file
    counts once the fetch (and upload, if requested) has completed.

    Args:
        output_path (str): Directory to write output files.
        s3_path (str): Optional S3 directory to upload results.
        min_records (int): Minimum dataset count to accept.

    Returns:
        bool: True on success.

    Raises:
        ValueError: If ``output_path`` is rejected by ``is_safe_path``.
    """
    if not is_safe_path(output_path):
        raise ValueError(f"Unsafe output path: {output_path}")
    os.makedirs(output_path, exist_ok=True)

    row_count, file_count = fetch_blobtoolkit(
        output_path, min_records=min_records
    )

    if s3_path:
        upload_s3_files(output_path, s3_path)

    emit_event(
        event="update.blobtoolkit.finished",
        resource={
            "prefect.resource.id": f"update.btk.{output_path}",
            "prefect.resource.type": "blobtoolkit",
        },
        payload={"row_count": row_count, "file_count": file_count},
    )
    return True


if __name__ == "__main__":
    args = parse_args(
        [required(OUTPUT_PATH), S3_PATH, MIN_RECORDS],
        "Fetch BlobToolKit analysis data.",
    )
    update_blobtoolkit(**vars(args))
"http://ftp.ensemblgenomes.org/pub/current/protists/" + "species_metadata_EnsemblProtists.json" + ), + EnsemblDivision.RAPID: ( + "https://ftp.ensembl.org/pub/rapid-release/" + "species_metadata.json" + ), + EnsemblDivision.VERTEBRATES: ( + "https://ftp.ensembl.org/pub/current/" + "species_metadata_EnsemblVertebrates.json" + ), +} + +DIVISION_OUTPUT_NAMES = { + EnsemblDivision.FUNGI: "species_metadata_EnsemblFungi.tsv.gz", + EnsemblDivision.METAZOA: "species_metadata_EnsemblMetazoa.tsv.gz", + EnsemblDivision.PLANTS: "species_metadata_EnsemblPlants.tsv.gz", + EnsemblDivision.PROTISTS: "species_metadata_EnsemblProtists.tsv.gz", + EnsemblDivision.RAPID: "species_metadata_EnsemblRapid.tsv.gz", + EnsemblDivision.VERTEBRATES: "species_metadata_EnsemblVertebrates.tsv.gz", +} + + +def _extract_fields(record: dict, division: EnsemblDivision) -> list: + """Extract TSV fields from a single Ensembl metadata JSON record. + + Different divisions use slightly different JSON structures for the + same conceptual fields. This normalises them to a common 5-column + format: assembly_accession, name, release_date, strain, taxonomy_id. + + Args: + record (dict): A single species metadata JSON object. + division (EnsemblDivision): The Ensembl division. + + Returns: + list: A list of 5 string values, or None if the record is invalid. 
+ """ + if division == EnsemblDivision.RAPID: + accession = record.get("assembly_accession", "") + name = record.get("ensembl_production_name", "") + release_date = record.get("release_date", "") + strain = record.get("strain", "") + taxonomy_id = str(record.get("taxonomy_id", "")) + elif division == EnsemblDivision.VERTEBRATES: + assembly = record.get("assembly", {}) + organism = record.get("organism", {}) + accession = assembly.get("assembly_accession", "") + name = organism.get("url_name", "") + release_date = record.get("release_date", "") + strain = organism.get("strain", "") + taxonomy_id = str(record.get("taxonomy_id", "")) + else: + organism = record.get("organism", {}) + accession = record.get("assembly_accession", "") + name = organism.get("url_name", "") + release_date = record.get("release_date", "") + strain = organism.get("strain", "") + taxonomy_id = str(record.get("taxonomy_id", "")) + if not accession: + return None + return [accession, name, release_date, strain, taxonomy_id] + + +TSV_HEADERS = [ + "assembly_accession", + "name", + "release_date", + "strain", + "taxonomy_id", +] + + +@task(retries=2, retry_delay_seconds=10, log_prints=True) +def fetch_ensembl_division( + division: EnsemblDivision, + output_dir: str, +) -> tuple[str, int]: + """Fetch Ensembl species metadata JSON and convert to gzipped TSV. + + Args: + division (EnsemblDivision): Ensembl division to fetch. + output_dir (str): Directory to write the output file. + + Returns: + tuple[str, int]: Path to the output file and number of records written. 
@task(retries=2, retry_delay_seconds=10, log_prints=True)
def fetch_ensembl_division(
    division: EnsemblDivision,
    output_dir: str,
) -> tuple[str, int]:
    """Fetch Ensembl species metadata JSON and convert to gzipped TSV.

    Records without an assembly accession (``_extract_fields`` returns None)
    and entries that are not JSON objects are left out of the TSV.

    Args:
        division (EnsemblDivision): Ensembl division to fetch.
        output_dir (str): Directory to write the output file.

    Returns:
        tuple[str, int]: Path to the output file and number of records written.

    Raises:
        ValueError: If the response body is not a JSON array.
    """
    url = DIVISION_URLS[division]
    output_path = os.path.join(output_dir, DIVISION_OUTPUT_NAMES[division])

    print(f"Fetching Ensembl {division.value} from {url}")
    response = safe_get(url, timeout=600)
    response.raise_for_status()

    records = response.json()
    if not isinstance(records, list):
        raise ValueError(
            f"Expected JSON array from {url}, got {type(records).__name__}"
        )

    row_count = 0
    # Compress while writing instead of staging an uncompressed .tsv on disk
    # and reading it back into memory.
    with gzip.open(output_path, "wt", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
        writer.writerow(TSV_HEADERS)
        for record in records:
            if not isinstance(record, dict):
                continue
            row = _extract_fields(record, division)
            if row is not None:
                writer.writerow(row)
                row_count += 1

    print(f"Wrote {row_count} records to {output_path}")
    return output_path, row_count


@task(log_prints=True)
def upload_s3_file(local_path: str, s3_path: str) -> None:
    """Upload a single local file to the given S3 location."""
    print(f"Uploading {local_path} to {s3_path}")
    upload_to_s3(local_path, s3_path)


@flow()
def update_ensembl_metadata(
    output_path: str,
    division: str = "vertebrates",
    s3_path: str = None,
) -> bool:
    """Fetch Ensembl species metadata for a given division.

    Args:
        output_path (str): Directory to write output files.
        division (str): Ensembl division name (fungi, metazoa, plants,
            protists, rapid, vertebrates); matched case-insensitively.
        s3_path (str): Optional S3 directory to upload the result.

    Returns:
        bool: True on success.

    Raises:
        ValueError: If ``output_path`` is rejected by ``is_safe_path`` or
            ``division`` is not a known division name.
    """
    if not is_safe_path(output_path):
        raise ValueError(f"Unsafe output path: {output_path}")
    os.makedirs(output_path, exist_ok=True)

    div = EnsemblDivision(division.lower())
    local_file, row_count = fetch_ensembl_division(div, output_path)

    if s3_path:
        remote_path = f"{s3_path.rstrip('/')}/{DIVISION_OUTPUT_NAMES[div]}"
        upload_s3_file(local_file, remote_path)

    # Report the normalised division name so "Vertebrates" and "vertebrates"
    # do not produce different event resource ids.
    division_name = div.value
    emit_event(
        event="update.ensembl.metadata.finished",
        resource={
            "prefect.resource.id": f"update.ensembl.{division_name}.{output_path}",
            "prefect.resource.type": "ensembl.metadata",
            "prefect.resource.division": division_name,
        },
        payload={"division": division_name, "row_count": row_count},
    )
    return True


if __name__ == "__main__":
    DIVISION = {
        "flags": ["--division"],
        "keys": {
            "help": "Ensembl division (fungi, metazoa, plants, protists, rapid, vertebrates).",
            "type": str,
            "default": "vertebrates",
        },
    }
    args = parse_args(
        [required(OUTPUT_PATH), S3_PATH, DIVISION],
        "Fetch Ensembl species metadata for a given division.",
    )
    update_ensembl_metadata(**vars(args))
def _open_google_spreadsheet(
    acronym: str, url: str, header_index: int
) -> pd.DataFrame:
    """Download a published Google Sheet as TSV and return a DataFrame.

    The body is decoded as UTF-8, falling back to ISO-8859-1 (which accepts
    any byte sequence), and parsed once with every cell kept as a string.

    Args:
        acronym (str): Project acronym; stored upper-cased in a ``project`` column.
        url (str): Published TSV URL of the sheet.
        header_index (int): Zero-based row index of the header line.

    Returns:
        pd.DataFrame: The raw sheet plus the ``project`` column.

    Raises:
        ValueError: If the sheet cannot be parsed as TSV.
    """
    response = safe_get(url, timeout=120)
    response.raise_for_status()

    try:
        content = response.content.decode("utf-8")
    except UnicodeDecodeError:
        content = response.content.decode("ISO-8859-1")

    try:
        df = pd.read_csv(
            io.StringIO(content),
            delimiter="\t",
            header=header_index,
            dtype=object,
            quoting=csv.QUOTE_NONE,
        )
    except pd.errors.ParserError as exc:
        raise ValueError(f"Failed to decode sheet for {acronym}") from exc

    df = df.rename(columns={"#NCBI_taxon_id": "NCBI_taxon_id"})
    df["project"] = acronym.upper()
    return df


def _empty_column(df: pd.DataFrame) -> pd.Series:
    """Return an all-NaN object column aligned to *df*.

    Object dtype matters: these columns later receive acronym strings, and
    writing a string into a float64 NaN column is a deprecated upcast.
    """
    return pd.Series(np.nan, index=df.index, dtype=object)


def _general_cleanup(df: pd.DataFrame) -> pd.DataFrame:
    """Replace whitespace-only cells with NaN, drop empty rows/cols."""
    df = df.replace(r"^\s*$", np.nan, regex=True)
    df = df.replace("publication_available", "published", regex=False)
    df = df.dropna(how="all", axis=1).dropna(how="all", axis=0)
    return df.rename(columns={"#NCBI_taxon_id": "NCBI_taxon_id"})


def _cleanup_headers(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise column headers: stripped, lowercase, underscored, no parens.

    Surrounding whitespace is stripped first (as the dedicated sheet fetchers
    in this module do) so "species " is not turned into "species_".
    """
    df.columns = (
        df.columns.str.strip()
        .str.replace(" ", "_")
        .str.replace(r"\(", "", regex=True)
        .str.replace(r"\)", "", regex=True)
        .str.lower()
    )
    return df


def _create_mandatory_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Ensure the mandatory columns exist, adding empty ones where missing."""
    mandatory = (
        "ncbi_taxon_id", "species", "family", "synonym",
        "publication_id", "contributing_project_lab",
        "target_list_status", "sequencing_status",
    )
    for col in mandatory:
        if col not in df.columns:
            df[col] = _empty_column(df)
    return df


def _expand_target_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame:
    """Populate long_list, family_representative, other_priority columns.

    Every row is placed on the project's long list; the other two columns are
    set to the acronym where ``target_list_status`` matches, with or without
    the lower-cased project prefix.
    """
    for col in ("long_list", "family_representative", "other_priority"):
        if col not in df.columns:
            df[col] = _empty_column(df)
    df["long_list"] = acronym

    lower = acronym.lower()
    for status in ("family_representative", "other_priority"):
        matches = df["target_list_status"].isin([f"{lower}_{status}", status])
        df.loc[matches, status] = acronym
    return df


def _reduce_sequencing_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame:
    """Map project-prefixed statuses to simple GoaT statuses.

    The prefix is matched both as given and lower-cased, mirroring
    ``_expand_target_status``: with acronym "DTOL", both "DTOL_published" and
    "dtol_published" reduce to "published".
    """
    reductions = {
        "published": "published",
        "insdc_open": "insdc_open",
        "open": "open",
        "insdc_submitted": "in_progress",
        "in_assembly": "in_progress",
        "data_generation": "in_progress",
        "in_progress": "in_progress",
        "sample_acquired": "sample_acquired",
        "sample_collected": "sample_collected",
    }
    status_map = {
        f"{prefix}_{status}": simple
        for prefix in {acronym, acronym.lower()}
        for status, simple in reductions.items()
    }
    df["sequencing_status"] = df["sequencing_status"].replace(status_map)
    return df


def _create_status_columns(df: pd.DataFrame, acronym: str) -> pd.DataFrame:
    """Create one column per status and mark rows currently at that status."""
    statuses = (
        "sample_collected", "sample_acquired", "in_progress",
        "data_generation", "in_assembly", "insdc_submitted",
        "open", "insdc_open", "published",
    )
    for status in statuses:
        if status not in df.columns:
            df[status] = _empty_column(df)
        df.loc[df["sequencing_status"] == status, status] = acronym
    return df


def _expand_sequencing_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame:
    """Cascade statuses so a later stage also marks every earlier stage.

    Order matters: each step reads a column a previous step may have filled.
    """
    # NOTE(review): insdc_submitted is not cascaded into in_progress here,
    # although the prefixed form is reduced to in_progress — confirm intended.
    cascade = (
        ("published", "insdc_open"),
        ("insdc_open", "open"),
        ("open", "in_progress"),
        ("data_generation", "in_progress"),
        ("in_assembly", "in_progress"),
        ("in_progress", "sample_acquired"),
        ("sample_acquired", "sample_collected"),
    )
    for reached, implied in cascade:
        df.loc[df[reached] == acronym, implied] = acronym
    return df


def _process_project(acronym: str, url: str, header_row: int) -> pd.DataFrame:
    """Run the full processing pipeline for one project status sheet.

    Args:
        acronym (str): Project acronym used to tag matching rows.
        url (str): Published TSV URL of the project sheet.
        header_row (int): Zero-based index of the header line.

    Returns:
        pd.DataFrame: Cleaned sheet with expanded target and status columns.
    """
    df = _open_google_spreadsheet(acronym, url, header_row)
    df = _general_cleanup(df)
    df = _cleanup_headers(df)
    df = _create_mandatory_columns(df)
    df = _expand_target_status(df, acronym)
    df = _reduce_sequencing_status(df, acronym)
    df = _create_status_columns(df, acronym)
    df = _expand_sequencing_status(df, acronym)
    return df
def _normalise_sheet_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Strip, underscore, de-parenthesise and lower-case the column headers."""
    df.columns = (
        df.columns.str.strip()
        .str.replace(" ", "_")
        .str.replace(r"\(", "", regex=True)
        .str.replace(r"\)", "", regex=True)
        .str.lower()
    )
    return df


def _fetch_dtol_plant_genome_sizes(output_path: str) -> int:
    """Fetch DTOL Plant Genome Size Estimates from Kew.

    Keeps rows that have a genus and whose ``project`` is exactly "DTOL",
    adds a constant ``primary`` column of "1" and writes a TSV.

    Args:
        output_path (str): Path of the TSV file to write.

    Returns:
        int: Number of rows written. Zero when the sheet has no ``project``
        column.
    """
    response = safe_get(DTOL_PLANT_GENOME_SIZE_URL, timeout=120)
    response.raise_for_status()
    df = pd.read_csv(io.StringIO(response.text), delimiter="\t", dtype=str)
    df = _normalise_sheet_columns(df)
    df = df.dropna(how="all", axis=0)

    if "project" in df.columns:
        is_dtol = df["project"] == "DTOL"
    else:
        # Explicit all-False mask instead of comparing a misaligned empty Series.
        is_dtol = pd.Series(False, index=df.index)

    # .copy() so the assignment below targets an independent frame rather
    # than a filtered view of the parsed sheet.
    df = df[df["genus"].notna() & is_dtol].copy()
    df["primary"] = "1"
    df.to_csv(output_path, sep="\t", index=False)
    return len(df)


def _fetch_dtol_tolqc_status(output_path: str) -> int:
    """Fetch DTOL assembly informatics status (kmer draft).

    Keeps rows with a taxon, drops rows whose accession starts with "GCA_"
    or whose ``statussummary`` starts with "9" or "5", and writes taxon plus
    the two size columns for rows where at least one size is numeric.

    Args:
        output_path (str): Path of the TSV file to write.

    Returns:
        int: Number of rows written.
    """
    response = safe_get(DTOL_TOLQC_STATUS_URL, timeout=120)
    response.raise_for_status()
    df = pd.read_csv(
        io.StringIO(response.text),
        delimiter="\t",
        dtype=str,
        na_values=["NA", "missing", "", "NULL"],
    )
    df = _normalise_sheet_columns(df)
    df = df.dropna(how="all", axis=0)
    df = df[df["taxon"].notna()]
    # na=False makes a missing accession count as "does not start with GCA_",
    # so rows without an accession are kept.
    df = df[~df["accession"].str.startswith("GCA_", na=False)]
    for excluded_prefix in ("9", "5"):
        df = df[~df["statussummary"].str.startswith(excluded_prefix, na=False)]

    size_columns = ["est_size_mb", "length_mb"]
    df = df[["taxon", *size_columns]].copy()
    for col in size_columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df = df[df["est_size_mb"].notna() | df["length_mb"].notna()]
    df.to_csv(output_path, sep="\t", index=False)
    return len(df)


def _fetch_cngb(output_path: str) -> int:
    """Fetch the CNGB project status sheet and write it out as TSV.

    Args:
        output_path (str): Path of the TSV file to write.

    Returns:
        int: Number of non-empty rows written.
    """
    response = safe_get(CNGB_URL, timeout=120)
    response.raise_for_status()
    df = pd.read_csv(
        io.StringIO(response.text),
        delimiter="\t",
        dtype=str,
        na_values=["NA", "missing", "", "NULL"],
    )
    df = df.dropna(how="all", axis=0)
    df.to_csv(output_path, sep="\t", index=False)
    return len(df)


# ---------------------------------------------------------------------------
# Prefect tasks and flow
# ---------------------------------------------------------------------------


@task(retries=2, retry_delay_seconds=30, log_prints=True)
def fetch_project_status_sheets(
    index_url: str, output_dir: str
) -> dict:
    """Fetch all project status sheets listed in the private index TSV.

    Each project is processed independently: a failure is logged, recorded
    as zero rows and marked with an empty ``<acronym>_expanded.tsv.failed``
    file, and the remaining projects are still attempted.

    Args:
        index_url (str): URL of the index TSV with columns
            project_acronym, published_url, start_header_line.
        output_dir (str): Directory to write per-project expanded TSVs.

    Returns:
        dict: Mapping of project acronym to row count (0 for failures).
    """
    response = safe_get(index_url, timeout=60)
    response.raise_for_status()

    index_df = pd.read_csv(
        io.StringIO(response.text),
        delimiter="\t",
        usecols=["project_acronym", "published_url", "start_header_line"],
        dtype={"project_acronym": str, "published_url": str, "start_header_line": int},
    )

    results = {}
    for entry in index_df.itertuples(index=False):
        acronym = entry.project_acronym
        header_row = int(entry.start_header_line)
        out_file = os.path.join(output_dir, f"{acronym}_expanded.tsv")
        failed_path = f"{out_file}.failed"
        print(f"Processing {acronym} (header row {header_row})")
        try:
            df = _process_project(acronym, entry.published_url, header_row)
            df.to_csv(out_file, sep="\t", index=False)
        except Exception as exc:
            # Deliberately broad: one broken sheet must not stop the others.
            print(f"  {acronym}: FAILED — {exc}")
            with open(failed_path, "w"):
                pass  # empty marker file, kept for legacy compatibility
            results[acronym] = 0
        else:
            # A marker left by an earlier failed run no longer applies.
            if os.path.exists(failed_path):
                os.remove(failed_path)
            results[acronym] = len(df)
            print(f"  {acronym}: {len(df)} rows")
    return results
@task(retries=2, retry_delay_seconds=30, log_prints=True)
def fetch_other_sheets(output_dir: str) -> dict:
    """Fetch the three non-project Google Sheets (DTOL plant, tolqc, CNGB).

    Each sheet is fetched independently; a failure is printed and recorded
    as zero rows without affecting the other sheets.

    Args:
        output_dir (str): Directory to write TSV files.

    Returns:
        dict: Mapping of sheet key to row count.
    """
    # (result key, output filename, log label, fetcher)
    sheets = (
        (
            "DTOL_Plant_Genome_Size_Estimates",
            "DTOL_Plant_Genome_Size_Estimates.tsv",
            "Plant genome sizes",
            _fetch_dtol_plant_genome_sizes,
        ),
        (
            "DTOL_tolqc_status",
            "DTOL_assembly_informatics_status_kmer_draft.tsv",
            "DTOL tolqc status",
            _fetch_dtol_tolqc_status,
        ),
        ("cngb", "cngb.tsv", "CNGB", _fetch_cngb),
    )

    row_counts = {}
    for key, filename, label, fetcher in sheets:
        target = os.path.join(output_dir, filename)
        try:
            row_counts[key] = fetcher(target)
            print(f"{label}: {row_counts[key]} rows")
        except Exception as exc:
            # NOTE(review): because every error is swallowed here, the task's
            # retries=2 setting can never trigger for a sheet failure.
            print(f"{label}: FAILED — {exc}")
            row_counts[key] = 0
    return row_counts


@task(log_prints=True)
def upload_s3_dir(local_dir: str, s3_path: str) -> None:
    """Upload every .tsv / .tsv.gz file in a directory to S3, in name order."""
    remote_dir = s3_path.rstrip("/")
    for entry in sorted(os.listdir(local_dir)):
        if not entry.endswith((".tsv", ".tsv.gz")):
            continue
        destination = f"{remote_dir}/{entry}"
        print(f"Uploading {entry} to {destination}")
        upload_to_s3(os.path.join(local_dir, entry), destination)


@flow()
def update_google_sheets_status(
    output_path: str,
    index_url: str = None,
    s3_path: str = None,
    min_records: int = 0,
) -> bool:
    """Fetch all Google Sheets project status and supplementary data.

    Args:
        output_path (str): Directory to write output TSVs.
        index_url (str): URL to the private index TSV; read from the
            GOAT_SHEETS_INDEX_URL environment variable when not given.
        s3_path (str): Optional S3 path to upload results.
        min_records (int): Minimum total records to accept.

    Returns:
        bool: True on success.

    Raises:
        ValueError: If ``output_path`` is rejected by ``is_safe_path``.
        RuntimeError: If fewer than ``min_records`` rows were fetched in total.
    """
    if not is_safe_path(output_path):
        raise ValueError(f"Unsafe output path: {output_path}")
    os.makedirs(output_path, exist_ok=True)

    if index_url is None:
        index_url = os.environ.get("GOAT_SHEETS_INDEX_URL", "")

    project_results = {}
    if not index_url:
        print("No index URL provided — skipping project status sheets")
    else:
        project_results = fetch_project_status_sheets(index_url, output_path)

    other_results = fetch_other_sheets(output_path)
    total = sum(project_results.values()) + sum(other_results.values())

    if total < min_records:
        raise RuntimeError(
            f"Google Sheets: fewer than {min_records} total records: {total}"
        )

    if s3_path:
        upload_s3_dir(output_path, s3_path)

    emit_event(
        event="update.google.sheets.status.finished",
        resource={
            "prefect.resource.id": f"update.google.sheets.status.{output_path}",
            "prefect.resource.type": "google.sheets.status",
        },
        payload={
            "total_records": total,
            "projects": len(project_results),
            "other_sheets": len(other_results),
        },
    )
    return True


if __name__ == "__main__":
    INDEX_URL = {
        "flags": ["--index_url"],
        "keys": {
            "help": "URL to the private index TSV listing project sheets.",
            "type": str,
        },
    }
    args = parse_args(
        [required(OUTPUT_PATH), INDEX_URL, S3_PATH, MIN_RECORDS],
        "Fetch project status data from Google Sheets.",
    )
    update_google_sheets_status(**vars(args))
def _exchange_token(offline_token: str) -> str:
    """Exchange a JGI offline token for an access token.

    The offline token is percent-encoded before being placed in the query
    string, so reserved characters in the token (``+``, ``/``, ``=``, ``&``)
    cannot alter or truncate the request.

    Args:
        offline_token (str): The JGI offline (API) token.

    Returns:
        str: The access token from the response body, with surrounding
            whitespace stripped.

    Raises:
        RuntimeError: If the endpoint does not answer HTTP 200, or if the
            response body is empty after stripping.
    """
    from urllib.parse import quote

    # safe="" encodes every reserved character, including "/".
    encoded_token = quote(offline_token, safe="")
    url = f"{JGI_BASE_URL}/exchange?offlineToken={encoded_token}"
    response = safe_get(url, timeout=30)
    if response.status_code != 200:
        raise RuntimeError(
            f"JGI token exchange failed: HTTP {response.status_code} — "
            f"check that JGI_OFFLINE_TOKEN is valid"
        )
    token = response.content.decode().strip()
    if not token:
        raise RuntimeError("JGI token exchange returned empty access token")
    return token
@task(retries=2, retry_delay_seconds=10, log_prints=True)
def fetch_jgi_tsv(file_path: str, min_lines: int = 1) -> int:
    """Fetch JGI 1KFG project data and write it to a TSV.

    Exchanges the offline token (read from ``JGI_OFFLINE_TOKEN``) for an
    access token, fetches the organism-to-taxid mapping and the project
    records, keeps only projects whose ``sequencingStrategy`` is
    "Whole Genome Sequencing", and writes one row per kept project with the
    organism's ``ncbiTaxId`` appended as the last column.

    Args:
        file_path (str): Path to the output TSV file.
        min_lines (int): Minimum number of data rows (header excluded)
            required for the output to be accepted.

    Returns:
        int: Number of lines written, including the header line.

    Raises:
        RuntimeError: If ``JGI_OFFLINE_TOKEN`` is unset, or if fewer than
            ``min_lines`` data rows were written. In the latter case the
            under-sized file has already been written to ``file_path``.
    """
    offline_token = os.environ.get("JGI_OFFLINE_TOKEN")
    if not offline_token:
        raise RuntimeError(
            "JGI_OFFLINE_TOKEN environment variable is not set — "
            "cannot authenticate with JGI GOLD API"
        )

    print("Exchanging JGI offline token for access token")
    access_token = _exchange_token(offline_token)

    print(f"Fetching organisms for study {JGI_STUDY_ID}")
    org_to_taxid = _fetch_organisms(access_token)
    print(f"Found {len(org_to_taxid)} organisms")

    print(f"Fetching projects for study {JGI_STUDY_ID}")
    projects = _fetch_projects(access_token)
    print(f"Found {len(projects)} total projects")

    # ncbiTaxId is not taken from the project record; it is looked up from
    # the organism mapping and appended as the final column.
    source_fields = [name for name in FIELDNAMES if name != "ncbiTaxId"]
    row_count = 0
    # Explicit UTF-8 so the output does not depend on the worker's locale.
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter="\t", lineterminator="\n")
        writer.writerow(FIELDNAMES)
        for project in projects:
            if project.get("sequencingStrategy") != "Whole Genome Sequencing":
                continue
            organism_id = project.get("organismGoldId", "")
            taxid = org_to_taxid.get(organism_id, "")
            row = [project.get(field, "") for field in source_fields] + [taxid]
            writer.writerow(row)
            row_count += 1

    line_count = row_count + 1  # include header
    if row_count < min_lines:
        raise RuntimeError(
            f"JGI file has fewer than {min_lines} data rows: {row_count}"
        )
    print(f"Wrote {row_count} WGS projects to {file_path}")
    return line_count
+ + Returns: + bool: True on success. + """ + if not is_safe_path(output_path): + raise ValueError(f"Unsafe output path: {output_path}") + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + line_count = fetch_jgi_tsv(output_path, min_records) + + if line_count > min_records and s3_path: + upload_s3_tsv(output_path, s3_path) + + emit_event( + event="update.jgi.status.finished", + resource={ + "prefect.resource.id": f"update.jgi.{output_path}", + "prefect.resource.type": "jgi.status", + }, + payload={"line_count": line_count}, + ) + return True + + +if __name__ == "__main__": + args = parse_args( + [required(OUTPUT_PATH), S3_PATH, MIN_RECORDS], + "Fetch JGI 1KFG status list.", + ) + update_jgi_status(**vars(args)) diff --git a/flows/updaters/update_refseq_organelles.py b/flows/updaters/update_refseq_organelles.py new file mode 100644 index 0000000..aea7eea --- /dev/null +++ b/flows/updaters/update_refseq_organelles.py @@ -0,0 +1,318 @@ +import contextlib +import csv +import gzip +import os +import re +import tempfile +from collections import Counter + +from flows.lib.conditional_import import emit_event, flow, task +from flows.lib.shared_args import ( + MIN_RECORDS, + OUTPUT_PATH, + ROOT_TAXID, + S3_PATH, + default, + parse_args, + required, +) +from flows.lib.utils import is_safe_path, safe_get, upload_to_s3 + +REFSEQ_FTP = "https://ftp.ncbi.nlm.nih.gov/refseq/release" + +ORGANELLE_FIELDNAMES = [ + "id", + "organelle", + "taxonId", + "genbankAccession", + "assemblySpan", + "gcPercent", + "nPercent", + "releaseDate", + "sourceAuthor", + "sourceYear", + "sourceTitle", + "pubmedId", + "bioproject", + "biosample", + "sampleLocation", +] + +MONTHS = { + "JAN": "01", "FEB": "02", "MAR": "03", "APR": "04", + "MAY": "05", "JUN": "06", "JUL": "07", "AUG": "08", + "SEP": "09", "OCT": "10", "NOV": "11", "DEC": "12", +} + + +def _reformat_date(date_str: str) -> str: + """Convert DD-MMM-YYYY to YYYY-MM-DD.""" + parts = re.split(r"[:\-]", date_str) + if len(parts) < 3: 
def _parse_features(entry, fields: dict) -> None:
    """Extract taxonId and sample location from a record's first feature.

    Only ``entry.features[0]`` is inspected. A record with no features is
    left untouched instead of raising, and ``db_xref`` values without a
    ``:`` separator are skipped, so one malformed record cannot abort the
    parse of a whole flatfile.

    Args:
        entry: Record exposing ``features``, each with a ``qualifiers``
            mapping of qualifier name to list of values.
        fields (dict): Row dict updated in place with ``taxonId`` (from a
            ``taxon:<id>`` db_xref) and ``sampleLocation`` (first ``lat_lon``
            value) when present.
    """
    if not entry.features:
        return
    qualifiers = entry.features[0].qualifiers
    for xref in qualifiers.get("db_xref", []):
        # partition() never raises; an empty separator means "no colon".
        key, separator, value = xref.partition(":")
        if separator and key == "taxon":
            fields["taxonId"] = value
    lat_lon = qualifiers.get("lat_lon")
    if lat_lon:
        fields["sampleLocation"] = lat_lon[0]
def _parse_sequence(entry, fields: dict) -> bool:
    """Record N%, GC% and span for a sequence record.

    ``nPercent`` is always written. ``gcPercent`` (G+C as a share of
    A+C+G+T, "0.00" when none of those bases occur) and ``assemblySpan``
    are written only when the sequence is usable.

    Args:
        entry: Record whose ``seq`` supports ``upper()`` and ``str()``.
        fields (dict): Row dict updated in place.

    Returns:
        bool: False when the sequence is empty or consists solely of N,
            True otherwise.
    """
    bases = str(entry.seq.upper())
    span = len(bases)
    tally = Counter(bases)

    if span > 0:
        n_share = tally["N"] / span * 100
    else:
        # An empty sequence is treated the same as an all-N sequence.
        n_share = 100
    fields["nPercent"] = f"{n_share:.2f}"
    if n_share == 100:
        return False

    gc_total = tally["G"] + tally["C"]
    acgt_total = gc_total + tally["A"] + tally["T"]
    if acgt_total > 0:
        fields["gcPercent"] = f"{gc_total / acgt_total * 100:.2f}"
    else:
        fields["gcPercent"] = "0.00"
    fields["assemblySpan"] = str(span)
    return True
@task(retries=2, retry_delay_seconds=30, log_prints=True)
def fetch_and_parse_organelles(
    output_path: str,
    organelles: list = None,
    root_taxon: str = None,
) -> int:
    """Fetch RefSeq organelle flatfiles and write the parsed rows as a TSV.

    For each organelle collection, every listed GenBank flatfile is
    downloaded to a temporary file, parsed, and the temporary file removed.
    The combined rows are then written once to ``output_path`` as UTF-8,
    gzip-compressed when the path ends in ``.gz``. The output is written in a
    single pass; no uncompressed intermediate file is created.

    Args:
        output_path (str): Path to write the output TSV (or .tsv.gz).
        organelles (list): Organelle collections to parse. Defaults to
            ``["mitochondrion", "plastid"]``.
        root_taxon (str): Optional taxonomic root filter passed to the
            flatfile parser.

    Returns:
        int: Number of data rows written (header excluded).
    """
    if organelles is None:
        organelles = ["mitochondrion", "plastid"]

    all_rows = []
    for organelle in organelles:
        print(f"Fetching listing for {organelle}")
        listing = _refseq_listing(organelle)
        print(f"Found {len(listing)} files for {organelle}")

        for url in listing:
            print(f"Downloading {url}")
            response = safe_get(url, timeout=600)
            response.raise_for_status()

            # delete=False: the file is reopened by path in _parse_flatfile,
            # so it is removed explicitly in the finally block below.
            with tempfile.NamedTemporaryFile(suffix=".gbff.gz", delete=False) as tmp:
                tmp.write(response.content)
                tmp_path = tmp.name

            try:
                rows = _parse_flatfile(tmp_path, organelle, root_taxon)
                all_rows.extend(rows)
                print(f"Parsed {len(rows)} records from {os.path.basename(url)}")
            finally:
                os.unlink(tmp_path)

    # gzip.open and open share the same text-mode signature, so the writer
    # code is identical for compressed and plain output.
    opener = gzip.open if output_path.endswith(".gz") else open
    with opener(output_path, "wt", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f,
            fieldnames=ORGANELLE_FIELDNAMES,
            delimiter="\t",
            lineterminator="\n",
            extrasaction="ignore",
        )
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"Wrote {len(all_rows)} total organelle records to {output_path}")
    return len(all_rows)
+ """ + if not is_safe_path(output_path): + raise ValueError(f"Unsafe output path: {output_path}") + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + row_count = fetch_and_parse_organelles( + output_path, root_taxon=root_taxid + ) + + if row_count < min_records: + raise RuntimeError( + f"RefSeq organelles: fewer than {min_records} records: {row_count}" + ) + + if s3_path: + upload_s3_file(output_path, s3_path) + + emit_event( + event="update.refseq.organelles.finished", + resource={ + "prefect.resource.id": f"update.refseq.organelles.{output_path}", + "prefect.resource.type": "refseq.organelles", + }, + payload={"row_count": row_count}, + ) + return True + + +if __name__ == "__main__": + args = parse_args( + [required(OUTPUT_PATH), ROOT_TAXID, S3_PATH, MIN_RECORDS], + "Fetch and parse RefSeq organelle data.", + ) + update_refseq_organelles(**vars(args)) diff --git a/flows/updaters/update_sra_data.py b/flows/updaters/update_sra_data.py new file mode 100644 index 0000000..868361b --- /dev/null +++ b/flows/updaters/update_sra_data.py @@ -0,0 +1,379 @@ +import csv +import gzip +import os +import xml.etree.ElementTree as ET +from collections import defaultdict +from datetime import date, timedelta +from itertools import groupby + +from flows.lib.conditional_import import emit_event, flow, task +from flows.lib.shared_args import ( + INPUT_PATH, + MIN_RECORDS, + OUTPUT_PATH, + ROOT_TAXID, + S3_PATH, + default, + parse_args, + required, +) +from flows.lib.utils import is_safe_path, run_quoted, upload_to_s3 + + +SRA_FIELDNAMES = [ + "taxon_id", + "sra_accession", + "run_accession", + "library_source", + "platform", + "reads", + "total_reads", + "total_runs", +] + + +def _split_chunks(values, split_val): + """Split an iterable into chunks at occurrences of split_val. + + Args: + values: Iterable to split. + split_val: Value at which to split. + + Yields: + (int, group) pairs. 
def _read_exp_xml(node, obj):
    """Copy selected child values of an ExpXml element into ``obj``.

    Children with an unrecognised tag are ignored.

    Args:
        node: Iterable of XML child elements.
        obj (dict): Record updated in place. ``bioproject`` and ``biosample``
            take the child's text, ``taxon_id`` and ``sra_accession`` take an
            attribute, and ``platform`` and ``library_source`` (lower-cased)
            take a nested element's text, falling back to "".
    """
    # tag -> key that receives the element text
    text_fields = {"Bioproject": "bioproject", "Biosample": "biosample"}
    # tag -> (key, attribute that supplies the value)
    attr_fields = {
        "Organism": ("taxon_id", "taxid"),
        "Experiment": ("sra_accession", "acc"),
    }

    for element in node:
        name = element.tag
        if name in text_fields:
            obj[text_fields[name]] = element.text
        elif name in attr_fields:
            key, attribute = attr_fields[name]
            obj[key] = element.get(attribute)
        elif name == "Summary":
            obj["platform"] = element.findtext("Platform") or ""
        elif name == "Library_descriptor":
            library_source = element.findtext("LIBRARY_SOURCE")
            if library_source:
                obj["library_source"] = library_source.lower()
            else:
                obj["library_source"] = ""
def group_by_taxon(rows: list, grouped: dict = None) -> list:
    """Group SRA runs by taxon, keeping the 10 most recent runs per taxon.

    Records are processed in ascending ``date`` order and each run is
    inserted at the front of its taxon's list, so the list is ordered
    newest-first; once it exceeds 10 entries the oldest is dropped. The
    ``count`` and ``reads`` totals cover every run seen, not only the 10
    retained.

    Args:
        rows (list): Parsed SRA record dicts, each with ``taxon_id``,
            optional ``date`` and a ``runs`` list of
            ``{"accession", "reads"}`` dicts. Records without a taxon_id are
            skipped; non-numeric read counts count as 0.
        grouped (dict): Optional existing grouped data to merge into, keyed
            by taxon_id with ``count``, ``reads`` and ``runs`` entries. Any
            mapping is accepted (it need not be a defaultdict). A non-empty
            mapping is updated in place; None or an empty mapping starts
            from scratch.

    Returns:
        list: One dict per taxon with ``;``-joined per-run columns plus
            ``total_reads`` and ``total_runs``.
    """
    if not grouped:
        grouped = {}
    for obj in sorted(rows, key=lambda r: r.get("date", "")):
        taxon_id = obj.get("taxon_id")
        if not taxon_id:
            continue
        for run in obj.get("runs", []):
            try:
                reads = int(run["reads"])
            except (ValueError, TypeError):
                reads = 0
            # setdefault works for plain dicts and defaultdicts alike, and
            # only creates the taxon entry once it has at least one run.
            group = grouped.setdefault(
                taxon_id, {"count": 0, "reads": 0, "runs": []}
            )
            group["runs"].insert(
                0,
                {
                    "sra_accession": obj.get("sra_accession", ""),
                    "run_accession": run["accession"],
                    "library_source": obj.get("library_source", ""),
                    "platform": obj.get("platform", ""),
                    "reads": reads,
                },
            )
            group["count"] += 1
            group["reads"] += reads
            if len(group["runs"]) > 10:
                group["runs"].pop()

    return [
        {
            "taxon_id": taxon_id,
            "sra_accession": ";".join(r["sra_accession"] for r in grp["runs"]),
            "run_accession": ";".join(r["run_accession"] for r in grp["runs"]),
            "library_source": ";".join(r["library_source"] for r in grp["runs"]),
            "platform": ";".join(r["platform"] for r in grp["runs"]),
            "reads": ";".join(str(r["reads"]) for r in grp["runs"]),
            "total_reads": grp["reads"],
            "total_runs": grp["count"],
        }
        for taxon_id, grp in grouped.items()
    ]
@task(retries=2, retry_delay_seconds=5, log_prints=True)
def parse_and_write_sra(
    xml_path: str,
    output_path: str,
    previous_path: str = None,
) -> int:
    """Parse SRA XML and write the per-taxon grouped TSV.

    The output is written once, as UTF-8 (the same encoding the module's
    reader uses), directly through gzip when ``output_path`` ends in
    ``.gz``. No uncompressed intermediate file is created.

    Args:
        xml_path (str): Path to the SRA docsum XML.
        output_path (str): Path to write the output TSV (or .tsv.gz).
        previous_path (str): Optional path to a previous TSV whose grouped
            data is merged with the newly parsed records. It is loaded
            before the output is opened, so it may be the same path as
            ``output_path``.

    Returns:
        int: Number of taxon rows written (header excluded).
    """
    previous = load_previous_tsv(previous_path) if previous_path else {}
    rows = parse_sra_xml(xml_path)
    print(f"Parsed {len(rows)} records from XML")

    grouped_rows = group_by_taxon(rows, grouped=previous)
    print(f"Grouped into {len(grouped_rows)} taxa")

    # gzip.open and open share the same text-mode signature.
    opener = gzip.open if output_path.endswith(".gz") else open
    with opener(output_path, "wt", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f, fieldnames=SRA_FIELDNAMES, delimiter="\t", lineterminator="\n"
        )
        writer.writeheader()
        writer.writerows(grouped_rows)

    print(f"Wrote {len(grouped_rows)} taxon rows to {output_path}")
    return len(grouped_rows)
@task(retries=2, retry_delay_seconds=5, log_prints=True)
def fetch_ucsc_hub_list(output_dir: str) -> tuple[str, int]:
    """Fetch the UCSC assembly hub accession list and write it as gzipped TSV.

    The response is decoded as ISO-8859-1 and re-encoded as UTF-8 while being
    written straight into the gzip file, so the output encoding does not
    depend on the worker's locale and no uncompressed intermediate file is
    created.

    Args:
        output_dir (str): Directory to write the output file.

    Returns:
        tuple[str, int]: Path to the gzipped output file and the number of
            newline characters in the downloaded text (every
            newline-terminated line, including any header or comment lines).
    """
    output_path = os.path.join(output_dir, OUTPUT_FILENAME)

    print(f"Fetching UCSC hub list from {UCSC_URL}")
    response = safe_get(UCSC_URL, timeout=60)
    response.raise_for_status()
    # Force the decoding rather than trusting the server's declared charset.
    response.encoding = "iso-8859-1"
    text = response.text

    # newline="" writes the text exactly as received (no newline translation).
    with gzip.open(output_path, "wt", encoding="utf-8", newline="") as f_out:
        f_out.write(text)
    line_count = text.count("\n")

    print(f"Wrote {line_count} lines to {output_path}")
    return output_path, line_count
@task(retries=2, retry_delay_seconds=5, log_prints=True)
def fetch_vgp_tsv(
    file_path: str,
    min_lines: int = 1,
) -> int:
    """Fetch the VGP status list into a TSV and validate its size.

    Delegates the download and TSV writing to ``at.get_from_source`` with the
    VGL opener, handlers and field names from ``api_config``, then counts the
    lines of the resulting file.

    Args:
        file_path (str): Path to the output TSV file.
        min_lines (int): Minimum number of lines the file must contain.

    Returns:
        int: Number of lines found in the output file.

    Raises:
        RuntimeError: If the file has fewer than ``min_lines`` lines.
    """
    at.get_from_source(
        cfg.vgl_url_opener,
        cfg.vgl_hub_count_handler,
        cfg.vgl_row_handler,
        cfg.vgl_fieldnames,
        file_path,
    )

    # Count lines by streaming the file rather than loading it whole.
    written = 0
    with open(file_path, "r") as handle:
        for _ in handle:
            written += 1

    if written >= min_lines:
        print(f"Wrote {written} lines to {file_path}")
        return written
    raise RuntimeError(
        f"VGP file {file_path} has fewer than {min_lines} lines: {written}"
    )
+ """ + os.makedirs(os.path.dirname(output_path), exist_ok=True) + line_count = fetch_vgp_tsv(output_path, min_records) + if line_count > min_records and s3_path: + upload_s3_tsv(output_path, s3_path) + emit_event( + event="update.vgp.status.finished", + resource={ + "prefect.resource.id": f"update.vgp.{output_path}", + "prefect.resource.type": "vgp.status", + }, + payload={"line_count": line_count}, + ) + return True + + +if __name__ == "__main__": + args = parse_args( + [required(OUTPUT_PATH), S3_PATH, MIN_RECORDS], + "Fetch VGP status list from the Vertebrate Genomes Project.", + ) + update_vgp_status(**vars(args)) From 02b62e9e68f9291b2a4cdb4a8bca647faf932930 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Thu, 30 Apr 2026 23:26:46 +0100 Subject: [PATCH 04/18] correct updaters --- flows/updaters/update_jgi_status.py | 7 ++++--- flows/updaters/update_refseq_organelles.py | 5 +++-- flows/updaters/update_sra_data.py | 11 ++++++----- flows/updaters/update_vgp_status.py | 10 ++++++---- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/flows/updaters/update_jgi_status.py b/flows/updaters/update_jgi_status.py index 7326f9f..64e6537 100644 --- a/flows/updaters/update_jgi_status.py +++ b/flows/updaters/update_jgi_status.py @@ -178,12 +178,13 @@ def update_jgi_status( """ if not is_safe_path(output_path): raise ValueError(f"Unsafe output path: {output_path}") - os.makedirs(os.path.dirname(output_path), exist_ok=True) + resolved_path = os.path.abspath(output_path) + os.makedirs(os.path.dirname(resolved_path), exist_ok=True) - line_count = fetch_jgi_tsv(output_path, min_records) + line_count = fetch_jgi_tsv(resolved_path, min_records) if line_count > min_records and s3_path: - upload_s3_tsv(output_path, s3_path) + upload_s3_tsv(resolved_path, s3_path) emit_event( event="update.jgi.status.finished", diff --git a/flows/updaters/update_refseq_organelles.py b/flows/updaters/update_refseq_organelles.py index aea7eea..8316c1c 100644 --- 
a/flows/updaters/update_refseq_organelles.py +++ b/flows/updaters/update_refseq_organelles.py @@ -285,10 +285,11 @@ def update_refseq_organelles( """ if not is_safe_path(output_path): raise ValueError(f"Unsafe output path: {output_path}") - os.makedirs(os.path.dirname(output_path), exist_ok=True) + resolved_path = os.path.abspath(output_path) + os.makedirs(os.path.dirname(resolved_path), exist_ok=True) row_count = fetch_and_parse_organelles( - output_path, root_taxon=root_taxid + resolved_path, root_taxon=root_taxid ) if row_count < min_records: diff --git a/flows/updaters/update_sra_data.py b/flows/updaters/update_sra_data.py index 868361b..cedb523 100644 --- a/flows/updaters/update_sra_data.py +++ b/flows/updaters/update_sra_data.py @@ -315,7 +315,7 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: def update_sra_data( output_path: str, input_path: str = None, - root_taxid: str = "2759", + root_taxid: str = "9612", s3_path: str = None, min_records: int = 0, ) -> bool: @@ -336,15 +336,16 @@ def update_sra_data( """ if not is_safe_path(output_path): raise ValueError(f"Unsafe output path: {output_path}") - os.makedirs(os.path.dirname(output_path), exist_ok=True) + resolved_path = os.path.abspath(output_path) + os.makedirs(os.path.dirname(resolved_path), exist_ok=True) if input_path and os.path.isfile(input_path): xml_path = input_path else: - xml_path = f"{output_path}.xml" + xml_path = f"{resolved_path}.xml" fetch_sra_xml(xml_path, root_taxid=root_taxid) - row_count = parse_and_write_sra(xml_path, output_path) + row_count = parse_and_write_sra(xml_path, resolved_path) if row_count < min_records: raise RuntimeError( @@ -370,7 +371,7 @@ def update_sra_data( [ required(OUTPUT_PATH), INPUT_PATH, - default(ROOT_TAXID, "2759"), + default(ROOT_TAXID, "9612"), S3_PATH, MIN_RECORDS, ], diff --git a/flows/updaters/update_vgp_status.py b/flows/updaters/update_vgp_status.py index 687abb9..2fc57aa 100644 --- a/flows/updaters/update_vgp_status.py +++ 
b/flows/updaters/update_vgp_status.py @@ -64,14 +64,16 @@ def update_vgp_status( Returns: bool: True on success. """ - os.makedirs(os.path.dirname(output_path), exist_ok=True) - line_count = fetch_vgp_tsv(output_path, min_records) + + resolved_path = os.path.abspath(output_path) + os.makedirs(os.path.dirname(resolved_path), exist_ok=True) + line_count = fetch_vgp_tsv(resolved_path, min_records) if line_count > min_records and s3_path: - upload_s3_tsv(output_path, s3_path) + upload_s3_tsv(resolved_path, s3_path) emit_event( event="update.vgp.status.finished", resource={ - "prefect.resource.id": f"update.vgp.{output_path}", + "prefect.resource.id": f"update.vgp.{resolved_path}", "prefect.resource.type": "vgp.status", }, payload={"line_count": line_count}, From 63750b38d619b74aa30e2683ef5a521a2f0a170d Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Fri, 1 May 2026 00:20:34 +0100 Subject: [PATCH 05/18] create parses from phase 2 of migration --- flows/lib/utils.py | 147 +++++++++++++++ flows/parsers/parse_blobtoolkit.py | 93 ++++++++++ flows/parsers/parse_conservation.py | 50 +++++ flows/parsers/parse_genomesize_karyotype.py | 53 ++++++ flows/parsers/parse_legislation.py | 51 ++++++ flows/parsers/parse_refseq_organelles.py | 139 ++++++++++++-- flows/parsers/parse_sequencing_status.py | 110 +++++++++-- flows/parsers/parse_sra_data.py | 55 ++++++ flows/prefect.yaml | 172 ++++++++++++++++++ flows/updaters/update_blobtoolkit.py | 10 +- flows/updaters/update_ensembl_metadata.py | 8 +- flows/updaters/update_google_sheets_status.py | 12 +- flows/updaters/update_ucsc_assemblies.py | 8 +- 13 files changed, 865 insertions(+), 43 deletions(-) create mode 100644 flows/parsers/parse_blobtoolkit.py create mode 100644 flows/parsers/parse_conservation.py create mode 100644 flows/parsers/parse_genomesize_karyotype.py create mode 100644 flows/parsers/parse_legislation.py create mode 100644 flows/parsers/parse_sra_data.py diff --git a/flows/lib/utils.py b/flows/lib/utils.py index 
ef6a75e..60e4855 100644 --- a/flows/lib/utils.py +++ b/flows/lib/utils.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 import contextlib +import glob import gzip import hashlib import os @@ -133,6 +134,152 @@ def parse(self, data: dict) -> dict: return parsed_data +def open_tsv(input_path: str): + """Open a TSV file (plain or gzipped) for reading. + + Args: + input_path (str): Path to the input TSV file (.tsv or .tsv.gz). + + Returns: + File handle in text mode. + """ + if input_path.endswith(".gz"): + return gzip.open(input_path, "rt", encoding="utf-8", newline="") + return open(input_path, "rt", encoding="utf-8", newline="") + + +def parse_tsv_with_config( + input_path: str, + config: "Config", + key_field: Optional[str] = None, + delimiter: str = "\t", +) -> Dict[str, dict]: + """Parse a TSV file row-by-row through a Config's parse functions. + + Each row of the TSV is treated as a flat dict keyed by column header, + matching the expected ``path:`` references in YAML attribute definitions. + Rows are passed through ``gh_utils.parse_report_values`` to apply any + YAML-defined translations and field mappings, then keyed in the returned + dict by ``key_field`` (or by row index if not provided). + + Args: + input_path (str): Path to the input TSV file (.tsv or .tsv.gz). + config (Config): Loaded YAML configuration. + key_field (str): Optional input column name to use as the dict key. + If not provided, rows are keyed by sequential integer. + delimiter (str): Field delimiter in the TSV (default: tab). + + Returns: + Dict[str, dict]: Mapping of key → parsed row dict (YAML-named fields). 
+ """ + parsed: Dict[str, dict] = {} + with open_tsv(input_path) as fh: + reader = DictReader(fh, delimiter=delimiter) + for index, record in enumerate(reader): + row = gh_utils.parse_report_values(config.parse_fns, record) + if key_field and key_field in record and record[key_field]: + key = record[key_field] + else: + key = str(index) + parsed[key] = row + return parsed + + +def locate_input_tsv(work_dir: str, expected_name: Optional[str] = None) -> str: + """Locate the input TSV in ``work_dir`` for a generic parser. + + Picks the single ``*.tsv`` or ``*.tsv.gz`` in ``work_dir`` whose basename + is not the expected output. Falls back to a direct hit on ``expected_name`` + when present. + + Args: + work_dir (str): Working directory. + expected_name (str): Output filename from ``config.meta["file_name"]``; + used to exclude the parser's intended output from candidate + inputs. + + Returns: + str: Path to the input TSV. + + Raises: + FileNotFoundError: If no candidate TSV is found. + ValueError: If multiple candidate TSVs are found. + """ + candidates = sorted( + glob.glob(os.path.join(work_dir, "*.tsv")) + + glob.glob(os.path.join(work_dir, "*.tsv.gz")) + ) + if expected_name: + candidates = [ + c for c in candidates if os.path.basename(c) != expected_name + ] + if not candidates: + raise FileNotFoundError( + f"No TSV input found in {work_dir} (expected != {expected_name})" + ) + if len(candidates) > 1: + raise ValueError( + f"Multiple TSV inputs in {work_dir}: {candidates!r}" + ) + return candidates[0] + + +def run_generic_tsv_parser( + working_yaml: str, + work_dir: str, + append: bool = False, + key_field: Optional[str] = None, +) -> None: + """Run the generic flat-TSV-with-Config parsing pipeline. + + Locates the input TSV in ``work_dir``, loads the YAML config, + applies ``parse_report_values`` row-by-row, and writes the + canonical TSV to ``work_dir`` (preserving the YAML-defined + ``file_name`` for downstream validation). 
+ + Args: + working_yaml (str): Path to the working YAML config file. + work_dir (str): Working directory. + append (bool): If True, load previous parsed data. + key_field (str): Optional input column to key parsed rows by. + """ + config = load_config(config_file=working_yaml, load_previous=append) + expected_name = config.meta["file_name"] + input_path = locate_input_tsv(work_dir, expected_name) + print(f"Parsing {input_path} with {working_yaml}") + + parsed = parse_tsv_with_config(input_path, config, key_field=key_field) + print(f"Parsed {len(parsed)} records") + + output_name = config.meta["file_name"] + config.meta["file_name"] = os.path.join( + work_dir, os.path.basename(output_name) + ) + try: + write_parsed_tsv(parsed, config) + finally: + config.meta["file_name"] = output_name + + +def write_parsed_tsv(parsed: Dict[str, dict], config: "Config") -> None: + """Write a parsed dict to TSV using config-defined headers and meta. + + Handles ``.gz`` filenames by writing uncompressed then gzipping. + + Args: + parsed (Dict[str, dict]): Mapping of key → row dict. + config (Config): Loaded YAML configuration. + """ + file_name = config.meta["file_name"] + if file_name.endswith(".gz"): + config.meta["file_name"] = file_name[:-3] + gh_utils.write_tsv(parsed, config.headers, config.meta) + os.system(f"gzip -f {config.meta['file_name']}") + config.meta["file_name"] = file_name + else: + gh_utils.write_tsv(parsed, config.headers, config.meta) + + def format_entry(entry, key: str, meta: dict) -> str: """ Formats a single entry in a dictionary, handling the case where the entry is a list. diff --git a/flows/parsers/parse_blobtoolkit.py b/flows/parsers/parse_blobtoolkit.py new file mode 100644 index 0000000..0971bf5 --- /dev/null +++ b/flows/parsers/parse_blobtoolkit.py @@ -0,0 +1,93 @@ +"""Parse BlobToolKit assembly summary TSV. 
+ +The corresponding updater (``update_blobtoolkit``) emits a flat TSV +(one row per BTK dataset) whose column headers already match the +``header:`` values in ``btk.types.yaml``. This parser simply applies +the YAML parse functions and writes the canonical TSV. +""" + +import os +from glob import glob + +from flows.lib.conditional_import import flow +from flows.lib.utils import ( # noqa: E402 + Parser, + load_config, + parse_tsv_with_config, + write_parsed_tsv, +) +from flows.parsers.args import parse_args # noqa: E402 + + +def _locate_input_tsv(work_dir: str, expected_name: str) -> str: + """Find the input TSV in ``work_dir``. + + Args: + work_dir (str): Working directory. + expected_name (str): YAML-defined output filename. + + Returns: + str: Path to the input TSV. + """ + candidates = sorted( + glob(os.path.join(work_dir, "*.tsv")) + + glob(os.path.join(work_dir, "*.tsv.gz")) + ) + candidates = [c for c in candidates if os.path.basename(c) != expected_name] + if not candidates: + raise FileNotFoundError(f"No BTK input TSV found in {work_dir}") + if len(candidates) > 1: + raise ValueError(f"Multiple TSV inputs in {work_dir}: {candidates!r}") + return candidates[0] + + +@flow(log_prints=True) +def parse_blobtoolkit( + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, +) -> None: + """Parse BTK summary TSV using YAML schema. + + Args: + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. + append (bool): If True, load previous parsed data. + **kwargs: Ignored extra arguments from the wrapper. 
+ """ + config = load_config(config_file=working_yaml, load_previous=append) + + expected_name = config.meta["file_name"] + input_path = _locate_input_tsv(work_dir, expected_name) + print(f"Parsing BlobToolKit summary: {input_path}") + + parsed = parse_tsv_with_config(input_path, config, key_field="accession") + print(f"Parsed {len(parsed)} BTK dataset records") + + output_name = config.meta["file_name"] + config.meta["file_name"] = os.path.join( + work_dir, os.path.basename(output_name) + ) + try: + write_parsed_tsv(parsed, config) + finally: + config.meta["file_name"] = output_name + + +def plugin(): + """Register the parser plugin.""" + return Parser( + name="BLOBTOOLKIT", + func=parse_blobtoolkit, + description="Parse BlobToolKit assembly summary TSV using YAML schema.", + ) + + +if __name__ == "__main__": + args = parse_args("Parse BlobToolKit assembly summary TSV.") + parse_blobtoolkit( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/parsers/parse_conservation.py b/flows/parsers/parse_conservation.py new file mode 100644 index 0000000..f86c549 --- /dev/null +++ b/flows/parsers/parse_conservation.py @@ -0,0 +1,50 @@ +"""Parse conservation-status source TSV using a YAML schema. + +Handles the ``FILE_CITES_full_index.types.yaml`` (and any future +``FILE_*.types.yaml``) configurations under ``sources/conservation``. +Delegates to the shared generic flat-TSV pipeline. +""" + +import os + +from flows.lib.conditional_import import flow +from flows.lib.utils import Parser, run_generic_tsv_parser +from flows.parsers.args import parse_args + + +@flow(log_prints=True) +def parse_conservation( + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, +) -> None: + """Parse a conservation-status TSV using YAML schema. + + Args: + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. 
+ append (bool): If True, load previous parsed data. + **kwargs: Ignored extra arguments from the wrapper. + """ + run_generic_tsv_parser( + working_yaml=working_yaml, work_dir=work_dir, append=append + ) + + +def plugin(): + """Register the parser plugin.""" + return Parser( + name="CONSERVATION", + func=parse_conservation, + description="Parse a conservation-status TSV using a YAML schema.", + ) + + +if __name__ == "__main__": + args = parse_args("Parse a conservation-status TSV using a YAML schema.") + parse_conservation( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/parsers/parse_genomesize_karyotype.py b/flows/parsers/parse_genomesize_karyotype.py new file mode 100644 index 0000000..715e4b7 --- /dev/null +++ b/flows/parsers/parse_genomesize_karyotype.py @@ -0,0 +1,53 @@ +"""Parse genome size & karyotype source TSV using a YAML schema. + +Generic parser for the ~25 ``FILE_*.types.yaml`` configurations under +``sources/genomesize-karyotype``. Each source is a flat TSV whose +columns map directly to YAML attribute headers; this parser delegates +to the shared generic flat-TSV pipeline. +""" + +import os + +from flows.lib.conditional_import import flow +from flows.lib.utils import Parser, run_generic_tsv_parser +from flows.parsers.args import parse_args + + +@flow(log_prints=True) +def parse_genomesize_karyotype( + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, +) -> None: + """Parse a genome-size or karyotype TSV using YAML schema. + + Args: + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. + append (bool): If True, load previous parsed data. + **kwargs: Ignored extra arguments from the wrapper. 
+ """ + run_generic_tsv_parser( + working_yaml=working_yaml, work_dir=work_dir, append=append + ) + + +def plugin(): + """Register the parser plugin.""" + return Parser( + name="GENOMESIZE_KARYOTYPE", + func=parse_genomesize_karyotype, + description="Parse a genome-size or karyotype TSV using a YAML schema.", + ) + + +if __name__ == "__main__": + args = parse_args( + "Parse a genome-size or karyotype TSV using a YAML schema." + ) + parse_genomesize_karyotype( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/parsers/parse_legislation.py b/flows/parsers/parse_legislation.py new file mode 100644 index 0000000..43d3aa5 --- /dev/null +++ b/flows/parsers/parse_legislation.py @@ -0,0 +1,51 @@ +"""Parse legislation source TSV using a YAML schema. + +Handles the ``FILE_*.types.yaml`` configurations under +``sources/uk-legislation`` (Council Directive 92/43/EEC, Conservation +of Habitats and Species Regulations 2017, Wildlife and Countryside +Act 1981, etc.). Delegates to the shared generic flat-TSV pipeline. +""" + +import os + +from flows.lib.conditional_import import flow +from flows.lib.utils import Parser, run_generic_tsv_parser +from flows.parsers.args import parse_args + + +@flow(log_prints=True) +def parse_legislation( + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, +) -> None: + """Parse a legislation TSV using YAML schema. + + Args: + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. + append (bool): If True, load previous parsed data. + **kwargs: Ignored extra arguments from the wrapper. 
+ """ + run_generic_tsv_parser( + working_yaml=working_yaml, work_dir=work_dir, append=append + ) + + +def plugin(): + """Register the parser plugin.""" + return Parser( + name="LEGISLATION", + func=parse_legislation, + description="Parse a legislation TSV using a YAML schema.", + ) + + +if __name__ == "__main__": + args = parse_args("Parse a legislation TSV using a YAML schema.") + parse_legislation( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/parsers/parse_refseq_organelles.py b/flows/parsers/parse_refseq_organelles.py index c7245bd..c441264 100644 --- a/flows/parsers/parse_refseq_organelles.py +++ b/flows/parsers/parse_refseq_organelles.py @@ -1,32 +1,143 @@ -from flows.lib.utils import Parser # noqa: E402 +"""Parse RefSeq organelle data into one-row-per-assembly TSV. + +The corresponding updater (``update_refseq_organelles``) emits one row per +organelle sequence (mitochondrion or plastid). The GoaT YAML schema +(``refseq_organelles.types.yaml``) expects one row per assembly with +combined ``mitochondrion*`` / ``plastid*`` columns. This parser pivots +the per-organelle rows by the assembly accession (genbank), then runs +the records through the YAML parse functions. 
+""" + +import os +from csv import DictReader +from glob import glob + +from genomehubs import utils as gh_utils + +from flows.lib.conditional_import import flow +from flows.lib.utils import ( # noqa: E402 + Parser, + load_config, + open_tsv, + write_parsed_tsv, +) from flows.parsers.args import parse_args # noqa: E402 +ORGANELLE_FIELDS = ("id", "assemblySpan", "gcPercent", "nPercent") + + +def _locate_input_tsv(work_dir: str, expected_name: str) -> str: + """Find the per-organelle input TSV in ``work_dir``.""" + expected_path = os.path.join(work_dir, expected_name) + if os.path.exists(expected_path): + return expected_path + candidates = sorted( + glob(os.path.join(work_dir, "*.tsv")) + + glob(os.path.join(work_dir, "*.tsv.gz")) + ) + if not candidates: + raise FileNotFoundError( + f"No TSV input found in {work_dir} (expected {expected_name})" + ) + if len(candidates) > 1: + raise ValueError( + f"Multiple TSV inputs in {work_dir}: {candidates!r}" + ) + return candidates[0] + + +def _pivot_by_assembly(input_path: str) -> dict: + """Group per-organelle rows by GenBank accession. + + Args: + input_path (str): Path to the per-organelle TSV. + + Returns: + dict: Mapping of assembly accession → nested record with + ``mitochondrion``/``plastid`` sub-dicts. 
+ """ + by_assembly: dict = {} + with open_tsv(input_path) as fh: + reader = DictReader(fh, delimiter="\t") + for row in reader: + assembly = row.get("genbankAccession") or row.get("id") + if not assembly: + continue + record = by_assembly.setdefault( + assembly, + { + "id": row.get("id", assembly), + "genbankAccession": assembly, + "bioproject": row.get("bioproject", ""), + "biosample": row.get("biosample", ""), + "releaseDate": row.get("releaseDate", ""), + "annotations": {"organism": row.get("organismName", "")}, + "taxonId": row.get("taxonId", ""), + "sourceAuthor": row.get("sourceAuthor", ""), + "sourceYear": row.get("sourceYear", ""), + "sourceTitle": row.get("sourceTitle", ""), + "pubmedId": row.get("pubmedId", ""), + "sampleLocation": row.get("sampleLocation", ""), + }, + ) + organelle = (row.get("organelle") or "").lower() + if organelle in ("mitochondrion", "plastid"): + record[organelle] = { + field: row.get(field, "") for field in ORGANELLE_FIELDS + } + return by_assembly + +@flow(log_prints=True) def parse_refseq_organelles( - working_yaml: str, work_dir: str, append: bool, **kwargs + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, ) -> None: - """ - Wrapper function to parse the RefSeq organelles JSONL file. + """Pivot per-organelle TSV to per-assembly and apply YAML schema. Args: - working_yaml (str): Path to the working YAML file. - work_dir (str): Path to the working directory. - append (bool): Whether to append to the existing TSV file. - **kwargs: Additional keyword arguments. + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. + append (bool): If True, load previous parsed data. + **kwargs: Ignored extra arguments from the wrapper. 
""" - print("parsing RefSeq organelles files") + config = load_config(config_file=working_yaml, load_previous=append) + + expected_name = config.meta["file_name"] + input_path = _locate_input_tsv(work_dir, expected_name) + print(f"Parsing RefSeq organelles: {input_path}") + + grouped = _pivot_by_assembly(input_path) + print(f"Pivoted to {len(grouped)} assemblies") + + parsed = { + key: gh_utils.parse_report_values(config.parse_fns, record) + for key, record in grouped.items() + } + + output_name = config.meta["file_name"] + config.meta["file_name"] = os.path.join(work_dir, os.path.basename(output_name)) + try: + write_parsed_tsv(parsed, config) + finally: + config.meta["file_name"] = output_name def plugin(): - """Register the flow.""" + """Register the parser plugin.""" return Parser( name="REFSEQ_ORGANELLES", func=parse_refseq_organelles, - description="Parse the RefSeq organelles files in a directory.", + description="Pivot per-organelle TSV to per-assembly and apply YAML schema.", ) if __name__ == "__main__": - """Run the flow.""" - args = parse_args() - parse_refseq_organelles(**vars(args)) + args = parse_args("Parse RefSeq organelle data into one-row-per-assembly TSV.") + parse_refseq_organelles( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/parsers/parse_sequencing_status.py b/flows/parsers/parse_sequencing_status.py index 37915d7..c7091bf 100644 --- a/flows/parsers/parse_sequencing_status.py +++ b/flows/parsers/parse_sequencing_status.py @@ -1,32 +1,114 @@ -from flows.lib.utils import Parser # noqa: E402 +"""Parse status list TSVs using a YAML schema. + +Generic parser that handles all sequencing status list inputs: +VGP, JGI 1KFG, Google Sheets project lists, NHM, CNGB, +ToL Portal, ToL Genome Notes, and similar. + +The input is a tab-separated file produced by the corresponding +updater (one row per record). 
The YAML schema describes how each +input column maps to a GoaT attribute (and may translate values +via ``translate:`` blocks). This parser: + +1. Locates the input TSV in ``work_dir`` matching ``meta.file_name`` + (or, failing that, the single TSV in the directory). +2. Reads each row as a flat dict keyed by column header. +3. Applies YAML parse functions via ``gh_utils.parse_report_values``. +4. Writes the canonical TSV using YAML-defined headers. +""" + +import os +from glob import glob + +from flows.lib.conditional_import import flow +from flows.lib.utils import ( # noqa: E402 + Parser, + load_config, + parse_tsv_with_config, + write_parsed_tsv, +) from flows.parsers.args import parse_args # noqa: E402 +def _locate_input_tsv(work_dir: str, expected_name: str) -> str: + """Find the input TSV in ``work_dir``. + + Looks first for ``expected_name`` (matching ``meta.file_name`` from + the YAML); falls back to a single ``*.tsv`` or ``*.tsv.gz`` in + ``work_dir`` that is not the expected output. + + Args: + work_dir (str): Working directory. + expected_name (str): The filename declared in YAML ``file.name``. + + Returns: + str: Absolute path to the input TSV. + """ + expected_path = os.path.join(work_dir, expected_name) + if os.path.exists(expected_path): + return expected_path + + candidates = sorted( + glob(os.path.join(work_dir, "*.tsv")) + + glob(os.path.join(work_dir, "*.tsv.gz")) + ) + if not candidates: + raise FileNotFoundError( + f"No TSV input found in {work_dir} (expected {expected_name})" + ) + if len(candidates) > 1: + raise ValueError( + f"Multiple TSV inputs in {work_dir}: {candidates!r}; " + "place a single source TSV or name it to match YAML file.name." + ) + return candidates[0] + + +@flow(log_prints=True) def parse_sequencing_status( - working_yaml: str, work_dir: str, append: bool, **kwargs + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, ) -> None: - """ - Wrapper function to parse the sequencing status files. 
+ """Parse a sequencing status TSV using a YAML schema. Args: - working_yaml (str): Path to the working YAML file. - work_dir (str): Path to the working directory. - append (bool): Whether to append to the existing TSV file. - **kwargs: Additional keyword arguments. + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. + append (bool): If True, load previous parsed data for incremental + updates. + **kwargs: Ignored extra arguments from the wrapper. """ - print("parsing sequencing status files") + config = load_config(config_file=working_yaml, load_previous=append) + + expected_name = config.meta["file_name"] + input_path = _locate_input_tsv(work_dir, expected_name) + print(f"Parsing sequencing status: {input_path}") + + parsed = parse_tsv_with_config(input_path, config) + print(f"Parsed {len(parsed)} rows") + + output_name = config.meta["file_name"] + config.meta["file_name"] = os.path.join(work_dir, os.path.basename(output_name)) + try: + write_parsed_tsv(parsed, config) + finally: + config.meta["file_name"] = output_name def plugin(): - """Register the flow.""" + """Register the parser plugin.""" return Parser( name="SEQUENCING_STATUS", func=parse_sequencing_status, - description="Parse the sequencing status files in a directory.", + description="Parse a sequencing status TSV using a YAML schema.", ) if __name__ == "__main__": - """Run the flow.""" - args = parse_args() - parse_sequencing_status(**vars(args)) + args = parse_args("Parse a sequencing status TSV using a YAML schema.") + parse_sequencing_status( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/parsers/parse_sra_data.py b/flows/parsers/parse_sra_data.py new file mode 100644 index 0000000..cc29e50 --- /dev/null +++ b/flows/parsers/parse_sra_data.py @@ -0,0 +1,55 @@ +"""Parse SRA accession TSV using a YAML schema. 
+ +The corresponding updater (``update_sra_data``) emits a flat TSV with +columns matching the headers in ``sra.types.yaml`` (``run_accession``, +``sra_accession``, ``platform``, ``library_source``, ``reads``, +``total_runs``, ``total_reads``, ``taxon_id``). This parser delegates +to the shared generic flat-TSV pipeline. +""" + +import os + +from flows.lib.conditional_import import flow +from flows.lib.utils import Parser, run_generic_tsv_parser +from flows.parsers.args import parse_args + + +@flow(log_prints=True) +def parse_sra_data( + working_yaml: str, + work_dir: str, + append: bool = False, + **kwargs, +) -> None: + """Parse SRA accession TSV using YAML schema. + + Args: + working_yaml (str): Path to the YAML configuration file. + work_dir (str): Working directory containing the input TSV. + append (bool): If True, load previous parsed data. + **kwargs: Ignored extra arguments from the wrapper. + """ + run_generic_tsv_parser( + working_yaml=working_yaml, + work_dir=work_dir, + append=append, + key_field="run_accession", + ) + + +def plugin(): + """Register the parser plugin.""" + return Parser( + name="SRA_DATA", + func=parse_sra_data, + description="Parse SRA accession TSV using a YAML schema.", + ) + + +if __name__ == "__main__": + args = parse_args("Parse SRA accession TSV using a YAML schema.") + parse_sra_data( + working_yaml=args.yaml_path, + work_dir=os.path.dirname(args.input_path) or ".", + append=args.append, + ) diff --git a/flows/prefect.yaml b/flows/prefect.yaml index 9f797bd..41bd2db 100644 --- a/flows/prefect.yaml +++ b/flows/prefect.yaml @@ -403,3 +403,175 @@ deployments: schedules: - *weekly work_pool: *goat_data_work_pool + + # ----------------------------------------------------------------------- + # Phase 2 fetch-parse-validate — triggered by Phase 1 update events + # ----------------------------------------------------------------------- + + - name: fetch-parse-validate-blobtoolkit + # Triggered by update.blobtoolkit.finished + 
entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.BLOBTOOLKIT" + yaml_path: "../goat-data-main/sources/btk/btk.types.yaml" + s3_path: "s3://goat/sources/btk/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: blobtoolkit + expect: + - update.blobtoolkit.finished + parameters: + parser: "ParserEnum.BLOBTOOLKIT" + yaml_path: "../goat-data-main/sources/btk/btk.types.yaml" + s3_path: "s3://goat/sources/btk/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + work_pool: *goat_data_work_pool + + - name: fetch-parse-validate-refseq-organelles + # Triggered by update.refseq.organelles.finished + entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.REFSEQ_ORGANELLES" + yaml_path: "../goat-data-main/sources/assembly-data/refseq_organelles.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: refseq.organelles + expect: + - update.refseq.organelles.finished + parameters: + parser: "ParserEnum.REFSEQ_ORGANELLES" + yaml_path: "../goat-data-main/sources/assembly-data/refseq_organelles.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + work_pool: *goat_data_work_pool + + - name: fetch-parse-validate-sra-data + # Triggered by update.sra.data.finished + entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.SRA_DATA" + yaml_path: "../goat-data-main/sources/sra/sra.types.yaml" + s3_path: "s3://goat/sources/sra/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: sra.data + expect: + - 
update.sra.data.finished + parameters: + parser: "ParserEnum.SRA_DATA" + yaml_path: "../goat-data-main/sources/sra/sra.types.yaml" + s3_path: "s3://goat/sources/sra/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + work_pool: *goat_data_work_pool + + - name: fetch-parse-validate-vgp-status + # Triggered by update.vgp.status.finished + entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/assembly-data/vgp_phase1.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/status-lists" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: vgp.status + expect: + - update.vgp.status.finished + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/assembly-data/vgp_phase1.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/status-lists" + dry_run: true + work_pool: *goat_data_work_pool + + - name: fetch-parse-validate-jgi-status + # Triggered by update.jgi.status.finished + entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/assembly-data/1kfg_manual_bioprojects.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: jgi.status + expect: + - update.jgi.status.finished + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/assembly-data/1kfg_manual_bioprojects.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + work_pool: *goat_data_work_pool + + - name: fetch-parse-validate-ucsc-assemblies + # Triggered by 
update.ucsc.assemblies.finished + entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/assembly-data/ucsc_ids.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: ucsc.assemblies + expect: + - update.ucsc.assemblies.finished + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/assembly-data/ucsc_ids.types.yaml" + s3_path: "s3://goat/sources/assembly-data/" + work_dir: "/home/ubuntu/tmp/test/assembly-data" + dry_run: true + work_pool: *goat_data_work_pool + + - name: fetch-parse-validate-nhm-status + # Triggered by update.nhm.status.finished (legacy NHM updater) + entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/status-lists/FILE_DTOL_nhm.types.yaml" + s3_path: "s3://goat/sources/status-lists/" + work_dir: "/home/ubuntu/tmp/test/status-lists" + dry_run: true + triggers: + - enabled: true + type: event + match: + prefect.resource.type: nhm.status + expect: + - update.nhm.status.finished + parameters: + parser: "ParserEnum.SEQUENCING_STATUS" + yaml_path: "../goat-data-main/sources/status-lists/FILE_DTOL_nhm.types.yaml" + s3_path: "s3://goat/sources/status-lists/" + work_dir: "/home/ubuntu/tmp/test/status-lists" + dry_run: true + work_pool: *goat_data_work_pool diff --git a/flows/updaters/update_blobtoolkit.py b/flows/updaters/update_blobtoolkit.py index 56056d0..cd3c78b 100644 --- a/flows/updaters/update_blobtoolkit.py +++ b/flows/updaters/update_blobtoolkit.py @@ -252,19 +252,21 @@ def update_blobtoolkit( """ if not is_safe_path(output_path): raise ValueError(f"Unsafe output path: {output_path}") - os.makedirs(output_path, exist_ok=True) + + 
resolved_path = os.path.abspath(output_path) + os.makedirs(resolved_path, exist_ok=True) row_count, file_count = fetch_blobtoolkit( - output_path, min_records=min_records + resolved_path, min_records=min_records ) if s3_path: - upload_s3_files(output_path, s3_path) + upload_s3_files(resolved_path, s3_path) emit_event( event="update.blobtoolkit.finished", resource={ - "prefect.resource.id": f"update.btk.{output_path}", + "prefect.resource.id": f"update.btk.{resolved_path}", "prefect.resource.type": "blobtoolkit", }, payload={"row_count": row_count, "file_count": file_count}, diff --git a/flows/updaters/update_ensembl_metadata.py b/flows/updaters/update_ensembl_metadata.py index 87da171..a4b3c7a 100644 --- a/flows/updaters/update_ensembl_metadata.py +++ b/flows/updaters/update_ensembl_metadata.py @@ -179,10 +179,12 @@ def update_ensembl_metadata( """ if not is_safe_path(output_path): raise ValueError(f"Unsafe output path: {output_path}") - os.makedirs(output_path, exist_ok=True) + + resolved_path = os.path.abspath(output_path) + os.makedirs(resolved_path, exist_ok=True) div = EnsemblDivision(division.lower()) - local_file, row_count = fetch_ensembl_division(div, output_path) + local_file, row_count = fetch_ensembl_division(div, resolved_path) if s3_path: output_name = DIVISION_OUTPUT_NAMES[div] @@ -192,7 +194,7 @@ def update_ensembl_metadata( emit_event( event="update.ensembl.metadata.finished", resource={ - "prefect.resource.id": f"update.ensembl.{division}.{output_path}", + "prefect.resource.id": f"update.ensembl.{division}.{resolved_path}", "prefect.resource.type": "ensembl.metadata", "prefect.resource.division": division, }, diff --git a/flows/updaters/update_google_sheets_status.py b/flows/updaters/update_google_sheets_status.py index f75228c..0d257bb 100644 --- a/flows/updaters/update_google_sheets_status.py +++ b/flows/updaters/update_google_sheets_status.py @@ -387,7 +387,9 @@ def update_google_sheets_status( """ if not is_safe_path(output_path): raise 
ValueError(f"Unsafe output path: {output_path}") - os.makedirs(output_path, exist_ok=True) + + resolved_path = os.path.abspath(output_path) + os.makedirs(resolved_path, exist_ok=True) if index_url is None: index_url = os.environ.get("GOAT_SHEETS_INDEX_URL", "") @@ -395,12 +397,12 @@ def update_google_sheets_status( total = 0 project_results = {} if index_url: - project_results = fetch_project_status_sheets(index_url, output_path) + project_results = fetch_project_status_sheets(index_url, resolved_path) total += sum(project_results.values()) else: print("No index URL provided — skipping project status sheets") - other_results = fetch_other_sheets(output_path) + other_results = fetch_other_sheets(resolved_path) total += sum(other_results.values()) if total < min_records: @@ -409,12 +411,12 @@ def update_google_sheets_status( ) if s3_path: - upload_s3_dir(output_path, s3_path) + upload_s3_dir(resolved_path, s3_path) emit_event( event="update.google.sheets.status.finished", resource={ - "prefect.resource.id": f"update.google.sheets.status.{output_path}", + "prefect.resource.id": f"update.google.sheets.status.{resolved_path}", "prefect.resource.type": "google.sheets.status", }, payload={ diff --git a/flows/updaters/update_ucsc_assemblies.py b/flows/updaters/update_ucsc_assemblies.py index e31ffb0..0233951 100644 --- a/flows/updaters/update_ucsc_assemblies.py +++ b/flows/updaters/update_ucsc_assemblies.py @@ -67,9 +67,11 @@ def update_ucsc_assemblies( """ if not is_safe_path(output_path): raise ValueError(f"Unsafe output path: {output_path}") - os.makedirs(output_path, exist_ok=True) - local_file, line_count = fetch_ucsc_hub_list(output_path) + resolved_path = os.path.abspath(output_path) + os.makedirs(resolved_path, exist_ok=True) + + local_file, line_count = fetch_ucsc_hub_list(resolved_path) if s3_path: remote_path = f"{s3_path.rstrip('/')}/{OUTPUT_FILENAME}" @@ -78,7 +80,7 @@ def update_ucsc_assemblies( emit_event( event="update.ucsc.assemblies.finished", resource={ 
- "prefect.resource.id": f"update.ucsc.{output_path}", + "prefect.resource.id": f"update.ucsc.{resolved_path}", "prefect.resource.type": "ucsc.assemblies", }, payload={"line_count": line_count}, From 6cbe6889e28cf3225b19e66e0a03b365917d11ab Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Wed, 6 May 2026 11:29:47 +0100 Subject: [PATCH 06/18] restore sra updater taxon root to eukaryota --- flows/updaters/update_sra_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flows/updaters/update_sra_data.py b/flows/updaters/update_sra_data.py index cedb523..6840f6b 100644 --- a/flows/updaters/update_sra_data.py +++ b/flows/updaters/update_sra_data.py @@ -315,7 +315,7 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: def update_sra_data( output_path: str, input_path: str = None, - root_taxid: str = "9612", + root_taxid: str = "2759", s3_path: str = None, min_records: int = 0, ) -> bool: @@ -371,7 +371,7 @@ def update_sra_data( [ required(OUTPUT_PATH), INPUT_PATH, - default(ROOT_TAXID, "9612"), + default(ROOT_TAXID, "2759"), S3_PATH, MIN_RECORDS, ], From c22204d7804e841ed4c26ffed9437c15157d90aa Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Thu, 7 May 2026 11:47:09 +0100 Subject: [PATCH 07/18] adds a script to investigate individual parsers --- scripts/run_parse_validate_tests.py | 330 ++++++++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100755 scripts/run_parse_validate_tests.py diff --git a/scripts/run_parse_validate_tests.py b/scripts/run_parse_validate_tests.py new file mode 100755 index 0000000..1942a9f --- /dev/null +++ b/scripts/run_parse_validate_tests.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 +"""Test wrapper: run parsers on example TSVs and compare against S3 sources. + +Usage: + python scripts/run_parse_validate_tests.py [--parser PARSER] [--verbose] + python scripts/run_parse_validate_tests.py --all + +This script: +1. Discovers parsers and example TSV inputs +2. 
Maps examples to YAML configs (from goat-data/sources) +3. Runs each parser and validates output +4. Compares output columns & line counts vs S3 source versions +5. Generates a test report + +Example files should be organized as: + - tsv_examples/*.tsv or *.tsv.gz + - tsv_examples//*.tsv or *.tsv.gz + +S3 source files are mirrored in: + - goat-data_s3_sources/{assembly-data,status-lists,sra,btk,conservation,genomesize-karyotype,uk-legislation}/imported/*.tsv +""" + +import argparse +import gzip +import json +import os +import subprocess +import sys +import tempfile +from csv import DictReader +from pathlib import Path +from typing import Optional, Tuple, List, Dict + +# Set SKIP_PREFECT before any imports from flows +os.environ["SKIP_PREFECT"] = "true" + +# Use absolute import paths +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +PARSER_INPUT_MAPPING = { + # parser_name -> (example_input, yaml_config, comparison_file) + "SEQUENCING_STATUS": [ + ("tsv_examples/vgp.tsv", "../goat-data/sources/assembly-data/vgp_phase1.types.yaml", "vgp_phase1.tsv"), + ("tsv_examples/AEGIS_expanded.tsv", "../goat-data/sources/status-lists/FILE_AEGIS.types.yaml", None), # no S3 source + ], + "REFSEQ_ORGANELLES": [ + ("tsv_examples/organelle_test.tsv", "../goat-data/sources/assembly-data/refseq_organelles.types.yaml", "refseq_organelles.tsv"), + ], + "BLOBTOOLKIT": [ + ("tsv_examples/blobtoolkit_test_results.tsv/btk.tsv.gz", "../goat-data/sources/btk/btk.types.yaml", "btk.tsv"), + ], + "SRA_DATA": [ + ("tsv_examples/sra.tsv", "../goat-data/sources/sra/sra.types.yaml", "sra.tsv"), + ], + "GENOMESIZE_KARYOTYPE": [ + ("tsv_examples/gsheets_test/DTOL_Plant_Genome_Size_Estimates.tsv", "../goat-data/sources/genomesize-karyotype/FILE_DTOL_Plant_Genome_Size_Estimates.types.yaml", None), + ], +} + +S3_SOURCES_ROOT = Path(__file__).parent.parent.parent / "goat-data_s3_sources" + + +def load_tsv_headers_and_count(path: str) -> Tuple[List[str], int]: + """Load TSV headers and line count 
(excluding header).""" + if path.endswith(".gz"): + fh = gzip.open(path, "rt", encoding="utf-8", newline="") + else: + fh = open(path, "rt", encoding="utf-8", newline="") + + try: + reader = DictReader(fh, delimiter="\t") + headers = reader.fieldnames or [] + count = sum(1 for _ in reader) + return list(headers), count + finally: + fh.close() + + +def find_s3_source(expected_name: str) -> Optional[str]: + """Locate the S3 source file for a parser output.""" + # Search in all subdirectories + for root, dirs, files in os.walk(S3_SOURCES_ROOT): + for f in files: + if f == expected_name or f == f"{expected_name}.gz": + return os.path.join(root, f) + return None + + +def run_parser( + parser_name: str, + input_path: str, + yaml_path: str, + work_dir: str, +) -> Tuple[bool, str, Optional[str]]: + """Run a parser and return (success, output_file, error_msg).""" + try: + import shutil + + # Copy input to work_dir with a renamed prefix to avoid conflicting with output names + input_abs = Path(input_path).resolve() + work_path = Path(work_dir) + # Rename to avoid output file conflicts (e.g., input btk.tsv.gz vs output btk.tsv.gz) + work_input = work_path / f"_input_{input_abs.name}" + + if not work_input.exists(): + shutil.copy2(str(input_abs), str(work_input)) + + # Construct the Python module path from parser name + module_name = "flows.parsers." 
+ "parse_" + parser_name.lower().replace("_", "_") + + cmd = [ + sys.executable, + "-m", + module_name, + "-i", + str(work_input), + "-y", + yaml_path, + ] + + env = os.environ.copy() + env["SKIP_PREFECT"] = "true" + + result = subprocess.run( + cmd, + cwd=Path(__file__).parent.parent, + capture_output=True, + text=True, + timeout=60, + env=env, + ) + + if result.returncode != 0: + return False, None, f"Parser failed: {result.stderr}" + + # Find the output file in work_dir (excluding the input file and any _input_* files) + output_files = list(work_path.glob("*.tsv")) + list(work_path.glob("*.tsv.gz")) + output_files = [f for f in output_files if not f.name.startswith("_input_")] + + if not output_files: + return False, None, "No output file generated" + + output_file = str(output_files[0]) + return True, output_file, None + + except subprocess.TimeoutExpired: + return False, None, "Parser timeout" + except Exception as e: + return False, None, str(e) + + +def compare_outputs( + parsed_output: str, + s3_source: str, +) -> Dict[str, any]: + """Compare parsed output against S3 source.""" + try: + parsed_headers, parsed_count = load_tsv_headers_and_count(parsed_output) + s3_headers, s3_count = load_tsv_headers_and_count(s3_source) + + headers_match = set(parsed_headers) == set(s3_headers) + headers_extra = set(parsed_headers) - set(s3_headers) + headers_missing = set(s3_headers) - set(parsed_headers) + + count_diff = abs(parsed_count - s3_count) + count_pct_diff = 100.0 * count_diff / max(s3_count, 1) + + return { + "headers_match": headers_match, + "headers_extra": list(headers_extra), + "headers_missing": list(headers_missing), + "parsed_count": parsed_count, + "s3_count": s3_count, + "count_diff": count_diff, + "count_pct_diff": count_pct_diff, + "line_counts_similar": count_pct_diff < 10, # Allow 10% variance + } + except Exception as e: + return {"error": str(e)} + + +def run_tests(parser_name: Optional[str] = None, verbose: bool = False) -> int: + """Run tests 
for specified parser(s) and compare outputs.""" + # Determine which parsers to test + if parser_name: + parsers_to_test = [parser_name.upper()] + if parser_name.upper() not in PARSER_INPUT_MAPPING: + print(f"Error: Parser {parser_name} not configured in PARSER_INPUT_MAPPING") + return 1 + else: + parsers_to_test = list(PARSER_INPUT_MAPPING.keys()) + + results = {} + + for pname in parsers_to_test: + print(f"\n{'='*70}") + print(f"Testing parser: {pname}") + print(f"{'='*70}") + + if pname not in PARSER_INPUT_MAPPING: + print(f" ⚠️ No test configuration found") + continue + + test_configs = PARSER_INPUT_MAPPING[pname] + parser_results = [] + + for input_path, yaml_path, comparison_file in test_configs: + input_abs = Path(__file__).parent.parent / input_path + + if not input_abs.exists(): + print(f" ⚠️ Input not found: {input_path}") + parser_results.append({"status": "skipped", "reason": "input_not_found"}) + continue + + print(f"\n Input: {input_path}") + print(f" YAML: {yaml_path}") + + # Run parser in temp directory + with tempfile.TemporaryDirectory() as tmpdir: + yaml_abs = Path(__file__).parent.parent / yaml_path + + success, output_file, error = run_parser( + pname, + str(input_abs), + str(yaml_abs), + tmpdir, + ) + + if not success: + print(f" ❌ Parser failed: {error}") + parser_results.append({"status": "failed", "error": error}) + continue + + print(f" ✓ Parser succeeded") + output_headers, output_count = load_tsv_headers_and_count(output_file) + print(f" Output: {os.path.basename(output_file)} ({output_count} rows, {len(output_headers)} cols)") + + # Compare against S3 source if available + if comparison_file: + s3_source = find_s3_source(comparison_file) + + if s3_source: + print(f" Comparing against S3 source: {comparison_file}") + comparison = compare_outputs(output_file, s3_source) + + if "error" in comparison: + print(f" ⚠️ Comparison failed: {comparison['error']}") + else: + s3_headers, s3_count = load_tsv_headers_and_count(s3_source) + print(f" S3 
source: {s3_count} rows, {len(s3_headers)} cols") + + if comparison["headers_match"]: + print(f" ✓ Headers match") + else: + print(f" ❌ Headers mismatch:") + if comparison["headers_extra"]: + print(f" Extra: {comparison['headers_extra']}") + if comparison["headers_missing"]: + print(f" Missing: {comparison['headers_missing']}") + + if comparison["line_counts_similar"]: + print(f" ✓ Line counts similar (~{comparison['count_pct_diff']:.1f}% diff)") + else: + print(f" ⚠️ Line counts differ substantially:") + print(f" Parsed: {comparison['parsed_count']}, S3: {comparison['s3_count']} ({comparison['count_pct_diff']:.1f}% diff)") + + parser_results.append({ + "status": "success", + "comparison": comparison, + }) + else: + print(f" ℹ️ No S3 source found for {comparison_file}") + parser_results.append({ + "status": "success", + "comparison": None, + }) + else: + parser_results.append({ + "status": "success", + "comparison": None, + }) + + results[pname] = parser_results + + # Summary + print(f"\n{'='*70}") + print("Summary") + print(f"{'='*70}") + + all_passed = True + for pname, presults in results.items(): + passed = sum(1 for r in presults if r.get("status") == "success" and (r.get("comparison") is None or r["comparison"].get("headers_match") and r["comparison"].get("line_counts_similar"))) + total = len(presults) + status = "✓" if passed == total else "❌" + print(f"{status} {pname}: {passed}/{total} passed") + if passed < total: + all_passed = False + + return 0 if all_passed else 1 + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--parser", + help="Test a specific parser by name (e.g., SEQUENCING_STATUS)", + default=None, + ) + parser.add_argument( + "--all", + action="store_true", + help="Run all configured tests", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Print verbose output", + ) + + args = parser.parse_args() + + 
sys.exit(run_tests(parser_name=args.parser, verbose=args.verbose)) + + +if __name__ == "__main__": + main() From 6c9bd01ec0d88e28b0d581d36d23a621ea7f6232 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Fri, 8 May 2026 15:41:35 +0100 Subject: [PATCH 08/18] fix tolqc updater --- flows/updaters/update_google_sheets_status.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flows/updaters/update_google_sheets_status.py b/flows/updaters/update_google_sheets_status.py index 0d257bb..8a5b891 100644 --- a/flows/updaters/update_google_sheets_status.py +++ b/flows/updaters/update_google_sheets_status.py @@ -230,6 +230,7 @@ def _fetch_dtol_tolqc_status(output_path: str) -> int: ) df.columns = ( df.columns.str.strip() + .str.replace(".", "") .str.replace(" ", "_") .str.replace(r"\(", "", regex=True) .str.replace(r"\)", "", regex=True) From 4aea5877a89546b57582ae703f100d891146fec1 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Mon, 11 May 2026 15:17:07 +0100 Subject: [PATCH 09/18] fixes fetching for EBP schema and corrects logic for status lists Co-authored-by: Copilot --- flows/updaters/update_google_sheets_status.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/flows/updaters/update_google_sheets_status.py b/flows/updaters/update_google_sheets_status.py index 8a5b891..e9d4c2f 100644 --- a/flows/updaters/update_google_sheets_status.py +++ b/flows/updaters/update_google_sheets_status.py @@ -55,7 +55,7 @@ def _open_google_spreadsheet( - acronym: str, url: str, header_index: int + acronym: str, url: str, header_index: str ) -> pd.DataFrame: """Download a published Google Sheet as TSV and return a DataFrame.""" encodings = ["utf-8", "ISO-8859-1", "latin1"] @@ -114,7 +114,7 @@ def _create_mandatory_columns(df: pd.DataFrame) -> pd.DataFrame: "target_list_status", "sequencing_status", ]: if col not in df.columns: - df[col] = np.nan + df[col] = None return df @@ -122,7 +122,7 @@ def _expand_target_status(df: pd.DataFrame, acronym: 
str) -> pd.DataFrame: """Populate long_list, family_representative, other_priority columns.""" for col in ["long_list", "family_representative", "other_priority"]: if col not in df.columns: - df[col] = np.nan + df[col] = None df["long_list"] = acronym lower = acronym.lower() @@ -164,7 +164,7 @@ def _create_status_columns(df: pd.DataFrame, acronym: str) -> pd.DataFrame: ] for s in statuses: if s not in df.columns: - df[s] = np.nan + df[s] = None df.loc[df["sequencing_status"] == s, s] = acronym return df @@ -176,12 +176,13 @@ def _expand_sequencing_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame: df.loc[df["open"] == acronym, "in_progress"] = acronym df.loc[df["data_generation"] == acronym, "in_progress"] = acronym df.loc[df["in_assembly"] == acronym, "in_progress"] = acronym + df.loc[df["in_progress"] == acronym, "data_generation"] = acronym df.loc[df["in_progress"] == acronym, "sample_acquired"] = acronym df.loc[df["sample_acquired"] == acronym, "sample_collected"] = acronym return df -def _process_project(acronym: str, url: str, header_row: int) -> pd.DataFrame: +def _process_project(acronym: str, url: str, header_row: str) -> pd.DataFrame: """Full processing pipeline for one project status sheet.""" df = _open_google_spreadsheet(acronym, url, header_row) df = _general_cleanup(df) @@ -292,6 +293,7 @@ def fetch_project_status_sheets( usecols=["project_acronym", "published_url", "start_header_line"], dtype={"project_acronym": str, "published_url": str, "start_header_line": int}, ) + print(f"Found {len(index_df)} project sheets in index") results = {} for _, row in index_df.iterrows(): @@ -310,6 +312,7 @@ def fetch_project_status_sheets( failed_path = os.path.join(output_dir, f"{acronym}_expanded.tsv.failed") open(failed_path, "w").close() # noqa: SIM115 — legacy compat results[acronym] = 0 + print(results) return results From 6b2d55f0af087a74cda4165dc1bcbe6186c1cbae Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Wed, 13 May 2026 18:07:54 +0100 
Subject: [PATCH 10/18] corrects the updaters for VGP and distinguish between legacy versus current source Co-authored-by: Copilot --- flows/prefect.yaml | 17 +- flows/updaters/update_vgp_original_status.py | 92 +++++++ flows/updaters/update_vgp_status.py | 265 ++++++++++++++++--- 3 files changed, 339 insertions(+), 35 deletions(-) create mode 100644 flows/updaters/update_vgp_original_status.py diff --git a/flows/prefect.yaml b/flows/prefect.yaml index 41bd2db..1cae1d3 100644 --- a/flows/prefect.yaml +++ b/flows/prefect.yaml @@ -269,12 +269,23 @@ deployments: # Phase 1 updaters — external data fetching (migrated from goat-data) # ----------------------------------------------------------------------- + - name: update-vgp-original-status + # Fetch VGP original status list from GitHub YAML tracker (monthly) + entrypoint: flows/updaters/update_vgp_original_status.py:update_vgp_original_status + parameters: + output_path: "/home/ubuntu/tmp/test/status-lists/vgp_original_status.tsv" + s3_path: s3://goat/resources/status-lists/vgp_original_status.tsv + min_records: 100 + schedules: + - *monthly + work_pool: *goat_data_work_pool + - name: update-vgp-status - # Fetch VGP status list from GitHub YAML tracker + # Fetch VGP Ordinal Phase1+ status from the live Google Sheet entrypoint: flows/updaters/update_vgp_status.py:update_vgp_status parameters: - output_path: "/home/ubuntu/tmp/test/status-lists/vgp_status.tsv" - s3_path: s3://goat/resources/status-lists/vgp_status.tsv + output_path: "/home/ubuntu/tmp/test/status-lists/VGP_Ordinal_Phase1_plus.tsv" + s3_path: s3://goat/resources/status-lists/VGP_Ordinal_Phase1_plus.tsv min_records: 100 schedules: - *weekly diff --git a/flows/updaters/update_vgp_original_status.py b/flows/updaters/update_vgp_original_status.py new file mode 100644 index 0000000..63462ee --- /dev/null +++ b/flows/updaters/update_vgp_original_status.py @@ -0,0 +1,92 @@ +import os + +from flows.lib.conditional_import import emit_event, flow, task +from 
flows.lib.shared_args import MIN_RECORDS, OUTPUT_PATH, S3_PATH, parse_args, required +from flows.lib.utils import upload_to_s3 +from flows.updaters.api import api_config as cfg +from flows.updaters.api import api_tools as at + + +@task(retries=2, retry_delay_seconds=5, log_prints=True) +def fetch_vgp_original_tsv( + file_path: str, + min_lines: int = 1, +) -> int: + """Fetch VGP original status list from the Vertebrate Genomes Project GitHub tracker. + + Downloads the VGP genome portal YAML tracker, extracts species records, + and writes a TSV with per-species status fields. + + Args: + file_path (str): Path to the output TSV file. + min_lines (int): Minimum number of rows expected. + + Returns: + int: Number of lines written to the output file. + """ + at.get_from_source( + cfg.vgl_url_opener, + cfg.vgl_hub_count_handler, + cfg.vgl_row_handler, + cfg.vgl_fieldnames, + file_path, + ) + + with open(file_path, "r") as f: + line_count = sum(1 for _ in f) + + if line_count < min_lines: + raise RuntimeError( + f"VGP file {file_path} has fewer than {min_lines} lines: {line_count}" + ) + print(f"Wrote {line_count} lines to {file_path}") + return line_count + + +@task(log_prints=True) +def upload_s3_tsv(local_path: str, s3_path: str) -> None: + """Upload VGP TSV to S3.""" + print(f"Uploading VGP TSV from {local_path} to {s3_path}") + upload_to_s3(local_path, s3_path) + + +@flow() +def update_vgp_original_status( + output_path: str, s3_path: str = None, min_records: int = 0 +) -> bool: + """Fetch the VGP original status list and optionally upload to S3. + + This is the scarcely-updated VGP source from the GitHub YAML tracker. + See update_vgp_status.py for the frequently-updated live Google Sheet. + + Args: + output_path (str): Path to the output TSV file. + s3_path (str): Optional S3 path to upload the result. + min_records (int): Minimum record count to accept the output. + + Returns: + bool: True on success. 
+ """ + + resolved_path = os.path.abspath(output_path) + os.makedirs(os.path.dirname(resolved_path), exist_ok=True) + line_count = fetch_vgp_original_tsv(resolved_path, min_records) + if line_count > min_records and s3_path: + upload_s3_tsv(resolved_path, s3_path) + emit_event( + event="update.vgp.original.status.finished", + resource={ + "prefect.resource.id": f"update.vgp.original.{resolved_path}", + "prefect.resource.type": "vgp.original.status", + }, + payload={"line_count": line_count}, + ) + return True + + +if __name__ == "__main__": + args = parse_args( + [required(OUTPUT_PATH), S3_PATH, MIN_RECORDS], + "Fetch VGP original status list from the Vertebrate Genomes Project.", + ) + update_vgp_original_status(**vars(args)) diff --git a/flows/updaters/update_vgp_status.py b/flows/updaters/update_vgp_status.py index 2fc57aa..d2d2f8a 100644 --- a/flows/updaters/update_vgp_status.py +++ b/flows/updaters/update_vgp_status.py @@ -1,52 +1,248 @@ +"""Fetch VGP Ordinal Phase1+ status from the live Google Sheet. + +This updater replaces the legacy ``vgp_live_sheet_curation.py`` script +from goat-data. It downloads the live VGP spreadsheet, cleans headers, +translates project names to canonical acronyms, expands sequencing +status columns following the GoaT status hierarchy, and writes a TSV +matching the ``FILE_VGP_Ordinal_Phase1.types.yaml`` schema. + +The companion ``update_vgp_original_status.py`` fetches the less +frequently updated VGP GitHub YAML tracker source. 
+""" + +import csv +import io import os +import numpy as np +import pandas as pd + from flows.lib.conditional_import import emit_event, flow, task from flows.lib.shared_args import MIN_RECORDS, OUTPUT_PATH, S3_PATH, parse_args, required -from flows.lib.utils import upload_to_s3 -from flows.updaters.api import api_config as cfg -from flows.updaters.api import api_tools as at +from flows.lib.utils import is_safe_path, safe_get, upload_to_s3 + +# Published (export) link to the VGP Ordinal Phase1+ Google Sheet +VGP_SHEET_URL = ( + "https://docs.google.com/spreadsheets/d/1Jwjv6Kwc6VIn1UMMhnG6kvFCxjwGdC5b7p_HtbDOMOs" + "/export?format=tsv" + "&id=1Jwjv6Kwc6VIn1UMMhnG6kvFCxjwGdC5b7p_HtbDOMOs" + "&gid=1380659438" +) + +# Columns to import from the spreadsheet +SOURCE_COLUMNS = [ + "Order", + "Lineage", + "Superorder", + "Family Scientific Name", + "Scientific Name", + "English Name", + "NCBI taxon ID", + "Status", + "QV", + "IUCN (2016-2024)", + "CITES", + "Main project", + "Second project", +] + +# Map free-text project names to canonical EBP acronyms +PROJECT_ACRONYMS = { + "Sanger 25G": "25GP", + "Sanger 25G project": "25GP", + "AfricaBP": "AFRICABP", + "Cetacean GP": "CGP", + "DToL": "DTOL", + "DToL?": "DTOL", + "Yggdrasil": "YGG", + "CatalanBP": "CBP", + "Canadian Biogenome Project": "CANBP", + "Canada Biogenome Project": "CANBP", + "Threatened Species Initiative (TSI)": "TSI", + "Minderoo OceanOmics": "OG", + "DToL, ERGA": "DTOL,ERGA", + "Amazoomics : Genomics of Brazilian Biodiversity": "AMAZOOMICS,GBB", + "AmaZoomics : Genomics of Brazilian Biodiversity": "AMAZOOMICS,GBB", + "Individual, Google": "Individual,Google", +} + +# Numeric status code → GoaT sequencing status +STATUS_MAP = { + "0": "", + "1": "sample_collected", + "2": "", + "3": "in_progress", + "4": "open", + "5": "open", +} + +# Full ordered list of GoaT sequencing status columns +SEQUENCING_STATUSES = [ + "sample_collected", + "sample_acquired", + "in_progress", + "data_generation", + "in_assembly", + 
"insdc_submitted", + "open", + "insdc_open", + "published", +] + +# --------------------------------------------------------------------------- +# Processing helpers +# --------------------------------------------------------------------------- + + +def _cleanup_table(df: pd.DataFrame) -> pd.DataFrame: + """Replace whitespace-only cells with NaN, drop empty rows/cols.""" + df = df.replace(r"^\s*$", np.nan, regex=True) + df = df.replace(r"^ +| +$", "", regex=True) + df.dropna(how="all", axis=1, inplace=True) + df.dropna(how="all", axis=0, inplace=True) + return df + + +def _cleanup_headers(df: pd.DataFrame) -> pd.DataFrame: + """Normalise column headers: lowercase, underscored, no parens.""" + df.columns = ( + df.columns.str.replace(" ", "_") + .str.replace(r"\(", "", regex=True) + .str.replace(r"\)", "", regex=True) + .str.lower() + ) + return df + + +def _translate_projects(df: pd.DataFrame) -> pd.DataFrame: + """Map free-text project names to canonical acronyms.""" + for col in ["main_project", "second_project", "project"]: + if col in df.columns: + df[col] = df[col].map( + lambda v: PROJECT_ACRONYMS.get(v, v) if pd.notna(v) else v + ) + return df + + +def _build_all_projects(df: pd.DataFrame) -> pd.DataFrame: + """Create 'all_projects' column from project + main + second.""" + df["all_projects"] = df.apply( + lambda row: ",".join( + sorted( + set( + x + for x in [ + row.get("project"), + row.get("main_project"), + row.get("second_project"), + ] + if pd.notna(x) + ) + ) + ), + axis=1, + ) + return df -@task(retries=2, retry_delay_seconds=5, log_prints=True) -def fetch_vgp_tsv( - file_path: str, - min_lines: int = 1, -) -> int: - """Fetch VGP status list from the Vertebrate Genomes Project GitHub tracker. - Downloads the VGP genome portal YAML tracker, extracts species records, - and writes a TSV with per-species status fields. 
+def _expand_sequencing_status(df: pd.DataFrame) -> pd.DataFrame: + """Map numeric status codes and cascade the GoaT status hierarchy.""" + # Ensure all status columns exist + for col in SEQUENCING_STATUSES: + if col not in df.columns: + df[col] = None + + # Map numeric codes to status names + df["sequencing_status"] = df["status"].map(STATUS_MAP) + + # Populate status columns with all_projects for matching rows + for status in SEQUENCING_STATUSES: + df.loc[df["sequencing_status"] == status, status] = df["all_projects"] + + # Cascade status hierarchy upward + df.loc[df["published"] == df["all_projects"], "insdc_open"] = df["all_projects"] + df.loc[df["insdc_open"] == df["all_projects"], "open"] = df["all_projects"] + df.loc[df["open"] == df["all_projects"], "in_progress"] = df["all_projects"] + df.loc[df["data_generation"] == df["all_projects"], "in_progress"] = df[ + "all_projects" + ] + df.loc[df["in_assembly"] == df["all_projects"], "in_progress"] = df["all_projects"] + df.loc[df["in_progress"] == df["all_projects"], "data_generation"] = df[ + "all_projects" + ] + df.loc[df["in_progress"] == df["all_projects"], "sample_acquired"] = df[ + "all_projects" + ] + df.loc[df["sample_acquired"] == df["all_projects"], "sample_collected"] = df[ + "all_projects" + ] + return df + + +def _process_vgp_sheet(raw_tsv: str) -> pd.DataFrame: + """Full processing pipeline for the VGP live sheet. Args: - file_path (str): Path to the output TSV file. - min_lines (int): Minimum number of rows expected. + raw_tsv (str): Raw TSV text content from Google Sheets. Returns: - int: Number of lines written to the output file. + pd.DataFrame: Cleaned, expanded DataFrame ready for export. 
""" - at.get_from_source( - cfg.vgl_url_opener, - cfg.vgl_hub_count_handler, - cfg.vgl_row_handler, - cfg.vgl_fieldnames, - file_path, + df = pd.read_csv( + io.StringIO(raw_tsv), + sep="\t", + dtype=object, + engine="python", + on_bad_lines="warn", + usecols=SOURCE_COLUMNS, ) + df = _cleanup_table(df) + df = _cleanup_headers(df) + df["project"] = "VGP" + df = _translate_projects(df) + df = _build_all_projects(df) + df = _expand_sequencing_status(df) + return df - with open(file_path, "r") as f: - line_count = sum(1 for _ in f) - if line_count < min_lines: +# --------------------------------------------------------------------------- +# Prefect tasks and flow +# --------------------------------------------------------------------------- + + +@task(retries=2, retry_delay_seconds=30, log_prints=True) +def fetch_vgp_live_sheet(output_path: str, min_records: int = 0) -> int: + """Download the VGP Ordinal Phase1+ Google Sheet and write a TSV. + + Args: + output_path (str): Path to the output TSV file. + min_records (int): Minimum number of rows expected. + + Returns: + int: Number of data rows written. 
+ """ + response = safe_get(VGP_SHEET_URL, timeout=120) + response.raise_for_status() + + df = _process_vgp_sheet(response.text) + row_count = len(df) + print(f"VGP live sheet: {row_count} rows after processing") + + if row_count < min_records: raise RuntimeError( - f"VGP file {file_path} has fewer than {min_lines} lines: {line_count}" + f"VGP live sheet has fewer than {min_records} rows: {row_count}" ) - print(f"Wrote {line_count} lines to {file_path}") - return line_count + + df.to_csv(output_path, sep="\t", index=False) + print(f"Wrote {output_path}") + return row_count @task(log_prints=True) def upload_s3_tsv(local_path: str, s3_path: str) -> None: """Upload VGP TSV to S3.""" - print(f"Uploading VGP TSV from {local_path} to {s3_path}") + print(f"Uploading {local_path} to {s3_path}") upload_to_s3(local_path, s3_path) @@ -54,7 +250,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: def update_vgp_status( output_path: str, s3_path: str = None, min_records: int = 0 ) -> bool: - """Fetch the VGP status list and optionally upload to S3. + """Fetch the VGP Ordinal Phase1+ live sheet and optionally upload to S3. Args: output_path (str): Path to the output TSV file. @@ -64,19 +260,24 @@ def update_vgp_status( Returns: bool: True on success. 
""" + if not is_safe_path(output_path): + raise ValueError(f"Unsafe output path: {output_path}") resolved_path = os.path.abspath(output_path) os.makedirs(os.path.dirname(resolved_path), exist_ok=True) - line_count = fetch_vgp_tsv(resolved_path, min_records) - if line_count > min_records and s3_path: + + row_count = fetch_vgp_live_sheet(resolved_path, min_records) + + if row_count > min_records and s3_path: upload_s3_tsv(resolved_path, s3_path) + emit_event( event="update.vgp.status.finished", resource={ "prefect.resource.id": f"update.vgp.{resolved_path}", "prefect.resource.type": "vgp.status", }, - payload={"line_count": line_count}, + payload={"row_count": row_count}, ) return True @@ -84,6 +285,6 @@ def update_vgp_status( if __name__ == "__main__": args = parse_args( [required(OUTPUT_PATH), S3_PATH, MIN_RECORDS], - "Fetch VGP status list from the Vertebrate Genomes Project.", + "Fetch VGP Ordinal Phase1+ status from the live Google Sheet.", ) update_vgp_status(**vars(args)) From f92500a9965c3945531c095a6928f6da029d8b88 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Thu, 28 May 2026 09:19:52 +0100 Subject: [PATCH 11/18] create local tests and round of flow adjustment --- flows/lib/local_fetch_parse_validate.py | 266 ++++++++++++++++++ .../batch_validate_status_lists.py | 153 ++++++++++ flows/prefect.yaml | 48 +++- scripts/TESTING_README.md | 121 ++++++++ scripts/run_parse_validate_tests.py | 2 +- 5 files changed, 577 insertions(+), 13 deletions(-) create mode 100644 flows/lib/local_fetch_parse_validate.py create mode 100644 flows/orchestrators/batch_validate_status_lists.py create mode 100644 scripts/TESTING_README.md diff --git a/flows/lib/local_fetch_parse_validate.py b/flows/lib/local_fetch_parse_validate.py new file mode 100644 index 0000000..e1e4757 --- /dev/null +++ b/flows/lib/local_fetch_parse_validate.py @@ -0,0 +1,266 @@ +"""Local fetch-parse-validate wrapper for testing without S3. 
+ +This mirrors the production wrapper_fetch_parse_validate.py but: +- Skips S3 fetch (uses a local input TSV directly) +- Copies YAML + input TSV into work_dir with expected names +- Runs the selected parser (or SKIP_PARSING) +- Runs blobtk validate locally (no S3 upload) + +Usage: + SKIP_PREFECT=true python -m flows.lib.local_fetch_parse_validate \ + -p SKIP_PARSING \ + --yaml-path ../goat-data/sources/status-lists/FILE_VGP_Ordinal_Phase1.types.yaml \ + --input-tsv tsv_examples/VGP_Ordinal_Phase1_plus.tsv \ + --work-dir /tmp/test-vgp + + SKIP_PREFECT=true python -m flows.lib.local_fetch_parse_validate \ + -p REFSEQ_ORGANELLES \ + --yaml-path ../goat-data/sources/assembly-data/refseq_organelles.types.yaml \ + --input-tsv tsv_examples/refseq_organelles.tsv \ + --work-dir /tmp/test-refseq +""" + +import argparse +import os +import shutil +import subprocess +import sys + +from flows.lib.conditional_import import flow +from flows.lib.utils import enum_action, load_config +from flows.parsers.register import register_plugins +from flows.validators.validate_file_pair import validate_file_pair + +PARSERS = register_plugins() + + +def _check_blobtk(): + """Verify blobtk is available on PATH.""" + try: + subprocess.run( + ["blobtk", "--version"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + except FileNotFoundError: + print( + "[local] WARNING: 'blobtk' not found on PATH. " + "Validation step will be skipped.\n" + " Install via: cd blobtk/rust && cargo build --release && " + "export PATH=$PWD/target/release:$PATH", + file=sys.stderr, + ) + return False + return True + + +def _copy_yaml_to_workdir(yaml_path: str, work_dir: str) -> str: + """Copy the YAML and its dependencies into work_dir. + + Returns: + str: Path to the copied YAML in work_dir. + """ + os.makedirs(work_dir, exist_ok=True) + config = load_config(yaml_path) + dest = os.path.join(work_dir, os.path.basename(yaml_path)) + shutil.copy(yaml_path, dest) + + # Copy dependency YAML files (e.g. 
"needs:" references) + if "needs" in config.config.get("file", {}): + source_dir = os.path.dirname(yaml_path) + needs = config.config["file"]["needs"] + if isinstance(needs, str): + needs = [needs] + for dep in needs: + dep_path = os.path.join(source_dir, dep) + if os.path.exists(dep_path): + shutil.copy(dep_path, work_dir) + return dest + + +def _place_input_tsv(input_tsv: str, yaml_path: str, work_dir: str) -> str: + """Copy or symlink the input TSV into work_dir with the name expected by the YAML. + + Handles gzip: if YAML expects .gz but input is plain, compress on copy. + If YAML expects plain but input is .gz, decompress on copy. + + Returns: + str: Path to the TSV in work_dir. + """ + import gzip as gzip_mod + + config = load_config(yaml_path) + expected_name = os.path.basename(config.config["file"]["name"]) + dest = os.path.join(work_dir, expected_name) + + # If input already matches expected location, skip + if os.path.abspath(input_tsv) == os.path.abspath(dest): + return dest + + expects_gz = expected_name.endswith(".gz") + input_is_gz = input_tsv.endswith(".gz") + + if expects_gz and not input_is_gz: + # Compress plain input into .gz destination + with open(input_tsv, "rb") as f_in, gzip_mod.open(dest, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + elif not expects_gz and input_is_gz: + # Decompress .gz input into plain destination + with gzip_mod.open(input_tsv, "rb") as f_in, open(dest, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + else: + # Same format — straight copy + shutil.copy(input_tsv, dest) + return dest + + +@flow(log_prints=True) +def local_fetch_parse_validate( + parser: str, + yaml_path: str, + input_tsv: str, + work_dir: str, + taxdump_path: str = None, + append: bool = False, + min_valid: int = 0, + min_assigned: int = 0, +) -> bool: + """Run the parse-validate pipeline locally without S3. + + Args: + parser: Parser enum name (e.g. "SKIP_PARSING", "REFSEQ_ORGANELLES"). + yaml_path: Path to the source YAML configuration file. 
+ input_tsv: Path to the input TSV file from the updater. + work_dir: Working directory for intermediate files. + taxdump_path: Optional path to an NCBI taxdump for taxonomy validation. + append: Whether to run in append mode. + min_valid: Minimum expected valid row count. + min_assigned: Minimum expected assigned taxa count. + + Returns: + bool: True if validation passed. + """ + yaml_path = os.path.abspath(yaml_path) + input_tsv = os.path.abspath(input_tsv) + work_dir = os.path.abspath(work_dir) + + if not os.path.exists(yaml_path): + raise FileNotFoundError(f"YAML file not found: {yaml_path}") + if not os.path.exists(input_tsv): + raise FileNotFoundError(f"Input TSV not found: {input_tsv}") + + print(f"[local] Parser: {parser}") + print(f"[local] YAML: {yaml_path}") + print(f"[local] Input: {input_tsv}") + print(f"[local] Work: {work_dir}") + + # Step 1: Copy YAML to work_dir + working_yaml = _copy_yaml_to_workdir(yaml_path, work_dir) + print(f"[local] Copied YAML → {working_yaml}") + + # Step 2: Place input TSV with expected filename + tsv_dest = _place_input_tsv(input_tsv, yaml_path, work_dir) + print(f"[local] Input TSV → {tsv_dest}") + + # Step 3: Run parser + parser_key = parser.name if hasattr(parser, "name") else str(parser) + file_parser = PARSERS.parsers[parser_key] + print(f"[local] Running parser: {file_parser.name}") + file_parser.func( + working_yaml=working_yaml, + work_dir=work_dir, + append=append, + data_freeze_path=None, + ) + print("[local] Parser completed") + + # Step 4: Validate (no S3 upload — s3_path=None) + if _check_blobtk(): + print("[local] Running validation...") + status = validate_file_pair( + yaml_path=yaml_path, + work_dir=work_dir, + taxdump_path=taxdump_path, + s3_path=None, + min_valid=min_valid, + min_assigned=min_assigned, + ) + if status: + print("[local] ✓ Validation PASSED") + else: + print("[local] ✗ Validation FAILED") + else: + print("[local] ⚠ Validation SKIPPED (blobtk not available)") + status = None + return status 
+ + +def main(): + """CLI entry point.""" + arg_parser = argparse.ArgumentParser( + description="Local fetch-parse-validate (no S3).", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + arg_parser.add_argument( + "-p", "--parser", + required=True, + type=str, + action=enum_action(PARSERS.ParserEnum), + help=f"Parser to use. Choices: {[e.name for e in PARSERS.ParserEnum]}", + ) + arg_parser.add_argument( + "--yaml-path", + required=True, + help="Path to the source YAML configuration file.", + ) + arg_parser.add_argument( + "--input-tsv", + required=True, + help="Path to the input TSV file (from the updater).", + ) + arg_parser.add_argument( + "--work-dir", + default="/tmp/local-fpv", + help="Working directory for intermediate files.", + ) + arg_parser.add_argument( + "--taxdump-path", + default=None, + help="Path to an NCBI taxdump directory.", + ) + arg_parser.add_argument( + "--append", + action="store_true", + help="Run in append mode.", + ) + arg_parser.add_argument( + "--min-valid", + type=int, + default=0, + help="Minimum expected valid row count.", + ) + arg_parser.add_argument( + "--min-assigned", + type=int, + default=0, + help="Minimum expected assigned taxa count.", + ) + + args = arg_parser.parse_args() + success = local_fetch_parse_validate( + parser=args.parser, + yaml_path=args.yaml_path, + input_tsv=args.input_tsv, + work_dir=args.work_dir, + taxdump_path=args.taxdump_path, + append=args.append, + min_valid=args.min_valid, + min_assigned=args.min_assigned, + ) + # Exit 0 if validation passed or was skipped (None), 1 if failed + sys.exit(0 if success is not False else 1) + + +if __name__ == "__main__": + main() diff --git a/flows/orchestrators/batch_validate_status_lists.py b/flows/orchestrators/batch_validate_status_lists.py new file mode 100644 index 0000000..dc118ac --- /dev/null +++ b/flows/orchestrators/batch_validate_status_lists.py @@ -0,0 +1,153 @@ +"""Batch validate all Google Sheets status list TSVs. 
+ +Triggered by update.google.sheets.status.finished, this flow iterates +through all FILE_*.types.yaml in the status-lists directory and runs +SKIP_PARSING + validate for each one whose corresponding TSV is present +in work_dir. +""" + +import os +import sys +from glob import glob +from typing import Optional + +from flows.lib.conditional_import import flow +from flows.lib.utils import load_config +from flows.parsers.register import register_plugins +from flows.validators.validate_file_pair import validate_file_pair + +PARSERS = register_plugins() + + +def _copy_yaml_to_workdir(yaml_path: str, work_dir: str) -> str: + """Copy YAML (and dependencies) to work_dir.""" + import shutil + + os.makedirs(work_dir, exist_ok=True) + dest = os.path.join(work_dir, os.path.basename(yaml_path)) + shutil.copy(yaml_path, dest) + config = load_config(yaml_path) + if "needs" in config.config.get("file", {}): + source_dir = os.path.dirname(yaml_path) + needs = config.config["file"]["needs"] + if isinstance(needs, str): + needs = [needs] + for dep in needs: + dep_path = os.path.join(source_dir, dep) + if os.path.exists(dep_path): + shutil.copy(dep_path, work_dir) + return dest + + +@flow(log_prints=True) +def batch_validate_status_lists( + yaml_dir: str, + work_dir: str, + taxdump_path: Optional[str] = None, + dry_run: bool = False, + s3_path: Optional[str] = None, + min_valid: int = 0, + min_assigned: int = 0, +) -> bool: + """Validate all status list TSVs present in work_dir. + + For each FILE_*.types.yaml in yaml_dir, checks if the corresponding + TSV exists in work_dir. If present, runs SKIP_PARSING (file existence + check) then validate_file_pair. + + Args: + yaml_dir: Directory containing FILE_*.types.yaml files. + work_dir: Directory containing TSVs output by the updater. + taxdump_path: Optional NCBI taxdump path. + dry_run: If True, skip S3 upload. + s3_path: S3 path for validated files (None = local only). + min_valid: Minimum valid row count per file. 
+ min_assigned: Minimum assigned taxa per file. + + Returns: + bool: True if all validations passed. + """ + yaml_files = sorted(glob(os.path.join(yaml_dir, "FILE_*.types.yaml"))) + if not yaml_files: + print(f"No FILE_*.types.yaml found in {yaml_dir}") + return False + + skip_parser = PARSERS.parsers["SKIP_PARSING"] + results = {} + + for yaml_path in yaml_files: + yaml_name = os.path.basename(yaml_path) + try: + config = load_config(yaml_path) + except Exception as e: + print(f" SKIP {yaml_name}: failed to load config — {e}") + results[yaml_name] = "skip-config-error" + continue + + tsv_name = os.path.basename(config.config["file"]["name"]) + tsv_path = os.path.join(work_dir, tsv_name) + + if not os.path.exists(tsv_path): + # TSV not present — updater may not have produced it this run + results[yaml_name] = "skip-no-tsv" + continue + + # Copy YAML to work_dir for validator + working_yaml = _copy_yaml_to_workdir(yaml_path, work_dir) + + # Run SKIP_PARSING (verifies file exists) + try: + skip_parser.func( + working_yaml=working_yaml, + work_dir=work_dir, + append=False, + ) + except FileNotFoundError as e: + print(f" FAIL {yaml_name}: TSV not found after copy — {e}") + results[yaml_name] = "fail-parser" + continue + + # Run validation + effective_s3 = None if dry_run else s3_path + try: + status = validate_file_pair( + yaml_path=yaml_path, + work_dir=work_dir, + taxdump_path=taxdump_path, + s3_path=effective_s3, + min_valid=min_valid, + min_assigned=min_assigned, + ) + results[yaml_name] = "pass" if status else "fail-validation" + if status: + print(f" ✓ {yaml_name}") + else: + print(f" ✗ {yaml_name}") + except Exception as e: + print(f" ✗ {yaml_name}: {e}") + results[yaml_name] = "fail-exception" + + # Summary + passed = sum(1 for v in results.values() if v == "pass") + failed = sum(1 for v in results.values() if v.startswith("fail")) + skipped = sum(1 for v in results.values() if v.startswith("skip")) + print(f"\nBatch validation: {passed} passed, {failed} 
failed, {skipped} skipped") + + return failed == 0 + + +if __name__ == "__main__": + import argparse + + p = argparse.ArgumentParser(description="Batch validate status list TSVs.") + p.add_argument("--yaml-dir", required=True, help="Directory with FILE_*.types.yaml") + p.add_argument("--work-dir", required=True, help="Directory with updater TSVs") + p.add_argument("--taxdump-path", default=None) + p.add_argument("--s3-path", default=None) + p.add_argument("--dry-run", action="store_true") + p.add_argument("--min-valid", type=int, default=0) + p.add_argument("--min-assigned", type=int, default=0) + args = p.parse_args() + + success = batch_validate_status_lists(**vars(args)) + sys.exit(0 if success else 1) diff --git a/flows/prefect.yaml b/flows/prefect.yaml index 1cae1d3..681b3f9 100644 --- a/flows/prefect.yaml +++ b/flows/prefect.yaml @@ -288,7 +288,7 @@ deployments: s3_path: s3://goat/resources/status-lists/VGP_Ordinal_Phase1_plus.tsv min_records: 100 schedules: - - *weekly + - *daily work_pool: *goat_data_work_pool - name: update-ensembl-metadata-main @@ -412,13 +412,34 @@ deployments: output_path: "/home/ubuntu/tmp/test/status-lists/google-sheets" s3_path: s3://goat/resources/status-lists/google-sheets/ schedules: - - *weekly + - *daily work_pool: *goat_data_work_pool # ----------------------------------------------------------------------- # Phase 2 fetch-parse-validate — triggered by Phase 1 update events # ----------------------------------------------------------------------- + - name: batch-validate-google-sheets-status + # Triggered by update.google.sheets.status.finished + # All Google Sheets status TSVs are already compatible with their YAML schemas + entrypoint: flows/orchestrators/batch_validate_status_lists.py:batch_validate_status_lists + parameters: + yaml_dir: "../goat-data-main/sources/status-lists" + work_dir: "/home/ubuntu/tmp/test/status-lists/google-sheets" + dry_run: true + triggers: + - enabled: true + type: event + match: + 
prefect.resource.type: google.sheets.status + expect: + - update.google.sheets.status.finished + parameters: + yaml_dir: "../goat-data-main/sources/status-lists" + work_dir: "/home/ubuntu/tmp/test/status-lists/google-sheets" + dry_run: true + work_pool: *goat_data_work_pool + - name: fetch-parse-validate-blobtoolkit # Triggered by update.blobtoolkit.finished entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate @@ -469,9 +490,10 @@ deployments: - name: fetch-parse-validate-sra-data # Triggered by update.sra.data.finished + # SRA updater output already matches YAML schema — skip parsing, validate only entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate parameters: - parser: "ParserEnum.SRA_DATA" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/sra/sra.types.yaml" s3_path: "s3://goat/sources/sra/" work_dir: "/home/ubuntu/tmp/test/assembly-data" @@ -484,7 +506,7 @@ deployments: expect: - update.sra.data.finished parameters: - parser: "ParserEnum.SRA_DATA" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/sra/sra.types.yaml" s3_path: "s3://goat/sources/sra/" work_dir: "/home/ubuntu/tmp/test/assembly-data" @@ -493,11 +515,12 @@ deployments: - name: fetch-parse-validate-vgp-status # Triggered by update.vgp.status.finished + # VGP Ordinal updater output is a superset of YAML columns — skip parsing, validate only entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate parameters: - parser: "ParserEnum.SEQUENCING_STATUS" - yaml_path: "../goat-data-main/sources/assembly-data/vgp_phase1.types.yaml" - s3_path: "s3://goat/sources/assembly-data/" + parser: "ParserEnum.SKIP_PARSING" + yaml_path: "../goat-data-main/sources/status-lists/FILE_VGP_Ordinal_Phase1.types.yaml" + s3_path: "s3://goat/sources/status-lists/" work_dir: "/home/ubuntu/tmp/test/status-lists" dry_run: true triggers: @@ -508,9 +531,9 @@ deployments: expect: - update.vgp.status.finished parameters: - 
parser: "ParserEnum.SEQUENCING_STATUS" - yaml_path: "../goat-data-main/sources/assembly-data/vgp_phase1.types.yaml" - s3_path: "s3://goat/sources/assembly-data/" + parser: "ParserEnum.SKIP_PARSING" + yaml_path: "../goat-data-main/sources/status-lists/FILE_VGP_Ordinal_Phase1.types.yaml" + s3_path: "s3://goat/sources/status-lists/" work_dir: "/home/ubuntu/tmp/test/status-lists" dry_run: true work_pool: *goat_data_work_pool @@ -565,9 +588,10 @@ deployments: - name: fetch-parse-validate-nhm-status # Triggered by update.nhm.status.finished (legacy NHM updater) + # NHM updater output is a superset of YAML columns — skip parsing, validate only entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate parameters: - parser: "ParserEnum.SEQUENCING_STATUS" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/status-lists/FILE_DTOL_nhm.types.yaml" s3_path: "s3://goat/sources/status-lists/" work_dir: "/home/ubuntu/tmp/test/status-lists" @@ -580,7 +604,7 @@ deployments: expect: - update.nhm.status.finished parameters: - parser: "ParserEnum.SEQUENCING_STATUS" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/status-lists/FILE_DTOL_nhm.types.yaml" s3_path: "s3://goat/sources/status-lists/" work_dir: "/home/ubuntu/tmp/test/status-lists" diff --git a/scripts/TESTING_README.md b/scripts/TESTING_README.md new file mode 100644 index 0000000..f6752e8 --- /dev/null +++ b/scripts/TESTING_README.md @@ -0,0 +1,121 @@ +# Parser Testing Wrapper + +## Usage + +Run all parser tests with comparison against S3 sources: + +```bash +python scripts/run_parse_validate_tests.py +``` + +Test a specific parser: + +```bash +python scripts/run_parse_validate_tests.py --parser REFSEQ_ORGANELLES +python scripts/run_parse_validate_tests.py --parser SRA_DATA +python scripts/run_parse_validate_tests.py --parser SEQUENCING_STATUS +``` + +Verbose output: + +```bash +python scripts/run_parse_validate_tests.py --verbose +``` + +## What the Script Does + 
+1. **Discovers parsers** from `flows/parsers/parse_*.py` +2. **Runs each parser** on example TSV files in `tsv_examples/` +3. **Compares output** to S3 source versions mirrored in `goat-data_s3_sources/` +4. **Validates**: + - Header names match between parsed output and S3 source + - Line counts are similar (within 10% tolerance) +5. **Generates a summary report** showing pass/fail status + +## Test Configuration + +Tests are configured in the `PARSER_INPUT_MAPPING` dict in the script: + +```python +PARSER_INPUT_MAPPING = { + "PARSER_NAME": [ + ("tsv_examples/input.tsv", "path/to/config.types.yaml", "s3_comparison_file.tsv"), + # (input_example, yaml_config, s3_source_name) + ], +} +``` + +- **input_example**: Path to example TSV in `tsv_examples/` +- **yaml_config**: Path to YAML schema (relative to data repo root) +- **s3_source_name**: Filename in `goat-data_s3_sources/` for comparison (or `None`) + +## Current Test Results + +### ✓ Passing (Perfect Match) + +- **REFSEQ_ORGANELLES**: 33,252 rows, 13 cols - matches S3 source exactly +- **SRA_DATA**: 27,606 rows, 8 cols - matches S3 source exactly + +### ✓ Partial Pass (Headers OK) + +- **SEQUENCING_STATUS (vgp)**: Headers match, but example is only a sample (292 vs 1093 rows) — expected +- **SEQUENCING_STATUS (AEGIS)**: Parser succeeds, no S3 source to compare +- **BLOBTOOLKIT**: Headers mostly match (23/25 cols), example input missing 'biosample'/'bioproject' — sample is incomplete + +### ❌ Known Issues + +- **GENOMESIZE_KARYOTYPE**: Genomehubs `write_tsv` error with composite headers (`header: [genus, species]`). This is a YAML schema complexity issue, not a parser bug. + +## Running Parsers Manually + +For direct parser invocation with SKIP_PREFECT: + +```bash +export SKIP_PREFECT=true +python -m flows.parsers.parse_refseq_organelles \ + -i path/to/input.tsv \ + -y path/to/config.types.yaml +``` + +## Adding New Tests + +1. Add example TSV to `tsv_examples/` or `tsv_examples/<category>/` +2. 
Update `PARSER_INPUT_MAPPING` with the new test config +3. Run the script to verify + +Example: + +```python +"MY_PARSER": [ + ("tsv_examples/my_example.tsv", "../goat-data/sources/my-category/my_config.types.yaml", "my_source.tsv"), +] +``` + +## Troubleshooting + +**"No output file generated"**: + +- Check that the YAML config specifies a different output filename than the input +- Verify the input TSV is accessible + +**"Headers mismatch"**: + +- Check if the example input has all expected columns +- Verify the YAML configuration includes all fields +- Example files may be incomplete samples + +**"Parser timeout"**: + +- Increase the timeout in the `run_parser()` function (currently 60 seconds) +- Check for issues in the parser logic or input data + +## Environment + +Requires: + +- `genomehubs >= 2.10.14` +- `boto3` +- `pyyaml` +- `requests` + +Install with: `pip install -q -r requirements.txt` diff --git a/scripts/run_parse_validate_tests.py b/scripts/run_parse_validate_tests.py index 1942a9f..052355e 100755 --- a/scripts/run_parse_validate_tests.py +++ b/scripts/run_parse_validate_tests.py @@ -41,7 +41,7 @@ PARSER_INPUT_MAPPING = { # parser_name -> (example_input, yaml_config, comparison_file) "SEQUENCING_STATUS": [ - ("tsv_examples/vgp.tsv", "../goat-data/sources/assembly-data/vgp_phase1.types.yaml", "vgp_phase1.tsv"), + ("tsv_examples/ATLASEA_expanded.tsv", "../goat-data/sources/assembly-data/FILE_ATLASEA.types.yaml", "ATLASEA_expanded.tsv"), ("tsv_examples/AEGIS_expanded.tsv", "../goat-data/sources/status-lists/FILE_AEGIS.types.yaml", None), # no S3 source ], "REFSEQ_ORGANELLES": [ From 46a7359ff26423318a143aa61b7aa9ed50865d86 Mon Sep 17 00:00:00 2001 From: Cibele Sotero-Caio Date: Thu, 28 May 2026 11:26:57 +0100 Subject: [PATCH 12/18] remove unecessary parsers and update plans --- docs/plans/README.md | 27 +++ .../plans/v1-initial-plan.md | 0 docs/plans/v2-current-plan.md | 172 ++++++++++++++++++ .../batch_validate_status_lists.py | 78 ++------ 
flows/prefect.yaml | 14 +- 5 files changed, 226 insertions(+), 65 deletions(-) create mode 100644 docs/plans/README.md rename plan-goatPipelineMigration.prompt.md => docs/plans/v1-initial-plan.md (100%) create mode 100644 docs/plans/v2-current-plan.md diff --git a/docs/plans/README.md b/docs/plans/README.md new file mode 100644 index 0000000..e830dde --- /dev/null +++ b/docs/plans/README.md @@ -0,0 +1,27 @@ +# Migration plans + +Versioned plans for the GoaT data import pipeline migration. The newest +version is the source of truth; older versions are preserved verbatim so the +evolution of the plan stays auditable. + +## Current + +- [v2-current-plan.md](v2-current-plan.md) — Phase 1 complete, Phase 2 in + progress (cleanup tasks tracked), Phases 3–5 outlined. + +## History + +- [v1-initial-plan.md](v1-initial-plan.md) — original plan written at project + kickoff. Defines the five-phase framing, the gap analysis, and the + network-robustness / logging / conventions reference that v2 still relies on. + +## Versioning convention + +- The newest plan is always named `vN-current-plan.md`. +- When a new revision lands, the previous `current` file is renamed to + `vN-<label>.md` (e.g. `v1-initial-plan.md`) and a new + `v(N+1)-current-plan.md` is added. +- Historical plans are never edited after archival — corrections go into the + new version's change log. +- Each new plan ends with a `Change log` section summarizing what changed + versus the previous version. 
diff --git a/plan-goatPipelineMigration.prompt.md b/docs/plans/v1-initial-plan.md similarity index 100% rename from plan-goatPipelineMigration.prompt.md rename to docs/plans/v1-initial-plan.md diff --git a/docs/plans/v2-current-plan.md b/docs/plans/v2-current-plan.md new file mode 100644 index 0000000..eb4e4c2 --- /dev/null +++ b/docs/plans/v2-current-plan.md @@ -0,0 +1,172 @@ +# GoaT Data Import Pipeline Migration Plan — v2 (current) + +> **Status as of this revision:** Phase 1 complete · Phase 2 mostly complete (cleanup tasks remaining) · Phases 3–5 not yet started. +> +> For the original framing and historical context see [v1-initial-plan.md](v1-initial-plan.md). + +## TL;DR + +Phase 1 (external data fetching) and the bulk of Phase 2 (YAML-backed parsers + validation) are now implemented. Every legacy fetch job from `goat-data/.github/workflows/fetch-resources.yml` has a corresponding Prefect updater, and every source directory that needs transformation has either a parser or a `SKIP_PARSING` assignment plus a YAML config. What remains in Phase 2 is targeted cleanup: confirm parser-vs-skip assignments, ensure every status-list YAML can be validated end-to-end locally, get `blobtk` on the worker PATH, and finalize a couple of YAML schemas. After that, Phase 3 cuts the legacy import over to `s3://goat/validated/`. + +## Status at a glance + +| Phase | State | Notes | +| ------------------------------------ | -------------- | ----------------------------------------------------------------------------------- | +| Phase 1 — External data fetching | ✅ Complete | 18 updaters deployed (see inventory below). | +| Phase 2 — Parsers & validation | 🔄 In progress | 11 parsers + fetch-parse-validate wrapper landed. Cleanup tasks tracked below. | +| Phase 3 — Switch legacy import | ⬜ Not started | Gated on Phase 2 cleanup + a parity comparison between `validated/` and `sources/`. 
| +| Phase 4 — Replace `genomehubs index` | ⬜ Future | Requires new import code; out of scope for this revision. | +| Phase 5 — Full pipeline migration | ⬜ Future | Replaces remaining GitHub Actions workflows (release / init / index / fill / test). | + +--- + +## Phase 1 — External Data Fetching (COMPLETE) + +Every external fetch job from the legacy workflow now has a Prefect-backed updater that writes raw data to `s3://goat/resources/...` and emits an `update.*.finished` event. + +**Updater inventory (`flows/updaters/`):** + +- `update_ncbi_datasets.py` +- `update_ncbi_taxonomy.py` +- `update_ena_taxonomy_extra.py` +- `update_genomehubs_taxonomy.py` +- `update_tolid_prefixes.py` +- `update_ott_taxonomy.py` +- `update_tol_portal_status.py` +- `update_tol_genome_notes.py` +- `update_nhm_status_list.py` +- `update_boat_config.py` +- `update_vgp_status.py` +- `update_vgp_original_status.py` +- `update_jgi_status.py` +- `update_ensembl_metadata.py` +- `update_ucsc_assemblies.py` +- `update_sra_data.py` +- `update_blobtoolkit.py` +- `update_refseq_organelles.py` +- `update_google_sheets_status.py` + +Shared helpers: `tol_utils.py`, `flows/updaters/api/`, `flows/lib/utils.py`, `flows/lib/shared_args.py`. + +All deployments are wired in `flows/prefect.yaml`. There are no remaining Phase 1 items. + +--- + +## Phase 2 — YAML-Backed Parsers & Validation (IN PROGRESS) + +### What is in place + +**Parsers (`flows/parsers/`):** + +| Parser | Handles | +| --------------------------------------- | ------------------------------------------------------------------------ | +| `parse_ncbi_assemblies.py` | NCBI Datasets + data-freeze assembly TSVs. | +| `parse_refseq_organelles.py` | NCBI RefSeq mitochondrion / plastid GenBank → pivoted TSV. | +| `parse_sequencing_status.py` | JGI 1KFG (and any other status list whose source format needs pivoting). | +| `parse_blobtoolkit.py` | BlobToolKit analysis exports (stub; see cleanup). | +| `parse_sra_data.py` | SRA metadata TSV. 
| +| `parse_genomesize_karyotype.py` | Generic genomesize / karyotype `FILE_` sources. | +| `parse_conservation.py` | CITES + conservation sources. | +| `parse_legislation.py` | UK legislation FILE\_ sources. | +| `parse_skip_parsing.py` | Pass-through for inputs that already match their YAML schema. | +| `parse_backfill_historical_versions.py` | Historical assembly version backfill. | + +Discovery is automatic via `flows/parsers/register.py` (any `parse_*.py` is picked up). `Parser` enum members serialize to lowercase (e.g. `skip_parsing`) but the `PARSERS.parsers` dict is keyed by `Parser.name` (uppercase, e.g. `SKIP_PARSING`). + +**Wrappers, validators, and orchestrators:** + +- `flows/lib/wrapper_fetch_parse_validate.py` — production fetch → parse → validate → S3 upload pipeline. +- `flows/lib/local_fetch_parse_validate.py` — local equivalent: copies the YAML + TSV into a work directory, runs the parser, runs `validate_file_pair` with `s3_path=None`, and gracefully skips validation if the `blobtk` binary is not on PATH. Handles plain ↔ gz conversion so the input file matches the YAML's `file.name`. Handles the lowercase-enum / uppercase-dict-key mismatch when looking up parsers. +- `flows/validators/validate_file_pair.py` — wraps the `blobtk validate` Rust binary. +- `flows/orchestrators/batch_validate_status_lists.py` — triggered by `update.google.sheets.status.finished`. Iterates every `FILE_*.types.yaml` under `goat-data/sources/status-lists/`, calls the standard `fetch_parse_validate(parser=Parser.SKIP_PARSING, …)` for each TSV present, and reports pass / fail / skip-no-tsv / skip-config-error counts. CLI flags: `--yaml-dir --work-dir --taxdump-path --s3-path --dry-run --min-valid --min-assigned`. + +**Other lib modules in current use:** `conditional_import.py`, `fetch_genomehubs_target_list.py`, `fetch_previous_file_pair.py`, `for_each_record.py`, `index_assembly_features.py`, `process_features.py`, `shared_args.py`, `shared_tasks.py`, `utils.py`. 
+ +### Parser ↔ source assignment audit (current) + +| Source directory | Deployment | Parser | +| -------------------------------------------- | ------------------------------------- | ---------------------- | +| `assembly-data/ncbi_datasets` | `fpv-ncbi-datasets` | `NCBI_ASSEMBLIES` | +| `assembly-data/data_freeze` | `fpv-data-freeze` | `NCBI_ASSEMBLIES` | +| `assembly-data/refseq_organelles` | `fpv-refseq-organelles` | `REFSEQ_ORGANELLES` | +| `assembly-data/ucsc` | `fpv-ucsc` | `SKIP_PARSING` | +| `btk/` | `fpv-blobtoolkit` | `SKIP_PARSING` | +| `sra/` | `fpv-sra` | `SKIP_PARSING` | +| `status-lists/vgp` (FILE_VGP_Ordinal_Phase1) | `fpv-vgp` | `SKIP_PARSING` | +| `status-lists/nhm` | `fpv-nhm` | `SKIP_PARSING` | +| `status-lists/jgi_1kfg` | `fpv-jgi` | `SEQUENCING_STATUS` | +| `status-lists/google_sheets/*` | `batch-validate-google-sheets-status` | `SKIP_PARSING` (batch) | + +Rationale for `SKIP_PARSING` on BTK and UCSC: `blobtk validate` can derive the taxonomy columns from a `taxon_id` column automatically, so no pre-parse transformation is required. The YAML schema is the source of truth. + +### Phase 2 cleanup — remaining work + +1. **`blobtk` on worker PATH.** Validation currently no-ops locally on the developer machine because the binary is not installed. Add it to the worker image (and document a local install option) so `local_fetch_parse_validate.py` reports real validation outcomes instead of skipping. +2. **`BLOBTOOLKIT` parser placeholder.** `parse_blobtoolkit.py` exists but is a thin pass-through. Decide whether to keep `SKIP_PARSING` permanently for `btk/` (current production setting) or graduate to a real parser once the BTK API export gains structured fields the YAML cannot describe. +3. **`GENOMESIZE_KARYOTYPE` schema confirmation.** `parse_genomesize_karyotype.py` is generic, but a handful of `FILE_` sources still need their YAMLs cross-checked against the parser's column expectations. 
Walk every YAML under `goat-data/sources/genomesize-karyotype/` and run `local_fetch_parse_validate.py` once per file. +4. **JGI YAML.** Confirm `sources/status-lists/jgi_1kfg/jgi_1kfg.types.yaml` matches the columns emitted by `update_jgi_status` after the OAuth pagination rewrite. +5. **End-to-end parity check.** Run the batch validator (`batch_validate_status_lists.py`) over all current `status-lists/` YAMLs locally and record the pass / fail / skip rates. Fix anything that fails before Phase 3. + +### Verification (Phase 2) + +1. `python -m flows.lib.local_fetch_parse_validate --yaml … --tsv …` exits with status 0 for every (parser, source) pair in the parser ↔ source assignment audit table. +2. `python -m flows.orchestrators.batch_validate_status_lists --dry-run` lists every `FILE_*.types.yaml` under `status-lists/` with the expected parser assignment. +3. `prefect deploy --prefect-file flows/prefect.yaml --all` succeeds and the trigger for `batch-validate-google-sheets-status` shows `update.google.sheets.status.finished`. + +--- + +## Phase 3 — Switch Legacy Import to Validated Data (NOT STARTED) + +Unchanged from v1. Recap: + +1. Confirm parity between `s3://goat/validated/` and `s3://goat/sources/` per directory. +2. Update `goat-data/.github/workflows/genomehubs-index.yml` to read from `validated/`. +3. Disable fetch jobs in `goat-data/.github/workflows/fetch-resources.yml` and skip them in `s3_release.yml`. +4. Test release; compare ES indices, API tests, UI tests. +5. Staged rollout: assembly-data first, then status-lists, then the rest. + +Rollback path: revert the S3 path in the workflow — `sources/` and `resources/` remain intact. + +## Phase 4 — Replace Legacy Import (FUTURE) + +Unchanged from v1. Requires the new import code (skips re-validation/lookup, reads validated TSV/YAML pairs directly into Elasticsearch). + +## Phase 5 — Full Pipeline Migration (FUTURE) + +Unchanged from v1. Move ES init, indexing, fill, test, and release promotion out of GitHub Actions into Prefect. 
+ +--- + +## Implemented surface area (snapshot) + +- **Parsers:** 11 (see Phase 2 table). +- **Updaters:** 19 (Phase 1 inventory). +- **Orchestrators:** 5 — `batch_validate_status_lists`, `tasks`, `tol_data_pipeline`, `tol_genome_notes_orchestration`, `tol_portal_status_orchestration`. +- **Lib modules:** 12 — `conditional_import`, `fetch_genomehubs_target_list`, `fetch_previous_file_pair`, `for_each_record`, `index_assembly_features`, `local_fetch_parse_validate`, `process_features`, `shared_args`, `shared_tasks`, `utils`, `validate_file_pair`, `wrapper_fetch_parse_validate`. +- **Deployments in `flows/prefect.yaml`:** 34 (including the new `batch-validate-google-sheets-status`). + +## Reference material carried forward from v1 + +The following sections of [v1-initial-plan.md](v1-initial-plan.md) remain authoritative and have not been duplicated here: + +- **Gap analysis** — historical record of which legacy jobs needed updaters. Now fully implemented. +- **Network robustness review** — `safe_get()` hardening guidance, per-source timeout table, paginated-API partial-failure handling, idempotency / freshness checks, S3 upload atomicity, connection pooling, DNS / TLS handling. +- **Logging review** — `log_progress()` helper proposal, network-call summaries, output-file summaries, event-emission logging, exception context, Docker orchestrator logging. +- **Conventions reference** — YAML/TSV pair conventions, repo coding conventions, list of legacy code bug risks to avoid. + +These sections are general-purpose engineering guidance and apply to any future updater or parser work. + +--- + +## Change log + +**v1 → v2 (this revision):** + +- Marked Phase 1 complete; replaced the "missing updaters" table with the implemented inventory. +- Marked Phase 2 mostly complete; added the parser-vs-source assignment audit table. 
+- Added the **Phase 2 cleanup** section enumerating the remaining items (blobtk PATH, BLOBTOOLKIT parser decision, GENOMESIZE_KARYOTYPE schema sweep, JGI YAML, end-to-end parity). +- Documented `flows/lib/local_fetch_parse_validate.py` and `flows/orchestrators/batch_validate_status_lists.py` (both new since v1). +- Recorded the BTK and UCSC `SKIP_PARSING` decision (auto-taxonomy in `blobtk validate`). +- Recorded the VGP YAML correction (`FILE_VGP_Ordinal_Phase1.types.yaml`). +- Phases 3–5 unchanged. +- Network-robustness, logging, and conventions sections kept in v1 by reference rather than duplicated. diff --git a/flows/orchestrators/batch_validate_status_lists.py b/flows/orchestrators/batch_validate_status_lists.py index dc118ac..29863bd 100644 --- a/flows/orchestrators/batch_validate_status_lists.py +++ b/flows/orchestrators/batch_validate_status_lists.py @@ -1,9 +1,9 @@ -"""Batch validate all Google Sheets status list TSVs. +"""Batch fetch-parse-validate for all Google Sheets status list TSVs. Triggered by update.google.sheets.status.finished, this flow iterates through all FILE_*.types.yaml in the status-lists directory and runs -SKIP_PARSING + validate for each one whose corresponding TSV is present -in work_dir. +the full fetch-parse-validate pipeline (with SKIP_PARSING) for each one +whose corresponding TSV is present in work_dir. 
""" import os @@ -13,54 +13,31 @@ from flows.lib.conditional_import import flow from flows.lib.utils import load_config -from flows.parsers.register import register_plugins -from flows.validators.validate_file_pair import validate_file_pair - -PARSERS = register_plugins() - - -def _copy_yaml_to_workdir(yaml_path: str, work_dir: str) -> str: - """Copy YAML (and dependencies) to work_dir.""" - import shutil - - os.makedirs(work_dir, exist_ok=True) - dest = os.path.join(work_dir, os.path.basename(yaml_path)) - shutil.copy(yaml_path, dest) - config = load_config(yaml_path) - if "needs" in config.config.get("file", {}): - source_dir = os.path.dirname(yaml_path) - needs = config.config["file"]["needs"] - if isinstance(needs, str): - needs = [needs] - for dep in needs: - dep_path = os.path.join(source_dir, dep) - if os.path.exists(dep_path): - shutil.copy(dep_path, work_dir) - return dest +from flows.lib.wrapper_fetch_parse_validate import Parser, fetch_parse_validate @flow(log_prints=True) def batch_validate_status_lists( yaml_dir: str, work_dir: str, + s3_path: str = "s3://goat/sources/status-lists/", taxdump_path: Optional[str] = None, dry_run: bool = False, - s3_path: Optional[str] = None, min_valid: int = 0, min_assigned: int = 0, ) -> bool: - """Validate all status list TSVs present in work_dir. + """Run fetch-parse-validate (SKIP_PARSING) for all status list TSVs in work_dir. For each FILE_*.types.yaml in yaml_dir, checks if the corresponding - TSV exists in work_dir. If present, runs SKIP_PARSING (file existence - check) then validate_file_pair. + TSV exists in work_dir. If present, invokes the standard + fetch_parse_validate flow with SKIP_PARSING. Args: yaml_dir: Directory containing FILE_*.types.yaml files. work_dir: Directory containing TSVs output by the updater. + s3_path: S3 path prefix for validated files. taxdump_path: Optional NCBI taxdump path. dry_run: If True, skip S3 upload. - s3_path: S3 path for validated files (None = local only). 
min_valid: Minimum valid row count per file. min_assigned: Minimum assigned taxa per file. @@ -72,7 +49,6 @@ def batch_validate_status_lists( print(f"No FILE_*.types.yaml found in {yaml_dir}") return False - skip_parser = PARSERS.parsers["SKIP_PARSING"] results = {} for yaml_path in yaml_files: @@ -92,46 +68,28 @@ def batch_validate_status_lists( results[yaml_name] = "skip-no-tsv" continue - # Copy YAML to work_dir for validator - working_yaml = _copy_yaml_to_workdir(yaml_path, work_dir) - - # Run SKIP_PARSING (verifies file exists) - try: - skip_parser.func( - working_yaml=working_yaml, - work_dir=work_dir, - append=False, - ) - except FileNotFoundError as e: - print(f" FAIL {yaml_name}: TSV not found after copy — {e}") - results[yaml_name] = "fail-parser" - continue - - # Run validation - effective_s3 = None if dry_run else s3_path try: - status = validate_file_pair( + fetch_parse_validate( + parser=Parser.SKIP_PARSING, yaml_path=yaml_path, + s3_path=s3_path, work_dir=work_dir, taxdump_path=taxdump_path, - s3_path=effective_s3, + dry_run=dry_run, min_valid=min_valid, min_assigned=min_assigned, ) - results[yaml_name] = "pass" if status else "fail-validation" - if status: - print(f" ✓ {yaml_name}") - else: - print(f" ✗ {yaml_name}") + results[yaml_name] = "pass" + print(f" ✓ {yaml_name}") except Exception as e: print(f" ✗ {yaml_name}: {e}") - results[yaml_name] = "fail-exception" + results[yaml_name] = "fail" # Summary passed = sum(1 for v in results.values() if v == "pass") - failed = sum(1 for v in results.values() if v.startswith("fail")) + failed = sum(1 for v in results.values() if v == "fail") skipped = sum(1 for v in results.values() if v.startswith("skip")) - print(f"\nBatch validation: {passed} passed, {failed} failed, {skipped} skipped") + print(f"\nBatch fetch-parse-validate: {passed} passed, {failed} failed, {skipped} skipped") return failed == 0 diff --git a/flows/prefect.yaml b/flows/prefect.yaml index 681b3f9..3cc647f 100644 --- a/flows/prefect.yaml 
+++ b/flows/prefect.yaml @@ -421,11 +421,12 @@ deployments: - name: batch-validate-google-sheets-status # Triggered by update.google.sheets.status.finished - # All Google Sheets status TSVs are already compatible with their YAML schemas + # Runs fetch-parse-validate (SKIP_PARSING) for each status list TSV entrypoint: flows/orchestrators/batch_validate_status_lists.py:batch_validate_status_lists parameters: yaml_dir: "../goat-data-main/sources/status-lists" work_dir: "/home/ubuntu/tmp/test/status-lists/google-sheets" + s3_path: "s3://goat/sources/status-lists/" dry_run: true triggers: - enabled: true @@ -437,14 +438,16 @@ deployments: parameters: yaml_dir: "../goat-data-main/sources/status-lists" work_dir: "/home/ubuntu/tmp/test/status-lists/google-sheets" + s3_path: "s3://goat/sources/status-lists/" dry_run: true work_pool: *goat_data_work_pool - name: fetch-parse-validate-blobtoolkit # Triggered by update.blobtoolkit.finished + # Skip parsing — blobtk validate handles taxonomy column detection entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate parameters: - parser: "ParserEnum.BLOBTOOLKIT" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/btk/btk.types.yaml" s3_path: "s3://goat/sources/btk/" work_dir: "/home/ubuntu/tmp/test/assembly-data" @@ -457,7 +460,7 @@ deployments: expect: - update.blobtoolkit.finished parameters: - parser: "ParserEnum.BLOBTOOLKIT" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/btk/btk.types.yaml" s3_path: "s3://goat/sources/btk/" work_dir: "/home/ubuntu/tmp/test/assembly-data" @@ -564,9 +567,10 @@ deployments: - name: fetch-parse-validate-ucsc-assemblies # Triggered by update.ucsc.assemblies.finished + # Skip parsing — YAML column mapping can be adjusted later if needed entrypoint: flows/lib/wrapper_fetch_parse_validate.py:fetch_parse_validate parameters: - parser: "ParserEnum.SEQUENCING_STATUS" + parser: "ParserEnum.SKIP_PARSING" yaml_path: 
"../goat-data-main/sources/assembly-data/ucsc_ids.types.yaml" s3_path: "s3://goat/sources/assembly-data/" work_dir: "/home/ubuntu/tmp/test/assembly-data" @@ -579,7 +583,7 @@ deployments: expect: - update.ucsc.assemblies.finished parameters: - parser: "ParserEnum.SEQUENCING_STATUS" + parser: "ParserEnum.SKIP_PARSING" yaml_path: "../goat-data-main/sources/assembly-data/ucsc_ids.types.yaml" s3_path: "s3://goat/sources/assembly-data/" work_dir: "/home/ubuntu/tmp/test/assembly-data" From 31a134c1cd61304f409c22bba1539dda0eb7f387 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Mon, 8 Jun 2026 15:38:28 +0100 Subject: [PATCH 13/18] update flows to fix test deployment issues --- flows/lib/conditional_import.py | 3 +- flows/lib/fetch_previous_file_pair.py | 9 +- flows/lib/shared_args.py | 23 ++++- flows/prefect.yaml | 19 +++-- flows/updaters/update_blobtoolkit.py | 32 +++---- flows/updaters/update_ensembl_metadata.py | 64 +++++--------- flows/updaters/update_google_sheets_status.py | 85 +++++++++---------- flows/updaters/update_jgi_status.py | 29 ++++--- flows/updaters/update_nhm_status_list.py | 12 +-- flows/updaters/update_ott_taxonomy.py | 24 +++--- flows/updaters/update_refseq_organelles.py | 53 ++++++------ flows/updaters/update_sra_data.py | 38 +++------ 12 files changed, 194 insertions(+), 197 deletions(-) diff --git a/flows/lib/conditional_import.py b/flows/lib/conditional_import.py index ada08c4..89f0184 100644 --- a/flows/lib/conditional_import.py +++ b/flows/lib/conditional_import.py @@ -35,5 +35,6 @@ def emit_event(*_, **__): from prefect.events import emit_event from prefect.runtime.task_run import run_count +NO_CACHE = NO_CACHE -__all__ = ["flow", "task", "emit_event", "run_count", "skip_prefect", "NO_CACHE"] +__all__ = ["flow", "task", "emit_event", "run_count", "skip_prefect"] diff --git a/flows/lib/fetch_previous_file_pair.py b/flows/lib/fetch_previous_file_pair.py index d0acbeb..cbb070b 100644 --- a/flows/lib/fetch_previous_file_pair.py +++ 
b/flows/lib/fetch_previous_file_pair.py @@ -112,13 +112,16 @@ def copy_yaml_files(yaml_path: str, config: Config, work_dir: str) -> None: # Copy any dependencies to the working directory if "needs" in config.config["file"]: source_dir = os.path.dirname(yaml_path) - for file in config.config["file"]["needs"]: + needs = config.config["file"]["needs"] + if not isinstance(needs, list): + needs = [needs] + for file in needs: file_path = os.path.join(source_dir, file) shutil.copy(file_path, work_dir) @flow() -def fetch_previous_file_pair(yaml_path: str, s3_path: str, work_dir: str) -> None: +def fetch_previous_file_pair(yaml_path: str, s3_path: str, work_dir: str) -> bool: """ Fetch the previous YAML/TSV files and compare headers. @@ -128,7 +131,7 @@ def fetch_previous_file_pair(yaml_path: str, s3_path: str, work_dir: str) -> Non work_dir (str): Path to the working directory. """ config = utils.load_config(yaml_path) - (local_file, remote_file) = get_filenames(config, s3_path, work_dir) + local_file, remote_file = get_filenames(config, s3_path, work_dir) line_count = fetch_tsv_file(remote_file, local_file) copy_yaml_files(yaml_path, config, work_dir) status = compare_headers(config, local_file) diff --git a/flows/lib/shared_args.py b/flows/lib/shared_args.py index 4d5d801..4f9f4fb 100644 --- a/flows/lib/shared_args.py +++ b/flows/lib/shared_args.py @@ -57,6 +57,16 @@ }, } +DIVISION = { + "flags": ["--division"], + "keys": { + "help": "Ensembl division (fungi, metazoa, plants, protists, rapid, vertebrates).", + "type": str, + "default": "vertebrates", + }, +} + + DRY_RUN = { "flags": ["-d", "--dry_run"], "keys": { @@ -94,6 +104,15 @@ "keys": {"help": "Type of index to fetch.", "type": str}, } +INDEX_URL = { + "flags": ["--index_url"], + "keys": { + "help": "URL to the private index TSV listing project sheets.", + "type": str, + }, +} + + INPUT_PATH = { "flags": ["-i", "--input_path"], "keys": {"help": "Path to the input file.", "type": str}, @@ -197,9 +216,7 @@ } -def 
default( - arg: Dict[str, Any], default: Union[int, float, bool, str] -) -> Dict[str, Any]: +def default(arg: Dict[str, Any], default: Union[int, float, bool, str]) -> Dict[str, Any]: """Return an argument with a default value.""" # append/replace the default value to the help message default_re = r"\s*\(default: .*\)" diff --git a/flows/prefect.yaml b/flows/prefect.yaml index 3cc647f..b846a2b 100644 --- a/flows/prefect.yaml +++ b/flows/prefect.yaml @@ -358,7 +358,6 @@ deployments: parameters: output_path: "/home/ubuntu/tmp/test/assembly-data/ucsc_assemblies.tsv" s3_path: s3://goat/resources/assembly-data/ucsc_assemblies.tsv - min_records: 100 schedules: - *weekly work_pool: *goat_data_work_pool @@ -371,7 +370,12 @@ deployments: s3_path: s3://goat/resources/assembly-data/jgi_1kfg_status.tsv schedules: - *weekly - work_pool: *goat_data_work_pool + work_pool: + name: goat-data + work_queue_name: default + job_variables: + env: + JGI_OFFLINE_TOKEN: "{{ $JGI_OFFLINE_TOKEN }}" - name: update-sra-data # Fetch SRA data via NCBI E-utilities @@ -381,16 +385,19 @@ deployments: s3_path: s3://goat/resources/assembly-data/sra_data.tsv.gz schedules: - *weekly - work_pool: *goat_data_work_pool + work_pool: + name: goat-data + work_queue_name: default + job_variables: + env: + NCBI_API_KEY: "{{ $NCBI_API_KEY }}" - name: update-blobtoolkit # Fetch BlobToolKit analysis data via API entrypoint: flows/updaters/update_blobtoolkit.py:update_blobtoolkit parameters: output_path: "/home/ubuntu/tmp/test/assembly-data/btk.tsv.gz" - files_output_path: "/home/ubuntu/tmp/test/assembly-data/btk.files.yaml" s3_path: s3://goat/resources/assembly-data/btk.tsv.gz - s3_files_path: s3://goat/resources/assembly-data/btk.files.yaml schedules: - *weekly work_pool: *goat_data_work_pool @@ -410,6 +417,8 @@ deployments: entrypoint: flows/updaters/update_google_sheets_status.py:update_google_sheets_status parameters: output_path: "/home/ubuntu/tmp/test/status-lists/google-sheets" + # This needs to be set to 
the URL of a TSV file containing the list of Google Sheets to fetch + index_url: "" s3_path: s3://goat/resources/status-lists/google-sheets/ schedules: - *daily diff --git a/flows/updaters/update_blobtoolkit.py b/flows/updaters/update_blobtoolkit.py index cd3c78b..6912a0f 100644 --- a/flows/updaters/update_blobtoolkit.py +++ b/flows/updaters/update_blobtoolkit.py @@ -1,9 +1,7 @@ import contextlib import csv import gzip -import json import os -import time from flows.lib.conditional_import import emit_event, flow, task from flows.lib.shared_args import MIN_RECORDS, OUTPUT_PATH, S3_PATH, parse_args, required @@ -146,14 +144,9 @@ def _describe_files(meta: dict) -> list: "analysis": { "name": "BlobToolKit", "title": f"BlobToolKit analysis of {accession}", - "description": ( - f"Analysis of public assembly {accession} " - f"using BlobToolKit" - ), + "description": (f"Analysis of public assembly {accession} " f"using BlobToolKit"), "source": "BlobToolKit", - "source_url": ( - f"https://blobtoolkit.genomehubs.org/view/dataset/{dataset_id}" - ), + "source_url": (f"https://blobtoolkit.genomehubs.org/view/dataset/{dataset_id}"), }, } ) @@ -184,9 +177,7 @@ def fetch_blobtoolkit( print(f"Found {len(datasets)} datasets") if len(datasets) < min_records: - raise RuntimeError( - f"BlobToolKit returned fewer than {min_records} datasets: {len(datasets)}" - ) + raise RuntimeError(f"BlobToolKit returned fewer than {min_records} datasets: {len(datasets)}") tsv_path = os.path.join(output_dir, "btk.tsv") gz_path = os.path.join(output_dir, "btk.tsv.gz") @@ -202,9 +193,7 @@ def fetch_blobtoolkit( all_files.extend(files) with open(tsv_path, "w", newline="") as f: - writer = csv.DictWriter( - f, fieldnames=TSV_FIELDNAMES, delimiter="\t", lineterminator="\n" - ) + writer = csv.DictWriter(f, fieldnames=TSV_FIELDNAMES, delimiter="\t", lineterminator="\n") writer.writeheader() for row in all_rows: writer.writerow(row) @@ -237,7 +226,7 @@ def upload_s3_files(output_dir: str, s3_path: str) -> None: 
@flow() def update_blobtoolkit( output_path: str, - s3_path: str = None, + s3_path: str, min_records: int = 0, ) -> bool: """Fetch BlobToolKit analysis data and optionally upload to S3. @@ -254,14 +243,17 @@ def update_blobtoolkit( raise ValueError(f"Unsafe output path: {output_path}") resolved_path = os.path.abspath(output_path) + filename = "btk.tsv.gz" + if ".tsv" in resolved_path: + filename = os.path.basename(resolved_path) + resolved_path = os.path.dirname(resolved_path) os.makedirs(resolved_path, exist_ok=True) - row_count, file_count = fetch_blobtoolkit( - resolved_path, min_records=min_records - ) + row_count, file_count = fetch_blobtoolkit(resolved_path, min_records=min_records) if s3_path: - upload_s3_files(resolved_path, s3_path) + upload_s3_files(f"{resolved_path}/{filename}", s3_path) + upload_s3_files(f"{resolved_path}/btk.files.yaml", s3_path) emit_event( event="update.blobtoolkit.finished", diff --git a/flows/updaters/update_ensembl_metadata.py b/flows/updaters/update_ensembl_metadata.py index a4b3c7a..8001372 100644 --- a/flows/updaters/update_ensembl_metadata.py +++ b/flows/updaters/update_ensembl_metadata.py @@ -1,17 +1,17 @@ import csv import gzip -import json import os from enum import Enum from flows.lib.conditional_import import emit_event, flow, task -from flows.lib.shared_args import OUTPUT_PATH, S3_PATH, parse_args, required +from flows.lib.shared_args import DIVISION, OUTPUT_PATH, S3_PATH, parse_args, required from flows.lib.utils import is_safe_path, safe_get, upload_to_s3 class EnsemblDivision(Enum): """Supported Ensembl genome database divisions.""" + ENSEMBL = "ensembl" FUNGI = "fungi" METAZOA = "metazoa" PLANTS = "plants" @@ -21,33 +21,21 @@ class EnsemblDivision(Enum): DIVISION_URLS = { - EnsemblDivision.FUNGI: ( - "http://ftp.ensemblgenomes.org/pub/current/fungi/" - "species_metadata_EnsemblFungi.json" - ), + EnsemblDivision.ENSEMBL: ("https://ftp.ensembl.org/pub/current/" "species_metadata_Ensembl.json"), + EnsemblDivision.FUNGI: 
("http://ftp.ensemblgenomes.org/pub/current/fungi/" "species_metadata_EnsemblFungi.json"), EnsemblDivision.METAZOA: ( - "http://ftp.ensemblgenomes.org/pub/current/metazoa/" - "species_metadata_EnsemblMetazoa.json" - ), - EnsemblDivision.PLANTS: ( - "http://ftp.ensemblgenomes.org/pub/current/plants/" - "species_metadata_EnsemblPlants.json" + "http://ftp.ensemblgenomes.org/pub/current/metazoa/" "species_metadata_EnsemblMetazoa.json" ), + EnsemblDivision.PLANTS: ("http://ftp.ensemblgenomes.org/pub/current/plants/" "species_metadata_EnsemblPlants.json"), EnsemblDivision.PROTISTS: ( - "http://ftp.ensemblgenomes.org/pub/current/protists/" - "species_metadata_EnsemblProtists.json" - ), - EnsemblDivision.RAPID: ( - "https://ftp.ensembl.org/pub/rapid-release/" - "species_metadata.json" - ), - EnsemblDivision.VERTEBRATES: ( - "https://ftp.ensembl.org/pub/current/" - "species_metadata_EnsemblVertebrates.json" + "http://ftp.ensemblgenomes.org/pub/current/protists/" "species_metadata_EnsemblProtists.json" ), + EnsemblDivision.RAPID: ("https://ftp.ensembl.org/pub/rapid-release/" "species_metadata.json"), + EnsemblDivision.VERTEBRATES: ("https://ftp.ensembl.org/pub/current/" "species_metadata_EnsemblVertebrates.json"), } DIVISION_OUTPUT_NAMES = { + EnsemblDivision.ENSEMBL: "species_metadata_Ensembl.tsv.gz", EnsemblDivision.FUNGI: "species_metadata_EnsemblFungi.tsv.gz", EnsemblDivision.METAZOA: "species_metadata_EnsemblMetazoa.tsv.gz", EnsemblDivision.PLANTS: "species_metadata_EnsemblPlants.tsv.gz", @@ -76,7 +64,6 @@ def _extract_fields(record: dict, division: EnsemblDivision) -> list: name = record.get("ensembl_production_name", "") release_date = record.get("release_date", "") strain = record.get("strain", "") - taxonomy_id = str(record.get("taxonomy_id", "")) elif division == EnsemblDivision.VERTEBRATES: assembly = record.get("assembly", {}) organism = record.get("organism", {}) @@ -84,16 +71,15 @@ def _extract_fields(record: dict, division: EnsemblDivision) -> list: name = 
organism.get("url_name", "") release_date = record.get("release_date", "") strain = organism.get("strain", "") - taxonomy_id = str(record.get("taxonomy_id", "")) else: organism = record.get("organism", {}) accession = record.get("assembly_accession", "") name = organism.get("url_name", "") release_date = record.get("release_date", "") strain = organism.get("strain", "") - taxonomy_id = str(record.get("taxonomy_id", "")) + taxonomy_id = str(record.get("taxonomy_id", "")) if not accession: - return None + return [] return [accession, name, release_date, strain, taxonomy_id] @@ -126,13 +112,18 @@ def fetch_ensembl_division( print(f"Fetching Ensembl {division.value} from {url}") response = safe_get(url, timeout=600) + if response is None: + raise RuntimeError(f"Failed to fetch Ensembl {division.value}: no response received") + if response.status_code != 200: + raise RuntimeError( + f"Failed to fetch Ensembl {division.value}: HTTP {response.status_code} — " + f"check the URL and your network connection" + ) response.raise_for_status() records = response.json() if not isinstance(records, list): - raise ValueError( - f"Expected JSON array from {url}, got {type(records).__name__}" - ) + raise ValueError(f"Expected JSON array from {url}, got {type(records).__name__}") tsv_path = output_path.removesuffix(".gz") row_count = 0 @@ -140,8 +131,7 @@ def fetch_ensembl_division( writer = csv.writer(f, delimiter="\t", lineterminator="\n") writer.writerow(TSV_HEADERS) for record in records: - row = _extract_fields(record, division) - if row is not None: + if row := _extract_fields(record, division): writer.writerow(row) row_count += 1 @@ -163,16 +153,16 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_ensembl_metadata( output_path: str, + s3_path: str, division: str = "vertebrates", - s3_path: str = None, ) -> bool: """Fetch Ensembl species metadata for a given division. Args: output_path (str): Directory to write output files. 
+ s3_path (str): Optional S3 directory to upload the result. division (str): Ensembl division name (fungi, metazoa, plants, protists, rapid, vertebrates). - s3_path (str): Optional S3 directory to upload the result. Returns: bool: True on success. @@ -204,14 +194,6 @@ def update_ensembl_metadata( if __name__ == "__main__": - DIVISION = { - "flags": ["--division"], - "keys": { - "help": "Ensembl division (fungi, metazoa, plants, protists, rapid, vertebrates).", - "type": str, - "default": "vertebrates", - }, - } args = parse_args( [required(OUTPUT_PATH), S3_PATH, DIVISION], "Fetch Ensembl species metadata for a given division.", diff --git a/flows/updaters/update_google_sheets_status.py b/flows/updaters/update_google_sheets_status.py index e9d4c2f..7c0447b 100644 --- a/flows/updaters/update_google_sheets_status.py +++ b/flows/updaters/update_google_sheets_status.py @@ -15,17 +15,16 @@ import csv import io import os -import re import numpy as np import pandas as pd from flows.lib.conditional_import import emit_event, flow, task from flows.lib.shared_args import ( + INDEX_URL, MIN_RECORDS, OUTPUT_PATH, S3_PATH, - default, parse_args, required, ) @@ -54,12 +53,12 @@ # --------------------------------------------------------------------------- -def _open_google_spreadsheet( - acronym: str, url: str, header_index: str -) -> pd.DataFrame: +def _open_google_spreadsheet(acronym: str, url: str, header_index: int) -> pd.DataFrame: """Download a published Google Sheet as TSV and return a DataFrame.""" encodings = ["utf-8", "ISO-8859-1", "latin1"] response = safe_get(url, timeout=120) + if response is None: + raise RuntimeError(f"Failed to fetch sheet for {acronym}: no response received") response.raise_for_status() df = None @@ -109,9 +108,14 @@ def _cleanup_headers(df: pd.DataFrame) -> pd.DataFrame: def _create_mandatory_columns(df: pd.DataFrame) -> pd.DataFrame: """Ensure mandatory columns exist.""" for col in [ - "ncbi_taxon_id", "species", "family", "synonym", - 
"publication_id", "contributing_project_lab", - "target_list_status", "sequencing_status", + "ncbi_taxon_id", + "species", + "family", + "synonym", + "publication_id", + "contributing_project_lab", + "target_list_status", + "sequencing_status", ]: if col not in df.columns: df[col] = None @@ -126,14 +130,10 @@ def _expand_target_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame: df["long_list"] = acronym lower = acronym.lower() - fr_mask = df["target_list_status"].isin( - [f"{lower}_family_representative", "family_representative"] - ) + fr_mask = df["target_list_status"].isin([f"{lower}_family_representative", "family_representative"]) df.loc[fr_mask, "family_representative"] = acronym - op_mask = df["target_list_status"].isin( - [f"{lower}_other_priority", "other_priority"] - ) + op_mask = df["target_list_status"].isin([f"{lower}_other_priority", "other_priority"]) df.loc[op_mask, "other_priority"] = acronym return df @@ -158,9 +158,15 @@ def _reduce_sequencing_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame: def _create_status_columns(df: pd.DataFrame, acronym: str) -> pd.DataFrame: """Create and populate per-status columns.""" statuses = [ - "sample_collected", "sample_acquired", "in_progress", - "data_generation", "in_assembly", "insdc_submitted", - "open", "insdc_open", "published", + "sample_collected", + "sample_acquired", + "in_progress", + "data_generation", + "in_assembly", + "insdc_submitted", + "open", + "insdc_open", + "published", ] for s in statuses: if s not in df.columns: @@ -182,7 +188,7 @@ def _expand_sequencing_status(df: pd.DataFrame, acronym: str) -> pd.DataFrame: return df -def _process_project(acronym: str, url: str, header_row: str) -> pd.DataFrame: +def _process_project(acronym: str, url: str, header_row: int) -> pd.DataFrame: """Full processing pipeline for one project status sheet.""" df = _open_google_spreadsheet(acronym, url, header_row) df = _general_cleanup(df) @@ -203,6 +209,8 @@ def _process_project(acronym: str, url: 
str, header_row: str) -> pd.DataFrame: def _fetch_dtol_plant_genome_sizes(output_path: str) -> int: """Fetch DTOL Plant Genome Size Estimates from Kew.""" response = safe_get(DTOL_PLANT_GENOME_SIZE_URL, timeout=120) + if response is None: + raise RuntimeError("Failed to fetch DTOL Plant Genome Size Estimates: no response received") response.raise_for_status() df = pd.read_csv(io.StringIO(response.text), delimiter="\t", dtype=str) df.columns = ( @@ -212,7 +220,7 @@ def _fetch_dtol_plant_genome_sizes(output_path: str) -> int: .str.replace(r"\)", "", regex=True) .str.lower() ) - df.dropna(how="all", axis=0, inplace=True) + df = df.dropna(how="all", axis=0) df = df[df["genus"].notna() & (df.get("project", pd.Series()) == "DTOL")] df["primary"] = "1" df.to_csv(output_path, sep="\t", index=False) @@ -222,6 +230,8 @@ def _fetch_dtol_plant_genome_sizes(output_path: str) -> int: def _fetch_dtol_tolqc_status(output_path: str) -> int: """Fetch DTOL assembly informatics status (kmer draft).""" response = safe_get(DTOL_TOLQC_STATUS_URL, timeout=120) + if response is None: + raise RuntimeError("Failed to fetch DTOL assembly informatics status: no response received") response.raise_for_status() df = pd.read_csv( io.StringIO(response.text), @@ -237,7 +247,7 @@ def _fetch_dtol_tolqc_status(output_path: str) -> int: .str.replace(r"\)", "", regex=True) .str.lower() ) - df.dropna(how="all", axis=0, inplace=True) + df = df.dropna(how="all", axis=0) df = df[df["taxon"].notna()] df = df[df["accession"].isna() | ~df["accession"].str.startswith("GCA_", na=False)] df = df[~df["statussummary"].str.startswith("9", na=False)] @@ -253,6 +263,8 @@ def _fetch_dtol_tolqc_status(output_path: str) -> int: def _fetch_cngb(output_path: str) -> int: """Fetch CNGB project status sheet.""" response = safe_get(CNGB_URL, timeout=120) + if response is None: + raise RuntimeError("Failed to fetch CNGB project status sheet: no response received") response.raise_for_status() df = pd.read_csv( 
io.StringIO(response.text), @@ -260,7 +272,7 @@ def _fetch_cngb(output_path: str) -> int: dtype=str, na_values=["NA", "missing", "", "NULL"], ) - df.dropna(how="all", axis=0, inplace=True) + df = df.dropna(how="all", axis=0) df.to_csv(output_path, sep="\t", index=False) return len(df) @@ -271,9 +283,7 @@ def _fetch_cngb(output_path: str) -> int: @task(retries=2, retry_delay_seconds=30, log_prints=True) -def fetch_project_status_sheets( - index_url: str, output_dir: str -) -> dict: +def fetch_project_status_sheets(index_url: str, output_dir: str) -> dict: """Fetch all project status sheets listed in the private index TSV. Args: @@ -285,6 +295,8 @@ def fetch_project_status_sheets( dict: Mapping of project acronym to row count. """ response = safe_get(index_url, timeout=60) + if response is None: + raise RuntimeError("Failed to fetch project status sheets index: no response received") response.raise_for_status() index_df = pd.read_csv( @@ -330,17 +342,13 @@ def fetch_other_sheets(output_dir: str) -> dict: plant_path = os.path.join(output_dir, "DTOL_Plant_Genome_Size_Estimates.tsv") try: - results["DTOL_Plant_Genome_Size_Estimates"] = _fetch_dtol_plant_genome_sizes( - plant_path - ) + results["DTOL_Plant_Genome_Size_Estimates"] = _fetch_dtol_plant_genome_sizes(plant_path) print(f"Plant genome sizes: {results['DTOL_Plant_Genome_Size_Estimates']} rows") except Exception as exc: print(f"Plant genome sizes: FAILED — {exc}") results["DTOL_Plant_Genome_Size_Estimates"] = 0 - tolqc_path = os.path.join( - output_dir, "DTOL_assembly_informatics_status_kmer_draft.tsv" - ) + tolqc_path = os.path.join(output_dir, "DTOL_assembly_informatics_status_kmer_draft.tsv") try: results["DTOL_tolqc_status"] = _fetch_dtol_tolqc_status(tolqc_path) print(f"DTOL tolqc status: {results['DTOL_tolqc_status']} rows") @@ -373,8 +381,8 @@ def upload_s3_dir(local_dir: str, s3_path: str) -> None: @flow() def update_google_sheets_status( output_path: str, - index_url: str = None, - s3_path: str = None, + 
index_url: str, + s3_path: str, min_records: int = 0, ) -> bool: """Fetch all Google Sheets project status and supplementary data. @@ -410,9 +418,7 @@ def update_google_sheets_status( total += sum(other_results.values()) if total < min_records: - raise RuntimeError( - f"Google Sheets: fewer than {min_records} total records: {total}" - ) + raise RuntimeError(f"Google Sheets: fewer than {min_records} total records: {total}") if s3_path: upload_s3_dir(resolved_path, s3_path) @@ -433,15 +439,8 @@ def update_google_sheets_status( if __name__ == "__main__": - INDEX_URL = { - "flags": ["--index_url"], - "keys": { - "help": "URL to the private index TSV listing project sheets.", - "type": str, - }, - } args = parse_args( - [required(OUTPUT_PATH), INDEX_URL, S3_PATH, MIN_RECORDS], + [required(OUTPUT_PATH), required(INDEX_URL), S3_PATH, MIN_RECORDS], "Fetch project status data from Google Sheets.", ) update_google_sheets_status(**vars(args)) diff --git a/flows/updaters/update_jgi_status.py b/flows/updaters/update_jgi_status.py index 64e6537..2a557b2 100644 --- a/flows/updaters/update_jgi_status.py +++ b/flows/updaters/update_jgi_status.py @@ -53,13 +53,13 @@ def _exchange_token(offline_token: str) -> str: """ url = f"{JGI_BASE_URL}/exchange?offlineToken={offline_token}" response = safe_get(url, timeout=30) + if response is None: + raise RuntimeError("JGI token exchange failed: no response received") if response.status_code != 200: raise RuntimeError( - f"JGI token exchange failed: HTTP {response.status_code} — " - f"check that JGI_OFFLINE_TOKEN is valid" + f"JGI token exchange failed: HTTP {response.status_code} — " f"check that JGI_OFFLINE_TOKEN is valid" ) - token = response.content.decode().strip() - if not token: + if not (token := response.content.decode().strip()): raise RuntimeError("JGI token exchange returned empty access token") return token @@ -76,6 +76,12 @@ def _fetch_organisms(access_token: str) -> dict: headers = {"Authorization": f"Bearer {access_token}", 
"Accept": "application/json"} url = f"{JGI_BASE_URL}/api/v1/organisms?studyGoldId={JGI_STUDY_ID}" response = safe_get(url, headers=headers, timeout=120) + if response is None: + raise RuntimeError("JGI organism fetch failed: no response received") + if response.status_code != 200: + raise RuntimeError( + f"JGI organism fetch failed: HTTP {response.status_code} — " f"check that JGI_OFFLINE_TOKEN is valid" + ) response.raise_for_status() organisms = response.json() return {org["organismGoldId"]: org.get("ncbiTaxId", "") for org in organisms} @@ -93,6 +99,12 @@ def _fetch_projects(access_token: str) -> list: headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"} url = f"{JGI_BASE_URL}/api/v1/projects?studyGoldId={JGI_STUDY_ID}" response = safe_get(url, headers=headers, timeout=120) + if response is None: + raise RuntimeError("JGI project fetch failed: no response received") + if response.status_code != 200: + raise RuntimeError( + f"JGI project fetch failed: HTTP {response.status_code} — " f"check that JGI_OFFLINE_TOKEN is valid" + ) response.raise_for_status() return response.json() @@ -115,8 +127,7 @@ def fetch_jgi_tsv(file_path: str, min_lines: int = 1) -> int: offline_token = os.environ.get("JGI_OFFLINE_TOKEN") if not offline_token: raise RuntimeError( - "JGI_OFFLINE_TOKEN environment variable is not set — " - "cannot authenticate with JGI GOLD API" + "JGI_OFFLINE_TOKEN environment variable is not set — " "cannot authenticate with JGI GOLD API" ) print("Exchanging JGI offline token for access token") @@ -146,9 +157,7 @@ def fetch_jgi_tsv(file_path: str, min_lines: int = 1) -> int: line_count = row_count + 1 # include header if row_count < min_lines: - raise RuntimeError( - f"JGI file has fewer than {min_lines} data rows: {row_count}" - ) + raise RuntimeError(f"JGI file has fewer than {min_lines} data rows: {row_count}") print(f"Wrote {row_count} WGS projects to {file_path}") return line_count @@ -163,7 +172,7 @@ def 
upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() def update_jgi_status( output_path: str, - s3_path: str = None, + s3_path: str, min_records: int = 0, ) -> bool: """Fetch JGI 1KFG status list and optionally upload to S3. diff --git a/flows/updaters/update_nhm_status_list.py b/flows/updaters/update_nhm_status_list.py index 707e15d..a825e69 100644 --- a/flows/updaters/update_nhm_status_list.py +++ b/flows/updaters/update_nhm_status_list.py @@ -43,9 +43,7 @@ def fetch_nhm_tsv( # If the file has less than min_records lines, raise an error if line_count < min_lines: - raise RuntimeError( - f"File {file_path} has less than {min_lines} lines: {line_count}" - ) + raise RuntimeError(f"File {file_path} has less than {min_lines} lines: {line_count}") # Return the line count return line_count @@ -57,7 +55,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() -def update_nhm_status_list(output_path: str, s3_path: str, min_records: int) -> None: +def update_nhm_status_list(output_path: str, s3_path: str, min_records: int) -> bool: """Update the NHM status list TSV file.""" os.makedirs(os.path.dirname(output_path), exist_ok=True) line_count = fetch_nhm_tsv(output_path, min_records) @@ -82,9 +80,3 @@ def update_nhm_status_list(output_path: str, s3_path: str, min_records: int) -> ) update_nhm_status_list(**vars(args)) - args = parse_args( - [required(OUTPUT_PATH), S3_PATH, MIN_RECORDS], - "Fetch species data from NHM.", - ) - - update_nhm_status_list(**vars(args)) diff --git a/flows/updaters/update_ott_taxonomy.py b/flows/updaters/update_ott_taxonomy.py index 7280a14..f2a1838 100644 --- a/flows/updaters/update_ott_taxonomy.py +++ b/flows/updaters/update_ott_taxonomy.py @@ -42,9 +42,7 @@ def fetch_ott_taxonomy( # Find the extracted subdirectory (should start with 'ott') extracted_dirs = [ - d - for d in os.listdir(local_path) - if os.path.isdir(os.path.join(local_path, d)) and d.startswith("ott") + d for d in os.listdir(local_path) if 
os.path.isdir(os.path.join(local_path, d)) and d.startswith("ott") ] if not extracted_dirs: raise RuntimeError("No extracted ott directory found.") @@ -111,28 +109,30 @@ def set_ott_url() -> str: # Extract required fields source = ott_json.get("source", "") - name = ott_json.get("name", "") - version = ott_json.get("version", "") # Replace "draft" with "." in source to get OTT_VERSION ott_version = source.replace("draft", ".") - ott_major_version = f"{name}{version}" - return ( - f"https://files.opentreeoflife.org/ott/" - f"{ott_major_version}/{ott_version}.tgz" - ) + # may need to restore this if ott switch back to the major version URL structure, but for now we want the full version in the URL + # name = ott_json.get("name", "") + # version = ott_json.get("version", "") + # ott_major_version = f"{name}{version}" + + # return ( + # f"https://files.opentreeoflife.org/ott/" + # f"{ott_major_version}/{ott_version}.tgz" + # ) + return f"https://files.opentreeoflife.org/ott/{ott_version}/{ott_version}.tar.gz" @flow() -def update_ott_taxonomy(output_path: str) -> None: +def update_ott_taxonomy(output_path: str) -> bool: """Fetch the OTT taxonomy file. Args: output_path (str): Path to save the taxonomy dump. 
""" http_path = set_ott_url() - status = None complete = False if ott_taxonomy_is_up_to_date(output_path, http_path): status = True diff --git a/flows/updaters/update_refseq_organelles.py b/flows/updaters/update_refseq_organelles.py index 8316c1c..a202eea 100644 --- a/flows/updaters/update_refseq_organelles.py +++ b/flows/updaters/update_refseq_organelles.py @@ -12,7 +12,6 @@ OUTPUT_PATH, ROOT_TAXID, S3_PATH, - default, parse_args, required, ) @@ -39,9 +38,18 @@ ] MONTHS = { - "JAN": "01", "FEB": "02", "MAR": "03", "APR": "04", - "MAY": "05", "JUN": "06", "JUL": "07", "AUG": "08", - "SEP": "09", "OCT": "10", "NOV": "11", "DEC": "12", + "JAN": "01", + "FEB": "02", + "MAR": "03", + "APR": "04", + "MAY": "05", + "JUN": "06", + "JUL": "07", + "AUG": "08", + "SEP": "09", + "OCT": "10", + "NOV": "11", + "DEC": "12", } @@ -65,12 +73,10 @@ def _refseq_listing(collection: str) -> list: pattern = re.compile(r"(\w+\.\d+\.genomic\.gbff\.gz)") url = f"{REFSEQ_FTP}/{collection}" response = safe_get(url, timeout=120) + if response is None: + raise RuntimeError(f"Failed to fetch RefSeq listing for {collection}: no response received") response.raise_for_status() - return [ - f"{url}/{match[1]}" - for line in response.text.split("\n") - if (match := pattern.search(line)) - ] + return [f"{url}/{match[1]}" for line in response.text.split("\n") if (match := pattern.search(line))] def _parse_features(entry, fields: dict) -> None: @@ -95,14 +101,12 @@ def _parse_references(entry, fields: dict) -> None: if ref.journal.startswith("Submitted"): if "sourceAuthor" in fields: continue - match = submitted_re.search(ref.journal) - if match: + if match := submitted_re.search(ref.journal): fields["sourceYear"] = match[1] elif "sourceAuthor" in fields: continue else: - match = published_re.search(ref.journal) - if match: + if match := published_re.search(ref.journal): fields["sourceYear"] = match[1] if ref.title: fields["sourceTitle"] = ref.title @@ -149,7 +153,7 @@ def _parse_sequence(entry, 
fields: dict) -> bool: return True -def _parse_flatfile(flatfile_path: str, organelle: str, root_taxon: str = None) -> list: +def _parse_flatfile(flatfile_path: str, organelle: str, root_taxon: str) -> list: """Parse a single GenBank flatfile for organelle sequences. Args: @@ -174,8 +178,7 @@ def _parse_flatfile(flatfile_path: str, organelle: str, root_taxon: str = None) fields = {"id": entry.id, "organelle": organelle} comment = entry.annotations.get("comment", "") if comment: - match = comment_re.search(comment) - if match: + if match := comment_re.search(comment): fields["genbankAccession"] = match[1] else: continue @@ -195,8 +198,8 @@ def _parse_flatfile(flatfile_path: str, organelle: str, root_taxon: str = None) @task(retries=2, retry_delay_seconds=30, log_prints=True) def fetch_and_parse_organelles( output_path: str, - organelles: list = None, - root_taxon: str = None, + organelles: list, + root_taxon: str, ) -> int: """Fetch RefSeq organelle data and parse to gzipped TSV. @@ -211,7 +214,7 @@ def fetch_and_parse_organelles( Returns: int: Number of rows written. """ - if organelles is None: + if not organelles: organelles = ["mitochondrion", "plastid"] all_rows = [] @@ -223,6 +226,8 @@ def fetch_and_parse_organelles( for url in listing: print(f"Downloading {url}") response = safe_get(url, timeout=600) + if response is None: + raise RuntimeError(f"Failed to download {url}: no response received") response.raise_for_status() with tempfile.NamedTemporaryFile(suffix=".gbff.gz", delete=False) as tmp: @@ -268,8 +273,8 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_refseq_organelles( output_path: str, - root_taxid: str = None, - s3_path: str = None, + root_taxid: str, + s3_path: str, min_records: int = 0, ) -> bool: """Fetch and parse RefSeq organelle data. 
@@ -289,13 +294,11 @@ def update_refseq_organelles( os.makedirs(os.path.dirname(resolved_path), exist_ok=True) row_count = fetch_and_parse_organelles( - resolved_path, root_taxon=root_taxid + resolved_path, organelles=["mitochondrion", "plastid"], root_taxon=root_taxid ) if row_count < min_records: - raise RuntimeError( - f"RefSeq organelles: fewer than {min_records} records: {row_count}" - ) + raise RuntimeError(f"RefSeq organelles: fewer than {min_records} records: {row_count}") if s3_path: upload_s3_file(output_path, s3_path) diff --git a/flows/updaters/update_sra_data.py b/flows/updaters/update_sra_data.py index 6840f6b..e10c6f5 100644 --- a/flows/updaters/update_sra_data.py +++ b/flows/updaters/update_sra_data.py @@ -19,7 +19,6 @@ ) from flows.lib.utils import is_safe_path, run_quoted, upload_to_s3 - SRA_FIELDNAMES = [ "taxon_id", "sra_accession", @@ -84,9 +83,7 @@ def _read_runs(node, obj): if "runs" not in obj: obj["runs"] = [] for child in node: - obj["runs"].append( - {"accession": child.get("acc"), "reads": child.get("total_spots", "0")} - ) + obj["runs"].append({"accession": child.get("acc"), "reads": child.get("total_spots", "0")}) def parse_sra_xml(xml_file: str) -> list: @@ -122,7 +119,7 @@ def parse_sra_xml(xml_file: str) -> list: return rows -def group_by_taxon(rows: list, grouped: dict = None) -> list: +def group_by_taxon(rows: list, grouped: dict) -> list: """Group SRA runs by taxon, keeping the 10 most recent per taxon. Args: @@ -231,20 +228,19 @@ def fetch_sra_xml( Returns: str: Path to the written XML file. 
""" - api_key = os.environ.get("NCBI_API_KEY", "") max_date = _get_yesterday() query = f"(txid{root_taxid}[organism:exp])" esearch_cmd = [ - "esearch", "-db", "sra", "-query", query, + "esearch", + "-db", + "sra", + "-query", + query, ] - if api_key: - esearch_cmd.extend(["-api_key", api_key]) esearch_cmd.extend(["-mindate", min_date, "-maxdate", max_date]) efetch_cmd = ["efetch", "-db", "sra", "-format", "docsum"] - if api_key: - efetch_cmd.extend(["-api_key", api_key]) print(f"Running esearch | efetch for taxid {root_taxid} ({min_date} to {max_date})") esearch = run_quoted(esearch_cmd, capture_output=True, text=True, timeout=300) @@ -252,9 +248,7 @@ def fetch_sra_xml( raise RuntimeError(f"esearch failed: {esearch.stderr}") with open(output_xml, "w") as f: - efetch = run_quoted( - efetch_cmd, input=esearch.stdout, capture_output=True, text=True, timeout=600 - ) + efetch = run_quoted(efetch_cmd, input=esearch.stdout, capture_output=True, text=True, timeout=600) if efetch.returncode != 0: raise RuntimeError(f"efetch failed: {efetch.stderr}") f.write(efetch.stdout) @@ -267,7 +261,7 @@ def fetch_sra_xml( def parse_and_write_sra( xml_path: str, output_path: str, - previous_path: str = None, + previous_path: str, ) -> int: """Parse SRA XML and write grouped TSV. @@ -288,9 +282,7 @@ def parse_and_write_sra( tsv_path = output_path.removesuffix(".gz") with open(tsv_path, "w", newline="") as f: - writer = csv.DictWriter( - f, fieldnames=SRA_FIELDNAMES, delimiter="\t", lineterminator="\n" - ) + writer = csv.DictWriter(f, fieldnames=SRA_FIELDNAMES, delimiter="\t", lineterminator="\n") writer.writeheader() for row in grouped_rows: writer.writerow(row) @@ -314,9 +306,9 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_sra_data( output_path: str, - input_path: str = None, + input_path: str, + s3_path: str, root_taxid: str = "2759", - s3_path: str = None, min_records: int = 0, ) -> bool: """Fetch and parse SRA data, writing grouped TSV output. 
@@ -345,12 +337,10 @@ def update_sra_data( xml_path = f"{resolved_path}.xml" fetch_sra_xml(xml_path, root_taxid=root_taxid) - row_count = parse_and_write_sra(xml_path, resolved_path) + row_count = parse_and_write_sra(xml_path, resolved_path, previous_path=resolved_path) if row_count < min_records: - raise RuntimeError( - f"SRA output has fewer than {min_records} taxa: {row_count}" - ) + raise RuntimeError(f"SRA output has fewer than {min_records} taxa: {row_count}") if s3_path: upload_s3_file(output_path, s3_path) From c4180bb9f6498713efc094276cda4ed74a79c6c2 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Mon, 8 Jun 2026 15:44:24 +0100 Subject: [PATCH 14/18] remove __pycache__ files from git --- flows/.gitignore | 2 ++ flows/__pycache__/__init__.cpython-312.pyc | Bin 143 -> 0 bytes flows/__pycache__/__init__.cpython-313.pyc | Bin 143 -> 0 bytes flows/lib/__pycache__/__init__.cpython-312.pyc | Bin 147 -> 0 bytes .../conditional_import.cpython-312.pyc | Bin 1537 -> 0 bytes .../lib/__pycache__/shared_args.cpython-312.pyc | Bin 5104 -> 0 bytes flows/lib/__pycache__/utils.cpython-312.pyc | Bin 39924 -> 0 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 152 -> 0 bytes .../update_google_sheets_status.cpython-312.pyc | Bin 20275 -> 0 bytes .../update_ncbi_datasets.cpython-312.pyc | Bin 9028 -> 0 bytes .../update_refseq_organelles.cpython-312.pyc | Bin 13537 -> 0 bytes .../update_vgp_status.cpython-312.pyc | Bin 3882 -> 0 bytes .../api/__pycache__/__init__.cpython-312.pyc | Bin 156 -> 0 bytes .../api/__pycache__/api_config.cpython-312.pyc | Bin 9456 -> 0 bytes .../api/__pycache__/api_tools.cpython-312.pyc | Bin 1607 -> 0 bytes 15 files changed, 2 insertions(+) create mode 100644 flows/.gitignore delete mode 100644 flows/__pycache__/__init__.cpython-312.pyc delete mode 100644 flows/__pycache__/__init__.cpython-313.pyc delete mode 100644 flows/lib/__pycache__/__init__.cpython-312.pyc delete mode 100644 flows/lib/__pycache__/conditional_import.cpython-312.pyc delete 
mode 100644 flows/lib/__pycache__/shared_args.cpython-312.pyc delete mode 100644 flows/lib/__pycache__/utils.cpython-312.pyc delete mode 100644 flows/updaters/__pycache__/__init__.cpython-312.pyc delete mode 100644 flows/updaters/__pycache__/update_google_sheets_status.cpython-312.pyc delete mode 100644 flows/updaters/__pycache__/update_ncbi_datasets.cpython-312.pyc delete mode 100644 flows/updaters/__pycache__/update_refseq_organelles.cpython-312.pyc delete mode 100644 flows/updaters/__pycache__/update_vgp_status.cpython-312.pyc delete mode 100644 flows/updaters/api/__pycache__/__init__.cpython-312.pyc delete mode 100644 flows/updaters/api/__pycache__/api_config.cpython-312.pyc delete mode 100644 flows/updaters/api/__pycache__/api_tools.cpython-312.pyc diff --git a/flows/.gitignore b/flows/.gitignore new file mode 100644 index 0000000..47b052f --- /dev/null +++ b/flows/.gitignore @@ -0,0 +1,2 @@ +!.gitignore +__pycache__/ diff --git a/flows/__pycache__/__init__.cpython-312.pyc b/flows/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index a80046ecf22b58188e954633536b7947ba11b8f3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 zcmX@j%ge<81kbxKW`O9&AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdrKcZSoLW?@pImHW zte>2jl$w*OTbx;vs-KcrlBl1SlV4t}A0MBYmst`YuUAlci^C>2KczG$)vkyYsGSjr Qi$RQ!%#4hTMa)1J0J_m3E&u=k diff --git a/flows/__pycache__/__init__.cpython-313.pyc b/flows/__pycache__/__init__.cpython-313.pyc deleted file mode 100644 index 435d80e340db468fd2e1db3e6a157fc847ee4b49..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 zcmey&%ge<81kbxKW`O9&AOZ#$p^VQgK*m&tbOudEzm*I{OhDdekkl2jl$w*OTbx;vs-KcrlBl1SlV4t}pOcxSA0MBYmst`YuUAlci^C>2KczG$)vkyY UXapk=7lRldnHd=wi976rYi_+E3eYKHLz8qUO@*&|Ro=GG?I|hnT|F*l7>C2(2`>6_2E4X4Xwi z4-OcbkVB46uRYb|PwB;>y>yD9&_l>A@%GeH-<$nP$RRysV4mK4dhhA?zVz$NOpU;K z`Q*j!eVdTKQ5YYP%^5s`$1zDrLOBUYVjYoyT7=Q(BLgv;*r2l#2dJHtfI5jAloA@a z`3+0>|)mUe8KFiIN?XG=8Q*;{o+& 
zUSXh4Jo>hIAX2=8+dz(qGD;-(tO+||?v*C&gh}WaUt;auBxH;D_Q8)e56;$J+VeI9 z+h(!y(yUhqCB1apQ@hM#`F<*jJY%`?_9Kx-UCz8zdXlMT&3JDz)fc&3HUft076o7S z?utGb{*NH&q8nRZuWy7Kn`^h%R_|&@fwOOGtB|@pLZIy(rcTHJ6Z&_h^{r$=wqki} zsTHSP#@QD#RjidnDr#->;(OWRX}1*@d6KHM$Ri#O6KnP!=!S%Z!`?7q(SU#sTzMVH zA^B&PG-jVJKVE*?e%yZ6{H^Z)^4*L2(y6l~aE{(-3O0NW?w|q$#)lqZS*=byngq}i zg8U`80~g3K`2kE$Xh&2*I-#20muTRLI#69%;ZfF2qV~Z8Ced8sMI3S224<+g2v!3` zdq{qoH-LtLEKNggg`vc0@J7)bj3GV&%{;mx$V2kS+_lrnwU3vJzycT$;sS7QWg`&m z!Gen9-usDMK`$)JGzoDT{ohFdeZ?ZE@s+qbHvcG1_z>{xEK60$_F?khhi{;K2!3rE zL3|3>u@loIUSZ!47T3+brTNZe1GoEmj58~J%QVjvz^A&>`6gWbYW2>VPfa*bRYgA! zZ@ZJ@TfFnO>A5Uf-Z+)#CaGFCj>WWJn31H^f_w__!pTSksV-O+mzBj zN%h<<(_3#Y6X(KF``o3a#VL91lKG2=>wnE%dU)rJOG>rV+ULJJH(xq8&YaenKMBLC1Fls8(gsMx zg#^h#K-yhMh;)!nkksXp6efqrVZe^K)*T>6f%BLPX(Qc$9CslR@)GF*zFrr0kVvEt zu&4`bC;h-VKzQ0gPJHTN9(9sIz)rq^b&(;!VlQBa$S`>s14Zwcp_|QX= z*NQphJ27lorx5@dpUX$S425T!+tA(L+UqL;k+OL4v* zxCY219at2}6wFV!<{6j|k}N$*a-e6D^0%j(MYsP2In%H%wj#vdQvShnTp+0})k?H% z=#svyXp~3`)VND&S<(xdS~c`UAdprH%MwuwhFU2r+KQx>C1aUNMO8N>jaD_PGk&Ek zDULA7sDK}ul2C%wxfB-=6Ry49_A?RZ!Rux_UZyN*YdrGNVq@w`| zp4n6KbTWTc8k63SGBwdr=|(iJM_-Ym@wk3Ut}4cI^cKueHxS>CF4JNa2GMDtGy{;I zeVd6RszwV&1p-+qgX{!~(dfH1fC=#AwHgJ)SgBH&f#dh@2N<6dKnSYb^m=I6vF(Oq z7#K0Uk1=y@#RC7t0?5WZmM2+WvBVUl(rTG2Lpn`=Jg!t#xuzA(wz-)p2||U*N={EC zVuB^2AC`!=z^sKiZ;2?@6yl2ML*^nis6Iyb7x?GYm2zC8dSpimVbdlyoN2L@Hsn$KymX6nRmj)TAu# zW`8rrNRe!cT{jSLC&Z?gF;a6dr5Lp7q*^BQUP8KD(MpOjDqV*sp2@^hph$=l3=Pp% zWUW><&o$kvQf(2IONs;}2}sgi)mX0745?NH{pjTEsgWhsV7}@osAdM^@nyrP+U9mQ z&BZWZ&F80~P@Q^6m;qQ>Tae32iJDRt-c3`l&~l|juhtfH=sw+0$_1K0xhjzhm13<_ zHskK)HYPc})$A2egDo(TVs_m#L%@$AkTJjJ;XJ%iAHy)0i_|DAqYJ9Nv|2N49}c^H zP|JWbcbP>cwJbySA$5ZpwXBwECCGmj3Ld(wTq`Y54Rx>;l2*B^Ct@DE09pxMRBA;- zYlU(!55(gr2}(g-DpO*{enePvkz^?Mlmu1~vfAG2X8(_HI&09!3i3fB_nbW9cG4y| zUJCJd&9->_-5Q1Rs8n%V)XhUVdmur5uohxul&oR&v`WR0!9Iw~&x)(YfQEkHgP z4~sUY3hP*>u=4`1j#cRFFy=thB8`?RP{!C!BYV};EOn?MU0=2nHTdHk@2QFeYf+Jb z5eJ({t>hGHZ<7}~ID6^h1R4o_f`;QFR;Xn*SpwjhZX39>k<$=AG*<^V&YSgrH0WNBL4_@Cq@O!=vFSx$( 
z_XPt^QTw^=Ug=<~0ufd@;=v>Rc8deLK13wKV~6iCN`~jYXVpVmR{7s>#QR&18Ae3q zR=J08F~qMe?IFMFS>^HS^pIEihx{xT^FeJfnH}&D0`_W4b z-;7zD<_GmG8D`KGT8oc4+cb*|93+B9EstJXusloDh>2Po@?j>efTIWcj|tL6K=Bm) z_nDD7xB%!Q1^x7?k%GEFi!`pQ29z)q{K#SvHiMC(x-eqjL-35#6V(;FK2Y?R@Y8<* zuR6CI;Mz~tC;t)*|N88QXV-@}79R9JSp4ke-;aDYvN`x9`ZPNBG&o*QZHvCOI~ylA zjy~vXga)?6fqH5um{}_{f|>QRjqa(9!h^w2?!aL4^cPqD+w%rwNzbI`#R_R1@ykQ#w_i0Dd2i}~vRNQSWtEu<6*6s}fM3AQ^Gt{Kj8nPB0! zy!4&{XR5^yL(qdD+-iYq*OLA@_c_l3t)pp@c7&0b3{8UZSMbwMz^l&vfRoq0^z1pQ zq1i7uiPg!R9Dug)&p|2UU<12(4q+9>uV%5 z%^$-YjbV;1G;?&}(deeJ)qP^?C zVeWkdgO9oY)3*E{_{UBK+=m8Eakq|P8w%X2a2qT7dk-`$yT?OCc`8DrhVz1HxC zE9AZ)S^>6*g$CNRA0?nH%gb=ht|xQ|Ta=ZQ7Q;pO`YZhONqE({zlXb?g$K661DoAn zg~#fbzipG&JlnqRHSaTD_m;1FWA?$}Pi{7RuRogIJoNdEr@q&BJzS)3*UN?4p9PO^ z1&==s_B6yEmbTci)hf%RQjld08%pbhm*s_92<+_|=14<}G%QHiWem+~h5M)s*Hg=z zoSsf+Q&uoJJDbj2oVqSgq^w9PnNP}>XVU5UG`p$86*rkrTdk>?>+;N8*7D;$%cl6Q z&RvwV$xPbvPo(5axv9BK)(T8yQ|W7R{`zzp$J29pd$C6?8{VM! z2q>w!Ow&M^<+A8jn2mCo>$bhVpXX-Ylv5KkmLHERn-{L+6~(Z`g-WGp@4q7c?_v7^ z>ncmcziE~qPhY*N6sVnE){3|T*t>}DLa9P(MLMoc0fu$2_dy`p&mp^eC>_?nD%@B7r`=vdE!ecN4$C!Vd)1@P*C zP!H^e(Agg#Fj42WJ33KXtB4V5Wzssiwq6#W^Dh!3LOrw#L_0)hUL>+80kLB}*9Z@6 zca1#h*b0q-9gKi>G&)Z-!h_pg@lAazbRH-Ng?f13!kx&d%PnR-_}j5=2k@+m)(jvE zk~`|nAc3lZM)<^b*V!*17%5Q2Oa~(vOSel_lSRTbbsXJ}oODHl8OVH^YJ~fCBAM-u zLoPDoWn@g`cGu}Ag{{y<(8i(|^7C1K+aF#lte;w2{;+4)!+_|?N1jJ>o2e)LjiIrv z=(r8jMamZ=6PKd^ga__dWm! 
WKAW@b^W@sIvE0^J?q7(p?d(7KJ;DG0 diff --git a/flows/lib/__pycache__/utils.cpython-312.pyc b/flows/lib/__pycache__/utils.cpython-312.pyc deleted file mode 100644 index d7f957d4594a1316c0523ae01e4752d551fd4cd4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 39924 zcmdVD33MFSc`jJh`-;ZWjh#djJ3uT10g4n?Vc|}Kv`A4FBn4`s8eo&yT-_iE1e76< zl7O-D1Uk0aisYzed*bGiGKSvCOyGCUyojLB;XG5QXosE4)b~g2y*xB4?W@k&E1!vu`b;QY0Q(Dpk#-3^NV|onQLjqakG})- zelZ&s;UL~`5AIMd-A&y&ggg6%!^m^s4H|?aX!U@VqkqCP!cpWO!{2fI_2TaY{;1au z;UwxE6u!iM7x?r2Rmjy5BrrHM2#=$kLu{lDz}t2u_Y`su-A7c+dyM7MEYe#r2~Q&LxbS6+zIT#uc=p!>^1U~B$<#Rz3XF|5=p=bbv|Yu0c0-{FFcB9XKbBjf@4yM*QA$tlz;Fo6U;@4?$DGo$Lo?45ODj zy{FIl*_C%}FjawW->^z{Uohw&IXgVr9}pya(EpVQ|LB09ei(xxpBR$#{!u|P1w*2b z&cpuEq0l+WG7=ER#{#1vsX)Ez9~Z~Q$3&VBDF^MO{l3A88)!86O`HvSpXxk*%uBBmpwYsnym!DrPf`vO&g?CI z1Ml-X7vj|$=M}`O{-*OZ|E6+(n(No@r`4g>)*RE^`Sj;0chQnkA1!J3(^_JLF|(=c zEv5BHdwNKvlC#}2hu+c-XbPD|DM zsWW<`Juj6j@Qf(waF>8*&G9C^F&yc=6ZG^?omHNjM%{jG^wYW*wudU@S{(PLa)A=L zYM%9B{nQBt^v0`}Po2|xlc(lR>L>M8TpJhUFX?#hDa-=Ty>#TV{weMfU&W>7shShP zh_HFq>ORFy>MwC}*#@JJ2gotNC1h}TG!P2-h67W6ZwLrufUSm%1)yLVD%>d!rJ)(h zk<0fF28R9KdZ3Gjo!*na&^d2tj5Ws=OJ+qOQ z=%4hB3`-`tVaYKB6gA=x`3Q$e2D&0#8dA}fWLEH&R6r#e3Fsf37-b|VC|Q+^(V*lS z7yTClV-rESkRaLch>-ts2q;mq1SiIEKZqLpFAw+`?U9_UJ8Ct_kv@kEO-e2`OYKCS zT3SV`!3Kk52>OQyCA&N#_8P?w3|(wT5+pQ2lPVINOP2or0Dz^xf6CR(prl33V1-S9 zsu}KXAy-tfTv#73tdEv07H*q0ykuW-moK{;;_ilM^$$1S*gXIHi|%gZTUSuBXmh-1 zbF}A&M{gWmEZTuw$4bej<&tgjl5O+CUtf6p!eU7ma&reo`k#j`r&Jb!x!T2##qzdcMLyq+;pt!b@lmkhNax zmZXK-&_L~!B)<9)R~ z?c95H#-1(a_u6y!oAiHcVl z`-QQA{(kXMw8|(dBX;bah?SH_NeliMReX`Vt#k2W(O72-t2&a?vTlG zM*J~?RgQ~vf;A2DG_6yS5f~BPSD~VgV^|GY5JuV(&BPUH0QOtq2^E;X7>*#Ir6XgJ zce}$qT3(N@lEMBV$fbd2FQh>)X3lw#<`{jF7n7ZJ zO`a9;G2A(YzhD6ptf6wwkw3RNJRN;@(Y`BY*mXa5qY~y-dZlZ)8=W8gjo&C3{04Bs zMsUI=!Q?j!X1^KSyam54lhy{CWID-s(y3;KQRN?LEDuctL%EsEj$~jc(?EDkvJ3v; zfEZvXDT4_jR)oP6!R2*=Bd#A;^1%qH!uEjV>T!?Cvjg0^&R3{H8NI_b1d1U|QSRph 
zDHC;51vO%Y*>AAIMCIg0QpA9sLj&^G5BP?MeP@UL028W21;OXBo5T`kR!JLu`jgVL z*F5mS#7lk$sV4(Q*HEoqfK@jpO6;LzKavK6xSg&ub2CvlLmg8txp@t12T(0Y{L>7V zEap5Vgpz?o*{Q5{*6^XFm;s(-AW#sAn;6jE6Mh)*m-dbW+!bQ3wD*d;%E7RRQj^#y z!X#7wR+|deE%sU%$O#a%Fw;J9@`10Hm93}3%BxyanN~NwJ@6XZld^IQ)I{2p3k(Bz z(|bYCK?jM11RrO80~hcf*t8$S7$rMKRU7J{qvE9rF@%fi)34yQ=RT{_7P zQd^S#f`3wM#8n0zA*P$b!!f`>CLIZBaHF!*L3@bk4m+l~Hx;Nk1(A1JcaaYkqy&y> z$ZW*AhX~%-p|&Az7If#$YHsR0&Gm3E>j!j0fY)cij$hGF>u30vOleQlt53Y9dnw<5 za#ysRQ-DzUiRU#zC9Q2<(5cV=f8*H$VD70mmyoJg4SL^ROdtc}17qTdFBJ6pyo8s) zbVKLBIDBNS@5&D^;iw(iA8QB$HmtW$K?8#Zr74 z(ALC?KC7%X`7T!2SwD%#=uyZoG~p3}fpgvw-z4R$J=9hoEuo9P;R(MN0=4{TQcbWd zs50m_9e}NLWJ*6Fg^vvcpqLT7mjWbQU`(tFsanK*dsTGMnKl(9p@e*3Trn3auik|D z8W^4sD9`7u4EnJyeW5WiSgG}bdXkz?Jkb}zz{Z2ry&&Nht&$hBgND{s#^AP$UbKml z`%cqEZ-icf7wV7^j0mp;Ip?8b!B8g{<)N`^&&udtaNIu-7!05!-aZveA4po~zuIUcTX3EPm{UC(=2;>Fws%3c_FdPGQs&FMe#%)fuyNeqwRVJ`=Z;E?YLn zEgQqZNcW1A~VzK4Sk0us2eZc?J`Breg?JqAbOvGExELxsS>UBob=Xdm6 zNhkmL-?f|xl2+lS&fHaQxoLAEeU~$LZL+*;aUvC)a8ojf&`4#VHy5(?_*?a^MQ;ka zNFj!8gmU;E?pg3WR}3Ml#g1vi{YWgxzvQ0QXG0}|J}UwuNXdwR3zN%0w(0fGD(u7Xt!7mZq1MkGh5CKKYTAuSujnUn zTwbmjD-taeMq``^@Gl!N!xsZ%B5I6}QJ$Wx z*hFZ2A|%hgJj-l;ssRKN?~{0|#M^os%#vXYZ%`x&MeL<}#!Dhpx8f6&K`UN7gQUU4 zR*u+58N?h&{Gg=Alt_9EHb@c+wXO<4P9Fn{{?J%|D0opM)+B{;;I$gQ0lw8OZUT!la2SD!Xmgd&>S%Z_LZTZRFg=&<)sXW7e9_ z@7TDKCwU0??B~5;B|0-#*k#cFpcKhXzPfW??#-H9y546*`mVv)*cF|+Cb4E+Y|Vm+&S+5;2(^_S1Tzj+ zhRcRSRBa_N)YBaI40wwxdf+K-1i({Td#Sz-YK)JV&b%4azFjd0{CN!*v3X#LpnH)E zX?O{^jxgz(1A12F>FT*h)2XUCSB%pJh!RHPgmU%~nvT#&q)eb9GD4cR)ssk$ay-e^+_TU%u@}fg`htbyLJwA7?zAqO z9HzeZQSjK`=CAWW@LmHq$^WJP5~OYsV8Hc4#<30f#seXy5{BVR*Rn<81Crs=m?&h* zHrCxxoBA)0i+iVbc6Q$d|tBneBP`#MKi^jt$YCMaPzp$~r!0jK|liYk7(9Z?MSd=rmf9 zO4on7g0FzBMuH4qNml>p#E2j4o*$;J@!>!Sc#8>fIZ7*jS&owoREcpul9BPKk~39; z#<>QgiKFNXOa3PsA5AtB@Cu7(dRNSObJedImdqPI+S9q{>iW^?`O{0ejnR$|bGIy< zj=Q>ImaY|NQP}XBu;i>txC$T>7M5N=f9?FYhp*aZ^|Mc`*gfE*gzxu+pINduuDDyG z6N~O`v-=aprLUV`G2f<1{8d?XULA(9K!g|O@wNo?RNX9~A8FSU-^+TS5nyE5l&MIlrDwSEQY@%PiW22c} 
z3k)(z%}<7JS;AllM5=g5mnG-%^w%RfGoCD&m8bT?L=R<4&_6gfEJ&84{z+zHzrl+a z0YpIpDn*l;PZk$#SWKTTeigT4RPM`2unJ2pMmGq5ZoypJ)r+yh?Qz$RWmjk1g|*n7 zaBW<6)x}+Pi>`({dOo)iD9Bl!)ElifESmhXq!m}6-F0vdH+20*_eZV*sQHcVdqJoc zxUbbWgJEvw{?huWet~~fpMBu~DQ8(Yf}R!t1AN?jH=PB=l$sdcMesC)8^h zZG%tH6BDK78VH#qm$6n_u1U!?v$D9$e(MAaJL@SGtjaxemV0tNr5DPRGi%$Z_M~&t zDcH}`h}Im8B{nE5F&Il?FsBU|4;VqYJ+Q-0#tv&tD0Fw}{d34qPy|-2qlO&c{|QEi z!3Laaf?l5>zyMC(JV0{_9cdQ*!%W*G>!wuwinhMis+v`@!dj)3m;uHu-ox_x%*g!$ zz#Ry&24MykeFNw!wG2m;E?BC)KGjs+1kMr6TI!LquTGPf1ki)s-f_P;08!O9kjG@`lu~2Go=tdTl1DY3t z-c0R9YBXLjXJ_Fj1Tk8h%)qD+fYJaKQEza-H%J5_sJ@af(p_2q|2tCYl%`RGlI&z& zQw}iYBC#I|P^DVPkSNyERch}+VKoSu(V?}+5JS4AG~K|Zm6D$-sv^EOYUB3$#FqB?&bLjm?MLEUj?NyA6*k2k z&572n^KCaS&Yz37?xVcTaYs|4raqeY#+m4`c+I1;hvzQD9hHgWXA-$JxAnSSK4-Qk zme&w>G)DKv9Xl7090A91lt0ZUa?4QS=-Lvy;*Nu?L@)n=@29ni+^VdlZlRQib2KE& zxr6)>{wL>dp1W=0Hf~xq8k{-5+~A$tpD3t}*2D{1lSVGDFjm;GnA@1x;Jr=vSM{cY zylvJHbJoNywUOuJmPh7)|CXiu^E*1+WfIKIt+`zVmJGulp<+g09>E11r9nzA@}iM^ ziUeLp_;f{3f_$r-&jTD*}^+`Jh+D=J_l%%1S+Smn<6hFx*$Wa+=-?iV8TP z=-)Kp5jYg%u`vZ(jD%}%_qV0nGPo$1jsY34IGkG7m+~@z;h&4b!3Sig9cf^c!7HpM z-^t3Q^&Wr!v@n2xu@3Bx!X3YiS!I+!B-tL!uq!w=0fr$(3kVOx*i8}@gC&Mk;dHOZPc3A!Q zcqqA}1o=sRh%vx2K6{Xs0$S24(#OJ|li-~lfJ#jp3R{n=2_YrQngh_QKs?lKHKcBX zo`cp7J0Ri}K=Y~@ng#u|A+-{w4PWC0{)*vw!zEr50S!Jc8e~w~DS#6q!yAkh%uRV< ztXUIOQk(+Nu55JB&?y7o6#mL80sc-Po_Y`eJV?K96l|P~=BM*M>l#}%HASx&#sp89 z_dovliN|+($lZ@jq;D*3>Fqo!#DHa5d@S?bu@$i7@NW-(r8xa2- zH4hPJ6GoijZWqJ&nXu#}O3G$>X3OJ-!i1}2+2xJ9ygzeQ&g$WDXf0eRZi=3sZ(l6l zK6^Of$ek;XJ2oymYT}NXNZ>=qmfL17x9T%2gWQrkoDRhud20hPk@04c5!dkT)b~mQ8BSq|H_KMKgBxyni6O%f#=rVA3DV zY?B$LOl_&sLSK(c(Au%7ONGu^hqWFp(N>Q5I;K_fXgo}(vQvZ@da(xg#92xPk^Gu4 z8fu;5`)u4& zwrr`4TPi=PsESqZidXE8?Ku&zIGL!ZeErfZmtu807rGWJ_9aS66Gf%r9oMcTO3M>$H z_}c|`XU;$BomSgzJ(nxHO=V~WX{y+|%g05EaT0X0HV~{cb_m|5=vEYDLojTOri6V4 zbDGErZAQAZotm($2>xG;4f$==c&Yw<5k=k$m@V;ZlrZE#TzL2JVAyV z7o?oy9In}mS9irqTjKVXn4v{xXi-8t9{ygii}SaAFVKpNHjhpW4Euee_WJ(yyq2up z`vPwzwbJ`1Ac9QU(}tP-J>}p%p-{gv7vfu(|CiRjsISKD6_Gu0`<9qt%L87NET5`G 
zN?t&hc@fJ8d5ZjTkwcD|V{(0pbtQ)SnR$?Z*WZDy)v#6LCv< z%)2#i*?NaFSuhbxxb7I$n->humx)I9EX!Gi?X2* zyJJXiMbu1wD*epR#cM+Fzr)irb@8$|oRurmrM8u+{7Q!Vg(6C4tqb3NRY1_{!aeH> zxGM-XCZ>#$HScgiBd+L*X^6XGhQ{1-;Ta&$!fB&mWmkFA=JQzu7Qv>ri$!A+?5u3B zRzH88T8>n$(d~GKzAOO`B1k6LJ|>n8y@R z*gMaD!i1SKlaO>^L7Qspqr=7xhKUj;t?C5ogy!(fV!O|Qv>~lxJB)`aMKJyF%=jf z5>F1F;KkAfdFnU;r?;M%DcRJk*aaw8*@d7smr_NiR(Vi%qsw4BQ+L_!i8LqOj1}?V zhpIRy6G^6;exNKzBLR9 z|47UBq5t$B>G+3z^s74%`9mK0+giFQc_gJ9C&pT`!{p?Lxx0rPQ(XSD<7jYH@QDJ; zC`ilbR-j5Eb0$Me061j}HrVLDK=&DY%@}R?$uOaIo^ly;{WcAcZ7&QA^^b1bB01Ed z(J%4e(d}GJ3VXQvte^$iY~GN_EP4I_t&@IqtY3w5k+dss6B@2U0)4xe1Cx9@+#2^Z zE_+(yo|fqEFLW+?_Ach_n>n&lP&gL~pNJPVEf;Nx7j2o}@D9IN)U{a9J##EkvLR+D z{KyW8*`27Vo9T&JH^mK=iIUo=J6_U^dx@eFXnhj!t9kO4xja%nUwmuRqX|p#vZW$! zsfZYtEVT)9N%+()vo~S)go|$3y$MJ8vZE^QsEX|W(9xK1mV^)9a#lrc^MkkQci(^W z&@E?mv~>RBt%f~mH!wW;@U~-I)Sv2JvN1Djx-M%c#A|G_&^9R; ze6;W5iE%7S^6*z(atKwCI-k}mQ<=*y{LF6!3mYK~$RRf?kHo4x%%`8waOTtQLyY8> z`$d{j`UKoDA&(-XZ4G^-AzwBYrSJE-{*greTjDPT6_F}xFDX7GW5{>;umGFc#98>u zfU3|E9vQ(-WjHU2e@#UkY!(8+5z3PC&j!YVzLD|alqy#$K%PRJH2p4ZC}YC7NglIg zAlEikze}LW^t%#&VU2dzBbQH8gRyz}DQbL7D8P>kCU?P)?aSA`93GB#{mj!kYfV`4 zmMz6`OEFeS>4w*fUnzdI^p4I_kbg&SEzMswI-EJT4Y-=?gxw)8f3AJ*E7u;mW=-ml zcbo41qdtd;)`eR>zhl5}rkq=FcDigCQZ{imOv)x#QAL(ASAcB(|@549?QLxX1cWk^@!TK#vQp!U$)tRX-ZMk=6e~s&dfl4Ru1IG&V zbd~7yw{+LJtEM!33ylWh+w*KhnMpPwK*inDdSK=X0~}*uP19KUtHx>WPaMpl2ktTf z*wDVu{~I1rGc+s1Rnwb*aXP@_BL+pkR(BE}=o&`XPyLlP3pA*dAy;O8wO6M=hC1*} zMpw;k1bhc~de8X45o6P$SN2oUpkCRErj}OEjg1#B$rU@rvjI55K-2_=B5!1^Y8qck z)g&{VKLc=L>4j@=HwGzABZ0T?1UwhyUG%T9j1b%CYi1icB?CQ0GRaSqOft-iF}$;R29KRNRG9T(^6l^uNlu(C79Frd9{Je&kr(#)XmJ=Y7 zP8A%xn{&G74u?apPrfpVXdR6=8t2c%8+U)`*u!|$8$7lHW!sS~FRPDP|CH4-0FUhc ze&~6YkSl(Ogt;s{CNCDrmO{4Tf5c4_^@23wpR>)$R>wbDT-J(DD{a=jGMoIrQ8Pr* z(ImAdQU+`CIYlI~r!7747r6TwF(SouF~j{+x!1V&n`iEFI3?km;wX$6JhE<)8dVQk z)bx9mWEt?@x*3BUhbzO20T$$GY%dmc--RpXTZT846_w%BpnuD#Vp%A6v^960R@|CH z#pl@JV$8N*2L`irt8n%*Q1Php0I)jz=^HFQ5fjig5f}!pgi9WRd
oRVYQC|hX} z;0qhm#8C6FA6sTninbFEj|~L|veJ9HPx7~_XJEdEAOvvQBQ1k^8<;SBIUD0S8y9oDGY1m}`yU;D;rLu*cxcH`tNv;TKe=S6N!ase zdS#sPOFS*mMDwmAl1@`5nJKy-c0LZ^9y`!276XT{?dt!4B%KUuOA~C8z_Sf2ZXKAW ztfUeA<}?q6OHT|JZN8H|{6)EjfjuoFfpvRr4MW;9vY?8!H71P>rilX>!g1dPh!CS* z2KCBYU4MyV38z4xEok2agcYd5h$c+Z7MYH%kG5{Rd^vLkk@68T#VGWfv;x3SVb?rk z64_!%U$zP?d+Xb~mQgsO2;`LTU=D$ILHVo?T6T*niSA8SbA6&YMq z2U|9+O4i@aeA8QrRg9f6m9&oK9Ws@)m@8BD6);mV<>NB?^{Uz6bb@|NrI5zGrZKxI zuk`!4U4%7RJCfA9%nSsnj-U!8{2 zqH9ldJCgaqxNA=gTDFH5fm*>PM^}-ogc1>QjY3<})@kCjGBj~Swe|R0eH=+TTA1cu z1v;4KU**1|>BQhJqsxXCWV@sYBSfkWcD(69p$${-Gk`w;A9uD5TXy3jU=#rykZ}(8 zWCe7s<0^cn2i;?^Dqx`ynFWbcSzmRCYMovTS2Yc=o%T=x45BCNK@+&s%;=>{l{40oBENptAI{cKp&s`%efmZCxhcjE$J zQpn{NhaU}Zj(jO{IdVQ$+_vc2I&&ydzh$|8cf5Z0JNjGo-Lrw&r^9@>K3p2JSH}#; zKY(N4aYhIz+-h)QI0P4TaN2>h6QRuaOsJW?=Rd^7`Wn}Org6=xk<~rqzWyQi)10LM z8>XrM#NOONMQD;G5Aax|lTt?^qfr@NXn zkxYI31A*g=9NtAXooVgLAR8SpKYVu4x=Fr%a!y#Z(q=!^_G=EdGt`iQ5Qri(K$%kj1*-Q>QoW~KoU@{UG`DA{ zc{E`FjVK2ko%y#?E~U^|(M~4>2ibxkp*kQA+Vdp4G_ZYHvON>%2m7OVI5a>RZ)vbj z{V%^AI#GtIvChYcj|Qj_YLwY#V5C#+-m?<}7hwDR{|2S#g6+h%b+Kn6S{-o11ng8-LSDy)2FFGq` zb%_Gcasl>a)FcYZ69r}1nc=R*Mr^lj)`C5yj>2&9E&HZ~r&Psg^R{^1u7%Ec-Cl&O zsoNi`JrFB6NIO4VXqK_zEQ_lcG4_E~4H& zCzhc4b$V-0vbUDZ=L#xjjwXsW!-}|Kw*JxO7cS4)!>5?^y^fR`eX|{Np_siaW;pWB6G&uj0v@CtWHzAEgra5V5bMi@k$#Q;eJij(_X)(WbIe%L` zf7|@PLfc~go)?bJ9GLA+*z@MvXAwiSQ04<|%-Kw4`E^5683DdDv-=i2Af4IIhTeoU zik`MiHGgzAAqFV)xbxcbM3Z8)=O0)ih-s$HC(}A{k^~Mx_Z=O)y|M=f zGa$g12?|N#QE&jE6Z&CCwy@{$@6!D95wy&l7N&<6Oe(6!9ZQI`BXT>r>Q(AIZJxHM zrSbI<#F`b%>;Dds!GRs;$vbz=F`Z_dnlwLPoP7E?C(Ua^k-fBA4UWDhX!>JneyC7= zG`8WCU9pD9WsDtyg$Zj`Hs7{sEBLImUQJuEEysMt`n)x5TaMkg7q8StO!WajNS0R# zp3C$cr`oeJ<{H}t$qIVVlOXO)N6Ey$5#UfJ6Ou9}hz3-CJq5z5AeUZ|-DUMsHHN0V zSz;d#O^i+j#u;kltJG zJp;9SPlw$;PLW)-MHeN6G9(K{pd%HPNAb(J z6|^By-=ykG8IHNrW5y}oxdg|$KwLBZ!u?cytzi0 zi8*K4TogAKg^f$*jghJ4x}9lcP%j`AhPfnVi$u9sXZ32%?|Eaf#sPybcNg6*B2rS`)gJh75nxSU%V z&#jDfL_JHnt%#S_afDyaYxtET=jFp!56=yK=-Bv4Mx*D^=!cHOpIWJ8to`skl#b;! 
z{08PpM-jF%8FN=G&KO$U9%+bn%-f>V3tJa_3#Vc^olBOkM2>6LOq-jGx%Yy-n6aDI zw(d6W=1ymKjs9J8QFn>)-HjHyuHAtA9~<11mKeJ$tv}vqM*fd$jNL8fA2*sQzs=g? z(f@d7VNb69y<8)%v!ng3po!^dKNXfXn3LN1N4uec4m1S*we-kt8d1oo0nskAMs|;^ zR17o_1umPAm?{CHU{Hv{uNkV+gDB`zoFG)s>4-gThhaAR)3iPV*^?DYBO91}<$%(g zYzAu%RY9&2w1=aAM3l4$R>4O8WkYaWPLURnBRFfkWmm};+?LJfvydCXaRK&G@sSiE z5u9umau~vPp;i{cl8MAaMe0C=+;z*Ry=CnFBugSY&X=lJsq=ZVpsqPmdq2gmq=?J; zjcbbGXR0J@=VoPoz_tJXXMQHlSxE4IRY`3o37+F$(yMoVr&%rh;)o2BmDV3!uyOmKQYd}C!kP%5TkYI{y(3B|8|_hI&?pS z>Yqh~Etpg$Sl54dqA#~2ll&AbLWG&-f(YMK!4sn-m$OJ~!%W4r(>s-`WCq)n9I+L% zdRlBlD$;)OUP!9#kT)egeAcEM&CR}n;pXv)v%`S_h!#1BS;)Q=C5JSX4D|67rWKmX zl{p1%fL^g3%}qIF;}~lb&-btkWg1|K2Kg6udc_VDZOCKV1Ifm;o2)f)8+D7g25~!G zJ%=lV42DgF3B8gH7A*R@7nI+OVW2N*;Zl?wkHfYK`=i{-PO8fk8zvTaA}H>noULdL zf!`T-!8BEj#i0F4;vPzf(GV&%}QB?P6WKf1Co7OU%BEIko(pMd_&-FP>ja~EFUer@~pz1Q|e>Xr%+ezA~!YG&v| zcP;dJdBqq;B6njVw=|JkgdP^;%yy%Ph2DrBK?*`ko}O8RxArWvz2A|}TdtawT+b(^ zjnSRarg>pu`$7Y{*2C8&>Y9IW<&7(mF?6kDJGz#akFFI}#cFmgJn}bteza$yHCA?<_sA zgL^Nh13&&&-&u;&`xV9m_15>R%_#N$X5)cv=J&Ty+4pyBKiI1ONvj^$KiO(MRH*;S zuEIlk`VaDqxR!Fs`5PRu-#_J-)$o8{?I2E^sp0=@ji|nsJA(kMh{9Y}zcrP6frtxh zUN+tc`ZdjOGCk@8ePat8m&PZNVO|w^(VAh}0Mo!t==g}6dH~;aUK6af(ZM#Ta#PS3 znv?)&`Dn{DZF*2!&07H3gvCnwg>f*#XEms35)`9GVl=u8?L%A z9{UyR6-?oEm50gXZoZ1P3C#B6Rb1+h*}Rj?saKHIe+FWfYtU*bjaSpb?GZ037(^<5;rBIzGN z>`1JZEQq)}PLj8jPqt~{G=7ee%P-0QvMVpi#0!HXaEQl<##urm zO+9nQSn>WuVOhBI+POJBZK<00)149i8+#L#)pG~J z?bnVb+~puq-WnLrAAe=?u2El+|I3Q{upVv@4NX6^-muPBy_K`*?zxc@X$pQZJJ2{+ZKmYlv9xZ;F zhe9!D<-PQR_kzS*eXnjmr@v{d>*+AwY%A)iw!Leq!u7kGi+freKh}GC8uEW!l27So zBjs$_guCzMTTtu0Y9n1YQ2u)@Cc5r0_Uy5~_ox|n|CTfEx0&$)3<~_o0_jQ?eA|-+ z2a}xGYBL5K^Q8QfK7QoK#vAn;AsOD&nz!gT&|TPf^fT2DwxFr>CaZL>X%DXGNAsch z%&Ha!Z2<13gT68GDqPX<2LmxXLo>n7ddMwi(!%>*2V}Aps`_5DF*672tSZUWlxUcR zVk*NH%FuaRGZm4QqXTCH zX^vL(g6?mphjP(vLaTDuGqjncuuSzyd%O3yqd2U=CzbqCwbtYr%xJQ*xf#7;lYJku zWv(;nLra^@DND^$Ej1%8HG;%9OMDC5t?125`cZs4!GfBDBr%XXCfnNz^Yznssp2z~ 
zJWI*%QPPiOt>GFU($MB4&dl1P`{w@cEW4OcLj%;p?!IF&|hP0p;tfiW{5V%|?hqXeGUh@Gw&pXJ>LVMW5!J_#zLTKFSYRJQ zWF%D4XToEdHl4EI&g5Xtd`nsNOcCYbzNy4KuZI~=D6GKKz{Ws{$FlZ_kRUapeVTy6 zbx_>tL5e6QqsFNM>IiLpK%enV&lYU9ZJnw+?i&Unhj8IN7ziEnoh7et`eX$5F#x;D zz6r3!_D&gEP>=a9{F?CUR73Wf^i;ScP7IwRj2{Z3?v%b}>jCk5sPteT>r2{I^fv&@n%4W%CHOhRD9b!Fx`c-R9$RObuabtE?#{ z!bNkDHb2DukcpDAk~yjH^tF6B~%MD83%Si9yxZLwp?h;uGj4 z^AHyQZ|bk%d~j@34p%`dRk8$vV~lXJI2MwXI3Kh>GA@5x>L}G_>QM)CDV7|{oG}%V z_#{2qtYpD;$v-F=2u!eHWjxRicQNMPhy9`Qj7mBogb!(B4(Qv<>{EKOjh56)v@Fy^ z7XFGeO7;UfFqLGb7ymmv_Y#rGpW*_1mAk@th)=kRmt9qH7h=vmy5!o4O)fF_u7$%3 zyJGnVmmG%@1x1M>?{ZOdyr?uGn*C zC%(Qj%>VIT*4&QuNU!bf!BhDBHPva2!fYK*oox?1P?xN9pIyqvcAy7?05B6pP}O3K5=@R>+Y z zx+Z61@oQA^1te?O@cc}4Lvtbi0g8Q}5Y0u@Bt*km+;fw0vvt>cDNPm)BWV(l#q>e*9+QaSW>{hP?7@n2dm>#tRuN9!&FRrCD{Q+ zOV2(@mG(8BsrVNLaPs0>#~l36Gw!k{-lt&r7h{{4D-*V0q`a>j$3P)fCZQJ*rPlaX z*=aAB;$JV5at+X#%M_w8Fe_%Jk5mfy@7p|@n+f?aUS0*<|A^rJGF}CfLm%J~l{6WR zZg}96xh`5be`;>;V!@*zSH<4f9j`bdzc+t+v3TdKGtss;)^_Bj;M|_&g2s43<6=S6 zqN_P(IUPHB8tG>%_Coq#PulmM$YbQc-`82k-K^m$t+RHv>u+x5k!Ik2S`2JKd>a>7 z5J=@C+k(g@y{47}#VLPC{VcL=sy@CusV$FG?0u#N`OOys-INVKD~A5!o(%s2U`r8j zd|rH#`ZC)^$@s!_34lF9<=IpbfqmtH$p*@&9jHJrlqH(+T|5Hlk}TLg5GW9TRi(u2%JvIWq8(V83S<&*#CIzNoLH3>SK{J)%rAiwQe@8G;U*E$?f7%Ae z?67rzSmLw+AgJKPcynm1nZ%@ZBAiY+j?t$p$)e?lROf2Kw?)}_<%|RHQDwxNsIu%A zYLJ~ah`39Nt2vdW=zdg@$z*HDd?~i6$g7AXm41A?RD`CF?SvHP(C{w_$_}7G24(pb z;i<*EMnuocua1DlZ=E^1VzAC03zx#V^Thnod6=C#mkeD9j2A=ryQgCtwonK?M8Lz& z0PKmZzMp(+U`M_FBYisHWy4o0uLsPc0Ud3PS9!~A+#jd;A_KHY$rZx6qf0K1A9- zB5X?qp;GU|dd3CGap1EVF0$zdwQnHy;rE+l?K)|uC_=+IA9n8q&PsZ;AX`Q0tEHm& zKJH_8v;VRX7=rwa54l?zbdlNsrY_W?XG+F_a}%Q%%IkaMR&IZEnCF{n>rWH@gVrk2~{%4*Je3;bXrVqG=5o@s$X2Qa4 z7>})jL@@WVeM8*7VbNX@GgN$P&X#*GNV3UyYdbgTZ*DSnK57uY?ro{>} z{zuBww&5#rGlFvPjeKY%@b=nU$nCrZE@R>(3(X-}2x(BJ90ACd+Vd)sH>^nT$kh7s zt|Bk}B!!@4Ae<*YM!&l#A=hc~DN2Z@$X}@aDqX!u2^lTrZ@OHkt3RjY2bA2P7QCtCN4Q53;{F8CY31C$dyziQ(6{Es>N$6U_G 
zoR$4|e9SpN=JGz~TtDZ^e!*4!oZIkouILwBU7V}?dk%Mzzxn4}!_T>D6!9_=tkNl2S#7mIlaKRGes=`z;uZ{{knaCKggp9Iz;Cviqd($M?SxE zdX@c7p4M&R^CM^hCz>&wlD#}v>P_Uj6P}XW_Hy2o$i-)CasDJHFKNUXnAlyEG}D!Z zv*#qObY;UQa+6dVA<<3NRVQ8Tb@Ih?Xd9<+PsAM=h_*-0y|EGM0~%Cu(%Xoh>608z z^Ol8zx1FnWne5~9`P@iVlEW!lKi__%d6h1cjc&m20KHsy2jl$w*OTbx;vs-KcrlBl1SlV4t}Us?dq1`2HfmPmvTg5~?O4dorRrdoj zow_Y=DwX$oFaStFveuhQ>PBKuPxtGdH{Gw_d;Q*P{0EE0NWryt^O5({=PBx!_@G?o zbY}Z+)D(4x;;1o-qdAq29;3-!HKrnW^_ZI6nK6dkHDg+G*Ny4nuJ##bjAO&@mTRp$ymuu=~yY0X~)Vq z-Pj&ZKUU5e_zKPleVh8a7A3OO+ESc<_RO` z{H8{M9BbhZanD_^rzoL;YnY_DMu2R3Tg5fMtr}~EJ{|B^$lDYQ_*TAvw@xx#%LghM zr?EDGXyIDD6x`eSHU;wg^zYL&HLsdiIorY|0DP7Yd8XK)5SZpYAyy2zLt&BS+#xqR zAp~aF{y<>T$FsvzJRcI9jmE2d(C7B>A{(0GSsy>?_RO@pf#o7u}d@s{7p{@2Ctux|J!?<|0`Bg5BNub<;@u)%=WAM*Mq*-(J} zdfO4@tBownia~*QbLsY>DR+pS2>5(~Ir5$T0rv=e;}k#RX4{$%LzOV&4mBIwn%Ulw zp^NNgpW7c|`+0w028PNT;n_YhULRlwIC`dVMAB2KeA%Z+MT1VYWaMYOAs0W(`$G~l0Rtj2A-8zl zsg+EZ1_oVM`+A11_6|#?q0y1cqa&`%XGYFRy5aV8hT|vPVP8lx2Hk?lyWGO0DCq_M zwXj#EaEa}a-=7MFf?`)o3m5Q+%`$y8 zdjc~pxs%qyweT%%m(TRP*wi{Z96EHhb)@}LFferK$mrR@OYRZZm1C1v50AVwKRVXa zIyZUowHI7RT%L2sB9VcCqx_Vc6UNWIcyv5?Jv2Qs+vb`Nxh|Y@Pq*K2@vSYv@c5|! 
z8PpRYaW?Y)Z;a2#Xj7lxHF|aYrOTm-`RVR3e`U%a>70A%T&QX2+_f9CSLbJsg$8GP zMlbh_ym+jyFLGn(Vn^$hp3$izQrSEUhQO)!|r8ax#^qj#jg+nqjJ%3Y94JA)Fg>O_wRT?0vdCn^$rZsv1gA)6`4Utp{_|4fRXZ z9PPA3PWJ}p{KyHAorlMLUSQDN4w=NdMZ(Td%E&|lpuNCf&kF7tzS*gcXu_e1rlS$# z!0=Gh(W8ftHML4wpBuzL>!&y1_3zV?4tO;Wi)K)w4NgWfa3G|B>+yo5m8nqDaH07i zFX>(j2T*=QD$crreR3#AINlTBc$P>G6b$Sa*6P8Y?g3ZGeIwv^c{$0l<8kDDm8A0o zeBl|tDCxX@B2Fb;nnyeB0#b-nAh^B2FrmM+jKntn5OQIy{6S!kYsy+JB- z1%kXkQ=OIK!clnTF?{!bfn<^TOi$U{76-O$r7P;#@Vd>pcy5z1Z!mik%-(fo-$R3C zvHKUcvL$s&Wik|RmQ-z&)Fn#lHcFZjB~7cIWJ%jn|CYV*PQ~qttpdj#({0mAf3l!v zv%G4vw0f(ga;vQ3uJtYJ=O(Sgy43y1N?9DKVya+Is*=(h7b9ENvUtUjwYfEKe9!5m z^-NrU<{_i~-GMg-;-v>y_r*(&J?K~*SZB^{7FFE2bopytCm8dBjf z6nio(5gw3L5VrZZkhy~kJhv9g@Z#uDT92ftd`yrq)-*F+cv2q>r{T1~fOFyUFi%bg zj7N3PpuiEPlON9b7+i+g%6JppO`I9nnChM-myHcmPHW^uT6<;ZrhweUASkj;0Y7M7 z9^U5@**R}$iXC(hHiDiN2(tW4Ff`8!NR6;w#pa0S)mOylYDR!#Xas(ezabS9o&-9n z7goA^7I=bt+{a6X468aTsog%`r#Qmzt0nams!#44UQyT&B?1nyfNWZ*!US2eQzPjF zS;dxA{-C7xyZx}@0Uf_vXa#`xX)>iEcF78zkV*wE&@Fkv?Q?m2yxSiR3g@8sMfel_ zkStOU4W>8CZz24$SqqjotA3Z5+qDcrA+HHw^4IIiXbJ^g!s(u zbkEQ(520qHBjw7V9;JW)^^xO)xN`t820Ixd0$5qVh+QmjJzns*MZS>@gOVXmwl8WGUm0ePpB5m?AdYB3}TJ($ywu7rNb?RSA97iWuuj>g)fr!F;pu%@=OIuspib9pjc> zNLm`6TsC@h^jj~a43tLq&GRWUWwtI32m|nj9c+k!0-TR+q*E{}(V%zLfZjz%X<$iE zw)>i;Mf#TG>1-(qIv?sIx9UI#!VyVQ!iQk zo^fwhT#nhU++(EFHp-2|I<6+bZTxkjJ0v;eaLa^0= zy)ewQvdNX~BJ6uWAR?^1?{%ul)Cs+Svd{;KQ$-dxh;86ExFXo}65r$UfTR!cuFPk+ z(w`yV+|9tBcnFe3YMWsSv>?Qcl-agCerNjj^v5Q4tEBAC#O;Zt?j`Z}`coEwNLh2{ zC3o#*=H)ryFgf$0imHCAc~NEN1?HC{Pf;yEe@De> zCNp|1X!uAwd9>~*tp5r5onO|c7W&lX^r?AbpBhf5^hwrs_vJt^42mU~MFKFzIQ5OJ zOhPVT=>ji;_6DM27FKuwtS16cj2BE9AY;;V3EHY;$TXIU^EQ^OI~xngWP~wDq!L%& z7Q#QkN}Fp}J2#X%wYw-#xPq{Z7q*j<+86Ku0BR7j#`g;52!L)v08$jMOS(XiWJE30 z$?Bafl!{%lwE@-@SqU=53sV3#4}apnhGdcYrLBKSy=k&Ofl@V8!8sb(QfU<^L$ALv z^w3=R=B1mLzBRPiw`nbi+iMfnx(#bX!rHKDwl8(Y9rba8Gv3f0*Y|9zRW|KnKQunn zx!C=?XWuvr^+gSFQ$vDje3+;4a!cHBAYRiE*B{QdbsoyLt%;jz5=<>o$Yg$&yKYeZ 
zBK+(RAOlj42KuCdmQ*CCh8$2QlxZNRB{?1B^dx7<EpjH-WHk3nrpuFpUhfRzYwbGbdT?MSliF0d;gu$X+X_kkA{q+v28!pnfalO|#k zm~V!%C8InHK*~W8XAvk_CgVM@bh)sHcRj|u1tjBd!=D&~gh)?w;gWN?E3U7KnKn%h z$Uh&~?~7H+`NMJjo|PjcA1|zq>uX{&a>enuo{cpl1lT+XV$dys{Q?8d zFNlT0FBQXTLU_oIkumJLPyitLM%8HHj;dugdh2*vXsaU!dx*3nrsJT(Rnd|uPg9m( z8iYCMM+jpQghaCKrXwV}44K(>k)(k7vEai5DO)BFS)a-ZWr2q((6X}BqE{A;M2jK{ zOk{jAy_6_|I!#cDlwJg=e+GZzFeI=UMJc^_Zrey%OP1(OlXH5u6E0YOR zZ7do$wI`Skpsq}vFJY=!nM;`JR<&_cbAoBvZ12icPTcHTDNUGwOT|qs3Fgq|$v&kn zZrz6!{RvaU>eu3?t_1V^X4e^|c4;PIs*LFprh}_{opWK5ffVRyd6D4gMHKY`D`L**EQK_r7{MCJ|`n2FOl?qHees{s6HH}<5%cE*gE9uCQvtgeP zWt$`XBLKROfF4K|sZR|1V+U8;*NjQS^NZbEhCM3>l7_uIa|dE=Nkbi$RjmlI&ZMCM z^H3U_OBz}*zb~eW^(PIDJIizC-6Z=qh`(sZeiEk$r*!+TKr73g_^hbn=zAI40q!&N z*%y@E2Tg@ClRmA7es^8jNuD$wd~(xG9(G2cN(?GUDq_)S$AF2 z#k&gNQNcVfeCgxxXq;1s_Lc`uIfKhWr_lmRFhH4e4to{zmza(-<_RoK*o0&N4!Oz?*AZ-ZfiV?~n0h0paB;c3{ z7cjYq$t6q%A(0J9!Vo^6!~~WPC7gyt(tuepED8rO&maO4-HMJ1aBK_nXTgo37Wts0 zAs#wsq3pLPM>yBzi)RK$T>X86Lznto!vkY|;QG8M;6Mmk3eMvM}ikbp;-zY!8H z<1>rT8Zc#fgG5Uw+fCstDfI_Jez&AX|FCwL#~1)f3l3s*aj1Q~|9!RakFkpmOb81@ zevr}3s$3Ho>EPnPo$dmsiytBth)do@-08v!0RJ`oi7J>z;Aghd^|H46J?pm9I~@gE zU?Z!zS@E5#cV52za;)sVz3=W#7Ps8*_()tgTv+UWs5fR!Xq5?lWl~?2HI)soR3|Mt z&(Cg7Gs)&OE8Ng&kZYbbm_S>cyLn;5yf6o>wY_)b-6N~ce|7bTFa6-9fBAB< z`pkoCiRudp^TmYW5}0_4O4H5-+19&seI>Md7;U|+TPEw0@U70p!N)Cj84laU{C(oE z;1_1TN8i(Y09fM%yXXJ6d{)cp6h86H{1Uh<@xvko!}ZMV%+Jv7x8t)G&I*{>vYdH@fDt8IEUx@sg)#zIhO+0@ ze%Mf>6s#m8@RZpBn7v;HlRK)@$iR19M5YzqtIASF2$?&%cF44Sk&YahrtHo01wYki zW1gJ5<=Xp1Mn{D<-l`Vg^SqUIaOS-$EpwWbnIPv*^hde^cQ{-1m~f?IC(M zSfrT#o-?kqP8$4jZpk9=@5S&83PXuLCeeq%q7;J{OC}zT{t%<(ni&UsF7KZVO+gkU z@q38ZD#Bsf6J);PI8zgaa_)e}6V& zdDVnMAlzLS2jZO5?}%1V<=c)Cc8H-=X%P+^%FHHg2Rk%JPt!}D30eVAqTp~vXZXXO~EVrLhC7H1{d)Q$EGGi4pLcLCgef8DK+Dh?#j8L~yz1!DbH*T27RV zz5u#sA@*043}n0F!Vp7a+Lpk11rPS7T)5T5S=-h_noGejr@1BsR3y{0t2D*CBE(_F zGuEVhR`90>3PRs`3=3Hkf=sWWjF}-PSD;|SF>kAL6koNM0vD-zIuBJVID^vh4n@5J zqp_e_&_=Z!v9T`bAV(~s3;K{<=^e`SC}QqWMh%df-q9#9G{nE> 
ztOH$CAI-%5EEq$$VI^0-5W#%gc(Cc=d%#7eC_f9Ps4<^%88yL{+mdS_2xU~ss8PW$ z%0zXjI^}$cjl7{CA2ht#p8IXL?t;R_kWfh6G?NR$8ed&7Kh~ckYKD+ZMtLvL%2)mN z!klln-e84q>mw%2&TcaU~1pubY?YTa28Wqcg#nrEWJcbXjwQn5Mi| zN0CLZH+U7jeiApYMBvNO_Yj8 z=$OQ_2AMwOFs-y>CDRFZls1HdlaK@wH)bvJxZWGlVh?8y(w1X9htQm9o@E6J61@7N`+kLM!r{Z~nla?Y1=UQeit1;1+$4Yz>_1w%HCA$}WJu)&`d_QP=k z?Bi1;zZ~<8TC(l>EXk7b#}YPXf9$hB5ygcm>B7hXeWGmq`7`YJ-(d1@A&HnSXHCix z3tKM_3qePsoe@jgf6sHx5b`bymWYaFBdRX;3k`{i3!_21i#>a0;9_4dIp+{5+R>_c zf<(zm^c=A&4LXa6V0xkm?oDjYk}4od3^^~rOv8DOtk9AS zeK$OO5Z%Ybu_tLfzJSO(t-^i$>K|c(#v$kzPlLE1Ko#VM2 z7*^XJlc1m2>~Fq)^YuJxu~aayb)fdo)azGA6V_{>9-~7M)MJ}{v47K1zTv1&II4f< zs0HUCz0|(xD2|sLOgQS7bemTDa!bN`0Mx_cvX#R4-b0B!t;yoHCF7RfvJ6qJE5~A& z)}C9dyniKbJ+-bsol+SM`?u_hec%54QukKro;!25=k7#rM`P#LI@g@{Ps9%mBumdP zo!c~9-yFC(usjv(UDGTLtecN)?E@44cjuP+m-`du$}O{fxqEqT*}EcqzdAPl#|LA3 zR%_#qLq9dQLcDBYU5cg5g^Po~`rHJ;v!786ygJijVRd-5FYahtH@9zbDR5EeE){I^4BQ?^!bHY z4pZd6fk4dOjUaX4Kv^T%a5(cyh-l#86kBY4?KgIvQ$q>zj_DN2V5bg^OF zujDDu!62f?4HoH?!CXX(yEtfydbga-fd(({zk_n&FERNmOwf%X`~@ce9uqXbklh*t z@(}^}H&6h_@yIx%5EcF-DUeYoQq_R3m1Ub2>SVbl%QS~9)8s%x`6!0)bAWjsMcEV) zWvL<}fj)tg5~a6G?^NEdT$zqPcR6Xl0&z+zo#yD)k&#Vv353{n8kcnOg0_Ud{c{LB zw#Ex;VlTv-h7z@x<26^}SFXjaFRbfd{M<-^5hLBzrrr@R>Vvi<>Pf57IJXX-dSFZ* zyntO*C-gNS!XOjtk2ju5)SQlkj5ezP zad=&SWb;TT`JSKY+kc(X0Kl(5FG|BY32gJKxXQ<^9qanTn+HJ-1D-$A*AQ6euRkwH z!&Va5{jus8AbxONU%y#ifrx$E_R!>590cj5(o}vS_QR-upY738KdxnaTB)DZvAyNg zhk8R#v-(4OVUJV&p;LqTRzq*8`opfmUWfW4hX(Q>>-uQ*N7aSB$JHMl*FYX@kUvBk zfXGu1oFRbSFho4J%O3XJZ86F#A_b4YMCWdcv!N9Ej*37C1i<5^++mY>BkhOD$Uqqa zQ;?ErcoDp)`67hj1b_lT->S>{_w)Jbr%lJrAZ2H11}Z z10182J*$L2pyKM(2ol_cUqB*hCder-u$CYl5ME#s_?ZBNo#TUuY7pTDGBdEG&*%cJ z!25*%2DK9~FXAF(K&91Ej)7q0;dn)dhzEyg+@~z64cda?5BU|%!q6F zTwmYFuxntjx9>$X2(#V^&=o^yj+phr4+4M|sJt`H`4v=$$adQ8V~5*`IR<%-yzb$& zE!jF}o93)GwjTEHV9B}U^?PT+GtepoeWaHn_JMA-AecPJdmpo#j0Xa~E_MXYva|53 z5n`D5;N%xYibnnTGXZRR#Bf{z6x?!pa-hc*xGojv{b=GMBnn>mD3H%!AZ=2@@lXbW zSTV%wUfE!?Ytm(fCsMwHGP>9a9u97S0`B&+k%FCH+Qmj{92!O 
z{9Jtw05$TjJ;2#wxmSq!anIu+Y^O~?o3wBU6Vy8;{fzrM58NM4G6Ugx!R#jdAyy`^ zQVrfwNt^b*5|M+tqGY}b#x6K;P0kh*u_2&ZAv+s&va^vGY)~i({~eoX2&B*HB;;LjnS;jsh8CS~t~@qq;3?rqzY`o)byy$+-UH z7I-5rym4XK6ytu%G(6N5#C2sDsSPpPcLr|{#;d#TcPEQZEg2q~EpRraq-=5M=emNe z^22N12NlWkOG~y*z2!TF%dU;$hD32gvbZT}Z%*i&Z)!dPHPU)+T|e+iQC;lyWKsL# zrA@|gYj}C@1{`m(*Cy=;la~4fQ@_~vP-k9x{bODEmbHA-TEAMcYKb52d%!<4))+A|Lt9_)=*4{g>y|45^D7(t<}s)jSm zD?`b$#;wYIF-x+t`H_yX8~>-pq|v6zD06`a zl(R$9xm@t;ATF2CfK)?b%Ls+41UR=2PVL}4iRhQK_!B5@k;I`%mE^2>Gyc{LdWyVg zIt0~6{v{fjDp70;sQJKgEIgP_fW;{|k;##tGZAo)Bp~GpdQ4EY5YT)epynyrvW?)B zAedsw0z!pTK>bU26O%u|DM&jh%zk3T6`VAa5X zO8i@p>2L;}rXQ)yG_zev(WYNe2KoOlDaS9U!v8^?N>Hc%PNkz&e@9_bK+$LD1a&&4 zS7}PX8BJHD%qmSQ=1MX-L#nKjwr*MqQfj=Hmdp3&n#lbj#KFDt#w*(zs7syH(bg3Q z>W?Yt-@1C>PaEHF+{SFGNT02OtWpKpREdGM#tyvK_-!!Wp-#a ze%JSwFGWFiZJ6BdU;Xn}e)P&VmZa*f*~XB)Uznoc_Q3I%l|QcB#%$_<3HlK3M&F92 zD9EmP$nF04pZk8~+s2YqLtRW-RG5IiYBkWJMT(1mMI3%C2~ zZM>!GD(J%1>J$aHwd!rWrH-5F!WH-qxW$II@%E^mQfZgmv8!vn>-4dds-7;~v^i30 zytCEh{!nB1Oasr`YD(2bARb+#ceXkR{k5kkxUF`4hPN~<~hgM%qmQN8@?WH zs{jtSH2{Yjf|ru&M@0brJ0);rucUmluynJu?2+LpU9f3~pE!W~L%>Y~cc{18Q##D) zDMxV%VSt-yjh~qyhu!H@DlD6&DaO1M`B=T@p=00jzK2EUA@C8HpWXOKqqS=n2jE8< z%q2fJ*q0lUhN_rhwR7!4Tz`5~-?^<(o3)F*z)Cf2f@ylUCKmeR=aNiQY7fPflhyEl E0Ibr&$p8QV diff --git a/flows/updaters/__pycache__/update_ncbi_datasets.cpython-312.pyc b/flows/updaters/__pycache__/update_ncbi_datasets.cpython-312.pyc deleted file mode 100644 index 9e38ba47dbb96f1694a348996bbc26826921a5b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9028 zcmb7JYj7Lab-s(;1+V~!H$X`wt)*yEgdP;%4_T5;iJ~GYqF&Ssv5Nt*O9~Vo^e!lg z4B#-Tr=dD(Or>o^*Y=cl)3GvEXOw@O=^xou8)f>Vput$shMBk%&$OQYfh2eA&_CL9 zcCi2?(eY$4xO?|L&b{~CyXX7P*?+I9vJkk=z4kxR*Xs!RJwE7{t(bZI6N-=1u1wo#0+sdNK;xF6J(%_j2Ay$N&=q$F-D(?S%oFzpy>VaA2hXP0NzgBtA+0tL(IRm0v)rr|ps1h9Fdci5KAGHXs*Vzw>iwSPfW!!RqV9*K$5A^fG-znCN z8U^3$OsRD(wC40D7^hYp$6st;9t(R~gzAcM>cr=Sbr+fmk+yIIq`2Vk6z#%#eN=dM zi&YT8JL@Jt0 
zgkzAm?u$jmL~6ezC8YqZSj2cV6%sE(k;06|l9v=F6_zgqOp0@$f4Dz1cw+zl;Qr9D z{^9Q^)}yC}kDVIU9xNx09vu!1_n$pDpqNf}XoV(091Ty#Qi^3FEXiUhERD&ESrT8J zj7p*qFep`8x!e&NjmAX9B26YjuTCaYq7W%68|O^S@W<_YJO`yWNScgM8ABQln5r;l z435@&P$JM7I&ByrbJQy=AsHskj2G2~Li9`D9@(BM9tqfdX!{m<$Dph23MH_DQ7{SS zG$U|h#tfSRiQ1KB1xw|D${5qeaid;az6p2FrmT7g z!M=8@@hZJV`L3_-EjZA)Cc*g*tFMY8*M6LG=xqhp+ESm?14k<6;Z`Mj(iF5|%2HD< zU1pjRy!w&sA{i5m;MVKP_ijXOkFZikcg=p8K1VK54J5TrAK`f-c{5yv zj;hmKr9RT!6@x%B+-uw=>JriV2i6TKOnXmPU%-d=?hb|R=xFV0g*D#os^&c3!T>;etC+A-7G+uqpCoT+6 zO^B=ZM3UFU@i@>bcP1<)qKUDc{IsiuKY1ZKF#%87Z1`0tA5I8-B$-G>6O&NhvRbd& zDSlK;Mb2YGeYSjYvLL7Kj1-0T&(`syNr_*z@Tnw^#s(Hkg%!&3Ihu&oW=-&=Z|Ym> z6sS^6QCWzNMZv0_4#y@%wWs6+DPX@uP(vb;6hy@+rvy=w6m~+2f-z;~loXv%Y$qmR zHF2$z!XziegxV4mrPSTv<7Rbg=^qQdRJ91&Gx9k77(WHJ>}cS_-K389H_>bzo9 zOJv1@JwuTs4502+R8UNjBhl&qaCJ36$YCrlqe&T^QCKb-;PtVD9Jk_a%abm zNOVMuiQ8qE!w%H(4uk@E$K(V&U?VLPN<>DYA$<>8CZ-gpx-+4XsgOF?s30AOQP51u z3DENl$=lb>zC1ISH@W6ce{8CGWbnH!v&KRTaeHPCt+=XZ*}TL1M&@ef+7DLTwevr? zIeBv|Ti2a)_hfB71tY1d&f9!>n>TOspg~UpNUgKX7|sF}3U(m^J3D zuKU*7C2Q>h^|7@+@7T03`hNV~_>!X|+jHPv_kZmE)$XO9v!6MA^V~wy``h2$et%Qf z(x$H5rk-qlZ?^ZyJ@MX|?68pai_6Z@ymQ@W?&q@gd+&7ra`%V3@9fBKJdtys%-T*C z7&CX8TCusZ-nQF~x2qQ|clz#Z&ANw{ZAVvJzPadvXW7+w)0MB?^!G>JJaWIbbE&rT zliKdNfxOpudi~HtmelopNfUR|9|}8&dtG4{vH7!% z?@I?1WnGP5e_ucd@5@@^>u;8%2l})^!`zA-9WS9 zUwnN7wd`Go8S-~)*?~sxZoLWf&D?&Lz1!y4PuPDYEaacST@D1OxO+v#-Bbxp( zF_cj>Z5S`1B=js^x@FgGc7_(H3?mpatU$j`GRBliZwctZ=$0GN!$8u;G@WMkGQftr zhz&Y6k1=WUdz8Rkr@&_8T-7TLbngj#wxliNDs9zUXm?-_h|k9XpDp7$d97%2kpKpl z(XwjoR^uh?D|gjb4Crf5n-Phhz~D&Aqqh~DYfG)foGTPyaCsgsRigW83+d=*l2Nmg82mBwOhU74S`wdb)tO8YWL`w%ZP3ay1 zC<3-031!*3V`2g*GaZw`uqG5L zD&xBX7F9Y1cf}YvKbg27PjAy~BAUt2s3eNhz=I&%%PqTN$w)XR?`hFmABCRs zc1Zr4d^JN>T>iON-)@+Xyw$wW|B0(tCXdKSZh%y7m}mXq7+17N3fJtOQpkD@Lf!XFj3WxFa^6;zO;R; z{bTzlMW7Q7C(A0nt++R4`PQ7fZRRlA{|op1O-uf!oWD8e3d|hJyWE%n)IDMhY&}?V zi+wga?^)*R7F-W}HS>pVT0Zf0%!Q-Bx8h$BTQ}flm6PJ^kE(m$_>) zz{6cHH_&FfTjv3!gt$*^g| zOjQcs3-tm6N$K?&HeoBH5}M6&O`1*97hs)|x9k}TMno*Zl%^pyr&&M&qdxW(D$TCp 
zpY<25J{SHeOwxkyu_(kYcx0dGMI0Pxv8WyUA@Z;C$mj`z8CyE`C;Oayp{J{ZLociWh`VuWA5hebuOE?B*34M#yd6C!LAl1z%_oy&d zDsktV^)Sr$U=FXqBq~Kn|kBAbV z9Oc!+$ww2<&PZLv3H8ucCj&4W_6#7jI8^&;wco}SbD0xAlE~)rDG|diz zd%{mU`14_zj{(-fB*F=P+N{lLCm-MxLlR;^@$dx^mM1HW>dTlQp9sfA6)F^S3gST# z^f=IqP))6h5&`2=Z=-Y`>(Rcere8)nfcvaNagAoepKw<~MVC>Mpm75J@>`J1kT13n z*7T#pKRo<5h8w2qrW=myjs-5~+;V&P*UZk(xT=!9&YQV6ELSaaZP%(aOJ-D^m`_dB zS<}W9U*p2~?Lf}A>#F55Ut6|)@127=-_WcDd>FHP?%>C!x)u9|`BMvh3tMl77SAmn zzH>QyEO;-St$8_Tev1sZC+eilwWfeU5}`;Mg#=9% z@CeN#G(kKK)%gTPLT0XV`Lmiuf=BII%Ob7CE4~mxL3RSj3 zYx3?=ONiJp-~rIU^Xalhdl@W825b)pbZo5%SDHz)w<_(F3B3r_S16G?h&&nAV&+fw zg(#C*FeCXLFEMgmD-aXp^c7{ev? zrpT+iqJ}XfF%EIfZ?awRy2xvqs`9q3WoR_r@4P1(_ew-^Mj`?=M4DM6@B2 zt}C#M{3Ts}yJzW9N-(@`E-M(EB^V7AJYMp+agN%RE=AD6b@Hw0AyRAc5Peq9vH%!o z4c=02U*rhPkYbQVkc^iu+&on=O_2Fp>IS*`EoTc(i(x2Az(D~}VU;6d~0HID%0{jZag|70k&?Wpcibiq;p58+E{n2+vZw~(~CcOqF zY7#IiCcw}0vFM0Gg9Hr}8HmYAicyQeDcpJSG6W+wD)|@M2UDWq<8#0(W&oAE2eZ0pQKK?!Kdb$x)wkG-S;UUz*k`U6pb7-)rixr$4M` z`}<4^vwt`|rcj##7AX!@QW6rys2UepaUt&*N~Q)O3M#09S#bE0sbmLulB#p^mL85S)VMaYe)E7-V$|&Wb z{HXw|rulW>q-*I-ka`3DG9C+{A+^M2f1~GW&l`KL?wQ}RY;9bzZv?I|e)Hu;a4`?& z?1!@EL!WW>H)^le&IviLW@hgrqk*k?XeJ);33Kk|A0D0=oZVM2(yT9Uw!C4#YM&py z*)VHgHn%*ou?Zj0#AIv$MXAZ)4xV<;(uGcM$-fozy%elJIQLULDTyizscLkPQ zftw9ESIf*Hq$R$2%aV0dmf7^x7mdVL4Ne>Dd*JYbf5!S&WjDt^+Z&)i zYNjAntQslShzTlQztFwGclGgQNGi=2x*KWwRw+&cPK-g?QW47m8?T$K$`5pi2Wof> z=tY{93<8^=@MGTFG{V1Ne%o-1(_=prLDyr-60i;dox-Ss>fTM)Y42AHQpOhK;`mLZ zWfVxZd|ni$9V($LS4>wu(NLkMz@;=Kk^y?!gD?Y!tp!5x$w?_9wnT+#-!p|N_&hRQ z|4eZl<1s)DpyFaQIVr;=)!>G+SOMN?v4U1u?XxX>U)2UuOcUX$STZaO0=OZ|RnM4& zL{MkYe}|>&K}V-n6Ad3O!l_C584~SXE9fu%1N8e9{ACQWz_GOx_x78?J1=CzuP(c! 
znL`gu_7ylhTW?>=xn7(#u9)3$e4zKio`GMSy7&A;6XVA)$MY?B zx|h6%{?}pVECn}l`e&>1R;OwZ6>}&Q4};$wlD5KV(_1xnFf7Yrd?Yr-$Hi2*=r<+f zPcp3*4BugJDnJeWoCNBnzks}IK;Zky6n=D)@C-^nfkg43$%rH+u|?EBzsQ=cs8zp% zw17nhqnQxkLlb^R4@KknnM>8Igm^6BnO1z|#`1Y!3WECmTf)$QgxD!zP+CI!CLy(# z5DFwDmx`m@b21f;$!aJ~i=34_A%zC7QK7+WQ}z|kyhP;Rk*-I^KFa^-GPQ~FKRiScgK;*zaQt@XGSyo!lg(T6KL10mk@DxAo&pW` zPaW<83wN*})f?yK>+ymKpWthTr-1EX?~U9eE94&2#L!nTKByYkn>P3b$Gac>hcy?xWn*vSHXlivpx}Qhr?SQ zSs{ng%mo9MZ6l0rcKTy_eV(a)VDxn*Q3Z~L!)ha3 j_hX0IW}F##=q2p>C8p`k&IRVJojIndu%0j*RK@>4qGQIZ diff --git a/flows/updaters/__pycache__/update_refseq_organelles.cpython-312.pyc b/flows/updaters/__pycache__/update_refseq_organelles.cpython-312.pyc deleted file mode 100644 index 3f8cf4037aa9f1724e81530c0acb4ed895861faa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13537 zcmbt)Yfv0nc4lT}*89!-DG~${C7|>`Qn%1!1_2UC2qZu~-AHO&R3=cU9w;-BM0GL7 z?Fk!AE38tuV}-WI+toW0VcMI%6>-Jzn~1OG2firJ=IXvQYVGIg}YjD<&!4 z`0we_O5Vhqc?)mlZM>a#@J`;vyZIu%m@oN}ZM2Fn6{`6%p?ZSl%YVR**6%g1j=TiHd|xIgo*e3K>ta0sz-N{noak1eL|~6d<)RG zV(TzY1K;)oHb>hGv=%LgtwZ|3g(CPB z3l6P6!6~=|`y|Wn`vg`UpV1be@bCu%6vVAUi$?hq`j2Runxp4v&ud>H;edx$j9ro0 zuq=okMzIK?fIKE#6~eN@P6Q*@6jt_0mpw+sdZuq+Y^bMeaHxA&u@0UeIeUI&>}=;q zuVNV*92^-N>3p-VTQLr|tA$2hnDEU8WyLb%6D48HCr(O=NffTk21J2}o@@bWO!7?# zV>3Q^N->e-q#!G{*_mL($B)U8F{wS;G9}A1QpfJy6Y`8V>>m$!!@-a@9GLP>My~D_ zg$YTx0#Q)#NrK0z=mNZAj);@Kun-Iiic$7mkA(Yp#Wg8}$9>_;oqoR{Nr6aMvH2uP z2#p8lhG%?X#XRXhD~NvlYExKCS+nhS`(#0}Ns(C~=$w_OBBEkZb8iVgQL(D&k$?<6 znr3FlLjn(7TE+vB88I>~_+`ZmX~`Fw0Ro2__ecCb8ONd9vlo8*;MWen{qQ>gzk~35 z4Suh~?~tO~y9a(iw|Ae%rZA^E2NdQ+&vAu0(>bItoo699)A<&>I1evQo$rUT^CuN% zxaX|G40equ%)sCq3e(-wrI>j5mN7{d6@w_qvtrnvY+vaImuf2%T8n5xX%TQ0$#i(H_M)OQ1kbFS#tZdL|r_EX_9|NTxDW|RZ zGA*U;8{VMxjO{@UBdMXuK8+k=p3z400re;PVaj8QHgrY8R|Qe#y1U!XoH^6>7Cf9B z;V_1Rw(f3kRDYr4V%w#!a5BEaZ&&EHuW&YCIrfNH02@Ll3W{Eu2?k_^2?}AwaAt5| zq<2_hkc;)RGc$sy=%W*XV9=uzU&qRN*b1^FVSSGK`sD8Ok|0XE{nGyS-TuHhFmam{ 
zkcHi_M!wy+_oUskGq_+tq$b8xh9Apuxa6IgQyf?v5ko$Cj8OMM`+MLgjYF_NJ+V1% z_1@@>zqxdM)z-YwvthI>9{Bxeyz|}fr=8`C&a|UA-g#qcaWsDAM`a&Y-K|RWEZ2Y3 z`=*-0!?EJm~y%>aW9}hCe&EdTcOx^lZ|4F3FyI4!ux{#UFtk z=SQ9{%u^1Szi;O$ZMA@-SZo&`SYzNJSU+Z7CnJxNC#K_>d#uI>^Gu8>#7!E<&=kBU zT2rg9#?ys(Mz4*co4UpNb(54|cd3q|=7H0uoiEqNT#VJ|uha8-(Gt^<5$kz_%JF}~ z+}7Q!XQ7pOb{ImMq^fF^;N014)kXC=^0!gX2f#FqvMWU+tBV-Qc1RzR!3#tAFTtQbVS zBh5s@lAy4`Kp3_y>=pl%gmMVyi{r#ztmM@8S9TS|i%^O?S9%|U1?o3Nl+%4{`o?s! z^3bZIW8vh6y?n8MiC)^ZRFkyqSm@etmM<}h;Wejcp*PLi*4f$=Tf4^AWeikl?VU5X z&#ag1OqJ|hX4gvEp6gkg;U7xc7Q54>HFw^={dS^zwbYZec@}#h$ml7XbFpdhO1w0F z<#t8f^3YVBE@^v;l`<5oH~jBs6eP(f{H(h1xQ1v9#XL=<=jM9ZxAQvKdAh5VxC?fh4yn4SOAW9SaJS_oe!eN5;d49kBD6Qp&g)|= zuSeV>(Koln^dPGZN1(#ZMi$T@Y1;;E6Zx%<11L;LF=w7Ldo0l-J+Q()zszM-Ip+)W zoT@{)U{;B86JjL9!NwR8{1K7oCIsLPSaL5B)ryfHAG;1BQlUEtrzv{UmRJnED8?X& z&0r)f!UhL0#3&}Mwqm(5>k9^8Rz->Mn8%>#0U?NU;w7LJd92QohMCL4oT9(#3(g7! zm`VjeTC-vR>h;4M6rX(W`{_nc+z_98=&H{csnT5;6J>I%wYRq|2U6Q# zzwb_MKfGv2I_n;qxC}#?>cmsf^-GL`!j>?~Ramxvg_zcT3rNsuq7hhzn#NwXJkvRW zo`1nJlR0Ps43{CImz_4{DYO*N5=cznERGd?UkF6-Iq@#G@P-%->`mWnkLe1jg2Jul zzDXa`i-DM4WnYbP!@rYj^=;TD1r*G`pjFe>R-ukV`|5lwIt!P>Sj@PI;H@i9t))n< z0_9N4lC4#w(dJ{(1+5h*hg#~EegyAJ^7L8?HvT^rev_;DjHTwR9$WOFw%K8~3ZfA9 z3tUK$eF%uQ?r~o@904T&YD@A)8-@XO2SDKS7p05M7x{Pg&9_~=_D=hJ8)o*;doFsS z#rP_hz3_JPrCp7RCD)OpSkH$6yo2bU62OSawJ4(4BXUEm!c;W|H4qGVtVCmpLy%Fd zn+7CyV~KGZEQYWz2*!epxbIp(o)X`{H+q@a3TD+-7z3Nd=p%N8gxW7GiE;?^m;j_i z(tz!ueI6(%#g-c-<{0<_@dA*(B6!?jS)t{X|3nd0@0&t2gbeO_ZgaJJYU;bP1PLdCX6(A6r*T_AmB);Ven-Y+XM0(RC18v4^h4 zw7q)iYRcXOjXnNW`V`PRi*C*Ra4y~4@>BE2W@uzS%w~%UM_svM!8;lRV?N#qIU^;Ev!E!u%;)edBBN8 z$ie(>eL#p!pb(Q$#FD3wD9qbMnt(Y#0d8dr;aVQhuZDezrq$0IKxC-mun2@%0qZGd z(AuS`a2Ze&y)3#xNPP=6Pvf7~A5GnyVyT!;Ea43W#amA3!v;4Jfx?KT-$0wQ&wy6d z`!Ji^sl9*~t^rbh3uXZNOI&B(qOQ>%bJVMeh~t6CS=GGW4as3*U2}dhB1vs|j`t3T zzXS8Fs(nkoiek>yQRu^BEz)d*0L(f-3#ij2v}nL7a$ODv#=$)S_8cGzD18I9e2*wb z#4Paifu$Gb6?Qrj2z!j1wykwM=K+9vkWpcaq!`pL#7XRS3IYlFi~vy$irTW9mkdSx z19&})bbkQ>KnAPp*3lmxO`9C)qLPKvX?HO|hNAL2ZSS`&8UT~7+qsmT`$^R!duzI~ 
z?!%h9H3|Rn-qp&sJZ=a+bT*`2)$6VX*d~bsk6fO#yJX4w(7iqFs#VK6+9CKoErx31t3}2Umu{iK~H#E&oe6cm@_~f zf?y#wp}L5|5B4L&n=uEmC0A;U8Dd6!pY6{M`D{tPKkNtkv_fAt>?;ql^0p?tv*Wut zhsPQHZWkC$v$DW}J8YKdO|+0CuK8qFQ!6(BKMCAi9xoV>==G7f6TYA%aDfTV7YuR( z8d9bQzT%k*PXN`z-O_ z0Ldc!q)#CL4Q6-6OYc0ruMpXhpM+j+lzp(kBapV+>1c;UoiM~W>=8*Ops-5998wAmFezEQJe zOfYwCDRbk(@d71Bjy)*)^yGspe|dCiD6wx@_t9&2-(L2oYWJ>ePt_d!Yyw(6oMOv2 zYPm$&2QZGsV=1;OZFLewqu3|F_CV{NxIjgORscvM5qXT*C;ptWfEPG51fTyo!RH{3 zbwMgb&oeZYcfC#L9BV+GeRwACNGXJ%HK9Jwg8I_SIYlsy`*q6`1818?e}FJ}L3fA& z5O#Lcg5vuEwgMP>VqHuZGZsRl(`bind3ZzK0=P;8K5vA&1+XP-0^^T#^QM1A%W(Lh zjc(o?GmEvLV3F0gJTY@lWLRPr(Hyg=n>c3Tt+?&N^v$r0yP7;PW6Y$r3M+ybu~0ec znQZyLcqVN5@oFQ2F_5hmVD{&I~wt zZU+2gTRdY>foqrv`oKBIqnj)zrEaRdI8HZaM2L)RlTvFWl+;4ZY z^@Fnw-E3my8aM|1GJHOEr@EbKML#TD=^KppHh-ss7sS9-fsX}va7zUIzM$u#1jXnL ziOCVnT)24cB7dpH&q&We&+u>uH!%xhKP!ZFB1&MuI7I41LUZ7*cEcj8 z0twx5Av_n!+CaDo6_)s!DYGjaT z5V0_ClA@cK$}1r8-ytP2l2E>Bi-(iii`AW3u%)q-_-9axtxIQsX&0z}E~E5SPu#_K zs%}@MO~V@|%PsQ_^K(5@WLP)_{uY~Kp?kw@zg2moGJZ7CxMtqDyf0l^b?3tE3yGq{ z$gfIwq198lG`*r<-_x1e)49HN!E|zHWOdK^)VA|zD%I3~*n78k!S<7JuLA)E~(kDRWJ1<4zAgDZP;p;gv1+bw$_cteJcmn+q+Zk-RteWsrKH_%97`X zSKCKc8_$DnbQIL$hOJ_$G+|h??ab&{d*w#?j>PQp#9H}*bV&{1xr&4EwRZQd(>G2p z86Mf{o_eUVmKO|VY0k7#HrK-G|MeRqRZ{zmqV1JmxGFLX#9zPInScMav=^rJXH1v9 z$Hx4v+tz)Q{oC47OdmD(n2ouzRV=wpFTJE%@l=4QRvjQc@R8CB zELvBSx_Pwfm>Atn#pz2`#D2?)NI-9Z7~w=Bhaf@dV;khjV>D2x`5OAXqK@TUP;gRU zAT8z25WwQogWCKbh#)uB2Hd!%zntSNX5dZt@=DDJ$}opw}53@NHtsXy=CCA!@O1AuDy?0@h-%@B5ej>_HA-bSx@iK=<;#QCO2z2yg|X2-YfeA_!ysg zd(1xFqNX>8c}|Eiz-a)#<{d!UwwXpv$8t9e;EPHLDA?i&fUn#AYQ6l|iBm6qUHK-y za>^(R$TOV-6>L<)yHQY$$Dx+cO& zxy1S4tTQwYM|wP$@Bih)&`l~p!)O{xB#m(Ll)$M(*ap`OP|qt%SMj(q5A<>a1e+R9 z2jNUr!)du!1UgDMCuHdP%6Xek8^JfBL=_@_6Fvl;*1-+ThQ z(ZPA^?8HRiI{32S-hwPdjRf06Qx0vR)U^g*!emsTuZbT*6L{AsEL(k z;d^2P{2$R8n131IcYal}e1q%asRj^=(14um5=R9&S{6reBqmu1&7eK56b-;2_z}oP zM4vc!0?YJ-TZn@m<{NHD;XnyeR17d8bOGuja3rl>XBvn5NJyXB5BB15i$8@zaRmbevWf}(JY$HAv#37H1>&1X@fK1ra%cvo zAT*c_Yi80tY5?TwmV_$kyXk9gd7aYbwVq`IP 
zZe1ktzeBaZfuFPuSZsmH>;QjZ(pZ@;Yf8)|OZP5X9y{F&y&I)!K0f#0~FDORxQLEFMmrUNJAflXM(jGadPLan)kihRq%CPZTHg z$)X(qYB$R363p`OTKV3^UN~)7w>70~O;G;CUAgXVO}SfFPCb}?b|%<0 zSJQ^8bjkP=Cei(4^U^!Z-oGmP>+(;_*AMlj4)uMuef7{l^7X-F+1WMMxhJk7wWCL_ z?aykdlIo0=D&sP{DDciYcfv8svVPU+g?TXB7pLHM!O$af^K$2gy97^-(>D7?c}u#i zDqUTduB`u!)lg(vG-Mo<*|~12N|~zGOf_E?S1)#@ZAG^RZwxMVJ+g668LFh_MKNXZ zWGcX+yKbyb8NpguGd674-SOt7?lpUTV&vZbpC0=7(8`4ejcY9@KRb}Nsj#2@(0teY zfpyK+2)MtsmtMEmKT++Wc=MX6`Uz`ZJn?>e{Q4gsjqgs@AH9G1{-vbl^cvg0VRbBC zfA46Lt$1-1=P%_x^MBw%1z1&Nn&Ioeeqn$yCC$xWzo^2dlPxE|hUSy@`mZHCu>QHZ zx%Z&)=N+A;eGK~xQ>U@7mifB_<$VX3U)s1nkN%gfCP;sFP~S&cKWEDOs*Io4>ic#W zKlkYS_L)ET8sY8d2lRc%j2>G256~dm3Sc#>9B!1_N#Z5HG&}R55{VcSiJ-In(h4wek0o?zC5_}{_-uh8zCT|(N zyXkIVAqVAecTW~PnacIc>+a=F!N9J`9kt`!Z@L=>wpxt2jC#y1<4q#+@cJ0@s`d3Q z4X#R?j1rAJ(2_2{igM0X$s2hSL3uj7%lso9`0CjJ4R%v@{b9v*9=q>!4oT%PQ$b9X zRr&v4(B)OY5#V0|9I7)4uf=BlQTbZg4e&~p;dTHts$WgzIs%#CGz_ly=4)?aAFdfx zEFh#afp8!+8_EI#fNJ3VM;7I( z#*4xlUc+GlDZmPoa2;^XSmZKnZB%&CZCj^V0-RUPXB);0y3RK(z83Nuusos(EzLVmK|FfIEHArg}Zt8{ng*uM{HMRm`AJ$H?S> zb3?D*y~Z1BDUHq_`}n`clv$Z9ph~y(T>(tSL=%Wh&}SewJU8Z zPFQ}!=-h^dK5#9;#ihd!joUUHRcS}-@`dFy_p2WCJvf}S4L)adPD_T;nJrI&%(-DI zi@%dNxn^p8!aCB9y^ouY-GAqPD7n2q-SqlXaLietwW8XkK+@Ix&vv8Uka2;3N)>vh zv9XX3WZhVFhic?}+t&2ZAd?5+ZrH#8al(E~dD_J9!>5b?6@wpRfD59!KYs=(#hSl@ zDUm}lrG{MS^}=DY7yVQL)k{1UK!_=mr4e!dMU=KmX`Wb`^1*$1a(!MzkB?}=0R1~6 zI<7=326$E@qV!Xo`I@tGASkIkgF=t!6LA<*e~&>I27iPBx?J&MBplIQBuVr-Z*s~0 z0V%|7Nqn*@=4B8?Bc~D7Ls&wAnMDR18&gf*Ye1HGu1$ z8jsPHa5SG`Ab#vB$><@5mr#OlfXfZ$XJ$w}XDD4qMhB^<$92$Bykw0o%jg(-$C8wx zAXEiNqdBsz+hjx)ZA%5&C&gdbA_YQXkkdq5{ zR%r%lG8W2Ud1i$aHfPG{u|;G#kKJ`wPIP||U1fIx?q#b8qx?U}bMCSL diff --git a/flows/updaters/__pycache__/update_vgp_status.cpython-312.pyc b/flows/updaters/__pycache__/update_vgp_status.cpython-312.pyc deleted file mode 100644 index 278e635a842d6cb7ad46537a51ae69b39e7b2f3c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3882 
zcmbVPUu+b|8K3=gcYA+)_Srrg+W{6R5f|VLF{t1=rUVBjMN8L(8`Z}O#9jw1152{Mn&pF%i9@KrHxdn-^|{f52t-- z=d?55%zWSc`Mz&{-*y;#MPu+l(gMmJ1s3n}K`P(`DMZO!$8DJvbdJMhoyJ;A=7j z;BCDr*Ljn`E7f|iZkN$kv`0Ld_X5ge-RgB2VclZ?)L zqQ7sZ3+6kGZj;@j8+X%|7ubK`jJ!?CgG-}own7bjXD2a9 zM7E?!orOggR>Fxg*YuO2yPV#{!=CpZos-p}RPdFtM$?r|we^{eN>6IJT) z<2glIC{n&O<4f12>W-@J0Vf8jvu573i?*6C+M}4LPEIAxlndvFb5m?ox4yugx?y(^ z?pKHFGTit1l_<_{I>g2SJb`5Ez7G~=-ron`x(d}9u;48C+Yi9&uL}nHr7$aiO%)ly z;x`glReZ)|hkuPyq9>*gc!K7#1yuE`Z}(nuF>!md?xDWktT~D;yx#@w_8L0Xdl{a% zy~P4_7?V$ESH}vPujF)L+6xbWG2zZ+u%zag-|%_rUQG{JPFAVTwydJyEA#^BATY^Ht4tiX@9YDVrPf;#bEktw=1*DZp`wmt_6%;wZpc zF;6vE-LPP|nGJ&(>jPYl`%+#9NB3kV?@4aa_F@-6(zNkgggC?taBNG-ggsQ!4wWb5 z0QKO!{vP~}%>Bq#Hre!&vdBIdW5DD77OETQ#ZD9meJ*!AkULhD=;uoJ1Erf2J02)I zIMMS!>8VexD2W%*U?BLS1x4bw%(=GZ@YeanYI4iRpyS)|N6A-jMZT0!WLqVOl-8Nk z4?|m5bmG(RFU2mbOJZ}Rg2Zs-MGyrdw@%LV&+MC< zSm;`aE=3M4%ZFE@$t5}YWdhb)3UB?&Wsvy$ZEbJvmj19i@Mb!c5=biyF~|o8@Nqum z_Hp7RbZ02d(R~nzS+s%fhQKjSWEj_oU9=j?8M>D1=n6qmO39_GOP@DtI1Fk}k%m0A|YG8hOmz4b7|Bg&>w!%*HSP}9llmNl*V z+OHbak0~!BdV^yBCrqz8WliQ>bxZIYm1QSv$J*Wy_Xu{27JWG)R4uqUCiufD2N$-A zp3UL{Z3r;`Ejo-sUe<kRn!{yy_M~&V?w(aAO2=F9lzNP^>wVrEVXM(}2e5DjMe? 
zAi3k%C`B9QN|6nYlvrvj5S)Yfl!nW2cubo{DfzWQIt?LpFW40Fx`3ZuLV{QY)KEd6 zX7M}~@(9H2ziy^KXF0*P-E$i?GN@&>RT zhR>aZ3Le`lfQ%oW`SF=wh_j)OLbJ^uHP1&Mwd`A@kL7{Y#J0Kj7tTCN9J%q%N+>aN z=3!|2-BYVda%OV=o6E}HRXM&Af9>h+1B*S2O-s85S9Tv<3rMYzih^3V&6So~Ui*7f zC=jf)pjZnz2>W@8rrA2YRISwie`iH$0K_g>EF}&T7I#TE>-3PLY=c*LkWE|WXqqL; zrtkEcX|v>rD~_<2O<0EMB{!JeaUGI%p1TtU8HBxc!ZKUJUNT|9#B1K*FH+NTIm&sh z{;02N;-(>=Ck(^M4py<3Otv#=39=(kf_!DelAg>Ql-q=*cle&bKl|*}mx7&C6?U8Ts_GuveJOJ}d zP^pMORgNGzcB}MI>iQy@{34u~+50HGZ9cTnv!oneQHIt7QY?65XiY%@h~c|Fmgh!p PM;^($Dp4eNbM^ciho|rI diff --git a/flows/updaters/api/__pycache__/__init__.cpython-312.pyc b/flows/updaters/api/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 1514a278ebe3f1100f7c9d76b2b032378314bc20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 156 zcmX@j%ge<81kbxKW`O9&AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdWvw4toLW?@pImHW zte>2jl$w*OTbx;vs-KcrlBl1SlV4t}Us?dx{rphCZdb#`}8C+NU+pT$UXzB?K86Udqkg!)j@1# z^$=TFL!T8Wj6h*yO?`IG%9;mB*1}p@8*7J@gLPil@Z=!L!yoIqPW3rhcb}8>uwJ(4 zx~9*?76TtQ>x0z2b&XPvMt9h z7*B994k0ISErXwcqe)^_OBw^K6hT|QF%TBdOy^w3vN@2_2A)7Q5n7WCWU%b;Sf8AL z=qizyTR0~%C|(N^aU<6jBuuNtAYrluGHTsfVnNd*Q(`<6pk(bB6OG75Wiv$L3@cL+ zE`|kzf+BZOlnr=oF-=Y~$N`*WFPTg1b^gWMbA3}}%CaM4uwAxJTW6TN2Hz@8cxqR5gvB-4 z{oBVhVcjWUseR%*?AGzWft?tz%62{`h`g8-`FQMToaJP9TpZ%iQ8;vw_+U>m+7G8f zwj^L$92#Lnegw2iVrVRqjLLKxt*vcsO`%=w zOw;Zip%9>}A;sS^{?ooih>84ha+wLto== zmO-O@%nv$*^T!AL5q^NfhecUdL8eETNRq>+O5n#i*#ZKHCsBw4B?*-<5i~ZwOVw>y zlQiQ&GNI<=u%bkEwJ;$p)Rqxe2u5K`@WE^&2W+DV9|BQAWmt&E#@jpNOs~JAH{RtJ zLWoa@{;i=TV#GKS{8&!{qZj>yaYlS8t0CdA3e9me7zk*_EjdNnB#tGxaT8XpO$?BU zAy|;LfLBL3j_r_Ge;mcNL(7sP)=x?hF})IQYHs98L7P)Ovi6nJXL}bYSr_HRp*Sn+ zhd2gq7D3kHnZTLB)G}T#tbt`SPN*bSoJqt5QBmmuQaLpOR&xIAk+9{Oshai(RJ{W~ z0nR)=5oY%XopZ|3Rj;xde72RjB0r>L zT8zUu*U1ugw;soWyqQ&nl+C@6WDDQ ziI#NP^haxLVIA`lr&Nwmx<&0r#J8RjSgY8mrX}j8P1QXUwmc8kzK%zpvn^jL*}_dK zm;85a2iPS$>%a|JCypl^U~)Jm2T1*G$hjcrmI%nfzMb$$F4l#q!Yz4DYD)5`eO@F5 zC+s2X@1~mYN*>ld46{YFz+@}sRjIY>Dfh}Dp6|4(D>rc=# 
znoA|M#7;sWN5RB?9W*;hK-r&Pq+TaR$pST(pIdYKIW^>^^1?Njo;;`gCxy$iv&_~7 z%6bqskH&S)%mCPvGKpk5molPesFAfDy{Efms^>&^fI?X5D8&dXi*fI9P7Do!<>`lN zKjK2*gA4wL*j$mxCiEhv#?l;jzRC%Scue3h5eEh~n>9W3xi+#Eo0q5oXdhzdQzn^b z+)8{5pJ0}!K)6>4iBS9`QX+p~&byiUEfoVd@~_FOBpgEUlg5a@p+?C7IFbZ-)fCvv zU*_qnCAVFcnB=)gUxF+KY{@X_@4R=@jsJ9 zs`^5@_x$G_^J1#y&?mKX>?eWGUR|o`gu>qnTA=#Vk}^^zO)@z;?(2#Bqj98&M3I{q z_YYusKE#Mmpb8y^k8B0W246Ycc`SJLWZyAa69+pq!H7e$j=RVUA{hJNw<+ekNimZz ziV?ma(2`^V`!j;^dK5uJu$JR|3|+t_FjN3S zSnN1Pea%mC`b+%)2Vtc~z7PNrb_WXk}5kz<33y8}oN2u9hA=#)as!tDAP5In-` zD4Xz($kL)3Twqq*rCe+5Fsokf#&6E&-Lz%Ud(eP~pU?}IJ6QOoTTXW2bxUD{ct!ZEDV&~7!-aF8pKG6L+w{)O4wd=KI-??v%MA5cyDZwbCVOh=})^5e#9jUR5mSGql2x_!QPskC+GXvR}|wfjoi8&5)VO`niE? z@q3j!)0I0HtCuQw->E!x+j6h{M7sUN=gy_}Q#0m_+mrG&-ElW%YMSQz=R>KQ)|pco zN9jFBb=pxqx9zT@c@;$Ehq+i?m%FCBW=ro{>Q?Q9uXNQx7|ZV&YSM<9xz71B^A{K1 zxLurXI-IIKvTQi|mD#i6^ycMS=Eo8&x_aWuiHxuOW8*brT2UiQ)fbk07eL~VD6*vL zzbVRF^oSzLtFyvY)+RhxyXX5f61Kp%Rj}96-o~eR znC)rj-lyNfyc*KZ`lsI>R1;9LCFN{*`mGJdkn-+(`fKmLd{<8e22R&M?Xh+@QvXaH zc6C?KKld6S^YfkU-FvCe_qO(Esee0I-Q%Qw(NNS=LH&D0QBQ#SqJr+;V*H{8Q-0A% zckkAJv6t>4^kR5&|$wqw(_w7#00qQ zQ86B5Q%*z}h_?yJ{wOa3CKepTxRp5z(JQ?pdpn&#C#LIS(7lb4iWH~_7FpOr+ z737BQTQ`VoQBdG9@+KEOTf@Qv%HC(o8DUT^z=%VG0D2T+glh^h0vNgwd4GhtSg4#$ zqW(f`5Zsyth96-@V{-Ac!v`P}Bvqgy%JFbv7T^@+h2cUoP6%V6A+8WBxG8rIXH7$U zZYwaMcauDVB{B1{AZ(UFY~1^`5Y+*ok#!@mg$Kq0njA0(M-KyYVCDg^1uZtfZ@Eep ze)}mf_!@o!##3_e+kEmfZ@RuS^~!6@=5s0Sxj!-lcNTg^9XL9MqfUr6n4?}ic)(pQVZ$hc^#H9sLfX>^|tVUd; zm%uq12Wr*vVJXGQ&B3Z2#0>e?55Wo2Ivs;0&_@XBZ`;(klpT!o*qtMlyx*Y&Hefs*ZTV!rJl$QsU#EY!rtI>dRhZ1CajVb00pp+Cu~Y94?s2cO#_3hIyaDx z2J^jC3ebj`bt=%w0oEJFUeKl>t<48UW5KPy1D)eL(m63*tP{rIP{t7W4!r-MF~mxM zw?*L%ufqSY`4wL6B6YRZFG#5f&q{e)v#)zwvlm`PX$^MEzOBJa8N6*zIN&ADy=f^h zSBKgUxccO!6IvYq8LT3`_ReM>B?84g4?hQL^a?}|v#$1`1iW4x7&kZIFI-aN+yR95 z1G)*(adZnOu~xty47g|53rFwcJl%8p+_5g%e)#O!lbt=sI)c5YgI$M@pm|KOfTL?T zn!&WLXKx3Y+>Sngx`0Wh5jI|A3Yy#(=kMT$>N_<2?AOxDU|9ilc=lp-^$Llwrcl?WiDY5+RvPD1H{Ig?a(+ z`;C4TALqD 
z;Dym>0#nvzpBkBdeP$$Wt(?+)t+*3gUPxK$GOm)V+pcU&Rkkd-c1#`3*o&^$&W1kR znzq-Z8utEd*KOe++tUq4Gu!s39a|r4YfnMk`tnp~%F~>-v^;S7u9jUXn|*u9xqa%$ zeQz=NCi{D5^eOPI&fGe`aQ?>Y^QTh2_B)Oj-~qnK2Lo|zoj-8L-kxdRd2484Xc66r z0J*O*?bw!S+Hvc`!iB~D8*k#GK-$rm32eXRTktK`+$f*X&sy&|8XhoCm)H zgFC$Y2l1;b+TKa_o1mO9YAw6kYv>uEf-y0oSKmyNrZoNWt@b7y9@ zU@X#9(vc`b&j}7@pY}t)pq~l33krWlg z06%Rj|LvgsF952;?1Gq`@=mwDA$IFGH^czVItb(l&f=#q#phSf6}|Q9$ zGeLQB$DZ>3&N0W_@9U9XA*rX#}tEB`$4c5n6n?h8XC3;{f$$0mr zJo_IQoEeMrfu}Z8yM1n3rh3mKnrbLorD&&XmC}1`tCZ1WTBXc3-6Or-shPB{))D5H z$VvSJPeaDl1=M!Ur1`OhBwr&R8nF7FDi?+MWOEpE@ZD|GlZ33m|yu1^zoyY*fL zwIB|3oq|=C>l@1rYJM%SC+>D**FtV+HS`9FsxE(?3uP$(!;sDyw4e0xvVfophQT|) zfn^SYMF8nj1r>e~l;JSIVb9kHKub0GLKh@f>BTOb+ijeF=^YOkZP3M^l_oBtxMvnGEjSLs&|bz~WO993ZhBw+i2 zZmmgqyJX>%;iT4tNvs!&szCfGi}ea7GSZ!riL-4Aoj5}`H7i%p9mH;zNQ|CcGEAMo zGJz6-L6swtrkh5=z&avf)39_33Wk-#Hd-5S?1GM03~M>Hl5Ppt(e zmvYpTMquRyn^aXMzIGBq5g8UHKD($}gwH!)5Puo9-HRP*mx<&dhe-34yk5kHZ6P*7 zByy8_ov#=UcIFYX5#gPxLwtJ0z^X!mC1hrqXPT|jE;GB5vow>cA!L_51+hsb5xazo zCETuf$<^N#ApHFddZ~D(xoC*7%5jY*qg<+Z90?1Dv2!XG#O+UvW{Isc^zG$0azLiq#gVU z9QzYO@=%V~?=|J&b=c~SZpI(S*Zqfyk?qfaPfV=KN20PJ)hFtMjqq4gR1YHqt-k(N z|CLtatyVPA>K|())ty))o<0rvdxGn}(;g6xZlt%Tn#$AAH2OxiKW_9+?BxHD-~Hz(GWZfeW{Nzz z_3+llogY3u=Ba*mBqvSlmLpGfh(BFx4E`%#|Z zczf@c=0tvfXlRb-4}{2#7Pwp(cPv7(tvV*jmZdKSU~3*^E2vAk7`Z=OFke2>u09$0GO) O-i4?908FAWYW^4LxPdkR From 798724137eb71dc7fd2c946c04420d3abea16dc9 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Mon, 8 Jun 2026 16:10:59 +0100 Subject: [PATCH 15/18] update default value handling across updaters --- flows/updaters/update_blobtoolkit.py | 2 +- flows/updaters/update_boat_config.py | 75 +++++-------------- flows/updaters/update_ena_taxonomy_extra.py | 17 ++--- flows/updaters/update_ensembl_metadata.py | 5 +- flows/updaters/update_genomehubs_taxonomy.py | 18 ++--- flows/updaters/update_google_sheets_status.py | 2 +- flows/updaters/update_jgi_status.py | 2 +- flows/updaters/update_ncbi_datasets.py | 2 +- flows/updaters/update_ncbi_taxonomy.py | 
2 +- flows/updaters/update_nhm_status_list.py | 2 +- flows/updaters/update_refseq_organelles.py | 4 +- flows/updaters/update_sra_data.py | 4 +- flows/updaters/update_tol_genome_notes.py | 2 +- flows/updaters/update_tol_portal_status.py | 2 +- flows/updaters/update_ucsc_assemblies.py | 5 +- flows/updaters/update_vgp_original_status.py | 8 +- flows/updaters/update_vgp_status.py | 40 ++++------ 17 files changed, 66 insertions(+), 126 deletions(-) diff --git a/flows/updaters/update_blobtoolkit.py b/flows/updaters/update_blobtoolkit.py index 6912a0f..33721fc 100644 --- a/flows/updaters/update_blobtoolkit.py +++ b/flows/updaters/update_blobtoolkit.py @@ -226,7 +226,7 @@ def upload_s3_files(output_dir: str, s3_path: str) -> None: @flow() def update_blobtoolkit( output_path: str, - s3_path: str, + s3_path: str = "", min_records: int = 0, ) -> bool: """Fetch BlobToolKit analysis data and optionally upload to S3. diff --git a/flows/updaters/update_boat_config.py b/flows/updaters/update_boat_config.py index 4ee5303..71b27cb 100644 --- a/flows/updaters/update_boat_config.py +++ b/flows/updaters/update_boat_config.py @@ -30,29 +30,16 @@ def taxon_id_to_ssh_path(ssh_host, taxon_id, assembly_name): ssh_host, "bash", "-c", - ( - f"'. /etc/profile && module load speciesops && " - f"speciesops getdir --taxon_id {taxon_id}'" - ), + (f"'. 
/etc/profile && module load speciesops && " f"speciesops getdir --taxon_id {taxon_id}'"), ] result = run_quoted(command, capture_output=True, text=True) if result.returncode != 0: - print( - ( - f"WARNING: Error fetching directory for taxon_id {taxon_id}: " - f"{result.stderr}" - ) - ) + print((f"WARNING: Error fetching directory for taxon_id {taxon_id}: " f"{result.stderr}")) return # Filter the result to get the lustre path lustre_path = [line for line in result.stdout.splitlines() if "/lustre" in line] if not lustre_path: - print( - ( - f"WARNING: No lustre path found for taxon_id {taxon_id} in result: " - f"{result.stdout}" - ) - ) + print((f"WARNING: No lustre path found for taxon_id {taxon_id} in result: " f"{result.stdout}")) return # Use the first lustre path lustre_path = lustre_path[0].strip() @@ -60,6 +47,7 @@ def taxon_id_to_ssh_path(ssh_host, taxon_id, assembly_name): def lookup_buscos(ssh_host, file_path): + busco_dirs = [] if "lustre" in file_path: if not is_safe_path(ssh_host): raise ValueError(f"Unsafe ssh host: {ssh_host}") @@ -77,9 +65,7 @@ def lookup_buscos(ssh_host, file_path): if result.returncode != 0: return [] busco_dirs = [ - os.path.basename(os.path.normpath(line)) - for line in result.stdout.splitlines() - if "/busco" in line + os.path.basename(os.path.normpath(line)) for line in result.stdout.splitlines() if "/busco" in line ] return busco_dirs @@ -117,11 +103,9 @@ def assembly_id_to_busco_sets(alt_host, assembly_id): ] busco_sets = [] for lineage in lineages: - busco_url = ( - f"https://busco.cog.sanger.ac.uk/{assembly_id}/{lineage}/full_table.tsv" - ) + busco_url = f"https://busco.cog.sanger.ac.uk/{assembly_id}/{lineage}/full_table.tsv" response = safe_get(busco_url) - if response.status_code == 200: + if response is not None and response.status_code == 200: busco_sets.append(lineage) return f"https://busco.cog.sanger.ac.uk/{assembly_id}", busco_sets @@ -192,10 +176,10 @@ def fetch_goat_results(root_taxid: str, output_path: str) -> 
list[dict]: # fetch query_url with accept header tsv. use python module requests headers = {"Accept": "text/tab-separated-values"} response = safe_get(query_url, headers=headers) + if response is None: + raise RuntimeError("Error fetching BoaT config info: No response received") if response.status_code != 200: - raise RuntimeError( - f"Error fetching BoaT config info: {response.status_code} {response.text}" - ) + raise RuntimeError(f"Error fetching BoaT config info: {response.status_code} {response.text}") # Parse the TSV response if tsv_data := parse_tsv(response.text): @@ -294,29 +278,21 @@ def fetch_boat_config_info( int: Number of lines written to the output file. """ - tsv_data = fetch_goat_results(root_taxid) + tsv_data = fetch_goat_results(root_taxid, file_path) # Prepare output files and get visited assembly IDs visited_file_path = f"{os.path.splitext(file_path)[0]}.visited" - visited_assembly_ids, line_count = prepare_output_files( - file_path, visited_file_path, append - ) + visited_assembly_ids, line_count = prepare_output_files(file_path, visited_file_path, append) for row in tsv_data: taxon_id = row["taxon_id"] assembly_id = row["assembly_id"] # Skip if the assembly_id has already been visited if assembly_id in visited_assembly_ids: - print( - ( - f"Skipping already visited assembly_id {assembly_id} " - f"for taxon_id {taxon_id}." - ) - ) + print((f"Skipping already visited assembly_id {assembly_id} " f"for taxon_id {taxon_id}.")) continue print( - f"Processing taxon_id {taxon_id}, assembly_id {assembly_id} " - f"for assembly_name {row['assembly_name']}." + f"Processing taxon_id {taxon_id}, assembly_id {assembly_id} " f"for assembly_name {row['assembly_name']}." ) # Add the assembly_id to the new visited list with open(visited_file_path, "a") as f: @@ -336,8 +312,7 @@ def fetch_boat_config_info( if not busco_sets: print( - f"Warning: No BUSCO sets found for taxon_id {taxon_id} " - f"and assembly_name {assembly_name}. Skipping." 
+ f"Warning: No BUSCO sets found for taxon_id {taxon_id} " f"and assembly_name {assembly_name}. Skipping." ) continue @@ -358,9 +333,7 @@ def fetch_boat_config_info( line_count += 1 if line_count < min_lines: - print( - f"WARNING: File {file_path} has less than {min_lines} lines: {line_count}" - ) + print(f"WARNING: File {file_path} has less than {min_lines} lines: {line_count}") # Return the number of lines written to the file return line_count @@ -419,13 +392,7 @@ def generate_md5(file_path): def filter_buscos(buscos): # Exclude bacteria_odb and archaea_odb buscos = [ - b - for b in buscos - if not ( - b.startswith("bacteria_odb") - or b.startswith("archaea_odb") - or b.startswith("mm49_") - ) + b for b in buscos if not (b.startswith("bacteria_odb") or b.startswith("archaea_odb") or b.startswith("mm49_")) ] # Group by prefix before _odb prefix_map = defaultdict(list) @@ -446,9 +413,7 @@ def filter_buscos(buscos): @task(log_prints=True) -def filter_farm_data( - farm_results_path: str, goat_results_path: str, output_path: str -) -> None: +def filter_farm_data(farm_results_path: str, goat_results_path: str, output_path: str) -> None: """Filter farm results to include only assemblies with lepidoptera BUSCOs. Combine with GoaT results to add additional fields. 
@@ -513,9 +478,7 @@ def filter_farm_data( @flow() -def update_boat_config( - root_taxid: str, output_path: str, append: bool, s3_path: str -) -> None: +def update_boat_config(root_taxid: str, output_path: str, append: bool = False, s3_path: str = "") -> None: # fetch_goat_results(root_taxid, f"{output_path}/goat_results.tsv") # trawl_farm_data( diff --git a/flows/updaters/update_ena_taxonomy_extra.py b/flows/updaters/update_ena_taxonomy_extra.py index 132c557..44eae59 100644 --- a/flows/updaters/update_ena_taxonomy_extra.py +++ b/flows/updaters/update_ena_taxonomy_extra.py @@ -33,9 +33,7 @@ def read_ncbi_tax_ids(taxdump_path: str) -> set[str]: @task(log_prints=True) -def add_jsonl_tax_ids( - jsonl_path: str, tax_ids: set[str], allowed_tax_ids: set[str] | None = None -) -> None: +def add_jsonl_tax_ids(jsonl_path: str, tax_ids: set[str], allowed_tax_ids: set[str] | None = None) -> None: print(f"Reading previously fetched ENA taxids from {jsonl_path}") filtered_path = f"{jsonl_path}.filtered" try: @@ -43,9 +41,7 @@ def add_jsonl_tax_ids( for line in f: data = json.loads(line) tax_id = data["taxId"] - if ( - allowed_tax_ids is None or tax_id in allowed_tax_ids - ) and tax_id not in tax_ids: + if (allowed_tax_ids is None or tax_id in allowed_tax_ids) and tax_id not in tax_ids: f_out.write(line) tax_ids.add(tax_id) os.replace(filtered_path, jsonl_path) @@ -59,10 +55,7 @@ def get_ena_api_taxids(root_taxid: str) -> set[str]: print(f"Fetching taxids for tax_tree({root_taxid}) from ENA API") limit = 10000000 - url = ( - f"https://www.ebi.ac.uk/ena/portal/api/search?result=taxon" - f"&query=tax_tree({root_taxid})&limit={limit}" - ) + url = f"https://www.ebi.ac.uk/ena/portal/api/search?result=taxon" f"&query=tax_tree({root_taxid})&limit={limit}" # Stream the content of the URL column_index = None @@ -138,8 +131,8 @@ def upload_s3_jsonl(local_path: str, s3_path: str) -> None: @flow() def update_ena_taxonomy_extra( - root_taxid: str, taxdump_path: str, output_path: str, s3_path: 
str, append: bool -) -> None: + root_taxid: str, taxdump_path: str, output_path: str, s3_path: str = "", append: bool = False +) -> bool: """Update the ENA taxonomy JSONL file. Args: diff --git a/flows/updaters/update_ensembl_metadata.py b/flows/updaters/update_ensembl_metadata.py index 8001372..03420df 100644 --- a/flows/updaters/update_ensembl_metadata.py +++ b/flows/updaters/update_ensembl_metadata.py @@ -11,7 +11,6 @@ class EnsemblDivision(Enum): """Supported Ensembl genome database divisions.""" - ENSEMBL = "ensembl" FUNGI = "fungi" METAZOA = "metazoa" PLANTS = "plants" @@ -21,7 +20,6 @@ class EnsemblDivision(Enum): DIVISION_URLS = { - EnsemblDivision.ENSEMBL: ("https://ftp.ensembl.org/pub/current/" "species_metadata_Ensembl.json"), EnsemblDivision.FUNGI: ("http://ftp.ensemblgenomes.org/pub/current/fungi/" "species_metadata_EnsemblFungi.json"), EnsemblDivision.METAZOA: ( "http://ftp.ensemblgenomes.org/pub/current/metazoa/" "species_metadata_EnsemblMetazoa.json" @@ -35,7 +33,6 @@ class EnsemblDivision(Enum): } DIVISION_OUTPUT_NAMES = { - EnsemblDivision.ENSEMBL: "species_metadata_Ensembl.tsv.gz", EnsemblDivision.FUNGI: "species_metadata_EnsemblFungi.tsv.gz", EnsemblDivision.METAZOA: "species_metadata_EnsemblMetazoa.tsv.gz", EnsemblDivision.PLANTS: "species_metadata_EnsemblPlants.tsv.gz", @@ -153,7 +150,7 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_ensembl_metadata( output_path: str, - s3_path: str, + s3_path: str = "", division: str = "vertebrates", ) -> bool: """Fetch Ensembl species metadata for a given division. 
diff --git a/flows/updaters/update_genomehubs_taxonomy.py b/flows/updaters/update_genomehubs_taxonomy.py index d4eb7e9..7d58ce1 100644 --- a/flows/updaters/update_genomehubs_taxonomy.py +++ b/flows/updaters/update_genomehubs_taxonomy.py @@ -20,12 +20,13 @@ def get_file_paths_from_config(config: dict, file_paths: dict) -> dict: key = config.get("xref_label") input_path = config.get("path") - output_path = config.get("out") + output_path = config.get("out", "./taxonomy.jsonl") if key is not None and input_path is not None: file_paths[key] = { "input": input_path, } - return output_path + file_paths["out"] = output_path + return file_paths @task(log_prints=True) @@ -39,9 +40,7 @@ def read_input_config(input_path: str) -> dict: print(f"Error reading {input_path}: {e}") exit() try: - output_path = get_file_paths_from_config(config, file_paths) - if output_path is not None: - file_paths["out"] = output_path + file_paths = get_file_paths_from_config(config, file_paths) for taxonomy in config.get("taxonomies", []): get_file_paths_from_config(taxonomy, file_paths) except Exception as e: @@ -79,8 +78,9 @@ def run_blobtk_taxonomy(root_taxid: str, input_path: str, output_path: str) -> N text=True, bufsize=1, ) - for line in process.stdout: - print(line, end="") + if process.stdout is not None: + for line in process.stdout: + print(line, end="") process.wait() if process.returncode != 0: print(f"Command failed with exit code {process.returncode}") @@ -103,9 +103,7 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() -def update_genomehubs_taxonomy( - root_taxid: str, input_path: str, output_path: str, s3_path: str -) -> None: +def update_genomehubs_taxonomy(root_taxid: str, input_path: str, output_path: str, s3_path: str = "") -> None: """Update the GenomeHubs taxonomy JSONL file. 
Args: diff --git a/flows/updaters/update_google_sheets_status.py b/flows/updaters/update_google_sheets_status.py index 7c0447b..390389a 100644 --- a/flows/updaters/update_google_sheets_status.py +++ b/flows/updaters/update_google_sheets_status.py @@ -382,7 +382,7 @@ def upload_s3_dir(local_dir: str, s3_path: str) -> None: def update_google_sheets_status( output_path: str, index_url: str, - s3_path: str, + s3_path: str = "", min_records: int = 0, ) -> bool: """Fetch all Google Sheets project status and supplementary data. diff --git a/flows/updaters/update_jgi_status.py b/flows/updaters/update_jgi_status.py index 2a557b2..2910bf9 100644 --- a/flows/updaters/update_jgi_status.py +++ b/flows/updaters/update_jgi_status.py @@ -172,7 +172,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() def update_jgi_status( output_path: str, - s3_path: str, + s3_path: str = "", min_records: int = 0, ) -> bool: """Fetch JGI 1KFG status list and optionally upload to S3. diff --git a/flows/updaters/update_ncbi_datasets.py b/flows/updaters/update_ncbi_datasets.py index e2f1e4c..c02ca1b 100644 --- a/flows/updaters/update_ncbi_datasets.py +++ b/flows/updaters/update_ncbi_datasets.py @@ -202,7 +202,7 @@ def generate_md5(file_path): def update_ncbi_datasets( root_taxid: str, output_path: str, - s3_path: str, + s3_path: str = "", data_freeze_path: Optional[str] = None, ) -> bool: line_count = fetch_ncbi_datasets_summary(root_taxid, file_path=output_path, data_freeze_path=data_freeze_path) diff --git a/flows/updaters/update_ncbi_taxonomy.py b/flows/updaters/update_ncbi_taxonomy.py index 890d11a..f6f9935 100644 --- a/flows/updaters/update_ncbi_taxonomy.py +++ b/flows/updaters/update_ncbi_taxonomy.py @@ -86,7 +86,7 @@ def taxonomy_is_up_to_date(local_path: str, http_path: str) -> bool: @flow() -def update_ncbi_taxonomy(output_path: str) -> None: +def update_ncbi_taxonomy(output_path: str) -> bool: """Fetch and the NCBI taxonomy dump. 
Args: diff --git a/flows/updaters/update_nhm_status_list.py b/flows/updaters/update_nhm_status_list.py index a825e69..234c3cd 100644 --- a/flows/updaters/update_nhm_status_list.py +++ b/flows/updaters/update_nhm_status_list.py @@ -55,7 +55,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() -def update_nhm_status_list(output_path: str, s3_path: str, min_records: int) -> bool: +def update_nhm_status_list(output_path: str, s3_path: str = "", min_records: int = 0) -> bool: """Update the NHM status list TSV file.""" os.makedirs(os.path.dirname(output_path), exist_ok=True) line_count = fetch_nhm_tsv(output_path, min_records) diff --git a/flows/updaters/update_refseq_organelles.py b/flows/updaters/update_refseq_organelles.py index a202eea..68511b7 100644 --- a/flows/updaters/update_refseq_organelles.py +++ b/flows/updaters/update_refseq_organelles.py @@ -273,8 +273,8 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_refseq_organelles( output_path: str, - root_taxid: str, - s3_path: str, + root_taxid: str = "", + s3_path: str = "", min_records: int = 0, ) -> bool: """Fetch and parse RefSeq organelle data. 
diff --git a/flows/updaters/update_sra_data.py b/flows/updaters/update_sra_data.py index e10c6f5..4438729 100644 --- a/flows/updaters/update_sra_data.py +++ b/flows/updaters/update_sra_data.py @@ -306,8 +306,8 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_sra_data( output_path: str, - input_path: str, - s3_path: str, + input_path: str = "", + s3_path: str = "", root_taxid: str = "2759", min_records: int = 0, ) -> bool: diff --git a/flows/updaters/update_tol_genome_notes.py b/flows/updaters/update_tol_genome_notes.py index 914dff6..c01ad94 100644 --- a/flows/updaters/update_tol_genome_notes.py +++ b/flows/updaters/update_tol_genome_notes.py @@ -101,7 +101,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() -def update_tol_genome_notes(output_path: str, s3_path: str, min_records: int) -> None: +def update_tol_genome_notes(output_path: str, s3_path: str = "", min_records: int = 0) -> bool: """Update the ToL genome notes TSV file.""" os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True) line_count = fetch_tol_genome_notes(output_path, min_records) diff --git a/flows/updaters/update_tol_portal_status.py b/flows/updaters/update_tol_portal_status.py index 6fbe77f..bf10bc6 100644 --- a/flows/updaters/update_tol_portal_status.py +++ b/flows/updaters/update_tol_portal_status.py @@ -219,7 +219,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() -def update_tol_portal_status(output_path: str, s3_path: str, min_records: int) -> None: +def update_tol_portal_status(output_path: str, s3_path: str = "", min_records: int = 0) -> None: """Update the ToL Portal Project Status TSV file.""" os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True) diff --git a/flows/updaters/update_ucsc_assemblies.py b/flows/updaters/update_ucsc_assemblies.py index 0233951..c2d9578 100644 --- a/flows/updaters/update_ucsc_assemblies.py +++ b/flows/updaters/update_ucsc_assemblies.py @@ -5,7 +5,6 @@ 
from flows.lib.shared_args import OUTPUT_PATH, S3_PATH, parse_args, required from flows.lib.utils import is_safe_path, safe_get, upload_to_s3 - UCSC_URL = "https://hgdownload.soe.ucsc.edu/hubs/UCSC_GI.assemblyHubList.txt" OUTPUT_FILENAME = "UCSC_GI.assemblyHubList.tsv.gz" @@ -28,6 +27,8 @@ def fetch_ucsc_hub_list(output_dir: str) -> tuple[str, int]: print(f"Fetching UCSC hub list from {UCSC_URL}") response = safe_get(UCSC_URL, timeout=60) + if response is None: + raise RuntimeError("Failed to fetch UCSC hub list: no response received") response.raise_for_status() response.encoding = "iso-8859-1" text = response.text @@ -54,7 +55,7 @@ def upload_s3_file(local_path: str, s3_path: str) -> None: @flow() def update_ucsc_assemblies( output_path: str, - s3_path: str = None, + s3_path: str = "", ) -> bool: """Fetch the UCSC assembly hub list and optionally upload to S3. diff --git a/flows/updaters/update_vgp_original_status.py b/flows/updaters/update_vgp_original_status.py index 63462ee..8feb950 100644 --- a/flows/updaters/update_vgp_original_status.py +++ b/flows/updaters/update_vgp_original_status.py @@ -36,9 +36,7 @@ def fetch_vgp_original_tsv( line_count = sum(1 for _ in f) if line_count < min_lines: - raise RuntimeError( - f"VGP file {file_path} has fewer than {min_lines} lines: {line_count}" - ) + raise RuntimeError(f"VGP file {file_path} has fewer than {min_lines} lines: {line_count}") print(f"Wrote {line_count} lines to {file_path}") return line_count @@ -51,9 +49,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() -def update_vgp_original_status( - output_path: str, s3_path: str = None, min_records: int = 0 -) -> bool: +def update_vgp_original_status(output_path: str, s3_path: str = "", min_records: int = 0) -> bool: """Fetch the VGP original status list and optionally upload to S3. This is the scarcely-updated VGP source from the GitHub YAML tracker. 
diff --git a/flows/updaters/update_vgp_status.py b/flows/updaters/update_vgp_status.py index d2d2f8a..5e0cba6 100644 --- a/flows/updaters/update_vgp_status.py +++ b/flows/updaters/update_vgp_status.py @@ -10,7 +10,6 @@ frequently updated VGP GitHub YAML tracker source. """ -import csv import io import os @@ -115,13 +114,16 @@ def _cleanup_headers(df: pd.DataFrame) -> pd.DataFrame: return df +def _get_acronym(project_name: str) -> str: + """Map a free-text project name to a canonical acronym.""" + return PROJECT_ACRONYMS.get(project_name, project_name) + + def _translate_projects(df: pd.DataFrame) -> pd.DataFrame: """Map free-text project names to canonical acronyms.""" for col in ["main_project", "second_project", "project"]: if col in df.columns: - df[col] = df[col].map( - lambda v: PROJECT_ACRONYMS.get(v, v) if pd.notna(v) else v - ) + df[col] = df[col].map(lambda v: _get_acronym(str(v)) if pd.notna(v) else v) return df @@ -130,7 +132,7 @@ def _build_all_projects(df: pd.DataFrame) -> pd.DataFrame: df["all_projects"] = df.apply( lambda row: ",".join( sorted( - set( + { x for x in [ row.get("project"), @@ -138,7 +140,7 @@ def _build_all_projects(df: pd.DataFrame) -> pd.DataFrame: row.get("second_project"), ] if pd.notna(x) - ) + } ) ), axis=1, @@ -164,19 +166,11 @@ def _expand_sequencing_status(df: pd.DataFrame) -> pd.DataFrame: df.loc[df["published"] == df["all_projects"], "insdc_open"] = df["all_projects"] df.loc[df["insdc_open"] == df["all_projects"], "open"] = df["all_projects"] df.loc[df["open"] == df["all_projects"], "in_progress"] = df["all_projects"] - df.loc[df["data_generation"] == df["all_projects"], "in_progress"] = df[ - "all_projects" - ] + df.loc[df["data_generation"] == df["all_projects"], "in_progress"] = df["all_projects"] df.loc[df["in_assembly"] == df["all_projects"], "in_progress"] = df["all_projects"] - df.loc[df["in_progress"] == df["all_projects"], "data_generation"] = df[ - "all_projects" - ] - df.loc[df["in_progress"] == 
df["all_projects"], "sample_acquired"] = df[ - "all_projects" - ] - df.loc[df["sample_acquired"] == df["all_projects"], "sample_collected"] = df[ - "all_projects" - ] + df.loc[df["in_progress"] == df["all_projects"], "data_generation"] = df["all_projects"] + df.loc[df["in_progress"] == df["all_projects"], "sample_acquired"] = df["all_projects"] + df.loc[df["sample_acquired"] == df["all_projects"], "sample_collected"] = df["all_projects"] return df @@ -223,6 +217,8 @@ def fetch_vgp_live_sheet(output_path: str, min_records: int = 0) -> int: int: Number of data rows written. """ response = safe_get(VGP_SHEET_URL, timeout=120) + if response is None: + raise RuntimeError("Failed to fetch VGP live sheet: no response received") response.raise_for_status() df = _process_vgp_sheet(response.text) @@ -230,9 +226,7 @@ def fetch_vgp_live_sheet(output_path: str, min_records: int = 0) -> int: print(f"VGP live sheet: {row_count} rows after processing") if row_count < min_records: - raise RuntimeError( - f"VGP live sheet has fewer than {min_records} rows: {row_count}" - ) + raise RuntimeError(f"VGP live sheet has fewer than {min_records} rows: {row_count}") df.to_csv(output_path, sep="\t", index=False) print(f"Wrote {output_path}") @@ -247,9 +241,7 @@ def upload_s3_tsv(local_path: str, s3_path: str) -> None: @flow() -def update_vgp_status( - output_path: str, s3_path: str = None, min_records: int = 0 -) -> bool: +def update_vgp_status(output_path: str, s3_path: str = "", min_records: int = 0) -> bool: """Fetch the VGP Ordinal Phase1+ live sheet and optionally upload to S3. 
Args: From db10005779cbccc1c8ea31e1cd1272e0d66ea7f1 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Mon, 8 Jun 2026 16:14:12 +0100 Subject: [PATCH 16/18] update main ensembl dividion --- flows/prefect.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flows/prefect.yaml b/flows/prefect.yaml index b846a2b..ee29ca7 100644 --- a/flows/prefect.yaml +++ b/flows/prefect.yaml @@ -296,7 +296,7 @@ deployments: entrypoint: flows/updaters/update_ensembl_metadata.py:update_ensembl_metadata parameters: output_path: "/home/ubuntu/tmp/test/assembly-data/ensembl_metadata.tsv.gz" - division: ensembl + division: vertebrates s3_path: s3://goat/resources/assembly-data/ensembl_metadata.tsv.gz schedules: - *weekly From 74f5f6ecc46544f91ed2b571bd70f36eb3549476 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Tue, 9 Jun 2026 09:00:10 +0100 Subject: [PATCH 17/18] update ott and sra updaters --- flows/updaters/update_ott_taxonomy.py | 2 +- flows/updaters/update_sra_data.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flows/updaters/update_ott_taxonomy.py b/flows/updaters/update_ott_taxonomy.py index f2a1838..725df70 100644 --- a/flows/updaters/update_ott_taxonomy.py +++ b/flows/updaters/update_ott_taxonomy.py @@ -122,7 +122,7 @@ def set_ott_url() -> str: # f"https://files.opentreeoflife.org/ott/" # f"{ott_major_version}/{ott_version}.tgz" # ) - return f"https://files.opentreeoflife.org/ott/{ott_version}/{ott_version}.tar.gz" + return f"https://files.opentreeoflife.org/ott/{ott_version}/{ott_version}.tgz" @flow() diff --git a/flows/updaters/update_sra_data.py b/flows/updaters/update_sra_data.py index 4438729..e1ffe79 100644 --- a/flows/updaters/update_sra_data.py +++ b/flows/updaters/update_sra_data.py @@ -243,12 +243,12 @@ def fetch_sra_xml( efetch_cmd = ["efetch", "-db", "sra", "-format", "docsum"] print(f"Running esearch | efetch for taxid {root_taxid} ({min_date} to {max_date})") - esearch = run_quoted(esearch_cmd, 
capture_output=True, text=True, timeout=300) + esearch = run_quoted(esearch_cmd, capture_output=True, text=True, timeout=3000) if esearch.returncode != 0: raise RuntimeError(f"esearch failed: {esearch.stderr}") with open(output_xml, "w") as f: - efetch = run_quoted(efetch_cmd, input=esearch.stdout, capture_output=True, text=True, timeout=600) + efetch = run_quoted(efetch_cmd, input=esearch.stdout, capture_output=True, text=True, timeout=6000) if efetch.returncode != 0: raise RuntimeError(f"efetch failed: {efetch.stderr}") f.write(efetch.stdout) From c4db87974194db60f4854f6fd7dbd4a688df7785 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Tue, 9 Jun 2026 09:21:28 +0100 Subject: [PATCH 18/18] update nhm resource type --- flows/updaters/update_nhm_status_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flows/updaters/update_nhm_status_list.py b/flows/updaters/update_nhm_status_list.py index 234c3cd..6e75055 100644 --- a/flows/updaters/update_nhm_status_list.py +++ b/flows/updaters/update_nhm_status_list.py @@ -65,7 +65,7 @@ def update_nhm_status_list(output_path: str, s3_path: str = "", min_records: int event="update.nhm.tsv.finished", resource={ "prefect.resource.id": f"update.nhm.{output_path}", - "prefect.resource.type": "nhm.tsv", + "prefect.resource.type": "nhm.status", }, payload={"line_count": line_count}, )