From deeafdb47f5abecb867cfc551a287fdd6e4a90b2 Mon Sep 17 00:00:00 2001 From: robgpita-noaa Date: Thu, 8 Jan 2026 17:47:20 -0700 Subject: [PATCH 1/2] Add full tooling to generate an animation from a flooding event --- .gitignore | 3 + animation_from_event/.dockerignore | 47 + animation_from_event/.env.example | 17 + animation_from_event/Dockerfile | 49 + animation_from_event/Makefile | 73 ++ animation_from_event/README.md | 1056 +++++++++++++++++ animation_from_event/config.yaml | 104 ++ animation_from_event/config_utils.py | 109 ++ animation_from_event/docker-compose.yml | 38 + animation_from_event/download_lake_polygon.py | 351 ++++++ animation_from_event/generate_animation.py | 863 ++++++++++++++ animation_from_event/generate_batch_fims.py | 367 ++++++ animation_from_event/generate_flow_files.py | 660 +++++++++++ animation_from_event/requirements.txt | 32 + animation_from_event/run_workflow.py | 135 +++ animation_from_event/test_setup.sh | 148 +++ animation_from_event/utils_s3.py | 105 ++ 17 files changed, 4157 insertions(+) create mode 100644 animation_from_event/.dockerignore create mode 100644 animation_from_event/.env.example create mode 100644 animation_from_event/Dockerfile create mode 100644 animation_from_event/Makefile create mode 100644 animation_from_event/README.md create mode 100644 animation_from_event/config.yaml create mode 100644 animation_from_event/config_utils.py create mode 100644 animation_from_event/docker-compose.yml create mode 100755 animation_from_event/download_lake_polygon.py create mode 100755 animation_from_event/generate_animation.py create mode 100755 animation_from_event/generate_batch_fims.py create mode 100755 animation_from_event/generate_flow_files.py create mode 100644 animation_from_event/requirements.txt create mode 100755 animation_from_event/run_workflow.py create mode 100755 animation_from_event/test_setup.sh create mode 100755 animation_from_event/utils_s3.py diff --git a/.gitignore b/.gitignore index c55e4d5..c9e58bb 
100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .python-version +__pycache__/ **/.venv **/data **/cache @@ -7,3 +8,5 @@ *.tif *.csv *.xml +*.env +*.mp4 \ No newline at end of file diff --git a/animation_from_event/.dockerignore b/animation_from_event/.dockerignore new file mode 100644 index 0000000..402cd38 --- /dev/null +++ b/animation_from_event/.dockerignore @@ -0,0 +1,47 @@ +# Git +.git +.gitignore + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ + +# Environment +.env +.env.local +venv/ +env/ + +# Data directories +data/ +*.gpkg +*.csv +*.tif +*.tiff +*.mp4 +*.avi + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Documentation (we copy specific files) +*.md +!README.md + +# Test files +tests/ +*.test diff --git a/animation_from_event/.env.example b/animation_from_event/.env.example new file mode 100644 index 0000000..27c2f4e --- /dev/null +++ b/animation_from_event/.env.example @@ -0,0 +1,17 @@ +# AWS Credentials for S3 Access +# ============================== +# Copy this file to .env and fill in your credentials + +# AWS credentials (required for accessing private S3 buckets) +AWS_ACCESS_KEY_ID=your_access_key_here +AWS_SECRET_ACCESS_KEY=your_secret_key_here +AWS_DEFAULT_REGION=us-east-1 + +# Optional: AWS session token (for temporary credentials) +# AWS_SESSION_TOKEN=your_session_token_here + +# Optional: S3 endpoint (for non-AWS S3-compatible storage) +# AWS_S3_ENDPOINT=https://s3.example.com + +# Note: If using anonymous access to public buckets (like noaa-nwm-pds), +# these credentials are not required. 
Set nwm.use_anonymous: true in config.yaml diff --git a/animation_from_event/Dockerfile b/animation_from_event/Dockerfile new file mode 100644 index 0000000..668ddbe --- /dev/null +++ b/animation_from_event/Dockerfile @@ -0,0 +1,49 @@ +FROM python:3.11-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + ffmpeg \ + libgdal-dev \ + gdal-bin \ + python3-gdal \ + libhdf5-dev \ + libnetcdf-dev \ + git \ + wget \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Copy requirements first (for better caching) +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Install flows2fim from GitHub releases +# Download and install flows2fim binary (v0.4.1 - Linux AMD64) +ARG FLOWS2FIM_VERSION=v0.4.1 +RUN wget -q https://github.com/NGWPC/flows2fim-archive/releases/download/${FLOWS2FIM_VERSION}/flows2fim-linux-amd64.tar.gz \ + && tar -xzf flows2fim-linux-amd64.tar.gz \ + && mv flows2fim /usr/local/bin/flows2fim \ + && chmod +x /usr/local/bin/flows2fim \ + && rm flows2fim-linux-amd64.tar.gz + +# Verify flows2fim installation +RUN flows2fim --version + +# Copy application files +COPY *.py ./ +COPY config.yaml ./ + +# Create output directories +RUN mkdir -p /data/input /data/output /data/cache + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV GDAL_DATA=/usr/share/gdal + +# Default command +CMD ["python", "--version"] diff --git a/animation_from_event/Makefile b/animation_from_event/Makefile new file mode 100644 index 0000000..338c2c4 --- /dev/null +++ b/animation_from_event/Makefile @@ -0,0 +1,73 @@ +.PHONY: help build up down shell clean generate-flows generate-fims generate-animation run-workflow download-lake + +help: + @echo "Flood Animation Tool - Make Commands" + @echo "" + @echo "Setup:" + @echo " make build Build Docker image" + @echo " make setup Initial setup (copy .env)" + @echo "" + @echo "Run:" + @echo " make run-workflow 
Run complete workflow" + @echo " make shell Open interactive shell in container" + @echo "" + @echo "Generate:" + @echo " make generate-flows Generate flow files from NWM data" + @echo " make generate-fims Generate FIM GeoTIFFs" + @echo " make generate-animation Generate animation video" + @echo "" + @echo "Utilities:" + @echo " make download-lake Download lake polygon (interactive)" + @echo "" + @echo "Cleanup:" + @echo " make clean Remove output files" + @echo " make clean-all Remove all generated files and images" + +build: + docker-compose build + +setup: + @if [ ! -f .env ]; then \ + cp .env.example .env; \ + echo "Created .env file - please edit with your credentials"; \ + else \ + echo ".env file already exists"; \ + fi + @mkdir -p data/input data/output data/cache + @echo "Directory structure created" + +run-workflow: + docker-compose run --rm flood-animation python run_workflow.py --config config.yaml + +shell: + docker-compose run --rm flood-animation /bin/bash + +generate-flows: + docker-compose run --rm flood-animation python generate_flow_files.py --config config.yaml + +generate-fims: + docker-compose run --rm flood-animation python generate_batch_fims.py --config config.yaml + +generate-animation: + docker-compose run --rm flood-animation python generate_animation.py --config config.yaml + +download-lake: + docker-compose run --rm flood-animation python download_lake_polygon.py --interactive + +clean: + rm -rf data/output/* + rm -rf data/cache/* + @echo "Output and cache directories cleaned" + +clean-all: clean + docker-compose down --rmi all -v + @echo "Docker images and volumes removed" + +up: + docker-compose up -d + +down: + docker-compose down + +logs: + docker-compose logs -f diff --git a/animation_from_event/README.md b/animation_from_event/README.md new file mode 100644 index 0000000..8dfe27d --- /dev/null +++ b/animation_from_event/README.md @@ -0,0 +1,1056 @@ +# Flood Animation from Event + +Automated flood inundation mapping and animation 
tool using NWM streamflow data, RIPPLE-FIM libraries, and flows2fim. + +## Table of Contents + +- [Overview](#overview) +- [Features](#features) +- [Quick Start](#quick-start) +- [Installation](#installation) +- [Configuration](#configuration) +- [Usage](#usage) +- [Lake Fill Feature](#lake-fill-feature) +- [Advanced Configuration](#advanced-configuration) +- [Troubleshooting](#troubleshooting) +- [Development](#development) + +--- + +## Overview + +This containerized tool automates the complete workflow for creating flood inundation animations from hydro-meteorological events: + +``` +NWM Data (S3) → Flow Files → flows2fim → FIM GeoTIFFs → Animation Video +``` + +### Workflow Steps + +1. **Download Data** - RIPPLE collection (ripple.gpkg, start_reaches.csv) from S3 +2. **Generate Flows** - Extract NWM streamflow data for event period +3. **Generate FIMs** - Create flood inundation maps using flows2fim +4. **Create Animation** - Render video with basemap, timestamps, and overlays + +--- + +## Features + +- **Fully Containerized** - Docker-based, runs anywhere +- **S3 Integration** - Auto-downloads RIPPLE data and accesses NWM data +- **Config-Driven** - Single YAML file for all settings +- **Dynamic Paths** - Collection ID-based S3 paths (change once, update all) +- **County Agnostic** - Works with any US county boundary +- **Lake Fill** - Fill permanent water bodies in animations +- **Customizable Viz** - Basemaps, colormaps, extents, overlays +- **Parallel Processing** - Multi-threaded FIM generation + +--- + +## Quick Start + +### Prerequisites + +- Docker and Docker Compose installed +- 8GB+ RAM, 20GB+ disk space +- AWS credentials (optional, for private S3 buckets) + +### 1. 
Initial Setup + +```bash +cd animation_from_event + +# Create directories and environment file +make setup + +# Paste AWS Creds to `.env` +export AWS_ACCESS_KEY_ID= +export AWS_SECRET_ACCESS_KEY= +export AWS_SESSION_TOKEN= + +# Edit config with your collection and event dates +vim config.yaml +``` + +### 2. Configure Your Event + +Edit `config.yaml` - **only need to change 3 lines**: + +```yaml +collection: + id: "ble_12100201_UpperGuadalupe" # ← Change to your collection ID + +event: + start_date: "2025-07-04 00:00" # ← Your event start time + end_date: "2025-07-08 14:00" # ← Your event end time +``` + +**Note:** For events longer than 20 hours, see [Long-Duration Events](#long-duration-events-20-hours) section for best practices on breaking animations into segments. + +### 3. Build Container + +```bash +make build +``` + +This takes 5+ minutes the first time. + +### 4. Run Workflow + +```bash +# Run complete workflow (all steps) +make run-workflow + +# OR run individual steps: +make generate-flows # Generate flow files +make generate-fims # Generate FIMs +make generate-animation # Create video +``` + +### 5. Get Your Video + +```bash +ls -lh data/output/flood_animation.mp4 +``` + +--- + +## Installation + +### Using Make Commands (Recommended) + +```bash +# Initial setup +make setup # Create directories, copy .env template +make build # Build Docker image +``` +### Verify Setup (Optional) + +Before running the workflow, you can verify your environment: +```bash +./test_setup.sh +``` + +### Manual Setup + +```bash +# Create directories +mkdir -p data/input data/output data/cache + +# Copy environment template +cp .env.example .env + +# Insert AWS credentials +vim .env + +# Build container +docker-compose build + +# Run workflow +docker-compose run --rm flood-animation python run_workflow.py --config config.yaml +``` + +--- + +## Configuration + +All settings are in [`config.yaml`](config.yaml). The file has detailed comments for each option. 
+ +### Essential Settings + +#### 1. Collection Configuration + +```yaml +collection: + id: "ble_12100201_UpperGuadalupe" # CHANGE THIS to your collection + name: "Upper Guadalupe Basin" + + s3: + bucket: "fimc-data" + base_path: "ripple/fim_100_domain/collections" + # Paths auto-constructed: {base_path}/{collection.id}/{filename} +``` + +Change `collection.id` and all S3 paths update automatically: +- `{base_path}/{id}/ripple.gpkg` +- `{base_path}/{id}/start_reaches.csv` +- `{base_path}/{id}/library_extent` + +#### 2. Event Time Period + +```yaml +event: + start_date: "2025-07-04 02:00" # Format: YYYY-MM-DD HH:MM (or just YYYY-MM-DD for midnight) + end_date: "2025-07-08" # Format: YYYY-MM-DD HH:MM (or just YYYY-MM-DD for midnight) + + nwm: + bucket: "noaa-nwm-pds" # For dates after 2023 + # bucket: "noaa-nwm-retrospective-3-0-pds" # For 1979-2023 + configuration: "analysis_assim" + use_anonymous: true # No credentials needed for public buckets +``` + +#### 3. FIM Settings + +```yaml +fim: + type: "extent" # "depth" or "extent" + output_format: "COG" # "VRT", "COG", or "GTIFF" + boundary_condition: "nd" # "nd" (normal depth) or "kwse" + + # Starting reach IDs (boundary conditions for flows2fim) + starting_reaches: "/data/input/start_reaches.csv" # Options: + # "/data/input/start_reaches.csv" - Use CSV file (default) + # "auto" - Auto-detect upstream reaches from ripple.gpkg + # "123456,789012" - Comma-separated reach IDs +``` + +**Starting Reaches Explained:** + +The `starting_reaches` setting controls which reaches are used as upstream boundary conditions for FIM generation: + +- **CSV file path** (default: `"/data/input/start_reaches.csv"`) - Use a CSV file with two columns: `reach_id,control_stage`. This allows you to specify custom water surface elevations for each starting reach. The workflow attempts to download this file from S3 along with ripple.gpkg. 
+ +- **`"auto"`** - Automatically detects the upstream-most reaches (headwaters) from the ripple.gpkg database by finding reaches with no upstream connections. This is a reliable fallback when CSV files aren't available. + +- **Comma-separated IDs** - Manually specify reach IDs (e.g., `"123456,789012,345678"`). Useful when you know specific reaches to use as boundary conditions. + +**Automatic Fallback:** If the configured CSV file doesn't exist locally (e.g., not available in your S3 collection), the tool automatically falls back to `"auto"` mode and detects upstream reaches from ripple.gpkg. + +#### 4. Animation Settings + +```yaml +animation: + # Map extent (customize for your area) + extent: + use_custom: true # false = use full FIM extent + center_lon: -99.23 # Center longitude (degrees) + center_lat: 30.12 # Center latitude (degrees) + size_km_ew: 10 # East-west extent (km) + size_km_ns: 6 # North-south extent (km) + + # Visual settings + visual: + fps: 1.0 # Frames per second + dpi: 250 # Resolution + duration_last_frame: 3 # Hold last frame (seconds) + depth_min: 0.0 # Min depth to display (ft) + depth_max: 30.0 # Max depth for colormap (ft) + colormap: "GnBu" # Matplotlib colormap + figsize: [14, 10] # Figure size (inches) + + # Overlays + overlay: + show_timestamp: true + show_colorbar: false + show_disclaimer: true + disclaimer_text: "Disclaimer: Experimental Guidance" + title_prefix: "Flood Extent" + + # Basemap + basemap: + enabled: true + source: "OpenStreetMap.Mapnik" # or "OpenTopoMap", "Esri.WorldImagery", etc. + alpha: 0.5 # Transparency (0-1) + + # County boundary (works for any US county) + county: + show_boundary: false + show_label: false + name: "Kerr" # Any county name + state: "Texas" # Full name or abbreviation (TX) + + # Lake fill (optional - see Lake Fill Feature section) + lake_fill: + enabled: false + # file_path: "/data/input/lake_polygons.geojson" + depth: 5.0 # Depth to assign (ft) +``` + +#### 5. 
Processing Settings + +```yaml +processing: + flows2fim_executable: "flows2fim" # Binary installed in container + max_workers: 4 # Parallel FIM workers +``` + +--- + +## Usage + +### Run Complete Workflow + +```bash +make run-workflow +``` + +This runs all four steps: +1. Downloads RIPPLE data from S3 +2. Generates hourly flow files from NWM +3. Generates FIMs using flows2fim +4. Creates animation video + +### Workflow Efficiency Tips + +**Important:** Steps 1-2 (downloading NWM NetCDF files and generating flow/control files) are **time-consuming**. + +**Best Practice:** Pull data for a **longer time period initially** (e.g., full week), then re-run only the animation step (Step 4) with different visualization parameters, time windows, or extents. + +```bash +# Initial run: Pull full event data (e.g., 7 days) +# Edit config.yaml: start_date: 2025-07-04, end_date: 2025-07-11 +make run-workflow # Takes 45-90 minutes + +# Later: Create different animations from same data +# Just modify animation settings in config.yaml (extent, colormap, etc.) +make generate-animation # Takes ~ 5-10 minutes + +# Or create animation for subset of time period +docker-compose run --rm flood-animation python generate_animation.py \ + --config config.yaml \ + --start-time "2025-07-04 06:00" \ + --end-time "2025-07-04 18:00" +``` + +**Smart File Skipping:** +- **Step 2 (Flow generation)** automatically checks if flow files exist for the configured time range and skips regeneration if all files are present. +- **Step 3 (FIM generation)** automatically skips already-existing controls and FIM files. If your workflow is interrupted or you need to regenerate only new timesteps, simply re-run `make generate-fims` - it will skip completed files and only generate missing ones. + +This approach saves significant time when experimenting with different visualizations or creating multiple animations for different time windows. 
+ +To force regeneration of existing files: +```bash +# Force regenerate flow files +docker-compose run --rm flood-animation python generate_flow_files.py --config config.yaml --force + +# Force regenerate FIM files +docker-compose run --rm flood-animation python generate_batch_fims.py --config config.yaml --force +``` + +### Long-Duration Events (>20 hours) + +**Important:** For events longer than 20 hours, it's recommended to create multiple shorter animations and stitch them together. This approach: +- Reduces memory pressure and OOM risk +- Allows parallel processing of segments +- Makes it easier to recover from failures +- Provides checkpoint progress for long events + +**Workflow for multi-day events:** + +```bash +# Step 1: Generate all flow and FIM files for the full event period +# Edit config.yaml: start_date: 2025-07-04, end_date: 2025-07-11 (7 days) +make generate-flows && make generate-fims + +# Step 2: Create a separate animation for each ~20-hour segment +docker-compose run --rm flood-animation python generate_animation.py \ + --config config.yaml \ + --start-time "2025-07-04 00:00" \ + --end-time "2025-07-04 20:00" \ + --output video_1.mp4 + +docker-compose run --rm flood-animation python generate_animation.py \ + --config config.yaml \ + --start-time "2025-07-04 20:00" \ + --end-time "2025-07-05 16:00" \ + --output video_2.mp4 + +docker-compose run --rm flood-animation python generate_animation.py \ + --config config.yaml \ + --start-time "2025-07-05 16:00" \ + --end-time "2025-07-06 12:00" \ + --output video_3.mp4 + +docker-compose run --rm flood-animation python generate_animation.py \ + --config config.yaml \ + --start-time "2025-07-06 12:00" \ + --end-time "2025-07-07 08:00" \ + --output video_4.mp4 + +# Step 3: Stitch videos together using ffmpeg +docker-compose run --rm flood-animation bash -c "\ + ffmpeg -i /data/output/video_1.mp4 \ + -i /data/output/video_2.mp4 \ + -i /data/output/video_3.mp4 \ + -i /data/output/video_4.mp4 \ + -filter_complex
'concat=n=4:v=1:a=0' \ + /data/output/flood_animation_full.mp4" +``` + +**Alternative: Using a file list for many segments** + +```bash +# Create a file list for ffmpeg +cat > data/output/video_list.txt < 200), consider 6-12 hour segments + +### Run Individual Steps + +```bash +# Step 1: Download RIPPLE data +docker-compose run --rm flood-animation python utils_s3.py --config config.yaml --download-ripple + +# Step 2: Generate flow files +make generate-flows + +# Step 3: Generate FIMs +make generate-fims + +# Step 4: Create animation +make generate-animation +``` + +### Skip Steps + +```bash +# Skip download (use existing local files) +docker-compose run --rm flood-animation python run_workflow.py --config config.yaml --skip-download + +# Skip flows (use existing flow files) +docker-compose run --rm flood-animation python run_workflow.py --config config.yaml --skip-flows + +# Skip FIMs (use existing FIM files) +docker-compose run --rm flood-animation python run_workflow.py --config config.yaml --skip-fims +``` + +### Cleanup Commands + +Remove generated files to free disk space or start fresh: + +```bash +make clean +``` + +This removes: +- `data/output/*` - All generated outputs (flows, controls, FIMs, videos) +- `data/cache/*` - Temporary files + +**Use when:** +- Starting a new event analysis +- Freeing disk space +- Troubleshooting issues with stale data + +**Note:** Input files (`data/input/*`) are preserved. Downloaded RIPPLE data and lake polygons remain intact. 
+ +```bash +# Complete cleanup - remove all Docker artifacts +make clean-all +``` + +This performs `make clean` plus: +- Removes Docker images +- Removes Docker volumes +- Stops all containers + +**Use when:** +- Rebuilding from scratch +- Freeing maximum disk space (~3.5GB from Docker image) +- Resolving Docker-related issues + +**Warning:** After `make clean-all`, you'll need to rebuild: `make build` (takes 5-10 minutes) + +**Example workflow:** +```bash +# Clean output for new analysis +make clean +make run-workflow # Uses existing Docker image + +# Complete fresh start +make clean-all +make build # Rebuild Docker image +make run-workflow +``` + +### Command-Line Overrides + +```bash +# Override dates +docker-compose run --rm flood-animation python generate_flow_files.py \ + --config config.yaml \ + --start-date 2025-07-01 \ + --end-date 2025-07-03 + +# Override visualization +docker-compose run --rm flood-animation python generate_animation.py \ + --config config.yaml \ + --dpi 300 \ + --fps 2.0 +``` + +### Interactive Shell + +```bash +make shell +# Now inside container: +python generate_flow_files.py --config config.yaml +exit +``` + +--- + +## Lake Fill Feature + +Fill permanent water bodies (lakes, reservoirs) in your animations to show continuous flooding, even where FIM data has gaps. + +### Quick Start + +```bash +# Download lake polygon interactively +make download-lake + +# Follow prompts: +# Lake name: Lake Mead +# Bounding box: 36.0,-114.8,36.3,-114.4 +# Output: lake_mead.geojson + +# In config.yaml +vim config.yaml +# Set lake_fill.enabled: true +# Set lake_fill.file_path: "/data/input/lake_mead.geojson" +``` + +### Finding Bounding Box Coordinates + +For simplicity, you can use https://boundingbox.klokantech.com/: +1. Navigate to your lake +2. Draw a box around it +3. Select "CSV" format +4.
Copy coordinates: `south,west,north,east` + +### Usage Examples + +**Interactive Mode:** +```bash +make download-lake +``` + +**Command Line:** +```bash +# Lake Mead, Nevada/Arizona +docker-compose run --rm flood-animation python download_lake_polygon.py \ + --name "Lake Mead" \ + --bbox 36.0,-114.8,36.3,-114.4 \ + --output /data/input/lake_mead.geojson + +# Lake Travis, Texas +docker-compose run --rm flood-animation python download_lake_polygon.py \ + --name "Travis" \ + --bbox 30.3,-98.1,30.5,-97.9 \ + --output /data/input/lake_travis.geojson + +# Lake Ingram (multiple lakes with partial name match) +docker-compose run --rm flood-animation python download_lake_polygon.py \ + --name "Ingram" \ + --bbox 29.9,-99.5,30.3,-99.0 \ + --output /data/input/lake_ingram.geojson +``` + +### Configuration + +Edit `config.yaml`: + +```yaml +animation: + lake_fill: + enabled: true + file_path: "/data/input/lake_.geojson" + depth: 5.0 # For extent: any value works; for depth: controls color +``` + +### Supported File Formats + +- GeoJSON (`.geojson`) - Recommended +- GeoPackage (`.gpkg`) +- Shapefile (`.shp`) +- GeoTIFF (`.tif`) + +### Multiple Lakes + +Single file can contain multiple lakes (FeatureCollection). All will be filled: + +```json +{ + "type": "FeatureCollection", + "features": [ + {"properties": {"name": "Lake New Ingram"}, "geometry": {...}}, + {"properties": {"name": "Lake Old Ingram"}, "geometry": {...}} + ] +} +``` + +### Alternative Data Sources + +If OpenStreetMap doesn't have your lake: + +1. **USGS National Map** + - Visit: https://apps.nationalmap.gov/downloader/ + - Download NHD (National Hydrography Dataset) + +2. **Manual Digitization** + - Open QGIS + - Load basemap + - Digitize lake boundary + - Export as GeoJSON + +3. 
**NWM Lakes Dataset** + - Extract from nwm_lakes.gpkg by location + +--- + +## Advanced Configuration + +### Custom Map Extent + +Zoom to a specific area instead of full county: + +```yaml +animation: + extent: + use_custom: true + center_lon: -99.23 # Your center point + center_lat: 30.12 + size_km_ew: 10 # Width (km) + size_km_ns: 6 # Height (km) +``` + +### Basemap Options + +```yaml +basemap: + source: "OpenStreetMap.Mapnik" # Street map (default) + # source: "OpenTopoMap" # Topographic + # source: "Esri.WorldImagery" # Satellite + alpha: 0.5 # Adjust transparency +``` + +### Custom Colormaps + +```yaml +visual: + colormap: "GnBu" # Default: Green-Blue + # colormap: "Blues" # Blue shades + # colormap: "YlOrRd" # Yellow-Orange-Red + # colormap: "viridis" # Perceptually uniform +``` + +See Matplotlib colormaps: https://matplotlib.org/stable/tutorials/colors/colormaps.html + +### County Boundaries (Any US County) + +```yaml +county: + show_boundary: true + show_label: true + name: "Los Angeles" + state: "California" # or "CA" + + # Also works: + # name: "Cook", + # state: "IL" + # OR + # name: "Miami-Dade" + # state: "Florida" +``` + +State names or abbreviations both work (e.g., "Texas" or "TX"). + +### Parallel Processing + +Adjust based on your CPU cores and RAM: + +```yaml +processing: + max_workers: 4 # Default + # max_workers: 8 # More cores = faster (if you have RAM) + # max_workers: 2 # Less RAM usage +``` + +Rule of thumb: `max_workers = CPU cores - 1`, but watch RAM usage. 
+ +--- + +## Troubleshooting + +### Docker Issues + +**Can't connect to Docker?** +```bash +# Start Docker +sudo systemctl start docker # Linux +# Or start Docker Desktop (Mac/Windows) +``` + +**Permission denied?** +```bash +# Add user to docker group (Linux) +sudo usermod -aG docker $USER +# Log out and back in +``` + +### AWS Credentials + +**Credentials not working?** +```bash +# Check environment variables +docker-compose run --rm flood-animation env | grep AWS + +# Verify .env file +cat .env + +# For public buckets, use anonymous access: +# In config.yaml: use_anonymous: true +``` + +### Memory Issues + +**Out of memory / container killed?** + +The Docker memory limits have been removed by default (see `docker-compose.yml`). Common memory-related errors are `exit code -9` or `Error 137`. If you are still experiencing issues: + +Reduce animation resolution: +```yaml +visual: + dpi: 150 # Reduce from 250 + downsample_factor: 4 # Downsample large rasters +``` + +Or reduce parallel workers in `config.yaml`: +```yaml +processing: + max_workers: 2 # Reduce from 4 +``` + +**For events longer than 20 hours:** Break the animation into segments and stitch them together. This is the recommended approach for multi-day events: + +```bash +# See "Long-Duration Events (>20 hours)" section for detailed workflow +# Generate segments separately, then stitch with ffmpeg +``` + +This approach significantly reduces peak memory usage and allows you to process very long events that would otherwise fail.
+ +### Disk Space Issues + +**Running out of disk space?** + +```bash +# Check disk usage +du -sh data/output/* + +# Remove output files +make clean + +# For more space, remove Docker artifacts (~3.5GB) +make clean-all +docker system prune -a # Remove unused Docker data +``` + +### flows2fim Errors + +**flows2fim not found?** + +Rebuild container (flows2fim should be installed): +```bash +make build + +# Verify installation +docker-compose run --rm flood-animation flows2fim --version +``` + +**flows2fim fails on FIM generation?** + +Check paths in config.yaml: +- `ripple.gpkg` exists +- `start_reaches.csv` exists +- FIM library path correct on S3 + +### S3 Download Fails + +**Can't download RIPPLE data?** + +Check: +1. Collection ID correct in config.yaml +2. S3 paths exist: `s3://fimc-data/ripple/fim_100_domain/collections/{collection.id}/` +3. Valid AWS credentials in `.env` +4. Internet connection + +**Anonymous access fails?** +```yaml +# Try with credentials in .env +nwm: + use_anonymous: false +``` + +### Animation Issues + +**No FIM files found?** +```bash +# Check FIM directory +ls -lh data/output/fims/ + +# If empty, run FIM generation: +make generate-fims +``` + +**Lake fill not working?** + +Verify: +1. `lake_fill.enabled: true` in config.yaml +2. File path correct: `/data/input/<lake_name>.geojson` +3. File exists: `ls data/input/<lake_name>.geojson` +4. File is valid GeoJSON (check with QGIS or geojson.io) + +**Basemap not showing?** + +Check internet connection - basemap tiles download at runtime.
+ +Or disable basemap: +```yaml +basemap: + enabled: false +``` + +### No Output Video + +**Video not created?** + +Check logs: +```bash +docker-compose logs +``` + +Common issues: +- No FIM files (run `make generate-fims`) +- Memory issue (reduce DPI or workers) +- FFmpeg error (check logs) + +### Common Error Messages + +| Error | Solution | +|-------|----------| +| `OOM killed` | Increase `downsample_factor`, reduce `max_workers` or `dpi` in config.yaml | +| `No space left on device` | `make clean` then `docker system prune -a` | +| `ripple.gpkg not found` | Run download step: `make generate-flows` | +| `flows2fim command not found` | `make clean-all` then `make build` | +| `Access Denied (S3)` | Check AWS credentials or use anonymous | +| `No NWM data for date` | Check date range and NWM bucket | +| `Stale output data` | `make clean` to remove old files | +| `Docker build fails` | `make clean-all` then `make build` | + +--- + +## Development + +### Directory Structure + +``` +animation_from_event/ +├── config.yaml # Main configuration +├── .env # AWS credentials (gitignored) +├── .env.example # Credentials template +├── Dockerfile # Container definition +├── docker-compose.yml # Docker Compose config +├── requirements.txt # Python dependencies +├── Makefile # Convenience commands +│ +├── run_workflow.py # Workflow orchestrator +├── config_utils.py # Config loading utilities +├── utils_s3.py # Step 1: S3 download functions +│ +├── generate_flow_files.py # Step 2: Flow generation +├── generate_batch_fims.py # Step 3: FIM generation +├── generate_animation.py # Step 4: Animation creation +├── download_lake_polygon.py # Utility: Lake download +│ +├── data/ # Mounted volume (gitignored) +│ ├── input/ # Downloaded RIPPLE data +│ │ ├── ripple.gpkg +│ │ ├── start_reaches.csv +│ │ └── lake_*.geojson +│ ├── output/ # Generated outputs +│ │ ├── flows/ # NWM flow CSVs +│ │ ├── controls/ # flows2fim control files +│ │ ├── fims/ # FIM GeoTIFFs +│ │ └── flood_animation.mp4 +│ └──
cache/ # Temporary files +│ +└── README.md # This file +``` + +### Container Paths + +All data uses standard container paths (not user-specific): + +| Purpose | Container Path | Host Path | +|---------|---------------|-----------| +| Input data | `/data/input/` | `./data/input/` | +| Output data | `/data/output/` | `./data/output/` | +| Cache | `/data/cache/` | `./data/cache/` | +| Config | `/app/config.yaml` | `./config.yaml` | +| Scripts | `/app/*.py` | `./*.py` | + +### Make Commands Reference + +```bash +# Setup +make setup # Create directories, copy .env +make build # Build Docker image + +# Run +make run-workflow # Complete workflow (all steps) +make shell # Interactive shell in container + +# Generate individual steps +make generate-flows # Generate flow files from NWM data +make generate-fims # Generate FIM GeoTIFFs +make generate-animation # Generate animation video + +# Utilities +make download-lake # Download lake polygon (interactive) + +# Cleanup +make clean # Remove output files (data/output, data/cache) +make clean-all # Remove output + Docker images and volumes +make logs # Show Docker logs +make help # Show all commands +``` + +### Python Script Arguments + +All three main scripts support config-based execution: + +**generate_flow_files.py** +```bash +python generate_flow_files.py --config config.yaml \ + [--start-date YYYY-MM-DD] \ + [--end-date YYYY-MM-DD] +``` + +**generate_batch_fims.py** +```bash +python generate_batch_fims.py --config config.yaml \ + [--max-workers N] +``` + +**generate_animation.py** +```bash +python generate_animation.py --config config.yaml \ + [--dpi N] \ + [--fps N.N] +``` + +### Extending the Tool + +#### Add Custom Processing Step + +1. Create Python script in project root +2. Import config utilities: +```python +from config_utils import load_config, get_paths +``` +3. Add to `run_workflow.py` if desired +4. 
Add Make command in `Makefile` + +#### Custom Basemap Source + +Edit `generate_animation.py` basemap parsing (lines 96-110) to add new providers. + +#### Custom Output Formats + +Modify `generate_animation.py` to support additional video formats (MP4, AVI, GIF, etc.). + +--- + +## System Requirements + +### Minimum + +- **CPU:** 2 cores +- **RAM:** 8 GB +- **Disk:** 20 GB free space +- **OS:** Linux, macOS, Windows (with Docker) + +### Recommended + +- **CPU:** 4+ cores (for parallel processing) +- **RAM:** 16 GB +- **Disk:** 50 GB+ SSD +- **Network:** Broadband (for S3 downloads and basemap tiles) + +--- + +## Dependencies + +### System (in Container) + +- Python 3.11 +- ffmpeg (video encoding) +- GDAL 3.x (geospatial library) +- libhdf5, libnetcdf (NWM data) +- flows2fim v0.4.1 (FIM generation) + +### Python Packages + +- **Data:** boto3, pandas, numpy, xarray, netCDF4 +- **Geospatial:** rasterio, fiona, geopandas, shapely, pyproj +- **Visualization:** matplotlib, imageio, contextily +- **Utilities:** pyyaml, python-dotenv + +See [`requirements.txt`](requirements.txt) for full list. + +--- + +## Support + +### Documentation + +- This README (comprehensive guide) +- Inline comments in `config.yaml` +- Script docstrings (`--help` flags) + +### Troubleshooting + +1. Check [Troubleshooting](#troubleshooting) section above +2. Review Docker logs: `docker-compose logs` +3. Test individual steps: `make generate-flows`, etc. +4. 
Check file paths and permissions + +--- + +## Version History + +### v1.0.0 (2026-01-02) +- Initial release +- Fully containerized workflow +- Config-driven operation +- S3 integration with dynamic paths +- make commands +- Lake fill feature +- County-agnostic boundary support +- flows2fim binary integration + +--- + +**Created:** 2026-01-02 +**Tool:** Flood Animation from Event +**Maintainer:** NGWPC diff --git a/animation_from_event/config.yaml b/animation_from_event/config.yaml new file mode 100644 index 0000000..89701e0 --- /dev/null +++ b/animation_from_event/config.yaml @@ -0,0 +1,104 @@ +# Flood Animation from Event Configuration +# =========================================== + +# RIPPLE Collection Configuration +collection: + id: "ble_12100201_UpperGuadalupe" # CHANGE THIS to your collection ID + name: "Upper Guadalupe Basin" + + # S3 paths for collection data (uses collection.id automatically) + s3: + bucket: "fimc-data" + base_path: "ripple/fim_100_domain/collections" # Base path template + # Full paths are constructed as: {base_path}/{collection.id}/{filename} + # Example: ripple/fim_100_domain/collections/ble_12100201_UpperGuadalupe/ripple.gpkg + +# Event Time Series +event: + start_date: "2025-07-04 08:00" # Format: YYYY-MM-DD HH:MM (or just YYYY-MM-DD for midnight) + end_date: "2025-07-04 17:00" # End time is exclusive (will generate up to but not including this time) + + # NWM data source + nwm: + # For dates after 2023: noaa-nwm-pds (operational) + # For dates 1979-2023: noaa-nwm-retrospective-3-0-pds + bucket: "noaa-nwm-pds" + configuration: "analysis_assim" # NWM configuration to use + use_anonymous: true # Use anonymous S3 access for public buckets + +# FIM Generation Settings +fim: + type: "extent" # "depth" or "extent" + output_format: "COG" # "VRT", "COG", or "GTIFF" + boundary_condition: "nd" # "nd" (normal depth) or "kwse" (known WSE) + + # Starting reach IDs for flows2fim controls + starting_reaches: "/data/input/start_reaches.csv" + # 
Options: + # "/data/input/start_reaches.csv" - Use CSV file with reach_id,control_stage columns (default) + # "auto" - Automatically detect upstream-most reaches from ripple.gpkg + # "123456,789012,345678" - Comma-separated list of reach IDs + # If start_reaches.csv doesn't exist, automatically falls back to "auto" mode + +# Animation Settings +animation: + # Geographic extent (customize for your area of interest) + extent: + use_custom: true # Set to false to use full FIM extent + center_lon: -99.3732 # Center longitude in degrees (example: west of Kerrville, TX) + center_lat: 30.0050 # Center latitude in degrees + size_km_ew: 15 # East-west extent (km) + size_km_ns: 10 # North-south extent (km) + + # Visual settings + visual: + fps: 1.0 # Frames per second + dpi: 250 # Resolution + duration_last_frame: 3 # Seconds to hold last frame + depth_min: 0.0 # Minimum depth to display (ft) + depth_max: 30.0 # Maximum depth for colormap (ft) + colormap: "GnBu" # Matplotlib colormap + figsize: [14, 10] # Figure size in inches + downsample_factor: 1 # Downsample large rasters by this factor (1=no downsampling, 2=half resolution, 4=quarter, etc.) 
+ + # Overlay settings + overlay: + show_timestamp: true + show_colorbar: false + show_disclaimer: true + disclaimer_text: "Disclaimer: Experimental Guidance" + title_prefix: "Flood Extent" + + # Basemap settings + basemap: + enabled: true + source: "OpenStreetMap.Mapnik" # Other supported values: OpenTopoMap, Esri.WorldImagery (anything else falls back to OpenStreetMap.Mapnik) + alpha: 0.5 # Transparency (0=invisible, 1=opaque) + + # County boundary settings (works for any US county) + county: + show_boundary: false # Show county boundary outline + show_label: false # Show county name label + name: "Kerr" # County name (e.g., "Los Angeles", "Cook", "Miami-Dade") + state: "Texas" # State name or abbreviation (e.g., "Texas" or "TX") + + # Lake fill settings (optional) + lake_fill: + enabled: false + # file_path: "/data/input/lake_ingram.geojson" + depth: 5.0 # Depth to assign to lake pixels (ft) + +# Output Settings +output: + base_dir: "/data/output" + video_filename: "flood_animation.mp4" + flow_file_suffix: "flows" # Suffix for flow CSV files (YYYYMMDD_HHMM_{suffix}.csv) + keep_intermediate: true # Keep flow files, controls, and FIMs + +# Processing Settings +processing: + # flows2fim executable (will look in PATH if not full path) + flows2fim_executable: "flows2fim" + + # Parallel processing + max_workers: 4 # Number of parallel FIM generation workers diff --git a/animation_from_event/config_utils.py b/animation_from_event/config_utils.py new file mode 100644 index 0000000..6f896cd --- /dev/null +++ b/animation_from_event/config_utils.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +""" +Configuration Utilities +======================== + +Helper functions for loading and accessing configuration. 
def load_config(config_path='config.yaml'):
    """Load the YAML configuration file.

    Args:
        config_path: Path to the YAML file (default: ``config.yaml`` in the
            current working directory).

    Returns:
        The parsed configuration exactly as returned by ``yaml.safe_load``.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def get_s3_paths(config):
    """Construct the S3 locations of the collection's files.

    Args:
        config: Parsed configuration; reads ``collection.id`` and
            ``collection.s3.bucket`` / ``collection.s3.base_path``.

    Returns:
        Dict with ``bucket``, the object keys ``ripple_path`` and
        ``start_reaches_path``, and ``fim_library`` as a ``/vsis3/`` path.
    """
    collection_id = config['collection']['id']
    bucket = config['collection']['s3']['bucket']
    base_path = config['collection']['s3']['base_path']
    collection_prefix = f"{base_path}/{collection_id}"

    return {
        'bucket': bucket,
        'ripple_path': f"{collection_prefix}/ripple.gpkg",
        'start_reaches_path': f"{collection_prefix}/start_reaches.csv",
        'fim_library': f"/vsis3/{bucket}/{collection_prefix}/library_extent"
    }


def _resolve_starting_reaches(starting_reaches):
    """Normalise ``fim.starting_reaches`` to "auto", a reach-ID string, or a Path.

    A value made only of digits is a single reach ID (YAML may have parsed it
    as an int); it is returned as a string instead of being mistaken for a
    file name, which previously turned it into "auto" or raised TypeError.
    """
    if starting_reaches is None:
        return "auto"

    text = str(starting_reaches).strip()
    if not text or text == "auto":
        return "auto"

    # Single reach ID, e.g. 123456 or "123456"
    if text.isdigit():
        return text

    # Comma-separated list of reach IDs: keep the configured value untouched
    if ',' in text and not text.endswith('.csv'):
        return starting_reaches

    # Anything else is a CSV path; fall back to auto-detection if it is absent
    reaches_path = Path(text)
    return reaches_path if reaches_path.exists() else "auto"


def get_paths(config):
    """Get standardized paths from config.

    Args:
        config: Parsed configuration; reads ``output.base_dir``,
            ``output.video_filename``, the optional ``output.flow_file_suffix``
            and the optional ``fim.starting_reaches``.

    Returns:
        Dict of ``Path`` objects for the working directories and output video,
        plus ``start_reaches`` (``"auto"``, a reach-ID string, or the ``Path``
        of an existing CSV) and ``flow_file_suffix``.
    """
    base_dir = Path(config['output']['base_dir'])

    starting_reaches = config['fim'].get('starting_reaches', '/data/input/start_reaches.csv')
    start_reaches_value = _resolve_starting_reaches(starting_reaches)

    return {
        'base_dir': base_dir,
        'ripple_db': Path('/data/input/ripple.gpkg'),
        'start_reaches': start_reaches_value,
        'flows_dir': base_dir / 'flows',
        'controls_dir': base_dir / 'controls',
        'fims_dir': base_dir / 'fims',
        'output_video': base_dir / config['output']['video_filename'],
        'flow_file_suffix': config['output'].get('flow_file_suffix', 'flows'),
    }


def get_nwm_config(config):
    """Get NWM configuration.

    Returns:
        Dict with the NWM ``bucket``, ``config`` (the NWM configuration name),
        ``use_anonymous`` and the event ``start_date`` / ``end_date`` strings.
    """
    event = config['event']
    nwm = event['nwm']

    return {
        'bucket': nwm['bucket'],
        'config': nwm['configuration'],
        'use_anonymous': nwm['use_anonymous'],
        'start_date': event['start_date'],
        'end_date': event['end_date'],
    }


def get_fim_config(config):
    """Get FIM generation configuration.

    Returns:
        Dict with the FIM ``library`` location (from ``get_s3_paths``) and the
        configured ``type``, ``format`` and ``boundary_condition``.
    """
    s3_paths = get_s3_paths(config)
    fim = config['fim']

    return {
        'library': s3_paths['fim_library'],
        'type': fim['type'],
        'format': fim['output_format'],
        'boundary_condition': fim['boundary_condition'],
    }


def get_animation_config(config):
    """Get animation configuration.

    Returns:
        Dict with the ``extent``, ``visual``, ``overlay``, ``basemap``,
        ``county`` and ``lake_fill`` sub-sections of ``animation``.
    """
    anim_cfg = config['animation']

    return {
        'extent': anim_cfg['extent'],
        'visual': anim_cfg['visual'],
        'overlay': anim_cfg['overlay'],
        'basemap': anim_cfg['basemap'],
        'county': anim_cfg['county'],
        # Lake fill is documented as optional: an omitted section means "off"
        'lake_fill': anim_cfg.get('lake_fill') or {},
    }


def setup_aws_env():
    """Check that the AWS credential environment variables are set.

    Prints a warning naming any missing variable; nothing is modified.

    Returns:
        True when both ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` are
        set to non-empty values, otherwise False.
    """
    required_vars = ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY']

    missing = [var for var in required_vars if not os.getenv(var)]

    if missing:
        print(f"Warning: Missing AWS environment variables: {', '.join(missing)}")
        print("Set these in .env file or use anonymous S3 access")

    return not missing
+ image: flood-animation:latest + container_name: flood-animation-tool + + # Mount ./data to /data and config.yaml to /app/config.yaml + volumes: + - ./data:/data + - ./config.yaml:/app/config.yaml + + # Load environment variables from .env file + env_file: + - .env + + # Keep container running for interactive use + stdin_open: true + tty: true + + # Override command for interactive shell + # command: /bin/bash + + # Resource limits + # Note: Memory limits removed to allow Docker's use of host memory as needed + # deploy: + # resources: + # limits: + # cpus: '4.0' + # memory: 16G + # reservations: + # cpus: '2.0' + # memory: 4G + + # Service for running the full workflow + workflow: + extends: flood-animation + container_name: flood-animation-workflow + command: python run_workflow.py --config /app/config.yaml diff --git a/animation_from_event/download_lake_polygon.py b/animation_from_event/download_lake_polygon.py new file mode 100755 index 0000000..b126773 --- /dev/null +++ b/animation_from_event/download_lake_polygon.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +""" +Download Lake Polygon from OpenStreetMap +========================================= + +Generic script to download any lake/reservoir polygon from OpenStreetMap +and save as GeoJSON for use in flood animation lake fill. + +This script queries the OpenStreetMap Overpass API for water bodies matching +a given name within a specified bounding box, then saves the results as GeoJSON. 
+ +Requirements: + pip install requests + +Usage: + # Using command-line arguments + python download_lake_polygon.py --name "Lake Ingram" --bbox 29.9,-99.5,30.3,-99.0 --output lake_ingram.geojson + + # Raising the Overpass query timeout (seconds) + python download_lake_polygon.py --name "Lake Mead" --bbox 36.0,-114.8,36.3,-114.4 --output lake_mead.geojson --timeout 60 + + # Interactive mode (prompts for inputs) + python download_lake_polygon.py --interactive + +Examples: + # Lake Mead, Nevada/Arizona + python download_lake_polygon.py --name "Lake Mead" --bbox 36.0,-114.8,36.3,-114.4 --output lake_mead.geojson + + # Lake Travis, Texas + python download_lake_polygon.py --name "Lake Travis" --bbox 30.3,-98.1,30.5,-97.9 --output lake_travis.geojson + + # Reservoir with partial name match + python download_lake_polygon.py --name "Ingram" --bbox 29.9,-99.5,30.3,-99.0 --output lake_ingram.geojson +""" + +import requests +import json +import argparse +import sys +from pathlib import Path + + +def query_overpass(lake_name, bbox, timeout=25): + """ + Query OpenStreetMap Overpass API for water bodies matching the lake name. 
def query_overpass(lake_name, bbox, timeout=25):
    """
    Query OpenStreetMap Overpass API for water bodies matching the lake name.

    Args:
        lake_name: Name of lake to search for (case-insensitive, partial match;
            the value is used as a regular expression by Overpass)
        bbox: Tuple of (south, west, north, east) in degrees
        timeout: Server-side query timeout in seconds

    Returns:
        JSON response from Overpass API

    Raises:
        requests.exceptions.RequestException: If query fails
    """
    south, west, north, east = bbox

    # A double quote in the name would end the quoted regex in the query below
    safe_name = lake_name.replace('"', '\\"')

    # Build Overpass QL query
    # Searches for ways and relations with natural=water, water=lake, or water=reservoir
    # that match the lake name (case-insensitive)
    query = f"""
[out:json][timeout:{timeout}];
(
 way["natural"="water"]["name"~"{safe_name}",i]({south},{west},{north},{east});
 relation["natural"="water"]["name"~"{safe_name}",i]({south},{west},{north},{east});
 way["water"="lake"]["name"~"{safe_name}",i]({south},{west},{north},{east});
 relation["water"="lake"]["name"~"{safe_name}",i]({south},{west},{north},{east});
 way["water"="reservoir"]["name"~"{safe_name}",i]({south},{west},{north},{east});
 relation["water"="reservoir"]["name"~"{safe_name}",i]({south},{west},{north},{east});
);
out geom;
"""

    overpass_url = "https://overpass-api.de/api/interpreter"

    print(f"Querying OpenStreetMap for '{lake_name}'...")
    print(f"Search area: ({south}, {west}) to ({north}, {east})")
    print()

    # The client must wait at least as long as the server is allowed to work,
    # otherwise a large --timeout is silently cut short at 60 seconds.
    http_timeout = max(60, timeout + 10)

    response = requests.post(overpass_url, data=query, timeout=http_timeout)
    response.raise_for_status()

    return response.json()
def _ring_coords(geometry):
    """Return [lon, lat] pairs for an Overpass geometry node list, skipping empty nodes."""
    return [[node['lon'], node['lat']] for node in (geometry or []) if node]


def _assemble_rings(segments):
    """Stitch coordinate segments that share end points into closed rings.

    Segments that are already closed become rings as-is. Open segments are
    joined end-to-end (reversing a segment where needed); a chain that cannot
    be completed is closed back to its first point. Rings with fewer than four
    positions are discarded because they cannot form a polygon.
    """
    closed, pending = [], []
    for segment in segments:
        if len(segment) < 2:
            continue
        (closed if segment[0] == segment[-1] else pending).append(list(segment))

    while pending:
        ring = pending.pop(0)
        grew = True
        while grew and ring[0] != ring[-1]:
            grew = False
            for index, segment in enumerate(pending):
                if segment[0] == ring[-1]:
                    ring.extend(segment[1:])
                elif segment[-1] == ring[-1]:
                    ring.extend(segment[-2::-1])
                elif segment[-1] == ring[0]:
                    ring[:0] = segment[:-1]
                elif segment[0] == ring[0]:
                    ring[:0] = segment[:0:-1]
                else:
                    continue
                del pending[index]
                grew = True
                break
        if ring[0] != ring[-1]:
            ring.append(ring[0])
        closed.append(ring)

    return [ring for ring in closed if len(ring) >= 4]


def _water_feature(element, geometry, osm_type):
    """Build one GeoJSON Feature for an OSM element with the given geometry."""
    tags = element.get('tags', {})
    return {
        'type': 'Feature',
        'geometry': geometry,
        'properties': {
            'osm_id': element.get('id'),
            'name': tags.get('name', 'Unknown'),
            'type': tags.get('water', tags.get('natural', 'water')),
            'source': 'OpenStreetMap',
            'osm_type': osm_type
        }
    }


def osm_to_geojson(osm_data):
    """
    Convert OSM data to GeoJSON FeatureCollection format.

    Ways become Polygon features. For relations only the members with role
    "outer" are used (inner rings/holes are ignored): their ways are stitched
    into closed rings, giving a Polygon for one ring and a MultiPolygon for
    several. Elements without usable geometry produce no feature.

    Args:
        osm_data: OSM JSON response from Overpass API

    Returns:
        GeoJSON FeatureCollection dictionary
    """
    features = []

    for element in osm_data.get('elements', []):
        element_type = element.get('type')

        if element_type == 'way' and 'geometry' in element:
            # Convert way to a closed polygon ring (skipped when degenerate)
            rings = _assemble_rings([_ring_coords(element['geometry'])])
            if rings:
                geometry = {'type': 'Polygon', 'coordinates': [rings[0]]}
                features.append(_water_feature(element, geometry, 'way'))

        elif element_type == 'relation' and 'members' in element:
            # Handle multipolygon relations
            # Note: Full multipolygon processing is complex, this is simplified
            print(f" Note: Found relation '{element.get('tags', {}).get('name', 'Unknown')}' (ID: {element.get('id')})")
            print(f" Relations require more complex processing. Extracting outer members only.")

            outer_segments = [
                _ring_coords(member['geometry'])
                for member in element.get('members', [])
                if member.get('role') == 'outer' and 'geometry' in member
            ]
            rings = _assemble_rings(outer_segments)

            if len(rings) == 1:
                geometry = {'type': 'Polygon', 'coordinates': [rings[0]]}
                features.append(_water_feature(element, geometry, 'relation'))
            elif rings:
                # One polygon per outer ring
                geometry = {'type': 'MultiPolygon', 'coordinates': [[ring] for ring in rings]}
                features.append(_water_feature(element, geometry, 'relation'))

    return {
        'type': 'FeatureCollection',
        'features': features
    }


def interactive_mode():
    """
    Interactive mode - prompts user for inputs.

    Reads the lake name, the four bounding-box values and the output file name
    from standard input. A relative output path is placed under /data/input.

    Returns:
        Tuple of (lake_name, bbox, output_path)

    Raises:
        ValueError: If a bounding-box value is not a number
    """
    print("=" * 70)
    print("INTERACTIVE LAKE POLYGON DOWNLOAD")
    print("=" * 70)
    print()
    print("This tool will download lake polygons from OpenStreetMap.")
    print()

    # Get lake name
    lake_name = input("Enter lake name (e.g., 'Ingram'): ").strip()

    # Get bounding box
    print()
    print("Enter bounding box coordinates (in decimal degrees):")
    print(" Tip: Use https://boundingbox.klokantech.com/ to find coordinates")
    print(" Select 'CSV' format and copy the values")
    print()

    south = float(input(" South latitude (e.g., 29.9): "))
    west = float(input(" West longitude (e.g., -99.5): "))
    north = float(input(" North latitude (e.g., 30.3): "))
    east = float(input(" East longitude (e.g., -99.0): "))

    bbox = (south, west, north, east)

    # Get output path
    print()
    default_output = f"{lake_name.lower().replace(' ', '_')}.geojson"
    output_str = input(f"Output file path (default: {default_output}): ").strip()
    output_filename = output_str if output_str else default_output

    # Ensure file is saved to mounted volume (/data/input)
    output_path = Path(output_filename)
    if not output_path.is_absolute():
        output_path = Path('/data/input') / output_path

    return lake_name, bbox, output_path
def _bbox_problem(bbox):
    """Return a description of what is wrong with (south, west, north, east), or None."""
    south, west, north, east = bbox
    if not (-90 <= south < north <= 90):
        return "latitudes must satisfy -90 <= south < north <= 90"
    if not (-180 <= west <= 180 and -180 <= east <= 180):
        return "longitudes must be between -180 and 180"
    return None


def main():
    """Command-line entry point: download a lake polygon and save it as GeoJSON.

    Inputs come from the command line, or from prompts with --interactive.
    Relative output paths are placed under /data/input.

    Returns:
        0 when a GeoJSON file with at least one feature was written, 1 when
        nothing was found, nothing could be converted, or the query failed.
        Invalid arguments exit through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="Download lake polygon from OpenStreetMap",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --name "Lake Mead" --bbox 36.0,-114.8,36.3,-114.4 --output lake_mead.geojson
  %(prog)s --name "Ingram" --bbox 29.9,-99.5,30.3,-99.0 --output lake_ingram.geojson
  %(prog)s --interactive
        """
    )

    parser.add_argument('--name', '-n', help="Lake name to search for")
    parser.add_argument('--bbox', '-b', help="Bounding box: south,west,north,east (degrees)")
    parser.add_argument('--output', '-o', help="Output GeoJSON file path")
    parser.add_argument('--interactive', '-i', action='store_true',
                        help="Interactive mode (prompts for inputs)")
    parser.add_argument('--timeout', type=int, default=25,
                        help="Query timeout in seconds (default: 25)")

    args = parser.parse_args()

    # Interactive mode
    if args.interactive:
        lake_name, bbox, output_path = interactive_mode()
    else:
        # Validate required arguments
        if not args.name or not args.bbox or not args.output:
            parser.error("--name, --bbox, and --output are required (or use --interactive)")

        lake_name = args.name

        # Parse bounding box
        try:
            bbox_parts = [float(x.strip()) for x in args.bbox.split(',')]
            if len(bbox_parts) != 4:
                raise ValueError("Bounding box must have 4 values")
            bbox = tuple(bbox_parts)
        except ValueError as e:
            parser.error(f"Invalid bounding box format: {e}")

        # Ensure file is saved to mounted volume (/data/input) if relative path
        output_path = Path(args.output)
        if not output_path.is_absolute():
            output_path = Path('/data/input') / output_path

    # Reject impossible coordinates before spending a network round trip
    problem = _bbox_problem(bbox)
    if problem:
        parser.error(f"Invalid bounding box: {problem}")

    # Display configuration
    print()
    print("=" * 70)
    print("DOWNLOAD LAKE POLYGON FROM OPENSTREETMAP")
    print("=" * 70)
    print(f"Lake name: {lake_name}")
    print(f"Bounding box: {bbox}")
    print(f"Output: {output_path}")
    print()

    try:
        # Query Overpass API
        osm_data = query_overpass(lake_name, bbox, timeout=args.timeout)

        # Check results
        if not osm_data.get('elements'):
            print("=" * 70)
            print("NO RESULTS FOUND")
            print("=" * 70)
            print(f"No water bodies matching '{lake_name}' found in the specified area.")
            print()
            print("Suggestions:")
            print(" 1. Try a broader search area (larger bounding box)")
            print(" 2. Try a partial name (e.g., 'Mead' instead of 'Lake Mead')")
            print(" 3. Check the lake name spelling on OpenStreetMap.org")
            print(" 4. The lake may not be mapped in OpenStreetMap")
            print()
            print("Alternative data sources:")
            print(" - USGS National Map: https://apps.nationalmap.gov/downloader/")
            print(" - NHD: https://www.usgs.gov/national-hydrography")
            return 1

        # Print found features
        print("=" * 70)
        print(f"FOUND {len(osm_data['elements'])} FEATURE(S)")
        print("=" * 70)
        for elem in osm_data['elements']:
            tags = elem.get('tags', {})
            name = tags.get('name', 'Unnamed')
            osm_type = elem.get('type', 'unknown')
            osm_id = elem.get('id', 'N/A')
            water_type = tags.get('water', tags.get('natural', 'N/A'))
            print(f" • {name}")
            print(f" Type: {osm_type} | ID: {osm_id} | Water: {water_type}")
        print()

        # Convert to GeoJSON
        geojson = osm_to_geojson(osm_data)

        # Elements can match without yielding any polygon (e.g. no geometry);
        # an empty file must not be reported as a successful download.
        if not geojson['features']:
            print("ERROR: Matching elements were found, but none could be converted to a polygon.")
            return 1

        # Save to file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(geojson, f, indent=2)

        print("=" * 70)
        print("SUCCESS")
        print("=" * 70)
        print(f"Saved {len(geojson['features'])} feature(s) to: {output_path}")
        print()
        print("Next steps:")
        print(" 1. Verify the lake polygon in QGIS or a GeoJSON viewer")
        print(" 2. Add to config.yaml under animation.lake_fill:")
        print(" enabled: true")
        print(f" file_path: \"/data/input/{output_path.name}\"")
        print(" 3. Run the animation workflow")
        print()

        return 0

    except requests.exceptions.Timeout:
        print("ERROR: Query timed out. Try:")
        print(" - Smaller bounding box")
        print(" - Increase timeout with --timeout <seconds>")
        return 1

    except requests.exceptions.RequestException as e:
        print(f"ERROR: Failed to query OpenStreetMap: {e}")
        print()
        print("Possible issues:")
        print(" - No internet connection")
        print(" - Overpass API is down or overloaded")
        print(" - Invalid bounding box coordinates")
        print()
        print("Try alternative data sources:")
        print(" - USGS National Map")
        print(" - National Hydrography Dataset (NHD)")
        return 1

    except Exception as e:
        # Last-resort boundary of a CLI tool: report, show the trace, exit non-zero
        print(f"ERROR: Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(main())
# ==============================================================================
# CONFIGURATION
# ==============================================================================

# Command-line interface; --dpi, --fps and --output override values from the config
parser = argparse.ArgumentParser(description="Generate flood animation from FIM files")
for _flag, _options in (
    ('--config', {'default': 'config.yaml', 'help': "Path to config file"}),
    ('--dpi', {'type': int, 'help': "Override DPI"}),
    ('--fps', {'type': float, 'help': "Override FPS"}),
    ('--start-time', {'help': "Start time filter (YYYY-MM-DD HH:MM)"}),
    ('--end-time', {'help': "End time filter (YYYY-MM-DD HH:MM)"}),
    ('--output', {'help': "Override output video filename"}),
):
    parser.add_argument(_flag, **_options)
args = parser.parse_args()

config = load_config(args.config)
paths = get_paths(config)
anim_cfg = get_animation_config(config)

# Input directory and output file
FIMS_DIR = paths['fims_dir']
if args.output:
    OUTPUT_VIDEO = Path(args.output)
else:
    OUTPUT_VIDEO = paths['output_video']


def convert_time_format(time_str):
    """Convert 'YYYY-MM-DD HH:MM' to 'YYYYMMDD_HHMM'.

    Returns None for an empty value. A value that does not parse as
    'YYYY-MM-DD HH:MM' is returned unchanged (it is taken to be in the
    internal format already).
    """
    if time_str:
        try:
            parsed = datetime.strptime(time_str, '%Y-%m-%d %H:%M')
        except ValueError:
            return time_str
        return parsed.strftime('%Y%m%d_%H%M')
    return None


# Optional time window, held in the same YYYYMMDD_HHMM form as the FIM file names
START_TIME = convert_time_format(args.start_time)
END_TIME = convert_time_format(args.end_time)

# Sections of the animation configuration
extent_cfg, visual_cfg, overlay_cfg, basemap_cfg, county_cfg, lake_cfg = (
    anim_cfg[_section]
    for _section in ('extent', 'visual', 'overlay', 'basemap', 'county', 'lake_fill')
)

# Animation settings (a command-line value wins over the config value)
FPS = args.fps or visual_cfg['fps']
DPI = args.dpi or visual_cfg['dpi']
DURATION_LAST_FRAME = visual_cfg['duration_last_frame']

# Visualization settings
DEPTH_MIN = visual_cfg['depth_min']
DEPTH_MAX = visual_cfg['depth_max']
COLORMAP = visual_cfg['colormap']
FIGSIZE = visual_cfg['figsize']
DOWNSAMPLE_FACTOR = visual_cfg.get('downsample_factor', 1)  # 1 means no downsampling

# Overlay settings
SHOW_TIMESTAMP = overlay_cfg['show_timestamp']
SHOW_COLORBAR = overlay_cfg['show_colorbar']
SHOW_EXTENT_INFO = False  # Fixed value; there is no config entry for it
SHOW_DISCLAIMER = overlay_cfg['show_disclaimer']
TITLE_PREFIX = overlay_cfg['title_prefix']
DISCLAIMER_TEXT = overlay_cfg['disclaimer_text']

# Basemap settings: translate the configured name into a contextily provider
SHOW_BASEMAP = basemap_cfg['enabled']
BASEMAP_SOURCE = None
if SHOW_BASEMAP:
    basemap_source_str = basemap_cfg['source']
    if basemap_source_str == 'OpenTopoMap':
        BASEMAP_SOURCE = ctx.providers.OpenTopoMap
    elif basemap_source_str == 'Esri.WorldImagery':
        BASEMAP_SOURCE = ctx.providers.Esri.WorldImagery
    else:
        # 'OpenStreetMap.Mapnik' and any unrecognised name
        BASEMAP_SOURCE = ctx.providers.OpenStreetMap.Mapnik
BASEMAP_ALPHA = basemap_cfg['alpha']

# County boundary settings
SHOW_COUNTY_BOUNDARY = county_cfg['show_boundary']
SHOW_COUNTY_LABEL = county_cfg['show_label']
COUNTY_NAME = county_cfg['name']
STATE_NAME = county_cfg['state']

# Custom map extent
USE_CUSTOM_EXTENT = extent_cfg['use_custom']
CUSTOM_EXTENT_CENTER = (extent_cfg['center_lon'], extent_cfg['center_lat'])
CUSTOM_EXTENT_SIZE_KM = (extent_cfg['size_km_ew'], extent_cfg['size_km_ns'])

# Lake fill settings: a file is only used when the feature is enabled and a path is given
if lake_cfg.get('enabled') and lake_cfg.get('file_path'):
    LAKE_FILL_FILE = Path(lake_cfg['file_path'])
else:
    LAKE_FILL_FILE = None
LAKE_FILL_DEPTH = lake_cfg.get('depth', 5.0)
# Lower-case state / territory name -> USPS abbreviation
_STATE_ABBREVIATIONS = {
    'alabama': 'AL', 'alaska': 'AK', 'arizona': 'AZ', 'arkansas': 'AR',
    'california': 'CA', 'colorado': 'CO', 'connecticut': 'CT', 'delaware': 'DE',
    'florida': 'FL', 'georgia': 'GA', 'hawaii': 'HI', 'idaho': 'ID',
    'illinois': 'IL', 'indiana': 'IN', 'iowa': 'IA', 'kansas': 'KS',
    'kentucky': 'KY', 'louisiana': 'LA', 'maine': 'ME', 'maryland': 'MD',
    'massachusetts': 'MA', 'michigan': 'MI', 'minnesota': 'MN', 'mississippi': 'MS',
    'missouri': 'MO', 'montana': 'MT', 'nebraska': 'NE', 'nevada': 'NV',
    'new hampshire': 'NH', 'new jersey': 'NJ', 'new mexico': 'NM', 'new york': 'NY',
    'north carolina': 'NC', 'north dakota': 'ND', 'ohio': 'OH', 'oklahoma': 'OK',
    'oregon': 'OR', 'pennsylvania': 'PA', 'rhode island': 'RI', 'south carolina': 'SC',
    'south dakota': 'SD', 'tennessee': 'TN', 'texas': 'TX', 'utah': 'UT',
    'vermont': 'VT', 'virginia': 'VA', 'washington': 'WA', 'west virginia': 'WV',
    'wisconsin': 'WI', 'wyoming': 'WY', 'district of columbia': 'DC',
    'puerto rico': 'PR', 'guam': 'GU', 'american samoa': 'AS',
    'united states virgin islands': 'VI', 'northern mariana islands': 'MP',
}


def get_state_abbreviation(state_name: str) -> str:
    """
    Convert full state name to 2-letter abbreviation, or return as-is if already abbreviated.

    Matching ignores case and surrounding or repeated whitespace.

    Args:
        state_name: Full state name (e.g., "Texas") or abbreviation (e.g., "TX")

    Returns:
        2-letter state abbreviation in upper case; an unrecognised name is
        returned stripped and upper-cased
    """
    cleaned = state_name.strip()

    # Already an abbreviation (2 chars). Use the stripped text: returning the
    # raw input kept stray spaces (" tx " -> " TX ") and broke exact matching.
    if len(cleaned) == 2:
        return cleaned.upper()

    lookup_key = ' '.join(cleaned.lower().split())
    return _STATE_ABBREVIATIONS.get(lookup_key, cleaned.upper())
def get_county_boundary(county_name: str, state_name: str):
    """
    Fetch one county's boundary from the US Census Bureau county file.

    The national 2021 cartographic boundary file is read from the Census web
    site on every call and filtered by county name (case-insensitive) and
    state abbreviation.

    Args:
        county_name: Name of county (e.g., "Kerr", "Los Angeles")
        state_name: Full state name or 2-letter abbreviation (e.g., "Texas" or "TX")

    Returns:
        GeoDataFrame with the matching row(s) in the file's own CRS, or None
        if nothing matched or the file could not be read (a warning is printed)
    """
    census_url = "https://www2.census.gov/geo/tiger/GENZ2021/shp/cb_2021_us_county_500k.zip"

    try:
        all_counties = gpd.read_file(census_url)
        state_abbr = get_state_abbreviation(state_name)

        name_matches = all_counties['NAME'].str.lower() == county_name.lower()
        state_matches = all_counties['STUSPS'] == state_abbr
        selected = all_counties[name_matches & state_matches]

        if not selected.empty:
            # Left in the source CRS; callers reproject to match the FIM
            return selected

        print(f"Warning: Could not find {county_name} County, {state_name} ({state_abbr})")
        return None

    except Exception as e:
        # The boundary is an optional overlay, so failures only warn
        print(f"Warning: Could not fetch county boundary: {e}")
        return None
def compute_custom_extent(center_lon, center_lat, size_km_ew, size_km_ns, target_crs):
    """
    Build a map extent centred on a lon/lat point with a size given in kilometres.

    The centre is transformed from EPSG:4326 to ``target_crs`` and half of each
    size, converted to metres, is added and subtracted. This assumes the
    target CRS uses metres (as EPSG:5070 does).

    Args:
        center_lon: Center longitude (degrees)
        center_lat: Center latitude (degrees)
        size_km_ew: East-west extent in kilometers
        size_km_ns: North-south extent in kilometers
        target_crs: Target CRS (e.g., 'EPSG:5070')

    Returns:
        Tuple of (minx, miny, maxx, maxy) in target CRS, or None if conversion fails
    """
    try:
        from pyproj import Transformer

        # always_xy=True keeps the (lon, lat) argument order
        to_target = Transformer.from_crs("EPSG:4326", target_crs, always_xy=True)
        mid_x, mid_y = to_target.transform(center_lon, center_lat)

        # Half sizes, km -> m
        half_ew = (size_km_ew / 2) * 1000
        half_ns = (size_km_ns / 2) * 1000

        return (mid_x - half_ew, mid_y - half_ns, mid_x + half_ew, mid_y + half_ns)

    except Exception as e:
        print(f"Warning: Could not compute custom extent: {e}")
        return None
def get_fim_files(fims_dir: Path, pattern: str = "*_extent.tif",
                  start_time: str = None, end_time: str = None) -> list:
    """
    List the FIM files of a directory in time order, optionally limited to a window.

    When the default extent pattern matches nothing, "*_depth.tif" is tried
    instead. Files whose name holds no parseable timestamp sort first. The
    window check compares the timestamp text of each file name and includes
    both bounds.

    Args:
        fims_dir: Directory containing FIM files
        pattern: Glob pattern to match (default extent TIFFs, also supports "*_depth.tif")
        start_time: Optional start time filter (format: YYYYMMDD_HHMM)
        end_time: Optional end time filter (format: YYYYMMDD_HHMM)

    Returns:
        List of Path objects sorted by extracted timestamp
    """
    def stamp_of(path):
        # YYYYMMDD_HHMM_extent.tif / YYYYMMDD_HHMM_depth.tif -> YYYYMMDD_HHMM
        return path.stem.replace('_depth', '').replace('_extent', '')

    def sort_key(path):
        try:
            return datetime.strptime(stamp_of(path), '%Y%m%d_%H%M')
        except ValueError:
            return datetime.min

    def in_window(path):
        stamp = stamp_of(path)
        too_early = start_time and stamp < start_time
        too_late = end_time and stamp > end_time
        return not too_early and not too_late

    candidates = list(fims_dir.glob(pattern))

    # Fall back to depth rasters only for the default pattern
    if pattern == "*_extent.tif" and not candidates:
        candidates = list(fims_dir.glob("*_depth.tif"))
        if candidates:
            print(f"Note: No extent files found, using {len(candidates)} depth file(s) instead")

    candidates.sort(key=sort_key)

    if not (start_time or end_time):
        return candidates

    return [path for path in candidates if in_window(path)]


def format_timestamp(filename: str) -> str:
    """
    Turn a FIM file name into a readable timestamp.

    Args:
        filename: Filename like '20250703_0700_depth.tif'

    Returns:
        Formatted string like '2025-07-03 07:00'; if the name does not hold a
        YYYYMMDD_HHMM timestamp, the name with the FIM suffix removed
    """
    stamp = filename.replace('_depth.tif', '').replace('_extent.tif', '')
    try:
        parsed = datetime.strptime(stamp, '%Y%m%d_%H%M')
    except ValueError:
        return stamp
    return parsed.strftime('%Y-%m-%d %H:%M')
+ + Args: + data: FIM data array (masked array) + transform: Rasterio affine transform + bounds: Raster bounds + crs: Raster CRS + is_extent_file: Whether this is an extent file (vs depth file) + + Returns: + Modified data array with lake pixels filled + """ + try: + from rasterio.features import rasterize + from rasterio.warp import reproject, Resampling + + # Check file extension to determine how to read + file_ext = LAKE_FILL_FILE.suffix.lower() + + if file_ext in ['.tif', '.tiff']: + # Read as raster and reproject to match FIM + with rasterio.open(LAKE_FILL_FILE) as lake_src: + lake_mask = np.zeros(data.shape, dtype=np.uint8) + reproject( + source=rasterio.band(lake_src, 1), + destination=lake_mask, + src_transform=lake_src.transform, + src_crs=lake_src.crs, + dst_transform=transform, + dst_crs=crs, + resampling=Resampling.nearest + ) + + elif file_ext in ['.gpkg', '.geojson', '.shp']: + # Read as vector and rasterize + with fiona.open(LAKE_FILL_FILE) as lake_src: + # Reproject geometries to match FIM CRS if needed + from pyproj import Transformer + + src_crs_str = lake_src.crs_wkt if hasattr(lake_src, 'crs_wkt') else str(lake_src.crs) + dst_crs_str = crs.to_string() if hasattr(crs, 'to_string') else str(crs) + + # Read all features and prepare for rasterization + geometries = [] + for feature in lake_src: + geom = feature['geometry'] + + # If CRS differs, reproject the geometry + if src_crs_str != dst_crs_str: + transformer = Transformer.from_crs( + lake_src.crs, + crs, + always_xy=True + ) + + geom_shape = shape(geom) + geom_transformed = shapely_transform(transformer.transform, geom_shape) + geom = mapping(geom_transformed) + + geometries.append((geom, 1)) + + # Rasterize geometries + if geometries: + lake_mask = rasterize( + geometries, + out_shape=data.shape, + transform=transform, + fill=0, + dtype=np.uint8 + ) + else: + lake_mask = np.zeros(data.shape, dtype=np.uint8) + else: + print(f" Warning: Unsupported lake fill file format: {file_ext}") + return 
def read_fim(fim_path: Path) -> tuple:
    """
    Read FIM GeoTIFF and return data array and metadata.

    Band 1 is read (optionally downsampled by DOWNSAMPLE_FACTOR), nodata and
    non-flooded cells are masked, and the lake fill is applied when
    LAKE_FILL_FILE is configured and exists.

    Args:
        fim_path: Path to FIM file

    Returns:
        Tuple of (data_array, transform, bounds, crs, is_extent_file)
    """
    with rasterio.open(fim_path) as src:
        # Apply downsampling if configured (reduces memory usage for large rasters)
        if DOWNSAMPLE_FACTOR > 1:
            # Keep at least one pixel per axis: a factor larger than the raster
            # would otherwise give a zero-sized read and a division by zero below.
            out_shape = (
                max(1, src.height // DOWNSAMPLE_FACTOR),
                max(1, src.width // DOWNSAMPLE_FACTOR),
            )
            data = src.read(
                1,
                out_shape=out_shape,
                resampling=rasterio.enums.Resampling.nearest
            )
            # Adjust transform for downsampled data
            transform = src.transform * src.transform.scale(
                (src.width / out_shape[1]),
                (src.height / out_shape[0])
            )
            print(f" Downsampled from {src.height}x{src.width} to {out_shape[0]}x{out_shape[1]} (factor: {DOWNSAMPLE_FACTOR}x)")
        else:
            data = src.read(1)
            transform = src.transform

        bounds = src.bounds
        crs = src.crs
        nodata = src.nodata

    # Check if this is an extent or depth file
    is_extent_file = '_extent' in fim_path.name

    # Mask nodata values
    if nodata is not None:
        data = np.ma.masked_equal(data, nodata)
    if np.issubdtype(data.dtype, np.floating):
        # NaN never compares equal to itself, so a NaN nodata value (or stray
        # NaN/inf cells) survives masked_equal and would skew the statistics.
        data = np.ma.masked_invalid(data)

    if is_extent_file:
        # Extent files: keep any positive value (usually 1 = flooded, 0 = not flooded)
        data = np.ma.masked_less_equal(data, 0)
    else:
        # Depth files: mask very small depths (< 0.1 ft) to avoid noise
        data = np.ma.masked_less(data, 0.1)

    # Apply lake fill if configured
    if LAKE_FILL_FILE is not None and LAKE_FILL_FILE.exists():
        data = apply_lake_fill(data, transform, bounds, crs, is_extent_file)

    # Debug: print statistics about the data
    valid_data = data[~data.mask] if np.ma.is_masked(data) else data
    if valid_data.size > 0:
        print(f" Data range: {valid_data.min():.3f} - {valid_data.max():.3f}, "
              f"Flooded cells: {valid_data.size:,}, "
              f"Type: {'extent' if is_extent_file else 'depth'}")
    else:
        print(f" WARNING: No valid flood data in this file!")

    return data, transform, bounds, crs, is_extent_file
bounds.right) + ax.set_ylim(bounds.bottom, bounds.top) + + # Add basemap + if SHOW_BASEMAP and BASEMAP_SOURCE: + try: + # FIM files are typically in EPSG:5070 + # Contextily needs to know the CRS to properly fetch and reproject tiles + target_crs = crs.to_string() if crs else 'EPSG:5070' + ctx.add_basemap(ax, + crs=target_crs, + source=BASEMAP_SOURCE, + alpha=BASEMAP_ALPHA, + zoom='auto') + except Exception as e: + print(f" Warning: Could not add basemap: {e}") + + # Plot flood depth with blue colormap + # Set colormap to start with transparent for zero/no flooding + import matplotlib + flood_cmap = matplotlib.colormaps.get_cmap(cmap).copy() + flood_cmap.set_bad(color='none', alpha=0) # Transparent for masked values + + # For extent files, use fixed color (all flooded areas same blue) + # For depth files, use gradient based on depth + if is_extent_file: + # Extent files: single blue color for all flooded areas + im = ax.imshow(data, + extent=[bounds.left, bounds.right, bounds.bottom, bounds.top], + cmap=flood_cmap, + vmin=0, + vmax=1, # Binary: 0 or 1 + interpolation='nearest', + origin='upper', + alpha=0.8, # More opaque for visibility + zorder=2) + else: + # Depth files: gradient based on depth values + im = ax.imshow(data, + extent=[bounds.left, bounds.right, bounds.bottom, bounds.top], + cmap=flood_cmap, + vmin=depth_range[0], + vmax=depth_range[1], + interpolation='nearest', + origin='upper', + alpha=0.7, + zorder=2) + + # Format timestamp + timestamp_str = format_timestamp(fim_path.name) + + # Add title + if SHOW_TIMESTAMP: + title = f"{TITLE_PREFIX} - {timestamp_str}" + ax.set_title(title, fontsize=16, fontweight='bold', pad=15) + + # Add county boundary if enabled + if SHOW_COUNTY_BOUNDARY and county_boundary is not None: + try: + # Reproject county boundary to match FIM CRS + if crs: + county_in_fim_crs = county_boundary.to_crs(crs) + county_in_fim_crs.boundary.plot(ax=ax, edgecolor='red', linewidth=3, + label=f'{COUNTY_NAME} County', zorder=3) + except 
Exception as e: + print(f" Warning: Could not plot county boundary: {e}") + + # Add colorbar + if SHOW_COLORBAR: + cbar = plt.colorbar(im, ax=ax, fraction=0.02, pad=0.04, shrink=0.5) + if is_extent_file: + cbar.set_label('Flood Extent', rotation=270, labelpad=15, fontsize=10) + else: + cbar.set_label('Depth (ft)', rotation=270, labelpad=15, fontsize=10) + + # Add frame counter + if SHOW_EXTENT_INFO: + info_text = f"Frame {frame_num}/{total_frames}" + ax.text(0.02, 0.98, info_text, + transform=ax.transAxes, + fontsize=10, + verticalalignment='top', + bbox=dict(boxstyle='round', facecolor='white', alpha=0.8)) + + # Add disclaimer + if SHOW_DISCLAIMER: + ax.text(0.98, 0.02, DISCLAIMER_TEXT, + transform=ax.transAxes, + fontsize=11, + fontweight='bold', + horizontalalignment='right', + verticalalignment='bottom', + bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.9, edgecolor='red', linewidth=2)) + + # Add county label in lower left corner + if SHOW_COUNTY_LABEL: + county_label = f"{COUNTY_NAME} County, {STATE_NAME}" + ax.text(0.02, 0.02, county_label, + transform=ax.transAxes, + fontsize=12, + fontweight='bold', + color='red', + horizontalalignment='left', + verticalalignment='bottom', + bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor='red', linewidth=2)) + + # Format axes - remove labels and ticks for cleaner map view + ax.set_xlabel('') + ax.set_ylabel('') + ax.set_xticks([]) + ax.set_yticks([]) + ax.grid(False) # Remove grid for cleaner appearance + + # Remove axis spines for cleaner map appearance + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + + # Tight layout + plt.tight_layout() + + # Convert figure to RGB array + fig.canvas.draw() + # Use buffer_rgba() instead of deprecated tostring_rgb() + buf = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8) + w, h = fig.canvas.get_width_height() + image = buf.reshape(h, w, 4) # 
def main():
    """
    Render every FIM raster in FIMS_DIR to a frame and encode them as a video.

    Frames are streamed straight into the video writer so they are never all
    held in memory. A frame that fails to render is reported and skipped.

    Returns:
        Exit code: 0 when a video was written, 1 when the input directory is
        missing, no FIM files match, the writer cannot be opened, or no frame
        could be generated.
    """
    print("=" * 70)
    print("FLOOD ANIMATION GENERATION")
    print("=" * 70)
    print()

    # Validate paths
    if not FIMS_DIR.exists():
        print(f" ERROR: FIMs directory not found: {FIMS_DIR}")
        return 1

    # Get all FIM files with date range filter
    fim_files = get_fim_files(FIMS_DIR, start_time=START_TIME, end_time=END_TIME)
    has_time_filter = bool(START_TIME or END_TIME)
    time_window = f"{START_TIME or 'start'} to {END_TIME or 'end'}"

    if not fim_files:
        print(f" ERROR: No FIM files found in {FIMS_DIR}")
        if has_time_filter:
            print(f" Date range filter: {time_window}")
        print(" Looking for files matching pattern: *_extent.tif or *_depth.tif")
        return 1

    print(f"Found {len(fim_files)} FIM file(s)")
    if has_time_filter:
        print(f"Date range filter: {time_window}")
    print(f"Time range: {format_timestamp(fim_files[0].name)} to {format_timestamp(fim_files[-1].name)}")
    print()

    # Create output directory
    OUTPUT_VIDEO.parent.mkdir(parents=True, exist_ok=True)

    print("Configuration:")
    print(f" Input directory: {FIMS_DIR}")
    print(f" Output video: {OUTPUT_VIDEO}")
    if has_time_filter:
        print(f" Time window: {time_window}")
    print(f" Frame rate: {FPS} fps")
    print(f" Resolution: {DPI} dpi")
    print(f" Depth range: {DEPTH_MIN} - {DEPTH_MAX} ft")
    print(f" Colormap: {COLORMAP}")
    if DOWNSAMPLE_FACTOR > 1:
        print(f" Downsampling: {DOWNSAMPLE_FACTOR}x (reduces memory usage)")
    print(f" Basemap: {'Enabled' if SHOW_BASEMAP else 'Disabled'}")
    print(f" County boundary: {'Enabled' if SHOW_COUNTY_BOUNDARY else 'Disabled'}")
    print(f" Disclaimer: {'Enabled' if SHOW_DISCLAIMER else 'Disabled'}")
    print()

    # Fetch county boundary if enabled
    county_boundary = None
    if SHOW_COUNTY_BOUNDARY:
        print(f"Fetching {COUNTY_NAME} County, {STATE_NAME} boundary...")
        county_boundary = get_county_boundary(COUNTY_NAME, STATE_NAME)
        if county_boundary is not None:
            print(" County boundary loaded")
        else:
            print(" Could not load county boundary")
        print()

    # Create colormap
    cmap = plt.get_cmap(COLORMAP)

    # Open video writer to write frames directly (avoid storing all in memory)
    print(f"Opening video writer: {OUTPUT_VIDEO}")
    try:
        writer = imageio.get_writer(
            OUTPUT_VIDEO,
            fps=FPS,
            codec='libx264',
            pixelformat='yuv420p',
            ffmpeg_params=['-crf', '18', '-preset', 'fast']  # Use 'fast' instead of 'slow' for speed
        )
    except Exception as e:
        print(f" ERROR: Could not open video writer: {e}")
        return 1

    # Generate and write frames directly to video
    print("Generating and writing frames...")
    frame_count = 0
    last_frame = None
    total_frames = len(fim_files)

    # try/finally guarantees the encoder is closed (and the file finalized)
    # on every exit path, including an interrupt or a failure while holding
    # the last frame.
    try:
        for frame_num, fim_file in enumerate(fim_files, 1):
            print(f" [{frame_num}/{total_frames}] Processing {fim_file.name}...", end='')

            try:
                frame = create_frame(
                    fim_file,
                    frame_num=frame_num,
                    total_frames=total_frames,
                    depth_range=(DEPTH_MIN, DEPTH_MAX),
                    cmap=cmap,
                    figsize=FIGSIZE,
                    county_boundary=county_boundary
                )
                writer.append_data(frame)
                last_frame = frame.copy()  # Make a copy for holding at end
                del frame  # Explicitly delete frame to free memory
                frame_count += 1
                print(" ")

                # Force garbage collection every 10 frames to prevent memory buildup
                if frame_num % 10 == 0:
                    gc.collect()

            except Exception as e:
                # One bad raster should not abort the whole animation.
                print(f" ERROR: {e}")
                import traceback
                traceback.print_exc()
                continue

        if frame_count == 0:
            print("\n ERROR: No frames were generated")
            return 1

        # Add extra frames at the end to hold last frame
        if DURATION_LAST_FRAME > 0 and last_frame is not None:
            extra_frames = int(FPS * DURATION_LAST_FRAME)
            print(f"\nHolding last frame for {DURATION_LAST_FRAME}s ({extra_frames} frames)...")
            for _ in range(extra_frames):
                writer.append_data(last_frame)
                frame_count += 1

        print("\nFinalizing video...")
    finally:
        writer.close()

    # Clean up memory
    del last_frame
    gc.collect()

    print(" Video created successfully!")

    print()
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total frames: {frame_count}")
    print(f"Duration: {frame_count / FPS:.1f} seconds")
    print(f"Output: {OUTPUT_VIDEO}")
    print()

    # File size
    if OUTPUT_VIDEO.exists():
        size_mb = OUTPUT_VIDEO.stat().st_size / (1024 * 1024)
        print(f"File size: {size_mb:.1f} MB")

    print("\n Animation generation complete!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
+ +Usage: + python generate_batch_fims.py [--config config.yaml] [--force] + + --config Path to configuration file (default: config.yaml) + --force Force regeneration of existing files +""" + +import subprocess +from pathlib import Path +import sys +import sqlite3 +import argparse +from dataclasses import dataclass +from config_utils import load_config, get_paths, get_fim_config + +# ============================================================================== +# CONFIGURATION +# ============================================================================== + +# Load configuration +parser = argparse.ArgumentParser(description="Batch generate FIMs from flow files") +parser.add_argument('--config', default='config.yaml', help="Path to config file") +parser.add_argument('--max-workers', type=int, help="Override max parallel workers") +parser.add_argument('--force', action='store_true', help="Force regeneration of existing files") +args = parser.parse_args() + +config = load_config(args.config) +paths = get_paths(config) +fim_cfg = get_fim_config(config) + +# Set paths from config +RIPPLE_DB_PATH = paths['ripple_db'] +FLOW_FILES_DIR = paths['flows_dir'] +CONTROLS_DIR = paths['controls_dir'] +FIMS_DIR = paths['fims_dir'] +STARTING_REACH_IDS = str(paths['start_reaches']) + +# FIM settings from config +FIM_LIBRARY = fim_cfg['library'] +FIM_TYPE = fim_cfg['type'] +OUTPUT_FORMAT = fim_cfg['format'] +DEFAULT_BOUNDARY_CONDITION = fim_cfg['boundary_condition'] + +# flows2fim executable path +FLOWS2FIM_EXECUTABLE = config['processing'].get('flows2fim_executable', 'flows2fim') + + +# ============================================================================== +# HELPER FUNCTIONS +# ============================================================================== + +@dataclass +class ProcessingStats: + """Track processing statistics.""" + controls_success: int = 0 + controls_skipped: int = 0 + fim_success: int = 0 + fim_skipped: int = 0 + + @property + def total_success(self) -> 
def get_fim_extension(output_format: str) -> str:
    """
    Get file extension for FIM output based on format.

    The comparison ignores case and surrounding whitespace, so a config value
    such as 'vrt' maps to the same extension as 'VRT'.

    Args:
        output_format: 'VRT', 'COG', or 'GTIFF'

    Returns:
        File extension including dot (e.g., '.vrt', '.tif')
    """
    return '.vrt' if output_format.strip().upper() == 'VRT' else '.tif'


def run_subprocess(cmd: list, error_prefix: str) -> bool:
    """
    Run subprocess command with standardized error handling.

    Args:
        cmd: Command list to execute
        error_prefix: Prefix for error messages

    Returns:
        True if successful, False otherwise
    """
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
        return True
    except subprocess.CalledProcessError as e:
        # Some tools report failures on stdout; fall back to it (then to the
        # exit code) so the message is never empty.
        detail = e.stderr or e.stdout or f"exit code {e.returncode}"
        print(f" ERROR {error_prefix}: {detail}", file=sys.stderr)
        return False
    except FileNotFoundError:
        print(f" ERROR: flows2fim executable not found: {cmd[0]}", file=sys.stderr)
        print(f" Make sure flows2fim is installed and in your PATH", file=sys.stderr)
        return False
    except OSError as e:
        # e.g. the file exists but is not executable; report it instead of
        # crashing the whole batch.
        print(f" ERROR {error_prefix}: {e}", file=sys.stderr)
        return False
def get_all_flow_files(flow_dir: Path) -> list:
    """Get all flow CSV files in flow_dir, sorted by path.

    With the YYYYMMDD_HHMM filename prefix, path order is chronological order.
    """
    return sorted(flow_dir.glob("*.csv"))


def run_flows2fim_controls(flow_file: Path, controls_file: Path,
                           starting_ids: str, boundary_condition: str = "nd") -> bool:
    """
    Run flows2fim controls command.

    Args:
        flow_file: Path to input flow CSV
        controls_file: Path to output controls CSV
        starting_ids: Comma-separated reach IDs or path to CSV
        boundary_condition: 'nd' or 'kwse'

    Returns:
        True if successful, False otherwise
    """
    cmd = [
        FLOWS2FIM_EXECUTABLE,
        "controls",
        "-db", str(RIPPLE_DB_PATH),
        "-f", str(flow_file),
        "-o", str(controls_file),
    ]

    # A CSV of starting reaches is passed by path; inline IDs are passed
    # together with the boundary condition.
    if starting_ids.endswith('.csv'):
        cmd.extend(["-scsv", starting_ids])
    else:
        cmd.extend(["-sids", starting_ids, "-scs", boundary_condition])

    return run_subprocess(cmd, "running controls")


def run_flows2fim_fim(controls_file: Path, output_fim: Path,
                      fim_library: str, fim_type: str = "depth",
                      output_format: str = "VRT") -> bool:
    """
    Run flows2fim fim command.

    Args:
        controls_file: Path to input controls CSV
        output_fim: Path to output FIM file
        fim_library: Path to FIM library (local or S3)
        fim_type: 'depth' or 'extent'
        output_format: 'VRT', 'COG', or 'GTIFF'

    Returns:
        True if successful, False otherwise
    """
    cmd = [
        FLOWS2FIM_EXECUTABLE,
        "fim",
        "-c", str(controls_file),
        "-lib", fim_library,
        "-o", str(output_fim),
        "-type", fim_type,
        "-fmt", output_format,
    ]

    return run_subprocess(cmd, "running fim")


def extract_timestamp_from_filename(filename: str) -> str:
    """
    Extract timestamp from flow filename.

    Format: YYYYMMDD_HHMM_*.csv -> YYYYMMDD_HHMM

    The '.csv' extension is removed before splitting, so a name without a
    suffix part ('YYYYMMDD_HHMM.csv') also yields 'YYYYMMDD_HHMM' rather than
    'YYYYMMDD_HHMM.csv'. Names with fewer than two '_'-separated parts are
    returned without the extension.
    """
    stem = filename[:-4] if filename.endswith('.csv') else filename
    parts = stem.split('_')
    if len(parts) >= 2:
        return f"{parts[0]}_{parts[1]}"
    return stem


def process_flow_file(flow_file: Path, timestamp: str, starting_ids: str,
                      force: bool, stats: "ProcessingStats") -> None:
    """
    Process a single flow file to generate controls and FIM.

    Existing outputs are reused unless force is set. The FIM step runs only
    when a controls file is available (reused or freshly generated).

    Args:
        flow_file: Path to flow CSV file
        timestamp: Extracted timestamp string
        starting_ids: Starting reach IDs configuration
        force: Force regeneration of existing files
        stats: ProcessingStats object to update
    """
    # Define output paths
    controls_file = CONTROLS_DIR / f"{timestamp}_controls.csv"
    fim_extension = get_fim_extension(OUTPUT_FORMAT)
    fim_file = FIMS_DIR / f"{timestamp}_{FIM_TYPE}{fim_extension}"

    # Step 1: Generate controls
    if controls_file.exists() and not force:
        stats.controls_skipped += 1
    else:
        action = "Regenerating" if controls_file.exists() else "Generating"
        print(f"{timestamp}: {action} controls...")
        if not run_flows2fim_controls(flow_file, controls_file, starting_ids,
                                      DEFAULT_BOUNDARY_CONDITION):
            print(f" ✗ Failed to generate controls")
            return  # Without controls there is nothing to build the FIM from
        stats.controls_success += 1

    # Step 2: Generate FIM (only reached when controls exist)
    if fim_file.exists() and not force:
        stats.fim_skipped += 1
        return

    action = "Regenerating" if fim_file.exists() else "Generating"
    print(f"{timestamp}: {action} FIM...")
    if run_flows2fim_fim(controls_file, fim_file, FIM_LIBRARY, FIM_TYPE, OUTPUT_FORMAT):
        stats.fim_success += 1
    else:
        print(f" ✗ Failed to generate FIM")
paths + if not RIPPLE_DB_PATH.exists(): + print(f" ERROR: Database not found: {RIPPLE_DB_PATH}") + return 1 + + if not FLOW_FILES_DIR.exists(): + print(f" ERROR: Flow files directory not found: {FLOW_FILES_DIR}") + return 1 + + # Create output directories + CONTROLS_DIR.mkdir(parents=True, exist_ok=True) + FIMS_DIR.mkdir(parents=True, exist_ok=True) + + # Get starting reach IDs + if STARTING_REACH_IDS == "auto": + print("Auto-detecting upstream starting reaches...") + upstream_reaches = get_upstream_reaches(RIPPLE_DB_PATH) + if not upstream_reaches: + print(" ERROR: Could not detect upstream reaches") + print(" Please set STARTING_REACH_IDS manually in the configuration") + return 1 + starting_ids = ",".join(str(r) for r in upstream_reaches) + print(f" Found {len(upstream_reaches)} upstream reaches: {starting_ids}") + else: + starting_ids = STARTING_REACH_IDS + print(f"Using starting reach IDs: {starting_ids}") + + print() + print(f"Configuration:") + print(f" Database: {RIPPLE_DB_PATH}") + print(f" Flow files: {FLOW_FILES_DIR}") + print(f" Controls output: {CONTROLS_DIR}") + print(f" FIMs output: {FIMS_DIR}") + print(f" FIM library: {FIM_LIBRARY}") + print(f" FIM type: {FIM_TYPE}") + print(f" Output format: {OUTPUT_FORMAT}") + print() + + # Get all flow files + flow_files = get_all_flow_files(FLOW_FILES_DIR) + + if not flow_files: + print(f" ERROR: No flow CSV files found in {FLOW_FILES_DIR}") + return 1 + + print(f"Found {len(flow_files)} flow file(s) to process") + print() + + # Process each flow file + stats = ProcessingStats() + + for flow_file in flow_files: + timestamp = extract_timestamp_from_filename(flow_file.name) + process_flow_file(flow_file, timestamp, starting_ids, args.force, stats) + + # Summary + print("=" * 70) + print("SUMMARY") + print("=" * 70) + print(f"Total flow files: {len(flow_files)}") + print(f"Controls generated: {stats.controls_success}") + print(f"Controls skipped: {stats.controls_skipped} (already existed)") + print(f"FIMs 
generated: {stats.fim_success}") + print(f"FIMs skipped: {stats.fim_skipped} (already existed)") + print(f"Complete: {stats.total_success}") + print() + + if args.force: + print("Note: Ran with --force flag (regenerated existing files)") + print() + + if stats.total_success == len(flow_files): + print("✓ All FIMs complete!") + print(f"\nOutput location: {FIMS_DIR}") + if stats.controls_skipped > 0 or stats.fim_skipped > 0: + print(f"\nTip: Use --force flag to regenerate existing files") + return 0 + elif stats.total_success > 0: + print(f" {len(flow_files) - stats.total_success} FIM(s) failed") + return 1 + else: + print(" No FIMs were generated") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/animation_from_event/generate_flow_files.py b/animation_from_event/generate_flow_files.py new file mode 100755 index 0000000..4bbc126 --- /dev/null +++ b/animation_from_event/generate_flow_files.py @@ -0,0 +1,660 @@ +#!/usr/bin/env python3 +""" +Generate Hourly Flow Files from NWM Data +========================================== + +Fetches NWM streamflow data from S3 for all reaches in a RIPPLE rating +curve database (ripple.gpkg) and writes one CSV file per hourly timestep. + +Output format (per file): + nwm_feature_id,discharge + 3458519,1250.5 + 3458691,4320.0 + ... 
+ +Configuration: + - Output directory: config.yaml -> output.base_dir/flows + - Filename format: YYYYMMDD_HHMM_{suffix}.csv (suffix from config.yaml) + - Time period: config.yaml -> event.start_date to event.end_date + +Usage: + python generate_flow_files.py --config config.yaml + python generate_flow_files.py --config config.yaml --force # Regenerate even if files exist + python generate_flow_files.py --start-date "2025-07-04 06:00" --end-date "2025-07-05 18:00" +""" + +import pandas as pd +import sqlite3 +import xarray as xr +import boto3 +from datetime import datetime +from pathlib import Path +import sys +import numpy as np +from botocore import UNSIGNED +from botocore.config import Config +import argparse +from dataclasses import dataclass +from config_utils import load_config, get_paths, get_nwm_config + +# ============================================================================== +# CONFIGURATION +# ============================================================================== + +# Load configuration +parser = argparse.ArgumentParser(description="Generate hourly flow files from NWM data") +parser.add_argument('--config', default='config.yaml', help="Path to config file") +parser.add_argument('--start-date', help="Override start date/time (YYYY-MM-DD or YYYY-MM-DD HH:MM)") +parser.add_argument('--end-date', help="Override end date/time (YYYY-MM-DD or YYYY-MM-DD HH:MM)") +parser.add_argument('--force', action='store_true', help="Force regeneration even if files exist") +args = parser.parse_args() + +config = load_config(args.config) +paths = get_paths(config) +nwm_cfg = get_nwm_config(config) + +# Set paths from config +RIPPLE_DB_PATH = paths['ripple_db'] +OUTPUT_DIR = paths['flows_dir'] +FLOW_FILE_SUFFIX = paths['flow_file_suffix'] +START_DATE = args.start_date if args.start_date else nwm_cfg['start_date'] +END_DATE = args.end_date if args.end_date else nwm_cfg['end_date'] +S3_BUCKET = nwm_cfg['bucket'] +NWM_CONFIG = nwm_cfg['config'] +USE_ANONYMOUS_S3 = 
@dataclass
class ProcessingStats:
    """Track processing statistics."""
    files_processed: int = 0
    files_failed: int = 0
    files_written: int = 0
    files_skipped: int = 0
    reaches_with_data: int = 0


def parse_datetime_string(date_str: str) -> datetime:
    """
    Parse a datetime string that can be in multiple formats.

    Supports:
        - "YYYY-MM-DD" (defaults to 00:00:00)
        - "YYYY-MM-DD HH:MM"
        - "YYYY-MM-DD HH:MM:SS"

    Args:
        date_str: Date/datetime string (surrounding whitespace is ignored)

    Returns:
        datetime object

    Raises:
        ValueError: If the string matches none of the supported formats
    """
    date_str = date_str.strip()

    # Most specific format first
    formats = (
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d",
    )

    for fmt in formats:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue

    raise ValueError(f"Could not parse date string: {date_str}. Expected format: YYYY-MM-DD or YYYY-MM-DD HH:MM")


def extract_timestamp_from_nwm_filename(filename: str, date_str: str,
                                        bucket_type: str) -> "pd.Timestamp | None":
    """
    Extract timestamp from NWM NetCDF filename.

    Only the last path component is inspected, so both a bare filename and a
    full S3 key are accepted.

    Args:
        filename: NetCDF filename (or S3 key ending in it)
        date_str: Date string in YYYY-MM-DD format
        bucket_type: 'operational' or 'retrospective'

    Returns:
        pandas Timestamp with UTC timezone, or None if parsing fails
    """
    try:
        # Drop any key prefix: the retrospective branch slices fixed offsets
        # from the start of the name and would otherwise read path characters.
        basename = filename.rsplit('/', 1)[-1]

        if bucket_type == 'operational':
            # Operational format: nwm.tHHz.analysis_assim.channel_rt.tm00.conus.nc
            for part in basename.split('.'):
                if part.startswith('t') and part.endswith('z') and len(part) == 4:
                    hour = part[1:3]  # Extract HH from tHHz
                    return pd.Timestamp(f"{date_str} {hour}:00:00", tz='UTC')
        else:
            # Retrospective format: YYYYMMDDHHMI.CHRTOUT_DOMAIN1
            if len(basename) >= 12:
                date_part = basename[:8]   # YYYYMMDD
                hour_part = basename[8:10]  # HH
                return pd.Timestamp(
                    f"{date_part[:4]}-{date_part[4:6]}-{date_part[6:8]} {hour_part}:00:00",
                    tz='UTC'
                )
    except (ValueError, TypeError, AttributeError):
        # Malformed name or date: the contract is to report this as None.
        pass
    return None


def build_s3_prefix_and_pattern(date: datetime, bucket: str, config: str) -> tuple[str, str]:
    """
    Build S3 prefix and file pattern for listing NWM files.

    Args:
        date: Date to build path for
        bucket: S3 bucket name
        config: NWM configuration (e.g., 'analysis_assim')

    Returns:
        Tuple of (prefix, file_pattern)
    """
    year_month_day = date.strftime("%Y%m%d")

    if bucket == "noaa-nwm-pds":
        # Operational bucket structure
        prefix = f"nwm.{year_month_day}/{config}/"
        file_pattern = f".{config}.channel_rt.tm00.conus.nc"
    else:
        # Retrospective bucket structure: one folder per year, so the date is
        # carried by the pattern rather than the prefix.
        year = date.strftime("%Y")
        prefix = f"CONUS/netcdf/CHRTOUT/{year}/"
        file_pattern = year_month_day

    return prefix, file_pattern
def load_nwm_streamflow_for_hour(s3_client, s3_key: str, feature_ids: list) -> dict:
    """
    Load streamflow from a single NWM NetCDF file for specific feature IDs.

    The file is opened directly from S3 through xarray (h5netcdf engine).
    The flow variable is read into memory once and then indexed locally,
    rather than issuing one lazy read per requested reach.

    Args:
        s3_client: boto3 S3 client (not used by this function; kept so the
            call signature stays unchanged)
        s3_key: S3 key to NetCDF file inside S3_BUCKET
        feature_ids: List of NWM feature IDs to extract

    Returns:
        Dict mapping feature_id to streamflow in cubic feet per second (cfs).
        Feature IDs that are absent from the file, or whose value is NaN, are
        left out. An empty dict is returned when the file cannot be read or
        lacks the expected variables; the reason is written to stderr.
    """
    try:
        s3_path = f"s3://{S3_BUCKET}/{s3_key}"

        # Public buckets are read with unsigned (anonymous) requests.
        storage_options = {'anon': True} if USE_ANONYMOUS_S3 else {}

        with xr.open_dataset(s3_path, engine='h5netcdf', storage_options=storage_options) as ds:
            if 'streamflow' in ds.variables:
                streamflow_var = 'streamflow'
            elif 'qSfcLatRunoff' in ds.variables:
                # NOTE(review): fallback kept from the original code; confirm
                # that this variable is an acceptable stand-in for streamflow.
                streamflow_var = 'qSfcLatRunoff'
            else:
                print(f" Warning: streamflow variable not found in {s3_key}", file=sys.stderr)
                return {}

            if 'feature_id' not in ds.variables:
                print(f" Warning: feature_id not found in {s3_key}", file=sys.stderr)
                return {}

            # Materialise both arrays while the file is still open.
            all_feature_ids = ds['feature_id'].values
            all_flows_cms = ds[streamflow_var].values

        # Index only the IDs we were asked for; the file holds far more
        # features than a typical request.
        wanted = set(feature_ids)
        feature_id_to_idx = {
            fid: idx
            for idx, fid in enumerate(all_feature_ids.tolist())
            if fid in wanted
        }

        results = {}
        for fid in feature_ids:
            idx = feature_id_to_idx.get(fid)
            if idx is None:
                continue
            # File values are in cms; 1 cms = 35.3147 cfs.
            flow_cfs = float(all_flows_cms[idx]) * 35.3147
            if not np.isnan(flow_cfs):
                results[fid] = flow_cfs

        return results

    except Exception as e:
        # Best effort: one unreadable file must not stop the whole run, but
        # the failure is reported so missing hours can be diagnosed.
        print(f" Error loading {s3_key}: {e}", file=sys.stderr)
        return {}
def fetch_all_reach_data(reach_ids: list) -> dict:
    """
    Fetch discharge data for all reaches from NWM on S3.

    Every calendar day touched by the half-open window [START_DATE, END_DATE)
    is listed and processed. A window that ends part-way through a day
    includes that day, and a window that starts and ends on the same day
    still processes that day.

    Args:
        reach_ids: List of NWM feature IDs (reach_ids from database)

    Returns:
        Dict mapping nwm_feature_id to DataFrame of hourly discharge
        (single 'discharge' column, index named 'datetime', sorted by time).
        Reaches for which nothing was retrieved are omitted.
    """
    print("=" * 70)
    print("FETCHING NWM DATA FROM S3")
    print("=" * 70)
    print(f"\nDatabase: {RIPPLE_DB_PATH}")
    print(f"Period: {START_DATE} to {END_DATE}")
    print(f"S3 Bucket: s3://{S3_BUCKET}/")
    print(f"Output: {OUTPUT_DIR}")
    print(f"Total reaches: {len(reach_ids)}")
    print(f"Configuration: {NWM_CONFIG}")
    print()

    # Create S3 client
    print("Connecting to S3...")
    s3_client = get_s3_client()

    # Generate the list of calendar days to fetch.
    start_dt = parse_datetime_string(START_DATE)
    end_dt = parse_datetime_string(END_DATE)
    if end_dt > start_dt:
        # The window is half-open, so the last instant that still belongs to
        # it is just before end_dt. Using that instant's date keeps a
        # midnight end exclusive while including a partial final day.
        last_moment = pd.Timestamp(end_dt) - pd.Timedelta(microseconds=1)
        date_range = pd.date_range(start=start_dt.date(), end=last_moment.date(), freq='D')
    else:
        date_range = pd.DatetimeIndex([])

    # Dictionary to store data: {feature_id: {timestamp: flow}}
    reach_data = {fid: {} for fid in reach_ids}
    stats = ProcessingStats()

    # Determine bucket type
    bucket_type = 'operational' if S3_BUCKET == "noaa-nwm-pds" else 'retrospective'

    print(f"Processing {len(date_range)} day(s) of data...")

    # Process each day
    for date in date_range:
        date_str = date.strftime("%Y-%m-%d")
        print(f"\n Processing {date_str}...")

        nc_files = list_nwm_files_for_date(s3_client, date_str)

        if not nc_files:
            print(f" No files found for {date_str}")
            continue

        print(f" Found {len(nc_files)} NetCDF files")

        # Process each hourly file
        for nc_file in nc_files:
            if process_nwm_file(s3_client, nc_file, date_str, reach_ids, reach_data, bucket_type):
                stats.files_processed += 1
                if stats.files_processed % 10 == 0:
                    print(f" Processed {stats.files_processed} files...")
            else:
                stats.files_failed += 1

    print(f"\n Files processed: {stats.files_processed}")
    print(f" Files failed: {stats.files_failed}")

    # Convert to DataFrame format
    print("\nConverting to DataFrames...")
    all_data = {}

    for fid, time_series in reach_data.items():
        if time_series:
            df = pd.DataFrame.from_dict(time_series, orient='index', columns=['discharge'])
            df.index.name = 'datetime'
            df = df.sort_index()
            all_data[fid] = df
            stats.reaches_with_data += 1

    print(f" Reaches with data: {stats.reaches_with_data}/{len(reach_ids)}")

    return all_data
+ + Filename format: YYYYMMDD_HHMM_{suffix}.csv (suffix from config) + + Returns: + ProcessingStats with files_written and files_skipped counts + """ + print("\n" + "=" * 70) + print("WRITING PER-TIMESTEP CSV FILES") + print("=" * 70) + + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + print(f"\nOutput directory: {OUTPUT_DIR}") + + timestamps = generate_hourly_timestamps() + print(f"Writing {len(timestamps)} hourly files...") + print() + + stats = ProcessingStats() + + for ts in timestamps: + # Build data for this timestep + rows = [ + {'nwm_feature_id': feature_id, 'discharge': df.loc[ts, 'discharge']} + for feature_id, df in all_data.items() + if ts in df.index and pd.notna(df.loc[ts, 'discharge']) + ] + + if rows: + ts_df = pd.DataFrame(rows) + filename = ts.strftime('%Y%m%d_%H%M') + f'_{FLOW_FILE_SUFFIX}.csv' + filepath = OUTPUT_DIR / filename + + ts_df.to_csv(filepath, index=False) + stats.files_written += 1 + + # Show progress for key timestamps + if ts.hour in [0, 4, 5, 6, 12]: + print(f" ✓ {filename} ({len(rows)} gauges)") + else: + stats.files_skipped += 1 + + print() + print(f"Files written: {stats.files_written}") + print(f"Files skipped (no data): {stats.files_skipped}") + + return stats + + +def print_sample_output(all_data: dict): + """Print a sample of what the CSV files will contain.""" + print("\n" + "=" * 70) + print("SAMPLE OUTPUT FORMAT") + print("=" * 70) + + # Find a timestep with data + sample_ts = None + for feature_id, df in all_data.items(): + if not df.empty: + sample_ts = df.index[len(df)//2] # Middle of the data + break + + if sample_ts: + print(f"\nExample file: {sample_ts.strftime('%Y%m%d_%H%M')}_{FLOW_FILE_SUFFIX}.csv") + print("-" * 40) + print("nwm_feature_id,discharge") + + lines_printed = 0 + for feature_id, df in all_data.items(): + if sample_ts in df.index: + discharge = df.loc[sample_ts, 'discharge'] + if pd.notna(discharge): + print(f"{feature_id},{discharge:.1f}") + lines_printed += 1 + if lines_printed >= 10: + print("...") 
def check_existing_flow_files() -> tuple[bool, int, int]:
    """
    Check if flow files already exist for the configured time range.

    The expected timesteps come from generate_hourly_timestamps(), the same
    helper used when the CSV files are written, so this check and the writer
    always agree on which files make up a complete set.

    Returns:
        Tuple of (all_exist, existing_count, expected_count). all_exist is
        False when the configured range contains no timesteps.
    """
    timestamps = generate_hourly_timestamps()
    expected_count = len(timestamps)

    if expected_count == 0:
        return False, 0, 0

    # No output directory means nothing has been written yet.
    if not OUTPUT_DIR.exists():
        return False, 0, expected_count

    # Count the timesteps whose CSV is already on disk.
    existing_count = sum(
        1
        for ts in timestamps
        if (OUTPUT_DIR / (ts.strftime('%Y%m%d_%H%M') + f'_{FLOW_FILE_SUFFIX}.csv')).exists()
    )

    return existing_count == expected_count, existing_count, expected_count
def main():
    """
    Drive flow-file generation from start to finish.

    Steps: confirm the rating-curve database exists, skip the run when every
    expected flow file is already present (unless --force was given), read
    the reach IDs, fetch NWM discharge, then write one CSV per timestep.

    Returns:
        0 on success or when generation is skipped because the files already
        exist; 1 when the database is missing, holds no reach IDs, or no data
        could be retrieved.
    """
    rule = "=" * 70

    # Nothing can be done without the database.
    if not RIPPLE_DB_PATH.exists():
        print(f"\n ERROR: Database file not found: {RIPPLE_DB_PATH}")
        print(" Please update RIPPLE_DB_PATH in the configuration section.")
        return 1

    # Unless forced, avoid regenerating a complete set of flow files.
    if not args.force:
        complete, n_found, n_expected = check_existing_flow_files()

        if complete:
            for line in (
                "\n" + rule,
                "FLOW FILES ALREADY EXIST",
                rule,
                f"\nAll {n_expected} flow files already exist for the configured time range:",
                f" Start: {START_DATE}",
                f" End: {END_DATE}",
                f" Location: {OUTPUT_DIR}",
                "\nSkipping flow file generation.",
                "\nTo force regeneration, use: --force flag",
                "Example: python generate_flow_files.py --config config.yaml --force",
            ):
                print(line)
            return 0

        if n_found > 0:
            print(f"\n⚠ Warning: Found {n_found}/{n_expected} existing flow files.")
            print(" Will regenerate all files to ensure consistency.")

    print("Reading reach IDs from database...")
    reach_ids = get_reach_ids_from_database(RIPPLE_DB_PATH)
    if not reach_ids:
        print(f"\n ERROR: No reach IDs found in database: {RIPPLE_DB_PATH}")
        return 1

    print_database_info(reach_ids)

    all_data = fetch_all_reach_data(reach_ids)
    if not all_data:
        for line in (
            "\n⚠ WARNING: No data was retrieved from NWM AnA APIs.",
            " This could mean:",
            " - No data available for the specified time period",
            " - API connectivity issues",
            " - All reaches failed to return data",
        ):
            print(line)
        return 1

    # Preview the file layout, then write the per-timestep CSVs.
    print_sample_output(all_data)
    write_stats = write_timestep_csvs(all_data)

    for line in (
        "\n" + rule,
        "COMPLETE",
        rule,
        f"\nOutput location: {OUTPUT_DIR}",
        f"Files created: {write_stats.files_written}",
        f"Reaches with data: {len(all_data)}",
        "\nFile format:",
        " Header: nwm_feature_id,discharge",
        " Values: NWM feature ID (reach_id), discharge in cfs",
        "\nNext steps:",
        " Use flows2fim to generate flood inundation maps:",
        " 1. flows2fim controls -db -f -o -sids ",
        " 2. flows2fim fim -c -lib -o -type depth",
    ):
        print(line)

    return 0
def run_command(cmd, description):
    """
    Run a command and report whether it succeeded.

    Args:
        cmd: Command and its arguments as a list of strings (no shell is used).
        description: Human-readable step name shown in the console output.

    Returns:
        True when the command exits with status 0. False when it exits with a
        non-zero status or cannot be started at all (for example, when the
        executable is not on PATH).
    """
    print(f"\n{'='*70}")
    print(f"{description}")
    print(f"{'='*70}")
    print(f"Command: {' '.join(cmd)}")
    print()

    start_time = time.time()

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        print(f"\n {description} failed with exit code {e.returncode}")
        return False
    except OSError as e:
        # Launch failures (missing or non-executable program) are raised as
        # OSError subclasses rather than CalledProcessError. Report them as a
        # failed step so the caller's error handling applies.
        print(f"\n {description} could not be started: {e}")
        return False

    elapsed = time.time() - start_time
    print(f"\n {description} completed in {elapsed:.1f}s")
    return True
else: + print("\n Skipping S3 download (using existing files)") + + # Step 2: Generate flow files + if not args.skip_flows: + if run_command( + ['python', 'generate_flow_files.py', '--config', args.config], + "Step 2: Generate hourly flow files from NWM" + ): + steps_run += 1 + else: + steps_failed += 1 + print(" Cannot continue without flow files") + return 1 + else: + print("\n Skipping flow file generation") + + # Step 3: Generate FIMs + if not args.skip_fims: + if run_command( + ['python', 'generate_batch_fims.py', '--config', args.config], + "Step 3: Generate flood inundation maps" + ): + steps_run += 1 + else: + steps_failed += 1 + print(" Cannot continue without FIMs") + return 1 + else: + print("\n Skipping FIM generation") + + # Step 4: Create animation + if not args.skip_animation: + if run_command( + ['python', 'generate_animation.py', '--config', args.config], + "Step 4: Create flood animation video" + ): + steps_run += 1 + else: + steps_failed += 1 + else: + print("\n Skipping animation creation") + + # Summary + print("\n" + "=" * 70) + print("WORKFLOW COMPLETE") + print("=" * 70) + print(f"\nSteps completed: {steps_run}") + print(f"Steps failed: {steps_failed}") + + if steps_failed == 0: + output_file = Path(config['output']['base_dir']) / config['output']['video_filename'] + print(f"\n Animation ready: {output_file}") + return 0 + else: + print(f"\n Workflow completed with {steps_failed} error(s)") + return 1 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/animation_from_event/test_setup.sh b/animation_from_event/test_setup.sh new file mode 100755 index 0000000..2368d7b --- /dev/null +++ b/animation_from_event/test_setup.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# Test Setup Script +# Verifies that the tool is correctly configured + +set -e + +echo "======================================================================" +echo "Flood Animation Tool - Setup Verification" +echo 
"======================================================================" +echo "" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Check functions +check_file() { + if [ -f "$1" ]; then + echo -e "${GREEN}✓${NC} Found: $1" + return 0 + else + echo -e "${RED}✗${NC} Missing: $1" + return 1 + fi +} + +check_dir() { + if [ -d "$1" ]; then + echo -e "${GREEN}✓${NC} Found: $1/" + return 0 + else + echo -e "${YELLOW}⚠${NC} Missing: $1/ (will be created)" + mkdir -p "$1" + return 0 + fi +} + +# Check Docker +echo "1. Checking Docker..." +if command -v docker &> /dev/null; then + echo -e "${GREEN}✓${NC} Docker installed: $(docker --version)" + if docker ps &> /dev/null; then + echo -e "${GREEN}✓${NC} Docker daemon running" + else + echo -e "${RED}✗${NC} Docker daemon not running" + echo " Start Docker Desktop or run: sudo systemctl start docker" + exit 1 + fi +else + echo -e "${RED}✗${NC} Docker not found" + echo " Install from: https://docs.docker.com/get-docker/" + exit 1 +fi +echo "" + +# Check Docker Compose +echo "2. Checking Docker Compose..." +if command -v docker-compose &> /dev/null; then + echo -e "${GREEN}✓${NC} Docker Compose installed: $(docker-compose --version)" +else + echo -e "${RED}✗${NC} Docker Compose not found" + exit 1 +fi +echo "" + +# Check required files +echo "3. Checking required files..." +check_file "config.yaml" +check_file "Dockerfile" +check_file "docker-compose.yml" +check_file "requirements.txt" +check_file "run_workflow.py" +check_file "generate_flow_files.py" +check_file "generate_batch_fims.py" +check_file "generate_animation.py" +echo "" + +# Check .env +echo "4. Checking environment..." 
+if [ -f ".env" ]; then + echo -e "${GREEN}✓${NC} Found: .env" + if grep -q "AWS_ACCESS_KEY_ID=your_access_key_here" .env; then + echo -e "${YELLOW}⚠${NC} .env contains template values" + echo " Edit .env with your AWS credentials if using private S3" + fi +else + echo -e "${YELLOW}⚠${NC} Missing: .env" + echo " Run: cp .env.example .env" + echo " Then edit .env with your credentials" +fi +echo "" + +# Check directories +echo "5. Checking directory structure..." +check_dir "data" +check_dir "data/input" +check_dir "data/output" +check_dir "data/cache" +echo "" + +# Check config +echo "6. Checking config.yaml..." +if command -v python3 &> /dev/null; then + python3 -c " +import yaml +try: + with open('config.yaml') as f: + config = yaml.safe_load(f) + print('\033[0;32m✓\033[0m Config is valid YAML') + print(f' Collection: {config[\"collection\"][\"id\"]}') + print(f' Event: {config[\"event\"][\"start_date\"]} to {config[\"event\"][\"end_date\"]}') +except Exception as e: + print(f'\033[0;31m✗\033[0m Config error: {e}') + exit(1) +" || echo -e "${YELLOW}⚠${NC} Could not validate config (Python 3 with PyYAML required)" +else + echo -e "${YELLOW}⚠${NC} Python 3 not found, skipping config validation" +fi +echo "" + +# Check disk space +echo "7. Checking disk space..." +AVAILABLE=$(df -h . | awk 'NR==2 {print $4}') +echo " Available: $AVAILABLE" +echo " Required: ~20GB minimum" +echo "" + +# Summary +echo "======================================================================" +echo "Setup Verification Complete!" +echo "======================================================================" +echo "" +echo "Next steps:" +echo " 1. Review config.yaml" +echo " 2. Edit .env if using private S3 buckets" +echo " 3. Run: make build" +echo " 4. 
def download_from_s3(s3_client, bucket, key, local_path):
    """
    Fetch one S3 object to a local file.

    The destination's parent directory is created when missing. Any error
    raised by the download or the size lookup is printed, not propagated.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``
        bucket: S3 bucket name
        key: Object key within the bucket
        local_path: Destination as a ``pathlib.Path``

    Returns:
        True if the object was downloaded, False otherwise.
    """
    print(f" Downloading: s3://{bucket}/{key}")
    print(f" To: {local_path}")

    local_path.parent.mkdir(parents=True, exist_ok=True)

    succeeded = False
    try:
        s3_client.download_file(bucket, key, str(local_path))
        megabytes = local_path.stat().st_size / (1024 * 1024)
        print(f" Downloaded {megabytes:.1f} MB")
        succeeded = True
    except Exception as err:
        print(f" Error: {err}")
    return succeeded
def main():
    """
    Command-line entry point for the S3 utilities.

    Parses --config (required) and --download-ripple, loads the
    configuration, and downloads the RIPPLE data when requested.

    Returns:
        0 when nothing was requested or the download succeeded, 1 when the
        download failed.
    """
    cli = argparse.ArgumentParser(description="S3 utility functions")
    cli.add_argument('--config', required=True, help="Path to config.yaml")
    cli.add_argument('--download-ripple', action='store_true', help="Download RIPPLE data")
    options = cli.parse_args()

    # The config is loaded even when no action flag is given, so a bad path
    # is still reported.
    settings = load_config(options.config)

    if not options.download_ripple:
        return 0
    return 0 if download_ripple_data(settings) else 1
+36,14 @@ NWM Data (S3) → Flow Files → flows2fim → FIM GeoTIFFs → Animation Video ## Features +- **Make Build System** - Simple commands for build, run, and clean workflows - **Fully Containerized** - Docker-based, runs anywhere - **S3 Integration** - Auto-downloads RIPPLE data and accesses NWM data - **Config-Driven** - Single YAML file for all settings - **Dynamic Paths** - Collection ID-based S3 paths (change once, update all) - **County Agnostic** - Works with any US county boundary - **Lake Fill** - Fill permanent water bodies in animations -- **Customizable Viz** - Basemaps, colormaps, extents, overlays +- **Customizable Visualization** - Basemaps, colormaps, extents, overlays - **Parallel Processing** - Multi-threaded FIM generation --- @@ -300,11 +301,11 @@ This runs all four steps: ```bash # Initial run: Pull full event data (e.g., 7 days) # Edit config.yaml: start_date: 2025-07-04, end_date: 2025-07-11 -make run-workflow # Takes 45-90 minutes +make run-workflow --skip-animation -# Later: Create different animations from same data -# Just modify animation settings in config.yaml (extent, colormap, etc.) -make generate-animation # Takes ~ 5-10 minutes +# Later: Create different animations (a subset of the full time series from the comprehensive data) +# Just modify animation settings (and start / end times) in config.yaml (extent, colormap, etc.)
+make generate-animation # Or create animation for subset of time period docker-compose run --rm flood-animation python generate_animation.py \ @@ -361,8 +362,8 @@ docker-compose run --rm flood-animation python generate_animation.py \ --start-time "2025-07-05 16:00" \ --end-time "2025-07-06 12:00" \ --output video_3.mp4 -d -ocker-compose run --rm flood-animation python generate_animation.py \ + +docker-compose run --rm flood-animation python generate_animation.py \ --config config.yaml \ --start-time "2025-07-06 12:00" \ --end-time "2025-07-07 8:00" \ @@ -401,7 +402,7 @@ docker-compose run --rm flood-animation bash -c "\ **Memory considerations:** - Each day (24 hours) = ~24-48 frames at 1-2 hour intervals - Peak memory usage scales with: `frames × DPI × figure_size × downsample_factor` -- Recommended segment length: 12-24 hours per video +- Recommended segment length: 12-20 hours per video - For very high resolution (DPI > 200), consider 6-12 hour segments ### Run Individual Steps @@ -727,11 +728,16 @@ cat .env The Docker memory limits have been removed by default (see `docker-compose.yml`). Common memory related errors are `exit code -9` or `Error 137`, If still experiencing issues: +Increasing the downsample_factor: +```yaml +visual: + downsample_factor: 4 # Downsample large rasters +``` + Reduce animation resolution: ```yaml visual: dpi: 150 # Reduce from 250 - downsample_factor: 4 # Downsample large rasters ``` Or reduce parallel workers in `config.yaml`: @@ -740,14 +746,7 @@ processing: max_workers: 2 # Reduce from 4 ``` -**For events longer than 20 hours:** Break the animation into segments and stitch them together. This is the recommended approach for multi-day events: - -```bash -# See "Long-Duration Events (>20 hours)" section for detailed workflow -# Generate segments separately, then stitch with ffmpeg -``` - -This approach significantly reduces peak memory usage and allows you to process very long events that would otherwise fail. 
+**For events longer than 20 hours:** Break the animation into segments and stitch them together. See [Long-Duration Events](#long-duration-events-20-hours). This is the recommended approach for multi-day events: ### Disk Space Issues @@ -877,6 +876,8 @@ animation_from_event/ ├── config_utils.py # Config loading utilities ├── utils_s3.py # S3 download functions │ +├── test_setup.sh # Verifies installation & environment +│ ├── generate_flow_files.py # Step 1: Flow generation ├── generate_batch_fims.py # Step 2: FIM generation ├── generate_animation.py # Step 3: Animation creation @@ -913,12 +914,12 @@ All data uses standard container paths (not user-specific): ```bash # Setup -make setup # Create directories, copy .env -make build # Build Docker image +make setup # Create directories, copy .env +make build # Build Docker image # Run -make run-workflow # Complete workflow (all steps) -make shell # Interactive shell in container +make run-workflow # Complete workflow (all steps) +make shell # Interactive shell in container # Generate individual steps make generate-flows # Generate flow files from NWM data @@ -926,13 +927,13 @@ make generate-fims # Generate FIM GeoTIFFs make generate-animation # Generate animation video # Utilities -make download-lake # Download lake polygon (interactive) +make download-lake # Download lake polygon (interactive) # Cleanup -make clean # Remove output files (data/output, data/cache) -make clean-all # Remove output + Docker images and volumes -make logs # Show Docker logs -make help # Show all commands +make clean # Remove output files (data/output, data/cache) +make clean-all # Remove output + Docker images and volumes +make logs # Show Docker logs +make help # Show all commands ``` ### Python Script Arguments @@ -981,16 +982,7 @@ Modify `generate_animation.py` to support additional video formats (MP4, AVI, GI --- -## System Requirements - -### Minimum - -- **CPU:** 2 cores -- **RAM:** 8 GB -- **Disk:** 20 GB free space -- **OS:** Linux, 
macOS, Windows (with Docker) - -### Recommended +## Recommended System Requirements - **CPU:** 4+ cores (for parallel processing) - **RAM:** 16 GB @@ -1019,38 +1011,3 @@ Modify `generate_animation.py` to support additional video formats (MP4, AVI, GI See [`requirements.txt`](requirements.txt) for full list. --- - -## Support - -### Documentation - -- This README (comprehensive guide) -- Inline comments in `config.yaml` -- Script docstrings (`--help` flags) - -### Troubleshooting - -1. Check [Troubleshooting](#troubleshooting) section above -2. Review Docker logs: `docker-compose logs` -3. Test individual steps: `make generate-flows`, etc. -4. Check file paths and permissions - ---- - -## Version History - -### v1.0.0 (2026-01-02) -- Initial release -- Fully containerized workflow -- Config-driven operation -- S3 integration with dynamic paths -- make commands -- Lake fill feature -- County-agnostic boundary support -- flows2fim binary integration - ---- - -**Created:** 2026-01-02 -**Tool:** Flood Animation from Event -**Maintainer:** NGWPC