diff --git a/.env.example b/.env.example index e63a653..26b0034 100644 --- a/.env.example +++ b/.env.example @@ -4,3 +4,29 @@ EUOSINT_SITE_ADDRESS=:80 EUOSINT_HTTP_PORT=8080 EUOSINT_HTTPS_PORT=8443 +EUOSINT_WEB_IMAGE=ghcr.io/scalytics/euosint-web:latest +EUOSINT_COLLECTOR_IMAGE=ghcr.io/scalytics/euosint-collector:latest +HTTP_TIMEOUT_MS=60000 +BROWSER_ENABLED=true +BROWSER_TIMEOUT_MS=60000 + +# Candidate crawler intake and dead-letter queue. +CANDIDATE_QUEUE_PATH=registry/source_candidates.json +REPLACEMENT_QUEUE_PATH=registry/source_dead_letter.json +SEARCH_DISCOVERY_ENABLED=false +SEARCH_DISCOVERY_MAX_TARGETS=4 +SEARCH_DISCOVERY_MAX_URLS_PER_TARGET=3 + +# Source vetting agent. +SOURCE_VETTING_ENABLED=false +SOURCE_VETTING_PROVIDER=xai +SOURCE_VETTING_BASE_URL=https://api.x.ai/v1 +SOURCE_VETTING_API_KEY= +SOURCE_VETTING_MODEL=grok-4-1-fast +SOURCE_VETTING_TEMPERATURE=0 +SOURCE_VETTING_MAX_SAMPLE_ITEMS=6 + +# Alert-level LLM gate: yes/no + English translation + category id. +ALERT_LLM_ENABLED=false +ALERT_LLM_MODEL=grok-4-1-fast +ALERT_LLM_MAX_ITEMS_PER_SOURCE=4 diff --git a/.github/workflows/alerts-feed.yml b/.github/workflows/alerts-feed.yml deleted file mode 100644 index c3dbf3c..0000000 --- a/.github/workflows/alerts-feed.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Update alerts feed - -on: - schedule: - - cron: "*/15 * * * *" - workflow_dispatch: {} - -concurrency: - group: alerts-feed - cancel-in-progress: true - -permissions: - contents: write - -jobs: - refresh: - name: refresh - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache: true - - - name: Fetch alerts with Go collector - run: go run ./cmd/euosint-collector - env: - MAX_PER_SOURCE: "20" - OUTPUT_PATH: "public/alerts.json" - FILTERED_OUTPUT_PATH: "public/alerts-filtered.json" - STATE_OUTPUT_PATH: "public/alerts-state.json" - SOURCE_HEALTH_OUTPUT_PATH: 
"public/source-health.json" - MISSING_PERSON_RELEVANCE_THRESHOLD: "0" - FAIL_ON_CRITICAL_SOURCE_GAP: "0" - CRITICAL_SOURCE_PREFIXES: "interpol-red,interpol-yellow" - - - name: Commit updates - run: | - if git diff --quiet -- public/alerts.json public/alerts-filtered.json public/alerts-state.json public/source-health.json; then - echo "No changes" - exit 0 - fi - git config user.name "euosint-bot" - git config user.email "euosint-bot@users.noreply.github.com" - git add public/alerts.json public/alerts-filtered.json public/alerts-state.json public/source-health.json - git commit -m "chore: update alerts feed" - git push diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ef4e0ae..e2a2f48 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -56,7 +56,9 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Build image + - name: Build image (attempt 1) + id: build_image_1 + continue-on-error: true uses: docker/build-push-action@v6 with: context: . @@ -64,5 +66,17 @@ jobs: push: false load: false provenance: false - cache-from: type=gha - cache-to: type=gha,mode=max + cache-from: type=gha,scope=docker-${{ matrix.image.name }} + cache-to: type=gha,mode=max,scope=docker-${{ matrix.image.name }} + + - name: Build image (attempt 2 on transient failure) + if: steps.build_image_1.outcome == 'failure' + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ./${{ matrix.image.dockerfile }} + push: false + load: false + provenance: false + cache-from: type=gha,scope=docker-${{ matrix.image.name }} + cache-to: type=gha,mode=max,scope=docker-${{ matrix.image.name }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2224105..a165df7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -71,7 +71,9 @@ jobs: type=sha type=raw,value=latest - - name: Build and push image + - name: Build and push image (attempt 1) + id: build_push_1 + continue-on-error: true uses: docker/build-push-action@v6 with: context: . @@ -80,8 +82,21 @@ jobs: provenance: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max + cache-from: type=gha,scope=release-${{ matrix.image.name }} + cache-to: type=gha,mode=max,scope=release-${{ matrix.image.name }} + + - name: Build and push image (attempt 2 on transient failure) + if: steps.build_push_1.outcome == 'failure' + uses: docker/build-push-action@v6 + with: + context: . + file: ./${{ matrix.image.dockerfile }} + push: true + provenance: false + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha,scope=release-${{ matrix.image.name }} + cache-to: type=gha,mode=max,scope=release-${{ matrix.image.name }} - name: Publish GitHub release if: matrix.image.name == 'web' diff --git a/.github/workflows/source-discovery.yml b/.github/workflows/source-discovery.yml new file mode 100644 index 0000000..05b9e88 --- /dev/null +++ b/.github/workflows/source-discovery.yml @@ -0,0 +1,67 @@ +# Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + +name: Source Discovery + +on: + schedule: + - cron: "0 6 * * 1" # Every Monday at 06:00 UTC + workflow_dispatch: + +permissions: + contents: write + pull-requests: write + +jobs: + discover: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Run source discovery + run: go run ./cmd/euosint-collector --discover --discover-output discover-results.json + + - name: Check for new candidates + id: check + run: | + count=$(jq -r '.new_candidate_count // 0' discover-results.json) + echo "count=$count" >> "$GITHUB_OUTPUT" + if [ "$count" -eq 0 ]; then + echo "No new source candidates found." + else + echo "Found $count new source candidates." + fi + + - name: Create PR with results + if: steps.check.outputs.count != '0' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + branch="discover/new-sources-$(date +%Y%m%d)" + git checkout -b "$branch" + git add discover-results.json + git -c user.name="euosint-bot" -c user.email="euosint-bot@users.noreply.github.com" commit -m "chore: add ${{ steps.check.outputs.count }} discovered source candidates + + Automated discovery via FIRST.org CSIRT team directory. + + Co-Authored-By: github-actions[bot] " + git push origin "$branch" + gh pr create \ + --title "Add ${{ steps.check.outputs.count }} discovered OSINT source candidates" \ + --body "## Source Discovery Results + + Found **${{ steps.check.outputs.count }}** new feed candidates via automated discovery. + + Review \`discover-results.json\` and promote worthy candidates to \`registry/source_registry.json\`.
+ + ## How to review + - Check each feed URL is reachable and returns valid RSS/Atom + - Verify the organization is a legitimate CSIRT or security authority + - Add geographic coordinates and reporting metadata before merging + + 🤖 Generated by weekly source discovery workflow" \ + --label "discovery" diff --git a/.gitignore b/.gitignore index 453d796..9ce4abc 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ dist-ssr/ coverage/ .tmp/ /euosint-collector +cmd/test-*/ # Runtime logs logs/ @@ -34,6 +35,9 @@ docker-compose.override.yml *.sln *.sw? +# GeoNames dataset (downloaded at build time, ~30MB) +registry/cities500.txt + # Tool caches .eslintcache .npm/ diff --git a/Dockerfile.collector b/Dockerfile.collector index 0edd46c..2bf3d9f 100644 --- a/Dockerfile.collector +++ b/Dockerfile.collector @@ -5,7 +5,8 @@ FROM golang:1.25-alpine AS build WORKDIR /app -COPY go.mod ./ +COPY go.mod go.sum ./ +RUN go mod download COPY cmd ./cmd COPY internal ./internal COPY registry ./registry @@ -13,15 +14,36 @@ COPY public ./public RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/euosint-collector ./cmd/euosint-collector +FROM alpine:3.20 AS geonames +RUN apk add --no-cache curl unzip +RUN curl -sL https://download.geonames.org/export/dump/cities500.zip -o /tmp/cities500.zip \ + && unzip /tmp/cities500.zip -d /tmp \ + && rm /tmp/cities500.zip + FROM alpine:3.20 -RUN apk add --no-cache ca-certificates +RUN set -eux; \ + apk add --no-cache ca-certificates; \ + i=0; \ + until [ "$i" -ge 3 ]; do \ + if apk add --no-cache chromium; then \ + break; \ + fi; \ + i=$((i + 1)); \ + if [ "$i" -ge 3 ]; then \ + echo "ERROR: failed to install chromium after 3 attempts" >&2; \ + exit 1; \ + fi; \ + echo "WARN: chromium install failed, retrying in 5s..." 
>&2; \ + sleep 5; \ + done +ENV CHROME_PATH=/usr/bin/chromium-browser WORKDIR /app COPY --from=build /out/euosint-collector /usr/local/bin/euosint-collector +COPY --from=geonames /tmp/cities500.txt ./registry/cities500.txt COPY registry ./registry -COPY public ./public-defaults COPY docker/collector-entrypoint.sh /usr/local/bin/collector-entrypoint.sh RUN chmod +x /usr/local/bin/collector-entrypoint.sh diff --git a/Makefile b/Makefile index c7c6b7d..8957ebb 100644 --- a/Makefile +++ b/Makefile @@ -127,15 +127,34 @@ dev-start: ## Start the local HTTP dev stack on localhost @echo "EUOSINT available at http://localhost:$${EUOSINT_HTTP_PORT:-8080}" @open "http://localhost:$${EUOSINT_HTTP_PORT:-8080}" -dev-stop: ## Stop the local dev stack - $(DOCKER_COMPOSE) down --remove-orphans - -dev-restart: ## Restart the local dev stack - $(DOCKER_COMPOSE) down --remove-orphans +dev-stop: ## Stop the local dev stack, remove feed-data volume and prune images + $(DOCKER_COMPOSE) down --remove-orphans -v + @docker image prune -f --filter "label=com.docker.compose.project" >/dev/null 2>&1 || true + @docker builder prune -f >/dev/null 2>&1 || true + +dev-restart: ## Restart the local dev stack (removes volumes, rebuilds from scratch) + $(DOCKER_COMPOSE) down --remove-orphans -v + @docker image prune -f --filter "label=com.docker.compose.project" >/dev/null 2>&1 || true + @docker builder prune -f >/dev/null 2>&1 || true $(DOCKER_COMPOSE) up --build -d @echo "EUOSINT available at http://localhost:$${EUOSINT_HTTP_PORT:-8080}" @open "http://localhost:$${EUOSINT_HTTP_PORT:-8080}" +dev-sync-registry: ## Merge source_registry.json into the running DB (adds new feeds) + $(DOCKER_COMPOSE) exec collector euosint-collector --source-db /data/sources.db --curated-seed /app/registry/source_registry.json --source-db-merge-registry + +dev-export-db: ## Export seeded sources.db from running container for distribution + @mkdir -p registry + @docker cp euosint-collector-1:/data/sources.db 
registry/sources.seed.db 2>/dev/null && \ + echo "Exported registry/sources.seed.db ($$(wc -c < registry/sources.seed.db | tr -d ' ') bytes)" || \ + echo "Container not running or no DB found" + +dev-sync-dlq: ## Copy the dead-letter queue from the running container to update the local JSON registry + @mkdir -p .tmp && docker cp euosint-collector-1:/data/source_dead_letter.json .tmp/dlq.json 2>/dev/null && \ + python3 scripts/apply-dlq.py registry/source_registry.json .tmp/dlq.json && \ + echo "DLQ applied — review changes with: git diff registry/source_registry.json" || \ + echo "No DLQ data or container not running" + dev-logs: ## Tail local dev stack logs $(DOCKER_COMPOSE) logs -f --tail=200 diff --git a/README.md b/README.md index 2a486c4..ccc602d 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,26 @@ make dev-restart make dev-logs ``` +## Remote Install (wget bootstrap) + +```bash +wget -qO- https://raw.githubusercontent.com/scalytics/EUOSINT/main/deploy/install.sh | bash +``` + +The installer will: +- verify Docker + Compose availability +- clone or update the repo on the host +- set GHCR runtime images (`ghcr.io/scalytics/euosint-web` + `ghcr.io/scalytics/euosint-collector`) +- prompt for install mode (`preserve` or `fresh` volume reset) +- prompt for domain (`EUOSINT_SITE_ADDRESS`) +- when domain mode is enabled, optionally check `ufw`/`firewalld` and validate local 80/443 availability +- prompt for key runtime flags (browser + LLM vetting settings) +- optionally run `docker compose pull` and start with `--no-build` + - The release pipeline now builds two images: a web image and a Go collector image. - The scheduled feed refresh workflow now runs the Go collector. - The web image now uses Caddy instead of nginx, with the collector output mounted into the web container at runtime. -- In Docker dev mode, the collector seeds the shared feed volume with the repository snapshots first, then replaces them with live output on the first successful run.
+- In Docker dev mode, the collector initializes empty JSON outputs on a fresh volume and then writes live output on the first successful run. ## Run Locally Without Docker diff --git a/deploy/install.sh b/deploy/install.sh new file mode 100755 index 0000000..e1a2007 --- /dev/null +++ b/deploy/install.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash +# EUOSINT remote installer +# Usage: +# wget -qO- https://raw.githubusercontent.com/scalytics/EUOSINT/main/deploy/install.sh | bash +# +# Optional environment overrides: +# REPO_URL=https://github.com/scalytics/EUOSINT.git +# REPO_REF=main +# INSTALL_DIR=/opt/euosint +# IMAGE_TAG=latest + +set -euo pipefail + +REPO_URL="${REPO_URL:-https://github.com/scalytics/EUOSINT.git}" +REPO_REF="${REPO_REF:-main}" +INSTALL_DIR="${INSTALL_DIR:-/opt/euosint}" +IMAGE_TAG="${IMAGE_TAG:-latest}" +INSTALL_MODE="${INSTALL_MODE:-preserve}" +TLS_MODE="false" + +info() { echo "[euosint-install] $*"; } +warn() { echo "[euosint-install][warn] $*" >&2; } +fatal() { echo "[euosint-install][error] $*" >&2; exit 1; } + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || fatal "Missing required command: $1" +} + +prompt() { + local label="$1" + local default_value="${2:-}" + local value + if [[ -n "${default_value}" ]]; then + read -r -p "$label [$default_value]: " value + echo "${value:-$default_value}" + else + read -r -p "$label: " value + echo "$value" + fi +} + +prompt_yes_no() { + local label="$1" + local default_value="$2" + local value + while true; do + read -r -p "$label [$default_value]: " value + value="${value:-$default_value}" + value="$(echo "$value" | tr '[:upper:]' '[:lower:]')" + case "$value" in + y|yes) echo "yes"; return 0 ;; + n|no) echo "no"; return 0 ;; + *) echo "Please answer yes or no." >&2
;; + esac + done +} + +prompt_install_mode() { + local value + while true; do + read -r -p "Install mode (preserve/fresh) [${INSTALL_MODE}]: " value + value="${value:-$INSTALL_MODE}" + value="$(echo "$value" | tr '[:upper:]' '[:lower:]')" + case "$value" in + preserve|fresh) echo "$value"; return 0 ;; + *) echo "Please answer 'preserve' or 'fresh'." >&2 ;; + esac + done +} + +ensure_docker() { + if ! command -v docker >/dev/null 2>&1; then + fatal "Docker is not installed. Install Docker Engine + Compose plugin first, then re-run." + fi + if ! docker info >/dev/null 2>&1; then + fatal "Docker daemon is not reachable. Start Docker and re-run." + fi + + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + return 0 + fi + if command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" + return 0 + fi + fatal "Docker Compose is not available (need 'docker compose' plugin or 'docker-compose')." +} + +clone_or_update_repo() { + if [[ -d "$INSTALL_DIR/.git" ]]; then + info "Repository already exists in $INSTALL_DIR. Updating to $REPO_REF." + git -C "$INSTALL_DIR" fetch --tags origin + git -C "$INSTALL_DIR" checkout "$REPO_REF" + git -C "$INSTALL_DIR" pull --ff-only origin "$REPO_REF" || true + elif [[ -d "$INSTALL_DIR" && -n "$(ls -A "$INSTALL_DIR" 2>/dev/null || true)" ]]; then + fatal "Install directory exists and is not empty: $INSTALL_DIR" + else + info "Cloning $REPO_URL into $INSTALL_DIR." + git clone --depth 1 --branch "$REPO_REF" "$REPO_URL" "$INSTALL_DIR" + fi +} + +upsert_env() { + local file="$1" + local key="$2" + local value="$3" + if grep -qE "^${key}=" "$file"; then + sed -i.bak -E "s|^${key}=.*$|${key}=$(printf '%s' "$value" | sed -e 's/[\&|]/\\&/g')|" "$file" + else + echo "${key}=${value}" >> "$file" + fi +} + +configure_env() { + local env_file="$INSTALL_DIR/.env" + local example_file="$INSTALL_DIR/.env.example" + + [[ -f "$example_file" ]] || fatal "Missing .env.example in repository."
+ + if [[ -f "$env_file" ]]; then + cp "$env_file" "$env_file.preinstall.$(date +%Y%m%d%H%M%S).bak" + info "Existing .env backed up." + else + cp "$example_file" "$env_file" + info "Created .env from .env.example." + fi + + local image_tag + image_tag="$(prompt "GHCR image tag to deploy" "$IMAGE_TAG")" + upsert_env "$env_file" "EUOSINT_WEB_IMAGE" "ghcr.io/scalytics/euosint-web:${image_tag}" + upsert_env "$env_file" "EUOSINT_COLLECTOR_IMAGE" "ghcr.io/scalytics/euosint-collector:${image_tag}" + info "Configured GHCR images with tag '${image_tag}'." + + local domain + domain="$(prompt "Domain for public access (blank for localhost dev mode)" "")" + + if [[ -n "$domain" ]]; then + TLS_MODE="true" + upsert_env "$env_file" "EUOSINT_SITE_ADDRESS" "$domain" + upsert_env "$env_file" "EUOSINT_HTTP_PORT" "80" + upsert_env "$env_file" "EUOSINT_HTTPS_PORT" "443" + info "Configured domain '$domain' with ports 80/443." + warn "Ensure DNS A/AAAA records point to this host and inbound 80/443 are open." + else + TLS_MODE="false" + upsert_env "$env_file" "EUOSINT_SITE_ADDRESS" ":80" + upsert_env "$env_file" "EUOSINT_HTTP_PORT" "8080" + upsert_env "$env_file" "EUOSINT_HTTPS_PORT" "8443" + info "Configured localhost mode on 8080/8443." + fi + + local browser_choice + browser_choice="$(prompt_yes_no "Enable browser-assisted fetches (higher accuracy, higher resource use)?" "yes")" + if [[ "$browser_choice" == "yes" ]]; then + upsert_env "$env_file" "BROWSER_ENABLED" "true" + else + upsert_env "$env_file" "BROWSER_ENABLED" "false" + fi + + local vetting_choice + vetting_choice="$(prompt_yes_no "Enable LLM source vetting?"
"no")" + if [[ "$vetting_choice" == "yes" ]]; then + local provider base_url model api_key + provider="$(prompt "Vetting provider label (openai/xai/mistral/...)" "xai")" + base_url="$(prompt "Vetting base URL" "https://api.x.ai/v1")" + model="$(prompt "Vetting model" "grok-4-1-fast")" + api_key="$(prompt "Vetting API key (required)" "")" + [[ -n "$api_key" ]] || fatal "Vetting enabled but API key is empty." + + upsert_env "$env_file" "SOURCE_VETTING_ENABLED" "true" + upsert_env "$env_file" "SOURCE_VETTING_PROVIDER" "$provider" + upsert_env "$env_file" "SOURCE_VETTING_BASE_URL" "$base_url" + upsert_env "$env_file" "SOURCE_VETTING_MODEL" "$model" + upsert_env "$env_file" "SOURCE_VETTING_API_KEY" "$api_key" + else + upsert_env "$env_file" "SOURCE_VETTING_ENABLED" "false" + upsert_env "$env_file" "SOURCE_VETTING_API_KEY" "" + fi + + local alert_llm_choice + alert_llm_choice="$(prompt_yes_no "Enable LLM alert translation/classification?" "no")" + if [[ "$alert_llm_choice" == "yes" ]]; then + upsert_env "$env_file" "ALERT_LLM_ENABLED" "true" + else + upsert_env "$env_file" "ALERT_LLM_ENABLED" "false" + fi +} + +container_running_for_service() { + local service="$1" + local output + output="$(cd "$INSTALL_DIR" && $COMPOSE_CMD ps --status running --services 2>/dev/null || true)" + echo "$output" | grep -qx "$service" +} + +find_port_listener() { + local port="$1" + + if command -v lsof >/dev/null 2>&1; then + lsof -nP -iTCP:"$port" -sTCP:LISTEN 2>/dev/null | awk 'NR==2 {print $1 " (pid " $2 ") " $9; exit}' + return 0 + fi + + if command -v ss >/dev/null 2>&1; then + ss -ltnp 2>/dev/null | awk -v p=":$port" '$4 ~ (p "$") {print $0; exit}' + return 0 + fi + + return 1 +} + +validate_tls_ports() { + local listener + for port in 80 443; do + listener="$(find_port_listener "$port" || true)" + if [[ -n "$listener" ]]; then + fatal "Port ${port} is already in use (${listener}). Free it before TLS mode startup."
+ fi + done +} + +run_firewall_checks() { + if command -v ufw >/dev/null 2>&1; then + info "ufw detected. Current status:" + ufw status || true + elif command -v firewall-cmd >/dev/null 2>&1; then + info "firewalld detected. Listing public zone rules:" + firewall-cmd --zone=public --list-services || true + firewall-cmd --zone=public --list-ports || true + else + warn "No ufw/firewalld command found; skipping firewall inspection." + fi +} + +preflight_tls_checks() { + if [[ "$TLS_MODE" != "true" ]]; then + return 0 + fi + + info "TLS mode detected (domain set)." + local firewall_choice + firewall_choice="$(prompt_yes_no "Run firewall checks for 80/443 (ufw/firewalld)?" "yes")" + if [[ "$firewall_choice" == "yes" ]]; then + run_firewall_checks + fi + + if [[ "$INSTALL_MODE" == "preserve" ]] && container_running_for_service "euosint"; then + info "Existing euosint service is running; skipping strict local port-collision pre-check." + else + info "Validating that ports 80 and 443 are free..." + validate_tls_ports + fi +} + +start_stack() { + local start_choice + start_choice="$(prompt_yes_no "Start EUOSINT now with Docker Compose?" "yes")" + if [[ "$start_choice" != "yes" ]]; then + if [[ "$INSTALL_MODE" == "fresh" ]]; then + info "Installation complete. Start later with: cd $INSTALL_DIR && $COMPOSE_CMD down -v --remove-orphans && $COMPOSE_CMD pull && $COMPOSE_CMD up -d --no-build" + else + info "Installation complete. Start later with: cd $INSTALL_DIR && $COMPOSE_CMD pull && $COMPOSE_CMD up -d --no-build" + fi + return 0 + fi + + preflight_tls_checks + + if [[ "$INSTALL_MODE" == "fresh" ]]; then + warn "Fresh mode selected: stopping stack and deleting Compose volumes (feed-data/caddy-data/caddy-config)." + ( + cd "$INSTALL_DIR" + $COMPOSE_CMD down -v --remove-orphans || true + ) + else + info "Preserve mode selected: keeping existing Docker volumes/data." + fi + + info "Pulling latest GHCR images..." 
+ ( + cd "$INSTALL_DIR" + $COMPOSE_CMD pull + ) + + info "Starting stack without local builds..." + ( + cd "$INSTALL_DIR" + $COMPOSE_CMD up -d --no-build + ) + + local http_port + http_port="$(grep -E '^EUOSINT_HTTP_PORT=' "$INSTALL_DIR/.env" | cut -d= -f2 || echo "8080")" + info "EUOSINT started. HTTP endpoint: http://$(hostname -f 2>/dev/null || hostname):${http_port}" +} + +main() { + require_cmd git + ensure_docker + INSTALL_MODE="$(prompt_install_mode)" + info "Selected install mode: ${INSTALL_MODE}" + clone_or_update_repo + configure_env + start_stack +} + +main "$@" diff --git a/docker-compose.yml b/docker-compose.yml index 6f25178..a598977 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,11 +1,19 @@ services: collector: + image: "${EUOSINT_COLLECTOR_IMAGE:-ghcr.io/scalytics/euosint-collector:latest}" build: context: . dockerfile: Dockerfile.collector + env_file: + - .env command: - --watch + - --api + - --api-addr + - ":3001" - --registry + - /data/sources.db + - --registry-seed - /app/registry/source_registry.json - --output - /data/alerts.json @@ -15,9 +23,18 @@ services: - /data/alerts-state.json - --source-health-output - /data/source-health.json + - --candidate-queue + - /data/source_candidates.json + - --replacement-queue + - /data/source_dead_letter.json environment: + GEONAMES_PATH: "/app/registry/cities500.txt" + NOMINATIM_ENABLED: "1" + DDG_SEARCH_ENABLED: "1" + DDG_SEARCH_DELAY_MS: "8000" + BROWSER_ENABLED: "${BROWSER_ENABLED:-1}" INTERVAL_MS: "900000" - MAX_PER_SOURCE: "20" + MAX_PER_SOURCE: "40" MISSING_PERSON_RELEVANCE_THRESHOLD: "0" FAIL_ON_CRITICAL_SOURCE_GAP: "0" CRITICAL_SOURCE_PREFIXES: "interpol-red,interpol-yellow" @@ -26,11 +43,14 @@ services: restart: unless-stopped euosint: + image: "${EUOSINT_WEB_IMAGE:-ghcr.io/scalytics/euosint-web:latest}" build: context: . 
dockerfile: Dockerfile depends_on: - collector + env_file: + - .env environment: EUOSINT_SITE_ADDRESS: "${EUOSINT_SITE_ADDRESS:-:80}" ports: diff --git a/docker/Caddyfile b/docker/Caddyfile index 5d803af..ca6cedb 100644 --- a/docker/Caddyfile +++ b/docker/Caddyfile @@ -16,6 +16,10 @@ X-Frame-Options "DENY" } + handle /api/* { + reverse_proxy collector:3001 + } + handle /alerts.json { root * /var/lib/euosint file_server diff --git a/docker/collector-entrypoint.sh b/docker/collector-entrypoint.sh index 8db33ef..2447748 100644 --- a/docker/collector-entrypoint.sh +++ b/docker/collector-entrypoint.sh @@ -13,11 +13,42 @@ seed_if_missing() { fi } +init_json_if_missing() { + target_path="$1" + payload="$2" + if [ ! -f "$target_path" ]; then + printf '%s\n' "$payload" > "$target_path" + fi +} + mkdir -p /data -seed_if_missing /app/public-defaults/alerts.json /data/alerts.json -seed_if_missing /app/public-defaults/alerts-filtered.json /data/alerts-filtered.json -seed_if_missing /app/public-defaults/alerts-state.json /data/alerts-state.json -seed_if_missing /app/public-defaults/source-health.json /data/source-health.json +# Start fresh volumes with empty JSON documents to avoid serving stale +# baked snapshots from previous registry revisions. +init_json_if_missing /data/alerts.json '[]' +init_json_if_missing /data/alerts-filtered.json '[]' +init_json_if_missing /data/alerts-state.json '[]' +init_json_if_missing /data/source-health.json '{"generated_at":"","critical_source_prefixes":[],"fail_on_critical_source_gap":false,"total_sources":0,"sources_ok":0,"sources_error":0,"duplicate_audit":{"suppressed_variant_duplicates":0,"repeated_title_groups_in_active":0,"repeated_title_samples":[]},"sources":[]}' +seed_if_missing /app/registry/source_candidates.json /data/source_candidates.json + +if [ ! 
-f /data/sources.db ]; then + if [ -f /app/registry/sources.seed.db ]; then + cp /app/registry/sources.seed.db /data/sources.db + echo "Seeded sources.db from pre-built snapshot" + else + euosint-collector --source-db /data/sources.db --source-db-init + euosint-collector --source-db /data/sources.db --registry /app/registry/source_registry.json --source-db-import-registry + if [ -f /app/registry/curated_agencies.seed.json ]; then + euosint-collector --source-db /data/sources.db --curated-seed /app/registry/curated_agencies.seed.json --source-db-merge-registry + fi + fi +fi + +# Always merge the baked-in JSON registry into the DB on startup. +# MergeRegistry upserts only — it adds new sources and updates existing +# ones but never deletes discovered or runtime-added sources. +# This ensures new feeds (FBI API, travel warnings, etc.) from image +# updates are picked up without manual intervention. +euosint-collector --source-db /data/sources.db --curated-seed /app/registry/source_registry.json --source-db-merge-registry exec euosint-collector "$@" diff --git a/docs/operations.md b/docs/operations.md index 6db1814..e510d31 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -33,7 +33,7 @@ Default local behavior: - HTTP on `http://localhost:8080` - HTTPS listener mapped to `https://localhost:8443` but not used unless `EUOSINT_SITE_ADDRESS` is changed to a hostname that enables TLS -- The collector seeds the shared feed volume with bundled JSON snapshots if the volume is empty, so the UI has data immediately while the first live refresh runs +- The collector initializes empty JSON outputs on a fresh shared feed volume, then replaces them with live collector output on the first successful run ## Domain Setup For A VM @@ -74,6 +74,10 @@ If the VM only has `docker-compose`, adjust the unit commands accordingly. ## Operational Notes - The collector writes feed output into the `feed-data` volume shared with the web container. 
+- The UI footer freshness line is derived from `source-health.json.generated_at` and shows the age of the current collector snapshot. It is normal below 20 minutes, warning from 20 to 60 minutes, and stale above 60 minutes. +- Discovery intake lives in [registry/source_candidates.json](../registry/source_candidates.json). +- Dead sources are written to the terminal DLQ in `source_dead_letter.json` and are not crawled again. +- LLM-assisted source vetting is documented in [docs/source-vetting.md](source-vetting.md). - TLS state and certificates persist in the `caddy-data` volume. - Caddy runtime state persists in the `caddy-config` volume. - Scheduled refreshes, Docker runtime, and local collection commands all run through the Go collector. diff --git a/docs/source-vetting.md b/docs/source-vetting.md new file mode 100644 index 0000000..e1fe27b --- /dev/null +++ b/docs/source-vetting.md @@ -0,0 +1,267 @@ + + +# Source Vetting + +## Runtime Model + +The crawler and source vetter now have separate roles: + +- `registry/source_candidates.json`: untrusted crawler intake +- `registry/sources.db`: vetted live sources only +- `registry/source_dead_letter.json`: terminal dead-letter queue, never crawled again + +Discovery reads `source_candidates.json`, probes for RSS/Atom or durable HTML listing pages, samples content, and optionally calls an LLM source vetter. Only approved sources are promoted into `sources.db`. Promoted candidates are removed from the candidate queue. + +If `SEARCH_DISCOVERY_ENABLED=true`, discovery also uses the configured OpenAI-compatible model as a narrow search accelerator. It asks for a small number of official candidate URLs for a capped set of agencies and feeds those URLs back into the same candidate queue and vetting pipeline. + +## LLM Endpoint Contract + +The source vetter uses an OpenAI-compatible `chat/completions` endpoint.
It supports: + +- OpenAI +- Mistral +- xAI +- Scalytics Copilot +- Claude-compatible gateways +- Gemini-compatible gateways +- vLLM +- Ollama + +The implementation is endpoint-driven, not vendor-SDK driven. If a provider does not expose a native OpenAI-compatible endpoint, place a compatible gateway in front of it and point `SOURCE_VETTING_BASE_URL` at that gateway. + +## Search-Capable Models + +Search-capable models can be used as discovery accelerators instead of scraping public search engines directly. + +Examples: + +- xAI `grok-4-1-fast` +- Gemini fast variants +- Claude Haiku variants +- Scalytics Copilot models with search enabled + +Recommended use: + +- generate a small set of candidate URLs for a specific agency, country, or sector +- pass those URLs into the candidate queue +- let the crawler, deterministic hygiene, and source vetter decide whether they are usable + +Do not use search-capable models as direct truth sources or direct promotion sources. + +### Token-Safe Search + +Use the configured model in a narrow, token-safe way: + +- ask for a small number of candidate URLs, not a long report +- constrain by agency, country, and sector +- request only official or high-confidence source URLs +- avoid broad prompts like "find everything about police feeds worldwide" +- prefer short JSON output with URLs and a one-line reason + +Good pattern: + +- input: `Find up to 5 official feed/API/newsroom URLs for Bundeskriminalamt related to wanted suspects or public appeals. 
Return JSON only.` +- output: small candidate list + +Bad pattern: + +- input: `Research German law enforcement internet presence in detail and summarize everything.` + +## Environment Variables + +```dotenv +SEARCH_DISCOVERY_ENABLED=true +SEARCH_DISCOVERY_MAX_TARGETS=4 +SEARCH_DISCOVERY_MAX_URLS_PER_TARGET=3 +HTTP_TIMEOUT_MS=60000 +SOURCE_VETTING_ENABLED=true +SOURCE_VETTING_PROVIDER=xai +SOURCE_VETTING_BASE_URL=https://api.x.ai/v1 +SOURCE_VETTING_API_KEY= +SOURCE_VETTING_MODEL=grok-4-1-fast +SOURCE_VETTING_TEMPERATURE=0 +SOURCE_VETTING_MAX_SAMPLE_ITEMS=6 +ALERT_LLM_ENABLED=true +ALERT_LLM_MODEL=grok-4-1-fast +ALERT_LLM_MAX_ITEMS_PER_SOURCE=4 +``` + +Put the real API key only in your local `.env`. Do not commit it. + +## Example Endpoints + +OpenAI: + +```dotenv +SOURCE_VETTING_PROVIDER=openai +SOURCE_VETTING_BASE_URL=https://api.openai.com/v1 +``` + +Mistral: + +```dotenv +SOURCE_VETTING_PROVIDER=mistral +SOURCE_VETTING_BASE_URL=https://api.mistral.ai/v1 +``` + +xAI: + +```dotenv +SOURCE_VETTING_PROVIDER=xai +SOURCE_VETTING_BASE_URL=https://api.x.ai/v1 +SOURCE_VETTING_MODEL=grok-4-1-fast +ALERT_LLM_MODEL=grok-4-1-fast +``` + +Scalytics Copilot: + +```dotenv +SOURCE_VETTING_PROVIDER=scalytics-copilot +SOURCE_VETTING_BASE_URL=https://YOUR_SCALYTICS_COPILOT_URL/v1 +SOURCE_VETTING_MODEL=your-copilot-model +ALERT_LLM_MODEL=your-copilot-model +``` + +vLLM: + +```dotenv +SOURCE_VETTING_PROVIDER=vllm +SOURCE_VETTING_BASE_URL=http://vllm-host:8000/v1 +SOURCE_VETTING_API_KEY=dummy +``` + +Ollama: + +```dotenv +SOURCE_VETTING_PROVIDER=ollama +SOURCE_VETTING_BASE_URL=http://localhost:11434/v1 +SOURCE_VETTING_API_KEY=dummy +``` + +Claude-compatible gateway: + +```dotenv +SOURCE_VETTING_PROVIDER=claude +SOURCE_VETTING_BASE_URL=https://your-gateway.example/v1 +``` + +Gemini-compatible gateway: + +```dotenv +SOURCE_VETTING_PROVIDER=gemini +SOURCE_VETTING_BASE_URL=https://your-gateway.example/v1 +``` + +## CLI Usage + +Run the crawler and vetter once: + +```bash +go run 
./cmd/euosint-collector \ + --discover \ + --registry registry/sources.db \ + --candidate-queue registry/source_candidates.json \ + --replacement-queue registry/source_dead_letter.json \ + --search-discovery \ + --search-discovery-max-targets 4 \ + --search-discovery-max-urls 3 \ + --source-vetting \ + --source-vetting-provider xai \ + --source-vetting-base-url https://api.x.ai/v1 \ + --source-vetting-api-key "$SOURCE_VETTING_API_KEY" \ + --source-vetting-model grok-4-1-fast \ + --alert-llm \ + --alert-llm-model grok-4-1-fast +``` + +## Promotion Policy + +The LLM does not bypass deterministic hygiene. + +Sources are rejected before the LLM stage if they look like: + +- local or municipal police +- generic institutional news +- low-signal public relations pages +- sources with no sample items to assess + +Approved sources are promoted into `sources.db` with: + +- `promotion_status` +- `source_quality` +- `operational_relevance` +- `level` +- `mission_tags` + +The live watcher only loads `promotion_status = active`. + +## Alert-Level LLM Gate + +You can also enable an item-level LLM gate for ambiguous `html-list` sources. + +When enabled, each candidate HTML item is sent to the same OpenAI-compatible endpoint with a short prompt that must return strict JSON: + +- `yes`: whether the item is intelligence-relevant or just noise +- `translation`: a short English title +- `category_id`: the normalized category id if `yes = true` + +If `yes = false`, the collector drops the item. +If `yes = true`, the collector uses the English title and category override during normalization. + +RSS, Atom, and structured API sources stay on the deterministic collector path so the live watcher does not stall behind LLM latency. + +Example: + +```dotenv +ALERT_LLM_ENABLED=true +ALERT_LLM_MODEL=grok-4-1-fast +ALERT_LLM_MAX_ITEMS_PER_SOURCE=4 +``` + +This uses the same provider/base URL/API key as source vetting. 
+ +For xAI and similar reasoning-heavy models, keep `ALERT_LLM_MAX_ITEMS_PER_SOURCE` low and raise `HTTP_TIMEOUT_MS` above the default collector timeout. A practical starting point is: + +```dotenv +HTTP_TIMEOUT_MS=60000 +ALERT_LLM_MAX_ITEMS_PER_SOURCE=4 +``` + +The same principle applies here: if your configured model supports search, use it to return a strict, short yes/no decision, a short English title, and a category id. Keep prompts short and outputs structured to avoid wasting tokens. + +Equivalent xAI request shape: + +```bash +curl https://api.x.ai/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $SOURCE_VETTING_API_KEY" \ + -d '{ + "messages": [ + {"role": "system", "content": "You are a test assistant."}, + {"role": "user", "content": "Testing. Just say hi and hello world and nothing else."} + ], + "model": "grok-4-1-fast", + "stream": false, + "temperature": 0 + }' +``` + +Equivalent Scalytics Copilot request shape: + +```bash +curl https://YOUR_SCALYTICS_COPILOT_URL/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $SOURCE_VETTING_API_KEY" \ + -d '{ + "messages": [ + {"role": "system", "content": "You are a test assistant."}, + {"role": "user", "content": "Testing. Just say hi and hello world and nothing else."} + ], + "model": "your-copilot-model", + "stream": false, + "temperature": 0 + }' +``` diff --git a/docs/userguide.md b/docs/userguide.md new file mode 100644 index 0000000..4cc6433 --- /dev/null +++ b/docs/userguide.md @@ -0,0 +1,149 @@ +# EUOSINT User Guide + +## Alert Categories + +EUOSINT classifies every alert into one of the following categories. Each category groups a specific type of intelligence and is sourced from relevant official feeds. + +### Cyber Advisory +Vulnerability disclosures, patch advisories, and threat intelligence from national CERTs and cybersecurity agencies. 
Covers zero-days, actively exploited CVEs, ransomware campaigns, and critical infrastructure advisories. + +**Sources:** CISA, BSI, CERT-EU, CERT.AT, NCSC-UK, ANSSI, ENISA, NVD/KEV, and 60+ national CERTs worldwide. + +### Wanted Suspect +Active arrest warrants and wanted person notices from law enforcement agencies. Includes fugitives, persons of interest, and internationally wanted individuals. + +**Sources:** Interpol Red Notices (newest 160 per run), FBI Most Wanted, Europol Most Wanted, BKA, national police agencies across Europe, Americas, and Asia-Pacific. + +### Missing Person +Active missing person cases including children, endangered adults, and unidentified remains. Covers AMBER alerts and international missing person notices. + +**Sources:** Interpol Yellow Notices (newest 160 per run), NCMEC, NamUs, BKA Vermisste, national police missing person feeds. + +### Public Appeal +Police appeals for information from the public — witness calls, identification requests, crime tip lines, and community safety notices. + +**Sources:** Metropolitan Police, Police.uk, Polizei.de state feeds, Gendarmerie, FBI tips, and regional law enforcement across 30+ countries. + +### Fraud Alert +Consumer fraud warnings, financial crime alerts, scam advisories, and money laundering notices from financial regulators and law enforcement. + +**Sources:** FCA, BaFin, SEC, FINMA, Europol financial crime, ACCC Scamwatch, national consumer protection agencies. + +### Intelligence Report +Strategic intelligence assessments, geopolitical analysis, and security briefings from think tanks and intelligence-adjacent organisations. + +**Sources:** SIPRI, IISS, RUSI, Jane's, UN Security Council press, OSCE, NATO CCDCOE, national intelligence agency public releases. + +### Travel Warning +Government-issued travel advisories and consular warnings. Covers security situations, health risks, and entry restrictions for countries and regions. 
+ +**Sources:** US State Department, UK FCDO, German Auswärtiges Amt, and other foreign ministry travel advisory feeds. + +### Conflict Monitoring +Armed conflict tracking, ceasefire violations, military operations, and peace process updates from conflict zones worldwide. + +**Sources:** UN Peace & Security, SIPRI conflict data, OSCE monitoring missions, peacekeeping operation feeds. + +### Humanitarian Security +Security incidents affecting humanitarian operations — aid worker safety, access restrictions, and operational environment assessments in crisis zones. + +**Sources:** ICRC field operations, ICRC IHL updates, UN OCHA, UNHCR, and humanitarian coordination feeds. + +### Humanitarian Tasking +Active humanitarian missions, disaster response deployments, and relief operation updates. + +**Sources:** UN Peacekeeping (Blue Helmets), UN OCHA coordination, UN humanitarian aid operations. + +### Health Emergency +Disease outbreaks, pandemic updates, public health emergencies, and biosecurity alerts from health authorities. + +**Sources:** WHO, ECDC, CDC, RKI, national public health agencies. + +### Public Safety +Civil protection alerts, natural disaster warnings, critical infrastructure incidents, and emergency notifications. + +**Sources:** National emergency management agencies, civil protection feeds, disaster response organisations. + +### Emergency Management +Large-scale emergency coordination, disaster declarations, evacuation orders, and crisis management updates. + +**Sources:** FEMA, BBK (German Federal Office of Civil Protection), EU Civil Protection Mechanism. + +### Terrorism Tip +Counter-terrorism alerts, threat assessments, and public safety notices related to terrorism and extremism. + +**Sources:** Europol TE-SAT, national counter-terrorism units, security service public advisories. + +### Private Sector +Corporate security alerts, supply chain disruptions, and industry-specific threat intelligence relevant to private sector operations.
+ +**Sources:** Industry ISACs, sector-specific CERTs, corporate security advisory feeds. + +--- + +## Severity Levels + +Every alert is assigned a severity level based on keyword analysis of the title and content: + +| Level | Colour | Criteria | +|-------|--------|----------| +| **Critical** | Red | Zero-days, ransomware, active exploitation, wanted fugitives, missing persons, AMBER alerts, emergencies | +| **High** | Orange | Vulnerabilities, compromises, phishing, fraud, urgent advisories, security warnings | +| **Medium** | Yellow | Arrests, charges, sentences, moderate-severity items | +| **Low** | Blue | Minor advisories, routine updates | +| **Informational** | Grey | Newsletters, info packets, guidance documents, educational material | + +Keyword matching supports English and German (e.g., "Kritische Schwachstelle" = critical, "Sicherheitslücke" = high, "Infopaket" = informational). + +--- + +## Interpol Notices + +EUOSINT pulls the **newest 160 Red Notices** (wanted suspects) and **newest 160 Yellow Notices** (missing persons) from the Interpol public API per collector run. This limit is intentional to avoid data overflow and excessive API load. + +- Red Notices: ~6,400 active notices globally +- Yellow Notices: ~4,000 active notices globally + +Only the most recent window is fetched each cycle. Notices are pinned on the map to the subject's country of nationality (the wanted suspect for Red Notices, the missing person for Yellow Notices) rather than Interpol HQ in Lyon. Links point to the Interpol web view, not the raw API. + +--- + +## Map + +The map uses [CARTO](https://carto.com/) dark basemap tiles loaded from their CDN. An active internet connection is required for map rendering. Missing or slow-loading tiles indicate network connectivity issues to `basemaps.cartocdn.com`. + +Alerts are plotted at their source country coordinates. International sources (Interpol, UN agencies) are geocoded to the relevant crisis country or country of nationality when possible.
+ +--- + +## Collector Cycle + +The collector runs on a configurable interval (default: 15 minutes). Each run: + +1. Fetches all active sources from the registry (~266 sources) +2. Parses and normalizes alerts with severity and category classification +3. Deduplicates across sources +4. Reconciles with previous state (tracks new, active, and removed alerts) +5. Outputs JSON snapshots consumed by the frontend + +Removed alerts (e.g., a resolved Interpol notice) are retained in state for 14 days before being purged. + +--- + +## Regions + +The dashboard supports region-scoped views: + +| Region | Coverage | +|--------|----------| +| **Global** | All sources worldwide | +| **Europe** | EU/EEA member states, UK, Switzerland, Balkans, Eastern Europe | +| **North America** | US, Canada, Mexico | +| **South America** | Central and South America | +| **Asia** | East Asia, Southeast Asia, South Asia, Central Asia, Middle East | +| **Africa** | All African nations | +| **Oceania** | Australia, New Zealand, Pacific Islands | +| **Caribbean** | Caribbean island nations | +| **International** | Sources with global scope (Interpol, UN, ICRC) | + +Region shortcuts in the header bar and the dropdown selector both filter the map and alert feed simultaneously. 
diff --git a/go.mod b/go.mod index 4e8c44c..defcbdb 100644 --- a/go.mod +++ b/go.mod @@ -3,4 +3,34 @@ module github.com/scalytics/euosint -go 1.25 +go 1.25.0 + +require ( + github.com/andybalholm/brotli v1.0.6 + github.com/chromedp/chromedp v0.13.6 + github.com/refraction-networking/utls v1.8.2 +) + +require ( + github.com/chromedp/cdproto v0.0.0-20250403032234-65de8f5d025b // indirect + github.com/chromedp/sysutil v1.1.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/go-json-experiment/json v0.0.0-20250211171154-1ae217ad3535 // indirect + github.com/gobwas/httphead v0.1.0 // indirect + github.com/gobwas/pool v0.2.1 // indirect + github.com/gobwas/ws v1.4.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/klauspost/compress v1.17.4 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + modernc.org/libc v1.67.6 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect + modernc.org/sqlite v1.46.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..4d8fbda --- /dev/null +++ b/go.sum @@ -0,0 +1,57 @@ +github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI= +github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/chromedp/cdproto v0.0.0-20250403032234-65de8f5d025b h1:jJmiCljLNTaq/O1ju9Bzz2MPpFlmiTn0F7LwCoeDZVw= +github.com/chromedp/cdproto v0.0.0-20250403032234-65de8f5d025b/go.mod h1:NItd7aLkcfOA/dcMXvl8p1u+lQqioRMq/SqDp71Pb/k= +github.com/chromedp/chromedp v0.13.6 h1:xlNunMyzS5bu3r/QKrb3fzX6ow3WBQ6oao+J65PGZxk= 
+github.com/chromedp/chromedp v0.13.6/go.mod h1:h8GPP6ZtLMLsU8zFbTcb7ZDGCvCy8j/vRoFmRltQx9A= +github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= +github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/go-json-experiment/json v0.0.0-20250211171154-1ae217ad3535 h1:yE7argOs92u+sSCRgqqe6eF+cDaVhSPlioy1UkA0p/w= +github.com/go-json-experiment/json v0.0.0-20250211171154-1ae217ad3535/go.mod h1:BWmvoE1Xia34f3l/ibJweyhrT+aROb/FQ6d+37F0e2s= +github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= +github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= +github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= +github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= +github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= +github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v1.0.0 
h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= +github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo= +github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= +golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod 
h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI= +modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= +modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= diff --git a/index.html b/index.html index b924e91..c8d44cc 100644 --- a/index.html +++ b/index.html @@ -7,7 +7,7 @@ - + 0 && n <= 500 { + limit = n + } + } + + if q == "" && category == "" && region == "" { + writeJSON(w, http.StatusBadRequest, map[string]string{"error": "q, category, or region parameter required"}) + return + } + + // Sanitize FTS query: if user passes bare text without operators, wrap words for prefix matching. + ftsQuery := q + if q != "" && !strings.ContainsAny(q, `"*()`) && !strings.Contains(q, " OR ") && !strings.Contains(q, " AND ") && !strings.Contains(q, " NOT ") { + words := strings.Fields(q) + parts := make([]string, len(words)) + for i, w := range words { + parts[i] = `"` + strings.ReplaceAll(w, `"`, `""`) + `"` + "*" + } + ftsQuery = strings.Join(parts, " ") + } + + results, err := s.db.SearchAlerts(r.Context(), ftsQuery, category, region, status, limit) + if err != nil { + // FTS parse error — try as plain phrase. 
+ if strings.Contains(err.Error(), "fts5") || strings.Contains(err.Error(), "syntax") { + phrase := `"` + strings.ReplaceAll(q, `"`, `""`) + `"` + results, err = s.db.SearchAlerts(r.Context(), phrase, category, region, status, limit) + } + if err != nil { + writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) + return + } + } + writeJSON(w, http.StatusOK, map[string]any{ + "query": q, + "count": len(results), + "results": results, + }) +} + +func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, map[string]string{"status": "ok"}) +} + +func writeJSON(w http.ResponseWriter, status int, v any) { + w.Header().Set("Content-Type", "application/json; charset=utf-8") + w.WriteHeader(status) + enc := json.NewEncoder(w) + enc.SetEscapeHTML(false) + _ = enc.Encode(v) +} + +func cors(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type") + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusNoContent) + return + } + next.ServeHTTP(w, r) + }) +} + +// ---------- per-IP token bucket rate limiter ---------- + +type ipBucket struct { + tokens float64 + lastSeen time.Time +} + +type rateLimiterState struct { + mu sync.Mutex + buckets map[string]*ipBucket + burst float64 + rate float64 // tokens per second + staleAge time.Duration +} + +func newRateLimiter(burst int, perSecond float64, staleAge time.Duration) *rateLimiterState { + rl := &rateLimiterState{ + buckets: make(map[string]*ipBucket), + burst: float64(burst), + rate: perSecond, + staleAge: staleAge, + } + go rl.evictLoop() + return rl +} + +func (rl *rateLimiterState) allow(ip string) bool { + rl.mu.Lock() + defer rl.mu.Unlock() + now := time.Now() + b, ok := rl.buckets[ip] + if !ok { + b = 
&ipBucket{tokens: rl.burst, lastSeen: now} + rl.buckets[ip] = b + } + elapsed := now.Sub(b.lastSeen).Seconds() + b.tokens += elapsed * rl.rate + if b.tokens > rl.burst { + b.tokens = rl.burst + } + b.lastSeen = now + if b.tokens < 1 { + return false + } + b.tokens-- + return true +} + +func (rl *rateLimiterState) evictLoop() { + ticker := time.NewTicker(rl.staleAge) + defer ticker.Stop() + for range ticker.C { + rl.mu.Lock() + cutoff := time.Now().Add(-rl.staleAge) + for ip, b := range rl.buckets { + if b.lastSeen.Before(cutoff) { + delete(rl.buckets, ip) + } + } + rl.mu.Unlock() + } +} + +func rateLimit(rl *rateLimiterState, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Skip rate limiting for health checks. + if r.URL.Path == "/api/health" { + next.ServeHTTP(w, r) + return + } + ip := clientIP(r) + if !rl.allow(ip) { + w.Header().Set("Retry-After", "1") + writeJSON(w, http.StatusTooManyRequests, map[string]string{"error": "rate limit exceeded"}) + return + } + next.ServeHTTP(w, r) + }) +} + +func clientIP(r *http.Request) string { + // Trust X-Forwarded-For from Caddy reverse proxy. + if xff := r.Header.Get("X-Forwarded-For"); xff != "" { + if ip := strings.TrimSpace(strings.SplitN(xff, ",", 2)[0]); ip != "" { + return ip + } + } + if xri := r.Header.Get("X-Real-Ip"); xri != "" { + return strings.TrimSpace(xri) + } + host, _, _ := net.SplitHostPort(r.RemoteAddr) + return host +} diff --git a/internal/collector/api/api_test.go b/internal/collector/api/api_test.go new file mode 100644 index 0000000..11f663d --- /dev/null +++ b/internal/collector/api/api_test.go @@ -0,0 +1,234 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package api + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/sourcedb" +) + +func testDB(t *testing.T) *sourcedb.DB { + t.Helper() + dir := t.TempDir() + db, err := sourcedb.Open(filepath.Join(dir, "test.db")) + if err != nil { + t.Fatal(err) + } + if err := db.Init(context.Background()); err != nil { + t.Fatal(err) + } + return db +} + +func seedAlerts(t *testing.T, db *sourcedb.DB) { + t.Helper() + alerts := []model.Alert{ + { + AlertID: "a1", + SourceID: "europol", + Status: "active", + Title: "Europol dismantles major drug trafficking network", + CanonicalURL: "https://europol.europa.eu/a1", + Category: "public_appeal", + Severity: "high", + RegionTag: "EU", + Source: model.SourceMetadata{ + SourceID: "europol", + AuthorityName: "Europol", + Country: "Netherlands", + CountryCode: "NL", + Region: "Europe", + }, + }, + { + AlertID: "a2", + SourceID: "fbi-wanted", + Status: "active", + Title: "FBI Most Wanted: Cyber fugitive identified", + CanonicalURL: "https://fbi.gov/a2", + Category: "wanted_suspect", + Severity: "critical", + RegionTag: "US", + Source: model.SourceMetadata{ + SourceID: "fbi-wanted", + AuthorityName: "FBI", + Country: "United States", + CountryCode: "US", + Region: "North America", + }, + }, + { + AlertID: "a3", + SourceID: "cert-ua", + Status: "active", + Title: "CERT-UA reports new malware campaign targeting energy sector", + CanonicalURL: "https://cert.gov.ua/a3", + Category: "cyber_advisory", + Severity: "high", + RegionTag: "UA", + Source: model.SourceMetadata{ + SourceID: "cert-ua", + AuthorityName: "CERT-UA", + Country: "Ukraine", + CountryCode: "UA", + Region: "Europe", + }, + }, + } + if err := db.SaveAlerts(context.Background(), alerts); err != nil { + t.Fatal(err) + } +} + +func 
TestSearchReturnsRankedResults(t *testing.T) { + db := testDB(t) + defer db.Close() + seedAlerts(t, db) + + srv := New(db, ":0", os.Stderr) + handler := srv.srv.Handler + + req := httptest.NewRequest("GET", "/api/search?q=drug+trafficking", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var resp struct { + Count int `json:"count"` + Results []model.Alert `json:"results"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatal(err) + } + if resp.Count == 0 { + t.Fatal("expected search results for 'drug trafficking'") + } + if resp.Results[0].AlertID != "a1" { + t.Fatalf("expected Europol alert first, got %s", resp.Results[0].AlertID) + } +} + +func TestSearchWithCategoryFilter(t *testing.T) { + db := testDB(t) + defer db.Close() + seedAlerts(t, db) + + srv := New(db, ":0", os.Stderr) + handler := srv.srv.Handler + + req := httptest.NewRequest("GET", "/api/search?category=cyber_advisory", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + var resp struct { + Count int `json:"count"` + Results []model.Alert `json:"results"` + } + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatal(err) + } + for _, r := range resp.Results { + if r.Category != "cyber_advisory" { + t.Fatalf("expected only cyber_advisory, got %s", r.Category) + } + } +} + +func TestSearchEmptyQueryReturns400(t *testing.T) { + db := testDB(t) + defer db.Close() + + srv := New(db, ":0", os.Stderr) + handler := srv.srv.Handler + + req := httptest.NewRequest("GET", "/api/search", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } +} + +func TestRateLimitReturns429(t *testing.T) { + db := testDB(t) + defer db.Close() + seedAlerts(t, db) + + 
srv := New(db, ":0", os.Stderr) + handler := srv.srv.Handler + + // Exhaust the burst (30 requests). + for i := 0; i < 30; i++ { + req := httptest.NewRequest("GET", "/api/search?q=europol", nil) + req.RemoteAddr = "10.0.0.1:12345" + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + if w.Code == http.StatusTooManyRequests { + // Burst may be slightly less than 30 due to token consumption timing. + // Reaching 429 early is fine — the limiter works. + return + } + } + + // The 31st request should be rate limited. + req := httptest.NewRequest("GET", "/api/search?q=europol", nil) + req.RemoteAddr = "10.0.0.1:12345" + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + if w.Code != http.StatusTooManyRequests { + t.Fatalf("expected 429, got %d", w.Code) + } + if w.Header().Get("Retry-After") == "" { + t.Fatal("expected Retry-After header") + } +} + +func TestRateLimitSkipsHealth(t *testing.T) { + db := testDB(t) + defer db.Close() + + srv := New(db, ":0", os.Stderr) + handler := srv.srv.Handler + + // Even after many requests, health should never be rate limited. 
+ for i := 0; i < 50; i++ { + req := httptest.NewRequest("GET", "/api/health", nil) + req.RemoteAddr = "10.0.0.2:12345" + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + if w.Code != http.StatusOK { + t.Fatalf("health request %d: expected 200, got %d", i, w.Code) + } + } +} + +func TestHealthEndpoint(t *testing.T) { + db := testDB(t) + defer db.Close() + + srv := New(db, ":0", os.Stderr) + handler := srv.srv.Handler + + req := httptest.NewRequest("GET", "/api/health", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", w.Code) + } +} diff --git a/internal/collector/app/app.go b/internal/collector/app/app.go index 677066c..f201acc 100644 --- a/internal/collector/app/app.go +++ b/internal/collector/app/app.go @@ -6,10 +6,14 @@ package app import ( "context" "flag" + "fmt" "io" + "github.com/scalytics/euosint/internal/collector/api" "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/discover" "github.com/scalytics/euosint/internal/collector/run" + "github.com/scalytics/euosint/internal/sourcedb" ) func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) error { @@ -31,6 +35,39 @@ func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) fs.Float64Var(&cfg.MissingPersonRelevanceThreshold, "missing-person-threshold", cfg.MissingPersonRelevanceThreshold, "Relevance threshold for missing person alerts") fs.BoolVar(&cfg.FailOnCriticalSourceGap, "fail-on-critical-source-gap", cfg.FailOnCriticalSourceGap, "Fail the run when critical sources fetch zero records") fs.BoolVar(&cfg.TranslateEnabled, "translate", cfg.TranslateEnabled, "Translate non-Latin RSS titles and summaries to English") + fs.BoolVar(&cfg.BrowserEnabled, "browser", cfg.BrowserEnabled, "Enable headless Chrome fetching for browser-mode sources") + fs.IntVar(&cfg.BrowserTimeoutMS, "browser-timeout-ms", 
cfg.BrowserTimeoutMS, "Timeout in milliseconds for headless Chrome page loads") + fs.BoolVar(&cfg.DiscoverMode, "discover", cfg.DiscoverMode, "Run source discovery instead of collection") + fs.BoolVar(&cfg.DiscoverBackground, "discover-background", cfg.DiscoverBackground, "Run source discovery in the background while the collector is watching feeds") + fs.IntVar(&cfg.DiscoverIntervalMS, "discover-interval-ms", cfg.DiscoverIntervalMS, "Background discovery interval in milliseconds") + fs.StringVar(&cfg.DiscoverOutputPath, "discover-output", cfg.DiscoverOutputPath, "Path for discovery results JSON file") + fs.StringVar(&cfg.CandidateQueuePath, "candidate-queue", cfg.CandidateQueuePath, "Path for the crawler candidate intake JSON file") + fs.BoolVar(&cfg.SearchDiscoveryEnabled, "search-discovery", cfg.SearchDiscoveryEnabled, "Use the configured OpenAI-compatible model as a token-safe candidate URL discovery accelerator") + fs.IntVar(&cfg.SearchDiscoveryMaxTargets, "search-discovery-max-targets", cfg.SearchDiscoveryMaxTargets, "Maximum number of search-discovery targets per run") + fs.IntVar(&cfg.SearchDiscoveryMaxURLsPerTarget, "search-discovery-max-urls", cfg.SearchDiscoveryMaxURLsPerTarget, "Maximum number of URLs requested from the model per search target") + fs.StringVar(&cfg.WikimediaUserAgent, "wikimedia-user-agent", cfg.WikimediaUserAgent, "Identifying bot User-Agent for Wikimedia/Wikidata requests") + fs.StringVar(&cfg.WikidataCachePath, "wikidata-cache-path", cfg.WikidataCachePath, "Directory for cached Wikidata discovery responses") + fs.IntVar(&cfg.WikidataCacheTTLHours, "wikidata-cache-ttl-hours", cfg.WikidataCacheTTLHours, "How long to reuse cached Wikidata discovery responses") + fs.BoolVar(&cfg.VettingEnabled, "source-vetting", cfg.VettingEnabled, "Enable LLM-assisted source vetting and promotion for discovered candidates") + fs.StringVar(&cfg.VettingProvider, "source-vetting-provider", cfg.VettingProvider, "LLM provider label for docs/logging (openai, 
mistral, xai, claude, gemini, vllm, ollama)") + fs.StringVar(&cfg.VettingBaseURL, "source-vetting-base-url", cfg.VettingBaseURL, "OpenAI-compatible base URL for source vetting") + fs.StringVar(&cfg.VettingAPIKey, "source-vetting-api-key", cfg.VettingAPIKey, "API key for the source vetting endpoint") + fs.StringVar(&cfg.VettingModel, "source-vetting-model", cfg.VettingModel, "Model name for the source vetting endpoint") + fs.Float64Var(&cfg.VettingTemperature, "source-vetting-temperature", cfg.VettingTemperature, "Temperature for source vetting requests") + fs.IntVar(&cfg.VettingMaxSampleItems, "source-vetting-max-samples", cfg.VettingMaxSampleItems, "Maximum sample items fetched per discovered source for vetting") + fs.BoolVar(&cfg.AlertLLMEnabled, "alert-llm", cfg.AlertLLMEnabled, "Enable LLM alert translation and yes/no category gating") + fs.StringVar(&cfg.AlertLLMModel, "alert-llm-model", cfg.AlertLLMModel, "Model name for LLM alert translation/gating") + fs.IntVar(&cfg.AlertLLMMaxItemsPerSource, "alert-llm-max-items", cfg.AlertLLMMaxItemsPerSource, "Maximum number of alert items per source sent to the alert LLM in one collector pass") + fs.StringVar(&cfg.ReplacementQueuePath, "replacement-queue", cfg.ReplacementQueuePath, "Path for the dead-source DLQ JSON file") + fs.StringVar(&cfg.SourceDBPath, "source-db", cfg.SourceDBPath, "Path to the SQLite source database") + fs.BoolVar(&cfg.SourceDBInit, "source-db-init", cfg.SourceDBInit, "Initialize the SQLite source database schema") + fs.BoolVar(&cfg.SourceDBImportRegistry, "source-db-import-registry", cfg.SourceDBImportRegistry, "Import the JSON registry into the SQLite source database") + fs.BoolVar(&cfg.SourceDBMergeRegistry, "source-db-merge-registry", cfg.SourceDBMergeRegistry, "Merge a JSON registry or curated seed into the SQLite source database") + fs.BoolVar(&cfg.SourceDBExportRegistry, "source-db-export-registry", cfg.SourceDBExportRegistry, "Export the SQLite source database back into the JSON registry") 
+ fs.StringVar(&cfg.CuratedSeedPath, "curated-seed", cfg.CuratedSeedPath, "Path to the curated agency seed JSON file") + fs.StringVar(&cfg.RegistrySeedPath, "registry-seed", cfg.RegistrySeedPath, "Path to the baked-in JSON registry for live merge on each cycle") + fs.BoolVar(&cfg.APIEnabled, "api", cfg.APIEnabled, "Start the search API server alongside the collector") + fs.StringVar(&cfg.APIAddr, "api-addr", cfg.APIAddr, "Listen address for the search API server") if err := fs.Parse(args); err != nil { return err @@ -42,5 +79,57 @@ func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) default: } + if cfg.DiscoverMode { + return discover.Run(ctx, cfg, stdout, stderr) + } + if cfg.SourceDBInit || cfg.SourceDBImportRegistry || cfg.SourceDBMergeRegistry || cfg.SourceDBExportRegistry { + db, err := sourcedb.Open(cfg.SourceDBPath) + if err != nil { + return err + } + defer db.Close() + + if cfg.SourceDBInit { + if err := db.Init(ctx); err != nil { + return err + } + fmt.Fprintf(stdout, "Initialized source DB schema -> %s\n", cfg.SourceDBPath) + } + if cfg.SourceDBImportRegistry { + if err := db.ImportRegistry(ctx, cfg.RegistryPath); err != nil { + return err + } + fmt.Fprintf(stdout, "Imported registry JSON into source DB -> %s\n", cfg.SourceDBPath) + } + if cfg.SourceDBMergeRegistry { + if err := db.MergeRegistry(ctx, cfg.CuratedSeedPath); err != nil { + return err + } + fmt.Fprintf(stdout, "Merged curated seed into source DB -> %s\n", cfg.SourceDBPath) + } + if cfg.SourceDBExportRegistry { + if err := db.ExportRegistry(ctx, cfg.RegistryPath); err != nil { + return err + } + fmt.Fprintf(stdout, "Exported source DB registry -> %s\n", cfg.RegistryPath) + } + return nil + } + + if cfg.APIEnabled { + apiDB, err := sourcedb.Open(cfg.RegistryPath) + if err != nil { + return fmt.Errorf("open DB for API: %w", err) + } + defer apiDB.Close() + + srv := api.New(apiDB, cfg.APIAddr, stderr) + if err := srv.Start(); err != nil { + return err + } + defer 
srv.Stop(ctx) + fmt.Fprintf(stdout, "Search API listening on %s\n", cfg.APIAddr) + } + return run.New(stdout, stderr).Run(ctx, cfg) } diff --git a/internal/collector/app/app_test.go b/internal/collector/app/app_test.go index 78d2a94..370937a 100644 --- a/internal/collector/app/app_test.go +++ b/internal/collector/app/app_test.go @@ -26,11 +26,12 @@ func TestRunWritesOutputs(t *testing.T) { "--filtered-output", filepath.Join(dir, "filtered.json"), "--state-output", filepath.Join(dir, "state.json"), "--source-health-output", filepath.Join(dir, "health.json"), + "--replacement-queue", filepath.Join(dir, "replacement.json"), }, io.Discard, io.Discard) if err != nil { t.Fatal(err) } - for _, path := range []string{"alerts.json", "filtered.json", "state.json", "health.json"} { + for _, path := range []string{"alerts.json", "filtered.json", "state.json", "health.json", "replacement.json"} { if _, err := os.Stat(filepath.Join(dir, path)); err != nil { t.Fatalf("expected %s to be written: %v", path, err) } diff --git a/internal/collector/config/config.go b/internal/collector/config/config.go index c2f4736..3483217 100644 --- a/internal/collector/config/config.go +++ b/internal/collector/config/config.go @@ -14,10 +14,10 @@ const ( defaultFilteredPath = "public/alerts-filtered.json" defaultStatePath = "public/alerts-state.json" defaultSourceHealthPath = "public/source-health.json" - defaultRegistryPath = "registry/source_registry.json" + defaultRegistryPath = "registry/sources.db" defaultTimeoutMS = 15000 defaultIntervalMS = 900000 - defaultMaxPerSource = 20 + defaultMaxPerSource = 40 defaultMaxAgeDays = 180 defaultRemovedDays = 14 defaultMaxBodyBytes = 2 * 1024 * 1024 @@ -41,7 +41,48 @@ type Config struct { HTTPTimeoutMS int MaxResponseBodyBytes int64 UserAgent string + WikimediaUserAgent string TranslateEnabled bool + BrowserEnabled bool + BrowserTimeoutMS int + DiscoverMode bool + DiscoverBackground bool + DiscoverIntervalMS int + DiscoverOutputPath string + 
CandidateQueuePath string + SearchDiscoveryEnabled bool + SearchDiscoveryMaxTargets int + SearchDiscoveryMaxURLsPerTarget int + DDGSearchEnabled bool + DDGSearchMaxQueries int + DDGSearchDelayMS int + WikidataCachePath string + WikidataCacheTTLHours int + VettingEnabled bool + VettingProvider string + VettingBaseURL string + VettingAPIKey string + VettingModel string + VettingTemperature float64 + VettingMaxSampleItems int + AlertLLMEnabled bool + AlertLLMModel string + AlertLLMMaxItemsPerSource int + CategoryDictionaryPath string + ReplacementQueuePath string + SourceDBPath string + SourceDBInit bool + SourceDBImportRegistry bool + SourceDBMergeRegistry bool + SourceDBExportRegistry bool + CuratedSeedPath string + RegistrySeedPath string + CursorsPath string + GeoNamesPath string + NominatimBaseURL string + NominatimEnabled bool + APIEnabled bool + APIAddr string } func Default() Config { @@ -62,8 +103,48 @@ func Default() Config { IntervalMS: defaultIntervalMS, HTTPTimeoutMS: defaultTimeoutMS, MaxResponseBodyBytes: defaultMaxBodyBytes, - UserAgent: "euosint-bot/1.0", + UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + WikimediaUserAgent: "EUOSINTBot/1.0 (https://www.scalytics.io; ops@scalytics.io) WDQS discovery", TranslateEnabled: true, + BrowserEnabled: false, + BrowserTimeoutMS: 30000, + DiscoverMode: false, + DiscoverBackground: true, + DiscoverIntervalMS: defaultIntervalMS, + DiscoverOutputPath: "discover-results.json", + CandidateQueuePath: "registry/source_candidates.json", + SearchDiscoveryEnabled: false, + SearchDiscoveryMaxTargets: 4, + SearchDiscoveryMaxURLsPerTarget: 3, + DDGSearchEnabled: true, + DDGSearchMaxQueries: 10, + DDGSearchDelayMS: 8000, + WikidataCachePath: "registry/wikidata_cache", + WikidataCacheTTLHours: 24, + VettingEnabled: false, + VettingProvider: "openai-compatible", + VettingBaseURL: "https://api.openai.com/v1", + VettingModel: "gpt-4.1-mini", + 
VettingTemperature: 0, + VettingMaxSampleItems: 6, + AlertLLMEnabled: false, + AlertLLMModel: "gpt-4.1-mini", + AlertLLMMaxItemsPerSource: 4, + CategoryDictionaryPath: "registry/category_dictionary.json", + ReplacementQueuePath: "registry/source_dead_letter.json", + SourceDBPath: "registry/sources.db", + SourceDBInit: false, + SourceDBImportRegistry: false, + SourceDBMergeRegistry: false, + SourceDBExportRegistry: false, + CuratedSeedPath: "registry/curated_agencies.seed.json", + RegistrySeedPath: "registry/source_registry.json", + CursorsPath: "public/cursors.json", + GeoNamesPath: "registry/cities500.txt", + NominatimBaseURL: "https://nominatim.openstreetmap.org", + NominatimEnabled: true, + APIEnabled: false, + APIAddr: ":3001", } } @@ -86,7 +167,48 @@ func FromEnv() Config { cfg.HTTPTimeoutMS = envInt("HTTP_TIMEOUT_MS", cfg.HTTPTimeoutMS) cfg.MaxResponseBodyBytes = int64(envInt("MAX_RESPONSE_BODY_BYTES", int(cfg.MaxResponseBodyBytes))) cfg.UserAgent = envString("USER_AGENT", cfg.UserAgent) + cfg.WikimediaUserAgent = envString("WIKIMEDIA_USER_AGENT", cfg.WikimediaUserAgent) cfg.TranslateEnabled = envBool("TRANSLATE_ENABLED", cfg.TranslateEnabled) + cfg.BrowserEnabled = envBool("BROWSER_ENABLED", cfg.BrowserEnabled) + cfg.BrowserTimeoutMS = envInt("BROWSER_TIMEOUT_MS", cfg.BrowserTimeoutMS) + cfg.DiscoverMode = envBool("DISCOVER_MODE", cfg.DiscoverMode) + cfg.DiscoverBackground = envBool("DISCOVER_BACKGROUND", cfg.DiscoverBackground) + cfg.DiscoverIntervalMS = envInt("DISCOVER_INTERVAL_MS", cfg.DiscoverIntervalMS) + cfg.DiscoverOutputPath = envString("DISCOVER_OUTPUT_PATH", cfg.DiscoverOutputPath) + cfg.CandidateQueuePath = envString("CANDIDATE_QUEUE_PATH", cfg.CandidateQueuePath) + cfg.SearchDiscoveryEnabled = envBool("SEARCH_DISCOVERY_ENABLED", cfg.SearchDiscoveryEnabled) + cfg.SearchDiscoveryMaxTargets = envInt("SEARCH_DISCOVERY_MAX_TARGETS", cfg.SearchDiscoveryMaxTargets) + cfg.SearchDiscoveryMaxURLsPerTarget = envInt("SEARCH_DISCOVERY_MAX_URLS_PER_TARGET", 
cfg.SearchDiscoveryMaxURLsPerTarget) + cfg.WikidataCachePath = envString("WIKIDATA_CACHE_PATH", cfg.WikidataCachePath) + cfg.WikidataCacheTTLHours = envInt("WIKIDATA_CACHE_TTL_HOURS", cfg.WikidataCacheTTLHours) + cfg.VettingEnabled = envBool("SOURCE_VETTING_ENABLED", cfg.VettingEnabled) + cfg.VettingProvider = envString("SOURCE_VETTING_PROVIDER", cfg.VettingProvider) + cfg.VettingBaseURL = envString("SOURCE_VETTING_BASE_URL", cfg.VettingBaseURL) + cfg.VettingAPIKey = envString("SOURCE_VETTING_API_KEY", cfg.VettingAPIKey) + cfg.VettingModel = envString("SOURCE_VETTING_MODEL", cfg.VettingModel) + cfg.VettingTemperature = envFloat("SOURCE_VETTING_TEMPERATURE", cfg.VettingTemperature) + cfg.VettingMaxSampleItems = envInt("SOURCE_VETTING_MAX_SAMPLE_ITEMS", cfg.VettingMaxSampleItems) + cfg.AlertLLMEnabled = envBool("ALERT_LLM_ENABLED", cfg.AlertLLMEnabled) + cfg.AlertLLMModel = envString("ALERT_LLM_MODEL", cfg.AlertLLMModel) + cfg.AlertLLMMaxItemsPerSource = envInt("ALERT_LLM_MAX_ITEMS_PER_SOURCE", cfg.AlertLLMMaxItemsPerSource) + cfg.CategoryDictionaryPath = envString("CATEGORY_DICTIONARY_PATH", cfg.CategoryDictionaryPath) + cfg.ReplacementQueuePath = envString("REPLACEMENT_QUEUE_PATH", cfg.ReplacementQueuePath) + cfg.SourceDBPath = envString("SOURCE_DB_PATH", cfg.SourceDBPath) + cfg.SourceDBInit = envBool("SOURCE_DB_INIT", cfg.SourceDBInit) + cfg.SourceDBImportRegistry = envBool("SOURCE_DB_IMPORT_REGISTRY", cfg.SourceDBImportRegistry) + cfg.SourceDBMergeRegistry = envBool("SOURCE_DB_MERGE_REGISTRY", cfg.SourceDBMergeRegistry) + cfg.SourceDBExportRegistry = envBool("SOURCE_DB_EXPORT_REGISTRY", cfg.SourceDBExportRegistry) + cfg.CuratedSeedPath = envString("CURATED_SEED_PATH", cfg.CuratedSeedPath) + cfg.RegistrySeedPath = envString("REGISTRY_SEED_PATH", cfg.RegistrySeedPath) + cfg.CursorsPath = envString("CURSORS_PATH", cfg.CursorsPath) + cfg.DDGSearchEnabled = envBool("DDG_SEARCH_ENABLED", cfg.DDGSearchEnabled) + cfg.DDGSearchMaxQueries = envInt("DDG_SEARCH_MAX_QUERIES", 
cfg.DDGSearchMaxQueries) + cfg.DDGSearchDelayMS = envInt("DDG_SEARCH_DELAY_MS", cfg.DDGSearchDelayMS) + cfg.GeoNamesPath = envString("GEONAMES_PATH", cfg.GeoNamesPath) + cfg.NominatimBaseURL = envString("NOMINATIM_BASE_URL", cfg.NominatimBaseURL) + cfg.NominatimEnabled = envBool("NOMINATIM_ENABLED", cfg.NominatimEnabled) + cfg.APIEnabled = envBool("API_ENABLED", cfg.APIEnabled) + cfg.APIAddr = envString("API_ADDR", cfg.APIAddr) return cfg } diff --git a/internal/collector/dictionary/dictionary.go b/internal/collector/dictionary/dictionary.go new file mode 100644 index 0000000..c47fe3d --- /dev/null +++ b/internal/collector/dictionary/dictionary.go @@ -0,0 +1,232 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package dictionary + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "strings" + + "github.com/scalytics/euosint/internal/collector/model" +) + +type Entry struct { + Strong []string `json:"strong"` + Weak []string `json:"weak"` + Negative []string `json:"negative"` + URLHints []string `json:"url_hints"` +} + +type CategoryDictionary struct { + Default Entry `json:"default"` + Languages map[string]Entry `json:"languages"` +} + +type Document struct { + Categories map[string]CategoryDictionary `json:"categories"` +} + +type Store struct { + doc Document +} + +func Load(path string) (*Store, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read category dictionary %s: %w", path, err) + } + var doc Document + if err := json.Unmarshal(data, &doc); err != nil { + return nil, fmt.Errorf("decode category dictionary %s: %w", path, err) + } + if doc.Categories == nil { + doc.Categories = map[string]CategoryDictionary{} + } + return &Store{doc: doc}, nil +} + +func (s *Store) Match(category string, source model.RegistrySource, title string, link string) bool { + if s == nil { + return true + } + cat := s.doc.Categories[strings.ToLower(strings.TrimSpace(category))] 
+ combined := strings.ToLower(strings.TrimSpace(title + " " + link)) + urlOnly := strings.ToLower(strings.TrimSpace(link)) + + positive := merge(cat.Default.Strong) + negative := merge(cat.Default.Negative) + urlHints := merge(cat.Default.URLHints) + for _, lang := range inferLanguages(source) { + entry, ok := cat.Languages[lang] + if !ok { + continue + } + positive = append(positive, entry.Strong...) + positive = append(positive, entry.Weak...) + negative = append(negative, entry.Negative...) + urlHints = append(urlHints, entry.URLHints...) + } + + if len(positive) == 0 && len(urlHints) == 0 && len(negative) == 0 { + return true + } + for _, term := range negative { + if contains(combined, term) { + return false + } + } + for _, term := range positive { + if contains(combined, term) { + return true + } + } + for _, term := range urlHints { + if contains(urlOnly, term) { + return true + } + } + return false +} + +func inferLanguages(source model.RegistrySource) []string { + set := map[string]struct{}{"default": {}} + for _, code := range languagesForCountry(strings.ToUpper(strings.TrimSpace(source.Source.CountryCode))) { + set[code] = struct{}{} + } + lowerFeedURL := strings.ToLower(source.FeedURL + " " + strings.Join(source.FeedURLs, " ")) + switch { + case strings.Contains(lowerFeedURL, "idiomaactual=ca"), strings.Contains(lowerFeedURL, "/_ca/"): + set["ca"] = struct{}{} + case strings.Contains(lowerFeedURL, "idiomaactual=eu"): + set["eu"] = struct{}{} + case strings.Contains(lowerFeedURL, "idiomaactual=gl"): + set["gl"] = struct{}{} + } + out := make([]string, 0, len(set)) + for code := range set { + out = append(out, code) + } + sort.Strings(out) + return out +} + +func languagesForCountry(countryCode string) []string { + switch countryCode { + case "ES", "MX", "AR", "CL", "CO", "CR", "GT", "SV", "HN", "NI", "PA", "PE", "UY", "PY", "VE", "BO", "DO", "CU", "EC": + return []string{"es"} + case "FR", "BE", "LU", "MC", "SN", "CI", "CM", "TN", "DZ", "MA": + return 
[]string{"fr"} + case "DE", "AT": + return []string{"de"} + case "IT", "SM", "VA": + return []string{"it"} + case "PT", "BR", "AO", "MZ", "GW", "CV", "ST", "TL": + return []string{"pt"} + case "NL", "SR": + return []string{"nl"} + case "SE": + return []string{"sv"} + case "NO": + return []string{"no"} + case "DK": + return []string{"da"} + case "FI": + return []string{"fi"} + case "PL": + return []string{"pl"} + case "CZ": + return []string{"cs"} + case "SK": + return []string{"sk"} + case "SI": + return []string{"sl"} + case "HR", "BA", "RS", "ME": + return []string{"hr", "sr"} + case "RO", "MD": + return []string{"ro"} + case "HU": + return []string{"hu"} + case "LT": + return []string{"lt"} + case "LV": + return []string{"lv"} + case "EE": + return []string{"et"} + case "GR", "CY": + return []string{"el"} + case "TR": + return []string{"tr"} + case "UA": + return []string{"uk"} + case "RU", "BY", "KG": + return []string{"ru"} + case "GE": + return []string{"ka"} + case "AM": + return []string{"hy"} + case "IL": + return []string{"he"} + case "SA", "AE", "EG", "JO", "LB", "IQ", "QA", "KW", "OM", "BH": + return []string{"ar"} + case "IR": + return []string{"fa"} + case "IN": + return []string{"hi", "en"} + case "PK": + return []string{"ur", "en"} + case "BD": + return []string{"bn"} + case "LK": + return []string{"si", "ta", "en"} + case "NP": + return []string{"ne"} + case "CN": + return []string{"zh"} + case "TW": + return []string{"zh-hant", "zh"} + case "HK", "MO": + return []string{"zh-hant", "zh", "en"} + case "JP": + return []string{"ja"} + case "KR": + return []string{"ko"} + case "TH": + return []string{"th"} + case "VN": + return []string{"vi"} + case "ID": + return []string{"id"} + case "MY": + return []string{"ms", "en"} + case "PH": + return []string{"fil", "en"} + case "ZA": + return []string{"en", "af"} + case "KE", "UG", "TZ": + return []string{"sw", "en"} + default: + return nil + } +} + +func merge(values []string) []string { + out := 
make([]string, 0, len(values)) + for _, value := range values { + value = strings.ToLower(strings.TrimSpace(value)) + if value != "" { + out = append(out, value) + } + } + return out +} + +func contains(haystack string, needle string) bool { + needle = strings.ToLower(strings.TrimSpace(needle)) + if needle == "" { + return false + } + return strings.Contains(haystack, needle) +} diff --git a/internal/collector/discover/discover.go b/internal/collector/discover/discover.go new file mode 100644 index 0000000..47e9069 --- /dev/null +++ b/internal/collector/discover/discover.go @@ -0,0 +1,632 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/url" + "os" + "path/filepath" + "strings" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/fetch" + "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/collector/parse" + "github.com/scalytics/euosint/internal/collector/registry" + "github.com/scalytics/euosint/internal/collector/vet" + "github.com/scalytics/euosint/internal/sourcedb" +) + +// DiscoveredSource represents a newly discovered OSINT feed candidate. +type DiscoveredSource struct { + FeedURL string `json:"feed_url"` + FeedType string `json:"feed_type"` // "rss", "atom", or "html-list" + AuthorityType string `json:"authority_type"` // "cert", "police", "national_security", etc. 
+ Category string `json:"suggested_category"` // suggested category for registry + OrgName string `json:"org_name"` + Country string `json:"country"` + CountryCode string `json:"country_code,omitempty"` + TeamURL string `json:"team_url"` + DiscoveredVia string `json:"discovered_via"` +} + +// Run executes the candidate crawler pipeline: reads the candidate intake JSON, +// probes each candidate for stable feeds or HTML listing pages, skips dead-letter +// entries, deduplicates against the live registry, and writes a discovery report. +func Run(ctx context.Context, cfg config.Config, stdout io.Writer, stderr io.Writer) error { + discoveryCfg := cfg + if discoveryCfg.HTTPTimeoutMS < 45000 { + discoveryCfg.HTTPTimeoutMS = 45000 + } + client := fetch.New(discoveryCfg) + var searchClient *vet.Client + var sourceVetter *vet.Vetter + var browser *fetch.BrowserClient + if cfg.VettingEnabled { + sourceVetter = vet.New(cfg) + } + if cfg.SearchDiscoveryEnabled { + searchClient = vet.NewClient(cfg) + } + if cfg.DDGSearchEnabled && cfg.BrowserEnabled { + b, err := fetch.NewBrowser(cfg.BrowserTimeoutMS) + if err != nil { + fmt.Fprintf(stderr, "WARN DDG search disabled (browser init failed): %v\n", err) + } else { + browser = b + defer browser.Close() + } + } + + // Load existing registry for deduplication and gap analysis. 
+ existing := map[string]struct{}{} + var registrySources []model.RegistrySource + if sources, err := registry.Load(cfg.RegistryPath); err == nil { + registrySources = sources + for _, src := range sources { + if src.FeedURL != "" { + existing[normalizeURL(src.FeedURL)] = struct{}{} + } + for _, u := range src.FeedURLs { + existing[normalizeURL(u)] = struct{}{} + } + } + } + + fmt.Fprintf(stderr, "Starting source discovery (existing registry has %d feed URLs)\n", len(existing)) + + var discovered []DiscoveredSource + dead := loadDeadLetterQueue(cfg.ReplacementQueuePath) + fmt.Fprintf(stderr, "Dead-letter queue: %d sources will be skipped\n", len(dead)) + seededCandidates, err := generateAutonomousCandidates(ctx, cfg, client, browser, searchClient, dead, registrySources, stderr) + if err != nil { + fmt.Fprintf(stderr, "WARN autonomous candidate discovery failed: %v\n", err) + } + candidates := mergeCandidates(loadCandidateQueue(cfg.CandidateQueuePath), seededCandidates, existing, dead) + fmt.Fprintf(stderr, "Candidate queue: %d sources queued for crawl\n", len(candidates)) + remainingCandidates := make([]model.SourceCandidate, 0, len(candidates)) + promotedSources := make([]model.RegistrySource, 0) + + for _, candidate := range candidates { + if ctx.Err() != nil { + break + } + if !passesDiscoveryHygiene(candidate.AuthorityName, firstNonEmpty(candidate.BaseURL, candidate.URL), candidate.AuthorityType) { + remainingCandidates = append(remainingCandidates, candidate) + continue + } + if isDeadLettered(candidate, dead) { + continue + } + + baseURL := candidateBaseURL(candidate) + if baseURL == "" { + remainingCandidates = append(remainingCandidates, candidate) + continue + } + promotedForCandidate := false + results := ProbeFeeds(ctx, client, baseURL) + for _, r := range results { + if _, ok := existing[normalizeURL(r.FeedURL)]; ok { + continue + } + existing[normalizeURL(r.FeedURL)] = struct{}{} + found := DiscoveredSource{ + FeedURL: r.FeedURL, + FeedType: 
r.FeedType, + AuthorityType: candidate.AuthorityType, + Category: candidate.Category, + OrgName: candidate.AuthorityName, + Country: candidate.Country, + CountryCode: candidate.CountryCode, + TeamURL: baseURL, + DiscoveredVia: "candidate-queue", + } + discovered = append(discovered, found) + if sourceVetter == nil { + continue + } + promoted, verdict, err := vetAndPromote(ctx, cfg, client, sourceVetter, candidate, found) + if err != nil { + fmt.Fprintf(stderr, "WARN source vetting failed for %s: %v\n", found.FeedURL, err) + continue + } + if promoted != nil { + promotedSources = append(promotedSources, *promoted) + promotedForCandidate = true + fmt.Fprintf(stderr, "Promoted source %s via %s (%s)\n", promoted.Source.SourceID, cfg.VettingProvider, verdict.Reason) + } + } + if len(results) == 0 { + target := strings.TrimSpace(candidate.URL) + if target == "" { + target = baseURL + } + if _, ok := existing[normalizeURL(target)]; !ok && probeHTMLPage(ctx, client, target) { + existing[normalizeURL(target)] = struct{}{} + found := DiscoveredSource{ + FeedURL: target, + FeedType: "html-list", + AuthorityType: candidate.AuthorityType, + Category: candidate.Category, + OrgName: candidate.AuthorityName, + Country: candidate.Country, + CountryCode: candidate.CountryCode, + TeamURL: baseURL, + DiscoveredVia: "candidate-queue", + } + discovered = append(discovered, found) + if sourceVetter != nil { + promoted, verdict, err := vetAndPromote(ctx, cfg, client, sourceVetter, candidate, found) + if err != nil { + fmt.Fprintf(stderr, "WARN source vetting failed for %s: %v\n", found.FeedURL, err) + } else if promoted != nil { + promotedSources = append(promotedSources, *promoted) + promotedForCandidate = true + fmt.Fprintf(stderr, "Promoted source %s via %s (%s)\n", promoted.Source.SourceID, cfg.VettingProvider, verdict.Reason) + } + } + } + } + if !promotedForCandidate { + remainingCandidates = append(remainingCandidates, candidate) + } + } + + // Write results. 
+ fmt.Fprintf(stderr, "Discovery finished: %d new candidates\n", len(discovered)) + if err := WriteReport(cfg.DiscoverOutputPath, discovered, len(existing), stdout); err != nil { + return err + } + if err := promoteDiscoveredSources(ctx, cfg.RegistryPath, promotedSources); err != nil { + return err + } + if err := writeCandidateQueue(cfg.CandidateQueuePath, remainingCandidates); err != nil { + return err + } + return nil +} + +func generateAutonomousCandidates(ctx context.Context, cfg config.Config, client *fetch.Client, browser *fetch.BrowserClient, searchClient searchCompleter, dead []model.SourceReplacementCandidate, registrySources []model.RegistrySource, stderr io.Writer) ([]model.SourceCandidate, error) { + candidates := make([]model.SourceCandidate, 0) + var failures []string + var slowSkips []string + + teams, err := FetchFIRSTTeams(ctx, client) + if err != nil { + if isDiscoveryTimeout(err) { + slowSkips = append(slowSkips, "FIRST.org") + } else { + failures = append(failures, fmt.Sprintf("FIRST.org: %v", err)) + } + } else { + fmt.Fprintf(stderr, "FIRST.org: fetched %d teams for candidate seeding\n", len(teams)) + for _, team := range teams { + candidates = append(candidates, model.SourceCandidate{ + URL: team.Website, + AuthorityName: team.ShortName, + AuthorityType: "cert", + Category: "cyber_advisory", + Country: team.Country, + BaseURL: team.Website, + Notes: "autonomous seed: first.org", + }) + } + } + + agencies, err := FetchPoliceAgencies(ctx, cfg, client) + if err != nil { + if isDiscoveryTimeout(err) { + slowSkips = append(slowSkips, "Wikidata police") + } else { + failures = append(failures, fmt.Sprintf("Wikidata police: %v", err)) + } + } else { + fmt.Fprintf(stderr, "Wikidata: fetched %d police/law-enforcement agencies for candidate seeding\n", len(agencies)) + for _, agency := range agencies { + if !passesDiscoveryHygiene(agency.Name, agency.Website, agency.AuthorityType) { + continue + } + candidates = append(candidates, 
model.SourceCandidate{ + URL: agency.Website, + AuthorityName: agency.Name, + AuthorityType: agency.AuthorityType, + Category: agency.Category, + Country: agency.Country, + CountryCode: agency.CountryCode, + BaseURL: agency.Website, + Notes: "autonomous seed: wikidata-police", + }) + } + } + + humOrgs, err := FetchHumanitarianOrgs(ctx, cfg, client) + if err != nil { + if isDiscoveryTimeout(err) { + slowSkips = append(slowSkips, "Wikidata humanitarian") + } else { + failures = append(failures, fmt.Sprintf("Wikidata humanitarian: %v", err)) + } + } else { + fmt.Fprintf(stderr, "Wikidata: fetched %d humanitarian/emergency orgs for candidate seeding\n", len(humOrgs)) + for _, org := range humOrgs { + if !passesDiscoveryHygiene(org.Name, org.Website, "public_safety_program") { + continue + } + candidates = append(candidates, model.SourceCandidate{ + URL: org.Website, + AuthorityName: org.Name, + AuthorityType: "public_safety_program", + Category: "humanitarian_security", + Country: org.Country, + CountryCode: org.CountryCode, + BaseURL: org.Website, + Notes: "autonomous seed: wikidata-humanitarian", + }) + } + } + + if len(failures) > 0 { + fmt.Fprintf(stderr, "WARN structured discovery partially failed: %s\n", strings.Join(failures, " | ")) + } + if len(slowSkips) > 0 { + fmt.Fprintf(stderr, "Structured discovery skipped slow providers: %s\n", strings.Join(slowSkips, ", ")) + } + // Gap analysis: find countries with missing categories and seed searches. + gapCandidates := AnalyzeGaps(registrySources, stderr) + if len(gapCandidates) > 0 { + candidates = append(candidates, gapCandidates...) + } + + searchSeeds := append([]model.SourceCandidate{}, candidates...) + replacementTargets := buildReplacementSearchTargets(dead) + if len(replacementTargets) > 0 { + fmt.Fprintf(stderr, "Replacement search targets: %d dead-source metadata entries queued for feed search\n", len(replacementTargets)) + searchSeeds = append(searchSeeds, replacementTargets...) 
+ } + + // DDG search is the first citizen — free, no API key needed. + // LLM search is the fallback for targets DDG didn't cover. + ddgCandidates, err := ddgSearchCandidates(ctx, cfg, browser, searchSeeds) + if err != nil { + failures = append(failures, fmt.Sprintf("ddg-search: %v", err)) + } else if len(ddgCandidates) > 0 { + fmt.Fprintf(stderr, "DDG search discovery: found %d candidate URLs\n", len(ddgCandidates)) + candidates = append(candidates, ddgCandidates...) + } + + // LLM search only for remaining seeds that DDG didn't find results for. + if cfg.SearchDiscoveryEnabled && searchClient != nil { + ddgCoveredKeys := map[string]struct{}{} + for _, c := range ddgCandidates { + key := strings.ToLower(c.AuthorityName) + "|" + strings.ToUpper(c.CountryCode) + "|" + strings.ToLower(c.Category) + ddgCoveredKeys[key] = struct{}{} + } + var llmSeeds []model.SourceCandidate + for _, seed := range searchSeeds { + key := strings.ToLower(seed.AuthorityName) + "|" + strings.ToUpper(seed.CountryCode) + "|" + strings.ToLower(seed.Category) + if _, covered := ddgCoveredKeys[key]; !covered { + llmSeeds = append(llmSeeds, seed) + } + } + if len(llmSeeds) > 0 { + llmCandidates, err := llmSearchCandidates(ctx, cfg, searchClient, llmSeeds) + if err != nil { + failures = append(failures, fmt.Sprintf("llm-search: %v", err)) + } else if len(llmCandidates) > 0 { + fmt.Fprintf(stderr, "LLM search discovery (fallback): generated %d candidate URLs via %s\n", len(llmCandidates), cfg.VettingProvider) + candidates = append(candidates, llmCandidates...) 
+ } + } + } + if len(failures) > 0 { + return candidates, fmt.Errorf("%s", strings.Join(failures, " | ")) + } + return candidates, nil +} + +func isDiscoveryTimeout(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "timeout") || + strings.Contains(msg, "deadline exceeded") || + strings.Contains(msg, "request canceled") +} + +func buildReplacementSearchTargets(dead []model.SourceReplacementCandidate) []model.SourceCandidate { + out := make([]model.SourceCandidate, 0, len(dead)) + seen := map[string]struct{}{} + for _, entry := range dead { + target := model.SourceCandidate{ + AuthorityName: strings.TrimSpace(entry.AuthorityName), + AuthorityType: strings.TrimSpace(entry.AuthorityType), + Category: strings.TrimSpace(entry.Category), + Country: strings.TrimSpace(entry.Country), + CountryCode: strings.ToUpper(strings.TrimSpace(entry.CountryCode)), + Region: strings.TrimSpace(entry.Region), + BaseURL: strings.TrimSpace(entry.BaseURL), + Notes: "replacement-search: dead-source metadata", + } + if target.BaseURL == "" { + target.BaseURL = strings.TrimSpace(entry.FeedURL) + } + if !passesDiscoveryHygiene(target.AuthorityName, firstNonEmpty(target.BaseURL, target.URL), target.AuthorityType) { + continue + } + key := strings.ToLower(target.AuthorityName) + "|" + target.CountryCode + "|" + strings.ToLower(target.Category) + "|" + normalizeURL(firstNonEmpty(target.BaseURL, target.URL)) + if key == "|||" { + continue + } + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, target) + } + return out +} + +func mergeCandidates(existingQueue []model.SourceCandidate, discovered []model.SourceCandidate, active map[string]struct{}, dead []model.SourceReplacementCandidate) []model.SourceCandidate { + out := make([]model.SourceCandidate, 0, len(existingQueue)+len(discovered)) + seen := map[string]struct{}{} + add := func(candidate model.SourceCandidate) { + if 
isDeadLettered(candidate, dead) { + return + } + key := normalizeURL(firstNonEmpty(candidate.URL, candidate.BaseURL)) + if key == "" { + return + } + if _, ok := active[key]; ok { + return + } + if _, ok := seen[key]; ok { + return + } + seen[key] = struct{}{} + out = append(out, candidate) + } + for _, candidate := range existingQueue { + add(candidate) + } + for _, candidate := range discovered { + add(candidate) + } + return out +} + +func normalizeURL(u string) string { + u = strings.TrimRight(strings.TrimSpace(u), "/") + u = strings.TrimPrefix(u, "https://") + u = strings.TrimPrefix(u, "http://") + return strings.ToLower(u) +} + +func loadDeadLetterQueue(path string) []model.SourceReplacementCandidate { + raw, err := os.ReadFile(path) + if err != nil { + return nil + } + var doc model.SourceReplacementDocument + if err := json.Unmarshal(raw, &doc); err != nil { + return nil + } + return doc.Sources +} + +func loadCandidateQueue(path string) []model.SourceCandidate { + raw, err := os.ReadFile(path) + if err != nil { + return nil + } + var doc model.SourceCandidateDocument + if err := json.Unmarshal(raw, &doc); err != nil { + return nil + } + return doc.Sources +} + +func writeCandidateQueue(path string, candidates []model.SourceCandidate) error { + doc := model.SourceCandidateDocument{ + GeneratedAt: "", + Sources: candidates, + } + if doc.Sources == nil { + doc.Sources = []model.SourceCandidate{} + } + return writeJSON(path, doc) +} + +func candidateBaseURL(candidate model.SourceCandidate) string { + for _, value := range []string{candidate.BaseURL, candidate.URL} { + value = strings.TrimSpace(value) + if value == "" { + continue + } + parsed, err := url.Parse(value) + if err != nil { + continue + } + if parsed.Scheme == "" || parsed.Host == "" { + continue + } + return (&url.URL{Scheme: parsed.Scheme, Host: parsed.Host}).String() + } + return "" +} + +func isDeadLettered(candidate model.SourceCandidate, dead []model.SourceReplacementCandidate) bool { + 
candidateURLs := compactNormalizedURLs(candidate.URL, candidate.BaseURL) + for _, entry := range dead { + for _, deadURL := range compactNormalizedURLs(entry.FeedURL, entry.BaseURL) { + for _, candidateURL := range candidateURLs { + if candidateURL == deadURL { + return true + } + } + } + } + return false +} + +func compactNormalizedURLs(values ...string) []string { + out := make([]string, 0, len(values)) + seen := map[string]struct{}{} + for _, value := range values { + norm := normalizeURL(value) + if norm == "" { + continue + } + if _, ok := seen[norm]; ok { + continue + } + seen[norm] = struct{}{} + out = append(out, norm) + } + return out +} + +func vetAndPromote(ctx context.Context, cfg config.Config, client *fetch.Client, sourceVetter *vet.Vetter, candidate model.SourceCandidate, discovered DiscoveredSource) (*model.RegistrySource, vet.Verdict, error) { + samples, err := sampleSource(ctx, client, discovered, cfg.VettingMaxSampleItems) + if err != nil { + return nil, vet.Verdict{}, err + } + verdict, err := sourceVetter.Evaluate(ctx, vet.Input{ + AuthorityName: candidate.AuthorityName, + AuthorityType: candidate.AuthorityType, + Category: candidate.Category, + Country: candidate.Country, + CountryCode: candidate.CountryCode, + URL: discovered.FeedURL, + BaseURL: candidateBaseURL(candidate), + FeedType: discovered.FeedType, + Samples: samples, + }) + if err != nil { + return nil, vet.Verdict{}, err + } + if !verdict.Approve || verdict.PromotionStatus == "rejected" { + return nil, verdict, nil + } + + src := model.RegistrySource{ + Type: discoveredTypeToRegistryType(discovered.FeedType), + FeedURL: discovered.FeedURL, + Category: firstNonEmpty(candidate.Category, discovered.Category), + RegionTag: strings.ToUpper(strings.TrimSpace(candidate.CountryCode)), + SourceQuality: float64(verdict.SourceQuality), + PromotionStatus: verdict.PromotionStatus, + Reporting: model.ReportingMetadata{}, + Source: model.SourceMetadata{ + SourceID: sourceIDForCandidate(candidate, 
discovered), + AuthorityName: firstNonEmpty(candidate.AuthorityName, discovered.OrgName), + Country: firstNonEmpty(candidate.Country, discovered.Country), + CountryCode: strings.ToUpper(firstNonEmpty(candidate.CountryCode, discovered.CountryCode)), + Region: firstNonEmpty(candidate.Region, "International"), + AuthorityType: firstNonEmpty(candidate.AuthorityType, discovered.AuthorityType, "public_safety_program"), + BaseURL: candidateBaseURL(candidate), + Scope: verdict.Level, + Level: verdict.Level, + MissionTags: verdict.MissionTags, + OperationalRelevance: float64(verdict.OperationalRelevance), + LanguageCode: "", + }, + } + if src.Source.BaseURL == "" { + src.Source.BaseURL = discovered.TeamURL + } + return &src, verdict, nil +} + +func promoteDiscoveredSources(ctx context.Context, registryPath string, sources []model.RegistrySource) error { + if len(sources) == 0 || !isSQLitePath(registryPath) { + return nil + } + db, err := sourcedb.Open(registryPath) + if err != nil { + return fmt.Errorf("open source DB for promoted sources: %w", err) + } + defer db.Close() + if err := db.UpsertRegistrySources(ctx, sources); err != nil { + return fmt.Errorf("promote discovered sources: %w", err) + } + return nil +} + +func sampleSource(ctx context.Context, client *fetch.Client, discovered DiscoveredSource, limit int) ([]vet.Sample, error) { + accept := "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8" + if discovered.FeedType == "html-list" { + accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + } + body, err := client.Text(ctx, discovered.FeedURL, true, accept) + if err != nil { + return nil, fmt.Errorf("sample source fetch %s: %w", discovered.FeedURL, err) + } + var items []parse.FeedItem + if discovered.FeedType == "html-list" { + items = parse.ParseHTMLAnchors(string(body), discovered.FeedURL) + } else { + items = parse.ParseFeed(string(body)) + } + return vet.SamplesFromFeedItems(items, limit), nil +} + +func 
discoveredTypeToRegistryType(feedType string) string { + switch strings.TrimSpace(feedType) { + case "html-list": + return "html-list" + default: + return "rss" + } +} + +func sourceIDForCandidate(candidate model.SourceCandidate, discovered DiscoveredSource) string { + base := firstNonEmpty(candidate.AuthorityName, discovered.OrgName, candidate.URL, discovered.FeedURL) + base = strings.ToLower(base) + replacer := strings.NewReplacer("https://", "", "http://", "", ".", "-", "/", "-", " ", "-", "_", "-", ":", "-", "&", "and") + base = replacer.Replace(base) + base = strings.Trim(base, "-") + for strings.Contains(base, "--") { + base = strings.ReplaceAll(base, "--", "-") + } + if base == "" { + return "candidate-source" + } + return base +} + +// writeJSON is a helper that marshals data to indented JSON and writes to a file. +func writeJSON(path string, data any) error { + b, err := json.MarshalIndent(data, "", " ") + if err != nil { + return err + } + b = append(b, '\n') + return os.WriteFile(path, b, 0o644) +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + value = strings.TrimSpace(value) + if value != "" { + return value + } + } + return "" +} + +func isSQLitePath(path string) bool { + switch strings.ToLower(filepath.Ext(strings.TrimSpace(path))) { + case ".db", ".sqlite", ".sqlite3": + return true + default: + return false + } +} diff --git a/internal/collector/discover/discover_test.go b/internal/collector/discover/discover_test.go new file mode 100644 index 0000000..d50a049 --- /dev/null +++ b/internal/collector/discover/discover_test.go @@ -0,0 +1,295 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "context" + "encoding/json" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/collector/vet" +) + +func TestDetectFeedTypeRSS(t *testing.T) { + body := `Test` + if got := detectFeedType(body); got != "rss" { + t.Errorf("expected rss, got %q", got) + } +} + +func TestDetectFeedTypeAtom(t *testing.T) { + body := `Test` + if got := detectFeedType(body); got != "atom" { + t.Errorf("expected atom, got %q", got) + } +} + +func TestDetectFeedTypeHTML(t *testing.T) { + body := `Press Releases` + if got := detectFeedType(body); got != "" { + t.Errorf("expected empty, got %q", got) + } +} + +func TestNormalizeURL(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"https://www.example.com/feed/", "www.example.com/feed"}, + {"http://Example.COM/RSS", "example.com/rss"}, + {" https://foo.bar/ ", "foo.bar"}, + } + for _, tt := range tests { + got := normalizeURL(tt.input) + if got != tt.want { + t.Errorf("normalizeURL(%q) = %q, want %q", tt.input, got, tt.want) + } + } +} + +func TestCommonPressReleasePathsNotEmpty(t *testing.T) { + if len(commonPressReleasePaths) == 0 { + t.Fatal("expected non-empty press release paths") + } + for _, p := range commonPressReleasePaths { + if !strings.HasPrefix(p, "/") { + t.Errorf("press release path should start with /: %q", p) + } + } +} + +func TestPoliceAgencyQueryNotEmpty(t *testing.T) { + query := buildPoliceAgencyQuery(policeAgencyTypeIDs[0]) + if strings.TrimSpace(query) == "" { + t.Fatal("SPARQL query should not be empty") + } + // Basic sanity — query must select the fields we parse. 
+ for _, field := range []string{"website", "countryCode"} { + if !strings.Contains(query, field) { + t.Errorf("SPARQL query missing field %q", field) + } + } +} + +func TestDiscoveryHygieneRejectsLocalPolice(t *testing.T) { + if passesDiscoveryHygiene("City of Valletta Police Department", "https://city.police.example", "police") { + t.Fatal("expected local police source to fail hygiene gate") + } + if !passesDiscoveryHygiene("Europol", "https://www.europol.europa.eu", "police") { + t.Fatal("expected supranational source to pass hygiene gate") + } +} + +func TestDiscoveryHygieneDoesNotTreatTransportAsSport(t *testing.T) { + if !passesDiscoveryHygiene("Ministry of Transport", "https://transport.gov.example", "national_security") { + t.Fatal("expected transport ministry to avoid sport false-positive") + } +} + +func TestSearchTopicLabelIncludesNewCategories(t *testing.T) { + if got := searchTopicLabel("maritime_security", "national_security"); !strings.Contains(got, "maritime security") { + t.Fatalf("expected maritime topic label, got %q", got) + } + if got := searchTopicLabel("legislative", "regulatory"); !strings.Contains(got, "sanctions") { + t.Fatalf("expected legislative topic label, got %q", got) + } +} + +func TestLoadCandidateQueueAndDeadLetterSkip(t *testing.T) { + dir := t.TempDir() + candidatePath := filepath.Join(dir, "candidates.json") + deadPath := filepath.Join(dir, "dead.json") + if err := os.WriteFile(candidatePath, []byte(`{"sources":[{"url":"https://example.test/news","authority_name":"Example Agency","authority_type":"police","category":"public_appeal","country":"France","country_code":"FR"}]}`), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(deadPath, []byte(`{"sources":[{"feed_url":"https://example.test/news"}]}`), 0o644); err != nil { + t.Fatal(err) + } + candidates := loadCandidateQueue(candidatePath) + if len(candidates) != 1 { + t.Fatalf("expected 1 candidate, got %d", len(candidates)) + } + dead := 
loadDeadLetterQueue(deadPath) + if !isDeadLettered(candidates[0], dead) { + t.Fatal("expected candidate to be skipped when present in dead-letter queue") + } + if isDeadLettered(model.SourceCandidate{URL: "https://other.test/feed"}, dead) { + t.Fatal("unexpected dead-letter match for unrelated candidate") + } +} + +func TestMergeCandidatesSkipsDeadAndActive(t *testing.T) { + merged := mergeCandidates( + []model.SourceCandidate{ + {URL: "https://existing-queue.test/feed", AuthorityName: "Queued"}, + }, + []model.SourceCandidate{ + {URL: "https://active.test/feed", AuthorityName: "Active"}, + {URL: "https://dead.test/feed", AuthorityName: "Dead"}, + {URL: "https://new.test/feed", AuthorityName: "New"}, + }, + map[string]struct{}{ + normalizeURL("https://active.test/feed"): {}, + }, + []model.SourceReplacementCandidate{ + {FeedURL: "https://dead.test/feed"}, + }, + ) + if len(merged) != 2 { + t.Fatalf("expected 2 merged candidates, got %d", len(merged)) + } + if normalizeURL(merged[0].URL) != normalizeURL("https://existing-queue.test/feed") { + t.Fatalf("unexpected first merged candidate %#v", merged[0]) + } + if normalizeURL(merged[1].URL) != normalizeURL("https://new.test/feed") { + t.Fatalf("unexpected second merged candidate %#v", merged[1]) + } +} + +type stubSearchCompleter struct { + content string + err error +} + +func (s stubSearchCompleter) Complete(_ context.Context, _ []vet.Message) (string, error) { + return s.content, s.err +} + +func TestDecodeLLMSearchResponse(t *testing.T) { + resp, err := decodeLLMSearchResponse("```json\n{\"urls\":[{\"url\":\"https://www.europol.europa.eu/cms/api/rss/news\",\"reason\":\"official rss\"}]}\n```") + if err != nil { + t.Fatalf("decodeLLMSearchResponse returned error: %v", err) + } + if len(resp.URLs) != 1 || resp.URLs[0].URL != "https://www.europol.europa.eu/cms/api/rss/news" { + t.Fatalf("unexpected decoded search response: %#v", resp) + } +} + +func TestSelectSearchTargetsHonorsCap(t *testing.T) { + cfg := 
config.Default() + cfg.SearchDiscoveryMaxTargets = 2 + targets := selectSearchTargets(cfg, []model.SourceCandidate{ + {AuthorityName: "Europol", URL: "https://www.europol.europa.eu", AuthorityType: "police", Category: "public_appeal"}, + {AuthorityName: "Interpol", URL: "https://www.interpol.int", AuthorityType: "police", Category: "wanted_suspect"}, + {AuthorityName: "FIRST", URL: "https://www.first.org", AuthorityType: "cert", Category: "cyber_advisory"}, + }) + if len(targets) != 2 { + t.Fatalf("expected 2 search targets, got %d", len(targets)) + } +} + +func TestLLMSearchCandidatesReturnsTokenSafeCandidates(t *testing.T) { + cfg := config.Default() + cfg.SearchDiscoveryEnabled = true + cfg.SearchDiscoveryMaxTargets = 1 + cfg.SearchDiscoveryMaxURLsPerTarget = 2 + cfg.VettingProvider = "xai" + + got, err := llmSearchCandidates(context.Background(), cfg, stubSearchCompleter{ + content: `{"urls":[{"url":"https://www.europol.europa.eu/cms/api/rss/news","reason":"official rss"},{"url":"https://www.europol.europa.eu/feed.xml","reason":"official atom"},{"url":"https://www.europol.europa.eu/newsroom","reason":"ignore non-feed"}]}`, + }, []model.SourceCandidate{ + {AuthorityName: "Europol", URL: "https://www.europol.europa.eu", AuthorityType: "police", Category: "public_appeal", Country: "Netherlands", CountryCode: "NL"}, + }) + if err != nil { + t.Fatalf("llmSearchCandidates returned error: %v", err) + } + if len(got) != 2 { + t.Fatalf("expected 2 llm-search candidates, got %d", len(got)) + } + if !strings.HasPrefix(got[0].Notes, "llm-search:xai") { + t.Fatalf("expected llm-search note, got %q", got[0].Notes) + } + if got[0].AuthorityName != "Europol" || got[0].CountryCode != "NL" { + t.Fatalf("expected metadata to be preserved, got %#v", got[0]) + } +} + +func TestFirstWebsiteFieldAcceptsStringOrArray(t *testing.T) { + var got struct { + Website firstWebsiteField `json:"website"` + } + if err := json.Unmarshal([]byte(`{"website":"https://example.test"}`), &got); err != 
nil { + t.Fatalf("unmarshal string website: %v", err) + } + if string(got.Website) != "https://example.test" { + t.Fatalf("unexpected string website %q", got.Website) + } + if err := json.Unmarshal([]byte(`{"website":["","https://array.example/feed"]}`), &got); err != nil { + t.Fatalf("unmarshal array website: %v", err) + } + if string(got.Website) != "https://array.example/feed" { + t.Fatalf("unexpected array website %q", got.Website) + } +} + +func TestBuildReplacementSearchTargetsUsesMetadataNotDeadURL(t *testing.T) { + targets := buildReplacementSearchTargets([]model.SourceReplacementCandidate{ + { + SourceID: "bka", + AuthorityName: "Bundeskriminalamt", + FeedURL: "https://dead.example/rss", + BaseURL: "https://www.bka.de", + Country: "Germany", + CountryCode: "DE", + Region: "Europe", + AuthorityType: "police", + Category: "wanted_suspect", + }, + }) + if len(targets) != 1 { + t.Fatalf("expected 1 replacement search target, got %d", len(targets)) + } + if targets[0].BaseURL != "https://www.bka.de" { + t.Fatalf("expected base URL to be used for replacement search, got %#v", targets[0]) + } + if targets[0].URL != "" { + t.Fatalf("expected dead feed URL not to be reintroduced as direct candidate, got %#v", targets[0]) + } + if !strings.HasPrefix(targets[0].Notes, "replacement-search:") { + t.Fatalf("expected replacement-search note, got %#v", targets[0]) + } +} + +func TestIsDiscoveryTimeout(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {name: "deadline", err: context.DeadlineExceeded, want: true}, + {name: "timeout text", err: errors.New("fetch failed: context deadline exceeded"), want: true}, + {name: "request canceled", err: errors.New("request canceled while awaiting headers"), want: true}, + {name: "parse failure", err: errors.New("json parse error"), want: false}, + } + for _, tt := range tests { + if got := isDiscoveryTimeout(tt.err); got != tt.want { + t.Fatalf("%s: got %v want %v", tt.name, got, tt.want) + } + } +} + 
+func TestWikidataCacheRoundTrip(t *testing.T) { + cfg := config.Default() + cfg.WikidataCachePath = t.TempDir() + cfg.WikidataCacheTTLHours = 24 + + url := "https://query.wikidata.org/sparql?format=json&query=test" + want := []byte(`{"results":{"bindings":[]}}`) + writeWikidataCache(cfg, url, want) + + got, ok := readWikidataCache(cfg, url) + if !ok { + t.Fatal("expected cached wikidata response to be readable") + } + if string(got) != string(want) { + t.Fatalf("unexpected cache body %q", string(got)) + } +} diff --git a/internal/collector/discover/first.go b/internal/collector/discover/first.go new file mode 100644 index 0000000..c274dea --- /dev/null +++ b/internal/collector/discover/first.go @@ -0,0 +1,106 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/scalytics/euosint/internal/collector/fetch" +) + +const firstAPIBase = "https://api.first.org/data/v1/teams" +const firstPageLimit = 100 + +// FIRSTTeam represents a CSIRT team from the FIRST.org API. 
+type FIRSTTeam struct { + ShortName string `json:"short_name"` + Country string `json:"country"` + Website string `json:"website"` +} + +type firstWebsiteField string + +func (f *firstWebsiteField) UnmarshalJSON(data []byte) error { + data = []byte(strings.TrimSpace(string(data))) + if string(data) == "null" || len(data) == 0 { + *f = "" + return nil + } + var single string + if err := json.Unmarshal(data, &single); err == nil { + *f = firstWebsiteField(strings.TrimSpace(single)) + return nil + } + var many []string + if err := json.Unmarshal(data, &many); err == nil { + for _, entry := range many { + entry = strings.TrimSpace(entry) + if entry != "" { + *f = firstWebsiteField(entry) + return nil + } + } + *f = "" + return nil + } + return fmt.Errorf("unsupported FIRST website field: %s", string(data)) +} + +// FetchFIRSTTeams fetches all CSIRT teams from the FIRST.org API, +// paginating through all results. Returns teams that have a non-empty +// website URL. +func FetchFIRSTTeams(ctx context.Context, client *fetch.Client) ([]FIRSTTeam, error) { + var allTeams []FIRSTTeam + offset := 0 + for { + if ctx.Err() != nil { + return allTeams, ctx.Err() + } + url := fmt.Sprintf("%s?limit=%d&offset=%d", firstAPIBase, firstPageLimit, offset) + body, err := fetchTextWithRetry(ctx, client, url, "application/json") + if err != nil { + return allTeams, fmt.Errorf("FIRST.org API page offset=%d: %w", offset, err) + } + + var resp struct { + Data []struct { + ShortName string `json:"short_name"` + Country string `json:"country"` + Website firstWebsiteField `json:"website"` + Host string `json:"host"` + } `json:"data"` + Total int `json:"total"` + } + if err := json.Unmarshal(body, &resp); err != nil { + return allTeams, fmt.Errorf("FIRST.org API parse: %w", err) + } + + for _, team := range resp.Data { + website := strings.TrimSpace(string(team.Website)) + if website == "" { + website = strings.TrimSpace(team.Host) + } + if website == "" { + continue + } + if 
!strings.HasPrefix(website, "http") { + website = "https://" + website + } + allTeams = append(allTeams, FIRSTTeam{ + ShortName: team.ShortName, + Country: team.Country, + Website: strings.TrimRight(website, "/"), + }) + } + + offset += firstPageLimit + if len(resp.Data) < firstPageLimit || offset >= resp.Total { + break + } + } + return allTeams, nil +} diff --git a/internal/collector/discover/gap.go b/internal/collector/discover/gap.go new file mode 100644 index 0000000..a27172f --- /dev/null +++ b/internal/collector/discover/gap.go @@ -0,0 +1,328 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "fmt" + "io" + "strings" + + "github.com/scalytics/euosint/internal/collector/model" +) + +// gapTarget defines a country and the minimum categories it should have. +type gapTarget struct { + Country string + CountryCode string + Categories []string +} + +// coreCategories is the minimum set of feed categories every country should have. +var coreCategories = []string{ + "cyber_advisory", + "public_appeal", + "legislative", + "conflict_monitoring", + "maritime_security", +} + +// expandedCategories adds intel, travel, and fraud for countries where these +// are commonly published by government agencies. +var expandedCategories = []string{ + "cyber_advisory", + "public_appeal", + "legislative", + "conflict_monitoring", + "maritime_security", + "travel_warning", + "intelligence_report", + "fraud_alert", +} + +// targetCountries lists all countries the system should cover with at least +// core categories. European/Nordic countries use the expanded set. 
+var targetCountries = []gapTarget{ + // ─── Nordics ──────────────────────────────────────────────── + {"Norway", "NO", expandedCategories}, + {"Sweden", "SE", expandedCategories}, + {"Finland", "FI", expandedCategories}, + {"Denmark", "DK", expandedCategories}, + {"Iceland", "IS", coreCategories}, + + // ─── Western Europe ───────────────────────────────────────── + {"United Kingdom", "GB", expandedCategories}, + {"France", "FR", expandedCategories}, + {"Germany", "DE", expandedCategories}, + {"Netherlands", "NL", expandedCategories}, + {"Belgium", "BE", expandedCategories}, + {"Luxembourg", "LU", coreCategories}, + {"Ireland", "IE", expandedCategories}, + {"Switzerland", "CH", expandedCategories}, + {"Austria", "AT", expandedCategories}, + + // ─── Southern Europe ──────────────────────────────────────── + {"Spain", "ES", expandedCategories}, + {"Portugal", "PT", expandedCategories}, + {"Italy", "IT", expandedCategories}, + {"Greece", "GR", expandedCategories}, + {"Malta", "MT", coreCategories}, + {"Cyprus", "CY", coreCategories}, + + // ─── Eastern Europe ───────────────────────────────────────── + {"Poland", "PL", expandedCategories}, + {"Czech Republic", "CZ", expandedCategories}, + {"Slovakia", "SK", coreCategories}, + {"Hungary", "HU", coreCategories}, + {"Romania", "RO", expandedCategories}, + {"Bulgaria", "BG", coreCategories}, + {"Croatia", "HR", coreCategories}, + {"Slovenia", "SI", coreCategories}, + {"Serbia", "RS", coreCategories}, + {"Bosnia", "BA", coreCategories}, + {"Montenegro", "ME", coreCategories}, + {"North Macedonia", "MK", coreCategories}, + {"Albania", "AL", coreCategories}, + {"Kosovo", "XK", coreCategories}, + {"Moldova", "MD", coreCategories}, + + // ─── Baltics ──────────────────────────────────────────────── + {"Estonia", "EE", expandedCategories}, + {"Latvia", "LV", expandedCategories}, + {"Lithuania", "LT", expandedCategories}, + + // ─── East ─────────────────────────────────────────────────── + {"Ukraine", "UA", 
expandedCategories}, + {"Georgia", "GE", coreCategories}, + {"Turkey", "TR", expandedCategories}, + + // ─── Americas ─────────────────────────────────────────────── + {"United States", "US", expandedCategories}, + {"Canada", "CA", expandedCategories}, + {"Mexico", "MX", expandedCategories}, + {"Brazil", "BR", expandedCategories}, + {"Argentina", "AR", expandedCategories}, + {"Colombia", "CO", coreCategories}, + {"Chile", "CL", coreCategories}, + {"Peru", "PE", coreCategories}, + {"Ecuador", "EC", coreCategories}, + {"Venezuela", "VE", coreCategories}, + {"Cuba", "CU", coreCategories}, + {"Panama", "PA", coreCategories}, + {"Costa Rica", "CR", coreCategories}, + {"Guatemala", "GT", coreCategories}, + {"Honduras", "HN", coreCategories}, + {"El Salvador", "SV", coreCategories}, + {"Nicaragua", "NI", coreCategories}, + {"Dominican Republic", "DO", coreCategories}, + {"Jamaica", "JM", coreCategories}, + {"Haiti", "HT", coreCategories}, + {"Paraguay", "PY", coreCategories}, + {"Uruguay", "UY", coreCategories}, + {"Bolivia", "BO", coreCategories}, + + // ─── Asia-Pacific ─────────────────────────────────────────── + {"Japan", "JP", expandedCategories}, + {"South Korea", "KR", expandedCategories}, + {"China", "CN", expandedCategories}, + {"India", "IN", expandedCategories}, + {"Australia", "AU", expandedCategories}, + {"New Zealand", "NZ", coreCategories}, + {"Indonesia", "ID", expandedCategories}, + {"Malaysia", "MY", coreCategories}, + {"Singapore", "SG", expandedCategories}, + {"Thailand", "TH", coreCategories}, + {"Philippines", "PH", coreCategories}, + {"Vietnam", "VN", coreCategories}, + {"Taiwan", "TW", expandedCategories}, + {"Pakistan", "PK", coreCategories}, + {"Bangladesh", "BD", coreCategories}, + {"Sri Lanka", "LK", coreCategories}, + {"Nepal", "NP", coreCategories}, + {"Mongolia", "MN", coreCategories}, + {"Cambodia", "KH", coreCategories}, + {"Myanmar", "MM", coreCategories}, + {"Laos", "LA", coreCategories}, + + // ─── Middle East 
──────────────────────────────────────────── + {"Israel", "IL", expandedCategories}, + {"Saudi Arabia", "SA", expandedCategories}, + {"United Arab Emirates", "AE", expandedCategories}, + {"Qatar", "QA", coreCategories}, + {"Kuwait", "KW", coreCategories}, + {"Bahrain", "BH", coreCategories}, + {"Oman", "OM", coreCategories}, + {"Jordan", "JO", coreCategories}, + {"Lebanon", "LB", coreCategories}, + {"Iraq", "IQ", coreCategories}, + {"Iran", "IR", expandedCategories}, + {"Syria", "SY", coreCategories}, + {"Yemen", "YE", coreCategories}, + {"Palestine", "PS", coreCategories}, + + // ─── Africa ───────────────────────────────────────────────── + {"South Africa", "ZA", expandedCategories}, + {"Nigeria", "NG", expandedCategories}, + {"Kenya", "KE", coreCategories}, + {"Egypt", "EG", expandedCategories}, + {"Morocco", "MA", coreCategories}, + {"Algeria", "DZ", coreCategories}, + {"Tunisia", "TN", coreCategories}, + {"Libya", "LY", coreCategories}, + {"Ethiopia", "ET", coreCategories}, + {"Ghana", "GH", coreCategories}, + {"Tanzania", "TZ", coreCategories}, + {"Uganda", "UG", coreCategories}, + {"Rwanda", "RW", coreCategories}, + {"Senegal", "SN", coreCategories}, + {"Ivory Coast", "CI", coreCategories}, + {"Cameroon", "CM", coreCategories}, + {"Congo", "CD", coreCategories}, + {"Mozambique", "MZ", coreCategories}, + {"Zimbabwe", "ZW", coreCategories}, + {"Zambia", "ZM", coreCategories}, + {"Mali", "ML", coreCategories}, + {"Burkina Faso", "BF", coreCategories}, + {"Niger", "NE", coreCategories}, + {"Chad", "TD", coreCategories}, + {"Sudan", "SD", coreCategories}, + {"South Sudan", "SS", coreCategories}, + {"Somalia", "SO", coreCategories}, + {"Eritrea", "ER", coreCategories}, + {"Madagascar", "MG", coreCategories}, + {"Angola", "AO", coreCategories}, + {"Namibia", "NA", coreCategories}, + {"Botswana", "BW", coreCategories}, + + // ─── Central Asia ─────────────────────────────────────────── + {"Kazakhstan", "KZ", coreCategories}, + {"Uzbekistan", "UZ", coreCategories}, + 
{"Kyrgyzstan", "KG", coreCategories}, + {"Tajikistan", "TJ", coreCategories}, + {"Turkmenistan", "TM", coreCategories}, + + // ─── Caucasus ─────────────────────────────────────────────── + {"Armenia", "AM", coreCategories}, + {"Azerbaijan", "AZ", coreCategories}, + + // ─── Russia / Belarus ─────────────────────────────────────── + {"Russia", "RU", expandedCategories}, + {"Belarus", "BY", coreCategories}, +} + +// gapCandidate is a synthetic search candidate generated by gap analysis. +type gapCandidate struct { + Country string + CountryCode string + Category string +} + +// AnalyzeGaps compares the active registry against target countries and +// returns synthetic candidates for missing country+category combinations. +func AnalyzeGaps(sources []model.RegistrySource, stderr io.Writer) []model.SourceCandidate { + // Build coverage map: country_code → set of categories with active feeds. + coverage := map[string]map[string]bool{} + for _, src := range sources { + cc := strings.ToUpper(src.Source.CountryCode) + if cc == "" || cc == "INT" { + continue + } + if coverage[cc] == nil { + coverage[cc] = map[string]bool{} + } + coverage[cc][src.Category] = true + } + + var gaps []gapCandidate + for _, target := range targetCountries { + covered := coverage[target.CountryCode] + for _, cat := range target.Categories { + if covered != nil && covered[cat] { + continue + } + gaps = append(gaps, gapCandidate{ + Country: target.Country, + CountryCode: target.CountryCode, + Category: cat, + }) + } + } + + if len(gaps) == 0 { + fmt.Fprintf(stderr, "Gap analysis: full coverage for all %d target countries\n", len(targetCountries)) + return nil + } + + fmt.Fprintf(stderr, "Gap analysis: found %d missing country+category combinations across %d target countries\n", len(gaps), countUniqueCountries(gaps)) + + // Convert gaps to search candidates with descriptive names. 
+ candidates := make([]model.SourceCandidate, 0, len(gaps)) + for _, gap := range gaps { + candidates = append(candidates, model.SourceCandidate{ + AuthorityName: gapAuthorityLabel(gap.Country, gap.Category), + AuthorityType: gapAuthorityType(gap.Category), + Category: gap.Category, + Country: gap.Country, + CountryCode: gap.CountryCode, + Notes: "autonomous seed: gap-analysis", + }) + } + return candidates +} + +func countUniqueCountries(gaps []gapCandidate) int { + seen := map[string]bool{} + for _, g := range gaps { + seen[g.CountryCode] = true + } + return len(seen) +} + +// gapAuthorityLabel generates a descriptive search label for a gap. +func gapAuthorityLabel(country string, category string) string { + switch category { + case "cyber_advisory": + return country + " national CERT or CSIRT" + case "public_appeal": + return country + " national police" + case "travel_warning": + return country + " ministry of foreign affairs" + case "intelligence_report": + return country + " intelligence or security service" + case "fraud_alert": + return country + " financial regulator or central bank" + case "wanted_suspect": + return country + " wanted persons" + case "legislative": + return country + " parliament or foreign affairs ministry" + case "conflict_monitoring": + return country + " defense ministry or armed forces" + case "maritime_security": + return country + " coast guard or maritime authority" + default: + return country + " " + strings.ReplaceAll(category, "_", " ") + } +} + +func gapAuthorityType(category string) string { + switch category { + case "cyber_advisory": + return "cert" + case "public_appeal", "wanted_suspect": + return "police" + case "travel_warning": + return "government" + case "intelligence_report": + return "national_security" + case "fraud_alert": + return "regulatory" + case "legislative": + return "regulatory" + case "conflict_monitoring": + return "national_security" + case "maritime_security": + return "national_security" + default: 
+ return "government" + } +} diff --git a/internal/collector/discover/gap_test.go b/internal/collector/discover/gap_test.go new file mode 100644 index 0000000..5e77093 --- /dev/null +++ b/internal/collector/discover/gap_test.go @@ -0,0 +1,68 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "bytes" + "testing" + + "github.com/scalytics/euosint/internal/collector/model" +) + +func TestAnalyzeGaps_FindsMissing(t *testing.T) { + // A registry covering all expanded categories for Germany — everything else is a gap. + var sources []model.RegistrySource + for _, cat := range expandedCategories { + sources = append(sources, model.RegistrySource{ + Category: cat, + Source: model.SourceMetadata{CountryCode: "DE"}, + }) + } + + var buf bytes.Buffer + gaps := AnalyzeGaps(sources, &buf) + + if len(gaps) == 0 { + t.Fatal("expected gap candidates, got none") + } + + // Norway should be missing all categories. + norwayCats := map[string]bool{} + for _, g := range gaps { + if g.CountryCode == "NO" { + norwayCats[g.Category] = true + } + } + for _, cat := range expandedCategories { + if !norwayCats[cat] { + t.Errorf("expected Norway gap for %s", cat) + } + } + + // Germany should NOT appear (fully covered). + for _, g := range gaps { + if g.CountryCode == "DE" { + t.Errorf("unexpected gap for Germany: %s", g.Category) + } + } +} + +func TestAnalyzeGaps_FullCoverage(t *testing.T) { + // Build a registry that covers every target country+category. 
+	var sources []model.RegistrySource
+	for _, target := range targetCountries {
+		for _, cat := range target.Categories {
+			sources = append(sources, model.RegistrySource{
+				Category: cat,
+				Source:   model.SourceMetadata{CountryCode: target.CountryCode},
+			})
+		}
+	}
+
+	var buf bytes.Buffer
+	gaps := AnalyzeGaps(sources, &buf)
+	if len(gaps) != 0 {
+		t.Errorf("expected 0 gaps with full coverage, got %d", len(gaps))
+	}
+}
diff --git a/internal/collector/discover/http.go b/internal/collector/discover/http.go
new file mode 100644
index 0000000..c929462
--- /dev/null
+++ b/internal/collector/discover/http.go
@@ -0,0 +1,148 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package discover
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/scalytics/euosint/internal/collector/config"
+	"github.com/scalytics/euosint/internal/collector/fetch"
+)
+
+const (
+	// fetchAttempts is the total number of tries per request: the initial
+	// call plus one retry.
+	fetchAttempts = 2
+	// fetchAttemptTimeout bounds a single try when the caller's context
+	// carries no deadline of its own.
+	fetchAttemptTimeout = 45 * time.Second
+)
+
+// fetchTextWithRetry downloads url through client.Text and retries once when
+// the failure looks transient. It returns the body of the first successful
+// attempt, or the error of the last attempt.
+func fetchTextWithRetry(ctx context.Context, client *fetch.Client, url string, accept string) ([]byte, error) {
+	return retryTransient(ctx, func(attemptCtx context.Context) ([]byte, error) {
+		return client.Text(attemptCtx, url, true, accept)
+	})
+}
+
+// retryTransient runs attempt up to fetchAttempts times. Each try gets its
+// own fetchAttemptTimeout unless ctx already has a deadline. It stops early
+// on success, on an error that does not look transient, or once ctx itself
+// is done.
+func retryTransient(ctx context.Context, attempt func(context.Context) ([]byte, error)) ([]byte, error) {
+	var lastErr error
+	for try := 0; try < fetchAttempts; try++ {
+		attemptCtx := ctx
+		cancel := func() {}
+		if _, ok := ctx.Deadline(); !ok {
+			attemptCtx, cancel = context.WithTimeout(ctx, fetchAttemptTimeout)
+		}
+		body, err := attempt(attemptCtx)
+		cancel()
+		if err == nil {
+			return body, nil
+		}
+		lastErr = err
+		// An expired or cancelled parent context also surfaces as "deadline
+		// exceeded" / "request canceled", but a retry against it cannot succeed.
+		if ctx.Err() != nil || !looksTransient(err) {
+			break
+		}
+	}
+	return nil, lastErr
+}
+
+// looksTransient reports whether err's message suggests a temporary network
+// condition (timeout, cancelled request, or unexpected EOF).
+func looksTransient(err error) bool {
+	if err == nil {
+		return false
+	}
+	msg := strings.ToLower(err.Error())
+	return strings.Contains(msg, "timeout") ||
+		strings.Contains(msg, "deadline exceeded") ||
+		strings.Contains(msg, "request canceled") ||
+		strings.Contains(msg, " eof") ||
+		strings.Contains(msg, ": eof")
+}
+
+// fetchWikidataTextWithCache returns the response body for url, served from
+// the on-disk cache when a fresh entry exists. On a miss it fetches with the
+// configured Wikimedia user agent, retries once on transient errors, and
+// stores a successful body in the cache.
+func fetchWikidataTextWithCache(ctx context.Context, cfg config.Config, client *fetch.Client, url string, accept string) ([]byte, error) {
+	if body, ok := readWikidataCache(cfg, url); ok {
+		return body, nil
+	}
+	headers := map[string]string{
+		"User-Agent":     cfg.WikimediaUserAgent,
+		"Api-User-Agent": cfg.WikimediaUserAgent,
+	}
+	body, err := retryTransient(ctx, func(attemptCtx context.Context) ([]byte, error) {
+		return client.TextWithHeaders(attemptCtx, url, true, accept, headers)
+	})
+	if err != nil {
+		return nil, err
+	}
+	writeWikidataCache(cfg, url, body)
+	return body, nil
+}
+
+// readWikidataCache loads the cached body for url. It reports false when
+// caching is disabled, the entry is missing, empty, unreadable, or older than
+// cfg.WikidataCacheTTLHours (24 hours when that value is not positive).
+func readWikidataCache(cfg config.Config, url string) ([]byte, bool) {
+	path := wikidataCacheFile(cfg, url)
+	if path == "" {
+		return nil, false
+	}
+	info, err := os.Stat(path)
+	if err != nil {
+		return nil, false
+	}
+	ttl := time.Duration(cfg.WikidataCacheTTLHours) * time.Hour
+	if ttl <= 0 {
+		ttl = 24 * time.Hour
+	}
+	if time.Since(info.ModTime()) > ttl {
+		return nil, false
+	}
+	body, err := os.ReadFile(path)
+	if err != nil || len(body) == 0 {
+		return nil, false
+	}
+	return body, true
+}
+
+// writeWikidataCache stores body as the cache entry for url. It is best
+// effort: empty bodies are skipped and I/O errors are deliberately ignored.
+func writeWikidataCache(cfg config.Config, url string, body []byte) {
+	path := wikidataCacheFile(cfg, url)
+	if path == "" || len(body) == 0 {
+		return
+	}
+	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		return
+	}
+	_ = os.WriteFile(path, body, 0o644)
+}
+
+// wikidataCacheFile maps url to its cache file path, named after the
+// SHA-256 of the URL. It returns "" when no cache directory is configured.
+func wikidataCacheFile(cfg config.Config, url string) string {
+	dir := strings.TrimSpace(cfg.WikidataCachePath)
+	if dir == "" {
+		return ""
+	}
+	sum := sha256.Sum256([]byte(url))
+	return filepath.Join(dir, hex.EncodeToString(sum[:])+".json")
+}
diff --git a/internal/collector/discover/humanitarian.go b/internal/collector/discover/humanitarian.go
new file mode 100644
index 0000000..cd7dc1e
--- /dev/null
+++ b/internal/collector/discover/humanitarian.go
@@ -0,0 +1,121 @@
+// Copyright 2026 ff, Scalytics, Inc.
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "sort" + "strings" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/fetch" +) + +// HumanitarianOrg represents a humanitarian/emergency agency discovered via Wikidata. +type HumanitarianOrg struct { + Name string + Country string + CountryCode string + Website string +} + +var humanitarianTypeIDs = []string{ + "Q895526", // humanitarian aid org + "Q15220109", // disaster management authority + "Q1460420", // civil protection + "Q1066476", // emergency management + // Subclasses we'd miss without P279* traversal: + "Q3918693", // emergency service + "Q863734", // rescue service + "Q167546", // NGO (filtered later by hygiene) + "Q484652", // international organization +} + +func buildHumanitarianQuery(typeID string) string { + // Query ONE type ID at a time with P31 (no P279* subclass traversal). + // No label service, no rdfs:label OPTIONAL — both cause Wikidata + // timeouts. We extract the hostname as a proxy for the org name. + return fmt.Sprintf(` +SELECT ?website ?countryCode WHERE { + ?org wdt:P31 wd:%s ; + wdt:P856 ?website ; + wdt:P17 ?country . + ?country wdt:P297 ?countryCode . +} LIMIT 50 +`, strings.TrimSpace(typeID)) +} + +// FetchHumanitarianOrgs queries Wikidata for humanitarian, emergency management, +// and civil protection agencies worldwide. Queries one type ID at a time +// with LIMIT 50 to stay within Wikidata's public SPARQL timeout limits. 
+func FetchHumanitarianOrgs(ctx context.Context, cfg config.Config, client *fetch.Client) ([]HumanitarianOrg, error) { + var failures []string + seen := map[string]struct{}{} + var orgs []HumanitarianOrg + + for _, typeID := range humanitarianTypeIDs { + if ctx.Err() != nil { + break + } + query := strings.TrimSpace(buildHumanitarianQuery(typeID)) + reqURL := wikidataSPARQL + "?format=json&query=" + url.QueryEscape(query) + body, err := fetchWikidataTextWithCache(ctx, cfg, client, reqURL, "application/sparql-results+json, application/json;q=0.9") + if err != nil { + failures = append(failures, fmt.Sprintf("%s: %v", typeID, err)) + continue + } + + var resp struct { + Results struct { + Bindings []struct { + Website struct{ Value string } `json:"website"` + CountryCode struct{ Value string } `json:"countryCode"` + } `json:"bindings"` + } `json:"results"` + } + if err := json.Unmarshal(body, &resp); err != nil { + failures = append(failures, fmt.Sprintf("%s: parse: %v", typeID, err)) + continue + } + + for _, b := range resp.Results.Bindings { + website := strings.TrimRight(strings.TrimSpace(b.Website.Value), "/") + if website == "" { + continue + } + u, err := url.Parse(website) + if err != nil { + continue + } + host := strings.ToLower(u.Hostname()) + if _, ok := seen[host]; ok { + continue + } + seen[host] = struct{}{} + + orgs = append(orgs, HumanitarianOrg{ + Name: hostToName(host), + Country: countryFromCode(b.CountryCode.Value), + CountryCode: strings.ToUpper(strings.TrimSpace(b.CountryCode.Value)), + Website: website, + }) + } + } + sort.Slice(orgs, func(i, j int) bool { + if orgs[i].Country != orgs[j].Country { + return orgs[i].Country < orgs[j].Country + } + return orgs[i].Name < orgs[j].Name + }) + if len(orgs) > 0 { + return orgs, nil + } + if len(failures) > 0 { + return nil, fmt.Errorf("wikidata SPARQL humanitarian: %s", strings.Join(failures, " | ")) + } + return nil, nil +} diff --git a/internal/collector/discover/hygiene.go 
b/internal/collector/discover/hygiene.go
new file mode 100644
index 0000000..3eec490
--- /dev/null
+++ b/internal/collector/discover/hygiene.go
@@ -0,0 +1,210 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package discover
+
+import (
+	"net/url"
+	"strings"
+	"unicode"
+)
+
+// localEntityTerms are name substrings that mark an authority as a local or
+// municipal body; any hit rejects the candidate.
+var localEntityTerms = []string{
+	"municipal",
+	"municipality",
+	"city of ",
+	"county ",
+	"sheriff",
+	"borough",
+	"township",
+	"village",
+	"metropolitan police",
+	"local police",
+	"police department",
+}
+
+// genericNewsroomTerms are name substrings that reject a candidate whose
+// authority type is "police".
+var genericNewsroomTerms = []string{
+	"newsroom",
+	"press office",
+	"media centre",
+	"media center",
+	"communications office",
+}
+
+// nonOSINTTerms catches organizations that have no intelligence relevance.
+// Each term is matched against the lowered name as a whole word or phrase.
+var nonOSINTTerms = []string{
+	"school", "university", "college", "academy", "education",
+	"world bank", "imf", "monetary fund",
+	"library", "museum", "archive",
+	"tourism", "tourist", "travel agency",
+	"sport", "olympic", "football", "soccer", "fifa",
+	"entertainment", "oscars", "grammy", "billboard",
+	"recipe", "cooking", "food network",
+	"weather forecast",
+	"real estate", "property",
+	"fashion", "beauty", "lifestyle",
+	"church", "mosque", "synagogue", "cathedral",
+	"kindergarten", "daycare", "nursery",
+	"zoo", "aquarium", "botanical",
+	"lottery", "casino", "gambling",
+	"dating",
+	"openstreetmap", "missing maps", "mapathon", "tasking manager",
+}
+
+// nonOSINTStems are word prefixes, not whole words: "meteorolog" has to
+// reject "meteorological" and "meteorology" alike, which whole-word matching
+// can never do.
+var nonOSINTStems = []string{
+	"meteorolog",
+	"matrimon",
+}
+
+// nonOSINTHosts rejects entire domains that are never OSINT-relevant.
+// An entry matches the host itself and any of its subdomains.
+var nonOSINTHosts = []string{
+	"worldbank.org", "imf.org",
+	"unesco.org", "unicef.org",
+	"wikipedia.org", "wiktionary.org",
+	"facebook.com", "twitter.com", "x.com", "instagram.com",
+	"youtube.com", "tiktok.com", "reddit.com",
+	"linkedin.com", "pinterest.com",
+	"amazon.com", "ebay.com", "alibaba.com",
+	"spotify.com", "netflix.com",
+	"stackoverflow.com", "github.com",
+	"schoolnet.eu", "european-schoolnet",
+	"openstreetmap.org", "hotosm.org", "missingmaps.org",
+}
+
+// passesDiscoveryHygiene reports whether a discovered authority is worth
+// keeping. It rejects empty names, local or municipal bodies, names carrying
+// non-OSINT terms, generic newsroom names for police authorities, and
+// websites whose host looks local or is on the non-OSINT host list.
+func passesDiscoveryHygiene(name string, website string, authorityType string) bool {
+	name = strings.ToLower(strings.TrimSpace(name))
+	authorityType = strings.ToLower(strings.TrimSpace(authorityType))
+	if name == "" {
+		return false
+	}
+	for _, term := range localEntityTerms {
+		if strings.Contains(name, term) {
+			return false
+		}
+	}
+	for _, term := range nonOSINTTerms {
+		if containsTerm(name, term) {
+			return false
+		}
+	}
+	for _, stem := range nonOSINTStems {
+		if containsWordPrefix(name, stem) {
+			return false
+		}
+	}
+	if authorityType == "police" {
+		for _, term := range genericNewsroomTerms {
+			if strings.Contains(name, term) {
+				return false
+			}
+		}
+	}
+	if hostLooksLocal(website) || hostIsNonOSINT(website) {
+		return false
+	}
+	return true
+}
+
+// hostLooksLocal reports whether the hostname of rawURL starts with "city.",
+// "county." or "police.", or contains ".city.", ".county." or ".municipal.".
+func hostLooksLocal(rawURL string) bool {
+	parsed, err := url.Parse(strings.TrimSpace(rawURL))
+	if err != nil {
+		return false
+	}
+	host := strings.ToLower(parsed.Hostname())
+	if host == "" {
+		return false
+	}
+	return strings.HasPrefix(host, "city.") ||
+		strings.HasPrefix(host, "county.") ||
+		strings.HasPrefix(host, "police.") ||
+		strings.Contains(host, ".city.") ||
+		strings.Contains(host, ".county.") ||
+		strings.Contains(host, ".municipal.")
+}
+
+// hostIsNonOSINT reports whether the hostname of rawURL equals, or is a
+// subdomain of, an entry in nonOSINTHosts.
+func hostIsNonOSINT(rawURL string) bool {
+	parsed, err := url.Parse(strings.TrimSpace(rawURL))
+	if err != nil {
+		return false
+	}
+	host := strings.ToLower(parsed.Hostname())
+	for _, blocked := range nonOSINTHosts {
+		if host == blocked || strings.HasSuffix(host, "."+blocked) {
+			return true
+		}
+	}
+	return false
+}
+
+// containsTerm reports whether term occurs in text as a whole word or
+// phrase, i.e. with no letter or digit directly before or after it.
+// Matching is case-insensitive.
+func containsTerm(text string, term string) bool {
+	return matchTerm(text, term, true)
+}
+
+// containsWordPrefix reports whether some word in text starts with stem.
+// Matching is case-insensitive.
+func containsWordPrefix(text string, stem string) bool {
+	return matchTerm(text, stem, false)
+}
+
+// matchTerm scans text for term. A hit must start at a word boundary; when
+// wholeWord is set it must also end at one.
+func matchTerm(text string, term string, wholeWord bool) bool {
+	text = strings.ToLower(strings.TrimSpace(text))
+	term = strings.ToLower(strings.TrimSpace(term))
+	if text == "" || term == "" {
+		return false
+	}
+	offset := 0
+	for {
+		idx := strings.Index(text[offset:], term)
+		if idx < 0 {
+			return false
+		}
+		start := offset + idx
+		end := start + len(term)
+		if isBoundary(text, start-1) && (!wholeWord || isBoundary(text, end)) {
+			return true
+		}
+		offset = start + 1
+	}
+}
+
+// isBoundary reports whether the character occupying byte offset idx of text
+// is neither a letter nor a digit. Offsets outside text count as boundaries.
+func isBoundary(text string, idx int) bool {
+	if idx < 0 || idx >= len(text) {
+		return true
+	}
+	// idx may point into the middle of a multi-byte character (it is often
+	// "match start - 1"), so rewind over UTF-8 continuation bytes (10xxxxxx).
+	for idx > 0 && text[idx]&0xC0 == 0x80 {
+		idx--
+	}
+	// Decode one whole rune; a UTF-8 sequence is at most four bytes long.
+	end := idx + 4
+	if end > len(text) {
+		end = len(text)
+	}
+	r := []rune(text[idx:end])[0]
+	return !unicode.IsLetter(r) && !unicode.IsDigit(r)
+}
diff --git a/internal/collector/discover/police.go b/internal/collector/discover/police.go
new file mode 100644
index 0000000..1e61399
--- /dev/null
+++ b/internal/collector/discover/police.go
@@ -0,0 +1,187 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package discover
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/url"
+	"sort"
+	"strings"
+
+	"github.com/scalytics/euosint/internal/collector/config"
+	"github.com/scalytics/euosint/internal/collector/fetch"
+)
+
+const wikidataSPARQL = "https://query.wikidata.org/sparql"
+
+// PoliceAgency represents a law-enforcement or national-security agency
+// discovered via Wikidata whose website we should probe for feeds.
+type PoliceAgency struct {
+	Name              string
+	Country           string
+	CountryCode       string
+	Website           string
+	AuthorityType     string   // "police", "national_security"
+	Category          string   // suggested registry category
+	PressReleasePaths []string // fallback HTML paths to probe if no RSS found
+}
+
+// commonPressReleasePaths are subpaths frequently used by police/government
+// sites for press-release listing pages.
+var commonPressReleasePaths = []string{ + "/en/resources/press-releases/", + "/en/press-releases/", + "/en/news/", + "/en/media/press-releases/", + "/press-releases/", + "/news/press-releases/", + "/newsroom/press-releases/", + "/media/news/", + "/latest-news/", + "/news/", +} + +var policeAgencyTypeIDs = []string{ + "Q732717", // police + "Q35535", // gendarmerie + "Q15636005", // intelligence agency + "Q584085", // coast guard + "Q1752939", // customs + "Q12039646", // secret police + // Subclasses we'd miss without P279* traversal: + "Q17032608", // law enforcement agency + "Q56318653", // national police + "Q19832486", // federal police + "Q2102290", // border guard + "Q15925165", // national security agency + "Q68416", // national guard + "Q189290", // military police + "Q7188", // security service +} + +func buildPoliceAgencyQuery(typeID string) string { + // Query ONE type ID at a time with P31 (no P279* subclass traversal). + // No label service, no rdfs:label OPTIONAL — both cause Wikidata + // timeouts. We extract the hostname as a proxy for the org name. + return fmt.Sprintf(` +SELECT ?website ?countryCode WHERE { + ?agency wdt:P31 wd:%s ; + wdt:P856 ?website ; + wdt:P17 ?country . + ?country wdt:P297 ?countryCode . +} LIMIT 50 +`, strings.TrimSpace(typeID)) +} + +// FetchPoliceAgencies queries Wikidata for law-enforcement agencies +// worldwide that have official websites. Queries one type ID at a time +// with LIMIT 50 to stay within Wikidata's public SPARQL timeout limits. 
+func FetchPoliceAgencies(ctx context.Context, cfg config.Config, client *fetch.Client) ([]PoliceAgency, error) { + seen := map[string]struct{}{} + var agencies []PoliceAgency + var failures []string + + for _, typeID := range policeAgencyTypeIDs { + if ctx.Err() != nil { + break + } + query := strings.TrimSpace(buildPoliceAgencyQuery(typeID)) + reqURL := wikidataSPARQL + "?format=json&query=" + url.QueryEscape(query) + body, err := fetchWikidataTextWithCache(ctx, cfg, client, reqURL, "application/sparql-results+json, application/json;q=0.9") + if err != nil { + failures = append(failures, fmt.Sprintf("%s: %v", typeID, err)) + continue + } + + var resp struct { + Results struct { + Bindings []struct { + Website struct{ Value string } `json:"website"` + CountryCode struct{ Value string } `json:"countryCode"` + } `json:"bindings"` + } `json:"results"` + } + if err := json.Unmarshal(body, &resp); err != nil { + failures = append(failures, fmt.Sprintf("%s: parse: %v", typeID, err)) + continue + } + + for _, b := range resp.Results.Bindings { + website := strings.TrimRight(strings.TrimSpace(b.Website.Value), "/") + if website == "" { + continue + } + u, err := url.Parse(website) + if err != nil { + continue + } + host := strings.ToLower(u.Hostname()) + if _, ok := seen[host]; ok { + continue + } + seen[host] = struct{}{} + + agencies = append(agencies, PoliceAgency{ + Name: hostToName(host), + Country: countryFromCode(b.CountryCode.Value), + CountryCode: strings.ToUpper(strings.TrimSpace(b.CountryCode.Value)), + Website: website, + AuthorityType: "police", + Category: "public_appeal", + PressReleasePaths: commonPressReleasePaths, + }) + } + } + + sort.Slice(agencies, func(i, j int) bool { + if agencies[i].Country != agencies[j].Country { + return agencies[i].Country < agencies[j].Country + } + return agencies[i].Name < agencies[j].Name + }) + if len(agencies) > 0 { + return agencies, nil + } + if len(failures) > 0 { + return nil, fmt.Errorf("wikidata SPARQL: %s", 
strings.Join(failures, " | ")) + } + return nil, nil +} + +// hostToName derives a readable name from a hostname. +// e.g. "www.politi.dk" → "politi.dk" +func hostToName(host string) string { + host = strings.TrimPrefix(host, "www.") + return host +} + +// countryFromCode returns a country name for a code, or the code itself. +func countryFromCode(code string) string { + code = strings.ToUpper(strings.TrimSpace(code)) + countries := map[string]string{ + "NO": "Norway", "SE": "Sweden", "FI": "Finland", "DK": "Denmark", "IS": "Iceland", + "GB": "United Kingdom", "FR": "France", "DE": "Germany", "NL": "Netherlands", + "BE": "Belgium", "LU": "Luxembourg", "IE": "Ireland", "CH": "Switzerland", + "AT": "Austria", "ES": "Spain", "PT": "Portugal", "IT": "Italy", "GR": "Greece", + "MT": "Malta", "CY": "Cyprus", "PL": "Poland", "CZ": "Czech Republic", + "SK": "Slovakia", "HU": "Hungary", "RO": "Romania", "BG": "Bulgaria", + "HR": "Croatia", "SI": "Slovenia", "RS": "Serbia", "BA": "Bosnia", + "ME": "Montenegro", "MK": "North Macedonia", "AL": "Albania", "XK": "Kosovo", + "EE": "Estonia", "LV": "Latvia", "LT": "Lithuania", "UA": "Ukraine", + "GE": "Georgia", "TR": "Turkey", "US": "United States", "CA": "Canada", + "AU": "Australia", "NZ": "New Zealand", "JP": "Japan", "KR": "South Korea", + "CN": "China", "IN": "India", "BR": "Brazil", "MX": "Mexico", + "IL": "Israel", "SA": "Saudi Arabia", "AE": "United Arab Emirates", + "RU": "Russia", "BY": "Belarus", "MD": "Moldova", + "ZA": "South Africa", "NG": "Nigeria", "KE": "Kenya", "EG": "Egypt", + "ID": "Indonesia", "MY": "Malaysia", "SG": "Singapore", "TH": "Thailand", + "PH": "Philippines", "PK": "Pakistan", "AR": "Argentina", "CO": "Colombia", + } + if name, ok := countries[code]; ok { + return name + } + return code +} diff --git a/internal/collector/discover/probe.go b/internal/collector/discover/probe.go new file mode 100644 index 0000000..02547ed --- /dev/null +++ b/internal/collector/discover/probe.go @@ -0,0 +1,139 @@ +// 
Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "context" + "strings" + + "github.com/scalytics/euosint/internal/collector/fetch" +) + +// Common RSS/Atom feed path suffixes to probe on a website. +var feedPaths = []string{ + "/feed", + "/rss", + "/rss.xml", + "/feed.xml", + "/atom.xml", + "/.rss", + "/advisories/feed", + "/feed/rss", + "/index.xml", + // Press-release / news patterns common on police & government sites. + "/news/feed", + "/news/rss", + "/news/rss.xml", + "/en/feed", + "/en/rss", + "/en/rss.xml", + "/press-releases/feed", + "/media/news/feed", + "/resources/press-releases/feed", + "/newsroom/feed", + "/latest/feed", + // Government / DOJ / ministry patterns. + "/feeds/opa/justice-news.xml", + "/feeds/news.xml", + "/feeds/press-releases.xml", + "/feeds/alerts.xml", + "/blog/feed", + "/blog/rss", + "/updates/feed", + "/publications/feed", + "/advisories.xml", + "/warnings/feed", + "/alerts/feed", + "/releases/feed", + // Multi-language government sites. + "/de/feed", + "/fr/feed", + "/es/feed", + "/it/feed", + "/nl/feed", + "/sv/feed", + "/no/feed", + "/da/feed", + "/fi/feed", + "/pl/feed", + "/pt/feed", +} + +// ProbedFeed is a single discovered feed from probing. +type ProbedFeed struct { + FeedURL string + FeedType string // "rss" or "atom" +} + +// ProbeFeeds tries known RSS/Atom path suffixes on the given base URL and +// returns all valid feeds found (typically one, but a site may have several). 
+func ProbeFeeds(ctx context.Context, client *fetch.Client, baseURL string) []ProbedFeed { + baseURL = strings.TrimRight(baseURL, "/") + var found []ProbedFeed + seen := map[string]struct{}{} + for _, path := range feedPaths { + if ctx.Err() != nil { + break + } + candidate := baseURL + path + body, err := client.Text(ctx, candidate, true, "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8") + if err != nil { + continue + } + content := string(body) + feedType := detectFeedType(content) + if feedType == "" { + continue + } + norm := strings.ToLower(candidate) + if _, ok := seen[norm]; ok { + continue + } + seen[norm] = struct{}{} + found = append(found, ProbedFeed{FeedURL: candidate, FeedType: feedType}) + // Stop after first hit — one valid feed per site is enough. + break + } + return found +} + +// ProbeRSSFeed is a convenience wrapper that returns the first discovered +// feed URL and type (for backward compatibility). +func ProbeRSSFeed(ctx context.Context, client *fetch.Client, baseURL string) (string, string, error) { + results := ProbeFeeds(ctx, client, baseURL) + if len(results) == 0 { + return "", "", nil + } + return results[0].FeedURL, results[0].FeedType, nil +} + +// probeHTMLPage checks whether a URL returns a reachable HTML page that +// looks like a press-release listing (contains anchor tags with text). +func probeHTMLPage(ctx context.Context, client *fetch.Client, url string) bool { + body, err := client.Text(ctx, url, true, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + if err != nil { + return false + } + content := strings.ToLower(string(body)) + // Minimal validation: must look like HTML with some links. 
+ return strings.Contains(content, " 2048 { + prefix = prefix[:2048] + } + lower := strings.ToLower(prefix) + if strings.Contains(lower, " %s\n", + len(discovered), existingCount, outputPath) + return nil +}
diff --git a/internal/collector/discover/search_ddg.go b/internal/collector/discover/search_ddg.go
new file mode 100644
index 0000000..199d2a9
--- /dev/null
+++ b/internal/collector/discover/search_ddg.go
@@ -0,0 +1,239 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package discover
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/scalytics/euosint/internal/collector/config"
+	"github.com/scalytics/euosint/internal/collector/fetch"
+	"github.com/scalytics/euosint/internal/collector/model"
+)
+
+// ddgResultRe extracts href URLs from DDG result links.
+// Every absolute http(s) href in the page is captured, so callers must
+// filter out DDG's own links afterwards.
+var ddgResultRe = regexp.MustCompile(`(?i)href="(https?://[^"]+)"`)
+
+// ddgRedirectRe extracts the real URL from DDG redirect wrappers.
+var ddgRedirectRe = regexp.MustCompile(`[?&]uddg=([^&]+)`)
+
+// ddgSearchCandidates uses DuckDuckGo via a headless browser to find
+// RSS/Atom feed URLs for gap-analysis targets. This is the zero-dependency
+// fallback when no LLM API key is configured.
+// Query failures are reported only when no candidate was found at all; a
+// cancelled context ends the run early with whatever was collected so far.
+func ddgSearchCandidates(ctx context.Context, cfg config.Config, browser *fetch.BrowserClient, seeds []model.SourceCandidate) ([]model.SourceCandidate, error) {
+	if !cfg.DDGSearchEnabled || browser == nil {
+		return nil, nil
+	}
+
+	targets := selectSearchTargets(cfg, seeds)
+	if len(targets) == 0 {
+		return nil, nil
+	}
+
+	maxQueries := cfg.DDGSearchMaxQueries
+	if maxQueries <= 0 {
+		maxQueries = 10
+	}
+	delay := time.Duration(cfg.DDGSearchDelayMS) * time.Millisecond
+	if delay < 5*time.Second {
+		delay = 5 * time.Second
+	}
+
+	// Limit to maxQueries to stay polite.
+	if len(targets) > maxQueries {
+		targets = targets[:maxQueries]
+	}
+
+	var out []model.SourceCandidate
+	var failures []string
+	seen := map[string]struct{}{}
+
+queries:
+	for i, target := range targets {
+		if ctx.Err() != nil {
+			break
+		}
+		// Polite delay between queries.
+		if i > 0 {
+			select {
+			case <-ctx.Done():
+				// A bare break would only leave the select and still run
+				// one more search against the cancelled context.
+				break queries
+			case <-time.After(delay):
+			}
+		}
+
+		query := buildDDGQuery(target)
+		found, err := ddgSearch(ctx, browser, query, target)
+		if err != nil {
+			failures = append(failures, fmt.Sprintf("%s: %v", query, err))
+			continue
+		}
+		for _, c := range found {
+			key := normalizeURL(c.URL)
+			if _, ok := seen[key]; ok {
+				continue
+			}
+			seen[key] = struct{}{}
+			out = append(out, c)
+		}
+	}
+
+	if len(failures) > 0 && len(out) == 0 {
+		return nil, fmt.Errorf("ddg search: %s", strings.Join(failures, " | "))
+	}
+	return out, nil
+}
+
+// buildDDGQuery creates a DDG search query for a discovery target.
+func buildDDGQuery(target model.SourceCandidate) string {
+	parts := []string{}
+
+	name := strings.TrimSpace(target.AuthorityName)
+	if name != "" {
+		parts = append(parts, name)
+	}
+
+	country := strings.TrimSpace(target.Country)
+	if country != "" && !strings.Contains(strings.ToLower(name), strings.ToLower(country)) {
+		parts = append(parts, country)
+	}
+
+	topic := searchTopicLabel(target.Category, target.AuthorityType)
+	if topic != "" {
+		parts = append(parts, topic)
+	}
+
+	parts = append(parts, "RSS OR atom OR feed")
+
+	return strings.Join(parts, " ")
+}
+
+// ddgSearch performs a single DuckDuckGo search and extracts URLs.
+func ddgSearch(ctx context.Context, browser *fetch.BrowserClient, query string, target model.SourceCandidate) ([]model.SourceCandidate, error) { + searchURL := "https://html.duckduckgo.com/html/?q=" + url.QueryEscape(query) + + body, err := browser.Text(ctx, searchURL, true, "") + if err != nil { + return nil, fmt.Errorf("ddg fetch: %w", err) + } + + content := string(body) + urls := extractDDGURLs(content) + if len(urls) == 0 { + return nil, nil + } + + var candidates []model.SourceCandidate + for _, raw := range urls { + if !looksLikeURL(raw) { + continue + } + // Skip DDG internal links. + if strings.Contains(raw, "duckduckgo.com") { + continue + } + // Only keep URLs that look like they could be feeds or official sites. + if !looksLikeFeedURL(raw) && !looksLikeOfficialSite(raw) { + continue + } + candidates = append(candidates, model.SourceCandidate{ + URL: raw, + AuthorityName: target.AuthorityName, + AuthorityType: target.AuthorityType, + Category: target.Category, + Country: target.Country, + CountryCode: target.CountryCode, + Region: target.Region, + BaseURL: extractBaseURL(raw), + Notes: "ddg-search: " + query, + }) + if len(candidates) >= 5 { + break + } + } + return candidates, nil +} + +// extractDDGURLs parses URLs from DDG HTML search results. +func extractDDGURLs(html string) []string { + var urls []string + seen := map[string]struct{}{} + + for _, match := range ddgResultRe.FindAllStringSubmatch(html, -1) { + if len(match) < 2 { + continue + } + rawURL := match[1] + + // DDG wraps results in redirect URLs — extract the real target. + if strings.Contains(rawURL, "duckduckgo.com/l/") { + if redir := ddgRedirectRe.FindStringSubmatch(rawURL); len(redir) >= 2 { + decoded, err := url.QueryUnescape(redir[1]) + if err == nil { + rawURL = decoded + } + } + } + + // Skip DDG assets and internal pages. 
+ if strings.Contains(rawURL, "duckduckgo.com") { + continue + } + + norm := strings.ToLower(strings.TrimRight(rawURL, "/")) + if _, ok := seen[norm]; ok { + continue + } + seen[norm] = struct{}{} + urls = append(urls, rawURL) + } + return urls +} + +// looksLikeOfficialSite checks if a URL looks like an official government +// or organization site worth probing for feeds. +func looksLikeOfficialSite(raw string) bool { + lower := strings.ToLower(raw) + officialTLDs := []string{ + ".gov", ".gob", ".gouv", ".govt", + ".mil", ".edu", + ".int", ".org", + ".police", ".cert", + } + for _, tld := range officialTLDs { + if strings.Contains(lower, tld) { + return true + } + } + officialKeywords := []string{ + "ministry", "department", "agency", + "security", "intelligence", "police", + "cert", "csirt", "ncsc", + } + for _, kw := range officialKeywords { + if strings.Contains(lower, kw) { + return true + } + } + return false +} + +// extractBaseURL returns the scheme + host portion of a URL. +func extractBaseURL(raw string) string { + parsed, err := url.Parse(strings.TrimSpace(raw)) + if err != nil { + return "" + } + return parsed.Scheme + "://" + parsed.Host +} diff --git a/internal/collector/discover/search_ddg_test.go b/internal/collector/discover/search_ddg_test.go new file mode 100644 index 0000000..29dd9c6 --- /dev/null +++ b/internal/collector/discover/search_ddg_test.go @@ -0,0 +1,111 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package discover + +import ( + "testing" + + "github.com/scalytics/euosint/internal/collector/model" +) + +func TestExtractDDGURLs(t *testing.T) { + html := ` + + + + +` + urls := extractDDGURLs(html) + + // Should have 3 URLs (DDG internal filtered out). 
+ expected := map[string]bool{ + "https://www.ncsc.gov.uk/rss/all": false, + "https://www.cert.se/feed/rss.xml": false, + "https://www.justice.gov/feeds/opa/justice-news.xml": false, + } + + for _, u := range urls { + if _, ok := expected[u]; ok { + expected[u] = true + } + } + + for u, found := range expected { + if !found { + t.Errorf("expected URL %q not found in results", u) + } + } +} + +func TestBuildDDGQuery(t *testing.T) { + target := model.SourceCandidate{ + AuthorityName: "Norway national CERT or CSIRT", + Country: "Norway", + Category: "cyber_advisory", + AuthorityType: "cert", + } + + query := buildDDGQuery(target) + + if query == "" { + t.Fatal("expected non-empty query") + } + // Should contain the authority name and RSS/atom/feed keywords. + if !containsAll(query, "Norway", "CERT", "RSS OR atom OR feed") { + t.Errorf("query missing expected parts: %s", query) + } + // Country should NOT be duplicated (already in authority name). + // Actually "Norway" appears in AuthorityName, so it should only appear once. 
+}
+
+func TestLooksLikeOfficialSite(t *testing.T) {
+	tests := []struct {
+		url  string
+		want bool
+	}{
+		{"https://www.ncsc.gov.uk/rss", true},
+		{"https://cert.se/feed", true},
+		{"https://www.police.uk/news", true},
+		{"https://www.bbc.com/news", false},
+		{"https://www.justice.gov/feeds", true},
+		{"https://www.reddit.com/r/netsec", false},
+	}
+
+	for _, tt := range tests {
+		got := looksLikeOfficialSite(tt.url)
+		if got != tt.want {
+			t.Errorf("looksLikeOfficialSite(%q) = %v, want %v", tt.url, got, tt.want)
+		}
+	}
+}
+
+func containsAll(s string, subs ...string) bool {
+	for _, sub := range subs {
+		if !contains(s, sub) {
+			return false
+		}
+	}
+	return true
+}
+
+func contains(s, sub string) bool {
+	return len(s) >= len(sub) && (s == sub || len(s) > 0 && containsSubstring(s, sub))
+}
+
+func containsSubstring(s, sub string) bool {
+	for i := 0; i <= len(s)-len(sub); i++ {
+		if s[i:i+len(sub)] == sub {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/collector/discover/search_llm.go b/internal/collector/discover/search_llm.go
new file mode 100644
index 0000000..e7c3c77
--- /dev/null
+++ b/internal/collector/discover/search_llm.go
@@ -0,0 +1,245 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package discover
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"github.com/scalytics/euosint/internal/collector/config"
+	"github.com/scalytics/euosint/internal/collector/model"
+	"github.com/scalytics/euosint/internal/collector/vet"
+)
+
+type searchCompleter interface {
+	Complete(ctx context.Context, messages []vet.Message) (string, error)
+}
+
+type llmSearchResponse struct {
+	URLs []struct {
+		URL    string `json:"url"`
+		Reason string `json:"reason"`
+	} `json:"urls"`
+}
+
+var searchJSONBlockRe = regexp.MustCompile(`(?s)\{.*\}`)
+
+// llmSearchCandidates asks the LLM completer for feed URLs for a bounded set
+// of seeds. It returns nil when search discovery is disabled, no client is
+// given, or no seed qualifies. Per-target failures are joined into the
+// returned error alongside the candidates that were found.
+func llmSearchCandidates(ctx context.Context, cfg config.Config, client searchCompleter, seeds []model.SourceCandidate) ([]model.SourceCandidate, error) {
+	if !cfg.SearchDiscoveryEnabled || client == nil {
+		return nil, nil
+	}
+	targets := selectSearchTargets(cfg, seeds)
+	if len(targets) == 0 {
+		return nil, nil
+	}
+
+	// The per-target limit may be unset or negative in the config; the same
+	// default as searchCandidateTarget keeps make() from panicking on a
+	// negative capacity.
+	perTarget := cfg.SearchDiscoveryMaxURLsPerTarget
+	if perTarget <= 0 {
+		perTarget = 3
+	}
+	out := make([]model.SourceCandidate, 0, len(targets)*perTarget)
+	var failures []string
+	for _, target := range targets {
+		found, err := searchCandidateTarget(ctx, client, cfg, target)
+		if err != nil {
+			failures = append(failures, fmt.Sprintf("%s: %v", firstNonEmpty(target.AuthorityName, target.URL), err))
+			continue
+		}
+		out = append(out, found...)
+	}
+	if len(failures) > 0 {
+		return out, fmt.Errorf("%s", strings.Join(failures, " | "))
+	}
+	return out, nil
+}
+
+// selectSearchTargets picks at most cfg.SearchDiscoveryMaxTargets seeds that
+// pass discovery hygiene, de-duplicated by authority name, country code, and
+// category. It returns nil when the configured maximum is not positive.
+func selectSearchTargets(cfg config.Config, seeds []model.SourceCandidate) []model.SourceCandidate {
+	maxTargets := cfg.SearchDiscoveryMaxTargets
+	if maxTargets <= 0 {
+		return nil
+	}
+	out := make([]model.SourceCandidate, 0, maxTargets)
+	seen := map[string]struct{}{}
+	for _, seed := range seeds {
+		if !passesDiscoveryHygiene(seed.AuthorityName, firstNonEmpty(seed.BaseURL, seed.URL), seed.AuthorityType) {
+			continue
+		}
+		key := strings.ToLower(strings.TrimSpace(seed.AuthorityName)) + "|" + strings.ToUpper(strings.TrimSpace(seed.CountryCode)) + "|" + strings.ToLower(strings.TrimSpace(seed.Category))
+		if key == "||" {
+			key = normalizeURL(firstNonEmpty(seed.URL, seed.BaseURL))
+		}
+		if key == "" {
+			continue
+		}
+		if _, ok := seen[key]; ok {
+			continue
+		}
+		seen[key] = struct{}{}
+		out = append(out, seed)
+		if len(out) >= maxTargets {
+			break
+		}
+	}
+	return out
+}
+
+// searchCandidateTarget prompts the LLM for feed URLs for one target and
+// converts the valid, de-duplicated answers into source candidates.
+func searchCandidateTarget(ctx context.Context, client searchCompleter, cfg config.Config, target model.SourceCandidate) ([]model.SourceCandidate, error) {
+	maxURLs := cfg.SearchDiscoveryMaxURLsPerTarget
+	if maxURLs <= 0 {
+		maxURLs = 3
+	}
+
+	prompt := fmt.Sprintf(
+		"Find up to %d official RSS or ATOM feed URLs for %s in %s covering %s. Reject local or municipal sources. Return strict JSON only in the form {\"urls\":[{\"url\":\"https://...\",\"reason\":\"short\"}]}. If no official feed exists, return {\"urls\":[]}.",
+		maxURLs,
+		firstNonEmpty(target.AuthorityName, "high-authority OSINT sources"),
+		firstNonEmpty(target.Country, "its jurisdiction"),
+		searchTopicLabel(target.Category, target.AuthorityType),
+	)
+	if base := strings.TrimSpace(firstNonEmpty(target.BaseURL, target.URL)); base != "" {
+		prompt += " Known official website: " + base + "."
// searchTopicLabel turns a source category into the human-readable topic
// phrase that is embedded in the search prompt.
//
// category is matched after trimming surrounding whitespace. Categories with
// no dedicated phrase fall back to "<authorityType> intelligence collection",
// or to plain "intelligence collection" when authorityType is blank.
func searchTopicLabel(category string, authorityType string) string {
	switch strings.TrimSpace(category) {
	case "missing_person":
		return "missing persons and missing children"
	case "wanted_suspect":
		return "wanted persons, fugitives, and public appeals"
	case "terror_warning", "terrorism_tip":
		return "terrorism warnings and threat notices"
	case "organized_crime":
		return "organized crime and major criminal investigations"
	case "travel_warning":
		return "travel warnings and travel advisories"
	case "cyber_advisory":
		return "cyber advisories and security alerts"
	case "public_appeal":
		return "public appeals, wanted persons, and missing persons"
	case "fraud_alert":
		return "fraud alerts, financial crime warnings, and sanctions notices"
	case "intelligence_report":
		return "strategic intelligence assessments and geopolitical analysis"
	case "conflict_monitoring":
		return "armed conflict tracking, ceasefire monitoring, and peace processes"
	case "maritime_security":
		return "maritime security, piracy, shipping threats, coast guard activity, and naval incidents"
	case "legislative":
		return "sanctions, defense policy, foreign affairs, security legislation, and parliamentary security debates"
	case "humanitarian_security", "humanitarian_tasking":
		return "humanitarian operations, aid worker security, and crisis coordination"
	case "health_emergency", "disease_outbreak":
		return "disease outbreaks, epidemics, pandemic surveillance, and public health emergencies"
	case "environmental_disaster":
		return "environmental disasters, earthquakes, oil spills, volcanic activity, and nuclear incidents"
	case "public_safety", "emergency_management":
		return "civil protection, emergency management, and natural disaster warnings"
	}

	// Use the trimmed authority type in the label as well as in the emptiness
	// check, so padded input cannot leak stray whitespace into the prompt.
	if authority := strings.TrimSpace(authorityType); authority != "" {
		return authority + " intelligence collection"
	}
	return "intelligence collection"
}
strings.ToLower(strings.TrimSpace(raw)) + return strings.Contains(raw, "rss") || + strings.Contains(raw, "atom") || + strings.HasSuffix(raw, ".xml") || + strings.Contains(raw, "/feed") +} diff --git a/internal/collector/fetch/browser.go b/internal/collector/fetch/browser.go new file mode 100644 index 0000000..3415d50 --- /dev/null +++ b/internal/collector/fetch/browser.go @@ -0,0 +1,153 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package fetch + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "github.com/chromedp/cdproto/network" + "github.com/chromedp/chromedp" +) + +// BrowserClient fetches page content by driving a headless Chrome instance +// via chromedp. This is used for sites that block even stealth HTTP clients +// (e.g., government sites with aggressive bot detection). +type BrowserClient struct { + allocCtx context.Context + cancelCtx context.CancelFunc + timeoutMS int +} + +// NewBrowser creates a BrowserClient with a shared headless Chrome allocator. +// Call Close() when done to release browser resources. +func NewBrowser(timeoutMS int) (*BrowserClient, error) { + opts := append(chromedp.DefaultExecAllocatorOptions[:], + chromedp.Flag("headless", true), + chromedp.Flag("no-sandbox", true), + chromedp.Flag("disable-gpu", true), + chromedp.Flag("disable-dev-shm-usage", true), + ) + allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) + return &BrowserClient{ + allocCtx: allocCtx, + cancelCtx: cancel, + timeoutMS: timeoutMS, + }, nil +} + +// Text navigates to the URL, waits for the network to become idle, and +// returns the full page HTML as bytes. The followRedirects and accept +// parameters are accepted for interface compatibility but Chrome handles +// redirects natively and always sends its own Accept header. 
+func (b *BrowserClient) Text(ctx context.Context, url string, followRedirects bool, accept string) ([]byte, error) { + timeout := time.Duration(b.timeoutMS) * time.Millisecond + taskCtx, cancel := chromedp.NewContext(b.allocCtx) + defer cancel() + + taskCtx, cancelTimeout := context.WithTimeout(taskCtx, timeout) + defer cancelTimeout() + + var html string + err := chromedp.Run(taskCtx, + chromedp.Navigate(url), + chromedp.WaitReady("body"), + chromedp.OuterHTML("html", &html), + ) + if err != nil { + return nil, fmt.Errorf("browser fetch %s: %w", url, err) + } + return []byte(html), nil +} + +// CaptureJSONResponses opens a page in headless Chrome and collects JSON +// XHR/fetch responses whose URL contains the given substring. +func (b *BrowserClient) CaptureJSONResponses(ctx context.Context, pageURL string, urlContains string) ([][]byte, error) { + timeout := time.Duration(b.timeoutMS) * time.Millisecond + taskCtx, cancel := chromedp.NewContext(b.allocCtx) + defer cancel() + + taskCtx, cancelTimeout := context.WithTimeout(taskCtx, timeout) + defer cancelTimeout() + + var ( + mu sync.Mutex + seen = map[network.RequestID]string{} + bodies [][]byte + captureErr error + ) + + chromedp.ListenTarget(taskCtx, func(ev any) { + switch e := ev.(type) { + case *network.EventResponseReceived: + if !strings.Contains(e.Response.URL, urlContains) { + return + } + if e.Type != network.ResourceTypeXHR && e.Type != network.ResourceTypeFetch { + return + } + mu.Lock() + seen[e.RequestID] = e.Response.URL + mu.Unlock() + case *network.EventLoadingFinished: + mu.Lock() + _, ok := seen[e.RequestID] + mu.Unlock() + if !ok { + return + } + go func(requestID network.RequestID) { + var body []byte + err := chromedp.Run(taskCtx, chromedp.ActionFunc(func(ctx context.Context) error { + data, err := network.GetResponseBody(requestID).Do(ctx) + if err != nil { + return err + } + body = data + return nil + })) + mu.Lock() + defer mu.Unlock() + if err != nil { + if captureErr == nil { + 
captureErr = err + } + return + } + if len(body) > 0 { + bodies = append(bodies, body) + } + }(e.RequestID) + } + }) + + if err := chromedp.Run(taskCtx, + network.Enable(), + chromedp.Navigate(pageURL), + chromedp.WaitReady("body"), + chromedp.Sleep(5*time.Second), + ); err != nil { + return nil, fmt.Errorf("browser capture %s: %w", pageURL, err) + } + + mu.Lock() + defer mu.Unlock() + if len(bodies) == 0 && captureErr != nil { + return nil, fmt.Errorf("browser capture %s: %w", pageURL, captureErr) + } + if len(bodies) == 0 { + return nil, fmt.Errorf("browser capture %s: no matching JSON responses", pageURL) + } + return bodies, nil +} + +// Close shuts down the browser allocator and releases Chrome processes. +func (b *BrowserClient) Close() { + if b.cancelCtx != nil { + b.cancelCtx() + } +} diff --git a/internal/collector/fetch/client.go b/internal/collector/fetch/client.go index 78c3470..3fca586 100644 --- a/internal/collector/fetch/client.go +++ b/internal/collector/fetch/client.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "io" + "net" "net/http" "strings" "time" @@ -22,8 +23,14 @@ type Client struct { } func New(cfg config.Config) *Client { + timeout := time.Duration(cfg.HTTPTimeoutMS) * time.Millisecond + return NewWithHTTPClient(cfg, &http.Client{ - Timeout: time.Duration(cfg.HTTPTimeoutMS) * time.Millisecond, + Timeout: timeout, + Transport: newStealthTransport(&net.Dialer{ + Timeout: timeout, + KeepAlive: 30 * time.Second, + }), CheckRedirect: func(req *http.Request, via []*http.Request) error { if len(via) >= 10 { return errors.New("stopped after 10 redirects") @@ -42,6 +49,10 @@ func NewWithHTTPClient(cfg config.Config, httpClient *http.Client) *Client { } func (c *Client) Text(ctx context.Context, url string, followRedirects bool, accept string) ([]byte, error) { + return c.TextWithHeaders(ctx, url, followRedirects, accept, nil) +} + +func (c *Client) TextWithHeaders(ctx context.Context, url string, followRedirects bool, accept string, extraHeaders 
map[string]string) ([]byte, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, fmt.Errorf("build request %s: %w", url, err) @@ -50,6 +61,22 @@ func (c *Client) Text(ctx context.Context, url string, followRedirects bool, acc if strings.TrimSpace(accept) != "" { req.Header.Set("Accept", accept) } + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + req.Header.Set("Accept-Encoding", "gzip, deflate, br") + req.Header.Set("Cache-Control", "no-cache") + req.Header.Set("Pragma", "no-cache") + req.Header.Set("DNT", "1") + req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-User", "?1") + for key, value := range extraHeaders { + if strings.TrimSpace(key) == "" || strings.TrimSpace(value) == "" { + continue + } + req.Header.Set(key, value) + } client := c.httpClient if !followRedirects { @@ -70,8 +97,7 @@ func (c *Client) Text(ctx context.Context, url string, followRedirects bool, acc return nil, fmt.Errorf("fetch %s: status %d", url, res.StatusCode) } - reader := io.LimitReader(res.Body, c.maxBodyBytes+1) - body, err := io.ReadAll(reader) + body, err := readBody(res, c.maxBodyBytes) if err != nil { return nil, fmt.Errorf("read %s: %w", url, err) } @@ -81,3 +107,11 @@ func (c *Client) Text(ctx context.Context, url string, followRedirects bool, acc return body, nil } + +// readBody reads the response body, handling gzip/br/deflate transparently. +// The stealth transport configures decompression, but if a test transport is +// injected the body may already be plain text. 
+func readBody(res *http.Response, limit int64) ([]byte, error) { + reader := io.LimitReader(res.Body, limit+1) + return io.ReadAll(reader) +} diff --git a/internal/collector/fetch/client_test.go b/internal/collector/fetch/client_test.go index da6b66b..865231a 100644 --- a/internal/collector/fetch/client_test.go +++ b/internal/collector/fetch/client_test.go @@ -4,6 +4,8 @@ package fetch import ( + "bytes" + "compress/gzip" "context" "io" "net/http" @@ -33,8 +35,125 @@ func TestClientText(t *testing.T) { } } +func TestClientTextSetsBrowserLikeHeaders(t *testing.T) { + cfg := config.Default() + client := NewWithHTTPClient(cfg, &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + if got := req.Header.Get("User-Agent"); !strings.Contains(got, "Mozilla/5.0") { + t.Fatalf("unexpected user-agent %q", got) + } + if got := req.Header.Get("Accept-Language"); got == "" { + t.Fatal("missing Accept-Language header") + } + if got := req.Header.Get("Upgrade-Insecure-Requests"); got != "1" { + t.Fatalf("unexpected upgrade header %q", got) + } + if got := req.Header.Get("Accept-Encoding"); !strings.Contains(got, "gzip") { + t.Fatalf("missing Accept-Encoding gzip: %q", got) + } + if got := req.Header.Get("Sec-Fetch-Dest"); got != "document" { + t.Fatalf("unexpected Sec-Fetch-Dest %q", got) + } + if got := req.Header.Get("Sec-Fetch-Mode"); got != "navigate" { + t.Fatalf("unexpected Sec-Fetch-Mode %q", got) + } + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader("ok")), + Header: make(http.Header), + }, nil + }), + }) + + if _, err := client.Text(context.Background(), "https://collector.test", true, "text/html"); err != nil { + t.Fatal(err) + } +} + +func TestDecompressBodyGzip(t *testing.T) { + var buf bytes.Buffer + gw := gzip.NewWriter(&buf) + gw.Write([]byte("hello gzip")) + gw.Close() + + res := &http.Response{ + StatusCode: 200, + Header: http.Header{"Content-Encoding": {"gzip"}}, + Body: io.NopCloser(&buf), 
+ } + + if err := decompressBody(res); err != nil { + t.Fatal(err) + } + + body, err := io.ReadAll(res.Body) + if err != nil { + t.Fatal(err) + } + if string(body) != "hello gzip" { + t.Fatalf("unexpected body %q", string(body)) + } +} + +func TestDecompressBodyIdentity(t *testing.T) { + res := &http.Response{ + StatusCode: 200, + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader("plain")), + } + if err := decompressBody(res); err != nil { + t.Fatal(err) + } + body, err := io.ReadAll(res.Body) + if err != nil { + t.Fatal(err) + } + if string(body) != "plain" { + t.Fatalf("unexpected body %q", string(body)) + } +} + +func TestStealthRoundTripperFallsBackToHTTP11AfterHTTP2PeerError(t *testing.T) { + rt := &stealthRoundTripper{ + dual: &dualProtoTransport{ + protoByHost: map[string]string{"https://collector.test": "h2"}, + roundTripH2: func(req *http.Request) (*http.Response, error) { + return nil, roundTripError("stream error: stream ID 3; INTERNAL_ERROR; received from peer") + }, + roundTripH1: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader("ok")), + Header: make(http.Header), + }, nil + }, + }, + } + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, "https://collector.test/feed", nil) + if err != nil { + t.Fatal(err) + } + res, err := rt.RoundTrip(req) + if err != nil { + t.Fatal(err) + } + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + t.Fatal(err) + } + if string(body) != "ok" { + t.Fatalf("unexpected body %q", string(body)) + } +} + type roundTripFunc func(*http.Request) (*http.Response, error) func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { return fn(req) } + +type roundTripError string + +func (e roundTripError) Error() string { return string(e) } diff --git a/internal/collector/fetch/fetcher.go b/internal/collector/fetch/fetcher.go new file mode 100644 index 
0000000..e482f39 --- /dev/null +++ b/internal/collector/fetch/fetcher.go @@ -0,0 +1,22 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package fetch + +import "context" + +// Fetcher is the common interface for fetching page content as text. +// Both the stealth HTTP Client and the headless BrowserClient satisfy it. +type Fetcher interface { + Text(ctx context.Context, url string, followRedirects bool, accept string) ([]byte, error) +} + +// FetcherFor returns the appropriate Fetcher for the given fetch mode. +// When mode is "browser" and a BrowserClient is available, the browser +// fetcher is returned. Otherwise the stealth HTTP client is used. +func FetcherFor(mode string, client *Client, browser *BrowserClient) Fetcher { + if mode == "browser" && browser != nil { + return browser + } + return client +} diff --git a/internal/collector/fetch/stealth.go b/internal/collector/fetch/stealth.go new file mode 100644 index 0000000..88c1209 --- /dev/null +++ b/internal/collector/fetch/stealth.go @@ -0,0 +1,229 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package fetch + +import ( + "bufio" + "compress/flate" + "compress/gzip" + "context" + "crypto/tls" + "io" + "net" + "net/http" + "strings" + "sync" + "time" + + "github.com/andybalholm/brotli" + utls "github.com/refraction-networking/utls" + "golang.org/x/net/http2" +) + +// newStealthTransport builds an http.RoundTripper whose TLS ClientHello +// impersonates a recent Chrome release. This prevents WAFs that +// fingerprint Go's default TLS stack (JA3/JA4) from blocking requests. +// +// It supports both HTTP/1.1 and HTTP/2: after the uTLS handshake the +// negotiated ALPN protocol is cached per-host and subsequent requests +// are routed to the correct transport automatically. 
+// +// It also transparently decompresses gzip/br/deflate responses, since we +// explicitly send Accept-Encoding to look like a real browser (which +// disables Go's automatic gzip handling). +func newStealthTransport(dialer *net.Dialer) http.RoundTripper { + dt := &dualProtoTransport{ + dialer: dialer, + protoByHost: make(map[string]string), + } + + dt.h1 = &http.Transport{ + Proxy: http.ProxyFromEnvironment, + ForceAttemptHTTP2: false, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + DialTLSContext: dt.dialTLS, + DialContext: dialer.DialContext, + } + + dt.h2 = &http2.Transport{ + DialTLSContext: func(ctx context.Context, network, addr string, _ *tls.Config) (net.Conn, error) { + return dt.dialTLS(ctx, network, addr) + }, + } + + return &stealthRoundTripper{dual: dt} +} + +// dualProtoTransport manages uTLS connections and routes them to the +// appropriate HTTP/1.1 or HTTP/2 transport based on ALPN negotiation. +type dualProtoTransport struct { + dialer *net.Dialer + h1 *http.Transport + h2 *http2.Transport + + mu sync.Mutex + protoByHost map[string]string // scheme://hostname -> "h2" | "h1" + roundTripH1 func(*http.Request) (*http.Response, error) + roundTripH2 func(*http.Request) (*http.Response, error) +} + +func (dt *dualProtoTransport) dialTLS(ctx context.Context, network, addr string) (net.Conn, error) { + rawConn, err := dt.dialer.DialContext(ctx, network, addr) + if err != nil { + return nil, err + } + + host, _, err := net.SplitHostPort(addr) + if err != nil { + host = addr + } + + tlsConn := utls.UClient(rawConn, &utls.Config{ + ServerName: host, + }, utls.HelloChrome_Auto) + + if err := tlsConn.HandshakeContext(ctx); err != nil { + rawConn.Close() + return nil, err + } + + // Cache the negotiated protocol so future requests skip the probe. 
+ proto := tlsConn.ConnectionState().NegotiatedProtocol + dt.mu.Lock() + if proto == "h2" { + dt.protoByHost["https://"+host] = "h2" + } else { + dt.protoByHost["https://"+host] = "h1" + } + dt.mu.Unlock() + + return tlsConn, nil +} + +func (dt *dualProtoTransport) getProto(scheme string, host string) string { + dt.mu.Lock() + defer dt.mu.Unlock() + return dt.protoByHost[scheme+"://"+host] +} + +// stealthRoundTripper routes requests to the appropriate protocol +// transport and transparently decompresses response bodies. +type stealthRoundTripper struct { + dual *dualProtoTransport +} + +func (s *stealthRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if req.URL.Scheme == "http" { + res, err := s.dual.h1.RoundTrip(req) + if err != nil { + return nil, err + } + if derr := decompressBody(res); derr != nil { + res.Body = io.NopCloser(bufio.NewReader(res.Body)) + } + return res, nil + } + + host := req.URL.Hostname() + proto := s.dual.getProto(req.URL.Scheme, host) + + var res *http.Response + var err error + + switch proto { + case "h2": + res, err = s.dual.doRoundTripH2(req) + if err != nil && shouldRetryH2AsH1(err) { + res, err = s.dual.doRoundTripH1(req) + } + case "h1": + res, err = s.dual.doRoundTripH1(req) + default: + // Unknown host — try h1 first. If the server negotiates h2 via + // ALPN, the h1 transport will fail with "malformed HTTP response" + // because it tries HTTP/1.1 framing on an h2 connection. The + // dialTLS callback caches the negotiated proto regardless, so we + // can detect this and retry with the h2 transport. + res, err = s.dual.doRoundTripH1(req) + if err != nil && s.dual.getProto(req.URL.Scheme, host) == "h2" { + res, err = s.dual.doRoundTripH2(req) + } + } + + if err != nil { + return nil, err + } + if derr := decompressBody(res); derr != nil { + // If decompression setup fails, still return the raw body. 
+ res.Body = io.NopCloser(bufio.NewReader(res.Body)) + } + return res, nil +} + +func shouldRetryH2AsH1(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "stream error") || + strings.Contains(msg, "internal_error") || + strings.Contains(msg, "received from peer") +} + +func (dt *dualProtoTransport) doRoundTripH1(req *http.Request) (*http.Response, error) { + if dt.roundTripH1 != nil { + return dt.roundTripH1(req) + } + return dt.h1.RoundTrip(req) +} + +func (dt *dualProtoTransport) doRoundTripH2(req *http.Request) (*http.Response, error) { + if dt.roundTripH2 != nil { + return dt.roundTripH2(req) + } + return dt.h2.RoundTrip(req) +} + +// decompressBody wraps the response body reader to handle Content-Encoding. +func decompressBody(res *http.Response) error { + ce := strings.ToLower(strings.TrimSpace(res.Header.Get("Content-Encoding"))) + if ce == "" || ce == "identity" { + return nil + } + + var reader io.ReadCloser + switch ce { + case "gzip": + gr, err := gzip.NewReader(res.Body) + if err != nil { + return err + } + reader = gr + case "br": + reader = io.NopCloser(brotli.NewReader(res.Body)) + case "deflate": + reader = flate.NewReader(res.Body) + default: + return nil + } + + original := res.Body + res.Body = &wrappedBody{reader: reader, closer: original} + res.Header.Del("Content-Encoding") + res.Header.Del("Content-Length") + return nil +} + +// wrappedBody reads from the decompressor but closes the underlying conn. 
+type wrappedBody struct { + reader io.ReadCloser + closer io.Closer +} + +func (w *wrappedBody) Read(p []byte) (int, error) { return w.reader.Read(p) } +func (w *wrappedBody) Close() error { + w.reader.Close() + return w.closer.Close() +} diff --git a/internal/collector/model/alert.go b/internal/collector/model/alert.go index 73fd02f..d8ba158 100644 --- a/internal/collector/model/alert.go +++ b/internal/collector/model/alert.go @@ -50,26 +50,69 @@ type DuplicateAudit struct { } type SourceHealthEntry struct { - SourceID string `json:"source_id"` - AuthorityName string `json:"authority_name"` - Type string `json:"type"` - Status string `json:"status"` - FetchedCount int `json:"fetched_count"` - FeedURL string `json:"feed_url"` - Error string `json:"error,omitempty"` - StartedAt string `json:"started_at"` - FinishedAt string `json:"finished_at"` - ActiveCount int `json:"active_count,omitempty"` - FilteredCount int `json:"filtered_count,omitempty"` + SourceID string `json:"source_id"` + AuthorityName string `json:"authority_name"` + Type string `json:"type"` + Status string `json:"status"` + FetchedCount int `json:"fetched_count"` + FeedURL string `json:"feed_url"` + Error string `json:"error,omitempty"` + ErrorClass string `json:"error_class,omitempty"` + NeedsReplacement bool `json:"needs_replacement,omitempty"` + DiscoveryAction string `json:"discovery_action,omitempty"` + StartedAt string `json:"started_at"` + FinishedAt string `json:"finished_at"` + ActiveCount int `json:"active_count,omitempty"` + FilteredCount int `json:"filtered_count,omitempty"` +} + +type SourceReplacementCandidate struct { + SourceID string `json:"source_id"` + AuthorityName string `json:"authority_name"` + Type string `json:"type"` + FeedURL string `json:"feed_url"` + BaseURL string `json:"base_url,omitempty"` + Country string `json:"country,omitempty"` + CountryCode string `json:"country_code,omitempty"` + Region string `json:"region,omitempty"` + AuthorityType string 
`json:"authority_type,omitempty"` + Category string `json:"category,omitempty"` + Error string `json:"error,omitempty"` + ErrorClass string `json:"error_class,omitempty"` + DiscoveryAction string `json:"discovery_action,omitempty"` + LastAttemptAt string `json:"last_attempt_at,omitempty"` } type SourceHealthDocument struct { - GeneratedAt string `json:"generated_at"` - CriticalSourcePrefixes []string `json:"critical_source_prefixes"` - FailOnCriticalSourceGap bool `json:"fail_on_critical_source_gap"` - TotalSources int `json:"total_sources"` - SourcesOK int `json:"sources_ok"` - SourcesError int `json:"sources_error"` - DuplicateAudit DuplicateAudit `json:"duplicate_audit"` - Sources []SourceHealthEntry `json:"sources"` + GeneratedAt string `json:"generated_at"` + CriticalSourcePrefixes []string `json:"critical_source_prefixes"` + FailOnCriticalSourceGap bool `json:"fail_on_critical_source_gap"` + TotalSources int `json:"total_sources"` + SourcesOK int `json:"sources_ok"` + SourcesError int `json:"sources_error"` + DuplicateAudit DuplicateAudit `json:"duplicate_audit"` + ReplacementQueue []SourceReplacementCandidate `json:"replacement_queue"` + Sources []SourceHealthEntry `json:"sources"` +} + +type SourceReplacementDocument struct { + GeneratedAt string `json:"generated_at"` + Sources []SourceReplacementCandidate `json:"sources"` +} + +type SourceCandidate struct { + URL string `json:"url"` + AuthorityName string `json:"authority_name,omitempty"` + AuthorityType string `json:"authority_type,omitempty"` + Category string `json:"category,omitempty"` + Country string `json:"country,omitempty"` + CountryCode string `json:"country_code,omitempty"` + Region string `json:"region,omitempty"` + BaseURL string `json:"base_url,omitempty"` + Notes string `json:"notes,omitempty"` +} + +type SourceCandidateDocument struct { + GeneratedAt string `json:"generated_at,omitempty"` + Sources []SourceCandidate `json:"sources"` } diff --git a/internal/collector/model/source.go 
b/internal/collector/model/source.go index 6e35c80..ee8e079 100644 --- a/internal/collector/model/source.go +++ b/internal/collector/model/source.go @@ -3,8 +3,11 @@ package model +// Supported source types: rss, html-list, kev-json, interpol-red-json, +// interpol-yellow-json, fbi-wanted-json, travelwarning-json, travelwarning-atom. type RegistrySource struct { Type string `json:"type"` + FetchMode string `json:"fetch_mode,omitempty"` // "stealth" (default) or "browser" FollowRedirects bool `json:"followRedirects"` FeedURL string `json:"feed_url"` FeedURLs []string `json:"feed_urls,omitempty"` @@ -13,8 +16,14 @@ type RegistrySource struct { Lat float64 `json:"lat"` Lng float64 `json:"lng"` MaxItems int `json:"max_items"` + Accumulate bool `json:"accumulate,omitempty"` IncludeKeywords []string `json:"include_keywords,omitempty"` ExcludeKeywords []string `json:"exclude_keywords,omitempty"` + SourceQuality float64 `json:"source_quality,omitempty"` + PromotionStatus string `json:"promotion_status,omitempty"` + RejectionReason string `json:"rejection_reason,omitempty"` + IsMirror bool `json:"is_mirror,omitempty"` + PreferredRank int `json:"preferred_source_rank,omitempty"` Reporting ReportingMetadata `json:"reporting"` Source SourceMetadata `json:"source"` } @@ -27,11 +36,20 @@ type ReportingMetadata struct { } type SourceMetadata struct { - SourceID string `json:"source_id"` - AuthorityName string `json:"authority_name"` - Country string `json:"country"` - CountryCode string `json:"country_code"` - Region string `json:"region"` - AuthorityType string `json:"authority_type"` - BaseURL string `json:"base_url"` + SourceID string `json:"source_id"` + AuthorityName string `json:"authority_name"` + Country string `json:"country"` + CountryCode string `json:"country_code"` + Region string `json:"region"` + AuthorityType string `json:"authority_type"` + BaseURL string `json:"base_url"` + Scope string `json:"scope,omitempty"` + Level string `json:"level,omitempty"` + 
ParentAgencyID string `json:"parent_agency_id,omitempty"` + JurisdictionName string `json:"jurisdiction_name,omitempty"` + MissionTags []string `json:"mission_tags,omitempty"` + OperationalRelevance float64 `json:"operational_relevance,omitempty"` + IsCurated bool `json:"is_curated,omitempty"` + IsHighValue bool `json:"is_high_value,omitempty"` + LanguageCode string `json:"language_code,omitempty"` } diff --git a/internal/collector/normalize/geocode.go b/internal/collector/normalize/geocode.go new file mode 100644 index 0000000..e9f25fc --- /dev/null +++ b/internal/collector/normalize/geocode.go @@ -0,0 +1,409 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package normalize + +import "strings" + +// countryGeo holds a country centroid for map placement. +type countryGeo struct { + Name string + Code string + Lat float64 + Lng float64 +} + +// geoCountries is the lookup table for geocoding country mentions in +// alert titles/summaries. Centroids are approximate and intentionally +// simple — only enough precision for a world map pin. 
+var geoCountries = []countryGeo{
+	// Each entry is {Name, Code, Lat, Lng}. Lowercased names become keys of
+	// geoIndex; Code is matched by geocodeCountryCode. Two entries share the
+	// code "PS" (Gaza and Palestine); lookups by code see the first one.
+	{"Afghanistan", "AF", 33.93, 67.71},
+	{"Albania", "AL", 41.15, 20.17},
+	{"Algeria", "DZ", 28.03, 1.66},
+	{"Angola", "AO", -11.20, 17.87},
+	{"Argentina", "AR", -38.42, -63.62},
+	{"Armenia", "AM", 40.07, 45.04},
+	{"Australia", "AU", -25.27, 133.78},
+	{"Austria", "AT", 47.52, 14.55},
+	{"Azerbaijan", "AZ", 40.14, 47.58},
+	{"Bahrain", "BH", 26.07, 50.55},
+	{"Bangladesh", "BD", 23.68, 90.36},
+	{"Belarus", "BY", 53.71, 27.95},
+	{"Belgium", "BE", 50.50, 4.47},
+	{"Benin", "BJ", 9.31, 2.32},
+	{"Bolivia", "BO", -16.29, -63.59},
+	{"Bosnia", "BA", 43.92, 17.68},
+	{"Botswana", "BW", -22.33, 24.68},
+	{"Brazil", "BR", -14.24, -51.93},
+	{"Bulgaria", "BG", 42.73, 25.49},
+	{"Burkina Faso", "BF", 12.24, -1.56},
+	{"Burundi", "BI", -3.37, 29.92},
+	{"Cambodia", "KH", 12.57, 104.99},
+	{"Cameroon", "CM", 7.37, 12.35},
+	{"Canada", "CA", 56.13, -106.35},
+	{"Central African Republic", "CF", 6.61, 20.94},
+	{"Chad", "TD", 15.45, 18.73},
+	{"Chile", "CL", -35.68, -71.54},
+	{"China", "CN", 35.86, 104.20},
+	{"Colombia", "CO", 4.57, -74.30},
+	{"Congo", "CD", -4.04, 21.76},
+	{"Costa Rica", "CR", 9.75, -83.75},
+	{"Croatia", "HR", 45.10, 15.20},
+	{"Cuba", "CU", 21.52, -77.78},
+	{"Cyprus", "CY", 35.13, 33.43},
+	{"Czech Republic", "CZ", 49.82, 15.47},
+	{"Denmark", "DK", 56.26, 9.50},
+	{"Dominican Republic", "DO", 18.74, -70.16},
+	{"Ecuador", "EC", -1.83, -78.18},
+	{"Egypt", "EG", 26.82, 30.80},
+	{"El Salvador", "SV", 13.79, -88.90},
+	{"Eritrea", "ER", 15.18, 39.78},
+	{"Estonia", "EE", 58.60, 25.01},
+	{"Ethiopia", "ET", 9.14, 40.49},
+	{"Finland", "FI", 61.92, 25.75},
+	{"France", "FR", 46.23, 2.21},
+	{"Gabon", "GA", -0.80, 11.61},
+	{"Gambia", "GM", 13.44, -15.31},
+	{"Gaza", "PS", 31.35, 34.31},
+	{"Georgia", "GE", 42.32, 43.36},
+	{"Germany", "DE", 51.17, 10.45},
+	{"Ghana", "GH", 7.95, -1.02},
+	{"Greece", "GR", 39.07, 21.82},
+	{"Guatemala", "GT", 15.78, -90.23},
+	{"Guinea", "GN", 9.95, -9.70},
+	{"Haiti", "HT", 18.97, -72.29},
+	{"Honduras", "HN", 15.20, -86.24},
+	{"Hungary", "HU", 47.16, 19.50},
+	{"Iceland", "IS", 64.96, -19.02}, // capitalCoords already carries IS
+	{"India", "IN", 20.59, 78.96},
+	{"Indonesia", "ID", -0.79, 113.92},
+	{"Iran", "IR", 32.43, 53.69},
+	{"Iraq", "IQ", 33.22, 43.68},
+	{"Ireland", "IE", 53.14, -7.69},
+	{"Israel", "IL", 31.05, 34.85},
+	{"Italy", "IT", 41.87, 12.57},
+	{"Ivory Coast", "CI", 7.54, -5.55},
+	{"Jamaica", "JM", 18.11, -77.30},
+	{"Japan", "JP", 36.20, 138.25},
+	{"Jordan", "JO", 30.59, 36.24},
+	{"Kazakhstan", "KZ", 48.02, 66.92},
+	{"Kenya", "KE", -0.02, 37.91},
+	{"Kosovo", "XK", 42.60, 20.90},
+	{"Kuwait", "KW", 29.31, 47.48},
+	{"Kyrgyzstan", "KG", 41.20, 74.77},
+	{"Laos", "LA", 19.86, 102.50},
+	{"Latvia", "LV", 56.88, 24.60},
+	{"Lebanon", "LB", 33.85, 35.86},
+	{"Libya", "LY", 26.34, 17.23},
+	{"Lithuania", "LT", 55.17, 23.88},
+	{"Luxembourg", "LU", 49.82, 6.13}, // capitalCoords already carries LU
+	{"Madagascar", "MG", -18.77, 46.87},
+	{"Malawi", "MW", -13.25, 34.30},
+	{"Malaysia", "MY", 4.21, 101.98},
+	{"Mali", "ML", 17.57, -4.00},
+	{"Malta", "MT", 35.90, 14.51},
+	{"Mauritania", "MR", 21.01, -10.94},
+	{"Mexico", "MX", 23.63, -102.55},
+	{"Moldova", "MD", 47.41, 28.37},
+	{"Mongolia", "MN", 46.86, 103.85},
+	{"Montenegro", "ME", 42.71, 19.37},
+	{"Morocco", "MA", 31.79, -7.09},
+	{"Mozambique", "MZ", -18.67, 35.53},
+	{"Myanmar", "MM", 21.91, 95.96},
+	{"Namibia", "NA", -22.96, 18.49},
+	{"Nepal", "NP", 28.39, 84.12},
+	{"Netherlands", "NL", 52.13, 5.29},
+	{"New Zealand", "NZ", -40.90, 174.89},
+	{"Nicaragua", "NI", 12.87, -85.21},
+	{"Niger", "NE", 17.61, 8.08},
+	{"Nigeria", "NG", 9.08, 8.68},
+	{"North Korea", "KP", 40.34, 127.51},
+	{"North Macedonia", "MK", 41.51, 21.75},
+	{"Norway", "NO", 60.47, 8.47},
+	{"Oman", "OM", 21.51, 55.92},
+	{"Pakistan", "PK", 30.38, 69.35},
+	{"Palestine", "PS", 31.95, 35.23},
+	{"Panama", "PA", 8.54, -80.78},
+	{"Papua New Guinea", "PG", -6.31, 143.96},
+	{"Paraguay", "PY", -23.44, -58.44},
+	{"Peru", "PE", -9.19, -75.02},
+	{"Philippines", "PH", 12.88, 121.77},
+	{"Poland", "PL", 51.92, 19.15},
+	{"Portugal", "PT", 39.40, -8.22},
+	{"Qatar", "QA", 25.35, 51.18},
+	{"Romania", "RO", 45.94, 24.97},
+	{"Russia", "RU", 61.52, 105.32},
+	{"Rwanda", "RW", -1.94, 29.87},
+	{"Saudi Arabia", "SA", 23.89, 45.08},
+	{"Senegal", "SN", 14.50, -14.45},
+	{"Serbia", "RS", 44.02, 21.01},
+	{"Sierra Leone", "SL", 8.46, -11.78},
+	{"Singapore", "SG", 1.35, 103.82},
+	{"Slovakia", "SK", 48.67, 19.70},
+	{"Slovenia", "SI", 46.15, 14.99},
+	{"Somalia", "SO", 5.15, 46.20},
+	{"South Africa", "ZA", -30.56, 22.94},
+	{"South Korea", "KR", 35.91, 127.77},
+	{"South Sudan", "SS", 6.88, 31.31},
+	{"Spain", "ES", 40.46, -3.75},
+	{"Sri Lanka", "LK", 7.87, 80.77},
+	{"Sudan", "SD", 12.86, 30.22},
+	{"Sweden", "SE", 60.13, 18.64},
+	{"Switzerland", "CH", 46.82, 8.23},
+	{"Syria", "SY", 34.80, 38.99},
+	{"Taiwan", "TW", 23.70, 120.96},
+	{"Tajikistan", "TJ", 38.86, 71.28},
+	{"Tanzania", "TZ", -6.37, 34.89},
+	{"Thailand", "TH", 15.87, 100.99},
+	{"Togo", "TG", 8.62, 1.21},
+	{"Tunisia", "TN", 33.89, 9.54},
+	{"Turkey", "TR", 38.96, 35.24},
+	{"Turkmenistan", "TM", 38.97, 59.56},
+	{"Uganda", "UG", 1.37, 32.29},
+	{"Ukraine", "UA", 48.38, 31.17},
+	{"United Arab Emirates", "AE", 23.42, 53.85},
+	{"United Kingdom", "GB", 55.38, -3.44},
+	{"United States", "US", 37.09, -95.71},
+	{"Uruguay", "UY", -32.52, -55.77},
+	{"Uzbekistan", "UZ", 41.38, 64.59},
+	{"Venezuela", "VE", 6.42, -66.59},
+	{"Vietnam", "VN", 14.06, 108.28},
+	{"Yemen", "YE", 15.55, 48.52},
+	{"Zambia", "ZM", -13.13, 28.64},
+	{"Zimbabwe", "ZW", -19.02, 29.15},
+}
+
+// geoAliases maps alternative names, adjectives, and region names to
+// the canonical country name used in geoCountries.
+var geoAliases = map[string]string{
+	// Adjective forms
+	"afghan": "Afghanistan",
+	"algerian": "Algeria",
+	"angolan": "Angola",
+	"argentine": "Argentina",
+	"armenian": "Armenia",
+	"azerbaijani": "Azerbaijan",
+	"bangladeshi": "Bangladesh",
+	"belarusian": "Belarus",
+	"bolivian": "Bolivia",
+	"bosnian": "Bosnia",
+	"brazilian": "Brazil",
+	"burmese": "Myanmar",
+	"burundian": "Burundi",
+	"cambodian": "Cambodia",
+	"cameroonian": "Cameroon",
+	"chadian": "Chad",
+	"chinese": "China",
+	"colombian": "Colombia",
+	"congolese": "Congo",
+	"cuban": "Cuba",
+	"ecuadorian": "Ecuador",
+	"egyptian": "Egypt",
+	"eritrean": "Eritrea",
+	"ethiopian": "Ethiopia",
+	"gambian": "Gambia",
+	"georgian": "Georgia",
+	"ghanaian": "Ghana",
+	"guatemalan": "Guatemala",
+	"guinean": "Guinea",
+	"haitian": "Haiti",
+	"honduran": "Honduras",
+	"indonesian": "Indonesia",
+	"iranian": "Iran",
+	"iraqi": "Iraq",
+	"israeli": "Israel",
+	"ivorian": "Ivory Coast",
+	"jordanian": "Jordan",
+	"kazakh": "Kazakhstan",
+	"kenyan": "Kenya",
+	"kosovar": "Kosovo",
+	"kuwaiti": "Kuwait",
+	"kyrgyz": "Kyrgyzstan",
+	"lebanese": "Lebanon",
+	"libyan": "Libya",
+	"malagasy": "Madagascar",
+	"malawian": "Malawi",
+	"malaysian": "Malaysia",
+	"malian": "Mali",
+	"maltese": "Malta",
+	"mauritanian": "Mauritania",
+	"mexican": "Mexico",
+	"moldovan": "Moldova",
+	"mongolian": "Mongolia",
+	"moroccan": "Morocco",
+	"mozambican": "Mozambique",
+	"namibian": "Namibia",
+	"nepalese": "Nepal",
+	"nicaraguan": "Nicaragua",
+	"nigerien": "Niger",
+	"nigerian": "Nigeria",
+	"pakistani": "Pakistan",
+	"palestinian": "Palestine",
+	"panamanian": "Panama",
+	"paraguayan": "Paraguay",
+	"peruvian": "Peru",
+	"philippine": "Philippines",
+	"qatari": "Qatar",
+	"romanian": "Romania",
+	"russian": "Russia",
+	"rwandan": "Rwanda",
+	"salvadoran": "El Salvador",
+	"saudi": "Saudi Arabia",
+	"senegalese": "Senegal",
+	"serbian": "Serbia",
+	"somali": "Somalia",
+	"sri lankan": "Sri Lanka",
+	"sudanese": "Sudan",
+	"syrian": "Syria",
+	"tajik": "Tajikistan",
+	"tanzanian": "Tanzania",
+	"thai": "Thailand",
+	"tunisian": "Tunisia",
+	"turkish": "Turkey",
+	"turkmen": "Turkmenistan",
+	"ugandan": "Uganda",
+	"ukrainian": "Ukraine",
+	"uzbek": "Uzbekistan",
+	"venezuelan": "Venezuela",
+	"vietnamese": "Vietnam",
+	"yemeni": "Yemen",
+	"zambian": "Zambia",
+	"zimbabwean": "Zimbabwe",
+
+	// Alternative / short names
+	"drc": "Congo",
+	"democratic republic of congo": "Congo",
+	"cote d'ivoire": "Ivory Coast",
+	"côte d'ivoire": "Ivory Coast",
+	"rok": "South Korea",
+	"dprk": "North Korea",
+	"uae": "United Arab Emirates",
+	"emirates": "United Arab Emirates",
+	"uk": "United Kingdom",
+	"britain": "United Kingdom",
+	"british": "United Kingdom",
+
+	// Conflict regions / sub-national areas → parent country
+	"tigray": "Ethiopia",
+	"amhara": "Ethiopia",
+	"oromia": "Ethiopia",
+	"rakhine": "Myanmar",
+	"shan": "Myanmar",
+	"kachin": "Myanmar",
+	"darfur": "Sudan",
+	"kordofan": "Sudan",
+	"blue nile": "Sudan",
+	"donbas": "Ukraine",
+	"donbass": "Ukraine",
+	"donetsk": "Ukraine",
+	"luhansk": "Ukraine",
+	"crimea": "Ukraine",
+	"kherson": "Ukraine",
+	"zaporizhzhia": "Ukraine",
+	"idlib": "Syria",
+	"aleppo": "Syria",
+	"golan": "Syria",
+	"sinai": "Egypt",
+	"sahel": "Mali",
+	"cabo delgado": "Mozambique",
+	"kivu": "Congo",
+	"ituri": "Congo",
+	"kasai": "Congo",
+	"west bank": "Palestine",
+	"hebron": "Palestine",
+	"jenin": "Palestine",
+	"nablus": "Palestine",
+	"rafah": "Gaza",
+	"khan younis": "Gaza",
+	"balochistan": "Pakistan",
+	"waziristan": "Pakistan",
+	"kashmir": "India",
+	"nagorno-karabakh": "Azerbaijan",
+	"karabakh": "Azerbaijan",
+	"mindanao": "Philippines",
+	"marawi": "Philippines",
+	"helmand": "Afghanistan",
+	"kandahar": "Afghanistan",
+	"kabul": "Afghanistan",
+	"mogadishu": "Somalia",
+	"benghazi": "Libya",
+	"tripoli": "Libya",
+	"mosul": "Iraq",
+	"kirkuk": "Iraq",
+	"basra": "Iraq",
+	"aden": "Yemen",
+	"sanaa": "Yemen",
+	"marib": "Yemen",
+	"hodeida": "Yemen",
+	"hodeidah": "Yemen",
+	"taipei": "Taiwan",
+	"valletta": "Malta",
+	"kyiv": "Ukraine",
+	"kharkiv": "Ukraine",
+}
+
+// geoIndex is the lookup table used by geocodeText: every lowercased
+// canonical name and every lowercased alias points at its geoCountries
+// entry. It is populated exactly once, in init.
+var geoIndex map[string]*countryGeo
+
+func init() {
+	index := make(map[string]*countryGeo, len(geoCountries)*2)
+
+	// Canonical names go in first so aliases can be resolved against them.
+	for i := range geoCountries {
+		entry := &geoCountries[i]
+		index[strings.ToLower(entry.Name)] = entry
+	}
+
+	// An alias whose target is not (yet) in the index is silently dropped.
+	for alias, canonical := range geoAliases {
+		target, found := index[strings.ToLower(canonical)]
+		if !found {
+			continue
+		}
+		index[strings.ToLower(alias)] = target
+	}
+
+	geoIndex = index
+}
+
+// geocodeCountryCode resolves a 2-letter country code (case and surrounding
+// whitespace are ignored) to coordinates and a display name.
+//
+// The capital from capitalCoords is preferred over the centroid so island
+// states are pinned on land. The name comes from the first geoCountries
+// entry with that code; when only the capital is known the code itself is
+// returned as the name. ok is false when the code is in neither table.
+func geocodeCountryCode(code string) (lat, lng float64, name string, ok bool) {
+	code = strings.ToUpper(strings.TrimSpace(code))
+
+	// First table entry carrying this code, if any.
+	var country *countryGeo
+	for i := range geoCountries {
+		if geoCountries[i].Code == code {
+			country = &geoCountries[i]
+			break
+		}
+	}
+
+	capital, hasCapital := capitalCoords[code]
+	switch {
+	case hasCapital && country != nil:
+		return capital[0], capital[1], country.Name, true
+	case hasCapital:
+		return capital[0], capital[1], code, true
+	case country != nil:
+		return country.Lat, country.Lng, country.Name, true
+	default:
+		return 0, 0, "", false
+	}
+}
+
+// geocodeText scans text for country/region mentions and returns the
+// centroid of the best match. Prefers the rightmost mention in the text
+// (headlines typically put the subject location last, e.g. "Israeli
+// Strikes on Gaza" → Gaza). When two matches end at the same position,
+// the longer match wins (e.g. "South Sudan" over "Sudan").
+func geocodeText(text string) (lat, lng float64, code string, ok bool) { + lower := strings.ToLower(text) + + bestPos := -1 // rightmost end-position of best match + bestLen := 0 // length of best match (tiebreaker) + var bestGeo *countryGeo + + for key, g := range geoIndex { + idx := strings.LastIndex(lower, key) + if idx < 0 { + continue + } + endPos := idx + len(key) + if endPos > bestPos || (endPos == bestPos && len(key) > bestLen) { + bestPos = endPos + bestLen = len(key) + bestGeo = g + } + } + if bestGeo != nil { + return bestGeo.Lat, bestGeo.Lng, bestGeo.Code, true + } + return 0, 0, "", false +} diff --git a/internal/collector/normalize/geocode_capitals.go b/internal/collector/normalize/geocode_capitals.go new file mode 100644 index 0000000..d51d51d --- /dev/null +++ b/internal/collector/normalize/geocode_capitals.go @@ -0,0 +1,163 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package normalize + +// capitalCoords maps ISO 3166-1 alpha-2 codes to capital city coordinates. +// Used instead of geographic centroids so island nations (Malta, Cyprus, +// Singapore, etc.) place pins on land rather than in the sea. 
+// Each value is {latitude, longitude} in decimal degrees.
+var capitalCoords = map[string][2]float64{
+	"AF": {34.53, 69.17}, // Kabul
+	"AL": {41.33, 19.82}, // Tirana
+	"DZ": {36.75, 3.04}, // Algiers
+	"AO": {-8.84, 13.23}, // Luanda
+	"AR": {-34.60, -58.38}, // Buenos Aires
+	"AM": {40.18, 44.51}, // Yerevan
+	"AU": {-35.28, 149.13}, // Canberra
+	"AT": {48.21, 16.37}, // Vienna
+	"AZ": {40.41, 49.87}, // Baku
+	"BH": {26.23, 50.59}, // Manama
+	"BD": {23.81, 90.41}, // Dhaka
+	"BY": {53.90, 27.57}, // Minsk
+	"BE": {50.85, 4.35}, // Brussels
+	"BJ": {6.50, 2.60}, // Porto-Novo
+	"BO": {-16.50, -68.15}, // La Paz
+	"BA": {43.86, 18.41}, // Sarajevo
+	"BW": {-24.65, 25.91}, // Gaborone
+	"BR": {-15.79, -47.88}, // Brasília
+	"BG": {42.70, 23.32}, // Sofia
+	"BF": {12.37, -1.52}, // Ouagadougou
+	"BI": {-3.43, 29.92}, // Gitega (previous value was Bujumbura's position)
+	"KH": {11.56, 104.92}, // Phnom Penh
+	"CM": {3.87, 11.52}, // Yaoundé
+	"CA": {45.42, -75.70}, // Ottawa
+	"CF": {4.39, 18.56}, // Bangui
+	"TD": {12.13, 15.05}, // N'Djamena
+	"CL": {-33.45, -70.67}, // Santiago
+	"CN": {39.90, 116.40}, // Beijing
+	"CO": {4.71, -74.07}, // Bogotá
+	"CD": {-4.32, 15.31}, // Kinshasa
+	"CR": {9.93, -84.09}, // San José
+	"HR": {45.81, 15.98}, // Zagreb
+	"CU": {23.11, -82.37}, // Havana
+	"CY": {35.17, 33.36}, // Nicosia
+	"CZ": {50.08, 14.43}, // Prague
+	"DK": {55.68, 12.57}, // Copenhagen
+	"DO": {18.47, -69.90}, // Santo Domingo
+	"EC": {-0.18, -78.47}, // Quito
+	"EG": {30.04, 31.24}, // Cairo
+	"SV": {13.69, -89.19}, // San Salvador
+	"ER": {15.34, 38.93}, // Asmara
+	"EE": {59.44, 24.75}, // Tallinn
+	"ET": {9.02, 38.75}, // Addis Ababa
+	"FI": {60.17, 24.94}, // Helsinki
+	"FR": {48.86, 2.35}, // Paris
+	"GA": {0.39, 9.45}, // Libreville
+	"GM": {13.45, -16.58}, // Banjul
+	"PS": {31.90, 35.20}, // Ramallah
+	"GE": {41.72, 44.79}, // Tbilisi
+	"DE": {52.52, 13.41}, // Berlin
+	"GH": {5.56, -0.19}, // Accra
+	"GR": {37.98, 23.73}, // Athens
+	"GT": {14.63, -90.51}, // Guatemala City
+	"GN": {9.64, -13.58}, // Conakry
+	"HT": {18.54, -72.34}, // Port-au-Prince
+	"HN": {14.07, -87.19}, // Tegucigalpa
+	"HU": {47.50, 19.04}, // Budapest
+	"IN": {28.61, 77.21}, // New Delhi
+	"ID": {-6.21, 106.85}, // Jakarta
+	"IR": {35.69, 51.39}, // Tehran
+	"IQ": {33.34, 44.37}, // Baghdad
+	"IE": {53.35, -6.26}, // Dublin
+	"IL": {31.77, 35.22}, // Jerusalem
+	"IT": {41.90, 12.50}, // Rome
+	"CI": {6.83, -5.29}, // Yamoussoukro
+	"JM": {18.00, -76.79}, // Kingston
+	"JP": {35.68, 139.69}, // Tokyo
+	"JO": {31.95, 35.93}, // Amman
+	"KZ": {51.17, 71.43}, // Astana
+	"KE": {-1.29, 36.82}, // Nairobi
+	"XK": {42.66, 21.17}, // Pristina
+	"KW": {29.37, 47.98}, // Kuwait City
+	"KG": {42.87, 74.59}, // Bishkek
+	"LA": {17.97, 102.63}, // Vientiane
+	"LV": {56.95, 24.11}, // Riga
+	"LB": {33.89, 35.50}, // Beirut
+	"LY": {32.90, 13.18}, // Tripoli
+	"LT": {54.69, 25.28}, // Vilnius
+	"LU": {49.61, 6.13}, // Luxembourg City
+	"MG": {-18.91, 47.54}, // Antananarivo
+	"MW": {-13.97, 33.79}, // Lilongwe
+	"MY": {3.14, 101.69}, // Kuala Lumpur
+	"ML": {12.64, -8.00}, // Bamako
+	"MT": {35.90, 14.51}, // Valletta
+	"MR": {18.09, -15.98}, // Nouakchott
+	"MX": {19.43, -99.13}, // Mexico City
+	"MD": {47.01, 28.86}, // Chișinău
+	"MN": {47.91, 106.91}, // Ulaanbaatar
+	"ME": {42.44, 19.26}, // Podgorica
+	"MA": {34.02, -6.84}, // Rabat
+	"MZ": {-25.97, 32.57}, // Maputo
+	"MM": {19.76, 96.07}, // Naypyidaw
+	"NA": {-22.56, 17.08}, // Windhoek
+	"NP": {27.72, 85.32}, // Kathmandu
+	"NL": {52.37, 4.89}, // Amsterdam
+	"NZ": {-41.29, 174.78}, // Wellington
+	"NI": {12.11, -86.27}, // Managua
+	"NE": {13.51, 2.11}, // Niamey
+	"NG": {9.06, 7.49}, // Abuja
+	"KP": {39.02, 125.75}, // Pyongyang
+	"MK": {42.00, 21.43}, // Skopje
+	"NO": {59.91, 10.75}, // Oslo
+	"OM": {23.59, 58.54}, // Muscat
+	"PK": {33.69, 73.04}, // Islamabad
+	"PA": {8.98, -79.52}, // Panama City
+	"PG": {-9.44, 147.18}, // Port Moresby (previous latitude was the country centroid)
+	"PY": {-25.26, -57.58}, // Asunción
+	"PE": {-12.05, -77.04}, // Lima
+	"PH": {14.60, 120.98}, // Manila
+	"PL": {52.23, 21.01}, // Warsaw
+	"PT": {38.72, -9.14}, // Lisbon
+	"QA": {25.29, 51.53}, // Doha
+	"RO": {44.43, 26.10}, // Bucharest
+	"RU": {55.76, 37.62}, // Moscow
+	"RW": {-1.94, 30.06}, // Kigali
+	"SA": {24.69, 46.72}, // Riyadh
+	"SN": {14.72, -17.47}, // Dakar
+	"RS": {44.79, 20.47}, // Belgrade
+	"SL": {8.48, -13.23}, // Freetown
+	"SG": {1.29, 103.85}, // Singapore
+	"SK": {48.15, 17.11}, // Bratislava
+	"SI": {46.06, 14.51}, // Ljubljana
+	"SO": {2.05, 45.32}, // Mogadishu
+	"ZA": {-25.75, 28.19}, // Pretoria
+	"KR": {37.57, 126.98}, // Seoul
+	"SS": {4.85, 31.60}, // Juba
+	"ES": {40.42, -3.70}, // Madrid
+	"LK": {6.93, 79.85}, // Colombo
+	"SD": {15.60, 32.53}, // Khartoum
+	"SE": {59.33, 18.07}, // Stockholm
+	"CH": {46.95, 7.45}, // Bern
+	"SY": {33.51, 36.28}, // Damascus
+	"TW": {25.03, 121.57}, // Taipei
+	"TJ": {38.56, 68.77}, // Dushanbe
+	"TZ": {-6.16, 35.75}, // Dodoma
+	"TH": {13.76, 100.50}, // Bangkok
+	"TG": {6.14, 1.21}, // Lomé
+	"TN": {36.81, 10.17}, // Tunis
+	"TR": {39.93, 32.87}, // Ankara
+	"TM": {37.95, 58.38}, // Ashgabat
+	"UG": {0.35, 32.58}, // Kampala
+	"UA": {50.45, 30.52}, // Kyiv
+	"AE": {24.45, 54.65}, // Abu Dhabi
+	"GB": {51.51, -0.13}, // London
+	"US": {38.90, -77.04}, // Washington D.C.
+	"UY": {-34.88, -56.17}, // Montevideo
+	"UZ": {41.30, 69.28}, // Tashkent
+	"VE": {10.49, -66.88}, // Caracas
+	"VN": {21.03, 105.85}, // Hanoi
+	"YE": {15.37, 44.21}, // Sana'a
+	"ZM": {-15.39, 28.32}, // Lusaka
+	"ZW": {-17.83, 31.05}, // Harare
+	"IS": {64.15, -21.94}, // Reykjavik
+}
diff --git a/internal/collector/normalize/geocode_nominatim.go b/internal/collector/normalize/geocode_nominatim.go
new file mode 100644
index 0000000..4cdbfb6
--- /dev/null
+++ b/internal/collector/normalize/geocode_nominatim.go
@@ -0,0 +1,178 @@
+// Copyright 2026 ff, Scalytics, Inc.
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package normalize + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/url" + "strconv" + "strings" + "sync" + "time" +) + +// NominatimResult is a geocoding result from OpenStreetMap Nominatim. +type NominatimResult struct { + Lat float64 + Lng float64 + CountryCode string + DisplayName string +} + +// NominatimClient queries the OSM Nominatim API for geocoding. +// It enforces the 1 request/second rate limit for the public instance. +type NominatimClient struct { + baseURL string + userAgent string + client *http.Client + + mu sync.Mutex + lastCall time.Time + + // Simple in-memory cache to avoid re-querying the same place name. + cacheMu sync.RWMutex + cache map[string]*nominatimCacheEntry +} + +type nominatimCacheEntry struct { + result NominatimResult + ok bool +} + +type nominatimAPIResponse struct { + Lat string `json:"lat"` + Lon string `json:"lon"` + DisplayName string `json:"display_name"` + Address struct { + CountryCode string `json:"country_code"` + } `json:"address"` +} + +// NewNominatimClient creates a Nominatim geocoding client. +// baseURL defaults to the public OSM Nominatim if empty. +func NewNominatimClient(baseURL string, userAgent string) *NominatimClient { + if baseURL == "" { + baseURL = "https://nominatim.openstreetmap.org" + } + if userAgent == "" { + userAgent = "EUOSINTBot/1.0 (https://www.scalytics.io; ops@scalytics.io)" + } + return &NominatimClient{ + baseURL: strings.TrimRight(baseURL, "/"), + userAgent: userAgent, + client: &http.Client{Timeout: 10 * time.Second}, + cache: make(map[string]*nominatimCacheEntry, 256), + } +} + +// Geocode looks up a place name and returns coordinates. 
+func (n *NominatimClient) Geocode(ctx context.Context, query string, countryCode string) (NominatimResult, bool) { + query = strings.TrimSpace(query) + if query == "" { + return NominatimResult{}, false + } + + cacheKey := strings.ToLower(query) + "|" + strings.ToUpper(countryCode) + n.cacheMu.RLock() + if entry, ok := n.cache[cacheKey]; ok { + n.cacheMu.RUnlock() + return entry.result, entry.ok + } + n.cacheMu.RUnlock() + + // Rate limit: 1 req/sec for public Nominatim. + n.mu.Lock() + since := time.Since(n.lastCall) + if since < time.Second { + time.Sleep(time.Second - since) + } + n.lastCall = time.Now() + n.mu.Unlock() + + params := url.Values{ + "q": {query}, + "format": {"json"}, + "limit": {"1"}, + "addressdetails": {"1"}, + "accept-language": {"en"}, + } + if cc := strings.TrimSpace(countryCode); cc != "" { + params.Set("countrycodes", strings.ToLower(cc)) + } + + reqURL := n.baseURL + "/search?" + params.Encode() + req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil) + if err != nil { + n.cacheNegative(cacheKey) + return NominatimResult{}, false + } + req.Header.Set("User-Agent", n.userAgent) + + resp, err := n.client.Do(req) + if err != nil { + n.cacheNegative(cacheKey) + return NominatimResult{}, false + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + n.cacheNegative(cacheKey) + return NominatimResult{}, false + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) + if err != nil { + n.cacheNegative(cacheKey) + return NominatimResult{}, false + } + + var results []nominatimAPIResponse + if err := json.Unmarshal(body, &results); err != nil || len(results) == 0 { + n.cacheNegative(cacheKey) + return NominatimResult{}, false + } + + r := results[0] + lat, errLat := strconv.ParseFloat(r.Lat, 64) + lng, errLng := strconv.ParseFloat(r.Lon, 64) + if errLat != nil || errLng != nil { + n.cacheNegative(cacheKey) + return NominatimResult{}, false + } + + result := NominatimResult{ + Lat: lat, + Lng: lng, + 
CountryCode: strings.ToUpper(r.Address.CountryCode), + DisplayName: r.DisplayName, + } + + n.cacheMu.Lock() + n.cache[cacheKey] = &nominatimCacheEntry{result: result, ok: true} + n.cacheMu.Unlock() + + return result, true +} + +func (n *NominatimClient) cacheNegative(key string) { + n.cacheMu.Lock() + n.cache[key] = &nominatimCacheEntry{ok: false} + n.cacheMu.Unlock() +} + +// CacheStats returns the number of cached entries (for diagnostics). +func (n *NominatimClient) CacheStats() (total int, hits int) { + n.cacheMu.RLock() + defer n.cacheMu.RUnlock() + total = len(n.cache) + for _, e := range n.cache { + if e.ok { + hits++ + } + } + return total, hits +} diff --git a/internal/collector/normalize/geocode_test.go b/internal/collector/normalize/geocode_test.go new file mode 100644 index 0000000..7497db4 --- /dev/null +++ b/internal/collector/normalize/geocode_test.go @@ -0,0 +1,35 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package normalize + +import "testing" + +func TestGeocodeText(t *testing.T) { + tests := []struct { + text string + wantCode string + wantOK bool + }{ + {"Crisis in Myanmar's Rakhine State", "MM", true}, + {"Ukraine Conflict Monitor Update", "UA", true}, + {"Ethiopia's Tigray: A Fragile Peace", "ET", true}, + {"South Sudan Violence Escalates", "SS", true}, + {"Sudanese Military Conflict Deepens", "SD", true}, + {"Israeli Strikes on Gaza Intensify", "PS", true}, // Gaza → PS + {"DRC Eastern Congo Humanitarian Emergency", "CD", true}, + {"Sahel Region Security Briefing", "ML", true}, + {"Weekly Global Summary Report", "", false}, + {"New Policy Framework Released", "", false}, + } + for _, tt := range tests { + _, _, code, ok := geocodeText(tt.text) + if ok != tt.wantOK { + t.Errorf("geocodeText(%q): ok=%v, want %v", tt.text, ok, tt.wantOK) + continue + } + if code != tt.wantCode { + t.Errorf("geocodeText(%q): code=%q, want %q", tt.text, code, tt.wantCode) + } + } +} diff --git 
a/internal/collector/normalize/geocoder.go b/internal/collector/normalize/geocoder.go
new file mode 100644
index 0000000..360f0aa
--- /dev/null
+++ b/internal/collector/normalize/geocoder.go
@@ -0,0 +1,282 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package normalize
+
+import (
+	"context"
+	"regexp"
+	"strings"
+	"unicode"
+)
+
+// CityLookup abstracts the city database so normalize doesn't import sourcedb.
+// LookupCity receives a candidate place name plus the caller's country hint
+// (possibly empty) and reports whether a city was found.
+type CityLookup interface {
+	LookupCity(ctx context.Context, name string, countryCode string) (CityLookupResult, bool)
+}
+
+// CityLookupResult mirrors sourcedb.CityResult without creating a dependency.
+type CityLookupResult struct {
+	Name string
+	CountryCode string
+	Lat float64
+	Lng float64
+	Population int // used to filter tiny places and to break position ties
+}
+
+// GeoResult is the output of the full geocoding pipeline. The zero value
+// (empty Source) means nothing could be resolved.
+type GeoResult struct {
+	Lat float64
+	Lng float64
+	CountryCode string
+	CityName string // set only by the city-db and nominatim tiers
+	Source string // "city-db", "nominatim", "country-text", "capital", "registry"
+}
+
+// Geocoder chains the three geocoding tiers:
+//  1. City gazetteer (GeoNames in SQLite) — fast, local, city-level precision
+//  2. Nominatim (OSM) — external fallback for place names not in the DB
+//  3. Country-level (text scanning + capital coords) — always available
+type Geocoder struct {
+	cities CityLookup // may be nil
+	nominatim *NominatimClient // may be nil
+}
+
+// NewGeocoder creates a geocoder. Both deps are optional — pass nil to skip.
+func NewGeocoder(cities CityLookup, nominatim *NominatimClient) *Geocoder {
+	return &Geocoder{cities: cities, nominatim: nominatim}
+}
+
+// wordBoundaryRe matches a letter or digit followed by 2-30 letters,
+// digits, whitespace, apostrophes or hyphens.
+// NOTE(review): nothing in this file references it (candidates are built by
+// tokenizeWords) — confirm whether it is still needed.
+var wordBoundaryRe = regexp.MustCompile(`[\p{L}\p{N}][\p{L}\p{N}\s'-]{2,30}`)
+
+// Resolve geocodes a text string (typically alert title + summary) to
+// the most precise coordinates available.
countryHint is the source's +// country code (e.g. "DE") and helps disambiguate city names. +func (g *Geocoder) Resolve(ctx context.Context, text string, countryHint string) GeoResult { + countryHint = strings.ToUpper(strings.TrimSpace(countryHint)) + + // ── Tier 1: City gazetteer ────────────────────────────────── + if g.cities != nil { + if result, ok := g.matchCityInText(ctx, text, countryHint); ok { + return result + } + } + + // ── Tier 2: Nominatim for extracted place-like tokens ─────── + if g.nominatim != nil { + if result, ok := g.nominatimFromText(ctx, text, countryHint); ok { + return result + } + } + + // ── Tier 3: Country-level from text ───────────────────────── + if lat, lng, code, ok := geocodeText(text); ok { + // Use capital coords instead of centroid. + if capital, cok := capitalCoords[code]; cok { + return GeoResult{Lat: capital[0], Lng: capital[1], CountryCode: code, Source: "capital"} + } + return GeoResult{Lat: lat, Lng: lng, CountryCode: code, Source: "country-text"} + } + + // ── Fallback: use country hint's capital ──────────────────── + if countryHint != "" && countryHint != "INT" { + if capital, ok := capitalCoords[countryHint]; ok { + return GeoResult{Lat: capital[0], Lng: capital[1], CountryCode: countryHint, Source: "capital"} + } + } + + return GeoResult{} // no match +} + +// matchCityInText extracts candidate n-grams from text and looks them up +// in the city database. Returns the match with the highest population that +// appears rightmost in the text (consistent with geocodeText strategy). 
+func (g *Geocoder) matchCityInText(ctx context.Context, text string, countryHint string) (GeoResult, bool) {
+	candidates := extractCandidateNames(text)
+	if len(candidates) == 0 {
+		return GeoResult{}, false
+	}
+
+	type hit struct {
+		pos int
+		pop int
+		name string
+		lat float64
+		lng float64
+		code string
+	}
+	var best *hit
+
+	for _, c := range candidates {
+		result, ok := g.cities.LookupCity(ctx, c.name, countryHint)
+		if !ok {
+			continue
+		}
+		// Skip tiny places (pop < 5000) unless they match country hint.
+		if result.Population < 5000 && result.CountryCode != countryHint {
+			continue
+		}
+		h := hit{
+			pos: c.endPos,
+			pop: result.Population,
+			name: result.Name,
+			lat: result.Lat,
+			lng: result.Lng,
+			code: result.CountryCode,
+		}
+		// Rightmost wins; population only breaks ties at the same position.
+		if best == nil ||
+			h.pos > best.pos ||
+			(h.pos == best.pos && h.pop > best.pop) {
+			best = &h
+		}
+	}
+
+	if best != nil {
+		return GeoResult{
+			Lat: best.lat,
+			Lng: best.lng,
+			CountryCode: best.code,
+			CityName: best.name,
+			Source: "city-db",
+		}, true
+	}
+	return GeoResult{}, false
+}
+
+// nameCandidate is a possible place name and the byte offset in the source
+// text at which its rightmost occurrence ends.
+type nameCandidate struct {
+	name string
+	endPos int
+}
+
+// extractCandidateNames pulls potential place names from text: every run of
+// 1-4 consecutive words, in text order, shortest first per start word.
+// Single words shorter than three bytes and n-grams listed in geoStopwords
+// are dropped. No capitalisation filter is applied here. E.g. "Explosion in
+// San Francisco" → ["Explosion", "Explosion in", "Explosion in San",
+// "Explosion in San Francisco", "in San", ..., "San Francisco", "Francisco"].
+//
+// Candidates are de-duplicated case-insensitively. A repeated name keeps
+// its original slot but takes the end position of its last occurrence, so
+// "rightmost mention wins" also holds for names that occur more than once.
+func extractCandidateNames(text string) []nameCandidate {
+	words := tokenizeWords(text)
+	if len(words) == 0 {
+		return nil
+	}
+
+	var candidates []nameCandidate
+	seen := map[string]int{} // lowercased name → index into candidates
+
+	// Single words and multi-word sequences (up to 4 words).
+	for i := range words {
+		for n := 1; n <= 4 && i+n <= len(words); n++ {
+			parts := make([]string, 0, n)
+			for _, w := range words[i : i+n] {
+				parts = append(parts, w.text)
+			}
+			name := strings.Join(parts, " ")
+			lower := strings.ToLower(name)
+
+			// Skip very short single words and common noise.
+			if n == 1 && len(name) < 3 {
+				continue
+			}
+			if isGeoStopword(lower) {
+				continue
+			}
+
+			endPos := words[i+n-1].endPos
+			if at, dup := seen[lower]; dup {
+				candidates[at].endPos = endPos
+				continue
+			}
+			seen[lower] = len(candidates)
+			candidates = append(candidates, nameCandidate{name: name, endPos: endPos})
+		}
+	}
+	return candidates
+}
+
+// wordToken is one word of the input and the byte offset just past it.
+type wordToken struct {
+	text string
+	endPos int
+}
+
+// tokenizeWords splits text into words made of letters, digits, apostrophes,
+// hyphens and periods. Punctuation at the edges of a word and a trailing
+// possessive are removed (see cleanWordToken), so "Kharkiv." and "Berlin's"
+// become "Kharkiv" and "Berlin". Runs that contain only punctuation yield
+// no token.
+func tokenizeWords(text string) []wordToken {
+	var tokens []wordToken
+	start := -1 // byte offset of the word being scanned; -1 between words
+
+	flush := func(end int) {
+		if start < 0 {
+			return
+		}
+		if word, wordEnd := cleanWordToken(text, start, end); word != "" {
+			tokens = append(tokens, wordToken{text: word, endPos: wordEnd})
+		}
+		start = -1
+	}
+
+	for i, r := range text {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '\'' || r == '-' || r == '.' {
+			if start < 0 {
+				start = i
+			}
+			continue
+		}
+		flush(i)
+	}
+	flush(len(text))
+	return tokens
+}
+
+// cleanWordToken trims apostrophes, hyphens and periods from both ends of
+// text[start:end] and drops a trailing possessive ('s). The period of an
+// abbreviation is kept when the stem is at most two bytes long ("St.") or
+// itself contains a period ("U.S."). It returns the cleaned word and the
+// byte offset just past it; the word is empty when nothing remains.
+func cleanWordToken(text string, start, end int) (string, int) {
+	const edge = "'-."
+	raw := text[start:end]
+	word := strings.TrimLeft(raw, edge)
+	begin := start + len(raw) - len(word)
+
+	stem := strings.TrimRight(word, edge)
+	if strings.HasSuffix(stem, "'s") || strings.HasSuffix(stem, "'S") {
+		stem = strings.TrimRight(stem[:len(stem)-2], edge)
+	}
+	if stem == "" {
+		return "", begin
+	}
+	if len(word) > len(stem) && word[len(stem)] == '.' &&
+		(len(stem) <= 2 || strings.Contains(stem, ".")) {
+		stem = word[:len(stem)+1]
+	}
+	return stem, begin + len(stem)
+}
+
+var geoStopwords = map[string]bool{
+	// Common English words that are also city/place names but are almost
+	// never geographic references in OSINT headlines.
+	"the": true, "and": true, "for": true, "new": true, "has": true,
+	"was": true, "are": true, "with": true, "from": true, "that": true,
+	"this": true, "not": true, "but": true, "all": true, "its": true,
+	"will": true, "can": true, "more": true, "update": true, "alert": true,
+	"warning": true, "report": true, "press": true, "release": true,
+	"security": true, "advisory": true, "notice": true, "bulletin": true,
+	"critical": true, "high": true, "medium": true, "low": true, "info": true,
+}
+
+// isGeoStopword reports whether the lowercased candidate is in geoStopwords.
+func isGeoStopword(lower string) bool {
+	return geoStopwords[lower]
+}
+
+// looksLikePlaceName reports whether name starts like a proper noun: its
+// first rune is not a lowercase letter. Scripts without case and leading
+// digits pass.
+func looksLikePlaceName(name string) bool {
+	for _, r := range name {
+		return !unicode.IsLower(r)
+	}
+	return false
+}
+
+// nominatimFromText tries Nominatim for candidates that look like place
+// names: at least four bytes long and not starting with a lowercase letter.
+// Candidates are tried from the right (rightmost = most likely geographic)
+// and at most three real lookups are made, to stay within rate limits.
+// Skipped candidates do not count against that budget.
+func (g *Geocoder) nominatimFromText(ctx context.Context, text string, countryHint string) (GeoResult, bool) {
+	candidates := extractCandidateNames(text)
+	if len(candidates) == 0 {
+		return GeoResult{}, false
+	}
+
+	const maxAttempts = 3
+	attempts := 0
+	for i := len(candidates) - 1; i >= 0 && attempts < maxAttempts; i-- {
+		c := candidates[i]
+		if len(c.name) < 4 || !looksLikePlaceName(c.name) {
+			continue
+		}
+		attempts++
+		result, ok := g.nominatim.Geocode(ctx, c.name, countryHint)
+		if ok {
+			return GeoResult{
+				Lat: result.Lat,
+				Lng: result.Lng,
+				CountryCode: result.CountryCode,
+				CityName: c.name,
+				Source: "nominatim",
+			}, true
+		}
+	}
+	return GeoResult{}, false
+}
diff --git a/internal/collector/normalize/geocoder_test.go b/internal/collector/normalize/geocoder_test.go
new file mode 100644
index 0000000..14532ad
--- /dev/null
+++ b/internal/collector/normalize/geocoder_test.go
@@ -0,0 +1,176 @@
+// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io
+// SPDX-License-Identifier: Apache-2.0
+
+package normalize
+
+import (
+	"context"
+	"testing"
+)
+
+// mockCityLookup is a simple in-memory mock for testing.
+type mockCityLookup struct {
+	// cities is keyed by city name, or by "name|countryCode" for a
+	// country-specific entry.
+	cities map[string]CityLookupResult
+}
+
+// LookupCity prefers a "name|countryCode" entry when a country code is
+// given and otherwise falls back to the plain name. Matching is exact and
+// case-sensitive.
+func (m *mockCityLookup) LookupCity(_ context.Context, name string, countryCode string) (CityLookupResult, bool) {
+	key := name
+	if countryCode != "" {
+		// Try country-specific first.
+		if r, ok := m.cities[name+"|"+countryCode]; ok {
+			return r, true
+		}
+	}
+	r, ok := m.cities[key]
+	return r, ok
+}
+
+// newMockCities returns a mock gazetteer with a handful of cities.
+func newMockCities() *mockCityLookup {
+	return &mockCityLookup{cities: map[string]CityLookupResult{
+		"Valletta": {Name: "Valletta", CountryCode: "MT", Lat: 35.90, Lng: 14.51, Population: 6400},
+		"Berlin": {Name: "Berlin", CountryCode: "DE", Lat: 52.52, Lng: 13.41, Population: 3700000},
+		"Munich": {Name: "Munich", CountryCode: "DE", Lat: 48.14, Lng: 11.58, Population: 1500000},
+		"Kyiv": {Name: "Kyiv", CountryCode: "UA", Lat: 50.45, Lng: 30.52, Population: 3000000},
+		"Mogadishu": {Name: "Mogadishu", CountryCode: "SO", Lat: 2.05, Lng: 45.32, Population: 2900000},
+		"Aleppo": {Name: "Aleppo", CountryCode: "SY", Lat: 36.20, Lng: 37.17, Population: 1800000},
+	}}
+}
+
+// TestGeocoderResolve_CityDB checks that the city tier answers when a known
+// city is in the text and that the rightmost city wins.
+func TestGeocoderResolve_CityDB(t *testing.T) {
+	g := NewGeocoder(newMockCities(), nil)
+
+	tests := []struct {
+		name string
+		text string
+		countryHint string
+		wantCity string
+		wantCode string
+		wantSource string
+	}{
+		{
+			name: "city in headline",
+			text: "Explosion rocks central Berlin district",
+			wantCity: "Berlin",
+			wantCode: "DE",
+			wantSource: "city-db",
+		},
+		{
+			name: "city with country hint",
+			text: "Air raid sirens in Kyiv as strikes continue",
+			countryHint: "UA",
+			wantCity: "Kyiv",
+			wantCode: "UA",
+			wantSource: "city-db",
+		},
+		{
+			name: "rightmost city wins",
+			text: "Berlin conference discusses Aleppo humanitarian crisis",
+			wantCity: "Aleppo",
+			wantCode: "SY",
+			wantSource: "city-db",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := g.Resolve(context.Background(), tt.text, tt.countryHint)
+			if result.CityName != tt.wantCity {
+				t.Errorf("CityName = %q, want %q", result.CityName, tt.wantCity)
+			}
+			if result.CountryCode != tt.wantCode {
+				t.Errorf("CountryCode = %q, want %q", result.CountryCode, tt.wantCode)
+			}
+			if result.Source != tt.wantSource {
+				t.Errorf("Source = %q, want %q", result.Source, tt.wantSource)
+			}
+		})
+	}
+}
+
+// TestGeocoderResolve_FallbackToCapital checks the country-text tier: a
+// country mention resolves to that country's capital coordinates.
+func TestGeocoderResolve_FallbackToCapital(t *testing.T) {
+	// No city DB, no Nominatim — should fall back to country text + capitals.
+	g := NewGeocoder(nil, nil)
+
+	result := g.Resolve(context.Background(), "Somalia conflict escalates", "")
+	if result.CountryCode != "SO" {
+		t.Errorf("CountryCode = %q, want SO", result.CountryCode)
+	}
+	// Should use capital coords (Mogadishu) not centroid.
+	if result.Source != "capital" {
+		t.Errorf("Source = %q, want capital", result.Source)
+	}
+	if result.Lat < 1.5 || result.Lat > 3.0 {
+		t.Errorf("Lat = %f, want ~2.05 (Mogadishu)", result.Lat)
+	}
+}
+
+// TestGeocoderResolve_CountryHintCapital checks the last-resort fallback to
+// the capital of the country hint.
+func TestGeocoderResolve_CountryHintCapital(t *testing.T) {
+	// No city DB, no text match — should use country hint's capital.
+	g := NewGeocoder(nil, nil)
+
+	result := g.Resolve(context.Background(), "New advisory issued for financial sector", "MT")
+	if result.Source != "capital" {
+		t.Errorf("Source = %q, want capital", result.Source)
+	}
+	// Valletta coordinates.
+	if result.Lat < 35.5 || result.Lat > 36.5 {
+		t.Errorf("Lat = %f, want ~35.90 (Valletta)", result.Lat)
+	}
+}
+
+// TestCapitalCoords_IslandsOnLand checks that selected island nations have
+// a capitalCoords entry that is not (0, 0). It does not verify that the
+// point actually lies on land.
+func TestCapitalCoords_IslandsOnLand(t *testing.T) {
+	// Verify that island nations have capital coords on land.
+	islands := map[string]string{
+		"MT": "Valletta",
+		"CY": "Nicosia",
+		"SG": "Singapore",
+		"JM": "Kingston",
+		"CU": "Havana",
+		"IS": "Reykjavik",
+	}
+
+	for code, name := range islands {
+		coords, ok := capitalCoords[code]
+		if !ok {
+			t.Errorf("missing capital coords for %s (%s)", code, name)
+			continue
+		}
+		// Sanity: lat/lng should be non-zero.
+		if coords[0] == 0 && coords[1] == 0 {
+			t.Errorf("capital coords for %s (%s) are zero", code, name)
+		}
+	}
+}
+
+// TestGeocodeCountryCode_UsesCapitals checks that a code lookup returns the
+// capital's position rather than the country centroid.
+func TestGeocodeCountryCode_UsesCapitals(t *testing.T) {
+	// Malta centroid is in the sea. Capital (Valletta) should be returned.
+	lat, lng, name, ok := geocodeCountryCode("MT")
+	if !ok {
+		// MT might not be in geoCountries — that's fine for this test.
+		t.Skip("MT not in geoCountries")
+	}
+	_ = name
+	// Valletta is at 35.90, 14.51. Centroid would be different.
+	if lat < 35.5 || lat > 36.5 || lng < 14.0 || lng > 15.0 {
+		t.Errorf("geocodeCountryCode(MT) = (%f, %f), want Valletta area", lat, lng)
+	}
+}
+
+// TestExtractCandidateNames checks that single words and multi-word
+// sequences are both offered as candidates.
+func TestExtractCandidateNames(t *testing.T) {
+	candidates := extractCandidateNames("Explosion in San Francisco kills 3 near Mission District")
+
+	names := make(map[string]bool)
+	for _, c := range candidates {
+		names[c.name] = true
+	}
+
+	if !names["San"] {
+		t.Error("expected 'San' in candidates")
+	}
+	if !names["San Francisco"] {
+		t.Error("expected 'San Francisco' in candidates")
+	}
+	if !names["Mission District"] {
+		t.Error("expected 'Mission District' in candidates")
+	}
+}
diff --git a/internal/collector/normalize/normalize.go b/internal/collector/normalize/normalize.go index 8875cf0..6030f35 100644 --- a/internal/collector/normalize/normalize.go +++ b/internal/collector/normalize/normalize.go @@ -4,6 +4,7 @@ package normalize import ( + "context" "crypto/sha1" "encoding/hex" "math" @@ -49,12 +50,42 @@ var ( regexp.MustCompile(`(?i)\b(?:ceremony|speech|statement|newsletter|weekly roundup)\b`), regexp.MustCompile(`(?i)\b(?:partnership|memorandum|mou|initiative|campaign)\b`), } + certificationPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:certification|certifi(?:ed|cate)|accreditation|compliance audit|standard(?:s)?)\b`), + regexp.MustCompile(`(?i)\b(?:NESAS|common criteria|ISO[\s-]?27001|ISO[\s-]?15408|ITSEC|protection profile)\b`), + regexp.MustCompile(`(?i)\b(?:evaluation|scheme|approval|conformity|audit report|test
report)\b`), + regexp.MustCompile(`(?i)\b(?:product certification|vendor certification|zertifizierung|anerkennung)\b`), + regexp.MustCompile(`(?i)\b(?:training|course|curriculum|e-learning|online.?training|skill|qualification)\b`), + } securityContextPatterns = []*regexp.Regexp{ regexp.MustCompile(`(?i)\b(?:cyber|cybersecurity|infosec|information security|it security)\b`), regexp.MustCompile(`(?i)\b(?:security posture|security controls?|threat intelligence)\b`), regexp.MustCompile(`(?i)\b(?:vulnerability|exploit|patch|advisory|defend|defensive)\b`), regexp.MustCompile(`(?i)\b(?:soc|siem|incident response|malware analysis)\b`), } + // localCrimePatterns match routine domestic police operations that lack + // cross-border or international intelligence significance. + localCrimePatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:operac[aã]o|opera[çc][aã]o|operation)\b.*\b(?:busca|raid|search|apreens[aã]o|seizure)\b`), + regexp.MustCompile(`(?i)\b(?:drug bust|drug seizure|narcotics seized|heroin|cocaine|cannabis|marijuana)\b.*\b(?:kg|kilos?|grams?|pounds?|tonnes?)\b`), + regexp.MustCompile(`(?i)\b(?:burglary|robbery|theft|shoplifting|pickpocket|break-?in|car theft|vehicle theft)\b`), + regexp.MustCompile(`(?i)\b(?:domestic (?:violence|abuse|dispute)|bar fight|pub brawl|assault|gbh|abh)\b`), + regexp.MustCompile(`(?i)\b(?:drunk driv|dui|dwi|speeding|traffic (?:offence|offense|violation)|road rage)\b`), + regexp.MustCompile(`(?i)\b(?:sentenced to|prison sentence|jail (?:term|sentence)|community service|probation order)\b`), + regexp.MustCompile(`(?i)\b(?:mortu[aá]ri[ao]|autopsy|autópsia|post-?mortem|inquest|coroner)\b`), + regexp.MustCompile(`(?i)\b(?:local police|polícia local|commissariat|poste de police|comisaría)\b`), + } + // crossBorderSignals indicate international/strategic significance that + // should prevent local-crime downranking. 
+ crossBorderSignals = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:interpol|europol|eurojust|frontex|five eyes|nato)\b`), + regexp.MustCompile(`(?i)\b(?:cross-?border|transnational|international|multi-?country|joint (?:operation|investigation))\b`), + regexp.MustCompile(`(?i)\b(?:terror(?:ism|ist)?|extremis[tm]|radicaliz|foreign fighter)\b`), + regexp.MustCompile(`(?i)\b(?:cyber.?attack|state-?sponsored|apt|espionage|intelligence)\b`), + regexp.MustCompile(`(?i)\b(?:trafficking|smuggling|organized crime|money laundering|sanctions evasion)\b`), + regexp.MustCompile(`(?i)\b(?:critical infrastructure|national security|chemical|biological|nuclear|radiological)\b`), + regexp.MustCompile(`(?i)\b(?:mass casualty|mass shooting|bombing|explosion|hostage)\b`), + } assistancePatterns = []*regexp.Regexp{ regexp.MustCompile(`(?i)\b(?:report(?:\s+a)?(?:\s+crime)?|submit (?:a )?tip|tip[-\s]?off)\b`), regexp.MustCompile(`(?i)\b(?:contact (?:police|authorities|law enforcement)|hotline|helpline)\b`), @@ -94,8 +125,9 @@ var ( ) type Context struct { - Config config.Config - Now time.Time + Config config.Config + Now time.Time + Geocoder *Geocoder // optional; nil falls back to country-level only } type FeedContext struct { @@ -113,7 +145,7 @@ func RSSItem(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model if !isFresh(ctx.Config, publishedAt, ctx.Now) { return nil } - alert := baseAlert(ctx, meta, item.Title, item.Link, publishedAt) + alert := baseAlert(ctx, meta, item.Title, item.Link, item.Title+" "+item.Summary, publishedAt) triage := score(ctx.Config, alert, FeedContext{ Summary: item.Summary, Author: item.Author, @@ -131,7 +163,7 @@ func RSSItem(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model } func HTMLItem(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model.Alert { - alert := baseAlert(ctx, meta, item.Title, item.Link, ctx.Now) + alert := baseAlert(ctx, meta, item.Title, item.Link, item.Title+" "+item.Summary, ctx.Now) 
triage := score(ctx.Config, alert, FeedContext{ Summary: item.Summary, Tags: item.Tags, @@ -156,7 +188,7 @@ func KEVAlert(ctx Context, meta model.RegistrySource, cveID string, vulnName str if strings.TrimSpace(cveID) != "" { link = "https://nvd.nist.gov/vuln/detail/" + strings.TrimSpace(cveID) } - alert := baseAlert(ctx, meta, title, link, publishedAt) + alert := baseAlert(ctx, meta, title, link, title+" "+description, publishedAt) if hoursBetween(ctx.Now, publishedAt) <= 72 { alert.Severity = "critical" } else if hoursBetween(ctx.Now, publishedAt) <= 168 { @@ -174,15 +206,26 @@ func KEVAlert(ctx Context, meta model.RegistrySource, cveID string, vulnName str return &alert } -func InterpolAlert(ctx Context, meta model.RegistrySource, title string, link string, countryCode string, summary string, tags []string) *model.Alert { +func InterpolAlert(ctx Context, meta model.RegistrySource, noticeID string, title string, link string, countryCode string, summary string, tags []string) *model.Alert { if strings.TrimSpace(title) == "" { return nil } - alert := baseAlert(ctx, meta, title, firstNonEmpty(link, meta.Source.BaseURL), ctx.Now) + alert := baseAlert(ctx, meta, title, firstNonEmpty(link, meta.Source.BaseURL), title+" "+summary, ctx.Now) alert.Severity = "critical" - alert.RegionTag = firstNonEmpty(countryCode, alert.RegionTag) - if strings.TrimSpace(countryCode) != "" { - alert.Source.CountryCode = strings.ToUpper(strings.TrimSpace(countryCode)) + if id := strings.TrimSpace(noticeID); id != "" { + alert.AlertID = meta.Source.SourceID + ":" + id + } + if code := normalizeCountryCode(countryCode); code != "" { + alert.RegionTag = code + alert.Source.CountryCode = code + if name := countryNameFromCode(code); name != "" { + alert.Source.Country = name + } + // Override lat/lng to the person's nationality country instead of + // Interpol HQ (Lyon, France). 
+ if gLat, gLng, _, ok := geocodeCountryCode(code); ok { + alert.Lat, alert.Lng = jitter(gLat, gLng, meta.Source.SourceID+":"+link, "capital") + } } alert.Triage = score(ctx.Config, alert, FeedContext{ Summary: summary, @@ -192,6 +235,56 @@ func InterpolAlert(ctx Context, meta model.RegistrySource, title string, link st return &alert } +func FBIWantedAlert(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model.Alert { + publishedAt := parseDate(item.Published) + if publishedAt.IsZero() { + publishedAt = ctx.Now + } + alert := baseAlert(ctx, meta, item.Title, item.Link, item.Title+" "+item.Summary, publishedAt) + alert.Severity = "critical" + triage := score(ctx.Config, alert, FeedContext{ + Summary: item.Summary, + Tags: item.Tags, + FeedType: meta.Type, + }) + alert.Triage = triage + return &alert +} + +func TravelWarningAlert(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model.Alert { + publishedAt := parseDate(item.Published) + if publishedAt.IsZero() { + publishedAt = ctx.Now + } + if !isFresh(ctx.Config, publishedAt, ctx.Now) { + return nil + } + alert := baseAlert(ctx, meta, item.Title, item.Link, item.Title+" "+item.Summary, publishedAt) + alert.Severity = inferTravelWarningSeverity(item.Title, item.Summary, item.Tags) + triage := score(ctx.Config, alert, FeedContext{ + Summary: item.Summary, + Author: item.Author, + Tags: item.Tags, + FeedType: meta.Type, + }) + alert.Triage = triage + return &alert +} + +func inferTravelWarningSeverity(title, summary string, tags []string) string { + text := strings.ToLower(title + " " + summary + " " + strings.Join(tags, " ")) + switch { + case containsAny(text, "do not travel", "reisewarnung", "advise against all travel", "level 4"): + return "critical" + case containsAny(text, "reconsider travel", "avoid non-essential travel", "advise against all but essential travel", "level 3", "teilreisewarnung"): + return "high" + case containsAny(text, "exercise increased caution", "exercise a high 
degree of caution", "level 2"): + return "medium" + default: + return "medium" + } +} + func StaticInterpolEntry(now time.Time) model.Alert { return model.Alert{ AlertID: "interpol-hub-static", @@ -217,12 +310,72 @@ func StaticInterpolEntry(now time.Time) model.Alert { } } -func baseAlert(ctx Context, meta model.RegistrySource, title string, link string, publishedAt time.Time) model.Alert { - lat, lng := jitter(meta.Lat, meta.Lng, meta.Source.SourceID+":"+link) +func baseAlert(ctx Context, meta model.RegistrySource, title string, link string, geoText string, publishedAt time.Time) model.Alert { + title = strings.TrimSpace(title) + geoText = strings.TrimSpace(geoText) + if geoText == "" { + geoText = title + } + // Fix broken NCMEC-style titles that start with ": Name (State)". + if strings.HasPrefix(title, ": ") { + title = "Missing" + title + } + + baseLat, baseLng := meta.Lat, meta.Lng + geoSource := "registry" + source := meta.Source + + // Use capital coords instead of geographic centroid for the source's + // country — fixes islands (Malta, Cyprus, etc.) landing in the sea. + if source.CountryCode != "" && source.CountryCode != "INT" { + if capital, ok := capitalCoords[source.CountryCode]; ok { + baseLat, baseLng = capital[0], capital[1] + geoSource = "capital" + } + } + + geocoded := false + + // For international sources, try to geocode the alert to the actual + // crisis location instead of pinning it to the org's HQ. + if meta.RegionTag == "INT" || meta.Source.CountryCode == "INT" { + if ctx.Geocoder != nil { + // Enhanced geocoding: city DB → Nominatim → country text. 
+ if result := ctx.Geocoder.Resolve(context.Background(), geoText, ""); result.CountryCode != "" { + baseLat, baseLng = result.Lat, result.Lng + geoSource = result.Source + geocoded = true + if name := countryNameFromCode(result.CountryCode); name != "" { + source.Country = name + source.CountryCode = result.CountryCode + } + } + } + if !geocoded { + if gLat, gLng, code, ok := geocodeText(geoText); ok { + baseLat, baseLng = gLat, gLng + geoSource = "country-text" + if name := countryNameFromCode(code); name != "" { + source.Country = name + source.CountryCode = code + } + } + } + } else if ctx.Geocoder != nil { + // Non-international source: try city-level geocoding within the + // source's country for better pin placement. + if result := ctx.Geocoder.Resolve(context.Background(), geoText, source.CountryCode); result.CountryCode != "" && + (result.Source == "city-db" || result.Source == "nominatim" || result.CountryCode == source.CountryCode) { + baseLat, baseLng = result.Lat, result.Lng + geoSource = result.Source + } + } + + lat, lng := jitter(baseLat, baseLng, meta.Source.SourceID+":"+link, geoSource) return model.Alert{ AlertID: meta.Source.SourceID + "-" + hashID(link), SourceID: meta.Source.SourceID, - Source: meta.Source, + Source: source, Title: strings.TrimSpace(title), CanonicalURL: strings.TrimSpace(link), FirstSeen: publishedAt.UTC().Format(time.RFC3339), @@ -238,6 +391,25 @@ func baseAlert(ctx Context, meta model.RegistrySource, title string, link string } } +func normalizeCountryCode(code string) string { + code = strings.ToUpper(strings.TrimSpace(code)) + if len(code) == 2 { + return code + } + return "" +} + +func countryNameFromCode(code string) string { + code = normalizeCountryCode(code) + // Use the geocode table as the canonical source. 
+ for i := range geoCountries { + if geoCountries[i].Code == code { + return geoCountries[i].Name + } + } + return "" +} + func score(cfg config.Config, alert model.Alert, feed FeedContext) *model.Triage { text := strings.ToLower(strings.Join([]string{ alert.Title, @@ -278,6 +450,8 @@ func score(cfg config.Config, alert model.Alert, feed FeedContext) *model.Triage add(0.07, "education and digital capacity category") case "fraud_alert": add(0.07, "fraud incident category") + case "travel_warning": + add(0.08, "travel warning category") } hasTechnical := hasAny(text, technicalSignalPatterns) @@ -286,6 +460,7 @@ func score(cfg config.Config, alert model.Alert, feed FeedContext) *model.Triage hasSpecificImpact := hasAny(text, impactSpecificityPatterns) hasNarrative := hasAny(text, narrativePatterns) hasGeneral := hasAny(text, generalNewsPatterns) + hasCertification := hasAny(text, certificationPatterns) looksLikeBlog := isBlog(alert) if hasTechnical { @@ -309,6 +484,16 @@ func score(cfg config.Config, alert model.Alert, feed FeedContext) *model.Triage if looksLikeBlog { add(-0.10, "blog-style structure") } + if hasCertification && !hasIncident && !hasTechnical { + add(-0.22, "certification/training/standards content") + } + // Downrank routine local crime stories from police feeds unless + // they carry cross-border or strategic intelligence significance. 
+ hasLocalCrime := hasAny(text, localCrimePatterns) + hasCrossBorder := hasAny(text, crossBorderSignals) + if hasLocalCrime && !hasCrossBorder && !hasTechnical { + add(-0.20, "routine local crime without cross-border significance") + } if !hasTechnical && !hasIncident && (hasNarrative || hasGeneral) { add(-0.08, "weak incident evidence relative to narrative cues") } @@ -376,6 +561,12 @@ func defaultSeverity(category string) string { return "critical" case "public_appeal", "humanitarian_tasking", "humanitarian_security", "private_sector": return "high" + case "travel_warning": + return "high" + case "environmental_disaster", "disease_outbreak": + return "high" + case "emergency_management", "health_emergency": + return "high" default: return "medium" } @@ -384,13 +575,13 @@ func defaultSeverity(category string) string { func inferSeverity(title string, fallback string) string { t := strings.ToLower(title) switch { - case containsAny(t, "critical", "emergency", "zero-day", "0-day", "ransomware", "actively exploited", "exploitation", "breach", "data leak", "crypto heist", "million stolen", "wanted", "fugitive", "murder", "homicide", "missing", "amber alert", "kidnap"): + case containsAny(t, "critical", "kritische", "emergency", "zero-day", "0-day", "ransomware", "actively exploited", "exploitation", "breach", "data leak", "crypto heist", "million stolen", "wanted", "fugitive", "murder", "homicide", "missing", "amber alert", "kidnap", "do not travel", "notfall", "pandemic", "ebola", "plague", "tsunami", "earthquake", "eruption", "nuclear incident", "radiation leak", "oil spill", "explosion"): return "critical" - case containsAny(t, "hack", "compromise", "vulnerability", "high", "severe", "urgent", "fatal", "death", "shooting", "fraud", "scam", "phishing"): + case containsAny(t, "hack", "compromise", "vulnerability", "schwachstelle", "sicherheitslücke", "high", "severe", "urgent", "dringend", "fatal", "death", "shooting", "fraud", "scam", "phishing", "reconsider 
travel", "avoid non-essential travel", "warnung", "gefährlich", "outbreak", "epidemic", "cholera", "mpox", "avian influenza", "flood", "wildfire", "cyclone", "hurricane", "typhoon", "drought", "chemical spill", "hazmat"): return "high" - case containsAny(t, "arrested", "charged", "sentenced", "medium", "moderate"): + case containsAny(t, "arrested", "charged", "sentenced", "medium", "moderate", "festgenommen", "verurteilt"): return "medium" - case containsAny(t, "low", "informational"): + case containsAny(t, "low", "informational", "infopaket", "infoblatt", "handreichung", "leitfaden", "newsletter"): return "info" default: return fallback @@ -442,6 +633,9 @@ func inferPublicationType(alert model.Alert, feedType string) string { if feedType == "kev-json" || feedType == "interpol-red-json" || feedType == "interpol-yellow-json" { return "structured_incident_feed" } + if feedType == "travelwarning-json" || feedType == "travelwarning-atom" { + return "official_update" + } return "official_update" } @@ -505,10 +699,11 @@ func hashID(value string) string { return hex.EncodeToString(sum[:])[:12] } -func jitter(lat float64, lng float64, seed string) (float64, float64) { +func jitter(lat float64, lng float64, seed string, geoSource string) (float64, float64) { sum := sha1.Sum([]byte(seed)) angle := float64(sum[0])/255*math.Pi*2 + float64(sum[1])/255 - radius := 22 + float64(sum[2])/255*55 + minRadius, maxRadius := jitterRadiusKM(geoSource) + radius := minRadius + float64(sum[2])/255*(maxRadius-minRadius) dLat := (radius / 111.32) * math.Cos(angle) cosLat := math.Max(0.2, math.Cos((lat*math.Pi)/180)) dLng := (radius / (111.32 * cosLat)) * math.Sin(angle) @@ -523,6 +718,23 @@ func jitter(lat float64, lng float64, seed string) (float64, float64) { return round5(outLat), round5(outLng) } +func jitterRadiusKM(geoSource string) (float64, float64) { + switch geoSource { + case "city-db": + return 0.4, 1.6 + case "nominatim": + return 0.8, 2.5 + case "capital": + return 1.2, 4 + case 
"country-text": + return 4, 14 + case "registry": + return 2, 10 + default: + return 2, 10 + } +} + func extractDomain(raw string) string { u, err := url.Parse(raw) if err != nil { diff --git a/internal/collector/normalize/normalize_test.go b/internal/collector/normalize/normalize_test.go index 55454a8..e8d0b24 100644 --- a/internal/collector/normalize/normalize_test.go +++ b/internal/collector/normalize/normalize_test.go @@ -5,9 +5,11 @@ package normalize import ( "testing" + "time" "github.com/scalytics/euosint/internal/collector/config" "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/collector/parse" ) func TestDeduplicatePrefersHigherScore(t *testing.T) { @@ -40,3 +42,131 @@ func TestFilterActiveUsesMissingPersonThreshold(t *testing.T) { t.Fatalf("unexpected filtered alerts %#v", filtered) } } + +func TestInterpolAlertUsesNoticeCountryAndStableID(t *testing.T) { + ctx := Context{Config: config.Default(), Now: time.Date(2026, 3, 16, 0, 0, 0, 0, time.UTC)} + meta := model.RegistrySource{ + Type: "interpol-yellow-json", + Category: "missing_person", + RegionTag: "INT", + Source: model.SourceMetadata{ + SourceID: "interpol-yellow", + AuthorityName: "INTERPOL Yellow Notices", + Country: "France", + CountryCode: "FR", + Region: "International", + AuthorityType: "police", + BaseURL: "https://www.interpol.int", + }, + } + alert := InterpolAlert(ctx, meta, "2026-17351", "INTERPOL Yellow Notice: Jane Doe", "https://www.interpol.int/How-we-work/Notices/Yellow-Notices/View-Yellow-Notices#2026-17351", "DE", "INTERPOL Paris", []string{"DE"}) + if alert == nil { + t.Fatal("expected interpol alert") + } + if alert.AlertID != "interpol-yellow:2026-17351" { + t.Fatalf("expected stable interpol alert id, got %q", alert.AlertID) + } + if alert.Source.CountryCode != "DE" || alert.Source.Country != "Germany" { + t.Fatalf("expected country mapping to Germany, got %#v", alert.Source) + } + if alert.Source.AuthorityName != "INTERPOL 
Yellow Notices" { + t.Fatalf("expected source authority to remain INTERPOL, got %#v", alert.Source) + } +} + +func TestLocalCrimeDownranked(t *testing.T) { + cfg := config.Default() + ctx := Context{Config: cfg, Now: time.Date(2026, 3, 16, 0, 0, 0, 0, time.UTC)} + meta := model.RegistrySource{ + Type: "rss", + Category: "public_appeal", + Source: model.SourceMetadata{ + SourceID: "pj-pt", + AuthorityName: "Polícia Judiciária", + Country: "Portugal", + CountryCode: "PT", + Region: "Europe", + AuthorityType: "police", + }, + } + + // Local crime: police raid on a mortuary — no cross-border significance. + localItem := parse.FeedItem{ + Title: "Operação Rigor Mortis – PJ realiza buscas em casa mortuária e em domicílios", + Link: "https://www.policiajudiciaria.pt/operacao-rigor-mortis/", + Published: "2026-03-15T10:00:00Z", + Summary: "A Polícia Judiciária realizou buscas em casa mortuária. Autopsy fraud investigation.", + } + localAlert := RSSItem(ctx, meta, localItem) + if localAlert == nil { + t.Fatal("expected local crime alert to be normalized") + } + if localAlert.Triage.RelevanceScore >= cfg.IncidentRelevanceThreshold { + t.Fatalf("expected local crime to be below threshold, got %.3f (threshold %.3f)", + localAlert.Triage.RelevanceScore, cfg.IncidentRelevanceThreshold) + } + + // Cross-border crime: Europol joint operation — should stay above threshold. + crossBorderItem := parse.FeedItem{ + Title: "Operação conjunta PJ-Europol — rede transnacional de tráfico desmantelada", + Link: "https://www.policiajudiciaria.pt/operacao-europol/", + Published: "2026-03-15T10:00:00Z", + Summary: "Joint operation with Europol dismantled cross-border trafficking network. 
Drug seizure of 500 kg cocaine.", + } + crossBorderAlert := RSSItem(ctx, meta, crossBorderItem) + if crossBorderAlert == nil { + t.Fatal("expected cross-border alert to be normalized") + } + if crossBorderAlert.Triage.RelevanceScore < localAlert.Triage.RelevanceScore { + t.Fatalf("expected cross-border alert (%.3f) to score higher than local crime (%.3f)", + crossBorderAlert.Triage.RelevanceScore, localAlert.Triage.RelevanceScore) + } +} + +func TestJitterRadiusKMIsPrecisionAware(t *testing.T) { + cityMin, cityMax := jitterRadiusKM("city-db") + countryMin, countryMax := jitterRadiusKM("country-text") + if cityMax >= countryMin { + t.Fatalf("expected city jitter to be tighter than country jitter, got city %.1f-%.1f km vs country %.1f-%.1f km", cityMin, cityMax, countryMin, countryMax) + } + if cityMax > 2 { + t.Fatalf("expected city-db jitter to stay very tight, got max %.1f km", cityMax) + } +} + +func TestRSSItemUsesSummaryForCityPlacement(t *testing.T) { + cfg := config.Default() + ctx := Context{ + Config: cfg, + Now: time.Date(2026, 3, 17, 0, 0, 0, 0, time.UTC), + Geocoder: NewGeocoder(&mockCityLookup{cities: map[string]CityLookupResult{ + "Valletta|MT": {Name: "Valletta", CountryCode: "MT", Lat: 35.90, Lng: 14.51, Population: 6400}, + }}, nil), + } + meta := model.RegistrySource{ + Type: "rss", + Category: "public_safety", + Source: model.SourceMetadata{ + SourceID: "malta-civil", + AuthorityName: "Malta Civil Protection", + Country: "Malta", + CountryCode: "MT", + Region: "Europe", + AuthorityType: "public_safety_program", + BaseURL: "https://example.test", + }, + } + item := parse.FeedItem{ + Title: "Incident update", + Summary: "Emergency crews dispatched in Valletta harbour district", + Link: "https://example.test/incident", + Published: "2026-03-16T10:00:00Z", + } + alert := RSSItem(ctx, meta, item) + if alert == nil { + t.Fatal("expected alert") + } + if alert.Lat < 35.7 || alert.Lat > 36.1 || alert.Lng < 14.3 || alert.Lng > 14.7 { + t.Fatalf("expected 
alert to stay near Valletta, got (%f, %f)", alert.Lat, alert.Lng) + } +} diff --git a/internal/collector/output/write.go b/internal/collector/output/write.go index c88e7c5..91ce593 100644 --- a/internal/collector/output/write.go +++ b/internal/collector/output/write.go @@ -14,8 +14,8 @@ import ( "github.com/scalytics/euosint/internal/collector/model" ) -func Write(cfg config.Config, active []model.Alert, filtered []model.Alert, state []model.Alert, sourceHealth []model.SourceHealthEntry, duplicateAudit model.DuplicateAudit) error { - paths := []string{cfg.OutputPath, cfg.FilteredOutputPath, cfg.StateOutputPath, cfg.SourceHealthOutputPath} +func Write(cfg config.Config, active []model.Alert, filtered []model.Alert, state []model.Alert, sourceHealth []model.SourceHealthEntry, duplicateAudit model.DuplicateAudit, replacementQueue []model.SourceReplacementCandidate) error { + paths := []string{cfg.OutputPath, cfg.FilteredOutputPath, cfg.StateOutputPath, cfg.SourceHealthOutputPath, cfg.ReplacementQueuePath} for _, path := range paths { if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { return err @@ -38,9 +38,23 @@ func Write(cfg config.Config, active []model.Alert, filtered []model.Alert, stat SourcesOK: countStatus(sourceHealth, "ok"), SourcesError: countStatus(sourceHealth, "error"), DuplicateAudit: duplicateAudit, + ReplacementQueue: replacementQueue, Sources: sourceHealth, } - return writeJSON(cfg.SourceHealthOutputPath, doc) + if doc.ReplacementQueue == nil { + doc.ReplacementQueue = []model.SourceReplacementCandidate{} + } + if err := writeJSON(cfg.SourceHealthOutputPath, doc); err != nil { + return err + } + queueDoc := model.SourceReplacementDocument{ + GeneratedAt: doc.GeneratedAt, + Sources: replacementQueue, + } + if queueDoc.Sources == nil { + queueDoc.Sources = []model.SourceReplacementCandidate{} + } + return writeJSON(cfg.ReplacementQueuePath, queueDoc) } func writeJSON(path string, value any) error { diff --git 
a/internal/collector/output/write_test.go b/internal/collector/output/write_test.go index bde54c6..6a257fb 100644 --- a/internal/collector/output/write_test.go +++ b/internal/collector/output/write_test.go @@ -19,12 +19,13 @@ func TestWriteOutputs(t *testing.T) { cfg.FilteredOutputPath = filepath.Join(dir, "filtered.json") cfg.StateOutputPath = filepath.Join(dir, "state.json") cfg.SourceHealthOutputPath = filepath.Join(dir, "health.json") + cfg.ReplacementQueuePath = filepath.Join(dir, "replacement.json") - err := Write(cfg, []model.Alert{{AlertID: "a"}}, []model.Alert{{AlertID: "b"}}, []model.Alert{{AlertID: "c"}}, []model.SourceHealthEntry{{SourceID: "s", Status: "ok"}}, model.DuplicateAudit{}) + err := Write(cfg, []model.Alert{{AlertID: "a"}}, []model.Alert{{AlertID: "b"}}, []model.Alert{{AlertID: "c"}}, []model.SourceHealthEntry{{SourceID: "s", Status: "ok"}}, model.DuplicateAudit{}, nil) if err != nil { t.Fatal(err) } - for _, path := range []string{cfg.OutputPath, cfg.FilteredOutputPath, cfg.StateOutputPath, cfg.SourceHealthOutputPath} { + for _, path := range []string{cfg.OutputPath, cfg.FilteredOutputPath, cfg.StateOutputPath, cfg.SourceHealthOutputPath, cfg.ReplacementQueuePath} { if _, err := os.Stat(path); err != nil { t.Fatalf("expected output file %s: %v", path, err) } diff --git a/internal/collector/parse/fbi.go b/internal/collector/parse/fbi.go new file mode 100644 index 0000000..09c9f38 --- /dev/null +++ b/internal/collector/parse/fbi.go @@ -0,0 +1,125 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package parse + +import ( + "encoding/json" + "strings" +) + +// FBIWantedResponse is the top-level response from the FBI Wanted API. +type FBIWantedResponse struct { + Total int `json:"total"` + Page int `json:"page"` + Items []FBIWantedEntry `json:"items"` +} + +// FBIWantedEntry is a single person/case from the FBI Wanted API. 
+type FBIWantedEntry struct { + UID string `json:"uid"` + Title string `json:"title"` + Description string `json:"description"` + Details string `json:"details"` + Caution string `json:"caution"` + WarningMessage string `json:"warning_message"` + Remarks string `json:"remarks"` + Sex string `json:"sex"` + Nationality string `json:"nationality"` + PlaceOfBirth string `json:"place_of_birth"` + DatesOfBirthUsed []string `json:"dates_of_birth_used"` + Aliases []string `json:"aliases"` + Subjects []string `json:"subjects"` + Status string `json:"status"` + PersonClassification string `json:"person_classification"` + PosterClassification string `json:"poster_classification"` + RewardText string `json:"reward_text"` + RewardMin int `json:"reward_min"` + RewardMax int `json:"reward_max"` + URL string `json:"url"` + Path string `json:"path"` + Publication string `json:"publication"` + Modified string `json:"modified"` + FieldOffices []string `json:"field_offices"` + PossibleCountries []string `json:"possible_countries"` + PossibleStates []string `json:"possible_states"` + Images []struct { + Thumb string `json:"thumb"` + Original string `json:"original"` + Large string `json:"large"` + Caption string `json:"caption"` + } `json:"images"` +} + +// ParseFBIWanted parses the FBI Wanted API JSON response into FeedItems. 
+func ParseFBIWanted(body []byte) ([]FeedItem, int, error) { + var resp FBIWantedResponse + if err := json.Unmarshal(body, &resp); err != nil { + return nil, 0, err + } + items := make([]FeedItem, 0, len(resp.Items)) + for _, entry := range resp.Items { + title := strings.TrimSpace(entry.Title) + if title == "" { + continue + } + link := strings.TrimSpace(entry.URL) + if link == "" && strings.TrimSpace(entry.Path) != "" { + link = "https://www.fbi.gov" + entry.Path + } + if link == "" { + link = "https://www.fbi.gov/wanted" + } + + summary := buildFBISummary(entry) + tags := buildFBITags(entry) + published := firstNonEmpty(entry.Modified, entry.Publication) + + items = append(items, FeedItem{ + Title: title, + Link: link, + Published: published, + Summary: summary, + Tags: tags, + }) + } + return items, resp.Total, nil +} + +func buildFBISummary(entry FBIWantedEntry) string { + parts := []string{} + if desc := StripHTML(entry.Description); desc != "" { + parts = append(parts, desc) + } + if entry.Nationality != "" { + parts = append(parts, "Nationality: "+entry.Nationality) + } + if entry.PlaceOfBirth != "" { + parts = append(parts, "Born: "+entry.PlaceOfBirth) + } + if len(entry.Aliases) > 0 { + parts = append(parts, "Aliases: "+strings.Join(entry.Aliases, ", ")) + } + if entry.RewardText != "" { + parts = append(parts, "Reward: "+StripHTML(entry.RewardText)) + } + return strings.Join(parts, ". ") +} + +func buildFBITags(entry FBIWantedEntry) []string { + tags := make([]string, 0, len(entry.Subjects)+4) + tags = append(tags, entry.Subjects...) 
+ if entry.PosterClassification != "" { + tags = append(tags, entry.PosterClassification) + } + if entry.PersonClassification != "" { + tags = append(tags, entry.PersonClassification) + } + if entry.Sex != "" { + tags = append(tags, entry.Sex) + } + if entry.WarningMessage != "" { + tags = append(tags, "armed-dangerous") + } + return tags +} diff --git a/internal/collector/parse/fbi_test.go b/internal/collector/parse/fbi_test.go new file mode 100644 index 0000000..f3b4f40 --- /dev/null +++ b/internal/collector/parse/fbi_test.go @@ -0,0 +1,88 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package parse + +import "testing" + +func TestParseFBIWanted(t *testing.T) { + body := []byte(`{ + "total": 1137, + "page": 1, + "items": [ + { + "uid": "abc123", + "title": "JOHN DOE", + "description": "

Conspiracy to commit wire fraud

", + "url": "https://www.fbi.gov/wanted/fugitive/john-doe", + "nationality": "American", + "place_of_birth": "New York, New York", + "aliases": ["JD", "Johnny"], + "subjects": ["Cyber's Most Wanted"], + "person_classification": "Main", + "poster_classification": "default", + "sex": "Male", + "warning_message": "SHOULD BE CONSIDERED ARMED AND DANGEROUS", + "reward_text": "Up to $100,000", + "publication": "2025-01-15T00:00:00", + "modified": "2026-03-01T12:00:00", + "images": [{"thumb": "https://www.fbi.gov/image/thumb.jpg"}] + }, + { + "uid": "def456", + "title": "", + "url": "" + } + ] + }`) + + items, total, err := ParseFBIWanted(body) + if err != nil { + t.Fatal(err) + } + if total != 1137 { + t.Fatalf("expected total 1137, got %d", total) + } + if len(items) != 1 { + t.Fatalf("expected 1 item (empty title skipped), got %d", len(items)) + } + + item := items[0] + if item.Title != "JOHN DOE" { + t.Fatalf("expected title 'JOHN DOE', got %q", item.Title) + } + if item.Link != "https://www.fbi.gov/wanted/fugitive/john-doe" { + t.Fatalf("expected FBI link, got %q", item.Link) + } + if item.Published != "2026-03-01T12:00:00" { + t.Fatalf("expected modified date, got %q", item.Published) + } + // Summary should contain stripped HTML description. + if item.Summary == "" { + t.Fatal("expected non-empty summary") + } + // Tags should include subjects, classification, sex, armed-dangerous. 
+ foundArmed := false + for _, tag := range item.Tags { + if tag == "armed-dangerous" { + foundArmed = true + } + } + if !foundArmed { + t.Fatalf("expected armed-dangerous tag, got %v", item.Tags) + } +} + +func TestParseFBIWantedEmpty(t *testing.T) { + body := []byte(`{"total": 0, "page": 1, "items": []}`) + items, total, err := ParseFBIWanted(body) + if err != nil { + t.Fatal(err) + } + if total != 0 { + t.Fatalf("expected total 0, got %d", total) + } + if len(items) != 0 { + t.Fatalf("expected 0 items, got %d", len(items)) + } +} diff --git a/internal/collector/parse/html.go b/internal/collector/parse/html.go index 5d1e3b8..7457633 100644 --- a/internal/collector/parse/html.go +++ b/internal/collector/parse/html.go @@ -14,6 +14,33 @@ var anchorRe = regexp.MustCompile(`(?is)]*href=["']([^"']+)["'][^>]*>([\s var tagStripRe = regexp.MustCompile(`(?is)<[^>]+>`) var scriptStripRe = regexp.MustCompile(`(?is)|`) +// junkTitles are navigation / boilerplate link texts that should never +// become alerts. Checked case-insensitively against the stripped title. 
+var junkTitles = []string{ + "load more", "read more", "see more", "show more", "ver más", + "cookie", "cookies", "privacy policy", "terms of use", + "terms of service", "legal notice", "aviso legal", + "log in", "sign in", "register", "iniciar sesión", + "contact us", "about us", "home", "back to top", + "next", "previous", "page", "skip to content", + "accept", "decline", "configuración de cookies", + "mozilla firefox", "google chrome", "microsoft edge", + "reset filters", +} + +func isJunkTitle(title string) bool { + lower := strings.ToLower(title) + if strings.Contains(lower, "${") || strings.Contains(lower, "{{") { + return true + } + for _, junk := range junkTitles { + if lower == junk || strings.TrimSpace(lower) == junk { + return true + } + } + return false +} + func ParseHTMLAnchors(body string, baseURL string) []FeedItem { matches := anchorRe.FindAllStringSubmatch(body, -1) seen := make(map[string]struct{}, len(matches)) @@ -34,8 +61,8 @@ func ParseHTMLAnchors(body string, baseURL string) []FeedItem { if err != nil { continue } - title := stripHTML(match[2]) - if len(title) < 8 { + title := StripHTML(match[2]) + if len(title) < 8 || isJunkTitle(title) { continue } finalURL := resolved.ResolveReference(link).String() @@ -48,7 +75,9 @@ func ParseHTMLAnchors(body string, baseURL string) []FeedItem { return out } -func stripHTML(value string) string { +// StripHTML removes script/style tags, strips remaining HTML tags, +// unescapes entities, and normalizes whitespace. 
+func StripHTML(value string) string { value = scriptStripRe.ReplaceAllString(value, " ") value = tagStripRe.ReplaceAllString(value, " ") value = html.UnescapeString(value) diff --git a/internal/collector/parse/html_test.go b/internal/collector/parse/html_test.go index 97984ad..ec51d68 100644 --- a/internal/collector/parse/html_test.go +++ b/internal/collector/parse/html_test.go @@ -15,3 +15,18 @@ func TestParseHTMLAnchors(t *testing.T) { t.Fatalf("unexpected link %q", items[0].Link) } } + +func TestParseHTMLAnchorsSkipsTemplateAndFilterNoise(t *testing.T) { + body := ` +${item.title} ${item.url} +Reset Filters +Disaster Response in Sudan +` + items := ParseHTMLAnchors(body, "https://agency.example.org/news") + if len(items) != 1 { + t.Fatalf("expected 1 item after filtering noise, got %d", len(items)) + } + if items[0].Title != "Disaster Response in Sudan" { + t.Fatalf("unexpected title %q", items[0].Title) + } +} diff --git a/internal/collector/parse/travelwarning.go b/internal/collector/parse/travelwarning.go new file mode 100644 index 0000000..ea84839 --- /dev/null +++ b/internal/collector/parse/travelwarning.go @@ -0,0 +1,85 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package parse + +import ( + "encoding/json" + "strings" +) + +// ParseGermanAATravelWarnings parses the JSON response from the German +// Auswärtiges Amt (Federal Foreign Office) travel warning open-data API. +// The API returns an object whose keys are numeric country IDs and values +// contain the warning metadata. +func ParseGermanAATravelWarnings(body []byte) ([]FeedItem, error) { + // The top-level structure wraps a "response" object whose keys are + // country IDs mapping to warning objects, or it can be a flat map. + // We try both shapes. 
+ var envelope struct { + Response map[string]json.RawMessage `json:"response"` + } + warnings := map[string]json.RawMessage{} + if err := json.Unmarshal(body, &envelope); err == nil && len(envelope.Response) > 0 { + warnings = envelope.Response + } else { + // Fall back to flat map keyed by country ID. + if err := json.Unmarshal(body, &warnings); err != nil { + return nil, err + } + } + + type warningEntry struct { + Title string `json:"title"` + Country string `json:"country"` + Warning string `json:"warning"` + Severity string `json:"severity"` + LastChanged string `json:"lastChanged"` + URL string `json:"url"` + Effective string `json:"effective"` + Content string `json:"content"` + } + + items := make([]FeedItem, 0, len(warnings)) + for _, raw := range warnings { + var entry warningEntry + if err := json.Unmarshal(raw, &entry); err != nil { + continue + } + title := firstNonEmpty(entry.Title, entry.Country) + if strings.TrimSpace(title) == "" { + continue + } + link := strings.TrimSpace(entry.URL) + if link == "" { + link = "https://www.auswaertiges-amt.de/de/ReiseUndSicherheit/reise-und-sicherheitshinweise" + } + summary := firstNonEmpty(entry.Warning, entry.Content) + published := firstNonEmpty(entry.LastChanged, entry.Effective) + + tags := []string{} + if entry.Severity != "" { + tags = append(tags, entry.Severity) + } + if entry.Country != "" { + tags = append(tags, entry.Country) + } + + items = append(items, FeedItem{ + Title: title, + Link: link, + Published: published, + Summary: summary, + Tags: tags, + }) + } + return items, nil +} + +// ParseFCDOAtom parses a UK FCDO (Foreign, Commonwealth & Development Office) +// Atom feed containing travel advice entries. This delegates to the generic +// Atom parser in ParseFeed and returns the results. 
+func ParseFCDOAtom(body []byte) ([]FeedItem, error) { + items := ParseFeed(string(body)) + return items, nil +} diff --git a/internal/collector/parse/travelwarning_test.go b/internal/collector/parse/travelwarning_test.go new file mode 100644 index 0000000..f144a48 --- /dev/null +++ b/internal/collector/parse/travelwarning_test.go @@ -0,0 +1,98 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package parse + +import ( + "testing" +) + +func TestParseGermanAATravelWarnings(t *testing.T) { + body := []byte(`{ + "1": { + "title": "Afghanistan - Reisewarnung", + "country": "Afghanistan", + "warning": "Do not travel to Afghanistan.", + "severity": "Reisewarnung", + "lastChanged": "2026-01-15", + "url": "https://www.auswaertiges-amt.de/de/aussenpolitik/laender/afghanistan-node/afghanistansicherheit/204692" + }, + "2": { + "title": "France - Exercise normal safety precautions", + "country": "France", + "warning": "No specific warnings.", + "severity": "", + "lastChanged": "2026-02-01", + "url": "https://www.auswaertiges-amt.de/de/aussenpolitik/laender/frankreich-node" + } + }`) + items, err := ParseGermanAATravelWarnings(body) + if err != nil { + t.Fatal(err) + } + if len(items) != 2 { + t.Fatalf("expected 2 items, got %d", len(items)) + } + foundAfghan := false + for _, item := range items { + if item.Title == "Afghanistan - Reisewarnung" { + foundAfghan = true + if item.Summary != "Do not travel to Afghanistan." 
{ + t.Errorf("unexpected summary: %s", item.Summary) + } + if item.Published != "2026-01-15" { + t.Errorf("unexpected published: %s", item.Published) + } + } + } + if !foundAfghan { + t.Error("did not find Afghanistan entry") + } +} + +func TestParseGermanAATravelWarningsEnvelope(t *testing.T) { + body := []byte(`{"response": { + "10": { + "title": "Test Country", + "country": "Test", + "warning": "Be careful.", + "lastChanged": "2026-03-01" + } + }}`) + items, err := ParseGermanAATravelWarnings(body) + if err != nil { + t.Fatal(err) + } + if len(items) != 1 { + t.Fatalf("expected 1 item, got %d", len(items)) + } + if items[0].Title != "Test Country" { + t.Errorf("unexpected title: %s", items[0].Title) + } +} + +func TestParseFCDOAtom(t *testing.T) { + body := []byte(` + + FCDO Travel Advice + + Afghanistan travel advice + + 2026-01-10T12:00:00Z + FCDO advises against all travel to Afghanistan. + + `) + items, err := ParseFCDOAtom(body) + if err != nil { + t.Fatal(err) + } + if len(items) != 1 { + t.Fatalf("expected 1 item, got %d", len(items)) + } + if items[0].Title != "Afghanistan travel advice" { + t.Errorf("unexpected title: %s", items[0].Title) + } + if items[0].Link != "https://www.gov.uk/foreign-travel-advice/afghanistan" { + t.Errorf("unexpected link: %s", items[0].Link) + } +} diff --git a/internal/collector/registry/registry.go b/internal/collector/registry/registry.go index 3d1cbcf..0a8aa0a 100644 --- a/internal/collector/registry/registry.go +++ b/internal/collector/registry/registry.go @@ -4,16 +4,32 @@ package registry import ( + "context" "encoding/json" "fmt" "os" + "path/filepath" "sort" "strings" "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/sourcedb" ) func Load(path string) ([]model.RegistrySource, error) { + if isSQLitePath(path) { + db, err := sourcedb.Open(path) + if err != nil { + return nil, err + } + defer db.Close() + raw, err := db.LoadActiveSources(context.Background()) + if err != nil 
{ + return nil, fmt.Errorf("load registry from source DB %s: %w", path, err) + } + return normalizeAll(raw), nil + } + data, err := os.ReadFile(path) if err != nil { return nil, fmt.Errorf("read registry %s: %w", path, err) @@ -24,24 +40,7 @@ func Load(path string) ([]model.RegistrySource, error) { return nil, fmt.Errorf("decode registry %s: %w", path, err) } - seen := make(map[string]struct{}, len(raw)) - out := make([]model.RegistrySource, 0, len(raw)) - for _, entry := range raw { - normalized, ok := normalize(entry) - if !ok { - continue - } - if _, exists := seen[normalized.Source.SourceID]; exists { - continue - } - seen[normalized.Source.SourceID] = struct{}{} - out = append(out, normalized) - } - - sort.Slice(out, func(i, j int) bool { - return out[i].Source.SourceID < out[j].Source.SourceID - }) - return out, nil + return normalizeAll(raw), nil } func normalize(entry model.RegistrySource) (model.RegistrySource, bool) { @@ -74,3 +73,35 @@ func fallback(value, fallback string) string { } return strings.TrimSpace(value) } + +func normalizeAll(raw []model.RegistrySource) []model.RegistrySource { + seen := make(map[string]struct{}, len(raw)) + out := make([]model.RegistrySource, 0, len(raw)) + for _, entry := range raw { + if strings.ToLower(strings.TrimSpace(entry.PromotionStatus)) == "rejected" { + continue + } + normalized, ok := normalize(entry) + if !ok { + continue + } + if _, exists := seen[normalized.Source.SourceID]; exists { + continue + } + seen[normalized.Source.SourceID] = struct{}{} + out = append(out, normalized) + } + sort.Slice(out, func(i, j int) bool { + return out[i].Source.SourceID < out[j].Source.SourceID + }) + return out +} + +func isSQLitePath(path string) bool { + switch strings.ToLower(filepath.Ext(strings.TrimSpace(path))) { + case ".db", ".sqlite", ".sqlite3": + return true + default: + return false + } +} diff --git a/internal/collector/registry/registry_test.go b/internal/collector/registry/registry_test.go index 1b4ce08..63a225f 
100644 --- a/internal/collector/registry/registry_test.go +++ b/internal/collector/registry/registry_test.go @@ -7,6 +7,8 @@ import ( "os" "path/filepath" "testing" + + "github.com/scalytics/euosint/internal/sourcedb" ) func TestLoadRegistryDeduplicatesAndNormalizes(t *testing.T) { @@ -34,3 +36,34 @@ func TestLoadRegistryDeduplicatesAndNormalizes(t *testing.T) { t.Fatalf("expected normalized country code, got %q", sources[0].Source.CountryCode) } } + +func TestLoadRegistryFromSQLite(t *testing.T) { + dir := t.TempDir() + jsonPath := filepath.Join(dir, "registry.json") + dbPath := filepath.Join(dir, "sources.db") + content := `[ + {"type":"rss","feed_url":"https://one.example/feed","category":"cyber_advisory","source":{"source_id":"one-feed","authority_name":"Agency One","country":"France","country_code":"fr","region":"Europe","authority_type":"cert","base_url":"https://one.example"}} + ]` + if err := os.WriteFile(jsonPath, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + db, err := sourcedb.Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + if err := db.ImportRegistry(t.Context(), jsonPath); err != nil { + t.Fatal(err) + } + + sources, err := Load(dbPath) + if err != nil { + t.Fatal(err) + } + if len(sources) != 1 { + t.Fatalf("expected 1 source from sqlite registry, got %d", len(sources)) + } + if sources[0].Source.CountryCode != "FR" { + t.Fatalf("expected normalized country code, got %q", sources[0].Source.CountryCode) + } +} diff --git a/internal/collector/run/run.go b/internal/collector/run/run.go index 3f18e25..f029439 100644 --- a/internal/collector/run/run.go +++ b/internal/collector/run/run.go @@ -9,10 +9,13 @@ import ( "fmt" "io" "net/url" + "path/filepath" "strings" "time" "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/dictionary" + "github.com/scalytics/euosint/internal/collector/discover" "github.com/scalytics/euosint/internal/collector/fetch" 
"github.com/scalytics/euosint/internal/collector/model" "github.com/scalytics/euosint/internal/collector/normalize" @@ -21,12 +24,15 @@ import ( "github.com/scalytics/euosint/internal/collector/registry" "github.com/scalytics/euosint/internal/collector/state" "github.com/scalytics/euosint/internal/collector/translate" + "github.com/scalytics/euosint/internal/collector/vet" + "github.com/scalytics/euosint/internal/sourcedb" ) type Runner struct { - stdout io.Writer - stderr io.Writer - clientFactory func(config.Config) *fetch.Client + stdout io.Writer + stderr io.Writer + clientFactory func(config.Config) *fetch.Client + browserFactory func(config.Config) (*fetch.BrowserClient, error) } func New(stdout io.Writer, stderr io.Writer) Runner { @@ -34,10 +40,16 @@ func New(stdout io.Writer, stderr io.Writer) Runner { stdout: stdout, stderr: stderr, clientFactory: fetch.New, + browserFactory: func(cfg config.Config) (*fetch.BrowserClient, error) { + return fetch.NewBrowser(cfg.BrowserTimeoutMS) + }, } } func (r Runner) Run(ctx context.Context, cfg config.Config) error { + if cfg.Watch && cfg.DiscoverBackground { + go r.runDiscoveryLoop(ctx, cfg) + } if cfg.Watch { return r.watch(ctx, cfg) } @@ -61,19 +73,64 @@ func (r Runner) watch(ctx context.Context, cfg config.Config) error { } func (r Runner) runOnce(ctx context.Context, cfg config.Config) error { + // Live-merge the baked-in JSON registry into SQLite every cycle. + // This picks up new sources and syncs rejected status without restart. 
+ if cfg.RegistrySeedPath != "" && isSQLitePath(cfg.RegistryPath) { + if err := r.mergeRegistry(ctx, cfg); err != nil { + fmt.Fprintf(r.stderr, "WARN registry merge: %v\n", err) + } + } + sources, err := registry.Load(cfg.RegistryPath) if err != nil { return err } client := r.clientFactory(cfg) + + var browser *fetch.BrowserClient + if cfg.BrowserEnabled && r.browserFactory != nil { + b, err := r.browserFactory(cfg) + if err != nil { + fmt.Fprintf(r.stderr, "WARN browser init failed (falling back to stealth): %v\n", err) + } else { + browser = b + defer browser.Close() + } + } + now := time.Now().UTC() - nctx := normalize.Context{Config: cfg, Now: now} + geocoder := r.initGeocoder(ctx, cfg) + nctx := normalize.Context{Config: cfg, Now: now, Geocoder: geocoder} + categoryDictionary, err := dictionary.Load(cfg.CategoryDictionaryPath) + if err != nil { + fmt.Fprintf(r.stderr, "WARN category dictionary load failed (falling back to legacy filters): %v\n", err) + } + + cursors := state.ReadCursors(cfg.CursorsPath) alerts := []model.Alert{normalize.StaticInterpolEntry(now)} sourceHealth := make([]model.SourceHealthEntry, 0, len(sources)) for _, source := range sources { startedAt := time.Now().UTC() - batch, err := r.fetchSource(ctx, client, nctx, source) + fetcher := fetch.FetcherFor(source.FetchMode, client, browser) + batch, err := r.fetchSource(ctx, fetcher, browser, nctx, source, categoryDictionary, cursors) + + // Retry once for transient errors (timeout, EOF) after a short backoff. 
+ if err != nil { + errClass, _, _ := classifySourceError(err) + if (errClass == "timeout" || errClass == "eof" || errClass == "transient") && ctx.Err() == nil { + fmt.Fprintf(r.stderr, "RETRY %s (transient %s): %v\n", source.Source.AuthorityName, errClass, err) + retryDelay := 3 * time.Second + select { + case <-time.After(retryDelay): + case <-ctx.Done(): + } + if ctx.Err() == nil { + batch, err = r.fetchSource(ctx, fetcher, browser, nctx, source, categoryDictionary, cursors) + } + } + } + entry := model.SourceHealthEntry{ SourceID: source.Source.SourceID, AuthorityName: source.Source.AuthorityName, @@ -85,6 +142,7 @@ func (r Runner) runOnce(ctx context.Context, cfg config.Config) error { if err != nil { entry.Status = "error" entry.Error = err.Error() + entry.ErrorClass, entry.NeedsReplacement, entry.DiscoveryAction = classifySourceError(err) sourceHealth = append(sourceHealth, entry) fmt.Fprintf(r.stderr, "WARN %s: %v\n", source.Source.AuthorityName, err) continue @@ -95,6 +153,10 @@ func (r Runner) runOnce(ctx context.Context, cfg config.Config) error { alerts = append(alerts, batch...) } + if err := state.WriteCursors(cfg.CursorsPath, cursors); err != nil { + fmt.Fprintf(r.stderr, "WARN failed to save cursors: %v\n", err) + } + deduped, duplicateAudit := normalize.Deduplicate(alerts) active, filtered := normalize.FilterActive(cfg, deduped) populateSourceHealth(sourceHealth, active, filtered) @@ -102,47 +164,97 @@ func (r Runner) runOnce(ctx context.Context, cfg config.Config) error { return err } - previous := state.Read(cfg.StateOutputPath) - if len(previous) == 0 { - previous = state.Read(cfg.OutputPath) + previous, err := loadPreviousAlerts(ctx, cfg) + if err != nil { + return err + } + // Purge stale alerts from sources that no longer exist or were rejected. + // Include source IDs from both the registry and the current fetch batch + // (covers synthetic alerts like the Interpol hub static entry). 
+ activeSourceIDs := map[string]struct{}{} + for _, s := range sources { + activeSourceIDs[s.Source.SourceID] = struct{}{} + } + for _, a := range alerts { + activeSourceIDs[a.SourceID] = struct{}{} + } + previous = purgeOrphanAlerts(previous, activeSourceIDs) + + accumulateSources := map[string]bool{} + for _, s := range sources { + if s.Accumulate { + accumulateSources[s.Source.SourceID] = true + } } - currentActive, currentFiltered, fullState := state.Reconcile(cfg, active, filtered, previous, now) - if err := output.Write(cfg, currentActive, currentFiltered, fullState, sourceHealth, duplicateAudit); err != nil { + currentActive, currentFiltered, fullState := state.Reconcile(cfg, active, filtered, previous, now, accumulateSources) + replacementQueue := buildReplacementQueue(sourceHealth, sources) + if err := deactivateReplacementSources(ctx, cfg.RegistryPath, replacementQueue); err != nil { + return err + } + if err := saveAlertState(ctx, cfg, fullState); err != nil { + return err + } + if err := output.Write(cfg, currentActive, currentFiltered, fullState, sourceHealth, duplicateAudit, replacementQueue); err != nil { return err } _, err = fmt.Fprintf(r.stdout, "Wrote %d active alerts -> %s (%d filtered in %s)\n", len(currentActive), cfg.OutputPath, len(currentFiltered), cfg.FilteredOutputPath) return err } -func (r Runner) fetchSource(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { +// purgeOrphanAlerts removes alerts whose source_id is no longer in the +// active registry. This cleans up zombie alerts from rejected or removed +// sources that would otherwise persist in the state file indefinitely. 
+func purgeOrphanAlerts(alerts []model.Alert, activeSourceIDs map[string]struct{}) []model.Alert { + out := make([]model.Alert, 0, len(alerts)) + for _, a := range alerts { + if _, ok := activeSourceIDs[a.SourceID]; ok { + out = append(out, a) + } + } + return out +} + +func (r Runner) fetchSource(ctx context.Context, fetcher fetch.Fetcher, browser *fetch.BrowserClient, nctx normalize.Context, source model.RegistrySource, categoryDictionary *dictionary.Store, cursors state.Cursors) ([]model.Alert, error) { switch source.Type { case "rss": - return r.fetchRSS(ctx, client, nctx, source) + return r.fetchRSS(ctx, fetcher, nctx, source) case "html-list": - return r.fetchHTML(ctx, client, nctx, source) + return r.fetchHTML(ctx, fetcher, nctx, source, categoryDictionary) case "kev-json": - return r.fetchKEV(ctx, client, nctx, source) + return r.fetchKEV(ctx, fetcher, nctx, source) case "interpol-red-json", "interpol-yellow-json": - return r.fetchInterpol(ctx, client, nctx, source) + return r.fetchInterpol(ctx, fetcher, browser, nctx, source, cursors) + case "fbi-wanted-json": + return r.fetchFBIWanted(ctx, fetcher, nctx, source) + case "travelwarning-json": + return r.fetchTravelWarningJSON(ctx, fetcher, nctx, source) + case "travelwarning-atom": + return r.fetchTravelWarningAtom(ctx, fetcher, nctx, source) default: return nil, fmt.Errorf("unsupported source type %s", source.Type) } } -func (r Runner) fetchRSS(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { - body, err := fetchWithFallback(ctx, client, source, "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8") +func (r Runner) fetchRSS(ctx context.Context, fetcher fetch.Fetcher, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := fetchWithFallback(ctx, fetcher, source, "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8") if err != nil { 
return nil, err } items := parse.ParseFeed(string(body)) if nctx.Config.TranslateEnabled { - if translated, err := translate.Batch(ctx, client, items); err == nil { + // translate.Batch requires the stealth HTTP client (not a browser). + translateClient := r.clientFactory(nctx.Config) + if translated, err := translate.Batch(ctx, translateClient, items); err == nil { items = translated } else { fmt.Fprintf(r.stderr, "WARN %s: translate batch failed: %v\n", source.Source.AuthorityName, err) } } + items = filterFeedKeywords(items, source.IncludeKeywords, source.ExcludeKeywords) limit := perSourceLimit(nctx.Config, source) + if len(items) > limit { + items = items[:limit] + } out := make([]model.Alert, 0, limit) for _, item := range items { if len(out) == limit { @@ -159,18 +271,48 @@ func (r Runner) fetchRSS(ctx context.Context, client *fetch.Client, nctx normali return out, nil } -func (r Runner) fetchHTML(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { - body, finalURL, err := fetchWithFallbackURL(ctx, client, source, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") +func (r Runner) fetchHTML(ctx context.Context, fetcher fetch.Fetcher, nctx normalize.Context, source model.RegistrySource, categoryDictionary *dictionary.Store) ([]model.Alert, error) { + body, finalURL, err := fetchWithFallbackURL(ctx, fetcher, source, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") if err != nil { return nil, err } items := parse.ParseHTMLAnchors(string(body), finalURL) items = filterKeywords(items, source.IncludeKeywords, source.ExcludeKeywords) + items = filterCategoryItems(items, source, categoryDictionary) limit := perSourceLimit(nctx.Config, source) + if nctx.Config.AlertLLMEnabled { + if llmLimit := nctx.Config.AlertLLMMaxItemsPerSource; llmLimit > 0 && llmLimit < limit { + limit = llmLimit + } + } if len(items) > limit { items = items[:limit] } out := 
make([]model.Alert, 0, len(items)) + if nctx.Config.AlertLLMEnabled { + alertLLM := vet.NewClient(config.Config{ + HTTPTimeoutMS: nctx.Config.HTTPTimeoutMS, + VettingBaseURL: nctx.Config.VettingBaseURL, + VettingAPIKey: nctx.Config.VettingAPIKey, + VettingProvider: nctx.Config.VettingProvider, + VettingModel: nctx.Config.AlertLLMModel, + VettingTemperature: 0, + }) + classified, err := translate.BatchLLM(ctx, nctx.Config, alertLLM, source.Category, items) + if err != nil { + fmt.Fprintf(r.stderr, "WARN %s: alert llm failed: %v\n", source.Source.AuthorityName, err) + } else { + for _, classifiedItem := range classified { + meta := source + meta.Category = firstNonEmpty(classifiedItem.Category, source.Category) + alert := normalize.HTMLItem(nctx, meta, classifiedItem.Item) + if alert != nil { + out = append(out, *alert) + } + } + return out, nil + } + } for _, item := range items { alert := normalize.HTMLItem(nctx, source, item) if alert != nil { @@ -180,8 +322,8 @@ func (r Runner) fetchHTML(ctx context.Context, client *fetch.Client, nctx normal return out, nil } -func (r Runner) fetchKEV(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { - body, err := client.Text(ctx, source.FeedURL, source.FollowRedirects, "application/json") +func (r Runner) fetchKEV(ctx context.Context, fetcher fetch.Fetcher, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := fetcher.Text(ctx, source.FeedURL, source.FollowRedirects, "application/json") if err != nil { return nil, err } @@ -214,14 +356,106 @@ func (r Runner) fetchKEV(ctx context.Context, client *fetch.Client, nctx normali return out, nil } -func (r Runner) fetchInterpol(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { - body, err := client.Text(ctx, source.FeedURL, source.FollowRedirects, "application/json") +func (r Runner) fetchInterpol(ctx 
context.Context, fetcher fetch.Fetcher, browser *fetch.BrowserClient, nctx normalize.Context, source model.RegistrySource, cursors state.Cursors) ([]model.Alert, error) { + limit := perSourceLimit(nctx.Config, source) + pageSize := 160 + var allNotices []model.Alert + sid := source.Source.SourceID + + // Interpol's API sits behind Akamai WAF and requires XHR-style headers + // with Referer/Origin pointing to the Interpol website. + interpolHeaders := map[string]string{ + "Referer": "https://www.interpol.int/How-we-work/Notices/View-Notices", + "Origin": "https://www.interpol.int", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-site", + "X-Requested-With": "XMLHttpRequest", + } + + clientFetcher, isClient := fetcher.(*fetch.Client) + + fetchPage := func(page int) ([]model.Alert, error) { + pageURL := buildInterpolPageURL(source.FeedURL, page, pageSize) + var body []byte + var err error + if isClient { + body, err = clientFetcher.TextWithHeaders(ctx, pageURL, source.FollowRedirects, "application/json", interpolHeaders) + } else { + body, err = fetcher.Text(ctx, pageURL, source.FollowRedirects, "application/json") + } + if err != nil { + return nil, err + } + return parseInterpolNotices(nctx, source, body) + } + + // Always fetch page 1 first to pick up new notices. + batch, err := fetchPage(1) if err != nil { + if browser != nil { + fmt.Fprintf(r.stderr, "WARN %s: stealth fetch failed, trying browser fallback: %v\n", source.Source.AuthorityName, err) + bBody, bErr := fetchInterpolViaBrowser(ctx, browser, source) + if bErr == nil && len(bBody) > 0 { + return parseInterpolNotices(nctx, source, bBody) + } + } return nil, err } + allNotices = append(allNotices, batch...) + lastPageFetched := 1 + + // Resume from cursor to backfill older pages. 
+ resumePage := cursors[sid] + if resumePage < 2 { + resumePage = 2 + } + for page := resumePage; len(allNotices) < limit; page++ { + select { + case <-time.After(1 * time.Second): + case <-ctx.Done(): + cursors[sid] = page + return allNotices, nil + } + batch, err := fetchPage(page) + if err != nil { + break + } + allNotices = append(allNotices, batch...) + lastPageFetched = page + if len(batch) < pageSize { + // Reached the end — wrap cursor back to 2 for next run. + lastPageFetched = 1 + break + } + } + + // Advance cursor for next run. + cursors[sid] = lastPageFetched + 1 + + if len(allNotices) > limit { + allNotices = allNotices[:limit] + } + return allNotices, nil +} + +func buildInterpolPageURL(baseURL string, page int, pageSize int) string { + u, err := url.Parse(baseURL) + if err != nil { + return baseURL + } + q := u.Query() + q.Set("page", fmt.Sprintf("%d", page)) + q.Set("resultPerPage", fmt.Sprintf("%d", pageSize)) + u.RawQuery = q.Encode() + return u.String() +} + +func parseInterpolNotices(nctx normalize.Context, source model.RegistrySource, body []byte) ([]model.Alert, error) { var doc struct { Embedded struct { Notices []struct { + EntityID string `json:"entity_id"` Forename string `json:"forename"` Name string `json:"name"` PlaceOfBirth string `json:"place_of_birth"` @@ -239,12 +473,8 @@ func (r Runner) fetchInterpol(ctx context.Context, client *fetch.Client, nctx no if err := json.Unmarshal(body, &doc); err != nil { return nil, err } - limit := perSourceLimit(nctx.Config, source) out := []model.Alert{} for _, notice := range doc.Embedded.Notices { - if len(out) == limit { - break - } titlePrefix := "INTERPOL Red Notice" if source.Type == "interpol-yellow-json" { titlePrefix = "INTERPOL Yellow Notice" @@ -254,22 +484,186 @@ func (r Runner) fetchInterpol(ctx context.Context, client *fetch.Client, nctx no if label != "" { title = titlePrefix + ": " + label } - link := notice.Links.Self.Href - if strings.TrimSpace(link) != "" { - if _, err := 
url.Parse(link); err == nil && !strings.HasPrefix(link, "http") { - link = (&url.URL{Scheme: "https", Host: "ws-public.interpol.int", Path: link}).String() - } - } + link := interpolWebURL(source.Type, notice.EntityID, notice.Links.Self.Href) countryCode := "" - if len(notice.CountriesLikelyToVisit) > 0 { - countryCode = notice.CountriesLikelyToVisit[0] - } else if len(notice.Nationalities) > 0 { + if len(notice.Nationalities) > 0 { countryCode = notice.Nationalities[0] + } else if len(notice.CountriesLikelyToVisit) > 0 { + countryCode = notice.CountriesLikelyToVisit[0] } + noticeID := extractInterpolNoticeID(notice.EntityID, link) summary := strings.TrimSpace(notice.IssuingEntity + " " + notice.PlaceOfBirth) tags := append([]string{}, notice.Nationalities...) tags = append(tags, notice.CountriesLikelyToVisit...) - alert := normalize.InterpolAlert(nctx, source, title, link, countryCode, summary, tags) + alert := normalize.InterpolAlert(nctx, source, noticeID, title, link, countryCode, summary, tags) + if alert != nil { + out = append(out, *alert) + } + } + return out, nil +} + +func fetchInterpolViaBrowser(ctx context.Context, browser *fetch.BrowserClient, source model.RegistrySource) ([]byte, error) { + pageURL, matchURL := interpolBrowserURLs(source.Type) + if pageURL == "" || matchURL == "" { + return nil, fmt.Errorf("no browser fallback for %s", source.Type) + } + bodies, err := browser.CaptureJSONResponses(ctx, pageURL, matchURL) + if err != nil { + return nil, err + } + for _, body := range bodies { + if len(body) > 0 { + return body, nil + } + } + return nil, fmt.Errorf("no interpol browser JSON bodies captured") +} + +func interpolBrowserURLs(sourceType string) (pageURL string, matchURL string) { + switch sourceType { + case "interpol-red-json": + return "https://www.interpol.int/How-we-work/Notices/Red-Notices/View-Red-Notices", "/notices/v1/red" + case "interpol-yellow-json": + return 
"https://www.interpol.int/How-we-work/Notices/Yellow-Notices/View-Yellow-Notices", "/notices/v1/yellow" + default: + return "", "" + } +} + +func extractInterpolNoticeID(entityID string, link string) string { + if id := strings.TrimSpace(entityID); id != "" { + return strings.ReplaceAll(id, "/", "-") + } + parsed, err := url.Parse(strings.TrimSpace(link)) + if err != nil { + return "" + } + if fragment := strings.TrimSpace(parsed.Fragment); fragment != "" { + return strings.ReplaceAll(fragment, "/", "-") + } + path := strings.Trim(parsed.Path, "/") + if path == "" { + return "" + } + // API paths like /notices/v1/red/2026/5314 → "2026-5314" + parts := strings.Split(path, "/") + if len(parts) >= 2 && parts[len(parts)-2] >= "1900" && parts[len(parts)-2] <= "2099" { + return parts[len(parts)-2] + "-" + parts[len(parts)-1] + } + return strings.TrimSpace(parts[len(parts)-1]) +} + +// interpolWebURL converts an Interpol API self-link into a human-readable +// web URL. e.g. ".../notices/v1/red/2025-81216" becomes +// "https://www.interpol.int/How-we-work/Notices/Red-Notices/View-Red-Notices#2025-81216". 
+func interpolWebURL(sourceType string, entityID string, selfHref string) string { + noticeID := extractInterpolNoticeID(entityID, selfHref) + base := "https://www.interpol.int/How-we-work/Notices/Red-Notices/View-Red-Notices" + if sourceType == "interpol-yellow-json" { + base = "https://www.interpol.int/How-we-work/Notices/Yellow-Notices/View-Yellow-Notices" + } + if noticeID != "" { + return base + "#" + noticeID + } + return base +} + +func (r Runner) fetchFBIWanted(ctx context.Context, fetcher fetch.Fetcher, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + limit := perSourceLimit(nctx.Config, source) + pageSize := 40 + var allAlerts []model.Alert + + for page := 1; len(allAlerts) < limit; page++ { + pageURL := fmt.Sprintf("%s&page=%d&pageSize=%d", source.FeedURL, page, pageSize) + body, err := fetcher.Text(ctx, pageURL, source.FollowRedirects, "application/json") + if err != nil { + if page == 1 { + return nil, err + } + break + } + items, total, err := parse.ParseFBIWanted(body) + if err != nil { + if page == 1 { + return nil, err + } + break + } + for _, item := range items { + if len(allAlerts) >= limit { + break + } + if strings.TrimSpace(item.Title) == "" { + continue + } + alert := normalize.FBIWantedAlert(nctx, source, item) + if alert != nil { + allAlerts = append(allAlerts, *alert) + } + } + // Stop if we've fetched all available or last page was partial. + if total > 0 && page*pageSize >= total { + break + } + if len(items) < pageSize { + break + } + // Polite delay between pages. 
+ select { + case <-time.After(1 * time.Second): + case <-ctx.Done(): + return allAlerts, nil + } + } + return allAlerts, nil +} + +func (r Runner) fetchTravelWarningJSON(ctx context.Context, fetcher fetch.Fetcher, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := fetcher.Text(ctx, source.FeedURL, source.FollowRedirects, "application/json") + if err != nil { + return nil, err + } + items, err := parse.ParseGermanAATravelWarnings(body) + if err != nil { + return nil, err + } + limit := perSourceLimit(nctx.Config, source) + out := make([]model.Alert, 0, limit) + for _, item := range items { + if len(out) == limit { + break + } + if strings.TrimSpace(item.Title) == "" { + continue + } + alert := normalize.TravelWarningAlert(nctx, source, item) + if alert != nil { + out = append(out, *alert) + } + } + return out, nil +} + +func (r Runner) fetchTravelWarningAtom(ctx context.Context, fetcher fetch.Fetcher, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := fetchWithFallback(ctx, fetcher, source, "application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8") + if err != nil { + return nil, err + } + items, err := parse.ParseFCDOAtom(body) + if err != nil { + return nil, err + } + limit := perSourceLimit(nctx.Config, source) + out := make([]model.Alert, 0, limit) + for _, item := range items { + if len(out) == limit { + break + } + if strings.TrimSpace(item.Title) == "" || strings.TrimSpace(item.Link) == "" { + continue + } + alert := normalize.TravelWarningAlert(nctx, source, item) if alert != nil { out = append(out, *alert) } @@ -277,20 +671,22 @@ func (r Runner) fetchInterpol(ctx context.Context, client *fetch.Client, nctx no return out, nil } -func fetchWithFallback(ctx context.Context, client *fetch.Client, source model.RegistrySource, accept string) ([]byte, error) { - body, _, err := fetchWithFallbackURL(ctx, client, source, accept) +func fetchWithFallback(ctx context.Context, 
fetcher fetch.Fetcher, source model.RegistrySource, accept string) ([]byte, error) { + body, _, err := fetchWithFallbackURL(ctx, fetcher, source, accept) return body, err } -func fetchWithFallbackURL(ctx context.Context, client *fetch.Client, source model.RegistrySource, accept string) ([]byte, string, error) { +func fetchWithFallbackURL(ctx context.Context, fetcher fetch.Fetcher, source model.RegistrySource, accept string) ([]byte, string, error) { candidates := []string{} if strings.TrimSpace(source.FeedURL) != "" { candidates = append(candidates, source.FeedURL) } candidates = append(candidates, source.FeedURLs...) + // Always follow redirects for feed fetches — 301/302/307 are normal + // for RSS/Atom feeds (HTTP→HTTPS, www→non-www, CDN routing, etc.). var lastErr error for _, candidate := range candidates { - body, err := client.Text(ctx, candidate, source.FollowRedirects, accept) + body, err := fetcher.Text(ctx, candidate, true, accept) if err == nil { return body, candidate, nil } @@ -307,11 +703,45 @@ func filterKeywords(items []parse.FeedItem, include []string, exclude []string) exclude = normalizeKeywords(exclude) out := []parse.FeedItem{} for _, item := range items { - hay := strings.ToLower(item.Title + " " + item.Link) - if len(include) > 0 && !containsKeyword(hay, include) { + titleHay := strings.ToLower(item.Title) + fullHay := strings.ToLower(item.Title + " " + item.Link) + // Include keywords match against title only — matching against the + // URL caused false positives when the page URL itself contained a + // keyword (e.g. /desaparecidos in the path let every link through). + if len(include) > 0 && !containsKeyword(titleHay, include) { + continue + } + // Exclude keywords match against title + URL (conservative). 
+ if len(exclude) > 0 && containsKeyword(fullHay, exclude) { + continue + } + out = append(out, item) + } + return out +} + +func filterFeedKeywords(items []parse.FeedItem, include []string, exclude []string) []parse.FeedItem { + include = normalizeKeywords(include) + exclude = normalizeKeywords(exclude) + out := []parse.FeedItem{} + for _, item := range items { + includeHay := strings.ToLower(strings.Join([]string{ + item.Title, + item.Summary, + item.Author, + strings.Join(item.Tags, " "), + }, " ")) + excludeHay := strings.ToLower(strings.Join([]string{ + item.Title, + item.Summary, + item.Author, + strings.Join(item.Tags, " "), + item.Link, + }, " ")) + if len(include) > 0 && !containsKeyword(includeHay, include) { continue } - if len(exclude) > 0 && containsKeyword(hay, exclude) { + if len(exclude) > 0 && containsKeyword(excludeHay, exclude) { continue } out = append(out, item) @@ -339,6 +769,19 @@ func containsKeyword(hay string, needles []string) bool { return false } +func filterCategoryItems(items []parse.FeedItem, source model.RegistrySource, categoryDictionary *dictionary.Store) []parse.FeedItem { + if categoryDictionary == nil { + return items + } + out := make([]parse.FeedItem, 0, len(items)) + for _, item := range items { + if categoryDictionary.Match(source.Category, source, item.Title, item.Link) { + out = append(out, item) + } + } + return out +} + func populateSourceHealth(entries []model.SourceHealthEntry, active []model.Alert, filtered []model.Alert) { activeBySource := map[string]int{} filteredBySource := map[string]int{} @@ -392,3 +835,228 @@ func firstNonEmpty(values ...string) string { } return "" } + +func isSQLitePath(path string) bool { + ext := filepath.Ext(path) + return ext == ".db" || ext == ".sqlite" || ext == ".sqlite3" +} + +func (r Runner) mergeRegistry(ctx context.Context, cfg config.Config) error { + db, err := sourcedb.Open(cfg.RegistryPath) + if err != nil { + return err + } + defer db.Close() + return db.MergeRegistry(ctx, 
cfg.RegistrySeedPath) +} + +// cityLookupAdapter wraps sourcedb.DB to satisfy normalize.CityLookup. +type cityLookupAdapter struct { + db *sourcedb.DB +} + +func (a *cityLookupAdapter) LookupCity(ctx context.Context, name string, countryCode string) (normalize.CityLookupResult, bool) { + r, ok := a.db.LookupCity(ctx, name, countryCode) + if !ok { + return normalize.CityLookupResult{}, false + } + return normalize.CityLookupResult{ + Name: r.Name, + CountryCode: r.CountryCode, + Lat: r.Lat, + Lng: r.Lng, + Population: r.Population, + }, true +} + +func (r Runner) initGeocoder(ctx context.Context, cfg config.Config) *normalize.Geocoder { + var cities normalize.CityLookup + var nominatim *normalize.NominatimClient + + // Try to open the source DB for city lookups. + if isSQLitePath(cfg.RegistryPath) { + db, err := sourcedb.Open(cfg.RegistryPath) + if err == nil { + // Import GeoNames if the cities table is empty and the file exists. + if !db.HasCities(ctx) && cfg.GeoNamesPath != "" { + if err := db.ImportGeoNames(ctx, cfg.GeoNamesPath); err != nil { + fmt.Fprintf(r.stderr, "WARN geonames import: %v\n", err) + } + } + if db.HasCities(ctx) { + cities = &cityLookupAdapter{db: db} + // NOTE: we intentionally don't defer db.Close() here because + // the geocoder is used throughout the run. The DB handle is + // safe for concurrent reads. 
+ } else { + db.Close() + } + } else { + fmt.Fprintf(r.stderr, "WARN geocoder DB open: %v\n", err) + } + } + + if cfg.NominatimEnabled { + nominatim = normalize.NewNominatimClient(cfg.NominatimBaseURL, cfg.WikimediaUserAgent) + } + + if cities == nil && nominatim == nil { + return nil + } + return normalize.NewGeocoder(cities, nominatim) +} + +func (r Runner) runDiscoveryLoop(ctx context.Context, cfg config.Config) { + runOnce := func() { + if err := discover.Run(ctx, cfg, r.stdout, r.stderr); err != nil && ctx.Err() == nil { + fmt.Fprintf(r.stderr, "WARN background discovery failed: %v\n", err) + } + } + + runOnce() + + interval := time.Duration(cfg.DiscoverIntervalMS) * time.Millisecond + if interval <= 0 { + interval = 15 * time.Minute + } + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + runOnce() + } + } +} + +func classifySourceError(err error) (string, bool, string) { + if err == nil { + return "", false, "" + } + msg := strings.ToLower(err.Error()) + switch { + case strings.Contains(msg, "status 404"), strings.Contains(msg, "status 410"): + return "not_found", true, "dead_letter" + case strings.Contains(msg, "status 301"), strings.Contains(msg, "status 302"), strings.Contains(msg, "status 307"), strings.Contains(msg, "status 308"): + // Redirects should be followed automatically — if we still see + // one here it means the chain exceeded 10 hops. 
+ return "redirect", false, "retry" + case strings.Contains(msg, "status 403"): + return "blocked", true, "dead_letter" + case strings.Contains(msg, "response too large"): + return "oversized", true, "dead_letter" + case strings.Contains(msg, "certificate signed by unknown authority"): + return "tls_invalid", true, "dead_letter" + case strings.Contains(msg, "no such host"): + return "dns_error", true, "dead_letter" + case strings.Contains(msg, "client.timeout exceeded"), strings.Contains(msg, "request canceled"), strings.Contains(msg, "timeout"): + return "timeout", false, "retry" + case strings.Contains(msg, ": eof"), strings.HasSuffix(msg, " eof"): + return "eof", false, "retry" + default: + return "transient", false, "retry" + } +} + +func buildReplacementQueue(entries []model.SourceHealthEntry, sources []model.RegistrySource) []model.SourceReplacementCandidate { + byID := make(map[string]model.RegistrySource, len(sources)) + for _, source := range sources { + byID[source.Source.SourceID] = source + } + + queue := make([]model.SourceReplacementCandidate, 0) + for _, entry := range entries { + if !entry.NeedsReplacement { + continue + } + source, ok := byID[entry.SourceID] + if !ok { + continue + } + queue = append(queue, model.SourceReplacementCandidate{ + SourceID: entry.SourceID, + AuthorityName: entry.AuthorityName, + Type: entry.Type, + FeedURL: entry.FeedURL, + BaseURL: source.Source.BaseURL, + Country: source.Source.Country, + CountryCode: source.Source.CountryCode, + Region: source.Source.Region, + AuthorityType: source.Source.AuthorityType, + Category: source.Category, + Error: entry.Error, + ErrorClass: entry.ErrorClass, + DiscoveryAction: entry.DiscoveryAction, + LastAttemptAt: entry.FinishedAt, + }) + } + return queue +} + +func deactivateReplacementSources(ctx context.Context, registryPath string, queue []model.SourceReplacementCandidate) error { + if !isSQLiteRegistryPath(registryPath) || len(queue) == 0 { + return nil + } + db, err := 
sourcedb.Open(registryPath) + if err != nil { + return err + } + defer db.Close() + + reasons := make(map[string]string, len(queue)) + for _, candidate := range queue { + reasons[candidate.SourceID] = candidate.Error + } + return db.DeactivateSources(ctx, reasons) +} + +func loadPreviousAlerts(ctx context.Context, cfg config.Config) ([]model.Alert, error) { + if !isSQLiteRegistryPath(cfg.RegistryPath) { + previous := state.Read(cfg.StateOutputPath) + if len(previous) == 0 { + previous = state.Read(cfg.OutputPath) + } + return previous, nil + } + + db, err := sourcedb.Open(cfg.RegistryPath) + if err != nil { + return nil, fmt.Errorf("open source DB for alert state: %w", err) + } + defer db.Close() + + alerts, err := db.LoadAlerts(ctx) + if err != nil { + return nil, fmt.Errorf("load alert state from source DB: %w", err) + } + return alerts, nil +} + +func saveAlertState(ctx context.Context, cfg config.Config, alerts []model.Alert) error { + if !isSQLiteRegistryPath(cfg.RegistryPath) { + return nil + } + + db, err := sourcedb.Open(cfg.RegistryPath) + if err != nil { + return fmt.Errorf("open source DB for alert save: %w", err) + } + defer db.Close() + + if err := db.SaveAlerts(ctx, alerts); err != nil { + return fmt.Errorf("save alert state to source DB: %w", err) + } + return nil +} + +func isSQLiteRegistryPath(path string) bool { + switch strings.ToLower(filepath.Ext(strings.TrimSpace(path))) { + case ".db", ".sqlite", ".sqlite3": + return true + default: + return false + } +} diff --git a/internal/collector/run/run_test.go b/internal/collector/run/run_test.go index 6fb3af6..9d0bf36 100644 --- a/internal/collector/run/run_test.go +++ b/internal/collector/run/run_test.go @@ -12,10 +12,14 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/dictionary" "github.com/scalytics/euosint/internal/collector/fetch" 
"github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/collector/parse" + "github.com/scalytics/euosint/internal/sourcedb" ) func TestRunnerRunOnceWritesOutputs(t *testing.T) { @@ -25,7 +29,9 @@ func TestRunnerRunOnceWritesOutputs(t *testing.T) { {"type":"rss","feed_url":"https://collector.test/rss","category":"cyber_advisory","region_tag":"INT","lat":48.8,"lng":2.3,"source":{"source_id":"rss-source","authority_name":"RSS Source","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://collector.test"}}, {"type":"html-list","feed_url":"https://collector.test/html","category":"wanted_suspect","region_tag":"FR","lat":48.8,"lng":2.3,"include_keywords":["wanted"],"source":{"source_id":"html-source","authority_name":"HTML Source","country":"France","country_code":"FR","region":"Europe","authority_type":"police","base_url":"https://collector.test"}}, {"type":"kev-json","feed_url":"https://collector.test/kev","category":"cyber_advisory","region_tag":"US","lat":38.8,"lng":-77.0,"source":{"source_id":"kev-source","authority_name":"KEV Source","country":"United States","country_code":"US","region":"North America","authority_type":"cert","base_url":"https://www.cisa.gov"}}, - {"type":"interpol-red-json","feed_url":"https://collector.test/interpol","category":"wanted_suspect","region_tag":"INT","lat":45.7,"lng":4.8,"source":{"source_id":"interpol-red","authority_name":"Interpol Red","country":"France","country_code":"FR","region":"International","authority_type":"police","base_url":"https://www.interpol.int"}} + {"type":"interpol-red-json","feed_url":"https://collector.test/interpol","category":"wanted_suspect","region_tag":"INT","lat":45.7,"lng":4.8,"source":{"source_id":"interpol-red","authority_name":"Interpol Red","country":"France","country_code":"FR","region":"International","authority_type":"police","base_url":"https://www.interpol.int"}}, + 
{"type":"travelwarning-json","feed_url":"https://collector.test/travel-json","category":"travel_warning","region_tag":"DE","lat":52.5,"lng":13.4,"source":{"source_id":"de-aa-travel","authority_name":"German AA","country":"Germany","country_code":"DE","region":"Europe","authority_type":"national_security","base_url":"https://www.auswaertiges-amt.de"}}, + {"type":"travelwarning-atom","feed_url":"https://collector.test/travel-atom","category":"travel_warning","region_tag":"GB","lat":51.5,"lng":-0.1,"source":{"source_id":"uk-fcdo-travel","authority_name":"UK FCDO","country":"United Kingdom","country_code":"GB","region":"Europe","authority_type":"national_security","base_url":"https://www.gov.uk"}} ]`) if err := os.WriteFile(registryPath, registry, 0o644); err != nil { t.Fatal(err) @@ -37,6 +43,7 @@ func TestRunnerRunOnceWritesOutputs(t *testing.T) { cfg.FilteredOutputPath = filepath.Join(dir, "filtered.json") cfg.StateOutputPath = filepath.Join(dir, "state.json") cfg.SourceHealthOutputPath = filepath.Join(dir, "health.json") + cfg.ReplacementQueuePath = filepath.Join(dir, "replacement.json") cfg.MaxAgeDays = 10000 runner := New(io.Discard, io.Discard) @@ -53,6 +60,10 @@ func TestRunnerRunOnceWritesOutputs(t *testing.T) { body = `{"vulnerabilities":[{"cveID":"CVE-2026-9999","vulnerabilityName":"Test vuln","shortDescription":"Known exploited issue","dateAdded":"2026-01-01","knownRansomwareCampaign":true}]}` case "/interpol": body = `{"_embedded":{"notices":[{"forename":"Jane","name":"Doe","issuing_entity":"Interpol","place_of_birth":"Paris","nationalities":["FR"],"_links":{"self":{"href":"https://ws-public.interpol.int/notices/v1/red/123"}}}]}}` + case "/travel-json": + body = `{"1":{"title":"Afghanistan - Do not travel","country":"Afghanistan","warning":"Do not travel.","severity":"Reisewarnung","lastChanged":"2026-01-15","url":"https://example.com/af"}}` + case "/travel-atom": + body = `France travel advice2026-02-01T00:00:00ZExercise normal caution.` default: return 
&http.Response{StatusCode: 404, Body: io.NopCloser(strings.NewReader("not found")), Header: make(http.Header)}, nil } @@ -84,9 +95,231 @@ func TestRunnerRunOnceWritesOutputs(t *testing.T) { if err := json.Unmarshal(rawHealth, &health); err != nil { t.Fatal(err) } - if health.TotalSources != 4 { - t.Fatalf("expected 4 sources in health document, got %d", health.TotalSources) + if health.TotalSources != 6 { + t.Fatalf("expected 6 sources in health document, got %d", health.TotalSources) } + if len(health.ReplacementQueue) != 0 { + t.Fatalf("expected no replacement queue entries, got %d", len(health.ReplacementQueue)) + } + if _, err := os.Stat(cfg.ReplacementQueuePath); err != nil { + t.Fatalf("expected replacement queue output, got %v", err) + } +} + +func TestBuildReplacementQueueFromPermanentFailures(t *testing.T) { + sources := []model.RegistrySource{ + { + Type: "rss", + FeedURL: "https://collector.test/dead-feed", + Category: "cyber_advisory", + Source: model.SourceMetadata{ + SourceID: "dead-source", + AuthorityName: "Dead Source", + Country: "France", + CountryCode: "FR", + Region: "Europe", + AuthorityType: "cert", + BaseURL: "https://collector.test", + }, + }, + } + entries := []model.SourceHealthEntry{ + { + SourceID: "dead-source", + AuthorityName: "Dead Source", + Type: "rss", + Status: "error", + FeedURL: "https://collector.test/dead-feed", + Error: "fetch https://collector.test/dead-feed: status 404", + ErrorClass: "not_found", + NeedsReplacement: true, + DiscoveryAction: "find_replacement", + FinishedAt: "2026-03-16T12:00:00Z", + }, + } + + queue := buildReplacementQueue(entries, sources) + if len(queue) != 1 { + t.Fatalf("expected one queued replacement candidate, got %d", len(queue)) + } + if queue[0].BaseURL != "https://collector.test" { + t.Fatalf("expected base URL to be carried into replacement queue, got %q", queue[0].BaseURL) + } +} + +func TestExtractInterpolNoticeID(t *testing.T) { + if got := extractInterpolNoticeID("2026-17351", ""); got 
!= "2026-17351" { + t.Fatalf("expected entity id to win, got %q", got) + } + if got := extractInterpolNoticeID("", "https://www.interpol.int/How-we-work/Notices/Yellow-Notices/View-Yellow-Notices#2026-17351"); got != "2026-17351" { + t.Fatalf("expected fragment id, got %q", got) + } + if got := extractInterpolNoticeID("", "https://ws-public.interpol.int/notices/v1/red/123"); got != "123" { + t.Fatalf("expected path id, got %q", got) + } +} + +func TestFilterCategoryItemsDropsUnrelatedMissingPersonHTML(t *testing.T) { + dict, err := dictionary.Load(filepath.Join("..", "..", "..", "registry", "category_dictionary.json")) + if err != nil { + t.Fatal(err) + } + items := []parse.FeedItem{ + {Title: "Calendario de actividades", Link: "https://example.test/calendario"}, + {Title: "Persona desaparecida en San Jose", Link: "https://example.test/desaparecidos/1"}, + } + filtered := filterCategoryItems(items, model.RegistrySource{ + Category: "missing_person", + Source: model.SourceMetadata{CountryCode: "CR"}, + }, dict) + if len(filtered) != 1 { + t.Fatalf("expected only one missing-person item after filtering, got %d", len(filtered)) + } + if filtered[0].Link != "https://example.test/desaparecidos/1" { + t.Fatalf("unexpected retained item: %#v", filtered[0]) + } +} + +func TestFilterCategoryItemsDropsUnrelatedWantedHTML(t *testing.T) { + dict, err := dictionary.Load(filepath.Join("..", "..", "..", "registry", "category_dictionary.json")) + if err != nil { + t.Fatal(err) + } + items := []parse.FeedItem{ + {Title: "Institutional history", Link: "https://example.test/history"}, + {Title: "Wanted suspect public appeal", Link: "https://example.test/wanted/1"}, + } + filtered := filterCategoryItems(items, model.RegistrySource{ + Category: "wanted_suspect", + Source: model.SourceMetadata{CountryCode: "US"}, + }, dict) + if len(filtered) != 1 { + t.Fatalf("expected only one wanted item after filtering, got %d", len(filtered)) + } + if filtered[0].Link != 
"https://example.test/wanted/1" { + t.Fatalf("unexpected retained item: %#v", filtered[0]) + } +} + +func TestFilterCategoryItemsMatchesCatalanMissingPersonPage(t *testing.T) { + dict, err := dictionary.Load(filepath.Join("..", "..", "..", "registry", "category_dictionary.json")) + if err != nil { + t.Fatal(err) + } + items := []parse.FeedItem{ + {Title: "Persona desapareguda", Link: "https://example.test/_ca/persona-desapareguda"}, + } + filtered := filterCategoryItems(items, model.RegistrySource{ + Category: "missing_person", + FeedURL: "https://www.policia.es/_ca/comunicacion_salaprensa.php?idiomaActual=ca", + Source: model.SourceMetadata{CountryCode: "ES"}, + }, dict) + if len(filtered) != 1 { + t.Fatalf("expected Catalan missing-person page to be retained, got %d", len(filtered)) + } +} + +func TestFilterFeedKeywordsAppliesToRSSContent(t *testing.T) { + items := []parse.FeedItem{ + {Title: "Budget debate", Summary: "Parliament procedure only", Link: "https://example.test/a"}, + {Title: "Parliament update", Summary: "New sanctions package announced", Link: "https://example.test/b"}, + } + filtered := filterFeedKeywords(items, []string{"sanction"}, nil) + if len(filtered) != 1 { + t.Fatalf("expected 1 retained RSS item, got %d", len(filtered)) + } + if filtered[0].Link != "https://example.test/b" { + t.Fatalf("unexpected retained RSS item: %#v", filtered[0]) + } +} + +func TestRunnerRunOnceUsesSQLiteAlertStateWithoutDuplicatingAlerts(t *testing.T) { + dir := t.TempDir() + registryPath := filepath.Join(dir, "sources.db") + seedPath := filepath.Join(dir, "registry.json") + registry := []byte(`[ + {"type":"rss","feed_url":"https://collector.test/rss","category":"cyber_advisory","region_tag":"INT","lat":48.8,"lng":2.3,"source":{"source_id":"rss-source","authority_name":"RSS Source","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://collector.test","language_code":"en"}} + ]`) + if err := os.WriteFile(seedPath, 
registry, 0o644); err != nil { + t.Fatal(err) + } + + db, err := sourcedb.Open(registryPath) + if err != nil { + t.Fatal(err) + } + if err := db.ImportRegistry(context.Background(), seedPath); err != nil { + db.Close() + t.Fatal(err) + } + db.Close() + + cfg := config.Default() + cfg.RegistryPath = registryPath + cfg.OutputPath = filepath.Join(dir, "alerts.json") + cfg.FilteredOutputPath = filepath.Join(dir, "filtered.json") + cfg.StateOutputPath = filepath.Join(dir, "state.json") + cfg.SourceHealthOutputPath = filepath.Join(dir, "health.json") + cfg.ReplacementQueuePath = filepath.Join(dir, "replacement.json") + cfg.MaxAgeDays = 10000 + + runner := New(io.Discard, io.Discard) + runner.clientFactory = func(cfg config.Config) *fetch.Client { + return fetch.NewWithHTTPClient(cfg, &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + if req.URL.Path != "/rss" { + return &http.Response{StatusCode: 404, Body: io.NopCloser(strings.NewReader("not found")), Header: make(http.Header)}, nil + } + body := `alert-1Critical cyber advisoryhttps://collector.test/rss-itemMon, 02 Jan 2026 15:04:05 MSTCVE-2026-1234 patch advisory` + return &http.Response{StatusCode: 200, Body: io.NopCloser(strings.NewReader(body)), Header: make(http.Header)}, nil + }), + }) + } + + if err := runner.Run(context.Background(), cfg); err != nil { + t.Fatal(err) + } + persistedAfterFirstRun, err := loadPersistedAlerts(registryPath) + if err != nil { + t.Fatal(err) + } + if len(persistedAfterFirstRun) != 2 { + t.Fatalf("expected 2 persisted alerts after first run, got %d", len(persistedAfterFirstRun)) + } + + firstSeenByID := map[string]string{} + for _, alert := range persistedAfterFirstRun { + firstSeenByID[alert.AlertID] = alert.FirstSeen + } + + time.Sleep(1100 * time.Millisecond) + + if err := runner.Run(context.Background(), cfg); err != nil { + t.Fatal(err) + } + persistedAfterSecondRun, err := loadPersistedAlerts(registryPath) + if err != nil { + t.Fatal(err) 
+ } + if len(persistedAfterSecondRun) != 2 { + t.Fatalf("expected 2 persisted alerts after second run, got %d", len(persistedAfterSecondRun)) + } + for _, alert := range persistedAfterSecondRun { + if want := firstSeenByID[alert.AlertID]; want == "" { + t.Fatalf("unexpected alert persisted after second run: %q", alert.AlertID) + } else if alert.FirstSeen != want { + t.Fatalf("expected first_seen for %s to remain %q, got %q", alert.AlertID, want, alert.FirstSeen) + } + } +} + +func loadPersistedAlerts(dbPath string) ([]model.Alert, error) { + db, err := sourcedb.Open(dbPath) + if err != nil { + return nil, err + } + defer db.Close() + return db.LoadAlerts(context.Background()) } type roundTripFunc func(*http.Request) (*http.Response, error) diff --git a/internal/collector/state/state.go b/internal/collector/state/state.go index 80682d8..0388920 100644 --- a/internal/collector/state/state.go +++ b/internal/collector/state/state.go @@ -25,7 +25,33 @@ func Read(path string) []model.Alert { return alerts } -func Reconcile(cfg config.Config, active []model.Alert, filtered []model.Alert, previous []model.Alert, now time.Time) ([]model.Alert, []model.Alert, []model.Alert) { +// Cursors tracks the resume page for paginated sources that accumulate. +type Cursors map[string]int // sourceID → next page to fetch + +func ReadCursors(path string) Cursors { + data, err := os.ReadFile(path) + if err != nil { + return Cursors{} + } + var c Cursors + if err := json.Unmarshal(data, &c); err != nil { + return Cursors{} + } + return c +} + +func WriteCursors(path string, c Cursors) error { + data, err := json.MarshalIndent(c, "", " ") + if err != nil { + return err + } + return os.WriteFile(path, data, 0644) +} + +// Reconcile merges current fetch results with previous state. +// accumulateSources lists source IDs where alerts carry forward across runs +// (paginated APIs like Interpol where each run only fetches a window). 
+func Reconcile(cfg config.Config, active []model.Alert, filtered []model.Alert, previous []model.Alert, now time.Time, accumulateSources map[string]bool) ([]model.Alert, []model.Alert, []model.Alert) { nowISO := now.UTC().Format(time.RFC3339) retentionCutoff := now.Add(-time.Duration(cfg.RemovedRetentionDays) * 24 * time.Hour) previousByID := map[string]model.Alert{} @@ -62,6 +88,12 @@ func Reconcile(cfg config.Config, active []model.Alert, filtered []model.Alert, if _, ok := presentByID[prev.AlertID]; ok { continue } + // Accumulating sources: carry forward active alerts not in this batch. + if accumulateSources[prev.SourceID] && prev.Status == "active" { + currentActive = append(currentActive, prev) + presentByID[prev.AlertID] = struct{}{} + continue + } if prev.Status == "removed" { lastSeen, err := time.Parse(time.RFC3339, prev.LastSeen) if err == nil && !lastSeen.Before(retentionCutoff) { diff --git a/internal/collector/state/state_test.go b/internal/collector/state/state_test.go index ffe5119..60ddc0a 100644 --- a/internal/collector/state/state_test.go +++ b/internal/collector/state/state_test.go @@ -21,7 +21,7 @@ func TestReconcileCarriesForwardAndRemoves(t *testing.T) { {AlertID: "c", FirstSeen: now.Add(-24 * time.Hour).Format(time.RFC3339), Status: "active", LastSeen: now.Add(-time.Hour).Format(time.RFC3339)}, } - currentActive, currentFiltered, fullState := Reconcile(cfg, active, filtered, previous, now) + currentActive, currentFiltered, fullState := Reconcile(cfg, active, filtered, previous, now, nil) if currentActive[0].FirstSeen != previous[0].FirstSeen { t.Fatalf("expected first_seen to carry forward, got %q", currentActive[0].FirstSeen) } @@ -38,3 +38,38 @@ func TestReconcileCarriesForwardAndRemoves(t *testing.T) { t.Fatalf("expected removed alert in state %#v", fullState) } } + +func TestReconcileAccumulateCarriesForward(t *testing.T) { + cfg := config.Default() + now := time.Date(2026, 1, 2, 3, 4, 5, 0, time.UTC) + // This run only fetches alert 
"a" (new page). + active := []model.Alert{{AlertID: "a", SourceID: "interpol-red"}} + // Previous state has "b" from an earlier run — should carry forward. + previous := []model.Alert{ + {AlertID: "b", SourceID: "interpol-red", Status: "active", FirstSeen: "2026-01-01T00:00:00Z", LastSeen: "2026-01-01T12:00:00Z"}, + {AlertID: "c", SourceID: "other-source", Status: "active", FirstSeen: "2026-01-01T00:00:00Z", LastSeen: "2026-01-01T12:00:00Z"}, + } + acc := map[string]bool{"interpol-red": true} + + currentActive, _, fullState := Reconcile(cfg, active, nil, previous, now, acc) + + foundB := false + for _, a := range currentActive { + if a.AlertID == "b" { + foundB = true + } + } + if !foundB { + t.Fatalf("expected accumulated alert 'b' in currentActive, got %v", currentActive) + } + + foundCRemoved := false + for _, a := range fullState { + if a.AlertID == "c" && a.Status == "removed" { + foundCRemoved = true + } + } + if !foundCRemoved { + t.Fatalf("expected non-accumulating alert 'c' to be removed in fullState") + } +} diff --git a/internal/collector/translate/google.go b/internal/collector/translate/google.go index 6a4a6e4..9dcee3d 100644 --- a/internal/collector/translate/google.go +++ b/internal/collector/translate/google.go @@ -15,6 +15,10 @@ import ( "github.com/scalytics/euosint/internal/collector/parse" ) +// maxTranslateChars is the maximum character count sent to Google Translate +// per field. The free gtx endpoint returns 413 for payloads above ~5 KB. 
+const maxTranslateChars = 2000 + var nonLatinRE = regexp.MustCompile(`[\p{Han}\p{Hangul}\p{Cyrillic}\p{Arabic}\p{Thai}]`) func Batch(ctx context.Context, client *fetch.Client, items []parse.FeedItem) ([]parse.FeedItem, error) { @@ -40,10 +44,16 @@ func Batch(ctx context.Context, client *fetch.Client, items []parse.FeedItem) ([ } func toEnglish(ctx context.Context, client *fetch.Client, text string) (string, error) { + // Strip HTML (RSS descriptions often contain full page markup) and truncate + // to avoid 413 from the free Google Translate endpoint. + text = parse.StripHTML(text) text = strings.TrimSpace(text) if text == "" { return text, nil } + if len(text) > maxTranslateChars { + text = text[:maxTranslateChars] + } endpoint := "https://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl=en&dt=t&q=" + url.QueryEscape(text) body, err := client.Text(ctx, endpoint, true, "application/json") if err != nil { diff --git a/internal/collector/translate/llm.go b/internal/collector/translate/llm.go new file mode 100644 index 0000000..42f0df6 --- /dev/null +++ b/internal/collector/translate/llm.go @@ -0,0 +1,151 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package translate + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + "strings" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/parse" + "github.com/scalytics/euosint/internal/collector/vet" +) + +type ClassifiedItem struct { + Item parse.FeedItem + Category string +} + +type AlertLLMResponse struct { + Yes bool `json:"yes"` + Translation string `json:"translation"` + CategoryID string `json:"category_id"` +} + +type alertBatchResponse struct { + Items []struct { + Index int `json:"index"` + Yes bool `json:"yes"` + Translation string `json:"translation"` + CategoryID string `json:"category_id"` + } `json:"items"` +} + +type Completer interface { + Complete(ctx context.Context, messages []vet.Message) (string, error) +} + +func BatchLLM(ctx context.Context, cfg config.Config, client Completer, defaultCategory string, items []parse.FeedItem) ([]ClassifiedItem, error) { + if len(items) == 0 { + return nil, nil + } + results, err := classifyItems(ctx, client, cfg.AlertLLMModel, defaultCategory, items) + if err != nil { + return nil, err + } + out := make([]ClassifiedItem, 0, len(items)) + for index, item := range items { + result, ok := results[index] + if !ok { + continue + } + if !result.Yes { + continue + } + next := item + if strings.TrimSpace(result.Translation) != "" { + next.Title = strings.TrimSpace(result.Translation) + } + out = append(out, ClassifiedItem{ + Item: next, + Category: firstNonEmpty(result.CategoryID, defaultCategory), + }) + } + return out, nil +} + +func classifyItems(ctx context.Context, client Completer, model string, defaultCategory string, items []parse.FeedItem) (map[int]AlertLLMResponse, error) { + batch := make([]map[string]any, 0, len(items)) + for index, item := range items { + batch = append(batch, map[string]any{ + "index": index, + "default_category": defaultCategory, + "title": 
strings.TrimSpace(item.Title), + "summary": strings.TrimSpace(item.Summary), + "link": strings.TrimSpace(item.Link), + "tags": item.Tags, + }) + } + payload, err := json.MarshalIndent(map[string]any{ + "items": batch, + }, "", " ") + if err != nil { + return nil, fmt.Errorf("marshal alert llm input: %w", err) + } + + content, err := client.Complete(ctx, []vet.Message{ + { + Role: "system", + Content: "You classify public source alert items. Return strict JSON only in the form {\"items\":[{\"index\":0,\"yes\":true,\"translation\":\"short english title\",\"category_id\":\"known_or_default\"}]}. yes must be true only for intelligence-relevant alerts, not generic information/noise. translation must be a short English title. category_id must be one of the known category ids or the supplied default_category.", + }, + { + Role: "user", + Content: "Model: " + model + "\nEvaluate these items and return JSON only.\n\n" + string(payload), + }, + }) + if err != nil { + return nil, err + } + + return decodeAlertBatchLLMResponse(content) +} + +var alertJSONBlockRe = regexp.MustCompile(`(?s)\{.*\}`) + +func decodeAlertLLMResponse(content string) (AlertLLMResponse, error) { + content = strings.TrimSpace(content) + if match := alertJSONBlockRe.FindString(content); match != "" { + content = match + } + var out AlertLLMResponse + if err := json.Unmarshal([]byte(content), &out); err != nil { + return AlertLLMResponse{}, fmt.Errorf("decode alert llm response: %w", err) + } + out.Translation = strings.TrimSpace(out.Translation) + out.CategoryID = strings.TrimSpace(out.CategoryID) + return out, nil +} + +func decodeAlertBatchLLMResponse(content string) (map[int]AlertLLMResponse, error) { + content = strings.TrimSpace(content) + if match := alertJSONBlockRe.FindString(content); match != "" { + content = match + } + var out alertBatchResponse + if err := json.Unmarshal([]byte(content), &out); err != nil { + return nil, fmt.Errorf("decode alert llm batch response: %w", err) + } + results := 
make(map[int]AlertLLMResponse, len(out.Items)) + for _, item := range out.Items { + results[item.Index] = AlertLLMResponse{ + Yes: item.Yes, + Translation: strings.TrimSpace(item.Translation), + CategoryID: strings.TrimSpace(item.CategoryID), + } + } + return results, nil +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + value = strings.TrimSpace(value) + if value != "" { + return value + } + } + return "" +} diff --git a/internal/collector/translate/llm_test.go b/internal/collector/translate/llm_test.go new file mode 100644 index 0000000..99860a5 --- /dev/null +++ b/internal/collector/translate/llm_test.go @@ -0,0 +1,76 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package translate + +import ( + "context" + "testing" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/parse" + "github.com/scalytics/euosint/internal/collector/vet" +) + +type fakeCompleter struct { + content string + err error +} + +func (f fakeCompleter) Complete(ctx context.Context, messages []vet.Message) (string, error) { + return f.content, f.err +} + +func TestDecodeAlertLLMResponse(t *testing.T) { + got, err := decodeAlertLLMResponse("```json\n{\"yes\":true,\"translation\":\"Wanted suspect in Berlin\",\"category_id\":\"wanted_suspect\"}\n```") + if err != nil { + t.Fatal(err) + } + if !got.Yes || got.Translation != "Wanted suspect in Berlin" || got.CategoryID != "wanted_suspect" { + t.Fatalf("unexpected response %#v", got) + } +} + +func TestBatchLLMFiltersAndOverridesCategory(t *testing.T) { + cfg := config.Default() + cfg.AlertLLMModel = "gpt-test" + items := []parse.FeedItem{{Title: "Titulo", Link: "https://example.test/a"}} + classified, err := BatchLLM(context.Background(), cfg, fakeCompleter{content: `{"items":[{"index":0,"yes":true,"translation":"Missing child in Madrid","category_id":"missing_person"}]}`}, "public_appeal", 
items) + if err != nil { + t.Fatal(err) + } + if len(classified) != 1 { + t.Fatalf("expected 1 classified item, got %d", len(classified)) + } + if classified[0].Item.Title != "Missing child in Madrid" || classified[0].Category != "missing_person" { + t.Fatalf("unexpected classified item %#v", classified[0]) + } +} + +func TestBatchLLMDropsNoise(t *testing.T) { + cfg := config.Default() + items := []parse.FeedItem{{Title: "General update", Link: "https://example.test/a"}} + classified, err := BatchLLM(context.Background(), cfg, fakeCompleter{content: `{"items":[{"index":0,"yes":false,"translation":"","category_id":""}]}`}, "public_appeal", items) + if err != nil { + t.Fatal(err) + } + if len(classified) != 0 { + t.Fatalf("expected no classified items, got %d", len(classified)) + } +} + +func TestDecodeAlertBatchLLMResponse(t *testing.T) { + got, err := decodeAlertBatchLLMResponse("```json\n{\"items\":[{\"index\":0,\"yes\":true,\"translation\":\"Wanted suspect in Berlin\",\"category_id\":\"wanted_suspect\"},{\"index\":1,\"yes\":false,\"translation\":\"\",\"category_id\":\"\"}]}\n```") + if err != nil { + t.Fatal(err) + } + if len(got) != 2 { + t.Fatalf("expected 2 batch responses, got %d", len(got)) + } + if !got[0].Yes || got[0].CategoryID != "wanted_suspect" { + t.Fatalf("unexpected first batch response %#v", got[0]) + } + if got[1].Yes { + t.Fatalf("expected second batch response to be negative, got %#v", got[1]) + } +} diff --git a/internal/collector/vet/client.go b/internal/collector/vet/client.go new file mode 100644 index 0000000..87c8c27 --- /dev/null +++ b/internal/collector/vet/client.go @@ -0,0 +1,117 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package vet + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/scalytics/euosint/internal/collector/config" +) + +type Client struct { + httpClient *http.Client + baseURL string + apiKey string + model string + provider string + temperature float64 +} + +func NewClient(cfg config.Config) *Client { + timeout := time.Duration(cfg.HTTPTimeoutMS) * time.Millisecond + if timeout <= 0 { + timeout = 30 * time.Second + } + return &Client{ + httpClient: &http.Client{Timeout: timeout}, + baseURL: strings.TrimSpace(cfg.VettingBaseURL), + apiKey: strings.TrimSpace(cfg.VettingAPIKey), + model: strings.TrimSpace(cfg.VettingModel), + provider: strings.TrimSpace(cfg.VettingProvider), + temperature: cfg.VettingTemperature, + } +} + +type Message struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type chatRequest struct { + Model string `json:"model"` + Messages []Message `json:"messages"` + Temperature float64 `json:"temperature,omitempty"` +} + +type chatResponse struct { + Choices []struct { + Message struct { + Content string `json:"content"` + } `json:"message"` + } `json:"choices"` +} + +func (c *Client) Complete(ctx context.Context, messages []Message) (string, error) { + reqBody, err := json.Marshal(chatRequest{ + Model: c.model, + Messages: messages, + Temperature: c.temperature, + }) + if err != nil { + return "", fmt.Errorf("marshal source vetting request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, completionsURL(c.baseURL), bytes.NewReader(reqBody)) + if err != nil { + return "", fmt.Errorf("build source vetting request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + if c.apiKey != "" { + req.Header.Set("Authorization", "Bearer "+c.apiKey) + } + if c.provider != "" { + req.Header.Set("X-EUOSINT-Provider", 
c.provider) + } + + res, err := c.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("request source vetting completion: %w", err) + } + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + return "", fmt.Errorf("read source vetting response: %w", err) + } + if res.StatusCode < 200 || res.StatusCode >= 300 { + return "", fmt.Errorf("source vetting endpoint status %d: %s", res.StatusCode, strings.TrimSpace(string(body))) + } + + var parsed chatResponse + if err := json.Unmarshal(body, &parsed); err != nil { + return "", fmt.Errorf("decode source vetting response: %w", err) + } + if len(parsed.Choices) == 0 { + return "", fmt.Errorf("source vetting response returned no choices") + } + return strings.TrimSpace(parsed.Choices[0].Message.Content), nil +} + +func completionsURL(baseURL string) string { + baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/") + if baseURL == "" { + return "https://api.openai.com/v1/chat/completions" + } + if strings.HasSuffix(baseURL, "/chat/completions") { + return baseURL + } + return baseURL + "/chat/completions" +} diff --git a/internal/collector/vet/client_test.go b/internal/collector/vet/client_test.go new file mode 100644 index 0000000..23c8865 --- /dev/null +++ b/internal/collector/vet/client_test.go @@ -0,0 +1,64 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package vet + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/scalytics/euosint/internal/collector/config" +) + +func TestClientCompleteUsesOpenAICompatibleEndpoint(t *testing.T) { + var gotAuth string + var gotModel string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/chat/completions" { + t.Fatalf("unexpected path %s", r.URL.Path) + } + gotAuth = r.Header.Get("Authorization") + var payload map[string]any + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + t.Fatal(err) + } + gotModel, _ = payload["model"].(string) + _ = json.NewEncoder(w).Encode(map[string]any{ + "choices": []map[string]any{ + {"message": map[string]any{"content": `{"approve":true}`}}, + }, + }) + })) + defer server.Close() + + cfg := config.Default() + cfg.VettingBaseURL = server.URL + "/v1" + cfg.VettingAPIKey = "secret" + cfg.VettingModel = "gpt-test" + client := NewClient(cfg) + content, err := client.Complete(context.Background(), []Message{{Role: "user", Content: "test"}}) + if err != nil { + t.Fatal(err) + } + if gotAuth != "Bearer secret" { + t.Fatalf("expected bearer auth header, got %q", gotAuth) + } + if gotModel != "gpt-test" { + t.Fatalf("expected model gpt-test, got %q", gotModel) + } + if content != `{"approve":true}` { + t.Fatalf("unexpected content %q", content) + } +} + +func TestCompletionsURLNormalizesBase(t *testing.T) { + if got := completionsURL("http://localhost:11434/v1"); got != "http://localhost:11434/v1/chat/completions" { + t.Fatalf("unexpected ollama/vllm url %q", got) + } + if got := completionsURL("https://gateway.example/openai/v1/chat/completions"); got != "https://gateway.example/openai/v1/chat/completions" { + t.Fatalf("unexpected passthrough url %q", got) + } +} diff --git a/internal/collector/vet/source.go b/internal/collector/vet/source.go new file 
mode 100644 index 0000000..4f33d5a --- /dev/null +++ b/internal/collector/vet/source.go @@ -0,0 +1,251 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package vet + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/parse" +) + +type Sample struct { + Title string `json:"title"` + Link string `json:"link"` + Summary string `json:"summary,omitempty"` + Tags []string `json:"tags,omitempty"` +} + +type Input struct { + AuthorityName string `json:"authority_name"` + AuthorityType string `json:"authority_type"` + Category string `json:"category"` + Country string `json:"country"` + CountryCode string `json:"country_code"` + URL string `json:"url"` + BaseURL string `json:"base_url"` + FeedType string `json:"feed_type"` + Samples []Sample `json:"samples"` +} + +type Verdict struct { + Approve bool `json:"approve"` + PromotionStatus string `json:"promotion_status"` + Category string `json:"category,omitempty"` + Level string `json:"level"` + MissionTags []string `json:"mission_tags"` + SourceQuality flexFloat `json:"source_quality"` + OperationalRelevance flexFloat `json:"operational_relevance"` + Reason string `json:"reason"` +} + +type flexFloat float64 + +func (f *flexFloat) UnmarshalJSON(data []byte) error { + raw := strings.TrimSpace(string(data)) + if raw == "" || raw == "null" { + *f = 0 + return nil + } + + var num float64 + if err := json.Unmarshal(data, &num); err == nil { + *f = flexFloat(num) + return nil + } + + var asString string + if err := json.Unmarshal(data, &asString); err != nil { + return err + } + asString = strings.TrimSpace(asString) + if asString == "" { + *f = 0 + return nil + } + num, err := strconv.ParseFloat(asString, 64) + if err != nil { + return err + } + *f = flexFloat(num) + return nil +} + +type Vetter struct { + client *Client + 
model string +} + +func New(cfg config.Config) *Vetter { + return &Vetter{client: NewClient(cfg), model: cfg.VettingModel} +} + +func (v *Vetter) Evaluate(ctx context.Context, input Input) (Verdict, error) { + if reason, reject := deterministicReject(input); reject { + return Verdict{ + Approve: false, + PromotionStatus: "rejected", + Level: "local", + MissionTags: nil, + SourceQuality: 0.1, + OperationalRelevance: 0.1, + Reason: reason, + }, nil + } + + payload, err := json.MarshalIndent(input, "", " ") + if err != nil { + return Verdict{}, fmt.Errorf("marshal source vetting input: %w", err) + } + + content, err := v.client.Complete(ctx, []Message{ + { + Role: "system", + Content: `You vet intelligence source candidates for an OSINT dashboard. + +Approve only operationally relevant sources: supranational, federal, or national level sources that publish actionable intelligence — wanted/missing persons, public appeals, cyber advisories, vulnerability disclosures, humanitarian security, conflict monitoring, disease outbreaks, environmental disasters, fraud alerts, terrorism, travel warnings, emergency management, or public-safety intelligence. + +Reject: generic PR, speeches, institutional updates, local police, municipal news, marketing, newsletters, or low-signal content. 
+ +Valid categories (pick the best match): +- cyber_advisory: vulnerability disclosures, patch advisories, threat intel from CERTs +- wanted_suspect: arrest warrants, wanted person notices +- missing_person: missing persons, AMBER alerts +- public_appeal: police witness calls, identification requests, crime tips +- fraud_alert: financial crime, scam warnings, sanctions, AML +- intelligence_report: strategic assessments, geopolitical analysis +- travel_warning: government travel advisories, consular warnings +- conflict_monitoring: armed conflict tracking, ceasefire violations, peace processes +- humanitarian_security: aid worker safety, access restrictions, crisis zones +- humanitarian_tasking: humanitarian missions, disaster response deployments +- health_emergency: disease outbreaks, pandemic updates, biosecurity +- disease_outbreak: epidemics, zoonotic diseases, outbreak surveillance +- environmental_disaster: earthquakes, oil spills, floods, wildfires, nuclear incidents, volcanic activity +- public_safety: civil protection, natural disaster warnings, emergency notifications +- emergency_management: disaster declarations, evacuation orders, crisis coordination +- terrorism_tip: counter-terrorism alerts, extremism threat assessments +- private_sector: corporate security, supply chain disruptions +- informational: general information, educational content + +Return strict JSON only with keys: approve, promotion_status, category, level, mission_tags, source_quality, operational_relevance, reason.`, + }, + { + Role: "user", + Content: "Evaluate this discovered source and return JSON with keys approve, promotion_status, category, level, mission_tags, source_quality, operational_relevance, reason.\n\n" + string(payload), + }, + }) + if err != nil { + return Verdict{}, err + } + + verdict, err := decodeVerdict(content) + if err != nil { + return Verdict{}, err + } + verdict.normalize() + return verdict, nil +} + +func SamplesFromFeedItems(items []parse.FeedItem, limit 
int) []Sample { + if limit <= 0 || limit > len(items) { + limit = len(items) + } + samples := make([]Sample, 0, limit) + for _, item := range items[:limit] { + samples = append(samples, Sample{ + Title: strings.TrimSpace(item.Title), + Link: strings.TrimSpace(item.Link), + Summary: strings.TrimSpace(item.Summary), + Tags: append([]string(nil), item.Tags...), + }) + } + return samples +} + +var jsonBlockRe = regexp.MustCompile(`(?s)\{.*\}`) + +func decodeVerdict(content string) (Verdict, error) { + content = strings.TrimSpace(content) + if match := jsonBlockRe.FindString(content); match != "" { + content = match + } + var verdict Verdict + if err := json.Unmarshal([]byte(content), &verdict); err != nil { + return Verdict{}, fmt.Errorf("decode source vetting verdict: %w", err) + } + return verdict, nil +} + +var validCategories = map[string]bool{ + "cyber_advisory": true, "wanted_suspect": true, "missing_person": true, + "public_appeal": true, "fraud_alert": true, "intelligence_report": true, + "travel_warning": true, "conflict_monitoring": true, "humanitarian_security": true, + "humanitarian_tasking": true, "health_emergency": true, "disease_outbreak": true, + "environmental_disaster": true, "public_safety": true, "emergency_management": true, + "terrorism_tip": true, "private_sector": true, "informational": true, +} + +func (v *Verdict) normalize() { + v.PromotionStatus = strings.ToLower(strings.TrimSpace(v.PromotionStatus)) + switch v.PromotionStatus { + case "active", "validated", "rejected": + default: + if v.Approve { + v.PromotionStatus = "active" + } else { + v.PromotionStatus = "rejected" + } + } + v.Category = strings.ToLower(strings.TrimSpace(v.Category)) + if !validCategories[v.Category] { + v.Category = "" + } + v.Level = strings.ToLower(strings.TrimSpace(v.Level)) + switch v.Level { + case "international", "supranational", "federal", "national", "regional", "local": + default: + v.Level = "national" + } + v.SourceQuality = 
flexFloat(clamp01(float64(v.SourceQuality))) + v.OperationalRelevance = flexFloat(clamp01(float64(v.OperationalRelevance))) + if !v.Approve { + v.PromotionStatus = "rejected" + } +} + +func clamp01(v float64) float64 { + if v < 0 { + return 0 + } + if v > 1 { + return 1 + } + return v +} + +func deterministicReject(input Input) (string, bool) { + hay := strings.ToLower(strings.Join([]string{ + input.AuthorityName, + input.AuthorityType, + input.Category, + input.URL, + input.BaseURL, + }, " ")) + for _, needle := range []string{ + "municipal", "municipality", "city of ", "county ", "sheriff", "police department", "local police", + } { + if strings.Contains(hay, needle) { + return "deterministic reject: local or municipal source", true + } + } + if len(input.Samples) == 0 { + return "deterministic reject: no sample items to assess", true + } + return "", false +} diff --git a/internal/collector/vet/source_test.go b/internal/collector/vet/source_test.go new file mode 100644 index 0000000..72d4b73 --- /dev/null +++ b/internal/collector/vet/source_test.go @@ -0,0 +1,55 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package vet + +import ( + "testing" + + "github.com/scalytics/euosint/internal/collector/parse" +) + +func TestDecodeVerdictExtractsJSONBlock(t *testing.T) { + verdict, err := decodeVerdict("```json\n{\"approve\":true,\"promotion_status\":\"active\",\"level\":\"federal\",\"source_quality\":0.9,\"operational_relevance\":0.8,\"mission_tags\":[\"organized_crime\"],\"reason\":\"high signal\"}\n```") + if err != nil { + t.Fatal(err) + } + verdict.normalize() + if !verdict.Approve || verdict.PromotionStatus != "active" || verdict.Level != "federal" { + t.Fatalf("unexpected verdict %#v", verdict) + } +} + +func TestDecodeVerdictAcceptsNumericStrings(t *testing.T) { + verdict, err := decodeVerdict(`{"approve":true,"promotion_status":"active","level":"national","source_quality":"0.9","operational_relevance":"0.8","mission_tags":["organized_crime"],"reason":"high signal"}`) + if err != nil { + t.Fatal(err) + } + verdict.normalize() + if float64(verdict.SourceQuality) != 0.9 { + t.Fatalf("expected source_quality 0.9, got %v", verdict.SourceQuality) + } + if float64(verdict.OperationalRelevance) != 0.8 { + t.Fatalf("expected operational_relevance 0.8, got %v", verdict.OperationalRelevance) + } +} + +func TestDeterministicRejectsLocalAndMissingSamples(t *testing.T) { + if _, reject := deterministicReject(Input{AuthorityName: "City of Valletta Police Department", Samples: []Sample{{Title: "x"}}}); !reject { + t.Fatal("expected local police deterministic reject") + } + if _, reject := deterministicReject(Input{AuthorityName: "Europol", Samples: nil}); !reject { + t.Fatal("expected no-sample deterministic reject") + } +} + +func TestSamplesFromFeedItemsHonorsLimit(t *testing.T) { + items := []parse.FeedItem{ + {Title: "One", Link: "https://one"}, + {Title: "Two", Link: "https://two"}, + } + samples := SamplesFromFeedItems(items, 1) + if len(samples) != 1 || samples[0].Title != "One" { + t.Fatalf("unexpected 
samples %#v", samples) + } +} diff --git a/internal/sourcedb/db.go b/internal/sourcedb/db.go new file mode 100644 index 0000000..90f6c20 --- /dev/null +++ b/internal/sourcedb/db.go @@ -0,0 +1,1233 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package sourcedb + +import ( + "context" + "database/sql" + _ "embed" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + _ "modernc.org/sqlite" + + "github.com/scalytics/euosint/internal/collector/model" +) + +//go:embed schema.sql +var schemaSQL string + +var initMu sync.Mutex + +type DB struct { + sql *sql.DB +} + +type CandidateInput struct { + DiscoveredURL string + DiscoveredVia string + Status string + LanguageCode string + Category string + AuthorityType string + Country string + CountryCode string + Notes string +} + +func Open(path string) (*DB, error) { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return nil, fmt.Errorf("create source DB directory: %w", err) + } + db, err := sql.Open("sqlite", path) + if err != nil { + return nil, fmt.Errorf("open sqlite source DB: %w", err) + } + if _, err := db.Exec(`PRAGMA busy_timeout = 5000`); err != nil { + db.Close() + return nil, fmt.Errorf("set sqlite busy_timeout: %w", err) + } + return &DB{sql: db}, nil +} + +func (db *DB) Close() error { + if db == nil || db.sql == nil { + return nil + } + return db.sql.Close() +} + +func (db *DB) Init(ctx context.Context) error { + initMu.Lock() + defer initMu.Unlock() + + if _, err := db.sql.ExecContext(ctx, schemaSQL); err != nil { + return fmt.Errorf("init source DB schema: %w", err) + } + for _, stmt := range []string{ + `ALTER TABLE agencies ADD COLUMN level TEXT NOT NULL DEFAULT 'national'`, + `ALTER TABLE agencies ADD COLUMN mission_tags_json TEXT NOT NULL DEFAULT '[]'`, + `ALTER TABLE agencies ADD COLUMN operational_relevance REAL NOT NULL DEFAULT 0`, + `ALTER TABLE sources ADD COLUMN source_quality REAL NOT 
NULL DEFAULT 0`, + `ALTER TABLE sources ADD COLUMN promotion_status TEXT NOT NULL DEFAULT 'candidate'`, + `ALTER TABLE sources ADD COLUMN rejection_reason TEXT NOT NULL DEFAULT ''`, + `ALTER TABLE sources ADD COLUMN is_mirror INTEGER NOT NULL DEFAULT 0`, + `ALTER TABLE sources ADD COLUMN preferred_source_rank INTEGER NOT NULL DEFAULT 0`, + } { + if _, err := db.sql.ExecContext(ctx, stmt); err != nil && !isDuplicateColumnError(err) { + return fmt.Errorf("migrate source DB schema: %w", err) + } + } + // Backfill live sources from pre-promotion-status databases so existing + // vetted registry rows remain visible after schema migration. + if _, err := db.sql.ExecContext(ctx, ` +UPDATE sources +SET promotion_status = 'active' +WHERE status IN ('active', '') + AND promotion_status = 'candidate' + AND NOT EXISTS ( + SELECT 1 + FROM source_candidates c + WHERE c.discovered_url = sources.feed_url + )`); err != nil { + return fmt.Errorf("backfill source promotion status: %w", err) + } + return nil +} + +func isDuplicateColumnError(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "duplicate column name") +} + +func (db *DB) ImportRegistry(ctx context.Context, registryPath string) error { + if err := db.Init(ctx); err != nil { + return err + } + sources, err := loadRegistryJSON(registryPath) + if err != nil { + return err + } + + tx, err := db.sql.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin import tx: %w", err) + } + defer tx.Rollback() + + if _, err := tx.ExecContext(ctx, `DELETE FROM source_categories`); err != nil { + return fmt.Errorf("clear source_categories: %w", err) + } + if _, err := tx.ExecContext(ctx, `DELETE FROM agency_category_coverage`); err != nil { + return fmt.Errorf("clear agency_category_coverage: %w", err) + } + if _, err := tx.ExecContext(ctx, `DELETE FROM sources`); err != nil { + return fmt.Errorf("clear sources: %w", err) + } + if _, err := 
tx.ExecContext(ctx, `DELETE FROM agency_aliases`); err != nil { + return fmt.Errorf("clear agency_aliases: %w", err) + } + if _, err := tx.ExecContext(ctx, `DELETE FROM agencies_fts`); err != nil { + return fmt.Errorf("clear agencies_fts: %w", err) + } + if _, err := tx.ExecContext(ctx, `DELETE FROM agencies`); err != nil { + return fmt.Errorf("clear agencies: %w", err) + } + + for _, src := range sources { + agencyID := agencyKey(src.Source) + if err := upsertAgency(ctx, tx, agencyID, src.Source); err != nil { + return err + } + if err := upsertSource(ctx, tx, agencyID, src); err != nil { + return err + } + if err := upsertAgencyCoverage(ctx, tx, agencyID, src.Category); err != nil { + return err + } + if err := upsertAgencyFTS(ctx, tx, agencyID, src.Source); err != nil { + return err + } + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit import tx: %w", err) + } + return nil +} + +func (db *DB) MergeRegistry(ctx context.Context, registryPath string) error { + if err := db.Init(ctx); err != nil { + return err + } + sources, err := loadRegistryJSON(registryPath) + if err != nil { + return err + } + + tx, err := db.sql.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin merge tx: %w", err) + } + defer tx.Rollback() + + for _, src := range sources { + agencyID := agencyKey(src.Source) + if err := upsertAgency(ctx, tx, agencyID, src.Source); err != nil { + return err + } + if err := upsertSource(ctx, tx, agencyID, src); err != nil { + return err + } + if err := upsertAgencyCoverage(ctx, tx, agencyID, src.Category); err != nil { + return err + } + if err := upsertAgencyFTS(ctx, tx, agencyID, src.Source); err != nil { + return err + } + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit merge tx: %w", err) + } + return nil +} + +func (db *DB) UpsertRegistrySources(ctx context.Context, sources []model.RegistrySource) error { + if err := db.Init(ctx); err != nil { + return err + } + if len(sources) == 0 { + return nil + } 
+ tx, err := db.sql.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin upsert sources tx: %w", err) + } + defer tx.Rollback() + + for _, src := range sources { + agencyID := agencyKey(src.Source) + if err := upsertAgency(ctx, tx, agencyID, src.Source); err != nil { + return err + } + if err := upsertSource(ctx, tx, agencyID, src); err != nil { + return err + } + if err := upsertAgencyCoverage(ctx, tx, agencyID, src.Category); err != nil { + return err + } + if err := upsertAgencyFTS(ctx, tx, agencyID, src.Source); err != nil { + return err + } + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit upsert sources tx: %w", err) + } + return nil +} + +func (db *DB) LoadActiveSources(ctx context.Context) ([]model.RegistrySource, error) { + if err := db.Init(ctx); err != nil { + return nil, err + } + rows, err := db.sql.QueryContext(ctx, ` +SELECT + s.id, + s.type, + s.fetch_mode, + s.follow_redirects, + s.feed_url, + s.feed_urls_json, + s.category, + s.region_tag, + s.lat, + s.lng, + s.max_items, + s.include_keywords_json, + s.exclude_keywords_json, + s.source_quality, + s.promotion_status, + s.rejection_reason, + s.is_mirror, + s.preferred_source_rank, + s.reporting_label, + s.reporting_url, + s.reporting_phone, + s.reporting_notes, + a.authority_name, + a.language_code, + a.country, + a.country_code, + a.region, + a.authority_type, + a.base_url, + a.scope, + a.level, + a.parent_agency_id, + a.jurisdiction_name, + a.mission_tags_json, + a.operational_relevance, + a.is_curated, + a.is_high_value +FROM sources s +JOIN agencies a ON a.id = s.agency_id +WHERE s.status IN ('active', '') AND s.promotion_status = 'active' +ORDER BY s.id`) + if err != nil { + return nil, fmt.Errorf("query active sources: %w", err) + } + defer rows.Close() + + out := make([]model.RegistrySource, 0) + for rows.Next() { + var ( + sourceID, sourceType, fetchMode, feedURL, feedURLsJSON, category, regionTag string + includeJSON, excludeJSON, promotionStatus, 
rejectionReason string + reportingLabel, reportingURL, reportingPhone, reportingNotes string + authorityName, languageCode, country, countryCode, region, authorityType, baseURL string + scope, level, parentAgencyID, jurisdictionName, missionTagsJSON string + followRedirects, isMirror int + isCurated, isHighValue int + lat, lng, sourceQuality, operationalRelevance float64 + maxItems, preferredSourceRank int + ) + if err := rows.Scan( + &sourceID, + &sourceType, + &fetchMode, + &followRedirects, + &feedURL, + &feedURLsJSON, + &category, + ®ionTag, + &lat, + &lng, + &maxItems, + &includeJSON, + &excludeJSON, + &sourceQuality, + &promotionStatus, + &rejectionReason, + &isMirror, + &preferredSourceRank, + &reportingLabel, + &reportingURL, + &reportingPhone, + &reportingNotes, + &authorityName, + &languageCode, + &country, + &countryCode, + ®ion, + &authorityType, + &baseURL, + &scope, + &level, + &parentAgencyID, + &jurisdictionName, + &missionTagsJSON, + &operationalRelevance, + &isCurated, + &isHighValue, + ); err != nil { + return nil, fmt.Errorf("scan active source: %w", err) + } + var feedURLs, includeKeywords, excludeKeywords, missionTags []string + if err := decodeJSONStrings(feedURLsJSON, &feedURLs); err != nil { + return nil, err + } + if err := decodeJSONStrings(includeJSON, &includeKeywords); err != nil { + return nil, err + } + if err := decodeJSONStrings(excludeJSON, &excludeKeywords); err != nil { + return nil, err + } + if err := decodeJSONStrings(missionTagsJSON, &missionTags); err != nil { + return nil, err + } + out = append(out, model.RegistrySource{ + Type: sourceType, + FetchMode: emptyToZero(fetchMode), + FollowRedirects: followRedirects == 1, + FeedURL: feedURL, + FeedURLs: feedURLs, + Category: category, + RegionTag: regionTag, + Lat: lat, + Lng: lng, + MaxItems: maxItems, + IncludeKeywords: includeKeywords, + ExcludeKeywords: excludeKeywords, + SourceQuality: sourceQuality, + PromotionStatus: promotionStatus, + RejectionReason: rejectionReason, 
+ IsMirror: isMirror == 1, + PreferredRank: preferredSourceRank, + Reporting: model.ReportingMetadata{ + Label: reportingLabel, + URL: reportingURL, + Phone: reportingPhone, + Notes: reportingNotes, + }, + Source: model.SourceMetadata{ + SourceID: sourceID, + AuthorityName: authorityName, + Country: country, + CountryCode: countryCode, + Region: region, + AuthorityType: authorityType, + BaseURL: baseURL, + Scope: scope, + Level: level, + ParentAgencyID: parentAgencyID, + JurisdictionName: jurisdictionName, + MissionTags: missionTags, + OperationalRelevance: operationalRelevance, + IsCurated: isCurated == 1, + IsHighValue: isHighValue == 1, + LanguageCode: languageCode, + }, + }) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate active sources: %w", err) + } + return out, nil +} + +func (db *DB) LoadAlerts(ctx context.Context) ([]model.Alert, error) { + if err := db.Init(ctx); err != nil { + return nil, err + } + rows, err := db.sql.QueryContext(ctx, ` +SELECT + alert_id, + source_id, + status, + first_seen, + last_seen, + title, + canonical_url, + category, + severity, + region_tag, + lat, + lng, + freshness_hours, + source_json, + reporting_json, + triage_json +FROM alerts +ORDER BY last_seen DESC`) + if err != nil { + return nil, fmt.Errorf("query alerts: %w", err) + } + defer rows.Close() + + out := make([]model.Alert, 0) + for rows.Next() { + var ( + alert model.Alert + sourceJSON, reportingJSON, triageJSON string + ) + if err := rows.Scan( + &alert.AlertID, + &alert.SourceID, + &alert.Status, + &alert.FirstSeen, + &alert.LastSeen, + &alert.Title, + &alert.CanonicalURL, + &alert.Category, + &alert.Severity, + &alert.RegionTag, + &alert.Lat, + &alert.Lng, + &alert.FreshnessHours, + &sourceJSON, + &reportingJSON, + &triageJSON, + ); err != nil { + return nil, fmt.Errorf("scan alert: %w", err) + } + if err := json.Unmarshal([]byte(sourceJSON), &alert.Source); err != nil { + return nil, fmt.Errorf("decode alert source %s: %w", 
alert.AlertID, err) + } + if strings.TrimSpace(reportingJSON) != "" && reportingJSON != "{}" { + if err := json.Unmarshal([]byte(reportingJSON), &alert.Reporting); err != nil { + return nil, fmt.Errorf("decode alert reporting %s: %w", alert.AlertID, err) + } + } + if strings.TrimSpace(triageJSON) != "" && triageJSON != "null" { + var triage model.Triage + if err := json.Unmarshal([]byte(triageJSON), &triage); err != nil { + return nil, fmt.Errorf("decode alert triage %s: %w", alert.AlertID, err) + } + alert.Triage = &triage + } + out = append(out, alert) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate alerts: %w", err) + } + return out, nil +} + +func (db *DB) UpsertSourceCandidates(ctx context.Context, candidates []CandidateInput) error { + if err := db.Init(ctx); err != nil { + return err + } + if len(candidates) == 0 { + return nil + } + tx, err := db.sql.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin candidate upsert tx: %w", err) + } + defer tx.Rollback() + + for _, candidate := range candidates { + discoveredURL := strings.TrimSpace(candidate.DiscoveredURL) + if discoveredURL == "" { + continue + } + status := strings.TrimSpace(candidate.Status) + if status == "" { + status = "candidate" + } + if _, err := tx.ExecContext(ctx, ` +INSERT INTO source_candidates ( + discovered_url, discovered_via, status, language_code, category, authority_type, + country, country_code, checked_at, notes +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?) 
+ON CONFLICT(discovered_url) DO UPDATE SET + discovered_via = excluded.discovered_via, + status = excluded.status, + language_code = excluded.language_code, + category = excluded.category, + authority_type = excluded.authority_type, + country = excluded.country, + country_code = excluded.country_code, + checked_at = CURRENT_TIMESTAMP, + notes = excluded.notes +`, discoveredURL, strings.TrimSpace(candidate.DiscoveredVia), status, strings.TrimSpace(candidate.LanguageCode), strings.TrimSpace(candidate.Category), strings.TrimSpace(candidate.AuthorityType), strings.TrimSpace(candidate.Country), strings.ToUpper(strings.TrimSpace(candidate.CountryCode)), strings.TrimSpace(candidate.Notes)); err != nil { + return fmt.Errorf("upsert source candidate %s: %w", discoveredURL, err) + } + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit candidate upsert tx: %w", err) + } + return nil +} + +func (db *DB) SaveAlerts(ctx context.Context, alerts []model.Alert) error { + if err := db.Init(ctx); err != nil { + return err + } + tx, err := db.sql.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin alert save tx: %w", err) + } + defer tx.Rollback() + + if _, err := tx.ExecContext(ctx, `DELETE FROM alerts`); err != nil { + return fmt.Errorf("clear alerts: %w", err) + } + for _, alert := range alerts { + sourceJSON, _ := json.Marshal(alert.Source) + reportingJSON, _ := json.Marshal(alert.Reporting) + triageJSON, _ := json.Marshal(alert.Triage) + if _, err := tx.ExecContext(ctx, ` +INSERT INTO alerts ( + alert_id, source_id, status, first_seen, last_seen, title, canonical_url, + category, severity, region_tag, lat, lng, freshness_hours, source_json, + reporting_json, triage_json +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ON CONFLICT(alert_id) DO UPDATE SET + source_id = excluded.source_id, + status = excluded.status, + first_seen = excluded.first_seen, + last_seen = excluded.last_seen, + title = excluded.title, + canonical_url = excluded.canonical_url, + category = excluded.category, + severity = excluded.severity, + region_tag = excluded.region_tag, + lat = excluded.lat, + lng = excluded.lng, + freshness_hours = excluded.freshness_hours, + source_json = excluded.source_json, + reporting_json = excluded.reporting_json, + triage_json = excluded.triage_json +`, alert.AlertID, alert.SourceID, alert.Status, alert.FirstSeen, alert.LastSeen, alert.Title, alert.CanonicalURL, alert.Category, alert.Severity, alert.RegionTag, alert.Lat, alert.Lng, alert.FreshnessHours, string(sourceJSON), string(reportingJSON), string(triageJSON)); err != nil { + return fmt.Errorf("upsert alert %s: %w", alert.AlertID, err) + } + } + // Rebuild FTS index. + if _, err := tx.ExecContext(ctx, `DELETE FROM alerts_fts`); err != nil { + return fmt.Errorf("clear alerts_fts: %w", err) + } + if _, err := tx.ExecContext(ctx, ` +INSERT INTO alerts_fts (alert_id, title, canonical_url, category, severity, region_tag, + source_authority, source_country, source_country_code) +SELECT a.alert_id, a.title, a.canonical_url, a.category, a.severity, a.region_tag, + json_extract(a.source_json, '$.authority_name'), + json_extract(a.source_json, '$.country'), + json_extract(a.source_json, '$.country_code') +FROM alerts a +`); err != nil { + return fmt.Errorf("rebuild alerts_fts: %w", err) + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit alert save tx: %w", err) + } + return nil +} + +// SearchAlerts performs full-text search against the alerts FTS index. +// The query supports FTS5 syntax: bare words, "quoted phrases", prefix*, AND/OR/NOT. +// Results are ordered by BM25 relevance. Limit 0 means default (100). 
+func (db *DB) SearchAlerts(ctx context.Context, query string, category string, region string, status string, limit int) ([]model.Alert, error) { + if limit <= 0 { + limit = 100 + } + + query = strings.TrimSpace(query) + if query == "" && category == "" && region == "" { + return nil, nil + } + + var ( + rows *sql.Rows + err error + ) + if query != "" { + // FTS5 match with optional filters on the joined alerts table. + where := `WHERE alerts_fts MATCH ?` + args := []any{query} + if category != "" { + where += ` AND a.category = ?` + args = append(args, category) + } + if region != "" { + where += ` AND a.region_tag = ?` + args = append(args, region) + } + if status != "" { + where += ` AND a.status = ?` + args = append(args, status) + } + args = append(args, limit) + rows, err = db.sql.QueryContext(ctx, ` +SELECT a.alert_id, a.source_id, a.status, a.first_seen, a.last_seen, a.title, + a.canonical_url, a.category, a.severity, a.region_tag, a.lat, a.lng, + a.freshness_hours, a.source_json, a.reporting_json, a.triage_json, + bm25(alerts_fts, 0, 10.0, 0, 3.0, 0, 2.0, 2.0, 2.0, 1.0) AS rank +FROM alerts_fts +JOIN alerts a ON a.alert_id = alerts_fts.alert_id +`+where+` +ORDER BY rank +LIMIT ?`, args...) + } else { + // No text query, just filter. + where := `WHERE 1=1` + args := []any{} + if category != "" { + where += ` AND a.category = ?` + args = append(args, category) + } + if region != "" { + where += ` AND a.region_tag = ?` + args = append(args, region) + } + if status != "" { + where += ` AND a.status = ?` + args = append(args, status) + } + args = append(args, limit) + rows, err = db.sql.QueryContext(ctx, ` +SELECT a.alert_id, a.source_id, a.status, a.first_seen, a.last_seen, a.title, + a.canonical_url, a.category, a.severity, a.region_tag, a.lat, a.lng, + a.freshness_hours, a.source_json, a.reporting_json, a.triage_json, + 0 AS rank +FROM alerts a +`+where+` +ORDER BY a.last_seen DESC +LIMIT ?`, args...) 
+ } + if err != nil { + return nil, fmt.Errorf("search alerts: %w", err) + } + defer rows.Close() + + var results []model.Alert + for rows.Next() { + var ( + a model.Alert + sourceJSON string + reportJSON string + triageJSON string + rank float64 + ) + if err := rows.Scan(&a.AlertID, &a.SourceID, &a.Status, &a.FirstSeen, &a.LastSeen, + &a.Title, &a.CanonicalURL, &a.Category, &a.Severity, &a.RegionTag, + &a.Lat, &a.Lng, &a.FreshnessHours, &sourceJSON, &reportJSON, &triageJSON, &rank); err != nil { + return nil, fmt.Errorf("scan search result: %w", err) + } + _ = json.Unmarshal([]byte(sourceJSON), &a.Source) + _ = json.Unmarshal([]byte(reportJSON), &a.Reporting) + if triageJSON != "null" && triageJSON != "" { + var t model.Triage + if json.Unmarshal([]byte(triageJSON), &t) == nil { + a.Triage = &t + } + } + results = append(results, a) + } + return results, rows.Err() +} + +func (db *DB) DeactivateSources(ctx context.Context, reasons map[string]string) error { + if len(reasons) == 0 { + return nil + } + tx, err := db.sql.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("begin deactivate tx: %w", err) + } + defer tx.Rollback() + + for sourceID, reason := range reasons { + sourceID = strings.TrimSpace(sourceID) + if sourceID == "" { + continue + } + if _, err := tx.ExecContext(ctx, ` +UPDATE sources +SET status = 'needs_replacement', + promotion_status = 'rejected', + rejection_reason = ?, + last_error = ?, + updated_at = CURRENT_TIMESTAMP +WHERE id = ?`, "Dead source: "+strings.TrimSpace(reason), strings.TrimSpace(reason), sourceID); err != nil { + return fmt.Errorf("deactivate source %s: %w", sourceID, err) + } + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit deactivate tx: %w", err) + } + return nil +} + +func (db *DB) ExportRegistry(ctx context.Context, registryPath string) error { + rows, err := db.sql.QueryContext(ctx, ` +SELECT + s.id, + s.type, + s.fetch_mode, + s.follow_redirects, + s.feed_url, + s.feed_urls_json, + s.category, + 
s.region_tag, + s.lat, + s.lng, + s.max_items, + s.include_keywords_json, + s.exclude_keywords_json, + s.source_quality, + s.promotion_status, + s.rejection_reason, + s.is_mirror, + s.preferred_source_rank, + s.reporting_label, + s.reporting_url, + s.reporting_phone, + s.reporting_notes, + a.id, + a.authority_name, + a.language_code, + a.country, + a.country_code, + a.region, + a.authority_type, + a.base_url, + a.scope, + a.level, + a.parent_agency_id, + a.jurisdiction_name, + a.mission_tags_json, + a.operational_relevance, + a.is_curated, + a.is_high_value +FROM sources s +JOIN agencies a ON a.id = s.agency_id +WHERE s.status IN ('active', 'candidate', '') AND s.promotion_status != 'rejected' +ORDER BY a.region, a.country, a.authority_name, s.id`) + if err != nil { + return fmt.Errorf("query registry export: %w", err) + } + defer rows.Close() + + exported := make([]model.RegistrySource, 0) + for rows.Next() { + var ( + sourceID, sourceType, fetchMode, feedURL, feedURLsJSON, category, regionTag string + includeJSON, excludeJSON, promotionStatus, rejectionReason string + reportingLabel, reportingURL, reportingPhone, reportingNotes string + agencyID, authorityName, languageCode, country, countryCode, region, authorityType, baseURL string + scope, level, parentAgencyID, jurisdictionName, missionTagsJSON string + followRedirects, isMirror int + isCurated, isHighValue int + lat, lng, sourceQuality, operationalRelevance float64 + maxItems, preferredSourceRank int + ) + if err := rows.Scan( + &sourceID, + &sourceType, + &fetchMode, + &followRedirects, + &feedURL, + &feedURLsJSON, + &category, + ®ionTag, + &lat, + &lng, + &maxItems, + &includeJSON, + &excludeJSON, + &sourceQuality, + &promotionStatus, + &rejectionReason, + &isMirror, + &preferredSourceRank, + &reportingLabel, + &reportingURL, + &reportingPhone, + &reportingNotes, + &agencyID, + &authorityName, + &languageCode, + &country, + &countryCode, + ®ion, + &authorityType, + &baseURL, + &scope, + &level, + 
&parentAgencyID, + &jurisdictionName, + &missionTagsJSON, + &operationalRelevance, + &isCurated, + &isHighValue, + ); err != nil { + return fmt.Errorf("scan registry export: %w", err) + } + + var feedURLs, includeKeywords, excludeKeywords, missionTags []string + if err := decodeJSONStrings(feedURLsJSON, &feedURLs); err != nil { + return err + } + if err := decodeJSONStrings(includeJSON, &includeKeywords); err != nil { + return err + } + if err := decodeJSONStrings(excludeJSON, &excludeKeywords); err != nil { + return err + } + if err := decodeJSONStrings(missionTagsJSON, &missionTags); err != nil { + return err + } + + exported = append(exported, model.RegistrySource{ + Type: sourceType, + FetchMode: emptyToZero(fetchMode), + FollowRedirects: followRedirects == 1, + FeedURL: feedURL, + FeedURLs: feedURLs, + Category: category, + RegionTag: regionTag, + Lat: lat, + Lng: lng, + MaxItems: maxItems, + IncludeKeywords: includeKeywords, + ExcludeKeywords: excludeKeywords, + SourceQuality: sourceQuality, + PromotionStatus: promotionStatus, + RejectionReason: rejectionReason, + IsMirror: isMirror == 1, + PreferredRank: preferredSourceRank, + Reporting: model.ReportingMetadata{ + Label: reportingLabel, + URL: reportingURL, + Phone: reportingPhone, + Notes: reportingNotes, + }, + Source: model.SourceMetadata{ + SourceID: agencyIDOrSourceID(agencyID, sourceID), + AuthorityName: authorityName, + Country: country, + CountryCode: countryCode, + Region: region, + AuthorityType: authorityType, + BaseURL: baseURL, + Scope: scope, + Level: level, + ParentAgencyID: parentAgencyID, + JurisdictionName: jurisdictionName, + MissionTags: missionTags, + OperationalRelevance: operationalRelevance, + IsCurated: isCurated == 1, + IsHighValue: isHighValue == 1, + LanguageCode: languageCode, + }, + }) + } + if err := rows.Err(); err != nil { + return fmt.Errorf("iterate registry export: %w", err) + } + + sort.Slice(exported, func(i, j int) bool { + if exported[i].Source.Region != 
exported[j].Source.Region { + return exported[i].Source.Region < exported[j].Source.Region + } + if exported[i].Source.Country != exported[j].Source.Country { + return exported[i].Source.Country < exported[j].Source.Country + } + return exported[i].Source.SourceID < exported[j].Source.SourceID + }) + + data, err := json.MarshalIndent(exported, "", " ") + if err != nil { + return fmt.Errorf("marshal exported registry: %w", err) + } + data = append(data, '\n') + if err := os.WriteFile(registryPath, data, 0o644); err != nil { + return fmt.Errorf("write exported registry: %w", err) + } + return nil +} + +func upsertAgency(ctx context.Context, tx *sql.Tx, agencyID string, meta model.SourceMetadata) error { + missionTagsJSON, _ := json.Marshal(compactStrings(meta.MissionTags...)) + _, err := tx.ExecContext(ctx, ` +INSERT INTO agencies (id, authority_name, language_code, country, country_code, region, authority_type, base_url, scope, level, parent_agency_id, jurisdiction_name, mission_tags_json, operational_relevance, is_curated, is_high_value) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ON CONFLICT(id) DO UPDATE SET + authority_name = excluded.authority_name, + language_code = excluded.language_code, + country = excluded.country, + country_code = excluded.country_code, + region = excluded.region, + authority_type = excluded.authority_type, + base_url = excluded.base_url, + scope = excluded.scope, + level = excluded.level, + parent_agency_id = excluded.parent_agency_id, + jurisdiction_name = excluded.jurisdiction_name, + mission_tags_json = excluded.mission_tags_json, + operational_relevance = excluded.operational_relevance, + is_curated = excluded.is_curated, + is_high_value = excluded.is_high_value, + updated_at = CURRENT_TIMESTAMP +`, agencyID, meta.AuthorityName, meta.LanguageCode, meta.Country, meta.CountryCode, meta.Region, meta.AuthorityType, meta.BaseURL, fallbackScope(meta.Scope), fallbackLevel(meta.Level, meta.Scope), meta.ParentAgencyID, meta.JurisdictionName, string(missionTagsJSON), defaultOperationalRelevance(meta), boolToInt(meta.IsCurated), boolToInt(meta.IsHighValue)) + if err != nil { + return fmt.Errorf("upsert agency %s: %w", agencyID, err) + } + return nil +} + +func upsertSource(ctx context.Context, tx *sql.Tx, agencyID string, src model.RegistrySource) error { + feedURLsJSON, _ := json.Marshal(src.FeedURLs) + includeJSON, _ := json.Marshal(src.IncludeKeywords) + excludeJSON, _ := json.Marshal(src.ExcludeKeywords) + _, err := tx.ExecContext(ctx, ` +INSERT INTO sources ( + id, agency_id, language_code, type, fetch_mode, follow_redirects, feed_url, feed_urls_json, + category, region_tag, lat, lng, max_items, include_keywords_json, exclude_keywords_json, + source_quality, promotion_status, rejection_reason, is_mirror, preferred_source_rank, + reporting_label, reporting_url, reporting_phone, reporting_notes, status +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'active') +ON CONFLICT(id) DO UPDATE SET + agency_id = excluded.agency_id, + language_code = excluded.language_code, + type = 
excluded.type, + fetch_mode = excluded.fetch_mode, + follow_redirects = excluded.follow_redirects, + feed_url = excluded.feed_url, + feed_urls_json = excluded.feed_urls_json, + category = excluded.category, + region_tag = excluded.region_tag, + lat = excluded.lat, + lng = excluded.lng, + max_items = excluded.max_items, + include_keywords_json = excluded.include_keywords_json, + exclude_keywords_json = excluded.exclude_keywords_json, + source_quality = excluded.source_quality, + promotion_status = CASE + WHEN excluded.promotion_status = 'rejected' THEN 'rejected' + WHEN sources.promotion_status = 'rejected' AND excluded.promotion_status IN ('active', '') THEN 'rejected' + ELSE excluded.promotion_status + END, + rejection_reason = CASE + WHEN excluded.promotion_status = 'rejected' THEN excluded.rejection_reason + WHEN sources.promotion_status = 'rejected' AND excluded.promotion_status IN ('active', '') THEN sources.rejection_reason + ELSE excluded.rejection_reason + END, + is_mirror = excluded.is_mirror, + preferred_source_rank = excluded.preferred_source_rank, + reporting_label = excluded.reporting_label, + reporting_url = excluded.reporting_url, + reporting_phone = excluded.reporting_phone, + reporting_notes = excluded.reporting_notes, + status = CASE + WHEN sources.promotion_status = 'rejected' AND excluded.promotion_status IN ('active', '') THEN sources.status + ELSE 'active' + END, + updated_at = CURRENT_TIMESTAMP +`, + src.Source.SourceID, + agencyID, + src.Source.LanguageCode, + src.Type, + src.FetchMode, + boolToInt(src.FollowRedirects), + src.FeedURL, + string(feedURLsJSON), + src.Category, + src.RegionTag, + src.Lat, + src.Lng, + src.MaxItems, + string(includeJSON), + string(excludeJSON), + defaultSourceQuality(src), + defaultPromotionStatus(src), + strings.TrimSpace(src.RejectionReason), + boolToInt(src.IsMirror), + src.PreferredRank, + src.Reporting.Label, + src.Reporting.URL, + src.Reporting.Phone, + src.Reporting.Notes, + ) + if err != nil { + return 
fmt.Errorf("upsert source %s: %w", src.Source.SourceID, err) + } + if _, err := tx.ExecContext(ctx, `INSERT OR IGNORE INTO source_categories (source_id, category) VALUES (?, ?)`, src.Source.SourceID, src.Category); err != nil { + return fmt.Errorf("upsert source category %s: %w", src.Source.SourceID, err) + } + return nil +} + +func upsertAgencyCoverage(ctx context.Context, tx *sql.Tx, agencyID string, category string) error { + category = strings.TrimSpace(category) + if agencyID == "" || category == "" { + return nil + } + if _, err := tx.ExecContext(ctx, `INSERT OR IGNORE INTO agency_category_coverage (agency_id, category) VALUES (?, ?)`, agencyID, category); err != nil { + return fmt.Errorf("upsert agency coverage %s/%s: %w", agencyID, category, err) + } + return nil +} + +func upsertAgencyFTS(ctx context.Context, tx *sql.Tx, agencyID string, meta model.SourceMetadata) error { + if _, err := tx.ExecContext(ctx, `DELETE FROM agencies_fts WHERE agency_id = ?`, agencyID); err != nil { + return fmt.Errorf("clear agency fts %s: %w", agencyID, err) + } + aliases := strings.Join(compactStrings(meta.SourceID, meta.AuthorityName), " ") + _, err := tx.ExecContext(ctx, ` +INSERT INTO agencies_fts (agency_id, authority_name, aliases, country, country_code, region, authority_type, base_url) +VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+`, agencyID, meta.AuthorityName, aliases, meta.Country, meta.CountryCode, meta.Region, meta.AuthorityType, meta.BaseURL) + if err != nil { + return fmt.Errorf("upsert agency fts %s: %w", agencyID, err) + } + return nil +} + +func fallbackScope(scope string) string { + scope = strings.TrimSpace(scope) + if scope == "" { + return "national" + } + return scope +} + +func fallbackLevel(level string, scope string) string { + level = strings.ToLower(strings.TrimSpace(level)) + if level != "" { + return level + } + scope = strings.ToLower(strings.TrimSpace(scope)) + switch scope { + case "international", "supranational", "federal", "national", "regional", "local": + return scope + default: + return "national" + } +} + +func defaultOperationalRelevance(meta model.SourceMetadata) float64 { + if meta.OperationalRelevance > 0 { + return meta.OperationalRelevance + } + score := 0.65 + switch fallbackLevel(meta.Level, meta.Scope) { + case "international": + score = 0.95 + case "supranational": + score = 0.92 + case "federal": + score = 0.9 + case "national": + score = 0.82 + case "regional": + score = 0.45 + case "local": + score = 0.2 + } + if meta.IsHighValue { + score += 0.05 + } + if meta.IsCurated { + score += 0.03 + } + if score > 1 { + score = 1 + } + return score +} + +func defaultSourceQuality(src model.RegistrySource) float64 { + if src.SourceQuality > 0 { + return src.SourceQuality + } + score := 0.72 + switch strings.TrimSpace(src.Type) { + case "rss", "travelwarning-atom": + score = 0.9 + case "kev-json", "interpol-red-json", "interpol-yellow-json", "travelwarning-json": + score = 0.95 + case "html-list": + score = 0.62 + } + if src.Source.IsCurated { + score += 0.03 + } + if src.Source.IsHighValue { + score += 0.03 + } + if src.IsMirror { + score -= 0.1 + } + if score > 1 { + score = 1 + } + return score +} + +func defaultPromotionStatus(src model.RegistrySource) string { + status := strings.ToLower(strings.TrimSpace(src.PromotionStatus)) + if status != "" { + 
return status + } + level := fallbackLevel(src.Source.Level, src.Source.Scope) + if level == "local" { + return "rejected" + } + if level == "regional" && !src.Source.IsCurated && !src.Source.IsHighValue { + return "validated" + } + return "active" +} + +func decodeJSONStrings(raw string, target *[]string) error { + if strings.TrimSpace(raw) == "" { + *target = nil + return nil + } + if err := json.Unmarshal([]byte(raw), target); err != nil { + return fmt.Errorf("decode string array %q: %w", raw, err) + } + return nil +} + +func compactStrings(values ...string) []string { + out := make([]string, 0, len(values)) + for _, value := range values { + value = strings.TrimSpace(value) + if value != "" { + out = append(out, value) + } + } + return out +} + +func boolToInt(v bool) int { + if v { + return 1 + } + return 0 +} + +func emptyToZero(v string) string { + return strings.TrimSpace(v) +} + +func agencyIDOrSourceID(agencyID, sourceID string) string { + if strings.TrimSpace(agencyID) != "" { + return agencyID + } + return sourceID +} + +func agencyKey(meta model.SourceMetadata) string { + base := strings.ToLower(strings.TrimSpace(meta.AuthorityName)) + base = strings.ReplaceAll(base, "&", " and ") + base = strings.Map(func(r rune) rune { + switch { + case r >= 'a' && r <= 'z': + return r + case r >= '0' && r <= '9': + return r + default: + return '-' + } + }, base) + base = strings.Trim(base, "-") + base = strings.Join(strings.FieldsFunc(base, func(r rune) bool { return r == '-' }), "-") + if base == "" { + base = strings.ToLower(strings.TrimSpace(meta.SourceID)) + } + if code := strings.ToLower(strings.TrimSpace(meta.CountryCode)); code != "" { + return base + "-" + code + } + return base +} + +func loadRegistryJSON(path string) ([]model.RegistrySource, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read registry %s: %w", path, err) + } + var raw []model.RegistrySource + if err := json.Unmarshal(data, &raw); err != nil { + return 
nil, fmt.Errorf("decode registry %s: %w", path, err) + } + return raw, nil +} diff --git a/internal/sourcedb/db_test.go b/internal/sourcedb/db_test.go new file mode 100644 index 0000000..cc8a1da --- /dev/null +++ b/internal/sourcedb/db_test.go @@ -0,0 +1,445 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package sourcedb + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/scalytics/euosint/internal/collector/model" +) + +func TestImportAndExportRegistry(t *testing.T) { + dir := t.TempDir() + registryPath := filepath.Join(dir, "registry.json") + dbPath := filepath.Join(dir, "sources.db") + content := `[ + {"type":"rss","feed_url":"https://one.example/feed","category":"cyber_advisory","region_tag":"EU","source":{"source_id":"agency-one-feed","authority_name":"Agency One","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://one.example"}}, + {"type":"rss","feed_url":"https://one.example/alerts","category":"public_appeal","region_tag":"EU","source":{"source_id":"agency-one-alerts","authority_name":"Agency One","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://one.example"}} + ]` + if err := os.WriteFile(registryPath, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + + if err := db.ImportRegistry(context.Background(), registryPath); err != nil { + t.Fatal(err) + } + sources, err := db.LoadActiveSources(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(sources) != 2 { + t.Fatalf("expected 2 active sources, got %d", len(sources)) + } + + exportPath := filepath.Join(dir, "exported.json") + if err := db.ExportRegistry(context.Background(), exportPath); err != nil { + t.Fatal(err) + } + if _, err := os.Stat(exportPath); err != nil { + t.Fatalf("expected exported registry 
file: %v", err) + } +} + +func TestLoadActiveSourcesAutoMigratesOlderDatabase(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "sources.db") + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + + // Simulate an older DB that predates the hygiene columns added later. + for _, stmt := range []string{ + `CREATE TABLE agencies ( + id TEXT PRIMARY KEY, + authority_name TEXT NOT NULL, + country TEXT NOT NULL DEFAULT '', + country_code TEXT NOT NULL DEFAULT '', + region TEXT NOT NULL DEFAULT '', + authority_type TEXT NOT NULL DEFAULT '', + base_url TEXT NOT NULL DEFAULT '', + scope TEXT NOT NULL DEFAULT 'national', + jurisdiction_name TEXT NOT NULL DEFAULT '', + parent_agency_id TEXT NOT NULL DEFAULT '', + is_curated INTEGER NOT NULL DEFAULT 0, + is_high_value INTEGER NOT NULL DEFAULT 0, + language_code TEXT NOT NULL DEFAULT '' + )`, + `CREATE TABLE sources ( + id TEXT PRIMARY KEY, + agency_id TEXT NOT NULL, + type TEXT NOT NULL, + fetch_mode TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'active', + follow_redirects INTEGER NOT NULL DEFAULT 1, + feed_url TEXT NOT NULL, + feed_urls_json TEXT NOT NULL DEFAULT '[]', + category TEXT NOT NULL DEFAULT '', + region_tag TEXT NOT NULL DEFAULT '', + lat REAL NOT NULL DEFAULT 0, + lng REAL NOT NULL DEFAULT 0, + max_items INTEGER NOT NULL DEFAULT 20, + include_keywords_json TEXT NOT NULL DEFAULT '[]', + exclude_keywords_json TEXT NOT NULL DEFAULT '[]', + reporting_label TEXT NOT NULL DEFAULT '', + reporting_url TEXT NOT NULL DEFAULT '', + reporting_phone TEXT NOT NULL DEFAULT '', + reporting_notes TEXT NOT NULL DEFAULT '', + last_http_status INTEGER NOT NULL DEFAULT 0, + last_ok_at TEXT NOT NULL DEFAULT '', + last_error TEXT NOT NULL DEFAULT '', + last_error_class TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP + )`, + `CREATE TABLE source_categories ( + source_id TEXT NOT NULL, + 
category TEXT NOT NULL, + PRIMARY KEY (source_id, category) + )`, + `CREATE TABLE source_candidates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + discovered_url TEXT NOT NULL, + discovered_via TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'candidate', + language_code TEXT NOT NULL DEFAULT '', + category TEXT NOT NULL DEFAULT '', + authority_type TEXT NOT NULL DEFAULT '', + country TEXT NOT NULL DEFAULT '', + country_code TEXT NOT NULL DEFAULT '', + notes TEXT NOT NULL DEFAULT '', + checked_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(discovered_url) + )`, + `CREATE TABLE agencies_fts ( + agency_id TEXT NOT NULL, + name TEXT NOT NULL, + aliases TEXT NOT NULL DEFAULT '' + )`, + `CREATE TABLE agency_category_coverage ( + agency_id TEXT NOT NULL, + category TEXT NOT NULL, + PRIMARY KEY (agency_id, category) + )`, + } { + if _, err := db.sql.ExecContext(context.Background(), stmt); err != nil { + t.Fatalf("seed old schema: %v", err) + } + } + + if _, err := db.sql.ExecContext(context.Background(), ` +INSERT INTO agencies (id, authority_name, country, country_code, region, authority_type, base_url, scope) +VALUES ('agency-one', 'Agency One', 'France', 'FR', 'Europe', 'cert', 'https://one.example', 'national')`); err != nil { + t.Fatal(err) + } + if _, err := db.sql.ExecContext(context.Background(), ` +INSERT INTO sources (id, agency_id, type, status, feed_url, category) +VALUES ('agency-one-feed', 'agency-one', 'rss', 'active', 'https://one.example/feed', 'cyber_advisory')`); err != nil { + t.Fatal(err) + } + + sources, err := db.LoadActiveSources(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(sources) != 1 { + t.Fatalf("expected 1 active source after auto-migration, got %d", len(sources)) + } + if sources[0].PromotionStatus != "active" { + t.Fatalf("expected promotion_status backfill to active, got %q", sources[0].PromotionStatus) + } +} + +func TestDeactivateSourcesRemovesThemFromActiveLoad(t *testing.T) { + dir := t.TempDir() + 
registryPath := filepath.Join(dir, "registry.json") + dbPath := filepath.Join(dir, "sources.db") + content := `[ + {"type":"rss","feed_url":"https://one.example/feed","category":"cyber_advisory","source":{"source_id":"agency-one-feed","authority_name":"Agency One","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://one.example"}} + ]` + if err := os.WriteFile(registryPath, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + if err := db.ImportRegistry(context.Background(), registryPath); err != nil { + t.Fatal(err) + } + if err := db.DeactivateSources(context.Background(), map[string]string{ + "agency-one-feed": "fetch https://one.example/feed: status 404", + }); err != nil { + t.Fatal(err) + } + sources, err := db.LoadActiveSources(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(sources) != 0 { + t.Fatalf("expected deactivated source to be removed from active load, got %d", len(sources)) + } +} + +func TestScopeAndCurationMetadataRoundTrip(t *testing.T) { + dir := t.TempDir() + registryPath := filepath.Join(dir, "registry.json") + dbPath := filepath.Join(dir, "sources.db") + content := `[ + {"type":"rss","feed_url":"https://example.test/feed","category":"wanted_suspect","source_quality":0.97,"promotion_status":"active","source":{"source_id":"europol","authority_name":"Europol","country":"Netherlands","country_code":"NL","region":"Europe","authority_type":"police","base_url":"https://www.europol.europa.eu","scope":"supranational","level":"supranational","mission_tags":["organized_crime","wanted_suspect"],"operational_relevance":0.98,"is_curated":true,"is_high_value":true,"language_code":"en"}} + ]` + if err := os.WriteFile(registryPath, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + if err := 
db.ImportRegistry(context.Background(), registryPath); err != nil { + t.Fatal(err) + } + + sources, err := db.LoadActiveSources(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(sources) != 1 { + t.Fatalf("expected 1 active source, got %d", len(sources)) + } + if sources[0].Source.Scope != "supranational" { + t.Fatalf("expected scope to round-trip, got %q", sources[0].Source.Scope) + } + if !sources[0].Source.IsCurated || !sources[0].Source.IsHighValue { + t.Fatalf("expected curated/high-value flags to round-trip: %#v", sources[0].Source) + } + if sources[0].Source.LanguageCode != "en" { + t.Fatalf("expected language code to round-trip, got %q", sources[0].Source.LanguageCode) + } + if sources[0].Source.Level != "supranational" { + t.Fatalf("expected level to round-trip, got %q", sources[0].Source.Level) + } + if sources[0].PromotionStatus != "active" { + t.Fatalf("expected promotion_status to round-trip, got %q", sources[0].PromotionStatus) + } + if sources[0].SourceQuality != 0.97 { + t.Fatalf("expected source_quality to round-trip, got %v", sources[0].SourceQuality) + } + if len(sources[0].Source.MissionTags) != 2 { + t.Fatalf("expected mission tags to round-trip, got %#v", sources[0].Source.MissionTags) + } +} + +func TestMergeRegistryAddsCuratedSeedWithoutReplacingExistingSources(t *testing.T) { + dir := t.TempDir() + baseRegistryPath := filepath.Join(dir, "base.json") + seedRegistryPath := filepath.Join(dir, "seed.json") + dbPath := filepath.Join(dir, "sources.db") + + base := `[ + {"type":"rss","feed_url":"https://example.test/base","category":"cyber_advisory","source":{"source_id":"base-source","authority_name":"Base Source","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://example.test"}} + ]` + seed := `[ + {"type":"rss","feed_url":"https://example.test/seed","category":"wanted_suspect","source":{"source_id":"seed-source","authority_name":"Seed Source","country":"United 
States","country_code":"US","region":"North America","authority_type":"police","base_url":"https://example.test","scope":"national","is_curated":true,"is_high_value":true,"language_code":"en"}} + ]` + if err := os.WriteFile(baseRegistryPath, []byte(base), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(seedRegistryPath, []byte(seed), 0o644); err != nil { + t.Fatal(err) + } + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + if err := db.ImportRegistry(context.Background(), baseRegistryPath); err != nil { + t.Fatal(err) + } + if err := db.MergeRegistry(context.Background(), seedRegistryPath); err != nil { + t.Fatal(err) + } + + sources, err := db.LoadActiveSources(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(sources) != 2 { + t.Fatalf("expected merged source count 2, got %d", len(sources)) + } +} + +func TestMergeRegistryRejectsExistingSource(t *testing.T) { + dir := t.TempDir() + baseRegistryPath := filepath.Join(dir, "base.json") + seedRegistryPath := filepath.Join(dir, "seed.json") + dbPath := filepath.Join(dir, "sources.db") + + base := `[ + {"type":"html-list","feed_url":"https://www.hotosm.org/projects/","category":"humanitarian_tasking","source":{"source_id":"hot-tasking","authority_name":"Humanitarian OpenStreetMap Team","country":"International","country_code":"INT","region":"International","authority_type":"public_safety_program","base_url":"https://www.hotosm.org"}} + ]` + seed := `[ + {"type":"html-list","feed_url":"https://www.hotosm.org/projects/","category":"humanitarian_tasking","promotion_status":"rejected","rejection_reason":"JS-rendered navigation page, not a stable incident/tasking feed","source":{"source_id":"hot-tasking","authority_name":"Humanitarian OpenStreetMap Team","country":"International","country_code":"INT","region":"International","authority_type":"public_safety_program","base_url":"https://www.hotosm.org"}} + ]` + if err := os.WriteFile(baseRegistryPath, 
[]byte(base), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(seedRegistryPath, []byte(seed), 0o644); err != nil { + t.Fatal(err) + } + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + if err := db.ImportRegistry(context.Background(), baseRegistryPath); err != nil { + t.Fatal(err) + } + if err := db.MergeRegistry(context.Background(), seedRegistryPath); err != nil { + t.Fatal(err) + } + + sources, err := db.LoadActiveSources(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(sources) != 0 { + t.Fatalf("expected rejected source to be removed from active load, got %d", len(sources)) + } + + var promotionStatus, rejectionReason string + if err := db.sql.QueryRowContext(context.Background(), `SELECT promotion_status, rejection_reason FROM sources WHERE id = 'hot-tasking'`).Scan(&promotionStatus, &rejectionReason); err != nil { + t.Fatal(err) + } + if promotionStatus != "rejected" { + t.Fatalf("expected promotion_status rejected, got %q", promotionStatus) + } + if rejectionReason == "" { + t.Fatal("expected rejection_reason to be persisted") + } +} + +func TestSaveAndLoadAlertsReplacesMaterializedStateWithoutDuplicates(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "sources.db") + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + + firstSeen := time.Date(2026, 3, 16, 10, 0, 0, 0, time.UTC).Format(time.RFC3339) + lastSeen := time.Date(2026, 3, 16, 10, 15, 0, 0, time.UTC).Format(time.RFC3339) + alerts := []model.Alert{ + { + AlertID: "alpha", + SourceID: "source-one", + Title: "Alpha alert", + CanonicalURL: "https://example.test/a", + FirstSeen: firstSeen, + LastSeen: lastSeen, + Status: "active", + Category: "cyber_advisory", + Severity: "high", + RegionTag: "EU", + Source: model.SourceMetadata{ + SourceID: "source-one", + AuthorityName: "Source One", + Country: "France", + CountryCode: "FR", + Region: "Europe", + AuthorityType: "cert", + BaseURL: 
"https://example.test", + }, + }, + } + if err := db.SaveAlerts(context.Background(), alerts); err != nil { + t.Fatal(err) + } + if err := db.SaveAlerts(context.Background(), alerts); err != nil { + t.Fatal(err) + } + + loaded, err := db.LoadAlerts(context.Background()) + if err != nil { + t.Fatal(err) + } + if len(loaded) != 1 { + t.Fatalf("expected exactly 1 persisted alert, got %d", len(loaded)) + } + if loaded[0].AlertID != "alpha" { + t.Fatalf("expected alert alpha, got %q", loaded[0].AlertID) + } + if loaded[0].FirstSeen != firstSeen { + t.Fatalf("expected first_seen %q, got %q", firstSeen, loaded[0].FirstSeen) + } +} + +func TestUpsertSourceCandidatesDeduplicatesByURL(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "sources.db") + + db, err := Open(dbPath) + if err != nil { + t.Fatal(err) + } + defer db.Close() + + inputs := []CandidateInput{ + { + DiscoveredURL: "https://example.test/feed", + DiscoveredVia: "first.org", + Status: "candidate", + Category: "cyber_advisory", + AuthorityType: "cert", + Country: "France", + CountryCode: "FR", + Notes: "Agency One", + }, + { + DiscoveredURL: "https://example.test/feed", + DiscoveredVia: "replacement-queue", + Status: "candidate", + Category: "public_appeal", + AuthorityType: "police", + Country: "France", + CountryCode: "FR", + Notes: "Agency One Revised", + }, + } + if err := db.UpsertSourceCandidates(context.Background(), inputs); err != nil { + t.Fatal(err) + } + + row := db.sql.QueryRowContext(context.Background(), `SELECT COUNT(*), discovered_via, category, authority_type, notes FROM source_candidates WHERE discovered_url = ?`, "https://example.test/feed") + var count int + var discoveredVia, category, authorityType, notes string + if err := row.Scan(&count, &discoveredVia, &category, &authorityType, &notes); err != nil { + t.Fatal(err) + } + if count != 1 { + t.Fatalf("expected 1 candidate row, got %d", count) + } + if discoveredVia != "replacement-queue" || category != "public_appeal" ||
authorityType != "police" || notes != "Agency One Revised" { + t.Fatalf("unexpected candidate row values: via=%q category=%q authority=%q notes=%q", discoveredVia, category, authorityType, notes) + } +} diff --git a/internal/sourcedb/geocode.go b/internal/sourcedb/geocode.go new file mode 100644 index 0000000..26c3a00 --- /dev/null +++ b/internal/sourcedb/geocode.go @@ -0,0 +1,164 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + +package sourcedb + +import ( + "bufio" + "context" + "fmt" + "os" + "strconv" + "strings" +) + +// CityResult is a geocoded city from the GeoNames gazetteer. +type CityResult struct { + Name string + CountryCode string + Lat float64 + Lng float64 + Population int +} + +// ImportGeoNames loads a GeoNames cities500.txt (tab-separated) file into +// the cities table. It replaces all existing rows. The file format is +// documented at https://download.geonames.org/export/dump/readme.txt. +// +// Columns used: 0=geonameid, 1=name, 2=asciiname, 4=latitude, 5=longitude, +// 8=country_code, 14=population. 
+//
+// Rows that cannot be stored faithfully are skipped instead of being imported
+// with zero values: rows with fewer than 15 columns, rows without a name or
+// country code, and rows whose geonameid, latitude or longitude does not
+// parse. An unparsable population is stored as 0.
+//
+// It returns an error if the file cannot be opened or read, or if any
+// database step fails; in that case the transaction is rolled back.
+func (db *DB) ImportGeoNames(ctx context.Context, path string) error {
+	f, err := os.Open(path)
+	if err != nil {
+		return fmt.Errorf("open geonames file: %w", err)
+	}
+	defer f.Close()
+
+	// The DELETE and all INSERTs share one transaction, so an import that
+	// fails part-way is rolled back by the deferred Rollback below.
+	tx, err := db.sql.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("begin geonames import tx: %w", err)
+	}
+	defer tx.Rollback() //nolint:errcheck
+
+	if _, err := tx.ExecContext(ctx, `DELETE FROM cities`); err != nil {
+		return fmt.Errorf("clear cities table: %w", err)
+	}
+
+	stmt, err := tx.PrepareContext(ctx,
+		`INSERT INTO cities (id, name, name_lower, ascii_name, ascii_lower, country_code, lat, lng, population)
+		 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
+	if err != nil {
+		return fmt.Errorf("prepare cities insert: %w", err)
+	}
+	defer stmt.Close()
+
+	// Raise the scanner's line limit to 1 MiB (the bufio.Scanner default is 64 KiB).
+	scanner := bufio.NewScanner(f)
+	scanner.Buffer(make([]byte, 0, 1024*1024), 1024*1024)
+
+	var count int
+	for scanner.Scan() {
+		fields := strings.Split(scanner.Text(), "\t")
+		if len(fields) < 15 {
+			continue
+		}
+
+		name := strings.TrimSpace(fields[1])
+		asciiName := strings.TrimSpace(fields[2])
+		cc := strings.TrimSpace(fields[8])
+		if name == "" || cc == "" {
+			continue
+		}
+
+		geoID, idErr := strconv.Atoi(fields[0])
+		lat, latErr := strconv.ParseFloat(fields[4], 64)
+		lng, lngErr := strconv.ParseFloat(fields[5], 64)
+		if idErr != nil || latErr != nil || lngErr != nil {
+			// A defaulted id of 0 would collide on the cities primary key and
+			// abort the whole import; defaulted coordinates would place the
+			// city at 0,0. Dropping the row is the safer outcome.
+			continue
+		}
+		// Population is only used for ranking, so a bad value degrades to 0.
+		pop, _ := strconv.Atoi(fields[14])
+
+		if _, err := stmt.ExecContext(ctx, geoID, name, strings.ToLower(name), asciiName, strings.ToLower(asciiName), cc, lat, lng, pop); err != nil {
+			return fmt.Errorf("insert city %q: %w", name, err)
+		}
+		count++
+	}
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("scan geonames file: %w", err)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit geonames import: %w", err)
+	}
+	fmt.Printf("Imported %d cities from GeoNames\n", count)
+	return nil
+}
+
+// HasCities returns true if the cities table is populated.
+func (db *DB) HasCities(ctx context.Context) bool {
+	// EXISTS can stop at the first row. The earlier
+	// `SELECT COUNT(*) FROM cities LIMIT 1` still counted every row, because
+	// LIMIT only caps the single aggregate result row.
+	var exists int
+	if err := db.sql.QueryRowContext(ctx, `SELECT EXISTS(SELECT 1 FROM cities)`).Scan(&exists); err != nil {
+		// Any query failure is reported as "not populated".
+		return false
+	}
+	return exists != 0
+}
+
+// LookupCity finds the largest city matching the given name. If countryCode
+// is non-empty, results for that country are strongly preferred.
+//
+// The trimmed, lower-cased name is compared against both the name_lower and
+// ascii_lower columns; countryCode is trimmed and upper-cased before use.
+// The boolean result is false when name is blank, when no row matches, or
+// when the query fails.
+func (db *DB) LookupCity(ctx context.Context, name string, countryCode string) (CityResult, bool) {
+	nameLower := strings.ToLower(strings.TrimSpace(name))
+	countryCode = strings.ToUpper(strings.TrimSpace(countryCode))
+
+	if nameLower == "" {
+		return CityResult{}, false
+	}
+
+	// Try country-specific match first.
+	if countryCode != "" {
+		var r CityResult
+		err := db.sql.QueryRowContext(ctx,
+			`SELECT name, country_code, lat, lng, population FROM cities
+			 WHERE (name_lower = ? OR ascii_lower = ?) AND country_code = ?
+			 ORDER BY population DESC LIMIT 1`,
+			nameLower, nameLower, countryCode).Scan(&r.Name, &r.CountryCode, &r.Lat, &r.Lng, &r.Population)
+		if err == nil {
+			return r, true
+		}
+		// No row (or a failed query) for that country: fall through to the
+		// worldwide lookup below.
+	}
+
+	// Fallback: largest city worldwide with that name.
+	var r CityResult
+	err := db.sql.QueryRowContext(ctx,
+		`SELECT name, country_code, lat, lng, population FROM cities
+		 WHERE name_lower = ? OR ascii_lower = ?
+		 ORDER BY population DESC LIMIT 1`,
+		nameLower, nameLower).Scan(&r.Name, &r.CountryCode, &r.Lat, &r.Lng, &r.Population)
+	if err == nil {
+		return r, true
+	}
+	return CityResult{}, false
+}
+
+// LookupCities finds all cities matching the given name, ordered by
+// population descending. Used for batch scanning of text.
+//
+// At most 10 rows are returned. A blank name yields (nil, nil). Query,
+// row-scan and iteration errors are returned to the caller.
+func (db *DB) LookupCities(ctx context.Context, name string) ([]CityResult, error) {
+	nameLower := strings.ToLower(strings.TrimSpace(name))
+	if nameLower == "" {
+		return nil, nil
+	}
+
+	rows, err := db.sql.QueryContext(ctx,
+		`SELECT name, country_code, lat, lng, population FROM cities
+		 WHERE name_lower = ? OR ascii_lower = ?
+		 ORDER BY population DESC LIMIT 10`,
+		nameLower, nameLower)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	var results []CityResult
+	for rows.Next() {
+		var r CityResult
+		if err := rows.Scan(&r.Name, &r.CountryCode, &r.Lat, &r.Lng, &r.Population); err != nil {
+			// Report the failure instead of silently dropping the row.
+			return nil, err
+		}
+		results = append(results, r)
+	}
+	return results, rows.Err()
+}
diff --git a/internal/sourcedb/schema.sql b/internal/sourcedb/schema.sql new file mode 100644 index 0000000..b766cc5 --- /dev/null +++ b/internal/sourcedb/schema.sql @@ -0,0 +1,188 @@ +CREATE TABLE IF NOT EXISTS agencies ( + id TEXT PRIMARY KEY, + authority_name TEXT NOT NULL, + language_code TEXT NOT NULL DEFAULT '', + country TEXT NOT NULL, + country_code TEXT NOT NULL, + region TEXT NOT NULL, + authority_type TEXT NOT NULL, + base_url TEXT NOT NULL, + scope TEXT NOT NULL DEFAULT 'national', + level TEXT NOT NULL DEFAULT 'national', + parent_agency_id TEXT NOT NULL DEFAULT '', + jurisdiction_name TEXT NOT NULL DEFAULT '', + mission_tags_json TEXT NOT NULL DEFAULT '[]', + operational_relevance REAL NOT NULL DEFAULT 0, + is_curated INTEGER NOT NULL DEFAULT 0, + is_high_value INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS agency_aliases ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + agency_id TEXT NOT NULL, + alias TEXT NOT NULL, + alias_type TEXT NOT NULL DEFAULT 'short_name', + UNIQUE(agency_id, alias), + FOREIGN KEY (agency_id) REFERENCES agencies(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS sources ( + id TEXT PRIMARY KEY, + agency_id TEXT NOT NULL, + language_code TEXT NOT NULL DEFAULT '', + type TEXT NOT NULL, + fetch_mode TEXT NOT NULL DEFAULT '', + follow_redirects INTEGER NOT NULL DEFAULT 0, + feed_url TEXT NOT NULL, + feed_urls_json TEXT NOT NULL DEFAULT '[]', + category TEXT NOT NULL, + region_tag TEXT NOT NULL DEFAULT '', + lat REAL NOT NULL DEFAULT 0, + lng REAL NOT
NULL DEFAULT 0, + max_items INTEGER NOT NULL DEFAULT 0, + include_keywords_json TEXT NOT NULL DEFAULT '[]', + exclude_keywords_json TEXT NOT NULL DEFAULT '[]', + source_quality REAL NOT NULL DEFAULT 0, + promotion_status TEXT NOT NULL DEFAULT 'candidate', + rejection_reason TEXT NOT NULL DEFAULT '', + is_mirror INTEGER NOT NULL DEFAULT 0, + preferred_source_rank INTEGER NOT NULL DEFAULT 0, + reporting_label TEXT NOT NULL DEFAULT '', + reporting_url TEXT NOT NULL DEFAULT '', + reporting_phone TEXT NOT NULL DEFAULT '', + reporting_notes TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'active', + last_http_status INTEGER, + last_ok_at TEXT, + last_error TEXT, + last_error_class TEXT, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (agency_id) REFERENCES agencies(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS source_categories ( + source_id TEXT NOT NULL, + category TEXT NOT NULL, + PRIMARY KEY (source_id, category), + FOREIGN KEY (source_id) REFERENCES sources(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS agency_category_coverage ( + agency_id TEXT NOT NULL, + category TEXT NOT NULL, + PRIMARY KEY (agency_id, category), + FOREIGN KEY (agency_id) REFERENCES agencies(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS source_checks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id TEXT NOT NULL, + checked_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + http_status INTEGER, + final_url TEXT NOT NULL DEFAULT '', + content_type TEXT NOT NULL DEFAULT '', + ok INTEGER NOT NULL DEFAULT 0, + error TEXT NOT NULL DEFAULT '', + error_class TEXT NOT NULL DEFAULT '', + FOREIGN KEY (source_id) REFERENCES sources(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS source_candidates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + agency_id TEXT, + discovered_url TEXT NOT NULL, + discovered_via TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL DEFAULT 'candidate', + language_code 
TEXT NOT NULL DEFAULT '', + category TEXT NOT NULL DEFAULT '', + authority_type TEXT NOT NULL DEFAULT '', + country TEXT NOT NULL DEFAULT '', + country_code TEXT NOT NULL DEFAULT '', + checked_at TEXT, + notes TEXT NOT NULL DEFAULT '', + UNIQUE(discovered_url), + FOREIGN KEY (agency_id) REFERENCES agencies(id) ON DELETE SET NULL +); + +CREATE TABLE IF NOT EXISTS source_term_overrides ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id TEXT NOT NULL, + category TEXT NOT NULL, + language_code TEXT NOT NULL DEFAULT '', + term TEXT NOT NULL, + term_type TEXT NOT NULL, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + UNIQUE(source_id, category, language_code, term, term_type), + FOREIGN KEY (source_id) REFERENCES sources(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS alerts ( + alert_id TEXT PRIMARY KEY, + source_id TEXT NOT NULL, + status TEXT NOT NULL, + first_seen TEXT NOT NULL, + last_seen TEXT NOT NULL, + title TEXT NOT NULL, + canonical_url TEXT NOT NULL, + category TEXT NOT NULL, + severity TEXT NOT NULL, + region_tag TEXT NOT NULL, + lat REAL NOT NULL DEFAULT 0, + lng REAL NOT NULL DEFAULT 0, + freshness_hours INTEGER NOT NULL DEFAULT 0, + source_json TEXT NOT NULL, + reporting_json TEXT NOT NULL DEFAULT '{}', + triage_json TEXT NOT NULL DEFAULT 'null' +); + +CREATE INDEX IF NOT EXISTS idx_sources_agency_id ON sources(agency_id); +CREATE INDEX IF NOT EXISTS idx_sources_status ON sources(status); +CREATE INDEX IF NOT EXISTS idx_sources_feed_url ON sources(feed_url); +CREATE INDEX IF NOT EXISTS idx_source_checks_source_id_checked_at ON source_checks(source_id, checked_at DESC); +CREATE INDEX IF NOT EXISTS idx_source_candidates_status ON source_candidates(status); +CREATE UNIQUE INDEX IF NOT EXISTS idx_source_candidates_discovered_url ON source_candidates(discovered_url); +CREATE INDEX IF NOT EXISTS idx_source_term_overrides_source_id ON source_term_overrides(source_id); +CREATE INDEX IF NOT EXISTS idx_agency_category_coverage_category ON 
agency_category_coverage(category); +CREATE INDEX IF NOT EXISTS idx_alerts_status ON alerts(status); +CREATE INDEX IF NOT EXISTS idx_alerts_source_id ON alerts(source_id); + +CREATE TABLE IF NOT EXISTS cities ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + name_lower TEXT NOT NULL, + ascii_name TEXT NOT NULL, + ascii_lower TEXT NOT NULL, + country_code TEXT NOT NULL, + lat REAL NOT NULL, + lng REAL NOT NULL, + population INTEGER NOT NULL DEFAULT 0 +); +CREATE INDEX IF NOT EXISTS idx_cities_name_lower ON cities(name_lower); +CREATE INDEX IF NOT EXISTS idx_cities_ascii_lower ON cities(ascii_lower); +CREATE INDEX IF NOT EXISTS idx_cities_country_code ON cities(country_code); + +CREATE VIRTUAL TABLE IF NOT EXISTS agencies_fts USING fts5( + agency_id UNINDEXED, + authority_name, + aliases, + country, + country_code, + region, + authority_type, + base_url +); + +CREATE VIRTUAL TABLE IF NOT EXISTS alerts_fts USING fts5( + alert_id UNINDEXED, + title, + canonical_url UNINDEXED, + category, + severity UNINDEXED, + region_tag, + source_authority, + source_country, + source_country_code +); diff --git a/package-lock.json b/package-lock.json index fb738bb..5665a0c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,24 +8,22 @@ "name": "euosint", "version": "0.0.0", "dependencies": { - "@types/three": "^0.182.0", "clsx": "^2.1.1", + "leaflet": "^1.9.4", + "leaflet.markercluster": "^1.5.3", "lucide-react": "^0.563.0", "react": "^19.2.0", "react-dom": "^19.2.0", - "react-globe.gl": "^2.37.0", - "tailwind-merge": "^3.4.0", - "three": "^0.182.0", - "topojson-client": "^3.1.0" + "tailwind-merge": "^3.4.0" }, "devDependencies": { "@eslint/js": "^9.39.1", "@tailwindcss/vite": "^4.1.18", + "@types/leaflet": "^1.9.21", + "@types/leaflet.markercluster": "^1.5.6", "@types/node": "^24.10.1", "@types/react": "^19.2.5", "@types/react-dom": "^19.2.3", - "@types/topojson-client": "^3.1.5", - "@types/topojson-specification": "^1.0.5", "@vitejs/plugin-react": "^5.1.1", "eslint": 
"^9.39.1", "eslint-plugin-react-hooks": "^7.0.1", @@ -275,15 +273,6 @@ "@babel/core": "^7.0.0-0" } }, - "node_modules/@babel/runtime": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz", - "integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, "node_modules/@babel/template": { "version": "7.28.6", "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", @@ -332,12 +321,6 @@ "node": ">=6.9.0" } }, - "node_modules/@dimforge/rapier3d-compat": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/@dimforge/rapier3d-compat/-/rapier3d-compat-0.12.0.tgz", - "integrity": "sha512-uekIGetywIgopfD97oDL5PfeezkFpNhwlzlaEYNOA0N6ghdsOvh/HYjSMek5Q2O1PYvRSDFcqFVJl4r4ZBwOow==", - "license": "Apache-2.0" - }, "node_modules/@esbuild/aix-ppc64": { "version": "0.27.3", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.3.tgz", @@ -1668,55 +1651,6 @@ "vite": "^5.2.0 || ^6 || ^7" } }, - "node_modules/@turf/boolean-point-in-polygon": { - "version": "7.3.4", - "resolved": "https://registry.npmjs.org/@turf/boolean-point-in-polygon/-/boolean-point-in-polygon-7.3.4.tgz", - "integrity": "sha512-v/4hfyY90Vz9cDgs2GwjQf+Lft8o7mNCLJOTz/iv8SHAIgMMX0czEoIaNVOJr7tBqPqwin1CGwsncrkf5C9n8Q==", - "license": "MIT", - "dependencies": { - "@turf/helpers": "7.3.4", - "@turf/invariant": "7.3.4", - "@types/geojson": "^7946.0.10", - "point-in-polygon-hao": "^1.1.0", - "tslib": "^2.8.1" - }, - "funding": { - "url": "https://opencollective.com/turf" - } - }, - "node_modules/@turf/helpers": { - "version": "7.3.4", - "resolved": "https://registry.npmjs.org/@turf/helpers/-/helpers-7.3.4.tgz", - "integrity": "sha512-U/S5qyqgx3WTvg4twaH0WxF3EixoTCfDsmk98g1E3/5e2YKp7JKYZdz0vivsS5/UZLJeZDEElOSFH4pUgp+l7g==", - "license": "MIT", - "dependencies": { - "@types/geojson": "^7946.0.10", - 
"tslib": "^2.8.1" - }, - "funding": { - "url": "https://opencollective.com/turf" - } - }, - "node_modules/@turf/invariant": { - "version": "7.3.4", - "resolved": "https://registry.npmjs.org/@turf/invariant/-/invariant-7.3.4.tgz", - "integrity": "sha512-88Eo4va4rce9sNZs6XiMJowWkikM3cS2TBhaCKlU+GFHdNf8PFEpiU42VDU8q5tOF6/fu21Rvlke5odgOGW4AQ==", - "license": "MIT", - "dependencies": { - "@turf/helpers": "7.3.4", - "@types/geojson": "^7946.0.10", - "tslib": "^2.8.1" - }, - "funding": { - "url": "https://opencollective.com/turf" - } - }, - "node_modules/@tweenjs/tween.js": { - "version": "23.1.3", - "resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-23.1.3.tgz", - "integrity": "sha512-vJmvvwFxYuGnF2axRtPYocag6Clbb5YS7kLL+SO/TeVFzHqDIWrNKYtcsPMibjDx9O+bu+psAy9NKfWklassUA==", - "license": "MIT" - }, "node_modules/@types/babel__core": { "version": "7.20.5", "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", @@ -1773,6 +1707,7 @@ "version": "7946.0.16", "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.16.tgz", "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==", + "dev": true, "license": "MIT" }, "node_modules/@types/json-schema": { @@ -1782,6 +1717,26 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/leaflet": { + "version": "1.9.21", + "resolved": "https://registry.npmjs.org/@types/leaflet/-/leaflet-1.9.21.tgz", + "integrity": "sha512-TbAd9DaPGSnzp6QvtYngntMZgcRk+igFELwR2N99XZn7RXUdKgsXMR+28bUO0rPsWp8MIu/f47luLIQuSLYv/w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/geojson": "*" + } + }, + "node_modules/@types/leaflet.markercluster": { + "version": "1.5.6", + "resolved": "https://registry.npmjs.org/@types/leaflet.markercluster/-/leaflet.markercluster-1.5.6.tgz", + "integrity": "sha512-I7hZjO2+isVXGYWzKxBp8PsCzAYCJBc29qBdFpquOCkS7zFDqUsUvkEOyQHedsk/Cy5tocQzf+Ndorm5W9YKTQ==", + "dev": true, + "license": "MIT", 
+ "dependencies": { + "@types/leaflet": "^1.9" + } + }, "node_modules/@types/node": { "version": "24.10.12", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.12.tgz", @@ -1812,54 +1767,6 @@ "@types/react": "^19.2.0" } }, - "node_modules/@types/stats.js": { - "version": "0.17.4", - "resolved": "https://registry.npmjs.org/@types/stats.js/-/stats.js-0.17.4.tgz", - "integrity": "sha512-jIBvWWShCvlBqBNIZt0KAshWpvSjhkwkEu4ZUcASoAvhmrgAUI2t1dXrjSL4xXVLB4FznPrIsX3nKXFl/Dt4vA==", - "license": "MIT" - }, - "node_modules/@types/three": { - "version": "0.182.0", - "resolved": "https://registry.npmjs.org/@types/three/-/three-0.182.0.tgz", - "integrity": "sha512-WByN9V3Sbwbe2OkWuSGyoqQO8Du6yhYaXtXLoA5FkKTUJorZ+yOHBZ35zUUPQXlAKABZmbYp5oAqpA4RBjtJ/Q==", - "license": "MIT", - "dependencies": { - "@dimforge/rapier3d-compat": "~0.12.0", - "@tweenjs/tween.js": "~23.1.3", - "@types/stats.js": "*", - "@types/webxr": ">=0.5.17", - "@webgpu/types": "*", - "fflate": "~0.8.2", - "meshoptimizer": "~0.22.0" - } - }, - "node_modules/@types/topojson-client": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/@types/topojson-client/-/topojson-client-3.1.5.tgz", - "integrity": "sha512-C79rySTyPxnQNNguTZNI1Ct4D7IXgvyAs3p9HPecnl6mNrJ5+UhvGNYcZfpROYV2lMHI48kJPxwR+F9C6c7nmw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/geojson": "*", - "@types/topojson-specification": "*" - } - }, - "node_modules/@types/topojson-specification": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@types/topojson-specification/-/topojson-specification-1.0.5.tgz", - "integrity": "sha512-C7KvcQh+C2nr6Y2Ub4YfgvWvWCgP2nOQMtfhlnwsRL4pYmmwzBS7HclGiS87eQfDOU/DLQpX6GEscviaz4yLIQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/geojson": "*" - } - }, - "node_modules/@types/webxr": { - "version": "0.5.24", - "resolved": "https://registry.npmjs.org/@types/webxr/-/webxr-0.5.24.tgz", - "integrity": 
"sha512-h8fgEd/DpoS9CBrjEQXR+dIDraopAEfu4wYVNY2tEPwk60stPWhvZMf4Foo5FakuQ7HFZoa8WceaWFervK2Ovg==", - "license": "MIT" - }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.54.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.54.0.tgz", @@ -2150,21 +2057,6 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, - "node_modules/@webgpu/types": { - "version": "0.1.69", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.69.tgz", - "integrity": "sha512-RPmm6kgRbI8e98zSD3RVACvnuktIja5+yLgDAkTmxLr90BEwdTXRQWNLF3ETTTyH/8mKhznZuN5AveXYFEsMGQ==", - "license": "BSD-3-Clause" - }, - "node_modules/accessor-fn": { - "version": "1.5.3", - "resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz", - "integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==", - "license": "MIT", - "engines": { - "node": ">=12" - } - }, "node_modules/acorn": { "version": "8.15.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", @@ -2367,12 +2259,6 @@ "dev": true, "license": "MIT" }, - "node_modules/commander": { - "version": "2.20.3", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", - "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", - "license": "MIT" - }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -2409,180 +2295,6 @@ "dev": true, "license": "MIT" }, - "node_modules/d3-array": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", - "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", - "license": "ISC", - "dependencies": { - "internmap": "1 - 2" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-color": { - "version": "3.1.0", - "resolved": 
"https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", - "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-delaunay": { - "version": "6.0.4", - "resolved": "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz", - "integrity": "sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==", - "license": "ISC", - "dependencies": { - "delaunator": "5" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-format": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz", - "integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-geo": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/d3-geo/-/d3-geo-3.1.1.tgz", - "integrity": "sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q==", - "license": "ISC", - "dependencies": { - "d3-array": "2.5.0 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-geo-voronoi": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/d3-geo-voronoi/-/d3-geo-voronoi-2.1.0.tgz", - "integrity": "sha512-kqE4yYuOjPbKdBXG0xztCacPwkVSK2REF1opSNrnqqtXJmNcM++UbwQ8SxvwP6IQTj9RvIjjK4qeiVsEfj0Z2Q==", - "license": "ISC", - "dependencies": { - "d3-array": "3", - "d3-delaunay": "6", - "d3-geo": "3", - "d3-tricontour": "1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-interpolate": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", - "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3" - }, - "engines": { 
- "node": ">=12" - } - }, - "node_modules/d3-octree": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz", - "integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==", - "license": "MIT" - }, - "node_modules/d3-scale": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", - "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", - "license": "ISC", - "dependencies": { - "d3-array": "2.10.0 - 3", - "d3-format": "1 - 3", - "d3-interpolate": "1.2.0 - 3", - "d3-time": "2.1.1 - 3", - "d3-time-format": "2 - 4" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-scale-chromatic": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", - "integrity": "sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==", - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3", - "d3-interpolate": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-selection": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-time": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", - "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", - "license": "ISC", - "dependencies": { - "d3-array": "2 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-time-format": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", - "integrity": 
"sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", - "license": "ISC", - "dependencies": { - "d3-time": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-tricontour": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/d3-tricontour/-/d3-tricontour-1.1.0.tgz", - "integrity": "sha512-G7gHKj89n2owmkGb6WX6ixcnQ0Kf/0wpa9VIh9DGdbHu8wdrlaHU4ir3/bFNERl8N8nn4G7e7qbtBG8N9caihQ==", - "license": "ISC", - "dependencies": { - "d3-delaunay": "6", - "d3-scale": "4" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/data-bind-mapper": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/data-bind-mapper/-/data-bind-mapper-1.0.3.tgz", - "integrity": "sha512-QmU3lyEnbENQPo0M1F9BMu4s6cqNNp8iJA+b/HP2sSb7pf3dxwF3+EP1eO69rwBfH9kFJ1apmzrtogAmVt2/Xw==", - "license": "MIT", - "dependencies": { - "accessor-fn": "1" - }, - "engines": { - "node": ">=12" - } - }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -2608,15 +2320,6 @@ "dev": true, "license": "MIT" }, - "node_modules/delaunator": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/delaunator/-/delaunator-5.0.1.tgz", - "integrity": "sha512-8nvh+XBe96aCESrGOqMp/84b13H9cdKbG5P2ejQCh4d4sK9RL4371qou9drQjMhvnPmhWl5hnmqbEE0fXr9Xnw==", - "license": "ISC", - "dependencies": { - "robust-predicates": "^3.0.2" - } - }, "node_modules/detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", @@ -2627,12 +2330,6 @@ "node": ">=8" } }, - "node_modules/earcut": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/earcut/-/earcut-3.0.2.tgz", - "integrity": "sha512-X7hshQbLyMJ/3RPhyObLARM2sNxxmRALLKx1+NVFFnQ9gKzmCrxm9+uLIAdBcvc8FNLpctqlQ2V6AE92Ol9UDQ==", - "license": "ISC" - }, "node_modules/electron-to-chromium": { "version": "1.5.286", "resolved": 
"https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.286.tgz", @@ -2942,12 +2639,6 @@ } } }, - "node_modules/fflate": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.2.tgz", - "integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==", - "license": "MIT" - }, "node_modules/file-entry-cache": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", @@ -2999,29 +2690,6 @@ "dev": true, "license": "ISC" }, - "node_modules/float-tooltip": { - "version": "1.7.5", - "resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz", - "integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==", - "license": "MIT", - "dependencies": { - "d3-selection": "2 - 3", - "kapsule": "^1.16", - "preact": "10" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/frame-ticker": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/frame-ticker/-/frame-ticker-1.0.3.tgz", - "integrity": "sha512-E0X2u2JIvbEMrqEg5+4BpTqaD22OwojJI63K7MdKHdncjtAhGRbCR8nJCr2vwEt9NWBPCPcu70X9smPviEBy8Q==", - "license": "MIT", - "dependencies": { - "simplesignal": "^2.1.6" - } - }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -3073,23 +2741,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/globe.gl": { - "version": "2.45.0", - "resolved": "https://registry.npmjs.org/globe.gl/-/globe.gl-2.45.0.tgz", - "integrity": "sha512-fjkLHVBrnbESkUgklTd4UbcGLciu4nIl49IIi1hclLjI6MU3ASu6JYmf/K5qwPf7I+tNOauQRr4i5Y28JTtHQg==", - "license": "MIT", - "dependencies": { - "@tweenjs/tween.js": "18 - 25", - "accessor-fn": "1", - "kapsule": "^1.16", - "three": ">=0.154 <1", - "three-globe": "^2.45", - "three-render-objects": "^1.40" - }, - "engines": { - "node": ">=12" - } - }, 
"node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -3097,17 +2748,6 @@ "dev": true, "license": "ISC" }, - "node_modules/h3-js": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/h3-js/-/h3-js-4.4.0.tgz", - "integrity": "sha512-DvJh07MhGgY2KcC4OeZc8SSyA+ZXpdvoh6uCzGpoKvWtZxJB+g6VXXC1+eWYkaMIsLz7J/ErhOalHCpcs1KYog==", - "license": "Apache-2.0", - "engines": { - "node": ">=4", - "npm": ">=3", - "yarn": ">=1.3.0" - } - }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -3172,24 +2812,6 @@ "node": ">=0.8.19" } }, - "node_modules/index-array-by": { - "version": "1.4.2", - "resolved": "https://registry.npmjs.org/index-array-by/-/index-array-by-1.4.2.tgz", - "integrity": "sha512-SP23P27OUKzXWEC/TOyWlwLviofQkCSCKONnc62eItjp69yCZZPqDQtr3Pw5gJDnPeUMqExmKydNZaJO0FU9pw==", - "license": "MIT", - "engines": { - "node": ">=12" - } - }, - "node_modules/internmap": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", - "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -3220,15 +2842,6 @@ "dev": true, "license": "ISC" }, - "node_modules/jerrypick": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/jerrypick/-/jerrypick-1.1.2.tgz", - "integrity": "sha512-YKnxXEekXKzhpf7CLYA0A+oDP8V0OhICNCr5lv96FvSsDEmrb0GKM776JgQvHTMjr7DTTPEVv/1Ciaw0uEWzBA==", - "license": "MIT", - "engines": { - "node": ">=12" - } - }, "node_modules/jiti": { "version": "2.6.1", "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", @@ -3243,6 +2856,7 @@ "version": "4.0.0", "resolved": 
"https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true, "license": "MIT" }, "node_modules/js-yaml": { @@ -3305,18 +2919,6 @@ "node": ">=6" } }, - "node_modules/kapsule": { - "version": "1.16.3", - "resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz", - "integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==", - "license": "MIT", - "dependencies": { - "lodash-es": "4" - }, - "engines": { - "node": ">=12" - } - }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", @@ -3327,6 +2929,21 @@ "json-buffer": "3.0.1" } }, + "node_modules/leaflet": { + "version": "1.9.4", + "resolved": "https://registry.npmjs.org/leaflet/-/leaflet-1.9.4.tgz", + "integrity": "sha512-nxS1ynzJOmOlHp+iL3FyWqK89GtNL8U8rvlMOsQdTTssxZwCXh8N2NB3GDQOL+YR3XnWyZAxwQixURb+FA74PA==", + "license": "BSD-2-Clause" + }, + "node_modules/leaflet.markercluster": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/leaflet.markercluster/-/leaflet.markercluster-1.5.3.tgz", + "integrity": "sha512-vPTw/Bndq7eQHjLBVlWpnGeLa3t+3zGiuM7fJwCkiMFq+nmRuG3RI3f7f4N4TDX7T4NpbAXpR2+NTRSEGfCSeA==", + "license": "MIT", + "peerDependencies": { + "leaflet": "^1.3.1" + } + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -3618,12 +3235,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/lodash-es": { - "version": "4.17.23", - "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz", - "integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==", - "license": "MIT" - }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", @@ -3631,18 
+3242,6 @@ "dev": true, "license": "MIT" }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "license": "MIT", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, "node_modules/lru-cache": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -3672,12 +3271,6 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, - "node_modules/meshoptimizer": { - "version": "0.22.0", - "resolved": "https://registry.npmjs.org/meshoptimizer/-/meshoptimizer-0.22.0.tgz", - "integrity": "sha512-IebiK79sqIy+E4EgOr+CAw+Ke8hAspXKzBd0JdgEmPHiAwmvEj2S4h1rfvo+o/BnfEYd/jAOg5IeeIjzlzSnDg==", - "license": "MIT" - }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -3731,15 +3324,6 @@ "dev": true, "license": "MIT" }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -3843,27 +3427,6 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, - "node_modules/point-in-polygon-hao": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/point-in-polygon-hao/-/point-in-polygon-hao-1.2.4.tgz", - "integrity": "sha512-x2pcvXeqhRHlNRdhLs/tgFapAbSSe86wa/eqmj1G6pWftbEs5aVRJhRGM6FYSUERKu0PjekJzMq0gsI2XyiclQ==", - "license": "MIT", - "dependencies": { - "robust-predicates": "^3.0.2" - } - }, - "node_modules/polished": { - "version": "4.3.1", 
- "resolved": "https://registry.npmjs.org/polished/-/polished-4.3.1.tgz", - "integrity": "sha512-OBatVyC/N7SCW/FaDHrSd+vn0o5cS855TOmYi4OkdWUMSJCET/xip//ch8xGUvtr3i44X9LVyWwQlRMTN3pwSA==", - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.17.8" - }, - "engines": { - "node": ">=10" - } - }, "node_modules/postcss": { "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", @@ -3893,16 +3456,6 @@ "node": "^10 || ^12 || >=14" } }, - "node_modules/preact": { - "version": "10.28.3", - "resolved": "https://registry.npmjs.org/preact/-/preact-10.28.3.tgz", - "integrity": "sha512-tCmoRkPQLpBeWzpmbhryairGnhW9tKV6c6gr/w+RhoRoKEJwsjzipwp//1oCpGPOchvSLaAPlpcJi9MwMmoPyA==", - "license": "MIT", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/preact" - } - }, "node_modules/prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", @@ -3913,17 +3466,6 @@ "node": ">= 0.8.0" } }, - "node_modules/prop-types": { - "version": "15.8.1", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", - "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", - "license": "MIT", - "dependencies": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.13.1" - } - }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -3955,44 +3497,6 @@ "react": "^19.2.4" } }, - "node_modules/react-globe.gl": { - "version": "2.37.0", - "resolved": "https://registry.npmjs.org/react-globe.gl/-/react-globe.gl-2.37.0.tgz", - "integrity": "sha512-nN1FDOJBhFvWfKrOY0SnkDuA8wk9FSTBN0HFfAdTqqcFM5R+OXBIxK0BM6t8n3oNVYpEJVxEzjYFwLyk4BC7Cw==", - "license": "MIT", - "dependencies": { - "globe.gl": "^2.45", - "prop-types": "15", - "react-kapsule": "^2.5" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "react": "*" - 
} - }, - "node_modules/react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "license": "MIT" - }, - "node_modules/react-kapsule": { - "version": "2.5.7", - "resolved": "https://registry.npmjs.org/react-kapsule/-/react-kapsule-2.5.7.tgz", - "integrity": "sha512-kifAF4ZPD77qZKc4CKLmozq6GY1sBzPEJTIJb0wWFK6HsePJatK3jXplZn2eeAt3x67CDozgi7/rO8fNQ/AL7A==", - "license": "MIT", - "dependencies": { - "jerrypick": "^1.1.1" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "react": ">=16.13.1" - } - }, "node_modules/react-refresh": { "version": "0.18.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.18.0.tgz", @@ -4013,12 +3517,6 @@ "node": ">=4" } }, - "node_modules/robust-predicates": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz", - "integrity": "sha512-IXgzBWvWQwE6PrDI05OvmXUIruQTcoMDzRsOd5CDvHCVLcLHMTSYvOK5Cm46kWqlV3yAbuSpBZdJ5oP5OUoStg==", - "license": "Unlicense" - }, "node_modules/rollup": { "version": "4.57.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz", @@ -4103,12 +3601,6 @@ "node": ">=8" } }, - "node_modules/simplesignal": { - "version": "2.1.7", - "resolved": "https://registry.npmjs.org/simplesignal/-/simplesignal-2.1.7.tgz", - "integrity": "sha512-PEo2qWpUke7IMhlqiBxrulIFvhJRLkl1ih52Rwa+bPjzhJepcd4GIjn2RiQmFSx3dQvsEAgF0/lXMwMN7vODaA==", - "license": "MIT" - }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -4176,123 +3668,6 @@ "url": "https://opencollective.com/webpack" } }, - "node_modules/three": { - "version": "0.182.0", - "resolved": "https://registry.npmjs.org/three/-/three-0.182.0.tgz", - "integrity": 
"sha512-GbHabT+Irv+ihI1/f5kIIsZ+Ef9Sl5A1Y7imvS5RQjWgtTPfPnZ43JmlYI7NtCRDK9zir20lQpfg8/9Yd02OvQ==", - "license": "MIT" - }, - "node_modules/three-conic-polygon-geometry": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/three-conic-polygon-geometry/-/three-conic-polygon-geometry-2.1.2.tgz", - "integrity": "sha512-NaP3RWLJIyPGI+zyaZwd0Yj6rkoxm4FJHqAX1Enb4L64oNYLCn4bz1ESgOEYavgcUwCNYINu1AgEoUBJr1wZcA==", - "license": "MIT", - "dependencies": { - "@turf/boolean-point-in-polygon": "^7.2", - "d3-array": "1 - 3", - "d3-geo": "1 - 3", - "d3-geo-voronoi": "2", - "d3-scale": "1 - 4", - "delaunator": "5", - "earcut": "3" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "three": ">=0.72.0" - } - }, - "node_modules/three-geojson-geometry": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/three-geojson-geometry/-/three-geojson-geometry-2.1.1.tgz", - "integrity": "sha512-dC7bF3ri1goDcihYhzACHOBQqu7YNNazYLa2bSydVIiJUb3jDFojKSy+gNj2pMkqZNSVjssSmdY9zlmnhEpr1w==", - "license": "MIT", - "dependencies": { - "d3-geo": "1 - 3", - "d3-interpolate": "1 - 3", - "earcut": "3" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "three": ">=0.72.0" - } - }, - "node_modules/three-globe": { - "version": "2.45.0", - "resolved": "https://registry.npmjs.org/three-globe/-/three-globe-2.45.0.tgz", - "integrity": "sha512-Ur6BVkezvmHnvsEg8fbq6gIscSZtknSQMWwDRbiJ95o6OSDjDbGTc4oO6nP7mOM9aAA3YrF7YZyOwSkP4T56QA==", - "license": "MIT", - "dependencies": { - "@tweenjs/tween.js": "18 - 25", - "accessor-fn": "1", - "d3-array": "3", - "d3-color": "3", - "d3-geo": "3", - "d3-interpolate": "3", - "d3-scale": "4", - "d3-scale-chromatic": "3", - "data-bind-mapper": "1", - "frame-ticker": "1", - "h3-js": "4", - "index-array-by": "1", - "kapsule": "^1.16", - "three-conic-polygon-geometry": "2", - "three-geojson-geometry": "2", - "three-slippy-map-globe": "1", - "tinycolor2": "1" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - 
"three": ">=0.154" - } - }, - "node_modules/three-render-objects": { - "version": "1.40.4", - "resolved": "https://registry.npmjs.org/three-render-objects/-/three-render-objects-1.40.4.tgz", - "integrity": "sha512-Ukpu1pei3L5r809izvjsZxwuRcYLiyn6Uvy3lZ9bpMTdvj3i6PeX6w++/hs2ZS3KnEzGjb6YvTvh4UQuwHTDJg==", - "license": "MIT", - "dependencies": { - "@tweenjs/tween.js": "18 - 25", - "accessor-fn": "1", - "float-tooltip": "^1.7", - "kapsule": "^1.16", - "polished": "4" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "three": ">=0.168" - } - }, - "node_modules/three-slippy-map-globe": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/three-slippy-map-globe/-/three-slippy-map-globe-1.0.4.tgz", - "integrity": "sha512-am8A4PP38AfTdrhXBDucwPRHLTbBl93yhpjIs56K1TLs9VuUWzg68oim4Dibs9QC1riXbj5SoBp/okA1VN9eYg==", - "license": "MIT", - "dependencies": { - "d3-geo": "1 - 3", - "d3-octree": "^1.1", - "d3-scale": "1 - 4" - }, - "engines": { - "node": ">=12" - }, - "peerDependencies": { - "three": ">=0.154" - } - }, - "node_modules/tinycolor2": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", - "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==", - "license": "MIT" - }, "node_modules/tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -4310,20 +3685,6 @@ "url": "https://github.com/sponsors/SuperchupuDev" } }, - "node_modules/topojson-client": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/topojson-client/-/topojson-client-3.1.0.tgz", - "integrity": "sha512-605uxS6bcYxGXw9qi62XyrV6Q3xwbndjachmNxu8HWTtVPxZfEJN9fd/SZS1Q54Sn2y0TMyMxFj/cJINqGHrKw==", - "license": "ISC", - "dependencies": { - "commander": "2" - }, - "bin": { - "topo2geo": "bin/topo2geo", - "topomerge": "bin/topomerge", - "topoquantize": "bin/topoquantize" - } - }, "node_modules/ts-api-utils": { 
"version": "2.4.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", @@ -4341,7 +3702,9 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" + "dev": true, + "license": "0BSD", + "optional": true }, "node_modules/type-check": { "version": "0.4.0", @@ -4742,11 +4105,6 @@ "@babel/helper-plugin-utils": "^7.27.1" } }, - "@babel/runtime": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz", - "integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==" - }, "@babel/template": { "version": "7.28.6", "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", @@ -4783,11 +4141,6 @@ "@babel/helper-validator-identifier": "^7.28.5" } }, - "@dimforge/rapier3d-compat": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/@dimforge/rapier3d-compat/-/rapier3d-compat-0.12.0.tgz", - "integrity": "sha512-uekIGetywIgopfD97oDL5PfeezkFpNhwlzlaEYNOA0N6ghdsOvh/HYjSMek5Q2O1PYvRSDFcqFVJl4r4ZBwOow==" - }, "@esbuild/aix-ppc64": { "version": "0.27.3", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.3.tgz", @@ -5458,42 +4811,6 @@ "tailwindcss": "4.1.18" } }, - "@turf/boolean-point-in-polygon": { - "version": "7.3.4", - "resolved": "https://registry.npmjs.org/@turf/boolean-point-in-polygon/-/boolean-point-in-polygon-7.3.4.tgz", - "integrity": "sha512-v/4hfyY90Vz9cDgs2GwjQf+Lft8o7mNCLJOTz/iv8SHAIgMMX0czEoIaNVOJr7tBqPqwin1CGwsncrkf5C9n8Q==", - "requires": { - "@turf/helpers": "7.3.4", - "@turf/invariant": "7.3.4", - "@types/geojson": "^7946.0.10", - "point-in-polygon-hao": "^1.1.0", - "tslib": "^2.8.1" - } - }, - "@turf/helpers": { - "version": "7.3.4", - "resolved": "https://registry.npmjs.org/@turf/helpers/-/helpers-7.3.4.tgz", - 
"integrity": "sha512-U/S5qyqgx3WTvg4twaH0WxF3EixoTCfDsmk98g1E3/5e2YKp7JKYZdz0vivsS5/UZLJeZDEElOSFH4pUgp+l7g==", - "requires": { - "@types/geojson": "^7946.0.10", - "tslib": "^2.8.1" - } - }, - "@turf/invariant": { - "version": "7.3.4", - "resolved": "https://registry.npmjs.org/@turf/invariant/-/invariant-7.3.4.tgz", - "integrity": "sha512-88Eo4va4rce9sNZs6XiMJowWkikM3cS2TBhaCKlU+GFHdNf8PFEpiU42VDU8q5tOF6/fu21Rvlke5odgOGW4AQ==", - "requires": { - "@turf/helpers": "7.3.4", - "@types/geojson": "^7946.0.10", - "tslib": "^2.8.1" - } - }, - "@tweenjs/tween.js": { - "version": "23.1.3", - "resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-23.1.3.tgz", - "integrity": "sha512-vJmvvwFxYuGnF2axRtPYocag6Clbb5YS7kLL+SO/TeVFzHqDIWrNKYtcsPMibjDx9O+bu+psAy9NKfWklassUA==" - }, "@types/babel__core": { "version": "7.20.5", "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", @@ -5544,7 +4861,8 @@ "@types/geojson": { "version": "7946.0.16", "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.16.tgz", - "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==" + "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==", + "dev": true }, "@types/json-schema": { "version": "7.0.15", @@ -5552,6 +4870,24 @@ "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", "dev": true }, + "@types/leaflet": { + "version": "1.9.21", + "resolved": "https://registry.npmjs.org/@types/leaflet/-/leaflet-1.9.21.tgz", + "integrity": "sha512-TbAd9DaPGSnzp6QvtYngntMZgcRk+igFELwR2N99XZn7RXUdKgsXMR+28bUO0rPsWp8MIu/f47luLIQuSLYv/w==", + "dev": true, + "requires": { + "@types/geojson": "*" + } + }, + "@types/leaflet.markercluster": { + "version": "1.5.6", + "resolved": "https://registry.npmjs.org/@types/leaflet.markercluster/-/leaflet.markercluster-1.5.6.tgz", + "integrity": 
"sha512-I7hZjO2+isVXGYWzKxBp8PsCzAYCJBc29qBdFpquOCkS7zFDqUsUvkEOyQHedsk/Cy5tocQzf+Ndorm5W9YKTQ==", + "dev": true, + "requires": { + "@types/leaflet": "^1.9" + } + }, "@types/node": { "version": "24.10.12", "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.12.tgz", @@ -5577,49 +4913,6 @@ "dev": true, "requires": {} }, - "@types/stats.js": { - "version": "0.17.4", - "resolved": "https://registry.npmjs.org/@types/stats.js/-/stats.js-0.17.4.tgz", - "integrity": "sha512-jIBvWWShCvlBqBNIZt0KAshWpvSjhkwkEu4ZUcASoAvhmrgAUI2t1dXrjSL4xXVLB4FznPrIsX3nKXFl/Dt4vA==" - }, - "@types/three": { - "version": "0.182.0", - "resolved": "https://registry.npmjs.org/@types/three/-/three-0.182.0.tgz", - "integrity": "sha512-WByN9V3Sbwbe2OkWuSGyoqQO8Du6yhYaXtXLoA5FkKTUJorZ+yOHBZ35zUUPQXlAKABZmbYp5oAqpA4RBjtJ/Q==", - "requires": { - "@dimforge/rapier3d-compat": "~0.12.0", - "@tweenjs/tween.js": "~23.1.3", - "@types/stats.js": "*", - "@types/webxr": ">=0.5.17", - "@webgpu/types": "*", - "fflate": "~0.8.2", - "meshoptimizer": "~0.22.0" - } - }, - "@types/topojson-client": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/@types/topojson-client/-/topojson-client-3.1.5.tgz", - "integrity": "sha512-C79rySTyPxnQNNguTZNI1Ct4D7IXgvyAs3p9HPecnl6mNrJ5+UhvGNYcZfpROYV2lMHI48kJPxwR+F9C6c7nmw==", - "dev": true, - "requires": { - "@types/geojson": "*", - "@types/topojson-specification": "*" - } - }, - "@types/topojson-specification": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@types/topojson-specification/-/topojson-specification-1.0.5.tgz", - "integrity": "sha512-C7KvcQh+C2nr6Y2Ub4YfgvWvWCgP2nOQMtfhlnwsRL4pYmmwzBS7HclGiS87eQfDOU/DLQpX6GEscviaz4yLIQ==", - "dev": true, - "requires": { - "@types/geojson": "*" - } - }, - "@types/webxr": { - "version": "0.5.24", - "resolved": "https://registry.npmjs.org/@types/webxr/-/webxr-0.5.24.tgz", - "integrity": "sha512-h8fgEd/DpoS9CBrjEQXR+dIDraopAEfu4wYVNY2tEPwk60stPWhvZMf4Foo5FakuQ7HFZoa8WceaWFervK2Ovg==" - }, 
"@typescript-eslint/eslint-plugin": { "version": "8.54.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.54.0.tgz", @@ -5783,16 +5076,6 @@ "react-refresh": "^0.18.0" } }, - "@webgpu/types": { - "version": "0.1.69", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.69.tgz", - "integrity": "sha512-RPmm6kgRbI8e98zSD3RVACvnuktIja5+yLgDAkTmxLr90BEwdTXRQWNLF3ETTTyH/8mKhznZuN5AveXYFEsMGQ==" - }, - "accessor-fn": { - "version": "1.5.3", - "resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz", - "integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==" - }, "acorn": { "version": "8.15.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", @@ -5910,11 +5193,6 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true }, - "commander": { - "version": "2.20.3", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", - "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" - }, "concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -5944,123 +5222,6 @@ "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", "dev": true }, - "d3-array": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", - "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", - "requires": { - "internmap": "1 - 2" - } - }, - "d3-color": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", - "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==" - }, - "d3-delaunay": { - "version": "6.0.4", - "resolved": 
"https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz", - "integrity": "sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==", - "requires": { - "delaunator": "5" - } - }, - "d3-format": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz", - "integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==" - }, - "d3-geo": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/d3-geo/-/d3-geo-3.1.1.tgz", - "integrity": "sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q==", - "requires": { - "d3-array": "2.5.0 - 3" - } - }, - "d3-geo-voronoi": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/d3-geo-voronoi/-/d3-geo-voronoi-2.1.0.tgz", - "integrity": "sha512-kqE4yYuOjPbKdBXG0xztCacPwkVSK2REF1opSNrnqqtXJmNcM++UbwQ8SxvwP6IQTj9RvIjjK4qeiVsEfj0Z2Q==", - "requires": { - "d3-array": "3", - "d3-delaunay": "6", - "d3-geo": "3", - "d3-tricontour": "1" - } - }, - "d3-interpolate": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", - "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", - "requires": { - "d3-color": "1 - 3" - } - }, - "d3-octree": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz", - "integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==" - }, - "d3-scale": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", - "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", - "requires": { - "d3-array": "2.10.0 - 3", - "d3-format": "1 - 3", - "d3-interpolate": "1.2.0 - 3", - "d3-time": "2.1.1 - 3", - "d3-time-format": "2 - 4" - } - }, - 
"d3-scale-chromatic": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", - "integrity": "sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==", - "requires": { - "d3-color": "1 - 3", - "d3-interpolate": "1 - 3" - } - }, - "d3-selection": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" - }, - "d3-time": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", - "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", - "requires": { - "d3-array": "2 - 3" - } - }, - "d3-time-format": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", - "integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", - "requires": { - "d3-time": "1 - 3" - } - }, - "d3-tricontour": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/d3-tricontour/-/d3-tricontour-1.1.0.tgz", - "integrity": "sha512-G7gHKj89n2owmkGb6WX6ixcnQ0Kf/0wpa9VIh9DGdbHu8wdrlaHU4ir3/bFNERl8N8nn4G7e7qbtBG8N9caihQ==", - "requires": { - "d3-delaunay": "6", - "d3-scale": "4" - } - }, - "data-bind-mapper": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/data-bind-mapper/-/data-bind-mapper-1.0.3.tgz", - "integrity": "sha512-QmU3lyEnbENQPo0M1F9BMu4s6cqNNp8iJA+b/HP2sSb7pf3dxwF3+EP1eO69rwBfH9kFJ1apmzrtogAmVt2/Xw==", - "requires": { - "accessor-fn": "1" - } - }, "debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -6076,25 +5237,12 @@ "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", "dev": true }, - "delaunator": { 
- "version": "5.0.1", - "resolved": "https://registry.npmjs.org/delaunator/-/delaunator-5.0.1.tgz", - "integrity": "sha512-8nvh+XBe96aCESrGOqMp/84b13H9cdKbG5P2ejQCh4d4sK9RL4371qou9drQjMhvnPmhWl5hnmqbEE0fXr9Xnw==", - "requires": { - "robust-predicates": "^3.0.2" - } - }, "detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", "dev": true }, - "earcut": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/earcut/-/earcut-3.0.2.tgz", - "integrity": "sha512-X7hshQbLyMJ/3RPhyObLARM2sNxxmRALLKx1+NVFFnQ9gKzmCrxm9+uLIAdBcvc8FNLpctqlQ2V6AE92Ol9UDQ==" - }, "electron-to-chromium": { "version": "1.5.286", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.286.tgz", @@ -6301,11 +5449,6 @@ "dev": true, "requires": {} }, - "fflate": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.2.tgz", - "integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==" - }, "file-entry-cache": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", @@ -6341,24 +5484,6 @@ "integrity": "sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==", "dev": true }, - "float-tooltip": { - "version": "1.7.5", - "resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz", - "integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==", - "requires": { - "d3-selection": "2 - 3", - "kapsule": "^1.16", - "preact": "10" - } - }, - "frame-ticker": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/frame-ticker/-/frame-ticker-1.0.3.tgz", - "integrity": 
"sha512-E0X2u2JIvbEMrqEg5+4BpTqaD22OwojJI63K7MdKHdncjtAhGRbCR8nJCr2vwEt9NWBPCPcu70X9smPviEBy8Q==", - "requires": { - "simplesignal": "^2.1.6" - } - }, "fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -6387,30 +5512,12 @@ "integrity": "sha512-c/c15i26VrJ4IRt5Z89DnIzCGDn9EcebibhAOjw5ibqEHsE1wLUgkPn9RDmNcUKyU87GeaL633nyJ+pplFR2ZQ==", "dev": true }, - "globe.gl": { - "version": "2.45.0", - "resolved": "https://registry.npmjs.org/globe.gl/-/globe.gl-2.45.0.tgz", - "integrity": "sha512-fjkLHVBrnbESkUgklTd4UbcGLciu4nIl49IIi1hclLjI6MU3ASu6JYmf/K5qwPf7I+tNOauQRr4i5Y28JTtHQg==", - "requires": { - "@tweenjs/tween.js": "18 - 25", - "accessor-fn": "1", - "kapsule": "^1.16", - "three": ">=0.154 <1", - "three-globe": "^2.45", - "three-render-objects": "^1.40" - } - }, "graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", "dev": true }, - "h3-js": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/h3-js/-/h3-js-4.4.0.tgz", - "integrity": "sha512-DvJh07MhGgY2KcC4OeZc8SSyA+ZXpdvoh6uCzGpoKvWtZxJB+g6VXXC1+eWYkaMIsLz7J/ErhOalHCpcs1KYog==" - }, "has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -6454,16 +5561,6 @@ "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", "dev": true }, - "index-array-by": { - "version": "1.4.2", - "resolved": "https://registry.npmjs.org/index-array-by/-/index-array-by-1.4.2.tgz", - "integrity": "sha512-SP23P27OUKzXWEC/TOyWlwLviofQkCSCKONnc62eItjp69yCZZPqDQtr3Pw5gJDnPeUMqExmKydNZaJO0FU9pw==" - }, - "internmap": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", - "integrity": 
"sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==" - }, "is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -6485,11 +5582,6 @@ "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", "dev": true }, - "jerrypick": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/jerrypick/-/jerrypick-1.1.2.tgz", - "integrity": "sha512-YKnxXEekXKzhpf7CLYA0A+oDP8V0OhICNCr5lv96FvSsDEmrb0GKM776JgQvHTMjr7DTTPEVv/1Ciaw0uEWzBA==" - }, "jiti": { "version": "2.6.1", "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", @@ -6499,7 +5591,8 @@ "js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true }, "js-yaml": { "version": "4.1.1", @@ -6540,14 +5633,6 @@ "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, - "kapsule": { - "version": "1.16.3", - "resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz", - "integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==", - "requires": { - "lodash-es": "4" - } - }, "keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", @@ -6557,6 +5642,17 @@ "json-buffer": "3.0.1" } }, + "leaflet": { + "version": "1.9.4", + "resolved": "https://registry.npmjs.org/leaflet/-/leaflet-1.9.4.tgz", + "integrity": "sha512-nxS1ynzJOmOlHp+iL3FyWqK89GtNL8U8rvlMOsQdTTssxZwCXh8N2NB3GDQOL+YR3XnWyZAxwQixURb+FA74PA==" + }, + "leaflet.markercluster": { + "version": "1.5.3", + "resolved": 
"https://registry.npmjs.org/leaflet.markercluster/-/leaflet.markercluster-1.5.3.tgz", + "integrity": "sha512-vPTw/Bndq7eQHjLBVlWpnGeLa3t+3zGiuM7fJwCkiMFq+nmRuG3RI3f7f4N4TDX7T4NpbAXpR2+NTRSEGfCSeA==", + "requires": {} + }, "levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -6673,25 +5769,12 @@ "p-locate": "^5.0.0" } }, - "lodash-es": { - "version": "4.17.23", - "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz", - "integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==" - }, "lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, - "loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "requires": { - "js-tokens": "^3.0.0 || ^4.0.0" - } - }, "lru-cache": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -6716,11 +5799,6 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, - "meshoptimizer": { - "version": "0.22.0", - "resolved": "https://registry.npmjs.org/meshoptimizer/-/meshoptimizer-0.22.0.tgz", - "integrity": "sha512-IebiK79sqIy+E4EgOr+CAw+Ke8hAspXKzBd0JdgEmPHiAwmvEj2S4h1rfvo+o/BnfEYd/jAOg5IeeIjzlzSnDg==" - }, "minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -6754,11 +5832,6 @@ "integrity": "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==", "dev": true }, - "object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": 
"sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==" - }, "optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -6824,22 +5897,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true }, - "point-in-polygon-hao": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/point-in-polygon-hao/-/point-in-polygon-hao-1.2.4.tgz", - "integrity": "sha512-x2pcvXeqhRHlNRdhLs/tgFapAbSSe86wa/eqmj1G6pWftbEs5aVRJhRGM6FYSUERKu0PjekJzMq0gsI2XyiclQ==", - "requires": { - "robust-predicates": "^3.0.2" - } - }, - "polished": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/polished/-/polished-4.3.1.tgz", - "integrity": "sha512-OBatVyC/N7SCW/FaDHrSd+vn0o5cS855TOmYi4OkdWUMSJCET/xip//ch8xGUvtr3i44X9LVyWwQlRMTN3pwSA==", - "requires": { - "@babel/runtime": "^7.17.8" - } - }, "postcss": { "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", @@ -6851,27 +5908,12 @@ "source-map-js": "^1.2.1" } }, - "preact": { - "version": "10.28.3", - "resolved": "https://registry.npmjs.org/preact/-/preact-10.28.3.tgz", - "integrity": "sha512-tCmoRkPQLpBeWzpmbhryairGnhW9tKV6c6gr/w+RhoRoKEJwsjzipwp//1oCpGPOchvSLaAPlpcJi9MwMmoPyA==" - }, "prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", "dev": true }, - "prop-types": { - "version": "15.8.1", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", - "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", - "requires": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.13.1" - } - }, "punycode": { "version": "2.3.1", "resolved": 
"https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -6891,29 +5933,6 @@ "scheduler": "^0.27.0" } }, - "react-globe.gl": { - "version": "2.37.0", - "resolved": "https://registry.npmjs.org/react-globe.gl/-/react-globe.gl-2.37.0.tgz", - "integrity": "sha512-nN1FDOJBhFvWfKrOY0SnkDuA8wk9FSTBN0HFfAdTqqcFM5R+OXBIxK0BM6t8n3oNVYpEJVxEzjYFwLyk4BC7Cw==", - "requires": { - "globe.gl": "^2.45", - "prop-types": "15", - "react-kapsule": "^2.5" - } - }, - "react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" - }, - "react-kapsule": { - "version": "2.5.7", - "resolved": "https://registry.npmjs.org/react-kapsule/-/react-kapsule-2.5.7.tgz", - "integrity": "sha512-kifAF4ZPD77qZKc4CKLmozq6GY1sBzPEJTIJb0wWFK6HsePJatK3jXplZn2eeAt3x67CDozgi7/rO8fNQ/AL7A==", - "requires": { - "jerrypick": "^1.1.1" - } - }, "react-refresh": { "version": "0.18.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.18.0.tgz", @@ -6926,11 +5945,6 @@ "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", "dev": true }, - "robust-predicates": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz", - "integrity": "sha512-IXgzBWvWQwE6PrDI05OvmXUIruQTcoMDzRsOd5CDvHCVLcLHMTSYvOK5Cm46kWqlV3yAbuSpBZdJ5oP5OUoStg==" - }, "rollup": { "version": "4.57.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz", @@ -6992,11 +6006,6 @@ "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", "dev": true }, - "simplesignal": { - "version": "2.1.7", - "resolved": "https://registry.npmjs.org/simplesignal/-/simplesignal-2.1.7.tgz", - "integrity": "sha512-PEo2qWpUke7IMhlqiBxrulIFvhJRLkl1ih52Rwa+bPjzhJepcd4GIjn2RiQmFSx3dQvsEAgF0/lXMwMN7vODaA==" - 
}, "source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -7035,86 +6044,6 @@ "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", "dev": true }, - "three": { - "version": "0.182.0", - "resolved": "https://registry.npmjs.org/three/-/three-0.182.0.tgz", - "integrity": "sha512-GbHabT+Irv+ihI1/f5kIIsZ+Ef9Sl5A1Y7imvS5RQjWgtTPfPnZ43JmlYI7NtCRDK9zir20lQpfg8/9Yd02OvQ==" - }, - "three-conic-polygon-geometry": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/three-conic-polygon-geometry/-/three-conic-polygon-geometry-2.1.2.tgz", - "integrity": "sha512-NaP3RWLJIyPGI+zyaZwd0Yj6rkoxm4FJHqAX1Enb4L64oNYLCn4bz1ESgOEYavgcUwCNYINu1AgEoUBJr1wZcA==", - "requires": { - "@turf/boolean-point-in-polygon": "^7.2", - "d3-array": "1 - 3", - "d3-geo": "1 - 3", - "d3-geo-voronoi": "2", - "d3-scale": "1 - 4", - "delaunator": "5", - "earcut": "3" - } - }, - "three-geojson-geometry": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/three-geojson-geometry/-/three-geojson-geometry-2.1.1.tgz", - "integrity": "sha512-dC7bF3ri1goDcihYhzACHOBQqu7YNNazYLa2bSydVIiJUb3jDFojKSy+gNj2pMkqZNSVjssSmdY9zlmnhEpr1w==", - "requires": { - "d3-geo": "1 - 3", - "d3-interpolate": "1 - 3", - "earcut": "3" - } - }, - "three-globe": { - "version": "2.45.0", - "resolved": "https://registry.npmjs.org/three-globe/-/three-globe-2.45.0.tgz", - "integrity": "sha512-Ur6BVkezvmHnvsEg8fbq6gIscSZtknSQMWwDRbiJ95o6OSDjDbGTc4oO6nP7mOM9aAA3YrF7YZyOwSkP4T56QA==", - "requires": { - "@tweenjs/tween.js": "18 - 25", - "accessor-fn": "1", - "d3-array": "3", - "d3-color": "3", - "d3-geo": "3", - "d3-interpolate": "3", - "d3-scale": "4", - "d3-scale-chromatic": "3", - "data-bind-mapper": "1", - "frame-ticker": "1", - "h3-js": "4", - "index-array-by": "1", - "kapsule": "^1.16", - "three-conic-polygon-geometry": "2", - "three-geojson-geometry": "2", - "three-slippy-map-globe": "1", - 
"tinycolor2": "1" - } - }, - "three-render-objects": { - "version": "1.40.4", - "resolved": "https://registry.npmjs.org/three-render-objects/-/three-render-objects-1.40.4.tgz", - "integrity": "sha512-Ukpu1pei3L5r809izvjsZxwuRcYLiyn6Uvy3lZ9bpMTdvj3i6PeX6w++/hs2ZS3KnEzGjb6YvTvh4UQuwHTDJg==", - "requires": { - "@tweenjs/tween.js": "18 - 25", - "accessor-fn": "1", - "float-tooltip": "^1.7", - "kapsule": "^1.16", - "polished": "4" - } - }, - "three-slippy-map-globe": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/three-slippy-map-globe/-/three-slippy-map-globe-1.0.4.tgz", - "integrity": "sha512-am8A4PP38AfTdrhXBDucwPRHLTbBl93yhpjIs56K1TLs9VuUWzg68oim4Dibs9QC1riXbj5SoBp/okA1VN9eYg==", - "requires": { - "d3-geo": "1 - 3", - "d3-octree": "^1.1", - "d3-scale": "1 - 4" - } - }, - "tinycolor2": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", - "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==" - }, "tinyglobby": { "version": "0.2.15", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", @@ -7125,14 +6054,6 @@ "picomatch": "^4.0.3" } }, - "topojson-client": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/topojson-client/-/topojson-client-3.1.0.tgz", - "integrity": "sha512-605uxS6bcYxGXw9qi62XyrV6Q3xwbndjachmNxu8HWTtVPxZfEJN9fd/SZS1Q54Sn2y0TMyMxFj/cJINqGHrKw==", - "requires": { - "commander": "2" - } - }, "ts-api-utils": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", @@ -7143,7 +6064,9 @@ "tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==" + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "optional": true }, 
"type-check": { "version": "0.4.0", diff --git a/package.json b/package.json index cd3847b..1820207 100644 --- a/package.json +++ b/package.json @@ -26,24 +26,22 @@ "docker:run": "sh -c 'if command -v docker-compose >/dev/null 2>&1; then docker-compose up --build; else docker compose up --build; fi'" }, "dependencies": { - "@types/three": "^0.182.0", "clsx": "^2.1.1", + "leaflet": "^1.9.4", + "leaflet.markercluster": "^1.5.3", "lucide-react": "^0.563.0", "react": "^19.2.0", "react-dom": "^19.2.0", - "react-globe.gl": "^2.37.0", - "tailwind-merge": "^3.4.0", - "three": "^0.182.0", - "topojson-client": "^3.1.0" + "tailwind-merge": "^3.4.0" }, "devDependencies": { "@eslint/js": "^9.39.1", "@tailwindcss/vite": "^4.1.18", + "@types/leaflet": "^1.9.21", + "@types/leaflet.markercluster": "^1.5.6", "@types/node": "^24.10.1", "@types/react": "^19.2.5", "@types/react-dom": "^19.2.3", - "@types/topojson-client": "^3.1.5", - "@types/topojson-specification": "^1.0.5", "@vitejs/plugin-react": "^5.1.1", "eslint": "^9.39.1", "eslint-plugin-react-hooks": "^7.0.1", diff --git a/public/favicon-globe.svg b/public/favicon-globe.svg deleted file mode 100644 index 67eeaab..0000000 --- a/public/favicon-globe.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/public/favicon-shield.svg b/public/favicon-shield.svg new file mode 100644 index 0000000..af0fd9e --- /dev/null +++ b/public/favicon-shield.svg @@ -0,0 +1,25 @@ + + + + + + + + + + + + diff --git a/registry/category_dictionary.json b/registry/category_dictionary.json new file mode 100644 index 0000000..6ce3447 --- /dev/null +++ b/registry/category_dictionary.json @@ -0,0 +1,250 @@ +{ + "categories": { + "missing_person": { + "default": { + "strong": [ + "missing person", + "missing child", + "amber alert", + "silver alert", + "disappeared person", + "person missing", + "lost child" + ], + "negative": [ + "calendar", + "history", + "institution", + "policy", + "transparency", + "certification", + 
"department", + "press room" + ], + "url_hints": [ + "/missing", + "/missing-person", + "/missing-children", + "/amber-alert" + ] + }, + "languages": { + "es": { + "strong": ["desaparec", "persona desaparecida", "menor desaparecido", "no localizado", "ubicanos"], + "negative": ["calendario", "historia", "institucion", "transparencia", "certificacion", "oficinas"], + "url_hints": ["/desaparecid", "/desaparec", "/ubicanos"] + }, + "ca": { + "strong": ["persona desapareguda", "desaparegut", "desapareguda", "menor desaparegut"], + "negative": ["calendari", "historia", "institucio", "transparencia"], + "url_hints": ["/desapareg"] + }, + "fr": { + "strong": ["personne disparue", "enfant disparu", "alerte enlèvement", "disparu", "disparue"], + "negative": ["calendrier", "historique", "institution", "transparence"], + "url_hints": ["/disparu", "/disparue"] + }, + "de": { + "strong": ["vermisst", "vermisste person", "vermisstes kind"], + "negative": ["kalender", "geschichte", "institution", "transparenz"], + "url_hints": ["/vermisst"] + }, + "it": { + "strong": ["persona scomparsa", "minore scomparso", "scomparso", "scomparsa"], + "negative": ["calendario", "storia", "istituzione", "trasparenza"], + "url_hints": ["/scompars"] + }, + "pt": { + "strong": ["pessoa desaparecida", "crianca desaparecida", "desaparecido", "desaparecida"], + "negative": ["calendario", "historia", "institucional", "transparencia"], + "url_hints": ["/desaparecid"] + }, + "nl": { + "strong": ["vermiste persoon", "vermist kind", "vermist"], + "negative": ["kalender", "geschiedenis", "instelling", "transparantie"], + "url_hints": ["/vermist"] + }, + "pl": { + "strong": ["osoba zaginiona", "zaginione dziecko", "zaginiony", "zaginiona"], + "url_hints": ["/zagin"] + }, + "ru": { + "strong": ["пропал", "пропала", "без вести", "разыскивается ребенок"], + "url_hints": ["/проп", "/missing"] + }, + "uk": { + "strong": ["зниклий", "зникла", "безвісти", "розшук дитини"], + "url_hints": ["/зник"] + }, + "ar": { + 
"strong": ["مفقود", "طفل مفقود", "مفقودة"], + "url_hints": ["/مفقود"] + }, + "tr": { + "strong": ["kayıp kişi", "kayıp çocuk", "kayıp"], + "url_hints": ["/kayip", "/kayıp"] + }, + "el": { + "strong": ["αγνοούμενος", "αγνοούμενη", "εξαφανισμένος", "εξαφανισμένη"], + "url_hints": ["/αγνο"] + }, + "zh": { + "strong": ["失踪", "失蹤", "走失", "寻人", "尋人"], + "url_hints": ["/missing", "/尋人", "/寻人"] + }, + "ja": { + "strong": ["行方不明", "失踪", "捜索願", "捜しています"], + "url_hints": ["/missing", "/行方不明"] + }, + "ko": { + "strong": ["실종", "실종자", "실종아동"], + "url_hints": ["/실종"] + }, + "hi": { + "strong": ["लापता", "गुमशुदा", "लापता व्यक्ति", "लापता बच्चा"], + "url_hints": ["/missing"] + } + } + }, + "wanted_suspect": { + "default": { + "strong": [ + "wanted", + "most wanted", + "wanted suspect", + "fugitive", + "manhunt", + "public appeal" + ], + "negative": [ + "calendar", + "history", + "institution", + "policy", + "transparency", + "department" + ], + "url_hints": [ + "/wanted", + "/fugitive", + "/most-wanted" + ] + }, + "languages": { + "es": { + "strong": ["se busca", "buscado", "buscada", "profugo", "prófugo", "persona de interes policial"], + "negative": ["calendario", "historia", "institucion", "transparencia"], + "url_hints": ["/buscad", "/se-busca", "/interes-policial"] + }, + "ca": { + "strong": ["es busca", "buscat", "buscada", "fugitiu"], + "url_hints": ["/busca"] + }, + "fr": { + "strong": ["recherché", "recherche", "fugitif", "avis de recherche"], + "url_hints": ["/recherche", "/recherchee"] + }, + "de": { + "strong": ["gesucht", "fahndung", "flüchtig", "fluechtig"], + "url_hints": ["/gesucht", "/fahndung"] + }, + "it": { + "strong": ["ricercato", "ricercata", "latitante", "ricercati"], + "url_hints": ["/ricercat"] + }, + "pt": { + "strong": ["procurado", "procurada", "foragido", "suspeito procurado"], + "url_hints": ["/procurad", "/foragid"] + }, + "nl": { + "strong": ["gezocht", "voortvluchtig", "opgespoord"], + "url_hints": ["/gezocht"] + }, + "pl": { + "strong": 
["poszukiwany", "poszukiwana", "zbieg"], + "url_hints": ["/poszukiw"] + }, + "ru": { + "strong": ["разыскивается", "в розыске", "беглец"], + "url_hints": ["/розыск", "/разыск"] + }, + "uk": { + "strong": ["розшукується", "у розшуку", "втікач"], + "url_hints": ["/розшук"] + }, + "ar": { + "strong": ["مطلوب", "هارب", "مشتبه به مطلوب"], + "url_hints": ["/مطلوب"] + }, + "tr": { + "strong": ["aranıyor", "aranan", "firari"], + "url_hints": ["/aran", "/firari"] + }, + "zh": { + "strong": ["通缉", "通緝", "在逃", "追缉", "追緝"], + "url_hints": ["/wanted", "/通缉", "/通緝"] + }, + "ja": { + "strong": ["指名手配", "逃走中", "公開手配"], + "url_hints": ["/wanted", "/指名手配"] + }, + "ko": { + "strong": ["지명수배", "수배", "도주범"], + "url_hints": ["/wanted", "/수배"] + }, + "hi": { + "strong": ["वांछित", "फरार", "मोस्ट वांटेड"], + "url_hints": ["/wanted"] + } + } + }, + "travel_warning": { + "default": { + "strong": ["travel warning", "do not travel", "reconsider travel", "advisory against travel"], + "url_hints": ["/travel", "/advisory"] + }, + "languages": { + "es": { "strong": ["aviso de viaje", "no viajar", "evite viajar"], "url_hints": ["/viaje"] }, + "fr": { "strong": ["conseil aux voyageurs", "ne pas voyager"], "url_hints": ["/voyage"] }, + "de": { "strong": ["reisewarnung", "nicht reisen"], "url_hints": ["/reise"] }, + "it": { "strong": ["avviso di viaggio", "non viaggiare"], "url_hints": ["/viaggio"] }, + "pt": { "strong": ["aviso de viagem", "nao viajar", "não viajar"], "url_hints": ["/viagem"] } + } + }, + "fraud_alert": { + "default": { + "strong": ["fraud alert", "scam alert", "phishing alert", "consumer warning"], + "url_hints": ["/fraud", "/scam", "/phishing"] + }, + "languages": { + "es": { "strong": ["alerta de fraude", "estafa", "phishing", "fraude"], "url_hints": ["/fraude", "/estafa"] }, + "fr": { "strong": ["alerte fraude", "escroquerie", "hameconnage"], "url_hints": ["/fraude", "/escroquerie"] }, + "de": { "strong": ["betrugswarnung", "phishing", "betrug"], "url_hints": ["/betrug", 
"/phishing"] }, + "pt": { "strong": ["alerta de fraude", "golpe", "phishing"], "url_hints": ["/fraude", "/golpe"] } + } + }, + "cyber_advisory": { + "default": { + "strong": ["security advisory", "vulnerability", "cve-", "patch", "exploit", "security bulletin"], + "url_hints": ["/advisory", "/alert", "/bulletin"] + }, + "languages": { + "es": { "strong": ["aviso de seguridad", "vulnerabilidad", "boletin de seguridad", "boletín de seguridad"] }, + "fr": { "strong": ["avis de securite", "avis de sécurité", "vulnerabilite", "vulnérabilité"] }, + "de": { "strong": ["sicherheitswarnung", "sicherheitsmeldung", "schwachstelle"] }, + "pt": { "strong": ["aviso de seguranca", "aviso de segurança", "vulnerabilidade"] } + } + }, + "public_appeal": { + "default": { + "strong": ["public appeal", "seeking information", "help identify", "report information"], + "url_hints": ["/appeal", "/seeking-information"] + }, + "languages": { + "es": { "strong": ["solicita informacion", "solicita información", "ayude a identificar", "informacion a la ciudadania"], "url_hints": ["/informacion"] }, + "fr": { "strong": ["appel a temoins", "appel à témoins", "recherche de renseignements"], "url_hints": ["/appel"] }, + "de": { "strong": ["zeugenaufruf", "hinweise erbeten", "hinweis gesucht"], "url_hints": ["/zeugen"] } + } + } + } +} diff --git a/registry/curated_agencies.seed.json b/registry/curated_agencies.seed.json new file mode 100644 index 0000000..83cdc5e --- /dev/null +++ b/registry/curated_agencies.seed.json @@ -0,0 +1,333 @@ +[ + { + "type": "interpol-red-json", + "fetch_mode": "browser", + "source": { + "source_id": "interpol-notices", + "authority_name": "INTERPOL Notices", + "country": "France", + "country_code": "FR", + "region": "International", + "authority_type": "police", + "base_url": "https://www.interpol.int", + "scope": "international", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": 
"https://ws-public.interpol.int/notices/v1/red?page=1&resultPerPage=160", + "category": "wanted_suspect", + "region_tag": "INT", + "lat": 45.764, + "lng": 4.8357, + "reporting": { + "label": "Browse INTERPOL Notices", + "url": "https://www.interpol.int/How-we-work/Notices/View-Red-Notices" + } + }, + { + "type": "interpol-yellow-json", + "fetch_mode": "browser", + "source": { + "source_id": "interpol-missing", + "authority_name": "INTERPOL Missing Persons", + "country": "France", + "country_code": "FR", + "region": "International", + "authority_type": "police", + "base_url": "https://www.interpol.int", + "scope": "international", + "parent_agency_id": "interpol-notices", + "jurisdiction_name": "INTERPOL Missing Persons", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://ws-public.interpol.int/notices/v1/yellow?page=1&resultPerPage=160", + "category": "missing_person", + "region_tag": "INT", + "lat": 45.764, + "lng": 4.8357, + "reporting": { + "label": "Browse INTERPOL Yellow Notices", + "url": "https://www.interpol.int/How-we-work/Notices/View-Yellow-Notices" + } + }, + { + "type": "rss", + "source": { + "source_id": "europol", + "authority_name": "Europol", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.europol.europa.eu", + "scope": "supranational", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.europol.europa.eu/rss.xml", + "category": "public_appeal", + "region_tag": "EU", + "lat": 52.09, + "lng": 4.27, + "reporting": { + "label": "Report to Europol", + "url": "https://www.europol.europa.eu/report-a-crime" + } + }, + { + "type": "rss", + "source": { + "source_id": "fbi-wanted", + "authority_name": "FBI Wanted", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov", + "scope": 
"national", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.fbi.gov/wanted", + "category": "wanted_suspect", + "region_tag": "US", + "lat": 38.9, + "lng": -77.0, + "reporting": { + "label": "Submit a Tip to FBI", + "url": "https://tips.fbi.gov/" + } + }, + { + "type": "rss", + "source": { + "source_id": "fbi-seeking-info", + "authority_name": "FBI Seeking Information", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov", + "scope": "national", + "parent_agency_id": "fbi-wanted", + "jurisdiction_name": "FBI Public Appeals", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.fbi.gov/wanted/seeking-information", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.9, + "lng": -77.0, + "reporting": { + "label": "Submit a Tip to FBI", + "url": "https://tips.fbi.gov/" + } + }, + { + "type": "rss", + "source": { + "source_id": "dea", + "authority_name": "Drug Enforcement Administration", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.dea.gov", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.dea.gov/press-releases/rss.xml", + "category": "public_safety", + "region_tag": "US", + "lat": 38.9072, + "lng": -77.0369, + "reporting": { + "label": "Report to DEA", + "url": "https://www.dea.gov/submit-tip" + } + }, + { + "type": "rss", + "source": { + "source_id": "doj", + "authority_name": "U.S. 
Department of Justice", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.justice.gov", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.justice.gov/feeds/justice-news.xml", + "category": "public_safety", + "region_tag": "US", + "lat": 38.8951, + "lng": -77.0364, + "reporting": { + "label": "Contact DOJ", + "url": "https://www.justice.gov/doj/webform/your-message-department-justice" + } + }, + { + "type": "kev-json", + "source": { + "source_id": "cisa-kev", + "authority_name": "CISA", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "cert", + "base_url": "https://www.cisa.gov", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json", + "category": "cyber_advisory", + "region_tag": "US", + "lat": 38.88, + "lng": -77.02, + "reporting": { + "label": "Report to CISA", + "url": "https://www.cisa.gov/report" + } + }, + { + "type": "rss", + "source": { + "source_id": "ncmec", + "authority_name": "National Center for Missing & Exploited Children", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.missingkids.org", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://api.missingkids.org/missingkids/servlet/XmlServlet?LanguageCountry=en_US&act=rss&orgPrefix=NCMC", + "category": "missing_person", + "region_tag": "US", + "lat": 39.83, + "lng": -98.58, + "reporting": { + "label": "Report to NCMEC", + "url": "https://report.cybertip.org/" + } + }, + { + "type": "rss", + "source": { + "source_id": "bka", + "authority_name": "Bundeskriminalamt", + 
"country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.bka.de", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "de" + }, + "feed_url": "https://www.bka.de/SiteGlobals/Functions/RSSFeed/RSSGenerator.xml", + "category": "wanted_suspect", + "region_tag": "DE", + "lat": 50.1109, + "lng": 8.6821, + "reporting": { + "label": "Contact BKA", + "url": "https://www.bka.de" + } + }, + { + "type": "rss", + "source": { + "source_id": "bnd", + "authority_name": "Federal Intelligence Service (BND)", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.bnd.bund.de", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "de" + }, + "feed_url": "https://www.bnd.bund.de/EN/_rssfeed/rss_node.html", + "category": "public_safety", + "region_tag": "DE", + "lat": 52.52, + "lng": 13.405, + "reporting": { + "label": "Contact BND", + "url": "https://www.bnd.bund.de" + } + }, + { + "type": "rss", + "source": { + "source_id": "nca-uk", + "authority_name": "National Crime Agency", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "police", + "base_url": "https://nationalcrimeagency.gov.uk", + "scope": "national", + "is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://nationalcrimeagency.gov.uk/news?format=feed&type=rss", + "category": "public_safety", + "region_tag": "GB", + "lat": 51.5074, + "lng": -0.1278, + "reporting": { + "label": "Report to NCA", + "url": "https://www.nationalcrimeagency.gov.uk/contact-us" + } + }, + { + "type": "rss", + "source": { + "source_id": "ncsc-uk", + "authority_name": "NCSC UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.ncsc.gov.uk", + "scope": "national", + 
"is_curated": true, + "is_high_value": true, + "language_code": "en" + }, + "feed_url": "https://www.ncsc.gov.uk/api/1/services/v1/all-rss-feed.xml", + "category": "cyber_advisory", + "region_tag": "GB", + "lat": 51.5, + "lng": -0.13, + "reporting": { + "label": "Report to NCSC", + "url": "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website" + } + } +] diff --git a/registry/source_candidates.json b/registry/source_candidates.json new file mode 100644 index 0000000..2eaaa74 --- /dev/null +++ b/registry/source_candidates.json @@ -0,0 +1,4 @@ +{ + "generated_at": "", + "sources": [] +} diff --git a/registry/source_registry.json b/registry/source_registry.json index c5dc9a9..64cf4d5 100644 --- a/registry/source_registry.json +++ b/registry/source_registry.json @@ -19,52 +19,103 @@ "label": "Report to CISA", "url": "https://www.cisa.gov/report", "notes": "Use 911 for emergencies." - } + }, + "max_items": 15 }, { - "type": "rss", + "type": "fbi-wanted-json", "source": { - "source_id": "fbi", - "authority_name": "FBI", + "source_id": "fbi-wanted", + "authority_name": "FBI Wanted", "country": "United States", "country_code": "US", "region": "North America", "authority_type": "police", "base_url": "https://www.fbi.gov" }, - "feed_url": "https://www.fbi.gov/feeds/fbi-top-stories/rss.xml", - "category": "public_appeal", + "feed_url": "https://api.fbi.gov/wanted/v1/list?posterClassification=default", + "category": "wanted_suspect", "region_tag": "US", "lat": 38.9, "lng": -77, + "max_items": 100, "reporting": { - "label": "Report to FBI", + "label": "Submit a Tip to FBI", "url": "https://tips.fbi.gov/", "phone": "1-800-CALL-FBI (1-800-225-5324)", "notes": "Use 911 for emergencies." 
} }, { - "type": "rss", + "type": "fbi-wanted-json", "source": { - "source_id": "fbi-wanted", - "authority_name": "FBI Wanted", + "source_id": "fbi-mostwanted", + "authority_name": "FBI Ten Most Wanted", "country": "United States", "country_code": "US", "region": "North America", "authority_type": "police", "base_url": "https://www.fbi.gov" }, - "feed_url": "https://www.fbi.gov/feeds/all-wanted/rss.xml", + "feed_url": "https://api.fbi.gov/wanted/v1/list?posterClassification=ten", "category": "wanted_suspect", "region_tag": "US", - "lat": 38.9, - "lng": -77, + "lat": 38.89, + "lng": -77.02, + "max_items": 20, + "reporting": { + "label": "Report Sighting to FBI", + "url": "https://tips.fbi.gov/", + "phone": "1-800-CALL-FBI (1-800-225-5324)", + "notes": "Do NOT attempt to apprehend. Call 911 immediately if in danger." + } + }, + { + "type": "fbi-wanted-json", + "source": { + "source_id": "fbi-seeking", + "authority_name": "FBI Seeking Info", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://api.fbi.gov/wanted/v1/list?posterClassification=seeking-info", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.91, + "lng": -77.01, + "max_items": 60, "reporting": { "label": "Submit a Tip to FBI", "url": "https://tips.fbi.gov/", "phone": "1-800-CALL-FBI (1-800-225-5324)", - "notes": "Use 911 for emergencies." + "notes": "The FBI is seeking the public's assistance. If you have information, submit a tip." 
+ } + }, + { + "type": "fbi-wanted-json", + "source": { + "source_id": "fbi-kidnappings", + "authority_name": "FBI Parental Kidnappings", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://api.fbi.gov/wanted/v1/list?posterClassification=kidnappings", + "category": "missing_person", + "region_tag": "US", + "lat": 38.88, + "lng": -76.99, + "max_items": 40, + "reporting": { + "label": "Report to FBI", + "url": "https://tips.fbi.gov/", + "phone": "1-800-CALL-FBI (1-800-225-5324)", + "notes": "If you have information about a parental kidnapping case, submit a tip." } }, { @@ -79,7 +130,7 @@ "authority_type": "police", "base_url": "https://www.europol.europa.eu" }, - "feed_url": "https://www.europol.europa.eu/rss.xml", + "feed_url": "https://www.europol.europa.eu/cms/api/rss/news", "category": "public_appeal", "region_tag": "EU", "lat": 52.09, @@ -108,7 +159,8 @@ "reporting": { "label": "Report to NCSC", "url": "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website" - } + }, + "max_items": 15 }, { "type": "rss", @@ -129,7 +181,8 @@ "reporting": { "label": "Report to NCSC", "url": "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website" - } + }, + "max_items": 15 }, { "type": "rss", @@ -220,7 +273,8 @@ "phone": "1-866-787-4722", "email": "soc@cisecurity.org", "notes": "24/7 Security Operations Center for state, local, tribal, and territorial governments." 
- } + }, + "max_items": 15 }, { "type": "rss", @@ -264,7 +318,8 @@ "label": "Report to CERT-FR", "url": "https://www.cert.ssi.gouv.fr/contact/", "email": "cert-fr@ssi.gouv.fr" - } + }, + "max_items": 15 }, { "type": "rss", @@ -286,7 +341,8 @@ "label": "Report to NCSC-NL", "url": "https://www.ncsc.nl/contact/kwetsbaarheid-melden", "email": "cert@ncsc.nl" - } + }, + "max_items": 15 }, { "type": "rss", @@ -308,7 +364,8 @@ "label": "Report to JPCERT/CC", "url": "https://www.jpcert.or.jp/english/ir/form.html", "email": "info@jpcert.or.jp" - } + }, + "max_items": 15 }, { "type": "rss", @@ -355,10 +412,12 @@ "phone": "1-888-282-0870", "email": "central@cisa.dhs.gov", "notes": "Use 911 for emergencies." - } + }, + "max_items": 15 }, { - "type": "rss", + "type": "html-list", + "fetch_mode": "browser", "followRedirects": true, "source": { "source_id": "dhs", @@ -369,7 +428,7 @@ "authority_type": "national_security", "base_url": "https://www.dhs.gov" }, - "feed_url": "https://www.dhs.gov/news/rss.xml", + "feed_url": "https://www.dhs.gov/news-releases/press-releases", "category": "public_safety", "region_tag": "US", "lat": 38.886, @@ -378,7 +437,25 @@ "label": "Report to DHS", "url": "https://www.dhs.gov/see-something-say-something/how-to-report-suspicious-activity", "phone": "1-866-347-2423" - } + }, + "feed_urls": [ + "https://www.dhs.gov/news-releases/press-releases", + "https://www.dhs.gov/news" + ], + "max_items": 80, + "include_keywords": [ + "cyber", + "terror", + "security", + "incident", + "threat", + "seizure" + ], + "exclude_keywords": [ + "subscribe", + "blog", + "speeches" + ] }, { "type": "rss", @@ -404,7 +481,8 @@ } }, { - "type": "rss", + "type": "html-list", + "fetch_mode": "browser", "followRedirects": true, "source": { "source_id": "dea", @@ -415,7 +493,7 @@ "authority_type": "police", "base_url": "https://www.dea.gov" }, - "feed_url": "https://www.dea.gov/press-releases/rss.xml", + "feed_url": "https://www.dea.gov/what-we-do/news/press-releases", 
"category": "public_appeal", "region_tag": "US", "lat": 38.871, @@ -424,10 +502,27 @@ "label": "Report to DEA", "url": "https://www.dea.gov/submit-tip", "phone": "1-877-792-2873" - } + }, + "feed_urls": [ + "https://www.dea.gov/what-we-do/news/press-releases" + ], + "max_items": 80, + "include_keywords": [ + "arrest", + "fentanyl", + "drug", + "traffick", + "seiz", + "operation" + ], + "exclude_keywords": [ + "submit a tip", + "subscribe" + ] }, { - "type": "rss", + "type": "html-list", + "fetch_mode": "browser", "followRedirects": true, "source": { "source_id": "atf", @@ -438,7 +533,7 @@ "authority_type": "police", "base_url": "https://www.atf.gov" }, - "feed_url": "https://www.atf.gov/news/rss.xml", + "feed_url": "https://www.atf.gov/news/press-releases", "category": "public_appeal", "region_tag": "US", "lat": 38.893, @@ -448,6 +543,77 @@ "url": "https://www.atf.gov/contact/atf-tips", "phone": "1-888-283-8477", "email": "atftips@atf.gov" + }, + "feed_urls": [ + "https://www.atf.gov/news/press-releases" + ], + "max_items": 80, + "include_keywords": [ + "sentenced", + "arrest", + "firearm", + "explosive", + "gang", + "trafficking" + ], + "exclude_keywords": [ + "subscribe for atf updates" + ] + }, + { + "type": "html-list", + "fetch_mode": "browser", + "followRedirects": true, + "source": { + "source_id": "dea-fugitives", + "authority_name": "DEA Fugitives", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.dea.gov" + }, + "feed_url": "https://www.dea.gov/fugitives", + "category": "wanted_suspect", + "region_tag": "US", + "lat": 38.87, + "lng": -77.06, + "max_items": 60, + "reporting": { + "label": "Report to DEA", + "url": "https://www.dea.gov/submit-tip", + "phone": "1-877-792-2873" + }, + "include_keywords": [ + "wanted", + "fugitive", + "reward" + ] + }, + { + "type": "html-list", + "fetch_mode": "browser", + "followRedirects": true, + "source": { + "source_id": 
"usms-mostwanted", + "authority_name": "US Marshals Most Wanted", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.usmarshals.gov" + }, + "feed_url": "https://www.usmarshals.gov/what-we-do/fugitive-investigations/15-most-wanted", + "category": "wanted_suspect", + "region_tag": "US", + "lat": 38.9, + "lng": -77.03, + "max_items": 20, + "reporting": { + "label": "Report to US Marshals", + "url": "https://www.usmarshals.gov/tips", + "phone": "1-877-926-8332", + "notes": "Do NOT attempt to apprehend. Call 911 immediately if in danger." } }, { @@ -520,53 +686,7 @@ } }, { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "met-police-uk", - "authority_name": "Met Police UK", - "country": "United Kingdom", - "country_code": "GB", - "region": "Europe", - "authority_type": "police", - "base_url": "https://news.met.police.uk" - }, - "feed_url": "https://news.met.police.uk/feeds/rss", - "category": "public_appeal", - "region_tag": "GB", - "lat": 51.51, - "lng": -0.14, - "reporting": { - "label": "Report to Met Police", - "url": "https://www.met.police.uk/ro/report/", - "phone": "999 (Emergency) / 101 (Non-emergency)" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "bsi-de", - "authority_name": "BSI Germany", - "country": "Germany", - "country_code": "DE", - "region": "Europe", - "authority_type": "cert", - "base_url": "https://www.bsi.bund.de" - }, - "feed_url": "https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed.xml", - "category": "cyber_advisory", - "region_tag": "DE", - "lat": 50.73, - "lng": 7.1, - "reporting": { - "label": "Report to BSI", - "url": "https://www.bsi.bund.de/EN/Service-Navi/Contact/contact_node.html", - "email": "certbund@bsi.bund.de" - } - }, - { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { "source_id": "bka-de", @@ -577,7 +697,7 @@ 
"authority_type": "police", "base_url": "https://www.bka.de" }, - "feed_url": "https://www.bka.de/SharedDocs/Kurzmeldungen/DE/Warnhinweise/RSS/BKA_Pressemitteilungen_RSS.xml", + "feed_url": "https://www.bka.de/DE/Presse/Pressestelle/pressestelle_node.html", "category": "wanted_suspect", "region_tag": "DE", "lat": 50.12, @@ -586,7 +706,23 @@ "label": "Report to BKA", "url": "https://www.bka.de/DE/KontaktAufnehmen/Hinweisportal/hinweisportal_node.html", "phone": "+49 611 55-0" - } + }, + "feed_urls": [ + "https://www.bka.de/DE/Presse/Pressestelle/pressestelle_node.html" + ], + "max_items": 80, + "include_keywords": [ + "pressemitteilung", + "cyber", + "fahnd", + "kriminal", + "hasspost", + "festnahme" + ], + "exclude_keywords": [ + "kontakt", + "pressestelle" + ] }, { "type": "rss", @@ -614,35 +750,8 @@ "label": "Report to ACSC", "url": "https://www.cyber.gov.au/report-and-recover/report", "phone": "1300 292 371" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "afp-au", - "authority_name": "AFP Australia", - "country": "Australia", - "country_code": "AU", - "region": "Oceania", - "authority_type": "police", - "base_url": "https://www.afp.gov.au" }, - "feed_url": "https://www.afp.gov.au/news-centre/media-releases/rss.xml", - "feed_urls": [ - "https://www.afp.gov.au/news-centre/media-releases/rss.xml", - "https://www.afp.gov.au/news-centre/media-release/rss.xml", - "https://www.afp.gov.au/news-centre/media-releases/feed" - ], - "category": "public_appeal", - "region_tag": "AU", - "lat": -35.31, - "lng": 149.14, - "reporting": { - "label": "Report to AFP", - "url": "https://www.afp.gov.au/what-we-do/crime-types/report-crime", - "phone": "131 237" - } + "max_items": 15 }, { "type": "rss", @@ -673,35 +782,7 @@ } }, { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "nsw-police-au", - "authority_name": "NSW Police", - "country": "Australia", - "country_code": "AU", - "region": "Oceania", - "authority_type": 
"police", - "base_url": "https://www.police.nsw.gov.au" - }, - "feed_url": "https://www.police.nsw.gov.au/news/rss", - "feed_urls": [ - "https://www.police.nsw.gov.au/news/rss", - "https://www.police.nsw.gov.au/rss/news", - "https://www.police.nsw.gov.au/news/feed" - ], - "category": "public_appeal", - "region_tag": "AU", - "lat": -33.87, - "lng": 151.21, - "reporting": { - "label": "Report to NSW Police", - "url": "https://portal.police.nsw.gov.au/s/online-services", - "phone": "000 (Emergency) / 131 444 (Police Assistance Line)" - } - }, - { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { "source_id": "cccs-ca", @@ -712,7 +793,7 @@ "authority_type": "cert", "base_url": "https://www.cyber.gc.ca" }, - "feed_url": "https://www.cyber.gc.ca/en/alerts-advisories/feed", + "feed_url": "https://www.cyber.gc.ca/en/alerts-advisories", "category": "cyber_advisory", "region_tag": "CA", "lat": 45.42, @@ -722,7 +803,22 @@ "url": "https://www.cyber.gc.ca/en/incident-management", "email": "contact@cyber.gc.ca", "phone": "1-833-292-3722" - } + }, + "feed_urls": [ + "https://www.cyber.gc.ca/en/alerts-advisories" + ], + "max_items": 80, + "include_keywords": [ + "alert", + "advisory", + "vulnerability", + "cyber", + "threat" + ], + "exclude_keywords": [ + "contact", + "report a cyber incident" + ] }, { "type": "rss", @@ -748,7 +844,7 @@ } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { "source_id": "cnp-es", @@ -759,7 +855,7 @@ "authority_type": "police", "base_url": "https://www.policia.es" }, - "feed_url": "https://www.policia.es/rss/rss_prensa.xml", + "feed_url": "https://www.policia.es/_es/comunicacion_salaprensa.php", "category": "public_appeal", "region_tag": "ES", "lat": 40.42, @@ -768,7 +864,25 @@ "label": "Report to Policía Nacional", "url": "https://www.policia.es/colabora.php", "phone": "091" - } + }, + "feed_urls": [ + "https://www.policia.es/_es/comunicacion_salaprensa.php", + 
"https://www.policia.es/_es/comunicacion_portada.php" + ], + "max_items": 80, + "include_keywords": [ + "prensa", + "detiene", + "operación", + "ciberdelincuencia", + "incauta", + "investigación" + ], + "exclude_keywords": [ + "síguenos", + "gabinete de prensa", + "cargar más" + ] }, { "type": "rss", @@ -792,7 +906,8 @@ "url": "https://www.cert-in.org.in/", "email": "incident@cert-in.org.in", "phone": "+91-11-24368572" - } + }, + "max_items": 15 }, { "type": "rss", @@ -821,7 +936,8 @@ "url": "https://www.csa.gov.sg/singcert/reporting", "email": "singcert@csa.gov.sg", "phone": "+65 6323 5052" - } + }, + "max_items": 15 }, { "type": "rss", @@ -873,7 +989,8 @@ "url": "https://www.hkcert.org/report-incident", "email": "hkcert@hkcert.org", "phone": "+852 8105 6060" - } + }, + "max_items": 15 }, { "type": "rss", @@ -898,35 +1015,6 @@ "phone": "10111 (Emergency) / 08600 10111 (Crime Stop)" } }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "missing-children-za", - "authority_name": "Missing Children South Africa", - "country": "South Africa", - "country_code": "ZA", - "region": "Africa", - "authority_type": "public_safety_program", - "base_url": "https://missingchildren.org.za" - }, - "feed_url": "https://missingchildren.org.za/feed/", - "feed_urls": [ - "https://missingchildren.org.za/feed/", - "https://missingchildren.org.za/category/missing-children/feed/", - "https://missingchildren.org.za/category/cases/feed/" - ], - "category": "missing_person", - "region_tag": "ZA", - "lat": -29, - "lng": 24, - "reporting": { - "label": "Report to Missing Children SA", - "url": "https://missingchildren.org.za/report/", - "phone": "+27 72 647 7464", - "notes": "Coordinate directly with SAPS in emergency situations." - } - }, { "type": "rss", "followRedirects": true, @@ -951,52 +1039,6 @@ "notes": "100% anonymous. You can also report online." 
} }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "npa-jp", - "authority_name": "Japan NPA", - "country": "Japan", - "country_code": "JP", - "region": "Asia", - "authority_type": "police", - "base_url": "https://www.npa.go.jp" - }, - "feed_url": "https://www.npa.go.jp/rss/index.xml", - "category": "public_safety", - "region_tag": "JP", - "lat": 35.69, - "lng": 139.75, - "reporting": { - "label": "Report to NPA Japan", - "url": "https://www.npa.go.jp/english/index.html", - "phone": "110 (Emergency)" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "gendarmerie-fr", - "authority_name": "Gendarmerie France", - "country": "France", - "country_code": "FR", - "region": "Europe", - "authority_type": "police", - "base_url": "https://www.gendarmerie.interieur.gouv.fr" - }, - "feed_url": "https://www.gendarmerie.interieur.gouv.fr/rss", - "category": "public_appeal", - "region_tag": "FR", - "lat": 48.85, - "lng": 2.3, - "reporting": { - "label": "Report to Gendarmerie", - "url": "https://www.pre-plainte-en-ligne.gouv.fr/", - "phone": "17 (Emergency)" - } - }, { "type": "rss", "followRedirects": true, @@ -1071,34 +1113,6 @@ "phone": "190 (Emergency)" } }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "carabineros-cl", - "authority_name": "Carabineros Chile", - "country": "Chile", - "country_code": "CL", - "region": "South America", - "authority_type": "police", - "base_url": "https://www.carabineros.cl" - }, - "feed_url": "https://www.carabineros.cl/feed/", - "feed_urls": [ - "https://www.carabineros.cl/feed/", - "https://www.carabineros.cl/rss", - "https://www.carabineros.cl/index.php/feed/" - ], - "category": "public_appeal", - "region_tag": "CL", - "lat": -33.45, - "lng": -70.67, - "reporting": { - "label": "Report to Carabineros", - "url": "https://www.carabineros.cl/", - "phone": "133 (Emergency)" - } - }, { "type": "rss", "followRedirects": true, @@ -1237,35 +1251,8 @@ 
"label": "Report Cybercrime to PDI", "url": "https://www.pdichile.cl/", "phone": "134 (PDI Emergency)" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "fiscales-ar", - "authority_name": "Ministerio Público Fiscal Argentina", - "country": "Argentina", - "country_code": "AR", - "region": "South America", - "authority_type": "regulatory", - "base_url": "https://www.fiscales.gob.ar" }, - "feed_url": "https://www.fiscales.gob.ar/feed/", - "feed_urls": [ - "https://www.fiscales.gob.ar/feed/", - "https://www.fiscales.gob.ar/category/noticias/feed/", - "https://www.fiscales.gob.ar/category/cibercrimen/feed/" - ], - "category": "public_safety", - "region_tag": "AR", - "lat": -34.61, - "lng": -58.38, - "reporting": { - "label": "Report to Fiscalía Argentina", - "url": "https://www.mpf.gob.ar/", - "phone": "137 (Emergency advisory line)" - } + "max_items": 15 }, { "type": "rss", @@ -1294,54 +1281,6 @@ "notes": "Coordinate with local police for urgent leads." } }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "fbi-seeking", - "authority_name": "FBI Seeking Info", - "country": "United States", - "country_code": "US", - "region": "North America", - "authority_type": "police", - "base_url": "https://www.fbi.gov" - }, - "feed_url": "https://www.fbi.gov/feeds/seeking-information/rss.xml", - "category": "public_appeal", - "region_tag": "US", - "lat": 38.91, - "lng": -77.01, - "reporting": { - "label": "Submit a Tip to FBI", - "url": "https://tips.fbi.gov/", - "phone": "1-800-CALL-FBI (1-800-225-5324)", - "notes": "The FBI is seeking the public's assistance. If you have information, submit a tip." 
- } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "fbi-mostwanted", - "authority_name": "FBI Most Wanted", - "country": "United States", - "country_code": "US", - "region": "North America", - "authority_type": "police", - "base_url": "https://www.fbi.gov" - }, - "feed_url": "https://www.fbi.gov/feeds/fbi-most-wanted/rss.xml", - "category": "wanted_suspect", - "region_tag": "US", - "lat": 38.89, - "lng": -77.02, - "reporting": { - "label": "Report Sighting to FBI", - "url": "https://tips.fbi.gov/", - "phone": "1-800-CALL-FBI (1-800-225-5324)", - "notes": "Do NOT attempt to apprehend. Call 911 immediately if in danger." - } - }, { "type": "rss", "followRedirects": true, @@ -1365,117 +1304,6 @@ "phone": "0300 123 2040" } }, - { - "type": "rss", - "source": { - "source_id": "cna-sg-crime", - "authority_name": "CNA Singapore Crime", - "country": "Singapore", - "country_code": "SG", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.channelnewsasia.com" - }, - "feed_url": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=6511", - "category": "public_safety", - "region_tag": "SG", - "lat": 1.35, - "lng": 103.82, - "reporting": { - "label": "Report Crime in Singapore", - "url": "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", - "phone": "999 (Emergency) / 1800-255-0000 (Police Hotline)" - } - }, - { - "type": "rss", - "source": { - "source_id": "yonhap-kr", - "authority_name": "Yonhap News Korea", - "country": "South Korea", - "country_code": "KR", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://en.yna.co.kr" - }, - "feed_url": "https://en.yna.co.kr/RSS/news.xml", - "category": "public_safety", - "region_tag": "KR", - "lat": 37.57, - "lng": 126.98, - "reporting": { - "label": "Report Crime in South Korea", - "url": "https://www.police.go.kr/eng/index.do", - "phone": "112 (Emergency)" - } - }, - 
{ - "type": "rss", - "source": { - "source_id": "nhk-jp", - "authority_name": "NHK Japan", - "country": "Japan", - "country_code": "JP", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www3.nhk.or.jp" - }, - "feed_url": "https://www3.nhk.or.jp/rss/news/cat1.xml", - "category": "public_safety", - "region_tag": "JP", - "lat": 35.67, - "lng": 139.71, - "reporting": { - "label": "Report to Japan Police", - "url": "https://www.npa.go.jp/english/index.html", - "phone": "110 (Emergency)" - } - }, - { - "type": "rss", - "source": { - "source_id": "scmp-hk", - "authority_name": "SCMP Hong Kong", - "country": "Hong Kong", - "country_code": "HK", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.scmp.com" - }, - "feed_url": "https://www.scmp.com/rss/5/feed", - "followRedirects": true, - "category": "public_safety", - "region_tag": "HK", - "lat": 22.28, - "lng": 114.16, - "reporting": { - "label": "Report Crime in Hong Kong", - "url": "https://www.police.gov.hk/ppp_en/contact_us.html", - "phone": "999 (Emergency)" - } - }, - { - "type": "rss", - "source": { - "source_id": "straitstimes-sg", - "authority_name": "Straits Times Singapore", - "country": "Singapore", - "country_code": "SG", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.straitstimes.com" - }, - "feed_url": "https://www.straitstimes.com/news/singapore/rss.xml", - "category": "public_safety", - "region_tag": "SG", - "lat": 1.3, - "lng": 103.84, - "reporting": { - "label": "Report Crime in Singapore", - "url": "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", - "phone": "999 (Emergency)" - } - }, { "type": "rss", "followRedirects": true, @@ -1632,79 +1460,6 @@ "notes": "Denuncia anónima / Anonymous tip line." 
} }, - { - "type": "html-list", - "followRedirects": true, - "source": { - "source_id": "amber-mx", - "authority_name": "AMBER Alert Mexico", - "country": "Mexico", - "country_code": "MX", - "region": "North America", - "authority_type": "public_safety_program", - "base_url": "https://www.gob.mx/amber" - }, - "feed_url": "https://www.gob.mx/amber/archivo/acciones_y_programas", - "feed_urls": [ - "https://www.gob.mx/amber/archivo/acciones_y_programas", - "https://www.gob.mx/amber/es/archivo/acciones_y_programas", - "https://www.gob.mx/amber" - ], - "include_keywords": [ - "alerta amber", - "desaparec", - "no localizado", - "se busca", - "ficha", - "menor", - "niña", - "niño", - "adolescente", - "auxilio", - "información" - ], - "exclude_keywords": [ - "evento", - "campaña", - "conferencia", - "manual", - "material" - ], - "category": "missing_person", - "region_tag": "MX", - "lat": 19.44, - "lng": -99.14, - "reporting": { - "label": "Report Missing Child Mexico", - "url": "https://www.gob.mx/amber", - "phone": "800-008-5400", - "notes": "Alerta AMBER México" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "missing-ca", - "authority_name": "Canada Missing Children", - "country": "Canada", - "country_code": "CA", - "region": "North America", - "authority_type": "public_safety_program", - "base_url": "https://www.canadasmissing.ca" - }, - "feed_url": "https://www.canadasmissing.ca/rss/index-eng.xml", - "category": "missing_person", - "region_tag": "CA", - "lat": 45.43, - "lng": -75.68, - "reporting": { - "label": "Report Missing Person Canada", - "url": "https://www.canadasmissing.ca/index-eng.htm", - "phone": "1-866-KID-TIPS (1-866-543-8477)", - "notes": "Canadian Centre for Child Protection" - } - }, { "type": "rss", "followRedirects": true, @@ -1753,7 +1508,8 @@ "label": "Report to ThaiCERT", "url": "https://www.thaicert.or.th/", "email": "op@thaicert.or.th" - } + }, + "max_items": 15 }, { "type": "rss", @@ -1780,7 +1536,8 @@ 
"label": "Report to MyCERT", "url": "https://www.mycert.org.my/portal/report-incident", "email": "mycert@cybersecurity.my" - } + }, + "max_items": 15 }, { "type": "rss", @@ -1807,111 +1564,20 @@ "label": "Report to BSSN", "url": "https://bssn.go.id/", "notes": "Use official BSSN contact channels for incident reporting." - } + }, + "max_items": 15 }, { "type": "rss", "followRedirects": true, "source": { - "source_id": "bleepingcomputer", - "authority_name": "BleepingComputer", - "country": "United States", - "country_code": "US", + "source_id": "vpd-ca", + "authority_name": "Vancouver Police Department", + "country": "Canada", + "country_code": "CA", "region": "North America", - "authority_type": "private_sector", - "base_url": "https://www.bleepingcomputer.com" - }, - "feed_url": "https://www.bleepingcomputer.com/feed/", - "category": "private_sector", - "region_tag": "US", - "lat": 40.71, - "lng": -74.01, - "reporting": { - "label": "Read Full Report", - "url": "https://www.bleepingcomputer.com", - "notes": "Private-sector cybersecurity news. Report incidents to relevant authorities." - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "krebsonsecurity", - "authority_name": "Krebs on Security", - "country": "United States", - "country_code": "US", - "region": "North America", - "authority_type": "private_sector", - "base_url": "https://krebsonsecurity.com" - }, - "feed_url": "https://krebsonsecurity.com/feed/", - "category": "private_sector", - "region_tag": "US", - "lat": 38.9, - "lng": -77.04, - "reporting": { - "label": "Read Full Report", - "url": "https://krebsonsecurity.com", - "notes": "Investigative cybersecurity journalism by Brian Krebs." 
- } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "thehackernews", - "authority_name": "The Hacker News", - "country": "United States", - "country_code": "US", - "region": "North America", - "authority_type": "private_sector", - "base_url": "https://thehackernews.com" - }, - "feed_url": "https://feeds.feedburner.com/TheHackersNews", - "category": "private_sector", - "region_tag": "US", - "lat": 37.39, - "lng": -122.08, - "reporting": { - "label": "Read Full Report", - "url": "https://thehackernews.com", - "notes": "Cybersecurity news and analysis." - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "databreaches-net", - "authority_name": "DataBreaches.net", - "country": "United States", - "country_code": "US", - "region": "North America", - "authority_type": "private_sector", - "base_url": "https://databreaches.net" - }, - "feed_url": "https://databreaches.net/feed/", - "category": "private_sector", - "region_tag": "US", - "lat": 39.83, - "lng": -98.58, - "reporting": { - "label": "Read Full Report", - "url": "https://databreaches.net", - "notes": "Data breach tracking and reporting." 
- } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "vpd-ca", - "authority_name": "Vancouver Police Department", - "country": "Canada", - "country_code": "CA", - "region": "North America", - "authority_type": "police", - "base_url": "https://vpd.ca" + "authority_type": "police", + "base_url": "https://vpd.ca" }, "feed_url": "https://vpd.ca/feed/", "category": "public_appeal", @@ -1970,51 +1636,8 @@ "label": "Report a Cyber Incident", "url": "https://www.cyber.gc.ca/en/incident-management", "phone": "1-833-CYBER-88" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "cbc-canada", - "authority_name": "CBC Canada News", - "country": "Canada", - "country_code": "CA", - "region": "North America", - "authority_type": "public_safety_program", - "base_url": "https://www.cbc.ca" - }, - "feed_url": "https://www.cbc.ca/webfeed/rss/rss-canada", - "category": "public_safety", - "region_tag": "CA", - "lat": 43.6532, - "lng": -79.3832, - "reporting": { - "label": "CBC News Tips", - "url": "https://www.cbc.ca/news/tips" - } - }, - { - "type": "rss", - "followRedirects": true, - "source": { - "source_id": "globalnews-ca", - "authority_name": "Global News Canada", - "country": "Canada", - "country_code": "CA", - "region": "North America", - "authority_type": "public_safety_program", - "base_url": "https://globalnews.ca" }, - "feed_url": "https://globalnews.ca/feed/", - "category": "public_safety", - "region_tag": "CA", - "lat": 45.5017, - "lng": -73.5673, - "reporting": { - "label": "Global News Tips", - "url": "https://globalnews.ca/pages/contact-us/" - } + "max_items": 15 }, { "type": "rss", @@ -2036,1715 +1659,4620 @@ "reporting": { "label": "Report Cyber Incident to USOM", "url": "https://www.usom.gov.tr/bildirim" - } + }, + "max_items": 15 }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "timesofisrael-il", - "authority_name": "Times of Israel", - "country": 
"Israel", - "country_code": "IL", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.timesofisrael.com" + "source_id": "kenya-dci-wanted", + "authority_name": "Kenya DCI Wanted", + "country": "Kenya", + "country_code": "KE", + "region": "Africa", + "authority_type": "police", + "base_url": "https://www.dci.go.ke" }, - "feed_url": "https://www.timesofisrael.com/feed/", - "category": "public_safety", - "region_tag": "IL", - "lat": 31.7683, - "lng": 35.2137, + "feed_url": "https://www.dci.go.ke/wanted/", + "category": "wanted_suspect", + "region_tag": "KE", + "lat": -1.286389, + "lng": 36.817223, + "max_items": 80, + "include_keywords": [ + "wanted", + "suspect", + "fugitive", + "appeal", + "missing" + ], "reporting": { - "label": "Israel Police Tips", - "url": "https://www.police.gov.il/en", - "phone": "100 (Israel Police)" + "label": "Report to Kenya DCI", + "url": "https://www.dci.go.ke/contact-us/", + "phone": "+254 20 2710000" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "middleeasteye", - "authority_name": "Middle East Eye", - "country": "Qatar", - "country_code": "QA", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.middleeasteye.net" + "source_id": "osp-gh-wanted", + "authority_name": "Ghana OSP Wanted Cases", + "country": "Ghana", + "country_code": "GH", + "region": "Africa", + "authority_type": "regulatory", + "base_url": "https://osp.gov.gh" }, - "feed_url": "https://www.middleeasteye.net/rss", - "category": "public_safety", - "region_tag": "ME", - "lat": 25.2854, - "lng": 51.531, + "feed_url": "https://osp.gov.gh/case_files/wanted/", + "category": "wanted_suspect", + "region_tag": "GH", + "lat": 5.603717, + "lng": -0.186964, + "max_items": 80, + "include_keywords": [ + "wanted", + "case", + "appeal", + "fugitive" + ], "reporting": { - "label": "Middle East Eye Tips", - "url": "https://www.middleeasteye.net/contact" + 
"label": "Report to Ghana OSP", + "url": "https://osp.gov.gh/contact/" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "dailysabah-tr", - "authority_name": "Daily Sabah Turkey", - "country": "Turkey", - "country_code": "TR", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.dailysabah.com" + "source_id": "efcc-ng-wanted", + "authority_name": "EFCC Nigeria Wanted", + "country": "Nigeria", + "country_code": "NG", + "region": "Africa", + "authority_type": "police", + "base_url": "https://www.efcc.gov.ng" }, - "feed_url": "https://www.dailysabah.com/rssFeed/turkey", - "category": "public_safety", - "region_tag": "TR", - "lat": 41.0082, - "lng": 28.9784, + "feed_url": "https://www.efcc.gov.ng/efcc/wanted", + "category": "wanted_suspect", + "region_tag": "NG", + "lat": 9.082, + "lng": 8.6753, + "max_items": 80, + "include_keywords": [ + "wanted", + "suspect", + "fraud", + "money laundering", + "appeal" + ], "reporting": { - "label": "Daily Sabah Contact", - "url": "https://www.dailysabah.com/contact" + "label": "Report to EFCC", + "url": "https://www.efcc.gov.ng/efcc/records/red-alert" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "globaltimes-cn", - "authority_name": "Global Times China", - "country": "China", - "country_code": "CN", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.globaltimes.cn" + "source_id": "icpc-ng-wanted", + "authority_name": "ICPC Nigeria Wanted", + "country": "Nigeria", + "country_code": "NG", + "region": "Africa", + "authority_type": "police", + "base_url": "https://icpc.gov.ng" }, - "feed_url": "https://www.globaltimes.cn/rss/outbrain.xml", - "category": "public_safety", - "region_tag": "CN", - "lat": 39.9042, - "lng": 116.4074, + "feed_url": "https://icpc.gov.ng/wanted-persons/", + "category": "wanted_suspect", + "region_tag": "NG", + "lat": 9.082, + 
"lng": 8.6753, + "max_items": 80, + "include_keywords": [ + "wanted", + "persons", + "suspect", + "fugitive" + ], "reporting": { - "label": "Global Times Contact", - "url": "https://www.globaltimes.cn/about-us/contact-us.html" + "label": "Report to ICPC", + "url": "https://icpc.gov.ng/report-corruption/" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "indiatoday-crime", - "authority_name": "India Today Crime", - "country": "India", - "country_code": "IN", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.indiatoday.in" + "source_id": "saps-newsroom-html", + "authority_name": "SAPS Newsroom", + "country": "South Africa", + "country_code": "ZA", + "region": "Africa", + "authority_type": "police", + "base_url": "https://www.saps.gov.za" }, - "feed_url": "https://www.indiatoday.in/rss/1786661", - "category": "public_safety", - "region_tag": "IN", - "lat": 28.6139, - "lng": 77.209, + "feed_url": "https://www.saps.gov.za/newsroom/newsroom.php", + "category": "public_appeal", + "region_tag": "ZA", + "lat": -25.747868, + "lng": 28.229271, + "max_items": 120, + "include_keywords": [ + "wanted", + "missing", + "suspect", + "appeal", + "crime", + "investigation", + "tip-off" + ], "reporting": { - "label": "India Crime Tips", - "url": "https://cybercrime.gov.in/", - "phone": "112 (India Emergency)" + "label": "Report to SAPS Crime Stop", + "url": "https://www.saps.gov.za/crimestop/", + "phone": "08600 10111" } }, { "type": "rss", "followRedirects": true, "source": { - "source_id": "ndtv-in", - "authority_name": "NDTV India News", - "country": "India", - "country_code": "IN", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.ndtv.com" + "source_id": "ke-cirt-cc", + "authority_name": "KE-CIRT/CC", + "country": "Kenya", + "country_code": "KE", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://ke-cirt.go.ke" }, - "feed_url": 
"https://feeds.feedburner.com/ndtvnews-india-news", - "category": "public_safety", - "region_tag": "IN", - "lat": 19.076, - "lng": 72.8777, + "feed_url": "https://ke-cirt.go.ke/feed/", + "category": "cyber_advisory", + "region_tag": "KE", + "lat": -1.286389, + "lng": 36.817223, + "max_items": 100, "reporting": { - "label": "NDTV News Tips", - "url": "https://www.ndtv.com/page/contact-us", - "phone": "112 (India Emergency)" - } + "label": "Report Incident to KE-CIRT/CC", + "url": "https://ke-cirt.go.ke/incident-reporting/" + } }, { "type": "rss", "followRedirects": true, "source": { - "source_id": "hindustantimes-in", - "authority_name": "Hindustan Times India", - "country": "India", - "country_code": "IN", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.hindustantimes.com" + "source_id": "cert-mu", + "authority_name": "CERT-MU Mauritius", + "country": "Mauritius", + "country_code": "MU", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://cert-mu.govmu.org" }, - "feed_url": "https://www.hindustantimes.com/feeds/rss/india-news/rssfeed.xml", - "category": "public_safety", - "region_tag": "IN", - "lat": 12.9716, - "lng": 77.5946, + "feed_url": "https://cert-mu.govmu.org/rss", + "feed_urls": [ + "https://cert-mu.govmu.org/rss", + "https://cert-mu.govmu.org/English/Pages/rss.aspx" + ], + "category": "cyber_advisory", + "region_tag": "MU", + "lat": -20.160891, + "lng": 57.501222, + "max_items": 100, "reporting": { - "label": "Hindustan Times Tips", - "url": "https://www.hindustantimes.com/contact-us", - "phone": "112 (India Emergency)" + "label": "Report Incident to CERT-MU", + "url": "https://cert-mu.govmu.org/Pages/incident-reporting.aspx" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "vnexpress-vn", - "authority_name": "VnExpress International", - "country": "Vietnam", - "country_code": "VN", - "region": "Asia", - "authority_type": 
"public_safety_program", - "base_url": "https://e.vnexpress.net" + "source_id": "cert-tn-bulletins", + "authority_name": "Tunisia CERT Bulletins", + "country": "Tunisia", + "country_code": "TN", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://www.cert.tn" }, - "feed_url": "https://e.vnexpress.net/rss/news.rss", - "category": "public_safety", - "region_tag": "VN", - "lat": 21.0278, - "lng": 105.8342, + "feed_url": "https://www.cert.tn/", + "category": "cyber_advisory", + "region_tag": "TN", + "lat": 33.886917, + "lng": 9.537499, + "max_items": 100, + "include_keywords": [ + "alerte", + "advisory", + "vuln", + "threat", + "incident", + "cyber" + ], "reporting": { - "label": "Vietnam Police Tips", - "url": "https://congan.com.vn/", - "phone": "113 (Vietnam Police)" + "label": "Report Incident to CERT-TN", + "url": "https://www.cert.tn/" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "laotiantimes-la", - "authority_name": "Laotian Times", - "country": "Laos", - "country_code": "LA", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://laotiantimes.com" + "source_id": "mp-hn-casos", + "authority_name": "Ministerio Publico Honduras Casos", + "country": "Honduras", + "country_code": "HN", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.mp.hn" }, - "feed_url": "https://laotiantimes.com/feed/", - "category": "public_safety", - "region_tag": "LA", - "lat": 17.9757, - "lng": 102.6331, + "feed_url": "https://www.mp.hn/category/noticias/", + "feed_urls": [ + "https://www.mp.hn/category/noticias/", + "https://www.mp.hn/noticias/", + "https://www.mp.hn" + ], + "category": "public_appeal", + "region_tag": "HN", + "lat": 14.0818, + "lng": -87.2068, + "max_items": 120, + "include_keywords": [ + "desaparec", + "secuestro", + "captura", + "investigacion", + "denuncia", + "homicidio", + "fiscalia" + ], + "exclude_keywords": [ + 
"evento", + "agenda", + "licitacion" + ], "reporting": { - "label": "Laotian Times Contact", - "url": "https://laotiantimes.com/contact/" + "label": "Report to MP Honduras", + "url": "https://www.mp.hn" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "bangkokpost-th", - "authority_name": "Bangkok Post", - "country": "Thailand", - "country_code": "TH", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://www.bangkokpost.com" + "source_id": "fgr-sv-casos", + "authority_name": "Fiscalia El Salvador Casos", + "country": "El Salvador", + "country_code": "SV", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.fiscalia.gob.sv" }, - "feed_url": "https://www.bangkokpost.com/rss/data/topstories.xml", - "category": "public_safety", - "region_tag": "TH", - "lat": 13.7563, - "lng": 100.5018, + "feed_url": "https://www.fiscalia.gob.sv/category/noticias/", + "feed_urls": [ + "https://www.fiscalia.gob.sv/category/noticias/", + "https://www.fiscalia.gob.sv/noticias/", + "https://www.fiscalia.gob.sv" + ], + "category": "public_appeal", + "region_tag": "SV", + "lat": 13.6929, + "lng": -89.2182, + "max_items": 120, + "include_keywords": [ + "desaparec", + "secuestro", + "se busca", + "captura", + "informacion", + "denuncia", + "fiscalia", + "homicidio" + ], + "exclude_keywords": [ + "evento", + "agenda", + "acto protocolario" + ], "reporting": { - "label": "Thailand Police Tips", - "url": "https://www.royalthaipolice.go.th/", - "phone": "191 (Thailand Police)" + "label": "Report to FGR El Salvador", + "url": "https://www.fiscalia.gob.sv" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "rappler-ph", - "authority_name": "Rappler Philippines", - "country": "Philippines", - "country_code": "PH", - "region": "Asia", + "source_id": "oij-cr-casos", + "authority_name": "OIJ Costa Rica Casos", + "country": "Costa Rica", 
+ "country_code": "CR", + "region": "North America", "authority_type": "public_safety_program", - "base_url": "https://www.rappler.com" + "base_url": "https://sitiooij.poder-judicial.go.cr" }, - "feed_url": "https://www.rappler.com/feed/", - "category": "public_safety", - "region_tag": "PH", - "lat": 14.5995, - "lng": 120.9842, + "feed_url": "https://sitiooij.poder-judicial.go.cr/index.php/oficinas-y-departamentos/desaparecidos", + "feed_urls": [ + "https://sitiooij.poder-judicial.go.cr/index.php/oficinas-y-departamentos/desaparecidos", + "https://sitiooij.poder-judicial.go.cr/index.php/noticias" + ], + "category": "missing_person", + "region_tag": "CR", + "lat": 9.9325, + "lng": -84.0833, + "max_items": 120, + "include_keywords": [ + "desaparec", + "no localiza", + "se busca", + "auxilio" + ], + "exclude_keywords": [ + "licitacion", + "convocatoria", + "boletin" + ], "reporting": { - "label": "PNP Philippines Tips", - "url": "https://www.pnp.gov.ph/", - "phone": "117 (PH Emergency)" + "label": "Report to OIJ Costa Rica", + "url": "https://sitiooij.poder-judicial.go.cr" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "tempo-id", - "authority_name": "Tempo Indonesia", - "country": "Indonesia", - "country_code": "ID", - "region": "Asia", - "authority_type": "public_safety_program", - "base_url": "https://en.tempo.co" + "source_id": "mp-pa-casos", + "authority_name": "Ministerio Publico Panama Casos", + "country": "Panama", + "country_code": "PA", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://ministeriopublico.gob.pa" }, - "feed_url": "https://rss.tempo.co/en/", - "category": "public_safety", - "region_tag": "ID", - "lat": -6.2088, - "lng": 106.8456, + "feed_url": "https://ministeriopublico.gob.pa/categoria/noticias/", + "feed_urls": [ + "https://ministeriopublico.gob.pa/categoria/noticias/", + "https://ministeriopublico.gob.pa/noticias/", + "https://ministeriopublico.gob.pa" + 
], + "category": "public_appeal", + "region_tag": "PA", + "lat": 8.9824, + "lng": -79.5199, + "max_items": 120, + "include_keywords": [ + "desaparec", + "secuestro", + "se busca", + "captura", + "informacion", + "denuncia", + "investigacion" + ], + "exclude_keywords": [ + "evento", + "agenda", + "licitacion" + ], "reporting": { - "label": "Indonesia Police Tips", - "url": "https://www.polri.go.id/", - "phone": "110 (Indonesia Police)" + "label": "Report to MP Panama", + "url": "https://ministeriopublico.gob.pa" } }, { - "type": "rss", + "type": "html-list", "followRedirects": true, "source": { - "source_id": "postcourier-pg", - "authority_name": "Post-Courier PNG", - "country": "Papua New Guinea", - "country_code": "PG", - "region": "Oceania", - "authority_type": "public_safety_program", - "base_url": "https://www.postcourier.com.pg" + "source_id": "mp-ni-casos", + "authority_name": "Ministerio Publico Nicaragua Casos", + "country": "Nicaragua", + "country_code": "NI", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.ministeriopublico.gob.ni" }, - "feed_url": "https://www.postcourier.com.pg/feed/", - "category": "public_safety", - "region_tag": "PG", - "lat": -6.3149, - "lng": 147.1802, + "feed_url": "https://www.ministeriopublico.gob.ni/noticias/", + "feed_urls": [ + "https://www.ministeriopublico.gob.ni/noticias/", + "https://www.ministeriopublico.gob.ni" + ], + "category": "public_appeal", + "region_tag": "NI", + "lat": 12.1364, + "lng": -86.2514, + "max_items": 120, + "include_keywords": [ + "desaparec", + "secuestro", + "captura", + "investigacion", + "denuncia", + "fiscalia", + "homicidio" + ], + "exclude_keywords": [ + "agenda", + "evento", + "boletin" + ], "reporting": { - "label": "PNG Police", - "url": "https://www.rpngc.gov.pg/", - "phone": "000 (PNG Emergency)" + "label": "Report to MP Nicaragua", + "url": "https://www.ministeriopublico.gob.ni" } }, { - "type": "rss", + "type": "html-list", + "promotion_status": 
"rejected", + "rejection_reason": "ACLED has no RSS feed; html-list scrape only captures navigation links, not conflict data. Requires authenticated API access.", "followRedirects": true, "source": { - "source_id": "fijitimes-fj", - "authority_name": "Fiji Times", - "country": "Fiji", - "country_code": "FJ", - "region": "Oceania", + "source_id": "acled-conflict-monitor", + "authority_name": "ACLED Conflict Monitor", + "country": "Global", + "country_code": "INT", + "region": "International", "authority_type": "public_safety_program", - "base_url": "https://www.fijitimes.com" + "base_url": "https://acleddata.com" }, - "feed_url": "https://www.fijitimes.com/feed/", - "category": "public_safety", - "region_tag": "FJ", - "lat": -18.1416, - "lng": 178.4419, + "feed_url": "https://acleddata.com/", + "feed_urls": [ + "https://acleddata.com/", + "https://acleddata.com/dashboard/" + ], + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 20, + "lng": 0, + "max_items": 100, + "include_keywords": [ + "conflict", + "violence", + "protest", + "incident", + "dashboard", + "data" + ], + "exclude_keywords": [ + "careers", + "donate", + "newsletter" + ], "reporting": { - "label": "Fiji Police", - "url": "https://www.police.gov.fj/", - "phone": "917 (Fiji Police)" + "label": "ACLED Data Access", + "url": "https://acleddata.com/data-export-tool/" } }, { - "type": "rss", + "type": "html-list", + "promotion_status": "rejected", + "rejection_reason": "ICRC Family Links blocks all automated access (403)", "followRedirects": true, "source": { - "source_id": "rnz-pacific", - "authority_name": "RNZ Pacific", - "country": "New Zealand", - "country_code": "NZ", - "region": "Oceania", + "source_id": "icrc-family-links", + "authority_name": "ICRC Family Links", + "country": "Global", + "country_code": "INT", + "region": "International", "authority_type": "public_safety_program", - "base_url": "https://www.rnz.co.nz" + "base_url": "https://familylinks.icrc.org" }, - "feed_url": 
"https://www.rnz.co.nz/rss/pacific.xml", - "category": "public_safety", - "region_tag": "NZ", - "lat": -15.3767, - "lng": 166.9592, + "feed_url": "https://familylinks.icrc.org/", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 20, + "lng": 0, + "max_items": 80, + "include_keywords": [ + "missing", + "family", + "search", + "crisis", + "restore", + "trace" + ], + "exclude_keywords": [ + "privacy", + "policy", + "terms" + ], "reporting": { - "label": "RNZ Pacific Contact", - "url": "https://www.rnz.co.nz/about/contact" + "label": "ICRC Family Links", + "url": "https://familylinks.icrc.org/" } }, { "type": "html-list", "followRedirects": true, "source": { - "source_id": "kenya-dci-wanted", - "authority_name": "Kenya DCI Wanted", - "country": "Kenya", - "country_code": "KE", - "region": "Africa", - "authority_type": "police", - "base_url": "https://www.dci.go.ke" + "source_id": "iom-missing-migrants", + "authority_name": "IOM Missing Migrants", + "country": "Global", + "country_code": "INT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://missingmigrants.iom.int" }, - "feed_url": "https://www.dci.go.ke/wanted/", - "category": "wanted_suspect", - "region_tag": "KE", - "lat": -1.286389, - "lng": 36.817223, - "max_items": 80, + "feed_url": "https://missingmigrants.iom.int/", + "feed_urls": [ + "https://missingmigrants.iom.int/", + "https://missingmigrants.iom.int/latest-data" + ], + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 20, + "lng": 0, + "max_items": 100, "include_keywords": [ - "wanted", - "suspect", - "fugitive", - "appeal", - "missing" + "missing migrants", + "incident", + "data", + "route", + "deaths", + "disappearances" + ], + "exclude_keywords": [ + "publication", + "about us" ], "reporting": { - "label": "Report to Kenya DCI", - "url": "https://www.dci.go.ke/contact-us/", - "phone": "+254 20 2710000" + "label": "IOM Missing Migrants Data", + "url": 
"https://missingmigrants.iom.int/" } }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "eoco-gh-wanted", - "authority_name": "EOCO Ghana Wanted Persons", - "country": "Ghana", - "country_code": "GH", - "region": "Africa", - "authority_type": "police", - "base_url": "https://www.eoco.gov.gh" + "source_id": "bsi-certbund", + "authority_name": "CERT-Bund", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.bsi.bund.de" }, - "feed_url": "https://www.eoco.gov.gh/wanted-persons/", - "category": "wanted_suspect", - "region_tag": "GH", - "lat": 5.603717, - "lng": -0.186964, - "max_items": 80, - "include_keywords": [ - "wanted", - "suspect", - "fraud", - "crime", - "appeal" - ], + "feed_url": "https://wid.cert-bund.de/content/public/securityAdvisory/rss", + "category": "cyber_advisory", + "region_tag": "DE", + "lat": 50.73, + "lng": 7.1, "reporting": { - "label": "Report to EOCO Ghana", - "url": "https://www.eoco.gov.gh/contact-us/" - } + "label": "Report to BSI", + "url": "https://www.bsi.bund.de/DE/IT-Sicherheitsvorfall/it-sicherheitsvorfall_node.html" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "osp-gh-wanted", - "authority_name": "Ghana OSP Wanted Cases", - "country": "Ghana", - "country_code": "GH", - "region": "Africa", - "authority_type": "regulatory", - "base_url": "https://osp.gov.gh" + "source_id": "bsi-csw", + "authority_name": "BSI Cyber-Sicherheitswarnungen", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.bsi.bund.de" }, - "feed_url": "https://osp.gov.gh/case_files/wanted/", - "category": "wanted_suspect", - "region_tag": "GH", - "lat": 5.603717, - "lng": -0.186964, - "max_items": 80, - "include_keywords": [ - "wanted", - "case", - "appeal", - "fugitive" - ], + "feed_url": 
"https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed_CSW.xml", + "category": "cyber_advisory", + "region_tag": "DE", + "lat": 50.73, + "lng": 7.09, "reporting": { - "label": "Report to Ghana OSP", - "url": "https://osp.gov.gh/contact/" - } + "label": "Report to BSI", + "url": "https://www.bsi.bund.de/DE/IT-Sicherheitsvorfall/it-sicherheitsvorfall_node.html" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "efcc-ng-wanted", - "authority_name": "EFCC Nigeria Wanted", - "country": "Nigeria", - "country_code": "NG", - "region": "Africa", - "authority_type": "police", - "base_url": "https://www.efcc.gov.ng" + "source_id": "bsi-kritis", + "authority_name": "BSI KRITIS", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.bsi.bund.de" }, - "feed_url": "https://www.efcc.gov.ng/efcc/wanted", - "category": "wanted_suspect", - "region_tag": "NG", - "lat": 9.082, - "lng": 8.6753, - "max_items": 80, - "include_keywords": [ - "wanted", - "suspect", - "fraud", - "money laundering", - "appeal" - ], + "feed_url": "https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed_KRITIS-aktuell.xml", + "category": "cyber_advisory", + "region_tag": "DE", + "lat": 50.74, + "lng": 7.1, "reporting": { - "label": "Report to EFCC", - "url": "https://www.efcc.gov.ng/efcc/records/red-alert" - } + "label": "Report to BSI", + "url": "https://www.bsi.bund.de/DE/IT-Sicherheitsvorfall/it-sicherheitsvorfall_node.html" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "Product certification feed (NESAS audit/evaluation docs), not security advisories", "source": { - "source_id": "icpc-ng-wanted", - "authority_name": "ICPC Nigeria Wanted", - "country": "Nigeria", - "country_code": "NG", - "region": "Africa", - 
"authority_type": "police", - "base_url": "https://icpc.gov.ng" + "source_id": "bsi-nesas", + "authority_name": "BSI NESAS", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.bsi.bund.de" }, - "feed_url": "https://icpc.gov.ng/wanted-persons/", - "category": "wanted_suspect", - "region_tag": "NG", - "lat": 9.082, - "lng": 8.6753, - "max_items": 80, - "include_keywords": [ - "wanted", - "persons", - "suspect", - "fugitive" - ], + "feed_url": "https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed_NESAS.xml", + "category": "cyber_advisory", + "region_tag": "DE", + "lat": 50.72, + "lng": 7.1, "reporting": { - "label": "Report to ICPC", - "url": "https://icpc.gov.ng/report-corruption/" - } + "label": "Report to BSI", + "url": "https://www.bsi.bund.de/DE/IT-Sicherheitsvorfall/it-sicherheitsvorfall_node.html" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "saps-newsroom-html", - "authority_name": "SAPS Newsroom", - "country": "South Africa", - "country_code": "ZA", - "region": "Africa", - "authority_type": "police", - "base_url": "https://www.saps.gov.za" + "source_id": "bsi-buergercert", + "authority_name": "BSI Bürger-CERT", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.bsi.bund.de" }, - "feed_url": "https://www.saps.gov.za/newsroom/newsroom.php", - "category": "public_appeal", - "region_tag": "ZA", - "lat": -25.747868, - "lng": 28.229271, - "max_items": 120, - "include_keywords": [ - "wanted", - "missing", - "suspect", - "appeal", - "crime", - "investigation", - "tip-off" - ], + "feed_url": "https://wid.cert-bund.de/content/public/buergercert/rss", + "category": "cyber_advisory", + "region_tag": "DE", + "lat": 50.74, + "lng": 7.09, "reporting": { - "label": "Report to SAPS Crime Stop", - "url": 
"https://www.saps.gov.za/crimestop/", - "phone": "08600 10111" - } + "label": "Report to BSI", + "url": "https://www.bsi.bund.de/DE/IT-Sicherheitsvorfall/it-sicherheitsvorfall_node.html" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "missingchildren-za-html", - "authority_name": "Missing Children South Africa Cases", - "country": "South Africa", - "country_code": "ZA", - "region": "Africa", - "authority_type": "public_safety_program", - "base_url": "https://missingchildren.org.za" + "source_id": "sans-isc", + "authority_name": "SANS Internet Storm Center", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "cert", + "base_url": "https://isc.sans.edu" }, - "feed_url": "https://missingchildren.org.za/category/cases/", - "feed_urls": [ - "https://missingchildren.org.za/category/cases/", - "https://missingchildren.org.za/category/missing-children/" - ], - "category": "missing_person", - "region_tag": "ZA", - "lat": -29, - "lng": 24, - "max_items": 120, - "include_keywords": [ - "missing", - "child", - "teen", - "appeal", - "case" - ], - "exclude_keywords": [ - "donate", - "event", - "newsletter" - ], + "feed_url": "https://isc.sans.edu/rssfeed.xml", + "category": "cyber_advisory", + "region_tag": "US", + "lat": 38.95, + "lng": -77.35, "reporting": { - "label": "Report Missing Person to MCSA", - "url": "https://missingchildren.org.za/report/" - } + "label": "Report to SANS ISC", + "url": "https://isc.sans.edu/contact.html" + }, + "max_items": 15 }, { "type": "rss", - "followRedirects": true, "source": { - "source_id": "ke-cirt-cc", - "authority_name": "KE-CIRT/CC", - "country": "Kenya", - "country_code": "KE", - "region": "Africa", + "source_id": "cert-at", + "authority_name": "CERT.at", + "country": "Austria", + "country_code": "AT", + "region": "Europe", "authority_type": "cert", - "base_url": "https://ke-cirt.go.ke" + "base_url": 
"https://cert.at" }, - "feed_url": "https://ke-cirt.go.ke/feed/", + "feed_url": "https://www.cert.at/cert-at.de.warnings.rss_2.0.xml", "category": "cyber_advisory", - "region_tag": "KE", - "lat": -1.286389, - "lng": 36.817223, - "max_items": 100, + "region_tag": "AT", + "lat": 48.21, + "lng": 16.37, "reporting": { - "label": "Report Incident to KE-CIRT/CC", - "url": "https://ke-cirt.go.ke/incident-reporting/" - } + "label": "Report to CERT.at", + "url": "https://cert.at/de/melden" + }, + "max_items": 15 }, { "type": "rss", - "followRedirects": true, "source": { - "source_id": "cert-mu", - "authority_name": "CERT-MU Mauritius", - "country": "Mauritius", - "country_code": "MU", - "region": "Africa", + "source_id": "ccn-cert-es", + "authority_name": "CCN-CERT Spain", + "country": "Spain", + "country_code": "ES", + "region": "Europe", "authority_type": "cert", - "base_url": "https://cert-mu.govmu.org" + "base_url": "https://www.ccn-cert.cni.es" }, - "feed_url": "https://cert-mu.govmu.org/rss", - "feed_urls": [ - "https://cert-mu.govmu.org/rss", - "https://cert-mu.govmu.org/English/Pages/rss.aspx" - ], + "feed_url": "https://www.ccn-cert.cni.es/es/seguridad-al-dia/alertas-ccn-cert.html?format=feed&type=rss", "category": "cyber_advisory", - "region_tag": "MU", - "lat": -20.160891, - "lng": 57.501222, - "max_items": 100, + "region_tag": "ES", + "lat": 40.42, + "lng": -3.7, "reporting": { - "label": "Report Incident to CERT-MU", - "url": "https://cert-mu.govmu.org/Pages/incident-reporting.aspx" - } + "label": "Report to CCN-CERT", + "url": "https://www.ccn-cert.cni.es/es/comunicacion-eventos/contacto.html" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "ngcert-ng-bulletins", - "authority_name": "ngCERT Nigeria Bulletins", - "country": "Nigeria", - "country_code": "NG", - "region": "Africa", + "source_id": "cert-se", + "authority_name": "CERT-SE", + "country": "Sweden", + "country_code": "SE", + 
"region": "Europe", "authority_type": "cert", - "base_url": "https://www.cert.gov.ng" + "base_url": "https://www.cert.se" }, - "feed_url": "https://www.cert.gov.ng/", + "feed_url": "https://www.cert.se/feed/", "category": "cyber_advisory", - "region_tag": "NG", - "lat": 9.082, - "lng": 8.6753, - "max_items": 100, - "include_keywords": [ - "advisory", - "alert", - "threat", - "vulnerability", - "incident" - ], + "region_tag": "SE", + "lat": 59.33, + "lng": 18.07, "reporting": { - "label": "Report Incident to ngCERT", - "url": "https://www.cert.gov.ng/" - } + "label": "Report to CERT-SE", + "url": "https://www.cert.se/om-cert-se/rapportera-en-incident/" + }, + "max_items": 15 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "cert-tn-bulletins", - "authority_name": "Tunisia CERT Bulletins", - "country": "Tunisia", - "country_code": "TN", - "region": "Africa", + "source_id": "cert-pl", + "authority_name": "CERT.PL", + "country": "Poland", + "country_code": "PL", + "region": "Europe", "authority_type": "cert", - "base_url": "https://www.cert.tn" + "base_url": "https://cert.pl" }, - "feed_url": "https://www.cert.tn/", + "feed_url": "https://cert.pl/en/rss.xml", "category": "cyber_advisory", - "region_tag": "TN", - "lat": 33.886917, - "lng": 9.537499, - "max_items": 100, - "include_keywords": [ - "alerte", - "advisory", - "vuln", - "threat", - "incident", - "cyber" - ], + "region_tag": "PL", + "lat": 52.23, + "lng": 21.01, "reporting": { - "label": "Report Incident to CERT-TN", - "url": "https://www.cert.tn/" - } + "label": "Report to CERT.PL", + "url": "https://incydent.cert.pl/" + }, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "cert-lv", + "authority_name": "CERT.LV", + "country": "Latvia", + "country_code": "LV", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.cert.lv" + }, + "feed_url": "https://www.cert.lv/en/rss/news.xml", + "category": "cyber_advisory", + 
"region_tag": "LV", + "lat": 56.95, + "lng": 24.11, + "reporting": { + "label": "Report to CERT.LV", + "url": "https://www.cert.lv/en/report" + }, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "cert-hr", + "authority_name": "CERT.hr", + "country": "Croatia", + "country_code": "HR", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.cert.hr" + }, + "feed_url": "https://www.cert.hr/feed/", + "category": "cyber_advisory", + "region_tag": "HR", + "lat": 45.81, + "lng": 15.98, + "reporting": { + "label": "Report to CERT.hr", + "url": "https://www.cert.hr/prijava-incidenta/" + }, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "si-cert", + "authority_name": "SI-CERT", + "country": "Slovenia", + "country_code": "SI", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.cert.si" + }, + "feed_url": "https://www.cert.si/en/feed/", + "category": "cyber_advisory", + "region_tag": "SI", + "lat": 46.06, + "lng": 14.51, + "reporting": { + "label": "Report to SI-CERT", + "url": "https://www.cert.si/en/report/" + }, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "dnsc-ro", + "authority_name": "DNSC Romania", + "country": "Romania", + "country_code": "RO", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://dnsc.ro" + }, + "feed_url": "https://dnsc.ro/feed", + "category": "cyber_advisory", + "region_tag": "RO", + "lat": 44.43, + "lng": 26.1, + "reporting": { + "label": "Report to DNSC", + "url": "https://dnsc.ro/sesizare" + }, + "max_items": 15 }, { "type": "html-list", - "followRedirects": true, "source": { - "source_id": "mp-gt-casos", - "authority_name": "Ministerio Publico Guatemala Casos", - "country": "Guatemala", - "country_code": "GT", - "region": "North America", - "authority_type": "regulatory", - "base_url": "https://www.mp.gob.gt" + "source_id": "ncsc-ch", + "authority_name": "NCSC Switzerland", + "country": "Switzerland", + 
"country_code": "CH", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.ncsc.admin.ch" + }, + "feed_url": "https://www.ncsc.admin.ch/ncsc/en/home/aktuell/aktuelle-vorfaelle.html", + "category": "cyber_advisory", + "region_tag": "CH", + "lat": 46.95, + "lng": 7.45, + "reporting": { + "label": "Report to NCSC", + "url": "https://www.ncsc.admin.ch/ncsc/en/home/infos-fuer/infos-it-spezialisten/themen/schwachstelle-melden.html" }, - "feed_url": "https://www.mp.gob.gt/noticias/", + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "politi-dk", + "authority_name": "Danish Police", + "country": "Denmark", + "country_code": "DK", + "region": "Europe", + "authority_type": "police", + "base_url": "https://politi.dk" + }, + "feed_url": "https://politi.dk/rss", "feed_urls": [ - "https://www.mp.gob.gt/noticias/", - "https://www.mp.gob.gt/category/noticias/", - "https://www.mp.gob.gt" + "https://politi.dk/rss", + "https://politi.dk/nyheder/rss" ], "category": "public_appeal", - "region_tag": "GT", - "lat": 14.634915, - "lng": -90.506882, - "max_items": 120, - "include_keywords": [ - "desaparec", - "secuestro", - "se busca", - "captura", - "investigacion", - "denuncia", - "fiscalia", - "homicidio" - ], - "exclude_keywords": [ - "evento", - "licitacion", - "boletin", - "agenda" - ], + "region_tag": "DK", + "lat": 55.68, + "lng": 12.57, "reporting": { - "label": "Report to MP Guatemala", - "url": "https://www.mp.gob.gt" + "label": "Report to Danish Police", + "url": "https://politi.dk/anmeld", + "phone": "114" } }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "mp-hn-casos", - "authority_name": "Ministerio Publico Honduras Casos", - "country": "Honduras", - "country_code": "HN", - "region": "North America", - "authority_type": "regulatory", - "base_url": "https://www.mp.hn" + "source_id": "poliisi-fi", + "authority_name": "Finnish Police", + "country": "Finland", + "country_code": "FI", 
+ "region": "Europe", + "authority_type": "police", + "base_url": "https://poliisi.fi" }, - "feed_url": "https://www.mp.hn/category/noticias/", + "feed_url": "https://poliisi.fi/en/rss/news", "feed_urls": [ - "https://www.mp.hn/category/noticias/", - "https://www.mp.hn/noticias/", - "https://www.mp.hn" + "https://poliisi.fi/en/rss/news", + "https://poliisi.fi/rss/uutiset" ], "category": "public_appeal", - "region_tag": "HN", - "lat": 14.0818, - "lng": -87.2068, - "max_items": 120, - "include_keywords": [ - "desaparec", - "secuestro", - "captura", - "investigacion", - "denuncia", - "homicidio", - "fiscalia" - ], - "exclude_keywords": [ - "evento", - "agenda", - "licitacion" - ], + "region_tag": "FI", + "lat": 60.17, + "lng": 24.94, "reporting": { - "label": "Report to MP Honduras", - "url": "https://www.mp.hn" + "label": "Report to Finnish Police", + "url": "https://poliisi.fi/en/report-a-crime", + "phone": "112" } }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "fgr-sv-casos", - "authority_name": "Fiscalia El Salvador Casos", - "country": "El Salvador", - "country_code": "SV", - "region": "North America", - "authority_type": "regulatory", - "base_url": "https://www.fiscalia.gob.sv" + "source_id": "garda-ie", + "authority_name": "An Garda Síochána", + "country": "Ireland", + "country_code": "IE", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.garda.ie" }, - "feed_url": "https://www.fiscalia.gob.sv/category/noticias/", - "feed_urls": [ - "https://www.fiscalia.gob.sv/category/noticias/", - "https://www.fiscalia.gob.sv/noticias/", - "https://www.fiscalia.gob.sv" - ], + "feed_url": "https://www.garda.ie/en/about-us/our-departments/office-of-corporate-communications/press-releases/", "category": "public_appeal", - "region_tag": "SV", - "lat": 13.6929, - "lng": -89.2182, - "max_items": 120, - "include_keywords": [ - "desaparec", - "secuestro", - "se busca", - "captura", - "informacion", - 
"denuncia", - "fiscalia", - "homicidio" - ], - "exclude_keywords": [ - "evento", - "agenda", - "acto protocolario" - ], + "region_tag": "IE", + "lat": 53.34, + "lng": -6.26, "reporting": { - "label": "Report to FGR El Salvador", - "url": "https://www.fiscalia.gob.sv" + "label": "Report to An Garda Síochána", + "url": "https://www.garda.ie/en/contact-us/", + "phone": "999 / 112" } }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "oij-cr-casos", - "authority_name": "OIJ Costa Rica Casos", - "country": "Costa Rica", - "country_code": "CR", - "region": "North America", - "authority_type": "public_safety_program", - "base_url": "https://sitiooij.poder-judicial.go.cr" + "source_id": "federale-politie-be", + "authority_name": "Belgian Federal Police", + "country": "Belgium", + "country_code": "BE", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.police.be" }, - "feed_url": "https://sitiooij.poder-judicial.go.cr/index.php/oficinas-y-departamentos/desaparecidos", + "feed_url": "https://www.police.be/feeds/rss", "feed_urls": [ - "https://sitiooij.poder-judicial.go.cr/index.php/oficinas-y-departamentos/desaparecidos", - "https://sitiooij.poder-judicial.go.cr/index.php/noticias", - "https://sitiooij.poder-judicial.go.cr" - ], - "category": "missing_person", - "region_tag": "CR", - "lat": 9.9325, - "lng": -84.0833, - "max_items": 120, - "include_keywords": [ - "desaparec", - "persona", - "se busca", - "informacion", - "auxilio", - "oij" - ], - "exclude_keywords": [ - "licitacion", - "convocatoria", - "boletin" + "https://www.police.be/feeds/rss", + "https://www.politie.be/feeds/rss" ], + "category": "public_appeal", + "region_tag": "BE", + "lat": 50.85, + "lng": 4.35, "reporting": { - "label": "Report to OIJ Costa Rica", - "url": "https://sitiooij.poder-judicial.go.cr" + "label": "Report to Belgian Police", + "url": "https://www.police.be/fr/contact", + "phone": "101 / 112" + } + }, + { + "type": 
"rss", + "source": { + "source_id": "pj-pt", + "authority_name": "Polícia Judiciária", + "country": "Portugal", + "country_code": "PT", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.policiajudiciaria.pt" + }, + "feed_url": "https://www.policiajudiciaria.pt/feed/", + "category": "public_appeal", + "region_tag": "PT", + "lat": 38.72, + "lng": -9.14, + "reporting": { + "label": "Report to PJ", + "url": "https://www.policiajudiciaria.pt/contactos/", + "phone": "112" } }, { "type": "html-list", + "source": { + "source_id": "cert-ee", + "authority_name": "CERT-EE", + "country": "Estonia", + "country_code": "EE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.ria.ee" + }, + "feed_url": "https://www.ria.ee/kuberturvalisus/kuberintsidentide-kasitlemine-cert-ee", + "category": "cyber_advisory", + "region_tag": "EE", + "lat": 59.44, + "lng": 24.75, + "reporting": { + "label": "Report to CERT-EE", + "url": "https://www.ria.ee/en/cyber-security/cert-ee/reporting-incident" + }, + "max_items": 15 + }, + { + "type": "html-list", + "source": { + "source_id": "nksc-lt", + "authority_name": "NKSC Lithuania", + "country": "Lithuania", + "country_code": "LT", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.nksc.lt" + }, + "feed_url": "https://www.nksc.lt/en/threats.html", + "category": "cyber_advisory", + "region_tag": "LT", + "lat": 54.69, + "lng": 25.28, + "reporting": { + "label": "Report to NKSC", + "url": "https://www.nksc.lt/en/report.html" + }, + "max_items": 15 + }, + { + "type": "html-list", + "source": { + "source_id": "ncsa-gr", + "authority_name": "NCSA Greece", + "country": "Greece", + "country_code": "GR", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.nis.gr" + }, + "feed_url": "https://www.nis.gr/el/national-cert/", + "category": "cyber_advisory", + "region_tag": "GR", + "lat": 37.98, + "lng": 23.73, + "reporting": { + "label": "Report to NCSA 
Greece", + "url": "https://www.nis.gr/el/contact/" + }, + "max_items": 15 + }, + { + "type": "html-list", + "source": { + "source_id": "govcert-bg", + "authority_name": "GovCERT Bulgaria", + "country": "Bulgaria", + "country_code": "BG", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.govcert.bg" + }, + "feed_url": "https://www.govcert.bg/", + "category": "cyber_advisory", + "region_tag": "BG", + "lat": 42.7, + "lng": 23.32, + "reporting": { + "label": "Report to GovCERT.bg", + "url": "https://www.govcert.bg/" + }, + "max_items": 15 + }, + { + "type": "travelwarning-json", "followRedirects": true, "source": { - "source_id": "mp-pa-casos", - "authority_name": "Ministerio Publico Panama Casos", - "country": "Panama", - "country_code": "PA", - "region": "North America", - "authority_type": "regulatory", - "base_url": "https://ministeriopublico.gob.pa" + "source_id": "de-aa-travel", + "authority_name": "German Federal Foreign Office", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.auswaertiges-amt.de" }, - "feed_url": "https://ministeriopublico.gob.pa/categoria/noticias/", - "feed_urls": [ - "https://ministeriopublico.gob.pa/categoria/noticias/", - "https://ministeriopublico.gob.pa/noticias/", - "https://ministeriopublico.gob.pa" - ], - "category": "public_appeal", - "region_tag": "PA", - "lat": 8.9824, - "lng": -79.5199, - "max_items": 120, - "include_keywords": [ - "desaparec", - "secuestro", - "se busca", - "captura", - "informacion", - "denuncia", - "investigacion" - ], - "exclude_keywords": [ - "evento", - "agenda", - "licitacion" - ], + "feed_url": "https://www.auswaertiges-amt.de/opendata/travelwarning", + "category": "travel_warning", + "region_tag": "DE", + "lat": 52.52, + "lng": 13.405, "reporting": { - "label": "Report to MP Panama", - "url": "https://ministeriopublico.gob.pa" + "label": "German Federal Foreign Office", + "url": 
"https://www.auswaertiges-amt.de/en/aussenpolitik/laenderinformationen", + "notes": "Travel and safety information for German citizens abroad." + } + }, + { + "type": "travelwarning-atom", + "source": { + "source_id": "uk-fcdo-travel", + "authority_name": "UK FCDO Travel Advice", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.gov.uk" + }, + "feed_url": "https://www.gov.uk/government/organisations/foreign-commonwealth-development-office.atom", + "category": "travel_warning", + "region_tag": "GB", + "lat": 51.5074, + "lng": -0.1278, + "reporting": { + "label": "UK FCDO Travel Advice", + "url": "https://www.gov.uk/foreign-travel-advice", + "notes": "Travel advice and safety information for British nationals." } }, { "type": "html-list", "followRedirects": true, "source": { - "source_id": "mp-ni-casos", - "authority_name": "Ministerio Publico Nicaragua Casos", - "country": "Nicaragua", - "country_code": "NI", + "source_id": "us-state-travel", + "authority_name": "US State Dept Travel Advisories", + "country": "United States", + "country_code": "US", "region": "North America", - "authority_type": "regulatory", - "base_url": "https://www.ministeriopublico.gob.ni" + "authority_type": "national_security", + "base_url": "https://travel.state.gov" }, - "feed_url": "https://www.ministeriopublico.gob.ni/noticias/", - "feed_urls": [ - "https://www.ministeriopublico.gob.ni/noticias/", - "https://www.ministeriopublico.gob.ni" - ], - "category": "public_appeal", - "region_tag": "NI", - "lat": 12.1364, - "lng": -86.2514, - "max_items": 120, + "feed_url": "https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories.html", + "category": "travel_warning", + "region_tag": "US", + "lat": 38.9, + "lng": -77.05, "include_keywords": [ - "desaparec", - "secuestro", - "captura", - "investigacion", - "denuncia", - "fiscalia", - "homicidio" - ], - "exclude_keywords": [ - "agenda", 
- "evento", - "boletin" + "travel advisory", + "level", + "do not travel", + "reconsider", + "exercise caution" ], "reporting": { - "label": "Report to MP Nicaragua", - "url": "https://www.ministeriopublico.gob.ni" + "label": "US State Department Travel Advisories", + "url": "https://travel.state.gov/content/travel/en/traveladvisories/traveladvisories.html", + "notes": "Travel advisories for US citizens abroad." } }, { - "type": "html-list", - "followRedirects": true, + "type": "interpol-red-json", + "source": { + "source_id": "interpol-red", + "authority_name": "INTERPOL Red Notices", + "country": "France", + "country_code": "FR", + "region": "International", + "authority_type": "police", + "base_url": "https://ws-public.interpol.int" + }, + "feed_url": "https://ws-public.interpol.int/notices/v1/red?resultPerPage=160&page=1", + "category": "wanted_suspect", + "region_tag": "INT", + "lat": 48.86, + "lng": 2.35, + "max_items": 160 + }, + { + "type": "interpol-yellow-json", + "source": { + "source_id": "interpol-yellow", + "authority_name": "INTERPOL Yellow Notices", + "country": "France", + "country_code": "FR", + "region": "International", + "authority_type": "police", + "base_url": "https://ws-public.interpol.int" + }, + "feed_url": "https://ws-public.interpol.int/notices/v1/yellow?resultPerPage=160&page=1", + "category": "missing_person", + "region_tag": "INT", + "lat": 48.86, + "lng": 2.35, + "max_items": 160 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "ICRC removed main RSS feed; replaced by icrc-ihl and icrc-field-ops", + "source": { + "source_id": "icrc-news", + "authority_name": "ICRC News", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.icrc.org" + }, + "feed_url": "https://www.icrc.org/en/rss", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + 
"source": { + "source_id": "icrc-ihl", + "authority_name": "ICRC Humanitarian Law & Policy", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://blogs.icrc.org/law-and-policy" + }, + "feed_url": "https://blogs.icrc.org/law-and-policy/feed/", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + "source": { + "source_id": "icrc-field-ops", + "authority_name": "ICRC Field Operations", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://blogs.icrc.org/ilot" + }, + "feed_url": "https://blogs.icrc.org/ilot/feed/", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "UNHCR blocks RSS access (403); replaced by un-refugees feed", + "source": { + "source_id": "unhcr-news", + "authority_name": "UNHCR News", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.unhcr.org" + }, + "feed_url": "https://www.unhcr.org/rss/news.xml", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + "source": { + "source_id": "who-disease-outbreak", + "authority_name": "WHO Disease Outbreak News", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.who.int" + }, + "feed_url": "https://www.who.int/feeds/entity/don/en/rss.xml", + "category": "health_emergency", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + "source": { + "source_id": "who-emergencies", + "authority_name": "WHO Health Emergencies", + "country": "Switzerland", + 
"country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.who.int" + }, + "feed_url": "https://www.who.int/feeds/entity/hac/en/rss.xml", + "category": "health_emergency", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + "source": { + "source_id": "reliefweb-updates", + "authority_name": "ReliefWeb Updates", + "country": "Global", + "country_code": "INT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://reliefweb.int" + }, + "feed_url": "https://reliefweb.int/updates/rss.xml", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "source": { + "source_id": "un-peacekeeping", + "authority_name": "UN Peacekeeping (Blue Helmets)", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "national_security", + "base_url": "https://peacekeeping.un.org" + }, + "feed_url": "https://peacekeeping.un.org/en/rss.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "source": { + "source_id": "unocha", + "authority_name": "UN OCHA", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.unocha.org" + }, + "feed_url": "https://www.unocha.org/rss.xml", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "source": { + "source_id": "un-peace-security", + "authority_name": "UN News Peace & Security", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "national_security", + "base_url": "https://news.un.org" + }, + "feed_url": "https://news.un.org/feed/subscribe/en/news/topic/peace-and-security/feed/rss.xml", + "category": 
"conflict_monitoring", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "source": { + "source_id": "un-refugees", + "authority_name": "UN News Refugees & Migrants", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://news.un.org" + }, + "feed_url": "https://news.un.org/feed/subscribe/en/news/topic/migrants-and-refugees/feed/rss.xml", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "source": { + "source_id": "un-humanitarian-aid", + "authority_name": "UN News Humanitarian Aid", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://news.un.org" + }, + "feed_url": "https://news.un.org/feed/subscribe/en/news/topic/humanitarian-aid/feed/rss.xml", + "category": "humanitarian_tasking", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "html-list", + "promotion_status": "rejected", + "rejection_reason": "JS-rendered navigation page, not a stable incident/tasking feed", + "source": { + "source_id": "hot-tasking", + "authority_name": "Humanitarian OpenStreetMap Team", + "country": "International", + "country_code": "INT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.hotosm.org" + }, + "feed_url": "https://www.hotosm.org/projects/", + "category": "humanitarian_tasking", + "region_tag": "INT", + "lat": 40.71, + "lng": -74.01 + }, + { + "type": "html-list", + "promotion_status": "rejected", + "rejection_reason": "Navigation/blog content about mapping events, not operational tasking intelligence", + "source": { + "source_id": "missing-maps-tasking", + "authority_name": "Missing Maps", + "country": "International", + "country_code": "INT", + "region": "International", + "authority_type": 
"public_safety_program", + "base_url": "https://www.missingmaps.org" + }, + "feed_url": "https://www.missingmaps.org/", + "category": "humanitarian_tasking", + "region_tag": "INT", + "lat": 51.51, + "lng": -0.13 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "WFP blocks RSS access (403); no alternative operational feed available", + "source": { + "source_id": "wfp-news", + "authority_name": "World Food Programme News", + "country": "Italy", + "country_code": "IT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.wfp.org" + }, + "feed_url": "https://www.wfp.org/rss.xml", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 41.88, + "lng": 12.56 + }, + { + "type": "rss", + "source": { + "source_id": "msf-news", + "authority_name": "Médecins Sans Frontières", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.msf.org" + }, + "feed_url": "https://www.msf.org/rss/all", + "category": "humanitarian_security", + "region_tag": "INT", + "lat": 46.22, + "lng": 6.14 + }, + { + "type": "rss", + "source": { + "source_id": "icg-crisiswatch", + "authority_name": "International Crisis Group", + "country": "Belgium", + "country_code": "BE", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.crisisgroup.org" + }, + "feed_url": "https://www.crisisgroup.org/rss.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 50.85, + "lng": 4.35 + }, + { + "type": "rss", + "source": { + "source_id": "sipri-news", + "authority_name": "SIPRI News", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "public_safety_program", + "base_url": "https://www.sipri.org" + }, + "feed_url": "https://www.sipri.org/rss/combined.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 
59.35, + "lng": 17.95 + }, + { + "type": "rss", + "source": { + "source_id": "gdacs-alerts", + "authority_name": "GDACS Natural Disaster Alerts", + "country": "Global", + "country_code": "INT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.gdacs.org" + }, + "feed_url": "https://www.gdacs.org/xml/rss.xml", + "category": "emergency_management", + "region_tag": "INT", + "lat": 45.77, + "lng": 4.83 + }, + { + "type": "rss", + "source": { + "source_id": "cia-newsroom", + "authority_name": "CIA Newsroom", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "intelligence", + "base_url": "https://www.cia.gov" + }, + "feed_url": "https://www.cia.gov/rss/press-releases.xml", + "category": "intelligence_report", + "region_tag": "US", + "lat": 38.95, + "lng": -77.15, + "fetch_mode": "browser" + }, + { + "type": "rss", + "source": { + "source_id": "nsa-advisories", + "authority_name": "NSA Cybersecurity Advisories", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "intelligence", + "base_url": "https://www.nsa.gov" + }, + "feed_url": "https://www.nsa.gov/Press-Room/Press-Releases-Statements/RSS-Feed/", + "category": "cyber_advisory", + "region_tag": "US", + "lat": 39.11, + "lng": -76.77, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "mi5-news", + "authority_name": "MI5 News", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.mi5.gov.uk" + }, + "feed_url": "https://www.mi5.gov.uk/UKSecurityNews.xml", + "category": "intelligence_report", + "region_tag": "GB", + "lat": 51.5, + "lng": -0.13 + }, + { + "type": "rss", + "source": { + "source_id": "gchq-news", + "authority_name": "GCHQ News", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "intelligence", + 
"base_url": "https://www.gchq.gov.uk" + }, + "feed_url": "https://www.gchq.gov.uk/feeds/news.xml", + "category": "intelligence_report", + "region_tag": "GB", + "lat": 51.9, + "lng": -2.12 + }, + { + "type": "rss", + "source": { + "source_id": "bfv-news", + "authority_name": "BfV (Bundesamt für Verfassungsschutz)", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.verfassungsschutz.de" + }, + "feed_url": "https://www.verfassungsschutz.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSNewsfeed.xml", + "category": "intelligence_report", + "region_tag": "DE", + "lat": 50.73, + "lng": 7.1 + }, + { + "type": "rss", + "source": { + "source_id": "bnd-news", + "authority_name": "BND (Bundesnachrichtendienst)", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.bnd.bund.de" + }, + "feed_url": "https://www.bnd.bund.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSNewsfeed.xml", + "category": "intelligence_report", + "region_tag": "DE", + "lat": 52.52, + "lng": 13.38 + }, + { + "type": "rss", + "source": { + "source_id": "dgsi-news", + "authority_name": "DGSI (French Internal Intelligence)", + "country": "France", + "country_code": "FR", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.dgsi.interieur.gouv.fr" + }, + "feed_url": "https://www.dgsi.interieur.gouv.fr/rss.xml", + "category": "intelligence_report", + "region_tag": "FR", + "lat": 48.86, + "lng": 2.33 + }, + { + "type": "rss", + "source": { + "source_id": "aivd-news", + "authority_name": "AIVD (Dutch Intelligence)", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.aivd.nl" + }, + "feed_url": "https://www.aivd.nl/rss/nieuws", + "category": "intelligence_report", + "region_tag": "NL", + "lat": 52.08, + "lng": 4.31 + }, + { + "type": 
"rss", + "source": { + "source_id": "sapo-news", + "authority_name": "SÄPO (Swedish Security Service)", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.sakerhetspolisen.se" + }, + "feed_url": "https://www.sakerhetspolisen.se/rss/nyheter.xml", + "category": "intelligence_report", + "region_tag": "SE", + "lat": 59.33, + "lng": 18.07 + }, + { + "type": "rss", + "source": { + "source_id": "asio-news", + "authority_name": "ASIO (Australian Security Intelligence)", + "country": "Australia", + "country_code": "AU", + "region": "Asia-Pacific", + "authority_type": "intelligence", + "base_url": "https://www.asio.gov.au" + }, + "feed_url": "https://www.asio.gov.au/rss.xml", + "category": "intelligence_report", + "region_tag": "AU", + "lat": -35.28, + "lng": 149.13 + }, + { + "type": "rss", + "source": { + "source_id": "csis-canada", + "authority_name": "CSIS (Canadian Intelligence)", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "intelligence", + "base_url": "https://www.canada.ca/en/security-intelligence-service" + }, + "feed_url": "https://www.canada.ca/en/security-intelligence-service.atom.xml", + "category": "intelligence_report", + "region_tag": "CA", + "lat": 45.42, + "lng": -75.69 + }, + { + "type": "rss", + "source": { + "source_id": "ecdc-news", + "authority_name": "ECDC Disease Threats", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "public_safety_program", + "base_url": "https://www.ecdc.europa.eu" + }, + "feed_url": "https://www.ecdc.europa.eu/en/taxonomy/term/1/feed", + "category": "health_emergency", + "region_tag": "EU", + "lat": 59.35, + "lng": 17.94 + }, + { + "type": "rss", + "source": { + "source_id": "cdc-emergency", + "authority_name": "CDC Emergency Preparedness", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": 
"public_safety_program", + "base_url": "https://www.cdc.gov" + }, + "feed_url": "https://tools.cdc.gov/api/v2/resources/media/403420.rss", + "category": "health_emergency", + "region_tag": "US", + "lat": 33.8, + "lng": -84.32 + }, + { + "type": "rss", + "source": { + "source_id": "promed-alerts", + "authority_name": "ProMED Alerts", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://promedmail.org" + }, + "feed_url": "https://promedmail.org/feed/", + "category": "health_emergency", + "region_tag": "INT", + "lat": 42.36, + "lng": -71.06 + }, + { + "type": "rss", + "source": { + "source_id": "fema-alerts", + "authority_name": "FEMA Disaster Declarations", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.fema.gov" + }, + "feed_url": "https://www.fema.gov/feeds/disasters", + "category": "emergency_management", + "region_tag": "US", + "lat": 38.88, + "lng": -77.02 + }, + { + "type": "rss", + "source": { + "source_id": "eu-ercc", + "authority_name": "EU Emergency Response (ERCC)", + "country": "Belgium", + "country_code": "BE", + "region": "Europe", + "authority_type": "public_safety_program", + "base_url": "https://ercc.dgecho.eu" + }, + "feed_url": "https://erccportal.jrc.ec.europa.eu/ECHO-Flash/RSS-Feeds/RSS-Latest-Emergencies", + "category": "emergency_management", + "region_tag": "EU", + "lat": 50.85, + "lng": 4.38 + }, + { + "type": "rss", + "source": { + "source_id": "usgs-earthquake", + "authority_name": "USGS Earthquake Alerts", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://earthquake.usgs.gov" + }, + "feed_url": "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/significant_week.atom", + "category": "emergency_management", + "region_tag": "INT", + 
"lat": 37.77, + "lng": -122.42 + }, + { + "type": "rss", + "source": { + "source_id": "ca-travel-advisories", + "authority_name": "Canada Travel Advisories", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://travel.gc.ca" + }, + "feed_url": "https://travel.gc.ca/travelling/advisories.atom", + "category": "travel_warning", + "region_tag": "CA", + "lat": 45.42, + "lng": -75.69 + }, + { + "type": "rss", + "source": { + "source_id": "au-smartraveller", + "authority_name": "Australia Smartraveller", + "country": "Australia", + "country_code": "AU", + "region": "Asia-Pacific", + "authority_type": "national_security", + "base_url": "https://www.smartraveller.gov.au" + }, + "feed_url": "https://www.smartraveller.gov.au/rss/countries", + "category": "travel_warning", + "region_tag": "AU", + "lat": -35.28, + "lng": 149.13 + }, + { + "type": "rss", + "source": { + "source_id": "nz-safetravel", + "authority_name": "NZ SafeTravel Advisories", + "country": "New Zealand", + "country_code": "NZ", + "region": "Asia-Pacific", + "authority_type": "national_security", + "base_url": "https://safetravel.govt.nz" + }, + "feed_url": "https://safetravel.govt.nz/rss.xml", + "category": "travel_warning", + "region_tag": "NZ", + "lat": -41.29, + "lng": 174.78 + }, + { + "type": "rss", + "source": { + "source_id": "fr-diplomatie-travel", + "authority_name": "France Diplomatie Travel Advice", + "country": "France", + "country_code": "FR", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.diplomatie.gouv.fr" + }, + "feed_url": "https://www.diplomatie.gouv.fr/spip.php?page=backend-fd&lang=fr", + "category": "travel_warning", + "region_tag": "FR", + "lat": 48.86, + "lng": 2.35 + }, + { + "type": "rss", + "source": { + "source_id": "eurojust-news", + "authority_name": "Eurojust Press Releases", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + 
"authority_type": "police", + "base_url": "https://www.eurojust.europa.eu" + }, + "feed_url": "https://www.eurojust.europa.eu/rss.xml", + "category": "public_appeal", + "region_tag": "EU", + "lat": 52.09, + "lng": 4.27 + }, + { + "type": "rss", + "source": { + "source_id": "frontex-news", + "authority_name": "Frontex News", + "country": "Poland", + "country_code": "PL", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.frontex.europa.eu" + }, + "feed_url": "https://www.frontex.europa.eu/rss/", + "category": "public_safety", + "region_tag": "EU", + "lat": 52.23, + "lng": 21.01 + }, + { + "type": "rss", + "source": { + "source_id": "dhs-news", + "authority_name": "DHS Press Releases", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.dhs.gov" + }, + "feed_url": "https://www.dhs.gov/news-releases/press-releases/rss.xml", + "category": "public_safety", + "region_tag": "US", + "lat": 38.88, + "lng": -77.02, + "fetch_mode": "browser" + }, + { + "type": "rss", + "source": { + "source_id": "doj-press", + "authority_name": "US DOJ Press Releases", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.justice.gov" + }, + "feed_url": "https://www.justice.gov/feeds/opa/justice-news.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.89, + "lng": -77.02 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "FBI removed RSS feeds; news/press releases not available via API (API only serves wanted persons). 
Covered by fbi-wanted, fbi-mostwanted, fbi-seeking, fbi-kidnappings.", + "source": { + "source_id": "fbi-news", + "authority_name": "FBI News", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://www.fbi.gov/feeds/fbi-news-feed/rss.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.89, + "lng": -77.02 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "NATO removed all public RSS feeds; only email newsletters available now", + "source": { + "source_id": "nato-news", + "authority_name": "NATO News", + "country": "Belgium", + "country_code": "BE", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.nato.int" + }, + "feed_url": "https://www.nato.int/cps/en/natolive/news.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 50.88, + "lng": 4.43 + }, + { + "type": "rss", + "source": { + "source_id": "un-sc-press", + "authority_name": "UN Security Council Press", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "national_security", + "base_url": "https://www.un.org" + }, + "feed_url": "https://press.un.org/en/rss.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "OSCE removed all public RSS feeds; redirects to email subscription", + "source": { + "source_id": "osce-news", + "authority_name": "OSCE Press Releases", + "country": "Austria", + "country_code": "AT", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.osce.org" + }, + "feed_url": "https://www.osce.org/rss.xml", + "category": "conflict_monitoring", + "region_tag": "EU", + "lat": 48.21, + "lng": 16.37 + }, + { + "type": "rss", + "source": { + "source_id": 
"ct-europol", + "authority_name": "Europol Counter-Terrorism", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.europol.europa.eu" + }, + "feed_url": "https://www.europol.europa.eu/taxonomy/term/69/feed", + "category": "terrorism_tip", + "region_tag": "EU", + "lat": 52.09, + "lng": 4.27 + }, + { + "type": "rss", + "source": { + "source_id": "mi5-threats", + "authority_name": "MI5 Threat Assessment", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.mi5.gov.uk" + }, + "feed_url": "https://www.mi5.gov.uk/threat-levels.xml", + "category": "terrorism_tip", + "region_tag": "GB", + "lat": 51.5, + "lng": -0.13 + }, + { + "type": "rss", + "source": { + "source_id": "cert-ua", + "authority_name": "CERT-UA", + "country": "Ukraine", + "country_code": "UA", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://cert.gov.ua" + }, + "feed_url": "https://cert.gov.ua/api/articles/rss", + "category": "cyber_advisory", + "region_tag": "UA", + "lat": 50.45, + "lng": 30.52, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ua-nsdc", + "authority_name": "NSDC Ukraine", + "country": "Ukraine", + "country_code": "UA", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.rnbo.gov.ua" + }, + "feed_url": "https://www.rnbo.gov.ua/en/rss.xml", + "category": "public_safety", + "region_tag": "UA", + "lat": 50.45, + "lng": 30.52 + }, + { + "type": "rss", + "source": { + "source_id": "ua-ssu", + "authority_name": "SBU (Security Service of Ukraine)", + "country": "Ukraine", + "country_code": "UA", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://ssu.gov.ua" + }, + "feed_url": "https://ssu.gov.ua/en/rss", + "category": "intelligence_report", + "region_tag": "UA", + "lat": 50.45, + "lng": 30.52 + }, + { + "type": "rss", + 
"source": { + "source_id": "ua-npu", + "authority_name": "National Police of Ukraine", + "country": "Ukraine", + "country_code": "UA", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.npu.gov.ua" + }, + "feed_url": "https://www.npu.gov.ua/rss", + "category": "public_appeal", + "region_tag": "UA", + "lat": 50.45, + "lng": 30.52 + }, + { + "type": "rss", + "source": { + "source_id": "ru-gov-cert", + "authority_name": "GOV-CERT Russia", + "country": "Russia", + "country_code": "RU", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.gov-cert.ru" + }, + "feed_url": "https://www.gov-cert.ru/rss", + "category": "cyber_advisory", + "region_tag": "RU", + "lat": 55.75, + "lng": 37.62, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ru-mvd", + "authority_name": "MVD Russia (Interior Ministry)", + "country": "Russia", + "country_code": "RU", + "region": "Europe", + "authority_type": "police", + "base_url": "https://мвд.рф" + }, + "feed_url": "https://мвд.рф/news/feed", + "category": "public_appeal", + "region_tag": "RU", + "lat": 55.75, + "lng": 37.62 + }, + { + "type": "rss", + "source": { + "source_id": "no-pst", + "authority_name": "PST (Norwegian Police Security)", + "country": "Norway", + "country_code": "NO", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.pst.no" + }, + "feed_url": "https://www.pst.no/rss/", + "category": "intelligence_report", + "region_tag": "NO", + "lat": 59.91, + "lng": 10.75 + }, + { + "type": "rss", + "source": { + "source_id": "no-nsm", + "authority_name": "NSM NorCERT", + "country": "Norway", + "country_code": "NO", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://nsm.no" + }, + "feed_url": "https://nsm.no/aktuelt/feed/", + "category": "cyber_advisory", + "region_tag": "NO", + "lat": 59.91, + "lng": 10.75, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "no-politiet", + 
"authority_name": "Norwegian Police", + "country": "Norway", + "country_code": "NO", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.politiet.no" + }, + "feed_url": "https://www.politiet.no/rss/", + "category": "public_appeal", + "region_tag": "NO", + "lat": 59.91, + "lng": 10.75 + }, + { + "type": "rss", + "source": { + "source_id": "dk-pet", + "authority_name": "PET (Danish Security Intelligence)", + "country": "Denmark", + "country_code": "DK", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://pet.dk" + }, + "feed_url": "https://pet.dk/rss", + "category": "intelligence_report", + "region_tag": "DK", + "lat": 55.68, + "lng": 12.57 + }, + { + "type": "rss", + "source": { + "source_id": "dk-cfcs", + "authority_name": "CFCS (Danish Centre for Cyber Security)", + "country": "Denmark", + "country_code": "DK", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.cfcs.dk" + }, + "feed_url": "https://www.cfcs.dk/da/nyheder/feed/", + "category": "cyber_advisory", + "region_tag": "DK", + "lat": 55.68, + "lng": 12.57, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "dk-politi", + "authority_name": "Danish Police", + "country": "Denmark", + "country_code": "DK", + "region": "Europe", + "authority_type": "police", + "base_url": "https://politi.dk" + }, + "feed_url": "https://politi.dk/rss", + "category": "public_appeal", + "region_tag": "DK", + "lat": 55.68, + "lng": 12.57 + }, + { + "type": "rss", + "source": { + "source_id": "fi-supo", + "authority_name": "SUPO (Finnish Security Intelligence)", + "country": "Finland", + "country_code": "FI", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://supo.fi" + }, + "feed_url": "https://supo.fi/en/rss/news", + "category": "intelligence_report", + "region_tag": "FI", + "lat": 60.17, + "lng": 24.94 + }, + { + "type": "rss", + "source": { + "source_id": "fi-ncsc", + "authority_name": "NCSC-FI 
(Traficom)", + "country": "Finland", + "country_code": "FI", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.kyberturvallisuuskeskus.fi" + }, + "feed_url": "https://www.kyberturvallisuuskeskus.fi/en/rss/news", + "category": "cyber_advisory", + "region_tag": "FI", + "lat": 60.17, + "lng": 24.94, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "fi-poliisi", + "authority_name": "Finnish Police", + "country": "Finland", + "country_code": "FI", + "region": "Europe", + "authority_type": "police", + "base_url": "https://poliisi.fi" + }, + "feed_url": "https://poliisi.fi/en/rss/news", + "category": "public_appeal", + "region_tag": "FI", + "lat": 60.17, + "lng": 24.94 + }, + { + "type": "rss", + "source": { + "source_id": "is-logreglan", + "authority_name": "Icelandic Police", + "country": "Iceland", + "country_code": "IS", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.logreglan.is" + }, + "feed_url": "https://www.logreglan.is/feed/", + "category": "public_appeal", + "region_tag": "IS", + "lat": 64.15, + "lng": -21.94 + }, + { + "type": "rss", + "source": { + "source_id": "pl-abw", + "authority_name": "ABW (Polish Internal Security Agency)", + "country": "Poland", + "country_code": "PL", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.abw.gov.pl" + }, + "feed_url": "https://www.abw.gov.pl/en/rss", + "category": "intelligence_report", + "region_tag": "PL", + "lat": 52.23, + "lng": 21.01 + }, + { + "type": "rss", + "source": { + "source_id": "pl-policja", + "authority_name": "Polish Police", + "country": "Poland", + "country_code": "PL", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.policja.pl" + }, + "feed_url": "https://www.policja.pl/pol/rss/1,RSS.html", + "category": "public_appeal", + "region_tag": "PL", + "lat": 52.23, + "lng": 21.01 + }, + { + "type": "rss", + "source": { + "source_id": "cz-nukib", + 
"authority_name": "NUKIB (Czech Cyber Security)", + "country": "Czech Republic", + "country_code": "CZ", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://nukib.gov.cz" + }, + "feed_url": "https://nukib.gov.cz/en/rss/", + "category": "cyber_advisory", + "region_tag": "CZ", + "lat": 50.08, + "lng": 14.42, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "cz-policie", + "authority_name": "Czech Police", + "country": "Czech Republic", + "country_code": "CZ", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.policie.cz" + }, + "feed_url": "https://www.policie.cz/rss/aktuality.aspx", + "category": "public_appeal", + "region_tag": "CZ", + "lat": 50.08, + "lng": 14.42 + }, + { + "type": "rss", + "source": { + "source_id": "hu-police", + "authority_name": "Hungarian Police", + "country": "Hungary", + "country_code": "HU", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.police.hu" + }, + "feed_url": "https://www.police.hu/rss", + "category": "public_appeal", + "region_tag": "HU", + "lat": 47.5, + "lng": 19.04 + }, + { + "type": "rss", + "source": { + "source_id": "ro-politia", + "authority_name": "Romanian Police", + "country": "Romania", + "country_code": "RO", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.politiaromana.ro" + }, + "feed_url": "https://www.politiaromana.ro/ro/rss", + "category": "public_appeal", + "region_tag": "RO", + "lat": 44.43, + "lng": 26.1 + }, + { + "type": "rss", + "source": { + "source_id": "sk-police", + "authority_name": "Slovak Police", + "country": "Slovakia", + "country_code": "SK", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.minv.sk" + }, + "feed_url": "https://www.minv.sk/?rss", + "category": "public_appeal", + "region_tag": "SK", + "lat": 48.15, + "lng": 17.11 + }, + { + "type": "rss", + "source": { + "source_id": "bg-mvr", + "authority_name": "Bulgarian Interior 
Ministry", + "country": "Bulgaria", + "country_code": "BG", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.mvr.bg" + }, + "feed_url": "https://www.mvr.bg/rss", + "category": "public_appeal", + "region_tag": "BG", + "lat": 42.7, + "lng": 23.32 + }, + { + "type": "rss", + "source": { + "source_id": "rs-mup", + "authority_name": "Serbian Interior Ministry", + "country": "Serbia", + "country_code": "RS", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.mup.gov.rs" + }, + "feed_url": "https://www.mup.gov.rs/wps/rss", + "category": "public_appeal", + "region_tag": "RS", + "lat": 44.79, + "lng": 20.46 + }, + { + "type": "rss", + "source": { + "source_id": "ee-kapo", + "authority_name": "KAPO (Estonian Internal Security)", + "country": "Estonia", + "country_code": "EE", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://kapo.ee" + }, + "feed_url": "https://kapo.ee/en/feed", + "category": "intelligence_report", + "region_tag": "EE", + "lat": 59.44, + "lng": 24.75 + }, + { + "type": "rss", + "source": { + "source_id": "lt-vsd", + "authority_name": "VSD (Lithuanian State Security)", + "country": "Lithuania", + "country_code": "LT", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.vsd.lt" + }, + "feed_url": "https://www.vsd.lt/en/feed/", + "category": "intelligence_report", + "region_tag": "LT", + "lat": 54.69, + "lng": 25.28 + }, + { + "type": "rss", + "source": { + "source_id": "lv-dp", + "authority_name": "Latvian State Police", + "country": "Latvia", + "country_code": "LV", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.vp.gov.lv" + }, + "feed_url": "https://www.vp.gov.lv/lv/rss", + "category": "public_appeal", + "region_tag": "LV", + "lat": 56.95, + "lng": 24.11 + }, + { + "type": "rss", + "source": { + "source_id": "ee-politsei", + "authority_name": "Estonian Police", + "country": "Estonia", + "country_code": 
"EE", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.politsei.ee" + }, + "feed_url": "https://www.politsei.ee/en/rss", + "category": "public_appeal", + "region_tag": "EE", + "lat": 59.44, + "lng": 24.75 + }, + { + "type": "rss", + "source": { + "source_id": "it-csirt", + "authority_name": "CSIRT Italia", + "country": "Italy", + "country_code": "IT", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.csirt.gov.it" + }, + "feed_url": "https://www.csirt.gov.it/feed/rss", + "category": "cyber_advisory", + "region_tag": "IT", + "lat": 41.9, + "lng": 12.5, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "it-carabinieri", + "authority_name": "Italian Carabinieri", + "country": "Italy", + "country_code": "IT", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.carabinieri.it" + }, + "feed_url": "https://www.carabinieri.it/rss", + "category": "public_appeal", + "region_tag": "IT", + "lat": 41.9, + "lng": 12.5 + }, + { + "type": "rss", + "source": { + "source_id": "gr-police", + "authority_name": "Hellenic Police", + "country": "Greece", + "country_code": "GR", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.astynomia.gr" + }, + "feed_url": "https://www.astynomia.gr/rss", + "category": "public_appeal", + "region_tag": "GR", + "lat": 37.98, + "lng": 23.73 + }, + { + "type": "rss", + "source": { + "source_id": "pt-cncs", + "authority_name": "CNCS (CERT Portugal)", + "country": "Portugal", + "country_code": "PT", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.cncs.gov.pt" + }, + "feed_url": "https://www.cncs.gov.pt/pt/feed/", + "category": "cyber_advisory", + "region_tag": "PT", + "lat": 38.72, + "lng": -9.14, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ch-ncsc", + "authority_name": "Swiss NCSC", + "country": "Switzerland", + "country_code": "CH", + "region": "Europe", + 
"authority_type": "cert", + "base_url": "https://www.ncsc.admin.ch" + }, + "feed_url": "https://www.ncsc.admin.ch/ncsc/en/home.rss", + "category": "cyber_advisory", + "region_tag": "CH", + "lat": 46.95, + "lng": 7.45, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ch-fedpol", + "authority_name": "Swiss Federal Police (fedpol)", + "country": "Switzerland", + "country_code": "CH", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.fedpol.admin.ch" + }, + "feed_url": "https://www.fedpol.admin.ch/fedpol/en/home.rss", + "category": "public_appeal", + "region_tag": "CH", + "lat": 46.95, + "lng": 7.45 + }, + { + "type": "rss", + "source": { + "source_id": "at-bmi", + "authority_name": "Austrian Interior Ministry", + "country": "Austria", + "country_code": "AT", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.bmi.gv.at" + }, + "feed_url": "https://www.bmi.gv.at/rss", + "category": "public_appeal", + "region_tag": "AT", + "lat": 48.21, + "lng": 16.37 + }, + { + "type": "rss", + "source": { + "source_id": "ie-ncsc", + "authority_name": "NCSC Ireland", + "country": "Ireland", + "country_code": "IE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.ncsc.gov.ie" + }, + "feed_url": "https://www.ncsc.gov.ie/news/rss.php", + "category": "cyber_advisory", + "region_tag": "IE", + "lat": 53.35, + "lng": -6.26, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "no-ud-travel", + "authority_name": "Norway MFA Travel Advice", + "country": "Norway", + "country_code": "NO", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.regjeringen.no" + }, + "feed_url": "https://www.regjeringen.no/en/topics/foreign-affairs/rss/", + "category": "travel_warning", + "region_tag": "NO", + "lat": 59.91, + "lng": 10.75 + }, + { + "type": "rss", + "source": { + "source_id": "dk-um-travel", + "authority_name": "Denmark MFA Travel 
Advice", + "country": "Denmark", + "country_code": "DK", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://um.dk" + }, + "feed_url": "https://um.dk/en/rss", + "category": "travel_warning", + "region_tag": "DK", + "lat": 55.68, + "lng": 12.57 + }, + { + "type": "rss", + "source": { + "source_id": "fi-um-travel", + "authority_name": "Finland MFA Travel Advice", + "country": "Finland", + "country_code": "FI", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://um.fi" + }, + "feed_url": "https://um.fi/rss/en/travel-advisories", + "category": "travel_warning", + "region_tag": "FI", + "lat": 60.17, + "lng": 24.94 + }, + { + "type": "rss", + "source": { + "source_id": "se-ud-travel", + "authority_name": "Sweden MFA Travel Advice", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.government.se" + }, + "feed_url": "https://www.government.se/rss/travel-advisories/", + "category": "travel_warning", + "region_tag": "SE", + "lat": 59.33, + "lng": 18.07 + }, + { + "type": "rss", + "source": { + "source_id": "nl-bz-travel", + "authority_name": "Netherlands MFA Travel Advice", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.netherlandsworldwide.nl" + }, + "feed_url": "https://www.netherlandsworldwide.nl/rss/travel-advisories", + "category": "travel_warning", + "region_tag": "NL", + "lat": 52.08, + "lng": 4.31 + }, + { + "type": "rss", + "source": { + "source_id": "ie-dfa-travel", + "authority_name": "Ireland DFA Travel Advice", + "country": "Ireland", + "country_code": "IE", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.ireland.ie" + }, + "feed_url": "https://www.ireland.ie/en/dfa/overseas-travel/rss/", + "category": "travel_warning", + "region_tag": "IE", + "lat": 53.35, + "lng": -6.26 + 
}, + { + "type": "rss", + "source": { + "source_id": "at-bmeia-travel", + "authority_name": "Austria MFA Travel Advice", + "country": "Austria", + "country_code": "AT", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.bmeia.gv.at" + }, + "feed_url": "https://www.bmeia.gv.at/reise-services/reiseinformation/rss/", + "category": "travel_warning", + "region_tag": "AT", + "lat": 48.21, + "lng": 16.37 + }, + { + "type": "rss", + "source": { + "source_id": "ch-eda-travel", + "authority_name": "Switzerland FDFA Travel Advice", + "country": "Switzerland", + "country_code": "CH", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.eda.admin.ch" + }, + "feed_url": "https://www.eda.admin.ch/eda/en/fdfa/representations-and-travel-advice.rss", + "category": "travel_warning", + "region_tag": "CH", + "lat": 46.95, + "lng": 7.45 + }, + { + "type": "rss", + "source": { + "source_id": "it-farnesina-travel", + "authority_name": "Italy Farnesina Travel Advice", + "country": "Italy", + "country_code": "IT", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.viaggiaresicuri.it" + }, + "feed_url": "https://www.viaggiaresicuri.it/rss", + "category": "travel_warning", + "region_tag": "IT", + "lat": 41.9, + "lng": 12.5 + }, + { + "type": "rss", + "source": { + "source_id": "jp-mofa-travel", + "authority_name": "Japan MOFA Travel Safety", + "country": "Japan", + "country_code": "JP", + "region": "Asia-Pacific", + "authority_type": "national_security", + "base_url": "https://www.anzen.mofa.go.jp" + }, + "feed_url": "https://www.anzen.mofa.go.jp/rss/rss.xml", + "category": "travel_warning", + "region_tag": "JP", + "lat": 35.68, + "lng": 139.77 + }, + { + "type": "rss", + "source": { + "source_id": "ge-ssg", + "authority_name": "Georgian State Security Service", + "country": "Georgia", + "country_code": "GE", + "region": "Europe", + "authority_type": "intelligence", + "base_url": 
"https://ssg.gov.ge" + }, + "feed_url": "https://ssg.gov.ge/en/feed", + "category": "intelligence_report", + "region_tag": "GE", + "lat": 41.69, + "lng": 44.8 + }, + { + "type": "rss", + "source": { + "source_id": "md-sis", + "authority_name": "Moldova SIS (Intelligence & Security)", + "country": "Moldova", + "country_code": "MD", + "region": "Europe", + "authority_type": "intelligence", + "base_url": "https://www.sis.md" + }, + "feed_url": "https://www.sis.md/en/rss", + "category": "intelligence_report", + "region_tag": "MD", + "lat": 47.01, + "lng": 28.86 + }, + { + "type": "rss", + "source": { + "source_id": "mt-police", + "authority_name": "Malta Police Force", + "country": "Malta", + "country_code": "MT", + "region": "Europe", + "authority_type": "police", + "base_url": "https://pulizija.gov.mt" + }, + "feed_url": "https://pulizija.gov.mt/en/feed/", + "category": "public_appeal", + "region_tag": "MT", + "lat": 35.9, + "lng": 14.51 + }, + { + "type": "rss", + "source": { + "source_id": "mt-mfsa", + "authority_name": "Malta Financial Services Authority", + "country": "Malta", + "country_code": "MT", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.mfsa.mt" + }, + "feed_url": "https://www.mfsa.mt/feed/", + "category": "fraud_alert", + "region_tag": "MT", + "lat": 35.9, + "lng": 14.51 + }, + { + "type": "rss", + "source": { + "source_id": "cy-police", + "authority_name": "Cyprus Police", + "country": "Cyprus", + "country_code": "CY", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.police.gov.cy" + }, + "feed_url": "https://www.police.gov.cy/police/police.nsf/rss.xml", + "category": "public_appeal", + "region_tag": "CY", + "lat": 35.17, + "lng": 33.36 + }, + { + "type": "rss", + "source": { + "source_id": "cy-csirt", + "authority_name": "CSIRT Cyprus", + "country": "Cyprus", + "country_code": "CY", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://csirt.cy" + }, + "feed_url": 
"https://csirt.cy/en/feed/", + "category": "cyber_advisory", + "region_tag": "CY", + "lat": 35.17, + "lng": 33.36, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "si-police", + "authority_name": "Slovenian Police", + "country": "Slovenia", + "country_code": "SI", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.policija.si" + }, + "feed_url": "https://www.policija.si/eng/rss", + "category": "public_appeal", + "region_tag": "SI", + "lat": 46.06, + "lng": 14.51 + }, + { + "type": "rss", + "source": { + "source_id": "il-shin-bet", + "authority_name": "Israel Security Agency (Shin Bet)", + "country": "Israel", + "country_code": "IL", + "region": "Middle East", + "authority_type": "intelligence", + "base_url": "https://www.shabak.gov.il" + }, + "feed_url": "https://www.shabak.gov.il/en/rss", + "category": "intelligence_report", + "region_tag": "IL", + "lat": 31.77, + "lng": 35.22 + }, + { + "type": "rss", + "source": { + "source_id": "il-cert", + "authority_name": "Israel CERT (INCD)", + "country": "Israel", + "country_code": "IL", + "region": "Middle East", + "authority_type": "cert", + "base_url": "https://www.gov.il/en/departments/israel_national_cyber_directorate" + }, + "feed_url": "https://www.gov.il/he/api/DataGovProxy/GetDGResults?CatalogId=cybernews&Limit=20&Offset=0", + "category": "cyber_advisory", + "region_tag": "IL", + "lat": 32.07, + "lng": 34.78, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ae-cert", + "authority_name": "UAE aeCERT", + "country": "United Arab Emirates", + "country_code": "AE", + "region": "Middle East", + "authority_type": "cert", + "base_url": "https://www.tra.gov.ae" + }, + "feed_url": "https://www.tra.gov.ae/aecert/en/rss.xml", + "category": "cyber_advisory", + "region_tag": "AE", + "lat": 24.45, + "lng": 54.65, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "sa-cert", + "authority_name": "Saudi CERT (NCA)", + "country": 
"Saudi Arabia", + "country_code": "SA", + "region": "Middle East", + "authority_type": "cert", + "base_url": "https://cert.gov.sa" + }, + "feed_url": "https://cert.gov.sa/en/rss/", + "category": "cyber_advisory", + "region_tag": "SA", + "lat": 24.71, + "lng": 46.68, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "jo-ncsc", + "authority_name": "Jordan NCSC", + "country": "Jordan", + "country_code": "JO", + "region": "Middle East", + "authority_type": "cert", + "base_url": "https://www.ncsc.jo" + }, + "feed_url": "https://www.ncsc.jo/en/feed", + "category": "cyber_advisory", + "region_tag": "JO", + "lat": 31.95, + "lng": 35.93, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "qa-ncsa", + "authority_name": "Qatar NCSA", + "country": "Qatar", + "country_code": "QA", + "region": "Middle East", + "authority_type": "cert", + "base_url": "https://www.ncsa.gov.qa" + }, + "feed_url": "https://www.ncsa.gov.qa/en/rss", + "category": "cyber_advisory", + "region_tag": "QA", + "lat": 25.29, + "lng": 51.53, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "au-african-union", + "authority_name": "African Union Peace & Security", + "country": "Ethiopia", + "country_code": "ET", + "region": "Africa", + "authority_type": "national_security", + "base_url": "https://www.peaceau.org" + }, + "feed_url": "https://www.peaceau.org/rss/en/feed.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 9.02, + "lng": 38.75 + }, + { + "type": "rss", + "source": { + "source_id": "eg-cert", + "authority_name": "Egypt CERT (EG-CERT)", + "country": "Egypt", + "country_code": "EG", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://www.egcert.eg" + }, + "feed_url": "https://www.egcert.eg/feed/", + "category": "cyber_advisory", + "region_tag": "EG", + "lat": 30.04, + "lng": 31.24, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ma-dgssi", + "authority_name": 
"Morocco DGSSI (maCERT)", + "country": "Morocco", + "country_code": "MA", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://www.dgssi.gov.ma" + }, + "feed_url": "https://www.dgssi.gov.ma/fr/feed", + "category": "cyber_advisory", + "region_tag": "MA", + "lat": 33.97, + "lng": -6.85, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "rw-police", + "authority_name": "Rwanda National Police", + "country": "Rwanda", + "country_code": "RW", + "region": "Africa", + "authority_type": "police", + "base_url": "https://www.police.gov.rw" + }, + "feed_url": "https://www.police.gov.rw/rss/", + "category": "public_appeal", + "region_tag": "RW", + "lat": -1.94, + "lng": 29.87 + }, + { + "type": "rss", + "source": { + "source_id": "et-insa", + "authority_name": "Ethiopia INSA (Cyber Security)", + "country": "Ethiopia", + "country_code": "ET", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://www.insa.gov.et" + }, + "feed_url": "https://www.insa.gov.et/feed/", + "category": "cyber_advisory", + "region_tag": "ET", + "lat": 9.02, + "lng": 38.75, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "sn-police", + "authority_name": "Senegal Police Nationale", + "country": "Senegal", + "country_code": "SN", + "region": "Africa", + "authority_type": "police", + "base_url": "https://www.police.gouv.sn" + }, + "feed_url": "https://www.police.gouv.sn/feed/", + "category": "public_appeal", + "region_tag": "SN", + "lat": 14.69, + "lng": -17.44 + }, + { + "type": "rss", + "source": { + "source_id": "dz-cert", + "authority_name": "Algeria CERT (DZ-CERT)", + "country": "Algeria", + "country_code": "DZ", + "region": "Africa", + "authority_type": "cert", + "base_url": "https://www.cert.dz" + }, + "feed_url": "https://www.cert.dz/feed", + "category": "cyber_advisory", + "region_tag": "DZ", + "lat": 36.74, + "lng": 3.06, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "tw-cert", + 
"authority_name": "Taiwan CERT (TWCERT/CC)", + "country": "Taiwan", + "country_code": "TW", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.twcert.org.tw" + }, + "feed_url": "https://www.twcert.org.tw/en/rss/", + "category": "cyber_advisory", + "region_tag": "TW", + "lat": 25.03, + "lng": 121.57, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "cn-cert", + "authority_name": "CNCERT/CC (China CERT)", + "country": "China", + "country_code": "CN", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.cert.org.cn" + }, + "feed_url": "https://www.cert.org.cn/publish/english/rss.xml", + "category": "cyber_advisory", + "region_tag": "CN", + "lat": 39.91, + "lng": 116.4, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "vn-cert", + "authority_name": "VNCERT/CC (Vietnam CERT)", + "country": "Vietnam", + "country_code": "VN", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://vncert.vn" + }, + "feed_url": "https://vncert.vn/feed/", + "category": "cyber_advisory", + "region_tag": "VN", + "lat": 21.03, + "lng": 105.85, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "pk-cert", + "authority_name": "Pakistan CERT (PkCERT)", + "country": "Pakistan", + "country_code": "PK", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.pkcert.org.pk" + }, + "feed_url": "https://www.pkcert.org.pk/feed/", + "category": "cyber_advisory", + "region_tag": "PK", + "lat": 33.69, + "lng": 73.04, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "bd-cert", + "authority_name": "BGD e-GOV CIRT (Bangladesh)", + "country": "Bangladesh", + "country_code": "BD", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.cirt.gov.bd" + }, + "feed_url": "https://www.cirt.gov.bd/feed/", + "category": "cyber_advisory", + "region_tag": "BD", + "lat": 23.81, + "lng": 90.41, + "max_items": 15 + }, + { + "type": 
"rss", + "source": { + "source_id": "lk-cert", + "authority_name": "Sri Lanka CERT", + "country": "Sri Lanka", + "country_code": "LK", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.cert.gov.lk" + }, + "feed_url": "https://www.cert.gov.lk/feed/", + "category": "cyber_advisory", + "region_tag": "LK", + "lat": 6.93, + "lng": 79.85, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "mn-cert", + "authority_name": "Mongolia CERT (MonCIRT)", + "country": "Mongolia", + "country_code": "MN", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.moncirt.org.mn" + }, + "feed_url": "https://www.moncirt.org.mn/feed/", + "category": "cyber_advisory", + "region_tag": "MN", + "lat": 47.92, + "lng": 106.91, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "aseanapol", + "authority_name": "ASEANAPOL (ASEAN Police)", + "country": "Singapore", + "country_code": "SG", + "region": "Asia", + "authority_type": "police", + "base_url": "https://www.aseanapol.org" + }, + "feed_url": "https://www.aseanapol.org/feed/", + "category": "public_appeal", + "region_tag": "INT", + "lat": 1.35, + "lng": 103.82 + }, + { + "type": "rss", + "source": { + "source_id": "apcert", + "authority_name": "APCERT (Asia-Pacific CERT)", + "country": "Japan", + "country_code": "JP", + "region": "Asia-Pacific", + "authority_type": "cert", + "base_url": "https://www.apcert.org" + }, + "feed_url": "https://www.apcert.org/feed/", + "category": "cyber_advisory", + "region_tag": "INT", + "lat": 35.68, + "lng": 139.77, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "au-afp", + "authority_name": "Australian Federal Police", + "country": "Australia", + "country_code": "AU", + "region": "Oceania", + "authority_type": "police", + "base_url": "https://www.afp.gov.au" + }, + "feed_url": "https://www.afp.gov.au/rss.xml", + "category": "public_appeal", + "region_tag": "AU", + "lat": -35.28, + "lng": 149.13 + 
}, + { + "type": "rss", + "source": { + "source_id": "au-acsc", + "authority_name": "Australian Cyber Security Centre", + "country": "Australia", + "country_code": "AU", + "region": "Oceania", + "authority_type": "cert", + "base_url": "https://www.cyber.gov.au" + }, + "feed_url": "https://www.cyber.gov.au/rss.xml", + "category": "cyber_advisory", + "region_tag": "AU", + "lat": -35.28, + "lng": 149.13, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ar-cert", + "authority_name": "Argentina CERT (ICIC-CERT)", + "country": "Argentina", + "country_code": "AR", + "region": "South America", + "authority_type": "cert", + "base_url": "https://www.argentina.gob.ar/jefatura/innovacion-publica/ssetic/direccion-nacional-ciberseguridad/cert" + }, + "feed_url": "https://www.argentina.gob.ar/jefatura/innovacion-publica/ssetic/direccion-nacional-ciberseguridad/cert/rss", + "category": "cyber_advisory", + "region_tag": "AR", + "lat": -34.6, + "lng": -58.38, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "br-cert", + "authority_name": "CERT.br (Brazil CERT)", + "country": "Brazil", + "country_code": "BR", + "region": "South America", + "authority_type": "cert", + "base_url": "https://www.cert.br" + }, + "feed_url": "https://www.cert.br/rss/certbr-rss.xml", + "category": "cyber_advisory", + "region_tag": "BR", + "lat": -23.55, + "lng": -46.63, + "max_items": 15 + }, + { + "type": "rss", + "source": { + "source_id": "ofac-sdn", + "authority_name": "US Treasury OFAC Sanctions", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.treasury.gov" + }, + "feed_url": "https://ofac.treasury.gov/system/files/126/sdn.xml", + "category": "fraud_alert", + "region_tag": "US", + "lat": 38.89, + "lng": -77.03 + }, + { + "type": "rss", + "source": { + "source_id": "ofac-updates", + "authority_name": "OFAC Recent Actions", + "country": "United States", 
+ "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://ofac.treasury.gov" + }, + "feed_url": "https://ofac.treasury.gov/recent-actions/rss.xml", + "category": "fraud_alert", + "region_tag": "US", + "lat": 38.89, + "lng": -77.03 + }, + { + "type": "rss", + "source": { + "source_id": "eu-sanctions", + "authority_name": "EU Sanctions Map", + "country": "Belgium", + "country_code": "BE", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.sanctionsmap.eu" + }, + "feed_url": "https://webgate.ec.europa.eu/fsd/fsf/public/rss/feed", + "category": "fraud_alert", + "region_tag": "EU", + "lat": 50.85, + "lng": 4.35 + }, + { + "type": "rss", + "source": { + "source_id": "un-sanctions", + "authority_name": "UN Security Council Sanctions", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "national_security", + "base_url": "https://www.un.org/securitycouncil/sanctions" + }, + "feed_url": "https://scsanctions.un.org/resources/xml/en/consolidated.xml", + "category": "fraud_alert", + "region_tag": "INT", + "lat": 40.75, + "lng": -73.97 + }, + { + "type": "rss", + "source": { + "source_id": "fatf-updates", + "authority_name": "FATF (Financial Action Task Force)", + "country": "France", + "country_code": "FR", + "region": "International", + "authority_type": "regulatory", + "base_url": "https://www.fatf-gafi.org" + }, + "feed_url": "https://www.fatf-gafi.org/rss/publications.xml", + "category": "fraud_alert", + "region_tag": "INT", + "lat": 48.86, + "lng": 2.33 + }, + { + "type": "rss", + "source": { + "source_id": "opensanctions", + "authority_name": "OpenSanctions", + "country": "Global", + "country_code": "INT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.opensanctions.org" + }, + "feed_url": "https://www.opensanctions.org/rss/index.xml", + "category": "fraud_alert", + 
"region_tag": "INT", + "lat": 52.52, + "lng": 13.41 + }, + { + "type": "rss", + "source": { + "source_id": "fca-warnings", + "authority_name": "FCA Consumer Warnings", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.fca.org.uk" + }, + "feed_url": "https://www.fca.org.uk/news/rss.xml", + "category": "fraud_alert", + "region_tag": "GB", + "lat": 51.51, + "lng": -0.09 + }, + { + "type": "rss", + "source": { + "source_id": "bafin-warnings", + "authority_name": "BaFin Unauthorized Firms", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.bafin.de" + }, + "feed_url": "https://www.bafin.de/SiteGlobals/Functions/RSSFeed/EN/RSSVerbraucherschutz/RSSVerbraucherschutz.xml", + "category": "fraud_alert", + "region_tag": "DE", + "lat": 50.11, + "lng": 8.68 + }, + { + "type": "rss", + "source": { + "source_id": "esma-warnings", + "authority_name": "ESMA Investor Warnings", + "country": "France", + "country_code": "FR", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.esma.europa.eu" + }, + "feed_url": "https://www.esma.europa.eu/rss.xml", + "category": "fraud_alert", + "region_tag": "EU", + "lat": 48.86, + "lng": 2.33 + }, + { + "type": "rss", + "source": { + "source_id": "sec-enforcement", + "authority_name": "SEC Enforcement Actions", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.sec.gov" + }, + "feed_url": "https://www.sec.gov/rss/litigation/litreleases.xml", + "category": "fraud_alert", + "region_tag": "US", + "lat": 38.9, + "lng": -77.04 + }, + { + "type": "rss", + "source": { + "source_id": "sec-alerts", + "authority_name": "SEC Investor Alerts", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "regulatory", + "base_url": 
"https://www.sec.gov" + }, + "feed_url": "https://www.sec.gov/rss/investor_alerts.xml", + "category": "fraud_alert", + "region_tag": "US", + "lat": 38.9, + "lng": -77.04 + }, + { + "type": "rss", + "source": { + "source_id": "fincen-advisories", + "authority_name": "FinCEN Advisories", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.fincen.gov" + }, + "feed_url": "https://www.fincen.gov/rss/advisories.xml", + "category": "fraud_alert", + "region_tag": "US", + "lat": 38.88, + "lng": -77.17 + }, + { + "type": "rss", + "source": { + "source_id": "finma-warnings", + "authority_name": "FINMA Warnings (Switzerland)", + "country": "Switzerland", + "country_code": "CH", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.finma.ch" + }, + "feed_url": "https://www.finma.ch/en/rss/finma-news/", + "category": "fraud_alert", + "region_tag": "CH", + "lat": 46.95, + "lng": 7.45 + }, + { + "type": "rss", + "source": { + "source_id": "amf-warnings", + "authority_name": "AMF Blacklists (France)", + "country": "France", + "country_code": "FR", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.amf-france.org" + }, + "feed_url": "https://www.amf-france.org/en/rss", + "category": "fraud_alert", + "region_tag": "FR", + "lat": 48.86, + "lng": 2.33 + }, + { + "type": "rss", + "source": { + "source_id": "consob-warnings", + "authority_name": "CONSOB Investor Warnings (Italy)", + "country": "Italy", + "country_code": "IT", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.consob.it" + }, + "feed_url": "https://www.consob.it/web/consob-en/rss-feed", + "category": "fraud_alert", + "region_tag": "IT", + "lat": 41.9, + "lng": 12.5 + }, + { + "type": "rss", + "source": { + "source_id": "cnmv-warnings", + "authority_name": "CNMV Investor Warnings (Spain)", + "country": "Spain", + "country_code": "ES", + 
"region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.cnmv.es" + }, + "feed_url": "https://www.cnmv.es/Portal/rss.aspx?lang=en", + "category": "fraud_alert", + "region_tag": "ES", + "lat": 40.42, + "lng": -3.7 + }, + { + "type": "rss", + "source": { + "source_id": "afm-nl-warnings", + "authority_name": "AFM Warnings (Netherlands)", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.afm.nl" + }, + "feed_url": "https://www.afm.nl/en/rss/all", + "category": "fraud_alert", + "region_tag": "NL", + "lat": 52.37, + "lng": 4.89 + }, + { + "type": "rss", + "source": { + "source_id": "dnb-nl-warnings", + "authority_name": "DNB Warnings (Dutch Central Bank)", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.dnb.nl" + }, + "feed_url": "https://www.dnb.nl/en/rss/", + "category": "fraud_alert", + "region_tag": "NL", + "lat": 52.37, + "lng": 4.89 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "General press releases, not fraud intelligence", + "source": { + "source_id": "ecb-press", + "authority_name": "ECB Press Releases", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.ecb.europa.eu" + }, + "feed_url": "https://www.ecb.europa.eu/rss/press.html", + "category": "fraud_alert", + "region_tag": "EU", + "lat": 50.11, + "lng": 8.68 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "Educational network for schools, not an intelligence source", + "source": { + "source_id": "european-schoolnet", + "authority_name": "European Schoolnet", + "country": "Belgium", + "country_code": "BE", + "region": "Europe", + "authority_type": "educational", + "base_url": "https://www.eun.org" + }, + "feed_url": "https://www.eun.org/rss", + "category": 
"informational", + "region_tag": "EU", + "lat": 50.85, + "lng": 4.35 + }, + { + "type": "rss", + "promotion_status": "rejected", + "rejection_reason": "World Bank education content, not an intelligence source", + "source": { + "source_id": "worldbank-education-digital", + "authority_name": "World Bank Education Digital", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "regulatory", + "base_url": "https://www.worldbank.org" + }, + "feed_url": "https://www.worldbank.org/en/topic/edutech/rss.xml", + "category": "education_digital_capacity", + "region_tag": "INT", + "lat": 38.90, + "lng": -77.04 + }, + { + "type": "rss", + "source": { + "source_id": "occrp-investigations", + "authority_name": "OCCRP Investigations", + "country": "Bosnia and Herzegovina", + "country_code": "BA", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.occrp.org" + }, + "feed_url": "https://www.occrp.org/en/rss", + "category": "fraud_alert", + "region_tag": "INT", + "lat": 43.86, + "lng": 18.41 + }, + { + "type": "rss", + "source": { + "source_id": "transparency-intl", + "authority_name": "Transparency International", + "country": "Germany", + "country_code": "DE", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.transparency.org" + }, + "feed_url": "https://www.transparency.org/en/rss", + "category": "fraud_alert", + "region_tag": "INT", + "lat": 52.52, + "lng": 13.38 + }, + { + "type": "rss", + "source": { + "source_id": "unodc-news", + "authority_name": "UNODC (UN Office on Drugs & Crime)", + "country": "Austria", + "country_code": "AT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.unodc.org" + }, + "feed_url": "https://www.unodc.org/rss/press.xml", + "category": "public_safety", + "region_tag": "INT", + "lat": 48.24, + "lng": 16.41 + }, + { + "type": "rss", + "source": { + 
"source_id": "eu-enfast", + "authority_name": "EU Most Wanted (ENFAST)", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "police", + "base_url": "https://eumostwanted.eu" + }, + "feed_url": "https://eumostwanted.eu/rss.xml", + "category": "wanted_suspect", + "region_tag": "EU", + "lat": 52.09, + "lng": 4.27 + }, + { + "type": "rss", + "source": { + "source_id": "dia-it", + "authority_name": "DIA Anti-Mafia (Italy)", + "country": "Italy", + "country_code": "IT", + "region": "Europe", + "authority_type": "police", + "base_url": "https://direzioneinvestigativaantimafia.interno.gov.it" + }, + "feed_url": "https://direzioneinvestigativaantimafia.interno.gov.it/feed/", + "category": "public_appeal", + "region_tag": "IT", + "lat": 41.9, + "lng": 12.5 + }, + { + "type": "rss", + "source": { + "source_id": "gdf-it", + "authority_name": "Guardia di Finanza (Italy)", + "country": "Italy", + "country_code": "IT", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.gdf.gov.it" + }, + "feed_url": "https://www.gdf.gov.it/it/rss", + "category": "fraud_alert", + "region_tag": "IT", + "lat": 41.9, + "lng": 12.5 + }, + { + "type": "rss", + "source": { + "source_id": "sfo-uk", + "authority_name": "Serious Fraud Office (UK)", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.sfo.gov.uk" + }, + "feed_url": "https://www.sfo.gov.uk/feed/", + "category": "fraud_alert", + "region_tag": "GB", + "lat": 51.51, + "lng": -0.1 + }, + { + "type": "rss", + "source": { + "source_id": "olaf-eu", + "authority_name": "OLAF (EU Anti-Fraud Office)", + "country": "Belgium", + "country_code": "BE", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://anti-fraud.ec.europa.eu" + }, + "feed_url": "https://anti-fraud.ec.europa.eu/rss_en", + "category": "fraud_alert", + "region_tag": "EU", + "lat": 50.84, + "lng": 4.37 + }, + { + 
"type": "rss", + "source": { + "source_id": "eppo-eu", + "authority_name": "EPPO (EU Public Prosecutor)", + "country": "Luxembourg", + "country_code": "LU", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.eppo.europa.eu" + }, + "feed_url": "https://www.eppo.europa.eu/en/rss.xml", + "category": "fraud_alert", + "region_tag": "EU", + "lat": 49.61, + "lng": 6.13 + }, + { + "type": "rss", + "source": { + "source_id": "dea-press", + "authority_name": "DEA Press Releases", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.dea.gov" + }, + "feed_url": "https://www.dea.gov/rss/press-releases.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.87, + "lng": -77.14 + }, + { + "type": "rss", + "source": { + "source_id": "atf-press", + "authority_name": "ATF Press Releases", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.atf.gov" + }, + "feed_url": "https://www.atf.gov/rss/news", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.89, + "lng": -77.02 + }, + { + "type": "rss", + "source": { + "source_id": "ic3-fbi", + "authority_name": "FBI IC3 (Internet Crime Complaint Center)", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.ic3.gov" + }, + "feed_url": "https://www.ic3.gov/Home/RSS", + "category": "fraud_alert", + "region_tag": "US", + "lat": 39.29, + "lng": -76.61 + }, + { + "type": "rss", + "source": { + "source_id": "gdacs-disasters", + "authority_name": "GDACS Global Disasters", + "country": "International", + "country_code": "INT", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.gdacs.org" + }, + "feed_url": "https://www.gdacs.org/xml/rss.xml", + "category": "environmental_disaster", + 
"region_tag": "INT", + "lat": 46.23, + "lng": 6.05, + "max_items": 50 + }, + { + "type": "rss", + "source": { + "source_id": "usgs-earthquakes", + "authority_name": "USGS Significant Earthquakes", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://earthquake.usgs.gov" + }, + "feed_url": "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_week.atom", + "category": "environmental_disaster", + "region_tag": "INT", + "lat": 38.95, + "lng": -77.37, + "max_items": 40 + }, + { + "type": "rss", + "source": { + "source_id": "noaa-incidents", + "authority_name": "NOAA Incident News", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://incidentnews.noaa.gov" + }, + "feed_url": "https://incidentnews.noaa.gov/incidents.rss", + "category": "environmental_disaster", + "region_tag": "US", + "lat": 38.85, + "lng": -76.93 + }, + { + "type": "rss", + "source": { + "source_id": "smithsonian-volcanoes", + "authority_name": "Smithsonian Global Volcanism Program", + "country": "United States", + "country_code": "US", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://volcano.si.edu" + }, + "feed_url": "https://volcano.si.edu/news/WeeklyVolcanoRSS.xml", + "category": "environmental_disaster", + "region_tag": "INT", + "lat": 38.89, + "lng": -77.03 + }, + { + "type": "rss", + "source": { + "source_id": "emsa-maritime", + "authority_name": "EMSA Maritime Safety", + "country": "Portugal", + "country_code": "PT", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.emsa.europa.eu" + }, + "feed_url": "https://www.emsa.europa.eu/newsroom/latest-news.feed?type=rss&format=feed", + "category": "environmental_disaster", + "region_tag": "EU", + "lat": 38.71, + "lng": -9.14 + }, + { + "type": "rss", + "source": { + 
"source_id": "iaea-news", + "authority_name": "IAEA Nuclear News", + "country": "Austria", + "country_code": "AT", + "region": "International", + "authority_type": "regulatory", + "base_url": "https://www.iaea.org" + }, + "feed_url": "https://www.iaea.org/feeds/news", + "category": "environmental_disaster", + "region_tag": "INT", + "lat": 48.23, + "lng": 16.41 + }, + { + "type": "rss", + "source": { + "source_id": "who-news", + "authority_name": "WHO Disease & Health News", + "country": "Switzerland", + "country_code": "CH", + "region": "International", + "authority_type": "public_safety_program", + "base_url": "https://www.who.int" + }, + "feed_url": "https://www.who.int/rss-feeds/news-english.xml", + "category": "disease_outbreak", + "region_tag": "INT", + "lat": 46.23, + "lng": 6.14 + }, + { + "type": "rss", + "source": { + "source_id": "ecdc-epi-updates", + "authority_name": "ECDC Epidemiological Updates", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "public_safety_program", + "base_url": "https://www.ecdc.europa.eu" + }, + "feed_url": "https://www.ecdc.europa.eu/en/taxonomy/term/1310/feed", + "category": "disease_outbreak", + "region_tag": "EU", + "lat": 59.35, + "lng": 18.10 + }, + { + "type": "rss", + "source": { + "source_id": "ecdc-risk-assessments", + "authority_name": "ECDC Risk Assessments", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "public_safety_program", + "base_url": "https://www.ecdc.europa.eu" + }, + "feed_url": "https://www.ecdc.europa.eu/en/taxonomy/term/1295/feed", + "category": "disease_outbreak", + "region_tag": "EU", + "lat": 59.35, + "lng": 18.10 + }, + { + "type": "rss", + "source": { + "source_id": "cdc-newsroom", + "authority_name": "CDC Health Alerts", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.cdc.gov" + }, + "feed_url": 
"https://tools.cdc.gov/api/v2/resources/media/132608.rss", + "category": "disease_outbreak", + "region_tag": "US", + "lat": 33.80, + "lng": -84.39 + }, + { + "type": "rss", + "source": { + "source_id": "woah-animal-health", + "authority_name": "WOAH Animal Health", + "country": "France", + "country_code": "FR", + "region": "International", + "authority_type": "regulatory", + "base_url": "https://www.woah.org" + }, + "feed_url": "https://www.woah.org/en/feed/", + "category": "disease_outbreak", + "region_tag": "INT", + "lat": 48.86, + "lng": 2.32, + "include_keywords": ["outbreak", "avian", "influenza", "disease", "epidemic", "zoonotic", "virus", "pathogen", "infection", "surveillance", "alert", "emergency"] + }, + + {"_comment": "═══════════════ MARITIME SECURITY ═══════════════"}, + + { + "type": "rss", + "source": { + "source_id": "us-navy-news", + "authority_name": "US Navy News", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.navy.mil" + }, + "feed_url": "https://www.navy.mil/DesktopModules/ArticleCS/RSS.ashx?ContentType=1&Site=1&max=10", + "category": "maritime_security", + "region_tag": "US", + "lat": 38.87, + "lng": -77.01, + "include_keywords": ["piracy", "maritime", "shipping", "vessel", "naval", "fleet", "strait", "gulf", "houthi", "red sea", "patrol", "intercept", "seizure", "smuggling", "freedom of navigation"] + }, + { + "type": "rss", "source": { - "source_id": "acled-conflict-monitor", - "authority_name": "ACLED Conflict Monitor", - "country": "Global", - "country_code": "INT", + "source_id": "cimsec", + "authority_name": "CIMSEC", + "country": "United States", + "country_code": "US", "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://acleddata.com" + "authority_type": "regulatory", + "base_url": "https://cimsec.org" }, - "feed_url": "https://acleddata.com/", - "feed_urls": [ - "https://acleddata.com/", - 
"https://acleddata.com/dashboard/" - ], - "category": "conflict_monitoring", + "feed_url": "https://cimsec.org/feed/", + "category": "maritime_security", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 100, - "include_keywords": [ - "conflict", - "violence", - "protest", - "incident", - "dashboard", - "data" - ], - "exclude_keywords": [ - "careers", - "donate", - "newsletter" - ], - "reporting": { - "label": "ACLED Data Access", - "url": "https://acleddata.com/data-export-tool/" - } + "lat": 38.87, + "lng": -77.01, + "include_keywords": ["piracy", "maritime security", "shipping", "naval", "strait", "chokepoint", "attack", "vessel", "threat", "patrol", "red sea", "gulf", "houthi", "mine", "boarding"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "hot-tasking", - "authority_name": "Humanitarian OpenStreetMap Team", - "country": "Global", - "country_code": "INT", + "source_id": "gcaptain", + "authority_name": "gCaptain Maritime News", + "country": "United States", + "country_code": "US", "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://www.hotosm.org" + "authority_type": "private_sector", + "base_url": "https://gcaptain.com" }, - "feed_url": "https://www.hotosm.org/projects/", - "feed_urls": [ - "https://www.hotosm.org/projects/", - "https://tasks.hotosm.org/explore" - ], - "category": "humanitarian_tasking", + "feed_url": "https://gcaptain.com/feed/", + "category": "maritime_security", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 100, - "include_keywords": [ - "mapping", - "task", - "response", - "disaster", - "humanitarian", - "project" - ], - "exclude_keywords": [ - "careers", - "donate" - ], - "reporting": { - "label": "Join HOT Mapping Tasks", - "url": "https://tasks.hotosm.org/" - } + "lat": 37.77, + "lng": -122.42, + "include_keywords": ["piracy", "attack", "seized", "hijack", "missile", "houthi", "drone strike", "maritime security", 
"boarding", "smuggling", "sanctions", "detained", "navy", "warship", "patrol", "red sea", "gulf of aden", "strait of hormuz", "malacca"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "missing-maps-tasking", - "authority_name": "Missing Maps", - "country": "Global", - "country_code": "INT", + "source_id": "hellenicshipping-piracy", + "authority_name": "Hellenic Shipping News — Piracy", + "country": "Greece", + "country_code": "GR", "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://www.missingmaps.org" + "authority_type": "private_sector", + "base_url": "https://www.hellenicshippingnews.com" }, - "feed_url": "https://www.missingmaps.org/", - "feed_urls": [ - "https://www.missingmaps.org/", - "https://www.missingmaps.org/blog/" - ], - "category": "humanitarian_tasking", + "feed_url": "https://www.hellenicshippingnews.com/category/piracy-and-security-news/feed/", + "category": "maritime_security", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 80, - "include_keywords": [ - "mapathon", - "mapping", - "task", - "response", - "field", - "vulnerable" - ], - "exclude_keywords": [ - "donate", - "shop" - ], - "reporting": { - "label": "Join Missing Maps", - "url": "https://www.missingmaps.org/get-involved/" - } + "lat": 37.94, + "lng": 23.65 }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "icrc-family-links", - "authority_name": "ICRC Family Links", - "country": "Global", - "country_code": "INT", + "source_id": "maritime-executive", + "authority_name": "Maritime Executive", + "country": "United States", + "country_code": "US", "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://familylinks.icrc.org" + "authority_type": "private_sector", + "base_url": "https://maritime-executive.com" }, - "feed_url": "https://familylinks.icrc.org/", - "category": "humanitarian_security", + 
"feed_url": "https://maritime-executive.com/feed", + "category": "maritime_security", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 80, - "include_keywords": [ - "missing", - "family", - "search", - "crisis", - "restore", - "trace" - ], - "exclude_keywords": [ - "privacy", - "policy", - "terms" - ], - "reporting": { - "label": "ICRC Family Links", - "url": "https://familylinks.icrc.org/" - } + "lat": 33.77, + "lng": -118.19, + "include_keywords": ["piracy", "attack", "seized", "hijack", "missile", "houthi", "security", "boarding", "smuggling", "sanctions", "detained", "navy", "warship", "patrol", "collision", "grounding", "sinking", "rescue"] }, { "type": "html-list", "followRedirects": true, "source": { - "source_id": "iom-missing-migrants", - "authority_name": "IOM Missing Migrants", - "country": "Global", - "country_code": "INT", - "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://missingmigrants.iom.int" + "source_id": "eunavfor", + "authority_name": "EU NAVFOR Atalanta", + "country": "European Union", + "country_code": "EU", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://eunavfor.eu" }, - "feed_url": "https://missingmigrants.iom.int/", - "feed_urls": [ - "https://missingmigrants.iom.int/", - "https://missingmigrants.iom.int/latest-data" - ], - "category": "humanitarian_security", - "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 100, - "include_keywords": [ - "missing migrants", - "incident", - "data", - "route", - "deaths", - "disappearances" - ], - "exclude_keywords": [ - "publication", - "about us" - ], - "reporting": { - "label": "IOM Missing Migrants Data", - "url": "https://missingmigrants.iom.int/" - } + "feed_url": "https://eunavfor.eu/news", + "category": "maritime_security", + "region_tag": "EU", + "lat": 11.59, + "lng": 43.15, + "include_keywords": ["piracy", "operation", "vessel", "patrol", "escort", "maritime", "incident", "seized", 
"rescue", "suspicious", "attack", "atalanta", "aspides"] }, { "type": "html-list", "followRedirects": true, "source": { - "source_id": "ict4peace-briefings", - "authority_name": "ICT4Peace", - "country": "Global", - "country_code": "INT", + "source_id": "marad-advisories", + "authority_name": "US MARAD Maritime Advisories", + "country": "United States", + "country_code": "US", "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://ict4peace.org" + "authority_type": "regulatory", + "base_url": "https://www.maritime.dot.gov" }, - "feed_url": "https://ict4peace.org/", - "feed_urls": [ - "https://ict4peace.org/", - "https://ict4peace.org/category/publications/" - ], - "category": "humanitarian_tasking", + "feed_url": "https://www.maritime.dot.gov/msci-advisories", + "category": "maritime_security", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 80, - "include_keywords": [ - "crisis", - "humanitarian", - "technology", - "coordination", - "response" - ], - "exclude_keywords": [ - "about", - "board", - "contact" - ], - "reporting": { - "label": "ICT4Peace Publications", - "url": "https://ict4peace.org/" - } + "lat": 38.88, + "lng": -77.02, + "include_keywords": ["advisory", "threat", "piracy", "attack", "vessel", "maritime", "security", "warning", "alert", "strait", "gulf", "red sea"] }, + + {"_comment": "═══════════════ LEGISLATIVE / POLITICAL OSINT ═══════════════"}, + { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "cida-africa-updates", - "authority_name": "CIDA Africa", - "country": "South Africa", - "country_code": "ZA", - "region": "Africa", - "authority_type": "public_safety_program", - "base_url": "https://cida-africa.org" + "source_id": "eu-parliament-press", + "authority_name": "European Parliament Press", + "country": "European Union", + "country_code": "EU", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.europarl.europa.eu" 
}, - "feed_url": "https://cida-africa.org/", - "feed_urls": [ - "https://cida-africa.org/", - "https://cida-africa.org/news/" - ], - "category": "cyber_advisory", - "region_tag": "ZA", - "lat": -26.2041, - "lng": 28.0473, - "max_items": 80, - "include_keywords": [ - "cyber", - "security", - "incident", - "resilience", - "capacity", - "training", - "civil society" - ], - "exclude_keywords": [ - "privacy policy", - "terms", - "donate" - ], - "reporting": { - "label": "Contact CIDA Africa", - "url": "https://cida-africa.org/contact/" - } + "feed_url": "https://www.europarl.europa.eu/rss/doc/press-releases/en", + "category": "legislative", + "region_tag": "EU", + "lat": 48.60, + "lng": 7.77, + "include_keywords": ["sanction", "defence", "defense", "security", "border", "migration", "terrorism", "cyber", "intelligence", "military", "war", "conflict", "foreign affairs", "resolution", "embargo", "arms", "threat", "crisis", "nato"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "digital-society-africa", - "authority_name": "Digital Society of Africa", - "country": "Kenya", - "country_code": "KE", - "region": "Africa", - "authority_type": "public_safety_program", - "base_url": "https://digitalsociety.africa" + "source_id": "eu-council-press", + "authority_name": "Council of the EU Press", + "country": "European Union", + "country_code": "EU", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.consilium.europa.eu" }, - "feed_url": "https://digitalsociety.africa/", - "feed_urls": [ - "https://digitalsociety.africa/", - "https://digitalsociety.africa/blog/" - ], - "category": "cyber_advisory", - "region_tag": "KE", - "lat": -1.2864, - "lng": 36.8172, - "max_items": 80, - "include_keywords": [ - "digital security", - "cyber", - "safety", - "threat", - "awareness", - "training", - "response" - ], - "exclude_keywords": [ - "about us", - "privacy policy", - "terms" - ], - "reporting": { - "label": 
"Contact Digital Society Africa", - "url": "https://digitalsociety.africa/contact/" - } + "feed_url": "https://www.consilium.europa.eu/en/press/press-releases/rss.xml", + "category": "legislative", + "region_tag": "EU", + "lat": 50.84, + "lng": 4.37, + "include_keywords": ["sanction", "defence", "defense", "security", "border", "migration", "terrorism", "cyber", "intelligence", "military", "war", "conflict", "foreign", "resolution", "embargo", "arms", "threat", "crisis", "restrictive measures"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "resilio-africa", - "authority_name": "Resilio Africa", - "country": "Nigeria", - "country_code": "NG", - "region": "Africa", - "authority_type": "public_safety_program", - "base_url": "https://resilio.cybersafefoundation.org" + "source_id": "eeas-press", + "authority_name": "EU External Action Service", + "country": "European Union", + "country_code": "EU", + "region": "Europe", + "authority_type": "national_security", + "base_url": "https://www.eeas.europa.eu" }, - "feed_url": "https://resilio.cybersafefoundation.org/", - "category": "cyber_advisory", - "region_tag": "NG", - "lat": 9.082, - "lng": 8.6753, - "max_items": 80, - "include_keywords": [ - "cyber", - "resilience", - "program", - "community", - "security", - "incident", - "capacity" - ], - "exclude_keywords": [ - "privacy", - "terms" - ], - "reporting": { - "label": "Contact Resilio Africa", - "url": "https://resilio.cybersafefoundation.org/" - } + "feed_url": "https://www.eeas.europa.eu/eeas/press-material_en?f%5B0%5D=press_material_type%3APress+release&_format=rss", + "category": "legislative", + "region_tag": "EU", + "lat": 50.84, + "lng": 4.38, + "include_keywords": ["sanction", "security", "defence", "defense", "conflict", "military", "war", "crisis", "terrorism", "foreign", "humanitarian", "ceasefire", "peace", "threat", "arms", "embargo", "nato", "statement"] }, { - "type": "html-list", - "followRedirects": 
true, + "type": "rss", "source": { - "source_id": "derechos-digitales-latam", - "authority_name": "Derechos Digitales", - "country": "Chile", - "country_code": "CL", - "region": "South America", - "authority_type": "public_safety_program", - "base_url": "https://www.derechosdigitales.org" + "source_id": "eu-commission-press", + "authority_name": "European Commission Press", + "country": "European Union", + "country_code": "EU", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://ec.europa.eu" }, - "feed_url": "https://www.derechosdigitales.org/noticias/", - "feed_urls": [ - "https://www.derechosdigitales.org/noticias/", - "https://www.derechosdigitales.org/categoria/seguridad-digital/" - ], - "category": "cyber_advisory", - "region_tag": "CL", - "lat": -33.4489, - "lng": -70.6693, - "max_items": 100, - "include_keywords": [ - "seguridad digital", - "resiliencia", - "proteccion", - "ciber", - "sociedad civil", - "amenaza" - ], - "exclude_keywords": [ - "donar", - "newsletter", - "equipo" - ], - "reporting": { - "label": "Contact Derechos Digitales", - "url": "https://www.derechosdigitales.org/contacto/" - } + "feed_url": "https://ec.europa.eu/commission/presscorner/api/rss", + "category": "legislative", + "region_tag": "EU", + "lat": 50.84, + "lng": 4.38, + "include_keywords": ["sanction", "security", "defence", "defense", "border", "migration", "terrorism", "cyber", "military", "crisis", "threat", "embargo", "arms", "restrictive measures", "disinformation"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "tedic-paraguay", - "authority_name": "TEDIC Paraguay", - "country": "Paraguay", - "country_code": "PY", - "region": "South America", - "authority_type": "public_safety_program", - "base_url": "https://www.tedic.org" + "source_id": "uk-parliament-lords", + "authority_name": "UK Parliament — Lords", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + 
"authority_type": "regulatory", + "base_url": "https://www.parliament.uk" }, - "feed_url": "https://www.tedic.org/noticias/", - "feed_urls": [ - "https://www.tedic.org/noticias/", - "https://www.tedic.org/tag/seguridad-digital/" - ], - "category": "cyber_advisory", - "region_tag": "PY", - "lat": -25.2637, - "lng": -57.5759, - "max_items": 100, - "include_keywords": [ - "seguridad digital", - "ciber", - "proteccion", - "sociedad civil", - "capacitacion", - "riesgo" - ], - "exclude_keywords": [ - "donar", - "equipo", - "convocatoria laboral" - ], - "reporting": { - "label": "Contact TEDIC", - "url": "https://www.tedic.org/contacto/" - } + "feed_url": "https://lordsbusiness.parliament.uk/ItemOfBusiness/atom", + "category": "legislative", + "region_tag": "GB", + "lat": 51.50, + "lng": -0.12, + "include_keywords": ["sanction", "security", "defence", "defense", "border", "migration", "terrorism", "cyber", "intelligence", "military", "war", "conflict", "foreign affairs", "arms", "threat", "crisis", "nato"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "karisma-colombia", - "authority_name": "Fundacion Karisma", - "country": "Colombia", - "country_code": "CO", - "region": "South America", - "authority_type": "public_safety_program", - "base_url": "https://web.karisma.org.co" + "source_id": "us-congress-bills", + "authority_name": "US Congress — Bills", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "regulatory", + "base_url": "https://www.congress.gov" }, - "feed_url": "https://web.karisma.org.co/blog/", - "feed_urls": [ - "https://web.karisma.org.co/blog/", - "https://web.karisma.org.co/tag/seguridad-digital/" - ], - "category": "cyber_advisory", - "region_tag": "CO", - "lat": 4.711, - "lng": -74.0721, - "max_items": 100, - "include_keywords": [ - "seguridad digital", - "ciberseguridad", - "proteccion", - "organizaciones", - "amenaza", - "riesgo" - ], - 
"exclude_keywords": [ - "donaciones", - "equipo", - "vacante" - ], - "reporting": { - "label": "Contact Fundacion Karisma", - "url": "https://web.karisma.org.co/contacto/" - } + "feed_url": "https://www.congress.gov/rss/most-viewed-bills.xml", + "category": "legislative", + "region_tag": "US", + "lat": 38.89, + "lng": -77.01, + "include_keywords": ["sanction", "security", "defense", "border", "immigration", "terrorism", "cyber", "intelligence", "military", "war", "conflict", "foreign", "arms", "threat", "national defense", "nato", "maritime"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "unicef-giga", - "authority_name": "UNICEF Giga", - "country": "Global", - "country_code": "INT", - "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://giga.global" + "source_id": "us-state-dept-press", + "authority_name": "US State Department Press", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.state.gov" }, - "feed_url": "https://giga.global/", - "feed_urls": [ - "https://giga.global/", - "https://giga.global/insights/", - "https://giga.global/projects/" - ], - "category": "education_digital_capacity", - "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 100, - "include_keywords": [ - "school connectivity", - "internet", - "digital divide", - "schools", - "education", - "mapping" - ], - "exclude_keywords": [ - "privacy policy", - "terms", - "careers" - ], - "reporting": { - "label": "UNICEF Giga Opportunities", - "url": "https://giga.global/" - } + "feed_url": "https://www.state.gov/rss-feed/press-releases/feed/", + "category": "legislative", + "region_tag": "US", + "lat": 38.89, + "lng": -77.05, + "include_keywords": ["sanction", "security", "defense", "terrorism", "conflict", "war", "crisis", "ceasefire", "peace", "threat", "arms", "embargo", "nuclear", "nato", 
"humanitarian", "statement"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "itu-digital-capacity", - "authority_name": "ITU Digital Development", - "country": "Global", - "country_code": "INT", - "region": "International", - "authority_type": "regulatory", - "base_url": "https://www.itu.int" + "source_id": "us-dod-press", + "authority_name": "US Department of Defense", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.defense.gov" }, - "feed_url": "https://www.itu.int/en/ITU-D/Pages/default.aspx", - "feed_urls": [ - "https://www.itu.int/en/ITU-D/Pages/default.aspx", - "https://www.itu.int/hub/category/bridging-the-digital-divide/", - "https://www.itu.int/hub/category/cybersecurity/" - ], - "category": "education_digital_capacity", - "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 120, - "include_keywords": [ - "digital skills", - "capacity", - "connectivity", - "cyber capacity", - "education", - "training" - ], - "exclude_keywords": [ - "meeting calendar", - "press release" - ], - "reporting": { - "label": "ITU Development Programs", - "url": "https://www.itu.int/en/ITU-D/Pages/default.aspx" - } + "feed_url": "https://www.defense.gov/DesktopModules/ArticleCS/RSS.ashx?max=10&ContentType=1&Site=945", + "category": "conflict_monitoring", + "region_tag": "US", + "lat": 38.87, + "lng": -77.06, + "include_keywords": ["operation", "strike", "deploy", "missile", "threat", "conflict", "attack", "defense", "military", "patrol", "exercise", "nato", "coalition", "airstrike", "maritime"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "worldbank-education-digital", - "authority_name": "World Bank Education Digital", - "country": "Global", + "source_id": "nato-news", + "authority_name": "NATO News", + "country": "International", "country_code": "INT", "region": 
"International", - "authority_type": "regulatory", - "base_url": "https://www.worldbank.org" + "authority_type": "national_security", + "base_url": "https://www.nato.int" }, - "feed_url": "https://www.worldbank.org/en/topic/education", - "feed_urls": [ - "https://www.worldbank.org/en/topic/education", - "https://www.worldbank.org/en/topic/digitaldevelopment" - ], - "category": "education_digital_capacity", + "feed_url": "https://www.nato.int/cps/en/natolive/news.xml", + "category": "conflict_monitoring", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 120, - "include_keywords": [ - "education", - "digital", - "school", - "ICT", - "skills", - "capacity building" - ], - "exclude_keywords": [ - "annual report", - "procurement policy" - ], - "reporting": { - "label": "World Bank Education Programs", - "url": "https://www.worldbank.org/en/topic/education" - } + "lat": 50.88, + "lng": 4.42, + "include_keywords": ["security", "defence", "defense", "military", "operation", "deploy", "threat", "conflict", "exercise", "summit", "alliance", "deterrence", "cyber", "maritime", "air", "nuclear"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "africa-code-week", - "authority_name": "Africa Code Week", - "country": "Africa", - "country_code": "AFR", - "region": "Africa", - "authority_type": "public_safety_program", - "base_url": "https://africacodeweek.org" + "source_id": "osce-press", + "authority_name": "OSCE Press", + "country": "International", + "country_code": "INT", + "region": "Europe", + "authority_type": "regulatory", + "base_url": "https://www.osce.org" }, - "feed_url": "https://africacodeweek.org/", - "feed_urls": [ - "https://africacodeweek.org/", - "https://africacodeweek.org/news/" - ], - "category": "education_digital_capacity", - "region_tag": "AFR", - "lat": 1.65, - "lng": 17, - "max_items": 100, - "include_keywords": [ - "mentor", - "teacher", - "volunteer", - "digital literacy", - "coding", - 
"training" - ], - "exclude_keywords": [ - "privacy", - "terms", - "sponsor package" - ], - "reporting": { - "label": "Join Africa Code Week", - "url": "https://africacodeweek.org/" - } + "feed_url": "https://www.osce.org/rss.xml", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 48.21, + "lng": 16.37, + "include_keywords": ["conflict", "security", "election", "monitoring", "border", "ceasefire", "crisis", "military", "weapons", "arms", "human rights", "detention", "freedom", "threat"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "dot-digital-opportunity-trust", - "authority_name": "Digital Opportunity Trust", - "country": "Global", + "source_id": "un-security-council", + "authority_name": "UN Security Council", + "country": "International", "country_code": "INT", "region": "International", - "authority_type": "public_safety_program", - "base_url": "https://www.dotrust.org" + "authority_type": "regulatory", + "base_url": "https://press.un.org" }, - "feed_url": "https://www.dotrust.org/", - "feed_urls": [ - "https://www.dotrust.org/", - "https://www.dotrust.org/news/" - ], - "category": "education_digital_capacity", + "feed_url": "https://press.un.org/en/taxonomy/term/10/feed", + "category": "conflict_monitoring", "region_tag": "INT", - "lat": 20, - "lng": 0, - "max_items": 100, - "include_keywords": [ - "digital skills", - "training", - "youth", - "employment", - "mentor", - "capacity" - ], - "exclude_keywords": [ - "donate", - "annual report" - ], - "reporting": { - "label": "DOT Programs and Partnerships", - "url": "https://www.dotrust.org/" - } + "lat": 40.75, + "lng": -73.97, + "include_keywords": ["sanction", "resolution", "conflict", "security", "peace", "ceasefire", "military", "threat", "war", "humanitarian", "arms", "embargo", "nuclear", "terrorism"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "european-schoolnet", - 
"authority_name": "European Schoolnet", - "country": "Europe", - "country_code": "EU", - "region": "Europe", - "authority_type": "public_safety_program", - "base_url": "https://www.eun.org" + "source_id": "icg-crisiswatch", + "authority_name": "International Crisis Group", + "country": "International", + "country_code": "INT", + "region": "International", + "authority_type": "regulatory", + "base_url": "https://www.crisisgroup.org" }, - "feed_url": "https://www.eun.org/", - "feed_urls": [ - "https://www.eun.org/", - "https://www.eun.org/news" - ], - "category": "education_digital_capacity", - "region_tag": "EU", - "lat": 50.85, - "lng": 4.35, - "max_items": 100, - "include_keywords": [ - "digital education", - "teacher training", - "skills", - "school", - "capacity" - ], - "exclude_keywords": [ - "privacy policy", - "cookie policy" - ], - "reporting": { - "label": "European Schoolnet Initiatives", - "url": "https://www.eun.org/" - } + "feed_url": "https://www.crisisgroup.org/crisiswatch/feed", + "category": "conflict_monitoring", + "region_tag": "INT", + "lat": 50.84, + "lng": 4.36, + "include_keywords": ["conflict", "crisis", "war", "violence", "tension", "ceasefire", "peace", "military", "insurgency", "coup", "protest", "clashes", "escalation", "deteriorat"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "erasmus-plus-digital", - "authority_name": "Erasmus+ Digital Education", - "country": "Europe", - "country_code": "EU", + "source_id": "de-auswaertiges-amt", + "authority_name": "German Foreign Office", + "country": "Germany", + "country_code": "DE", "region": "Europe", - "authority_type": "regulatory", - "base_url": "https://erasmus-plus.ec.europa.eu" + "authority_type": "national_security", + "base_url": "https://www.auswaertiges-amt.de" }, - "feed_url": "https://erasmus-plus.ec.europa.eu/", - "feed_urls": [ - "https://erasmus-plus.ec.europa.eu/", - "https://erasmus-plus.ec.europa.eu/opportunities" - ], - 
"category": "education_digital_capacity", - "region_tag": "EU", - "lat": 50.85, - "lng": 4.35, - "max_items": 120, - "include_keywords": [ - "digital", - "education", - "training", - "call", - "project", - "skills" - ], - "exclude_keywords": [ - "about", - "legal notice" - ], - "reporting": { - "label": "Erasmus+ Opportunities", - "url": "https://erasmus-plus.ec.europa.eu/opportunities" - } + "feed_url": "https://www.auswaertiges-amt.de/en/rss/news-feed.xml", + "category": "legislative", + "region_tag": "DE", + "lat": 52.52, + "lng": 13.38, + "include_keywords": ["sanction", "security", "conflict", "crisis", "military", "war", "terrorism", "defence", "defense", "nato", "humanitarian", "ceasefire", "peace", "threat", "arms", "embargo", "nuclear"] }, { - "type": "html-list", - "followRedirects": true, + "type": "rss", "source": { - "source_id": "coe-education-digital", - "authority_name": "Council of Europe Education", - "country": "Europe", - "country_code": "EU", + "source_id": "fr-diplomatie", + "authority_name": "France Diplomatie", + "country": "France", + "country_code": "FR", "region": "Europe", - "authority_type": "regulatory", - "base_url": "https://www.coe.int" + "authority_type": "national_security", + "base_url": "https://www.diplomatie.gouv.fr" }, - "feed_url": "https://www.coe.int/en/web/education", - "feed_urls": [ - "https://www.coe.int/en/web/education", - "https://www.coe.int/en/web/digital-citizenship-education" - ], - "category": "education_digital_capacity", - "region_tag": "EU", - "lat": 48.58, - "lng": 7.75, - "max_items": 100, - "include_keywords": [ - "digital citizenship", - "education", - "training", - "school", - "online safety" - ], - "exclude_keywords": [ - "press room", - "vacancy" - ], - "reporting": { - "label": "Council of Europe Education Programs", - "url": "https://www.coe.int/en/web/education" - } + "feed_url": "https://www.diplomatie.gouv.fr/en/rss/", + "category": "legislative", + "region_tag": "FR", + "lat": 48.86, + "lng": 
2.32, + "include_keywords": ["sanction", "security", "conflict", "crisis", "military", "war", "terrorism", "defence", "defense", "nato", "humanitarian", "ceasefire", "peace", "threat", "arms"] } ] diff --git a/registry/sources.seed.db b/registry/sources.seed.db new file mode 100644 index 0000000..95b00e7 Binary files /dev/null and b/registry/sources.seed.db differ diff --git a/scripts/apply-dlq.py b/scripts/apply-dlq.py new file mode 100644 index 0000000..5749b3a --- /dev/null +++ b/scripts/apply-dlq.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +"""Apply dead-letter queue rejections to the local JSON registry. + +Usage: python3 scripts/apply-dlq.py registry/source_registry.json .tmp/dlq.json +""" +import json +import sys + + +def main(): + if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} ", file=sys.stderr) + sys.exit(1) + + registry_path, dlq_path = sys.argv[1], sys.argv[2] + + with open(dlq_path) as f: + dlq = json.load(f) + + dead_sources = {} + for src in dlq.get("sources", []): + sid = src.get("source_id", "") + if sid: + dead_sources[sid] = src.get("error", "unknown error") + + if not dead_sources: + print("DLQ is empty, nothing to apply.") + return + + with open(registry_path) as f: + registry = json.load(f) + + changed = 0 + for entry in registry: + sid = entry.get("source", {}).get("source_id", "") + if sid in dead_sources and entry.get("promotion_status") != "rejected": + entry["promotion_status"] = "rejected" + entry["rejection_reason"] = f"Dead source: {dead_sources[sid]}" + changed += 1 + + if changed == 0: + print("No new rejections to apply.") + return + + with open(registry_path, "w") as f: + json.dump(registry, f, indent=2, ensure_ascii=False) + f.write("\n") + + print(f"Rejected {changed} dead source(s) in {registry_path}") + + +if __name__ == "__main__": + main() diff --git a/src/App.tsx b/src/App.tsx index 5ca4f14..8ab53c5 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -4,54 +4,173 @@ * See NOTICE for provenance and LICENSE for 
repository-local terms. */ -import { useCallback, useEffect, useRef, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { Header } from "@/components/Header"; -import { StatsBar } from "@/components/StatsBar"; import { GlobeView } from "@/components/GlobeView"; import { AlertFeed } from "@/components/AlertFeed"; import { AlertDetail } from "@/components/AlertDetail"; -import { SubmitIntelModal } from "@/components/SubmitIntelModal"; +import { FeedDirectory } from "@/components/FeedDirectory"; import { useAlerts } from "@/hooks/useAlerts"; +import { useSearch } from "@/hooks/useSearch"; +import { useSourceHealth } from "@/hooks/useSourceHealth"; +import { alertMatchesRegionFilter } from "@/lib/regions"; +import type { AlertCategory } from "@/types/alert"; + +type SeverityFilter = "critical" | "high" | null; + +const SOURCE_SELECTION_COOKIE = "euosint_selected_sources"; + +function readSelectedSources(): string[] { + if (typeof document === "undefined") return []; + const cookie = document.cookie + .split("; ") + .find((entry) => entry.startsWith(`${SOURCE_SELECTION_COOKIE}=`)); + if (!cookie) return []; + try { + const value = decodeURIComponent(cookie.split("=").slice(1).join("=")); + const parsed = JSON.parse(value); + return Array.isArray(parsed) + ? 
parsed.filter((item): item is string => typeof item === "string" && item.trim().length > 0) + : []; + } catch { + return []; + } +} + +function writeSelectedSources(sourceIds: string[]) { + if (typeof document === "undefined") return; + const expires = new Date(); + expires.setMonth(expires.getMonth() + 6); + document.cookie = `${SOURCE_SELECTION_COOKIE}=${encodeURIComponent(JSON.stringify(sourceIds))}; expires=${expires.toUTCString()}; path=/; SameSite=Lax`; +} export default function App() { const { alerts, isLoading, sourceCount } = useAlerts(); + const { sourceHealth, isLoading: isSourceHealthLoading } = useSourceHealth(); const [selectedId, setSelectedId] = useState(null); - const [regionFilter, setRegionFilter] = useState("all"); + const [selectedSourceIds, setSelectedSourceIds] = useState([]); + const [categoryFilter, setCategoryFilter] = useState("all"); + const [severityFilter, setSeverityFilter] = useState(null); + const [regionFilter, setRegionFilter] = useState("Europe"); + const { query: searchQuery, setQuery: setSearchQuery, results: searchResults, isApiAvailable } = useSearch(); const [visibleAlertIds, setVisibleAlertIds] = useState([]); - const [mobilePane, setMobilePane] = useState<"map" | "stack">("map"); - const [isDesktopFeedOpen, setIsDesktopFeedOpen] = useState(true); - const [feedVisible, setFeedVisible] = useState(true); - const [feedToggled, setFeedToggled] = useState(false); - const [isSubmitOpen, setIsSubmitOpen] = useState(false); + const [mobilePane, setMobilePane] = useState<"intel" | "map" | "alerts">("map"); const panelRef = useRef(null); - const feedRef = useRef(null); + + useEffect(() => { + setSelectedSourceIds(readSelectedSources()); + }, []); + + useEffect(() => { + const availableSourceIds = new Set(alerts.map((alert) => alert.source_id)); + setSelectedSourceIds((current) => { + const next = current.filter((sourceId) => availableSourceIds.has(sourceId)); + if (next.length !== current.length) { + writeSelectedSources(next); + } 
+ return next; + }); + }, [alerts]); + + useEffect(() => { + writeSelectedSources(selectedSourceIds); + }, [selectedSourceIds]); + + const handleRegionChange = useCallback((nextRegion: string) => { + setRegionFilter(nextRegion); + setSelectedSourceIds([]); + setSelectedId(null); + }, []); + + const regionScopedAlerts = useMemo(() => { + const query = searchQuery.trim().toLowerCase(); + + // When API search returned results, use those (already ranked by BM25). + if (query && isApiAvailable && searchResults.length > 0) { + let filtered = searchResults; + if (regionFilter !== "all") { + filtered = filtered.filter((alert) => alertMatchesRegionFilter(alert, regionFilter)); + } + return filtered; + } + + // Fallback: client-side filter. + let filtered = alerts; + if (regionFilter !== "all") { + filtered = filtered.filter((alert) => alertMatchesRegionFilter(alert, regionFilter)); + } + if (query) { + filtered = filtered.filter((alert) => { + const haystack = [ + alert.title, + alert.source.authority_name, + alert.source.country, + alert.source.country_code, + alert.source.region, + alert.category, + alert.canonical_url, + ] + .join(" ") + .toLowerCase(); + return haystack.includes(query); + }); + } + return filtered; + }, [alerts, regionFilter, searchQuery, searchResults, isApiAvailable]); + + const scopedAlerts = useMemo(() => { + let filtered = regionScopedAlerts; + if (selectedSourceIds.length > 0) { + const selectedSet = new Set(selectedSourceIds); + filtered = filtered.filter((alert) => selectedSet.has(alert.source_id)); + } + if (categoryFilter !== "all") { + filtered = filtered.filter((alert) => alert.category === categoryFilter); + } + if (severityFilter) { + filtered = filtered.filter((alert) => alert.severity === severityFilter); + } + return filtered; + }, [categoryFilter, regionScopedAlerts, selectedSourceIds, severityFilter]); + + const handleCountrySelect = useCallback((countryCode: string) => { + const nextRegion = `country:${countryCode}`; + 
setRegionFilter((current) => current === nextRegion ? "all" : nextRegion); + setSelectedSourceIds([]); + setCategoryFilter("all"); + setSelectedId(null); + }, []); + + const handleSourceSelectionChange = useCallback((sourceIds: string[]) => { + setSelectedSourceIds(sourceIds); + setSelectedId(null); + }, []); + + const handleNavigatorSelect = useCallback((nextRegion: string, nextCategory: AlertCategory) => { + setRegionFilter(nextRegion); + setCategoryFilter(nextCategory); + setSelectedSourceIds([]); + setSelectedId(null); + }, []); + const selectedAlert = selectedId - ? alerts.find((a) => a.alert_id === selectedId) ?? null + ? scopedAlerts.find((a) => a.alert_id === selectedId) ?? alerts.find((a) => a.alert_id === selectedId) ?? null : null; const handleClose = useCallback(() => { const el = panelRef.current; - if (!el) { setSelectedId(null); return; } - el.style.animation = "slide-out-right 0.3s ease-in forwards"; - el.addEventListener("animationend", () => { + if (!el) { setSelectedId(null); - }, { once: true }); - }, []); - - const handleHideFeed = useCallback(() => { - setFeedToggled(true); - setFeedVisible(false); - const el = feedRef.current; - if (!el) { setIsDesktopFeedOpen(false); return; } - el.addEventListener("animationend", () => { - setIsDesktopFeedOpen(false); - }, { once: true }); - }, []); - - const handleShowFeed = useCallback(() => { - setFeedToggled(true); - setIsDesktopFeedOpen(true); - setFeedVisible(true); + return; + } + el.style.animation = "slide-out-right 0.24s ease-in forwards"; + el.addEventListener( + "animationend", + () => { + setSelectedId(null); + }, + { once: true } + ); }, []); useEffect(() => { @@ -61,126 +180,134 @@ export default function App() { }, [alerts, selectedId]); useEffect(() => { - setVisibleAlertIds(alerts.map((a) => a.alert_id)); - }, [alerts]); + setVisibleAlertIds(scopedAlerts.map((a) => a.alert_id)); + }, [scopedAlerts]); return ( -
- {/* Top Bar */} -
setIsSubmitOpen(true)} /> - -
-
- - -
-
+
+
{}} + alerts={alerts} + /> - {/* Main Content */} -
- {!isDesktopFeedOpen && ( - - )} - {/* Left Panel: Alert Feed */} - {(isDesktopFeedOpen || mobilePane === "stack") && ( -
- {isLoading ? ( -
- Loading live feed... -
- ) : ( - - )} + {/* Main content — fills remaining height, no overflow */} +
+ {/* Left panel — intel overview */} +
+
- )} - {/* Center: Globe (full remaining width) */} -
+ {/* Center — map */} +
- {/* Right Panel: Slide-out Alert Detail */} + {/* Right panel — alert queue (contained, scrollable) */} +
+
+ {isLoading ? ( +
+ Loading live alert queue... +
+ ) : ( + + )} +
+
+ + {/* Mobile tab bar */} +
+ {[ + ["intel", "Intel"], + ["map", "Map"], + ["alerts", "Queue"], + ].map(([pane, label]) => ( + + ))} +
+ + {/* Alert detail overlay */} {selectedAlert && (
- {/* Backdrop click to close */}
-
- +
+
)}
- setIsSubmitOpen(false)} /> - - {/* Bottom Status Bar */} -
- EUOSINT v0.1.0 +
+ + Scalytics OSINT + {" // Open Source Intelligence Console"} + - Sources: {sourceCount} authorities // Live feed // No data stored // Index + Link only + {selectedSourceIds.length > 0 + ? `Scoped to ${selectedSourceIds.length} streams` + : categoryFilter !== "all" + ? `Scoped to ${categoryFilter}` + : "All live streams"} + {" // "} + Build your intelligence pipeline — Contact us - {sourceCount} src
); diff --git a/src/components/AlertFeed.tsx b/src/components/AlertFeed.tsx index 9a863a1..e953a9c 100644 --- a/src/components/AlertFeed.tsx +++ b/src/components/AlertFeed.tsx @@ -7,7 +7,7 @@ import { useEffect, useMemo, useRef, useState } from "react"; import type { Alert, AlertCategory, Severity } from "@/types/alert"; import { - severityColors, + severityColor, severityBg, severityLabel, categoryLabels, @@ -22,24 +22,27 @@ interface Props { alerts: Alert[]; selectedId: string | null; onSelect: (id: string) => void; + categoryFilter: AlertCategory | "all"; + onCategoryChange: (category: AlertCategory | "all") => void; regionFilter: string; onRegionChange: (region: string) => void; + onNavigatorSelect?: (region: string, category: AlertCategory) => void; onVisibleAlertIdsChange: (ids: string[]) => void; - onHideDesktop?: () => void; } export function AlertFeed({ alerts, selectedId, onSelect, + categoryFilter, + onCategoryChange, regionFilter, onRegionChange, + onNavigatorSelect, onVisibleAlertIdsChange, - onHideDesktop, }: Props) { const [viewMode, setViewMode] = useState<"navigator" | "timeline">("navigator"); const [actionableOnly, setActionableOnly] = useState(true); - const [categoryFilter, setCategoryFilter] = useState("all"); const [severityFilter, setSeverityFilter] = useState("all"); const [activeNavigatorGroupKey, setActiveNavigatorGroupKey] = useState(null); const [collapsedSections, setCollapsedSections] = useState>(new Set()); @@ -59,6 +62,22 @@ export function AlertFeed({ return [...set.entries()].sort((a, b) => b[1] - a[1]); }, [alerts]); + const countries = useMemo(() => { + const set = new Map(); + alerts.forEach((a) => { + const key = a.source.country_code; + const existing = set.get(key); + if (existing) { + existing.count++; + } else { + set.set(key, { name: a.source.country, count: 1 }); + } + }); + return [...set.entries()] + .map(([code, { name, count }]) => ({ code, name, count })) + .sort((a, b) => b.count - a.count); + }, [alerts]); + const 
regionFiltered = regionFilter === "all" ? alerts @@ -135,6 +154,11 @@ export function AlertFeed({ }); }, [facetFiltered]); + // Reset navigator selection when region or category filter changes. + useEffect(() => { + setActiveNavigatorGroupKey(null); + }, [regionFilter, categoryFilter]); + useEffect(() => { if (navigatorGroups.length === 0) { setActiveNavigatorGroupKey(null); @@ -152,6 +176,15 @@ export function AlertFeed({ const activeNavigatorGroup = navigatorGroups.find((group) => group.key === activeNavigatorGroupKey) ?? null; + const handleNavigatorGroupSelect = (groupKey: string) => { + setActiveNavigatorGroupKey(groupKey); + const group = navigatorGroups.find((entry) => entry.key === groupKey); + if (!group) { + return; + } + onNavigatorSelect?.(group.region, group.category); + }; + // Keep globe visibility aligned with current filters, not only the active navigator bucket. const visibleAlertIds = useMemo( () => facetFiltered.map((a) => a.alert_id), @@ -159,11 +192,12 @@ export function AlertFeed({ ); useEffect(() => { + const glowTimeouts = glowTimeoutsRef.current; return () => { if (refreshTimeoutRef.current) { window.clearTimeout(refreshTimeoutRef.current); } - glowTimeoutsRef.current.forEach((id) => window.clearTimeout(id)); + glowTimeouts.forEach((id) => window.clearTimeout(id)); }; }, []); @@ -206,13 +240,7 @@ export function AlertFeed({ knownAlertIdsRef.current = currentIds; }, [alerts]); - const severityRail: Record = { - critical: severityColors.critical, - high: severityColors.high, - medium: severityColors.medium, - low: severityColors.low, - info: severityColors.info, - }; + const severityRail = (s: Severity) => severityColor(s); useEffect(() => { const sig = visibleAlertIds.join("|"); @@ -235,7 +263,7 @@ export function AlertFeed({ >
@@ -299,19 +327,19 @@ export function AlertFeed({ }; return ( -
+

- SOC Alert Stack + Intelligence Queue

- +
+ {regionFilter === "all" + ? "Global scope" + : regionFilter.startsWith("country:") + ? `${countries.find((c) => c.code === regionFilter.slice(8))?.name ?? regionFilter.slice(8)} scope` + : `${regionFilter} scope`} +
@@ -333,19 +361,27 @@ export function AlertFeed({ onChange={(e) => onRegionChange(e.target.value)} className="w-full appearance-none bg-white/5 border border-siem-border rounded-md pl-7 pr-8 py-1.5 text-xs text-siem-text cursor-pointer hover:bg-siem-accent/10 transition-colors focus:outline-none focus:ring-1 focus:ring-siem-accent" > - + {regions.map(([region, count]) => ( ))} + {countries.length > 0 && ( + + )} + {countries.map((c) => ( + + ))}
setHotspotTypeFilter(e.target.value)} - className="w-full appearance-none bg-white/5 border border-siem-border rounded-md px-2 py-1 text-[11px] text-siem-text" - > - - {hotspotTypeOptions.map((type) => ( - - ))} - - +
-
- {hotspotFiltered.map((alert) => ( - - ))} -
-
- )} - {/* Legend */} -
- {( - [ - ["critical", "bg-red-500"], - ["high", "bg-orange-500"], - ["medium", "bg-yellow-500"], - ["low", "bg-green-500"], - ["info", "bg-cyan-500"], - ] as const - ).map(([sev, bg]) => ( -
-
- {sev} + ))} +
- ))} -
-
- Drag to rotate · Scroll to zoom · Click a continent to filter -
-
- - -
-
- - {regions.map(([region]) => ( - - ))} -
-
- +
-
+ ); } diff --git a/src/components/Header.tsx b/src/components/Header.tsx index db59076..a6ecbb1 100644 --- a/src/components/Header.tsx +++ b/src/components/Header.tsx @@ -4,53 +4,535 @@ * See NOTICE for provenance and LICENSE for repository-local terms. */ -import { Shield, Globe, Send } from "lucide-react"; +import { useEffect, useMemo, useRef, useState } from "react"; +import { Globe2, Radar, Search, Shield, X } from "lucide-react"; +import type { Alert } from "@/types/alert"; +import { alertMatchesRegionFilter } from "@/lib/regions"; + +type MenuView = "overview" | "feeds" | "authorities" | "health"; interface Props { regionFilter: string; - onSubmitIntel: () => void; + onRegionChange: (region: string) => void; + sourceCount: number; + selectedSourceIds: string[]; + onSelectedSourceIdsChange: (sourceIds: string[]) => void; + searchQuery: string; + onSearchChange: (query: string) => void; + activeMenu: MenuView; + onMenuChange: (view: MenuView) => void; + alerts: Alert[]; +} + +const REGIONS = [ + "Europe", + "all", + "North America", + "South America", + "Africa", + "Middle East", + "Asia", + "Oceania", + "Caribbean", + "International", +]; + +const SEARCH_HISTORY_COOKIE = "euosint_search_history"; + +function readSearchHistory(): string[] { + if (typeof document === "undefined") return []; + const cookie = document.cookie + .split("; ") + .find((entry) => entry.startsWith(`${SEARCH_HISTORY_COOKIE}=`)); + if (!cookie) return []; + try { + const value = decodeURIComponent(cookie.split("=").slice(1).join("=")); + const parsed = JSON.parse(value); + return Array.isArray(parsed) ? 
parsed.filter((item): item is string => typeof item === "string") : []; + } catch { + return []; + } +} + +function writeSearchHistory(history: string[]) { + if (typeof document === "undefined") return; + const expires = new Date(); + expires.setMonth(expires.getMonth() + 6); + document.cookie = `${SEARCH_HISTORY_COOKIE}=${encodeURIComponent(JSON.stringify(history))}; expires=${expires.toUTCString()}; path=/; SameSite=Lax`; } -export function Header({ regionFilter, onSubmitIntel }: Props) { +function FeedFocus({ + alerts, + sourceCount, + selectedSourceIds, + onSelectedSourceIdsChange, +}: { + alerts: Alert[]; + sourceCount: number; + selectedSourceIds: string[]; + onSelectedSourceIdsChange: (sourceIds: string[]) => void; +}) { + const [open, setOpen] = useState(false); + const [query, setQuery] = useState(""); + const containerRef = useRef(null); + const inputRef = useRef(null); + + const sources = useMemo(() => { + const map = new Map(); + for (const alert of alerts) { + const existing = map.get(alert.source_id); + if (existing) { + existing.count++; + } else { + map.set(alert.source_id, { + id: alert.source_id, + name: alert.source.authority_name, + country: alert.source.country, + count: 1, + }); + } + } + return [...map.values()].sort((a, b) => { + if (b.count !== a.count) return b.count - a.count; + return a.name.localeCompare(b.name); + }); + }, [alerts]); + + useEffect(() => { + const handler = (e: MouseEvent) => { + if (containerRef.current && !containerRef.current.contains(e.target as Node)) { + setOpen(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, []); + + const filteredSources = useMemo(() => { + const trimmed = query.trim().toLowerCase(); + if (!trimmed) return sources; + return sources.filter((source) => + `${source.name} ${source.country} ${source.id}`.toLowerCase().includes(trimmed), + ); + }, [query, sources]); + + const selectedLabels = useMemo(() => { + 
if (selectedSourceIds.length === 0) return `All ${sourceCount} authorities`; + const labels = selectedSourceIds + .map((sourceId) => sources.find((source) => source.id === sourceId)?.name ?? sourceId) + .slice(0, 2); + if (selectedSourceIds.length <= 2) return labels.join(", "); + return `${labels.join(", ")} +${selectedSourceIds.length - 2}`; + }, [selectedSourceIds, sourceCount, sources]); + + const toggleSource = (sourceId: string) => { + if (selectedSourceIds.includes(sourceId)) { + onSelectedSourceIdsChange(selectedSourceIds.filter((id) => id !== sourceId)); + return; + } + onSelectedSourceIdsChange([...selectedSourceIds, sourceId]); + }; + return ( -
-
-
- -
-
-

EUOSINT

-

- EU-Focused Authority Bulletin Intelligence -

+
+
+
+ + Feed focus
-
-
-
- - - {regionFilter === "all" ? "ALL REGIONS" : regionFilter} - +
+ + {open && ( +
+
+ + setQuery(e.target.value)} + placeholder="Search feeds, agencies, countries..." + className="w-full bg-transparent pl-7 pr-7 py-1.5 text-xs text-siem-text placeholder:text-siem-muted/60 outline-none" + /> + {query && ( + + )} +
+
+ +
+
+ {filteredSources.length === 0 && ( +
No matching feeds
+ )} + {filteredSources.map((source) => { + const selected = selectedSourceIds.includes(source.id); + return ( + + ); + })} +
-
-
- - Monitoring - - - Live - + )} +
+ ); +} + +function SearchBar({ + query, + onQueryChange, +}: { + query: string; + onQueryChange: (query: string) => void; +}) { + const [isOpen, setIsOpen] = useState(false); + const [history, setHistory] = useState([]); + const containerRef = useRef(null); + const inputRef = useRef(null); + + useEffect(() => { + setHistory(readSearchHistory()); + }, []); + + useEffect(() => { + const handler = (e: MouseEvent) => { + if (containerRef.current && !containerRef.current.contains(e.target as Node)) { + setIsOpen(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, []); + + const filteredHistory = useMemo(() => { + const trimmed = query.trim().toLowerCase(); + if (!trimmed) return history; + return history.filter((item) => item.toLowerCase().includes(trimmed)); + }, [history, query]); + + const commitQuery = (value: string) => { + const trimmed = value.trim(); + onQueryChange(trimmed); + if (!trimmed) return; + const nextHistory = [trimmed, ...history.filter((item) => item !== trimmed)].slice(0, 8); + setHistory(nextHistory); + writeSearchHistory(nextHistory); + }; + + return ( +
+
{ + e.preventDefault(); + commitQuery(query); + setIsOpen(false); + }} + className="relative" + > + + onQueryChange(e.target.value)} + onFocus={() => setIsOpen(true)} + placeholder="Search alerts, agencies, countries, categories..." + className="w-full rounded-full border border-siem-border bg-siem-panel-strong pl-10 pr-24 py-3 text-sm text-siem-text outline-none transition-colors placeholder:text-siem-muted/60 focus:border-siem-accent/45" + /> + {query && ( + + )} + + + + {isOpen && filteredHistory.length > 0 && ( +
+
+ Recent queries +
+
+ {filteredHistory.map((item) => ( + + ))} +
+ )} +
+ ); +} + +function RegionSearch({ + regionFilter, + onRegionChange, + alerts, +}: { + regionFilter: string; + onRegionChange: (region: string) => void; + alerts: Alert[]; +}) { + const [open, setOpen] = useState(false); + const [query, setQuery] = useState(""); + const containerRef = useRef(null); + const inputRef = useRef(null); + + // Build searchable items: regions + unique countries. + const items = useMemo(() => { + const regionItems = REGIONS.map((r) => ({ + value: r, + label: r === "all" ? "All regions" : r, + type: "region" as const, + count: r === "all" ? alerts.length : alerts.filter((a) => alertMatchesRegionFilter(a, r)).length, + })); + + const countryMap = new Map(); + for (const a of alerts) { + const key = a.source.country_code; + const existing = countryMap.get(key); + if (existing) { + existing.count++; + } else { + countryMap.set(key, { country: a.source.country, code: a.source.country_code, count: 1 }); + } + } + const countryItems = [...countryMap.values()] + .sort((a, b) => b.count - a.count) + .map((c) => ({ + value: `country:${c.code}`, + label: `${c.country} (${c.code})`, + type: "country" as const, + count: c.count, + })); + + return [...regionItems, ...countryItems]; + }, [alerts]); + + const filtered = useMemo(() => { + const q = query.toLowerCase().trim(); + if (!q) return items.filter((i) => i.count > 0 || i.type === "region"); + return items.filter((i) => i.label.toLowerCase().includes(q)); + }, [items, query]); + + // Close dropdown on outside click. 
+ useEffect(() => { + const handler = (e: MouseEvent) => { + if (containerRef.current && !containerRef.current.contains(e.target as Node)) { + setOpen(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, []); + + const currentLabel = useMemo(() => { + if (regionFilter.startsWith("country:")) { + const code = regionFilter.slice(8); + const match = items.find((i) => i.value === regionFilter); + return match ? match.label : code; + } + return regionFilter === "all" ? "All regions" : regionFilter; + }, [regionFilter, items]); + + return ( +
+
+
+ + Region scope +
+
+ + {open && ( +
+
+ + setQuery(e.target.value)} + placeholder="Search regions, countries..." + className="w-full bg-transparent pl-7 pr-7 py-1.5 text-xs text-siem-text placeholder:text-siem-muted/60 outline-none" + /> + {query && ( + + )} +
+
+ {filtered.length === 0 && ( +
No matches
+ )} + {filtered.map((item) => ( + + ))} +
+
+ )}
); } + +export function Header({ + regionFilter, + onRegionChange, + sourceCount, + selectedSourceIds, + onSelectedSourceIdsChange, + searchQuery, + onSearchChange, + activeMenu, + onMenuChange, + alerts, +}: Props) { + void activeMenu; + void onMenuChange; + return ( +
+
+
+
+
+ +
+
+
+ Scalytics OSINT +
+
+ Open Source Intelligence Console +
+
+
+ +
+ +
+ +
+ + + +
+
+ +
+
+ ); +} diff --git a/src/components/StatsBar.tsx b/src/components/StatsBar.tsx index cce6414..80c9b6a 100644 --- a/src/components/StatsBar.tsx +++ b/src/components/StatsBar.tsx @@ -5,46 +5,72 @@ */ import type { Alert } from "@/types/alert"; -import { Shield, AlertTriangle, Globe, Radio, Clock } from "lucide-react"; +import type { SourceHealthDocument } from "@/types/source-health"; +import { Activity, AlertTriangle, Globe2, ShieldAlert, Siren, Workflow } from "lucide-react"; interface Props { alerts: Alert[]; + sourceHealth: SourceHealthDocument | null; } -export function StatsBar({ alerts }: Props) { - const total = alerts.length; - const critical = alerts.filter((a) => a.severity === "critical").length; - const high = alerts.filter((a) => a.severity === "high").length; - const active = alerts.filter((a) => a.status === "active").length; - const regions = new Set(alerts.map((a) => a.source.country_code)).size; - const agencyTypes = new Set(alerts.map((a) => a.source.authority_type)).size; - - const stats = [ - { icon: Radio, label: "ACTIVE ALERTS", value: active, color: "text-green-400" }, - { icon: AlertTriangle, label: "CRITICAL", value: critical, color: "text-red-400" }, - { icon: Shield, label: "HIGH", value: high, color: "text-orange-400" }, - { icon: Globe, label: "REGIONS", value: regions, color: "text-blue-400" }, - { icon: Shield, label: "AGENCY TYPES", value: agencyTypes, color: "text-siem-muted" }, - { icon: Clock, label: "TOTAL", value: total, color: "text-siem-muted" }, +export function StatsBar({ alerts, sourceHealth }: Props) { + const metrics = [ + { + icon: Activity, + label: "Active", + value: alerts.filter((alert) => alert.status === "active").length, + tone: "text-emerald-300", + }, + { + icon: AlertTriangle, + label: "Critical", + value: alerts.filter((alert) => alert.severity === "critical").length, + tone: "text-rose-300", + }, + { + icon: ShieldAlert, + label: "High", + value: alerts.filter((alert) => alert.severity === "high").length, + 
tone: "text-amber-300", + }, + { + icon: Globe2, + label: "Countries", + value: new Set(alerts.map((alert) => alert.source.country_code)).size, + tone: "text-siem-accent", + }, + { + icon: Workflow, + label: "Feeds OK", + value: sourceHealth?.sources_ok ?? 0, + tone: "text-siem-text", + }, + { + icon: Siren, + label: "Feed errors", + value: sourceHealth?.sources_error ?? 0, + tone: "text-siem-muted", + }, ]; return ( -
-
-
- - Live - -
- {stats.map((s) => ( -
- -
- {s.value} - {s.label} +
+
+ {metrics.map((metric) => ( +
+
+ + {metric.value} +
+
+ {metric.label} +
-
- ))} + ))} +
); } diff --git a/src/hooks/useSearch.ts b/src/hooks/useSearch.ts new file mode 100644 index 0000000..02e5533 --- /dev/null +++ b/src/hooks/useSearch.ts @@ -0,0 +1,88 @@ +/* + * EUOSINT + * Portions derived from novatechflow/osint-siem and cyberdude88/osint-siem. + * See NOTICE for provenance and LICENSE for repository-local terms. + */ + +import { useCallback, useEffect, useRef, useState } from "react"; +import type { Alert } from "@/types/alert"; + +const SEARCH_URL = `${import.meta.env.BASE_URL}api/search`; +const DEBOUNCE_MS = 300; + +interface SearchResult { + query: string; + count: number; + results: Alert[]; +} + +export function useSearch() { + const [query, setQuery] = useState(""); + const [results, setResults] = useState([]); + const [isSearching, setIsSearching] = useState(false); + const [isApiAvailable, setIsApiAvailable] = useState(null); + const abortRef = useRef(null); + const timerRef = useRef | null>(null); + + // Probe API availability once on mount. + useEffect(() => { + fetch(`${SEARCH_URL.replace("/search", "/health")}`, { cache: "no-store" }) + .then((r) => setIsApiAvailable(r.ok)) + .catch(() => setIsApiAvailable(false)); + }, []); + + const search = useCallback( + async (q: string) => { + abortRef.current?.abort(); + + const trimmed = q.trim(); + if (!trimmed || !isApiAvailable) { + setResults([]); + setIsSearching(false); + return; + } + + const controller = new AbortController(); + abortRef.current = controller; + setIsSearching(true); + + try { + const url = `${SEARCH_URL}?q=${encodeURIComponent(trimmed)}&limit=200`; + const response = await fetch(url, { + signal: controller.signal, + cache: "no-store", + }); + if (!response.ok) throw new Error(`${response.status}`); + const data = (await response.json()) as SearchResult; + if (!controller.signal.aborted) { + setResults(data.results ?? 
[]); + } + } catch { + if (!controller.signal.aborted) { + setResults([]); + } + } finally { + if (!controller.signal.aborted) { + setIsSearching(false); + } + } + }, + [isApiAvailable], + ); + + // Debounced search trigger. + useEffect(() => { + if (timerRef.current) clearTimeout(timerRef.current); + if (!query.trim()) { + setResults([]); + setIsSearching(false); + return; + } + timerRef.current = setTimeout(() => search(query), DEBOUNCE_MS); + return () => { + if (timerRef.current) clearTimeout(timerRef.current); + }; + }, [query, search]); + + return { query, setQuery, results, isSearching, isApiAvailable }; +} diff --git a/src/hooks/useSourceHealth.ts b/src/hooks/useSourceHealth.ts new file mode 100644 index 0000000..fecd6fe --- /dev/null +++ b/src/hooks/useSourceHealth.ts @@ -0,0 +1,49 @@ +/* + * EUOSINT + * Portions derived from novatechflow/osint-siem and cyberdude88/osint-siem. + * See NOTICE for provenance and LICENSE for repository-local terms. + */ + +import { useEffect, useState } from "react"; +import type { SourceHealthDocument } from "@/types/source-health"; + +const SOURCE_HEALTH_URL = `${import.meta.env.BASE_URL}source-health.json`; +const POLL_MS = 30000; + +export function useSourceHealth() { + const [document, setDocument] = useState(null); + const [isLoading, setIsLoading] = useState(true); + + useEffect(() => { + let cancelled = false; + + async function load() { + try { + const response = await fetch(`${SOURCE_HEALTH_URL}?t=${Date.now()}`, { + cache: "no-store", + }); + if (!response.ok) { + throw new Error(`source health fetch failed: ${response.status}`); + } + const data = (await response.json()) as SourceHealthDocument; + if (!cancelled) { + setDocument(data); + setIsLoading(false); + } + } catch { + if (!cancelled) { + setIsLoading(false); + } + } + } + + load(); + const interval = window.setInterval(load, POLL_MS); + return () => { + cancelled = true; + window.clearInterval(interval); + }; + }, []); + + return { sourceHealth: document, 
isLoading }; +} diff --git a/src/index.css b/src/index.css index 6250d7e..bc78fe2 100644 --- a/src/index.css +++ b/src/index.css @@ -4,53 +4,98 @@ * See NOTICE for provenance and LICENSE for repository-local terms. */ +@import url("https://fonts.googleapis.com/css2?family=Montserrat:wght@400;500;600;700;800&family=Roboto+Mono:wght@400;500;600&display=swap"); @import "tailwindcss"; @theme { - --color-siem-bg: #0a0e17; - --color-siem-panel: #111827; + /* ── Brand ──────────────────────────────────────────────────────── */ + --color-siem-bg: #070E1A; + --color-siem-panel: #0b1120; + --color-siem-panel-strong: #131d2e; --color-siem-border: #1e293b; - --color-siem-text: #e2e8f0; - --color-siem-muted: #64748b; - --color-siem-accent: #5f8fa3; - --color-siem-neutral: #7f9ca8; - --color-siem-critical: #d96464; - --color-siem-high: #d98958; - --color-siem-medium: #d1b35a; - --color-siem-low: #4eaf82; - --color-siem-info: #4fa0b1; + --color-siem-text: #e6edf5; + --color-siem-muted: #5A7B95; + --color-siem-accent: #E8630A; + --color-siem-accent-strong: #FF8533; + --color-siem-neutral: #9ca3af; + + /* ── Severity ───────────────────────────────────────────────────── */ + --color-siem-critical: #ff5d5d; + --color-siem-high: #f29d4b; + --color-siem-medium: #e3c867; + --color-siem-low: #4ccb8d; + --color-siem-info: #60a5fa; + + /* ── Category ───────────────────────────────────────────────────── */ + --color-cat-informational: #60a5fa; + --color-cat-cyber: #3b82f6; + --color-cat-education: #4f95a4; + --color-cat-humanitarian: #2f8c8c; + --color-cat-conflict: #725f95; + --color-cat-humsec: #3a7395; + --color-cat-wanted: #a14a5b; + --color-cat-missing: #aa8b43; + --color-cat-appeal: #5577a4; + --color-cat-fraud: #338c66; + --color-cat-safety: #5b6887; + --color-cat-terrorism: #a34c4c; + --color-cat-private: #8f6a46; + --color-cat-travel: #c27a3a; + --color-cat-health: #4ca38c; + --color-cat-intel: #7a6eab; + --color-cat-emergency: #b85c4a; + --color-cat-environment: #4a8b6e; + 
--color-cat-disease: #c45e8a; + --color-cat-maritime: #2a7a9b; + --color-cat-legislative: #6b7f45; } * { - margin: 0; - padding: 0; box-sizing: border-box; } +html, +body, +#root { + min-height: 100dvh; +} + body { - font-family: 'Inter', system-ui, -apple-system, sans-serif; - background: var(--color-siem-bg); + margin: 0; + font-family: "Montserrat", "Segoe UI", sans-serif; + background: + radial-gradient(circle at top left, color-mix(in srgb, var(--color-siem-accent) 12%, transparent), transparent 32%), + radial-gradient(circle at top right, color-mix(in srgb, var(--color-siem-panel) 75%, transparent), transparent 26%), + var(--color-siem-bg); color: var(--color-siem-text); - overflow: hidden; - height: 100dvh; } -#root { - height: 100dvh; +button, +select, +input { + font: inherit; +} + +select option { + background: var(--color-siem-panel); + color: var(--color-siem-text); } ::-webkit-scrollbar { - width: 6px; + width: 8px; + height: 8px; } + ::-webkit-scrollbar-track { - background: var(--color-siem-bg); + background: color-mix(in srgb, var(--color-siem-panel) 75%, transparent); } + ::-webkit-scrollbar-thumb { - background: var(--color-siem-border); - border-radius: 3px; + background: color-mix(in srgb, var(--color-siem-accent) 25%, transparent); + border-radius: 999px; } -/* Slide-in animation for detail panel */ + @keyframes slide-in-right { from { transform: translateX(100%); @@ -74,94 +119,105 @@ body { } .animate-slide-in { - animation: slide-in-right 0.25s ease-out; -} - -.animate-slide-out { - animation: slide-out-right 0.25s ease-in forwards; -} - -/* Left panel slide */ -@keyframes slide-in-left { - from { transform: translateX(-100%); } - to { transform: translateX(0); } -} - -@keyframes slide-out-left { - from { transform: translateX(0); } - to { transform: translateX(-100%); } -} - -.feed-panel-open { - animation: slide-in-left 0.28s ease-out forwards; + animation: slide-in-right 0.26s ease-out; } -.feed-panel-closed { - animation: slide-out-left 
0.28s ease-in forwards; -} - -@keyframes alert-list-refresh { - from { - transform: translateY(8px); - opacity: 0.78; - } - to { - transform: translateY(0); - opacity: 1; - } +.animate-alert-new-glow { + animation: alert-new-glow 1.8s ease-out forwards; } @keyframes alert-new-glow { 0% { - box-shadow: inset 0 0 0 1px rgba(147, 197, 253, 0.5), 0 0 0 rgba(59, 130, 246, 0); - background-color: rgba(59, 130, 246, 0.14); + box-shadow: inset 0 0 0 1px color-mix(in srgb, var(--color-siem-accent) 55%, transparent), 0 0 24px color-mix(in srgb, var(--color-siem-accent) 16%, transparent); } 100% { - box-shadow: inset 0 0 0 1px rgba(147, 197, 253, 0), 0 0 0 rgba(59, 130, 246, 0); - background-color: transparent; + box-shadow: inset 0 0 0 1px transparent, 0 0 0 transparent; } } @keyframes critical-badge-pulse { 0% { transform: scale(1); - filter: brightness(1); } 50% { - transform: scale(1.035); - filter: brightness(1.15); + transform: scale(1.04); } 100% { transform: scale(1); - filter: brightness(1); } } -.animate-alert-list-refresh { - animation: alert-list-refresh 160ms ease-out; -} - -.animate-alert-new-glow { - animation: alert-new-glow 1.8s ease-out forwards; -} - .animate-critical-badge { - animation: critical-badge-pulse 2.2s ease-in-out infinite; - transform-origin: center; + animation: critical-badge-pulse 2s ease-in-out infinite; } @media (prefers-reduced-motion: reduce) { - .animate-alert-list-refresh, + .animate-slide-in, .animate-alert-new-glow, - .animate-critical-badge, - .feed-panel-open, - .feed-panel-closed { + .animate-critical-badge { animation: none !important; } } -/* Style the region dropdown options */ -select option { - background: #111827; - color: #e2e8f0; +/* ── Leaflet cluster overrides ──────────────────────────────────── */ + +.siem-cluster { + display: flex; + align-items: center; + justify-content: center; + border-radius: 50%; + font-family: "Roboto Mono", monospace; + font-weight: 600; + color: var(--color-siem-text); + border: 2px solid 
color-mix(in srgb, var(--color-siem-accent) 45%, transparent); +} + +.siem-cluster span { + line-height: 1; +} + +.siem-cluster-small { + font-size: 11px; + background: color-mix(in srgb, var(--color-siem-panel) 88%, transparent); +} + +.siem-cluster-medium { + font-size: 12px; + background: color-mix(in srgb, var(--color-siem-accent) 22%, transparent); +} + +.siem-cluster-large { + font-size: 13px; + background: color-mix(in srgb, var(--color-siem-high) 28%, transparent); + border-color: color-mix(in srgb, var(--color-siem-high) 55%, transparent); +} + +/* ── Leaflet tooltip ────────────────────────────────────────────── */ + +.siem-tooltip { + background: var(--color-siem-panel) !important; + border: 1px solid var(--color-siem-border) !important; + border-radius: 0.75rem !important; + color: var(--color-siem-text) !important; + font-family: "Montserrat", sans-serif !important; + font-size: 12px !important; + padding: 6px 10px !important; + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.35) !important; +} + +.siem-tooltip::before { + border-top-color: var(--color-siem-border) !important; +} + +/* Hide default leaflet cluster styles that leak through */ +.marker-cluster-small, +.marker-cluster-medium, +.marker-cluster-large { + background: transparent !important; +} + +.marker-cluster-small div, +.marker-cluster-medium div, +.marker-cluster-large div { + background: transparent !important; } diff --git a/src/lib/regions.ts b/src/lib/regions.ts index ba45733..48db0af 100644 --- a/src/lib/regions.ts +++ b/src/lib/regions.ts @@ -50,13 +50,27 @@ export function latLngToRegion(lat: number, lng: number): string | null { return null; } +export const MIDDLE_EAST_CODES = new Set([ + "AE", "BH", "CY", "EG", "IL", "IQ", "IR", "JO", "KW", "LB", + "OM", "PS", "QA", "SA", "SY", "TR", "YE", +]); + export function alertMatchesRegionFilter(alert: Alert, regionFilter: string): boolean { if (regionFilter === "all") return true; + if (regionFilter.startsWith("country:")) { + return 
alert.source.country_code === regionFilter.slice(8); + } if (regionFilter === "Caribbean") { return ( alert.source.region === "Caribbean" || inBounds(alert.lat, alert.lng, CARIBBEAN_INTERACTION_BOUNDS) ); } + if (regionFilter === "Middle East") { + return ( + alert.source.region === "Middle East" || + MIDDLE_EAST_CODES.has(alert.source.country_code) + ); + } return alert.source.region === regionFilter; } diff --git a/src/lib/severity.ts b/src/lib/severity.ts index 0452418..3586add 100644 --- a/src/lib/severity.ts +++ b/src/lib/severity.ts @@ -5,21 +5,26 @@ */ import type { Severity, AlertCategory } from "@/types/alert"; +import { severityHex } from "@/lib/theme"; -export const severityColors: Record = { - critical: "#d96464", - high: "#d98958", - medium: "#d1b35a", - low: "#4eaf82", - info: "#4fa0b1", -}; +/** + * Get the resolved hex colour for a severity level. + * Reads from CSS custom properties at runtime — never hardcoded. + */ +export function severityColor(s: Severity): string { + return severityHex(s); +} +/** + * Tailwind class set for severity badge backgrounds. + * References @theme tokens so colours update from a single source. 
+ */ export const severityBg: Record = { - critical: "bg-[#d96464]/15 text-[#d96464] border-[#d96464]/35", - high: "bg-[#d98958]/15 text-[#d98958] border-[#d98958]/35", - medium: "bg-[#d1b35a]/15 text-[#d1b35a] border-[#d1b35a]/35", - low: "bg-[#4eaf82]/15 text-[#4eaf82] border-[#4eaf82]/35", - info: "bg-[#4fa0b1]/15 text-[#4fa0b1] border-[#4fa0b1]/35", + critical: "bg-siem-critical/15 text-siem-critical border-siem-critical/35", + high: "bg-siem-high/15 text-siem-high border-siem-high/35", + medium: "bg-siem-medium/15 text-siem-medium border-siem-medium/35", + low: "bg-siem-low/15 text-siem-low border-siem-low/35", + info: "bg-siem-info/15 text-siem-info border-siem-info/35", }; export const severityLabel: Record = { @@ -44,6 +49,14 @@ export const categoryLabels: Record = { fraud_alert: "Fraud Alert", public_safety: "Public Safety", private_sector: "Private Sector", + travel_warning: "Travel Warning", + health_emergency: "Health Emergency", + intelligence_report: "Intelligence Report", + emergency_management: "Emergency Management", + environmental_disaster: "Environmental Disaster", + disease_outbreak: "Disease Outbreak", + maritime_security: "Maritime Security", + legislative: "Legislative", }; export const categoryOrder: AlertCategory[] = [ @@ -60,22 +73,42 @@ export const categoryOrder: AlertCategory[] = [ "private_sector", "public_safety", "terrorism_tip", + "travel_warning", + "health_emergency", + "intelligence_report", + "emergency_management", + "environmental_disaster", + "disease_outbreak", + "maritime_security", + "legislative", ]; +/** + * Tailwind class set for category badge backgrounds. + * References --color-cat-* tokens defined in @theme. 
+ */ export const categoryBadge: Record = { - informational: "bg-cyan-500/15 text-cyan-300 border-cyan-500/30", - cyber_advisory: "bg-sky-500/15 text-sky-300 border-sky-500/30", - education_digital_capacity: "bg-cyan-500/15 text-cyan-300 border-cyan-500/30", - humanitarian_tasking: "bg-teal-500/15 text-teal-300 border-teal-500/30", - conflict_monitoring: "bg-fuchsia-500/15 text-fuchsia-300 border-fuchsia-500/30", - humanitarian_security: "bg-blue-500/15 text-blue-300 border-blue-500/30", - wanted_suspect: "bg-rose-500/15 text-rose-300 border-rose-500/30", - missing_person: "bg-amber-500/15 text-amber-300 border-amber-500/30", - public_appeal: "bg-indigo-500/15 text-indigo-300 border-indigo-500/30", - fraud_alert: "bg-emerald-500/15 text-emerald-300 border-emerald-500/30", - public_safety: "bg-violet-500/15 text-violet-300 border-violet-500/30", - terrorism_tip: "bg-red-500/15 text-red-300 border-red-500/30", - private_sector: "bg-orange-500/15 text-orange-300 border-orange-500/30", + informational: "bg-cat-informational/15 text-cat-informational border-cat-informational/30", + cyber_advisory: "bg-cat-cyber/15 text-cat-cyber border-cat-cyber/30", + education_digital_capacity: "bg-cat-education/15 text-cat-education border-cat-education/30", + humanitarian_tasking: "bg-cat-humanitarian/15 text-cat-humanitarian border-cat-humanitarian/30", + conflict_monitoring: "bg-cat-conflict/15 text-cat-conflict border-cat-conflict/30", + humanitarian_security: "bg-cat-humsec/15 text-cat-humsec border-cat-humsec/30", + wanted_suspect: "bg-cat-wanted/15 text-cat-wanted border-cat-wanted/30", + missing_person: "bg-cat-missing/15 text-cat-missing border-cat-missing/30", + public_appeal: "bg-cat-appeal/15 text-cat-appeal border-cat-appeal/30", + fraud_alert: "bg-cat-fraud/15 text-cat-fraud border-cat-fraud/30", + public_safety: "bg-cat-safety/15 text-cat-safety border-cat-safety/30", + terrorism_tip: "bg-cat-terrorism/15 text-cat-terrorism border-cat-terrorism/30", + private_sector: 
"bg-cat-private/15 text-cat-private border-cat-private/30", + travel_warning: "bg-cat-travel/15 text-cat-travel border-cat-travel/30", + health_emergency: "bg-cat-health/15 text-cat-health border-cat-health/30", + intelligence_report: "bg-cat-intel/15 text-cat-intel border-cat-intel/30", + emergency_management: "bg-cat-emergency/15 text-cat-emergency border-cat-emergency/30", + environmental_disaster: "bg-cat-environment/15 text-cat-environment border-cat-environment/30", + disease_outbreak: "bg-cat-disease/15 text-cat-disease border-cat-disease/30", + maritime_security: "bg-cat-maritime/15 text-cat-maritime border-cat-maritime/30", + legislative: "bg-cat-legislative/15 text-cat-legislative border-cat-legislative/30", }; export const categoryIcons: Record = { @@ -92,6 +125,14 @@ export const categoryIcons: Record = { fraud_alert: "BadgeDollarSign", public_safety: "Siren", private_sector: "Building", + travel_warning: "Plane", + health_emergency: "HeartPulse", + intelligence_report: "Eye", + emergency_management: "Siren", + environmental_disaster: "CloudRain", + disease_outbreak: "Bug", + maritime_security: "Anchor", + legislative: "Landmark", }; export function freshnessLabel(hours: number): string { diff --git a/src/lib/theme.ts b/src/lib/theme.ts new file mode 100644 index 0000000..784f00f --- /dev/null +++ b/src/lib/theme.ts @@ -0,0 +1,50 @@ +/* + * EUOSINT + * Portions derived from novatechflow/osint-siem and cyberdude88/osint-siem. + * See NOTICE for provenance and LICENSE for repository-local terms. + */ + +/** + * Runtime reader for CSS custom-property colours defined in index.css @theme. + * This is the only place JS code should obtain colour hex values — everything + * else references Tailwind theme classes or CSS variables directly. 
+ */ + +import type { Severity } from "@/types/alert"; + +const SEVERITY_VARS: Record = { + critical: "--color-siem-critical", + high: "--color-siem-high", + medium: "--color-siem-medium", + low: "--color-siem-low", + info: "--color-siem-info", +}; + +let root: CSSStyleDeclaration | null = null; + +function getRoot(): CSSStyleDeclaration { + if (!root) { + root = getComputedStyle(document.documentElement); + } + return root; +} + +/** Read any `--color-*` CSS variable as a trimmed hex string. */ +export function cssColor(varName: string): string { + return getRoot().getPropertyValue(varName).trim(); +} + +/** Get the hex colour for a severity level, read from the CSS theme. */ +export function severityHex(severity: Severity): string { + return cssColor(SEVERITY_VARS[severity]); +} + +/** Get the hex colour for the brand accent, read from the CSS theme. */ +export function accentHex(): string { + return cssColor("--color-siem-accent"); +} + +/** Get the hex colour for the primary text, read from the CSS theme. */ +export function textHex(): string { + return cssColor("--color-siem-text"); +} diff --git a/src/main.tsx b/src/main.tsx index 9131167..bd543cd 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -4,13 +4,13 @@ * See NOTICE for provenance and LICENSE for repository-local terms. 
*/ -import { StrictMode } from 'react' -import { createRoot } from 'react-dom/client' -import './index.css' -import App from './App.tsx' +import { StrictMode } from "react"; +import { createRoot } from "react-dom/client"; +import "./index.css"; +import App from "./App.tsx"; -createRoot(document.getElementById('root')!).render( +createRoot(document.getElementById("root")!).render( - , -) + +); diff --git a/src/types/alert.ts b/src/types/alert.ts index 7c7c4b9..ac1c3b5 100644 --- a/src/types/alert.ts +++ b/src/types/alert.ts @@ -19,7 +19,15 @@ export type AlertCategory = | "terrorism_tip" | "fraud_alert" | "public_safety" - | "private_sector"; + | "private_sector" + | "travel_warning" + | "health_emergency" + | "intelligence_report" + | "emergency_management" + | "environmental_disaster" + | "disease_outbreak" + | "maritime_security" + | "legislative"; export type AuthorityType = | "police" | "national_security" diff --git a/src/types/source-health.ts b/src/types/source-health.ts new file mode 100644 index 0000000..67ca22a --- /dev/null +++ b/src/types/source-health.ts @@ -0,0 +1,41 @@ +/* + * EUOSINT + * Portions derived from novatechflow/osint-siem and cyberdude88/osint-siem. + * See NOTICE for provenance and LICENSE for repository-local terms. 
+ */ + +export interface DuplicateSample { + title: string; + count: number; +} + +export interface DuplicateAudit { + suppressed_variant_duplicates: number; + repeated_title_groups_in_active: number; + repeated_title_samples: DuplicateSample[]; +} + +export interface SourceHealthEntry { + source_id: string; + authority_name: string; + type: string; + status: "ok" | "error"; + fetched_count: number; + feed_url: string; + error?: string; + started_at: string; + finished_at: string; + active_count?: number; + filtered_count?: number; +} + +export interface SourceHealthDocument { + generated_at: string; + critical_source_prefixes: string[]; + fail_on_critical_source_gap: boolean; + total_sources: number; + sources_ok: number; + sources_error: number; + duplicate_audit: DuplicateAudit; + sources: SourceHealthEntry[]; +}