From 0eb5c1e044b881d430a1a1ef7b764d263cdd783d Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 09:22:22 +0100 Subject: [PATCH 1/7] feat: add go collector backend and quality gates --- .github/workflows/alerts-feed.yml | 48 +- .github/workflows/ci.yml | 40 + .github/workflows/codeql.yml | 13 +- .github/workflows/docker.yml | 22 +- .github/workflows/release.yml | 18 +- Dockerfile.collector | 21 + Makefile | 110 ++- README.md | 13 + cmd/euosint-collector/main.go | 20 + docs/collector-migration.md | 31 + go.mod | 3 + internal/collector/app/app.go | 42 + internal/collector/app/app_test.go | 35 + internal/collector/config/config.go | 143 ++++ internal/collector/config/config_test.go | 13 + internal/collector/fetch/client.go | 80 ++ internal/collector/fetch/client_test.go | 37 + internal/collector/model/alert.go | 72 ++ internal/collector/model/source.go | 34 + internal/collector/normalize/normalize.go | 760 ++++++++++++++++++ .../collector/normalize/normalize_test.go | 39 + internal/collector/output/write.go | 60 ++ internal/collector/output/write_test.go | 29 + internal/collector/parse/html.go | 53 ++ internal/collector/parse/html_test.go | 14 + internal/collector/parse/rss.go | 148 ++++ internal/collector/parse/rss_test.go | 37 + internal/collector/registry/registry.go | 73 ++ internal/collector/registry/registry_test.go | 33 + internal/collector/run/run.go | 383 +++++++++ internal/collector/run/run_test.go | 93 +++ internal/collector/state/state.go | 80 ++ internal/collector/state/state_test.go | 37 + package.json | 9 +- 34 files changed, 2577 insertions(+), 66 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 Dockerfile.collector create mode 100644 cmd/euosint-collector/main.go create mode 100644 docs/collector-migration.md create mode 100644 go.mod create mode 100644 internal/collector/app/app.go create mode 100644 internal/collector/app/app_test.go create mode 100644 internal/collector/config/config.go create mode 100644 
internal/collector/config/config_test.go create mode 100644 internal/collector/fetch/client.go create mode 100644 internal/collector/fetch/client_test.go create mode 100644 internal/collector/model/alert.go create mode 100644 internal/collector/model/source.go create mode 100644 internal/collector/normalize/normalize.go create mode 100644 internal/collector/normalize/normalize_test.go create mode 100644 internal/collector/output/write.go create mode 100644 internal/collector/output/write_test.go create mode 100644 internal/collector/parse/html.go create mode 100644 internal/collector/parse/html_test.go create mode 100644 internal/collector/parse/rss.go create mode 100644 internal/collector/parse/rss_test.go create mode 100644 internal/collector/registry/registry.go create mode 100644 internal/collector/registry/registry_test.go create mode 100644 internal/collector/run/run.go create mode 100644 internal/collector/run/run_test.go create mode 100644 internal/collector/state/state.go create mode 100644 internal/collector/state/state_test.go diff --git a/.github/workflows/alerts-feed.yml b/.github/workflows/alerts-feed.yml index b597232..ae195f5 100644 --- a/.github/workflows/alerts-feed.yml +++ b/.github/workflows/alerts-feed.yml @@ -28,10 +28,10 @@ jobs: node-version-file: .nvmrc cache: npm - - name: Install deps + - name: Install dependencies run: npm ci - - name: Fetch alerts + - name: Fetch alerts with parity reference collector run: node scripts/fetch-alerts.mjs env: MAX_PER_SOURCE: "20" @@ -54,47 +54,3 @@ jobs: git add public/alerts.json public/alerts-filtered.json public/alerts-state.json public/source-health.json git commit -m "chore: update alerts feed" git push -*** Add File: /Users/alo/Development/scalytics/EUOSINT/.github/workflows/ci.yml -name: CI - -on: - pull_request: - push: - branches: [main] - workflow_dispatch: {} - -permissions: - contents: read - -concurrency: - group: ci-${{ github.ref }} - cancel-in-progress: true - -jobs: - quality: - name: 
quality - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Node - uses: actions/setup-node@v4 - with: - node-version-file: .nvmrc - cache: npm - - - name: Install dependencies - run: npm ci - - - name: Toolchain check - run: make check - - - name: Lint - run: make lint - - - name: Typecheck - run: make test - - - name: Build - run: make build diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3b57331 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,40 @@ +name: CI + +on: + pull_request: {} + push: + branches: [main] + workflow_dispatch: {} + +permissions: + contents: read + +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + quality: + name: quality + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version-file: .nvmrc + cache: npm + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Install dependencies + run: npm ci + + - name: Commit check + run: make commit-check diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 8475344..ea36aaa 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -25,7 +25,7 @@ jobs: - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: - languages: javascript-typescript + languages: go, javascript-typescript - name: Setup Node uses: actions/setup-node@v4 @@ -33,11 +33,20 @@ jobs: node-version-file: .nvmrc cache: npm + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + - name: Install dependencies run: npm ci - - name: Build + - name: Build UI run: npm run build + - name: Build collector + run: go build ./cmd/euosint-collector + - name: Analyze uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/docker.yml 
b/.github/workflows/docker.yml index 9cc6b10..ef4e0ae 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -4,8 +4,13 @@ on: pull_request: paths: - "Dockerfile" + - "Dockerfile.collector" - "docker-compose.yml" - "docker/**" + - "cmd/**" + - "internal/**" + - "registry/**" + - "go.mod" - "package.json" - "package-lock.json" - "src/**" @@ -15,8 +20,13 @@ on: branches: [main] paths: - "Dockerfile" + - "Dockerfile.collector" - "docker-compose.yml" - "docker/**" + - "cmd/**" + - "internal/**" + - "registry/**" + - "go.mod" - "package.json" - "package-lock.json" - "src/**" @@ -29,8 +39,16 @@ permissions: jobs: build: - name: build + name: build-${{ matrix.image.name }} runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + image: + - name: web + dockerfile: Dockerfile + - name: collector + dockerfile: Dockerfile.collector steps: - name: Checkout uses: actions/checkout@v4 @@ -42,7 +60,7 @@ jobs: uses: docker/build-push-action@v6 with: context: . - file: ./Dockerfile + file: ./${{ matrix.image.dockerfile }} push: false load: false provenance: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4947f2b..2224105 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,8 +12,18 @@ permissions: jobs: release: - name: publish + name: publish-${{ matrix.image.name }} runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + image: + - name: web + dockerfile: Dockerfile + image_suffix: -web + - name: collector + dockerfile: Dockerfile.collector + image_suffix: -collector steps: - name: Checkout uses: actions/checkout@v4 @@ -31,13 +41,14 @@ jobs: run: npm run build - name: Archive dist + if: matrix.image.name == 'web' run: | tar -czf euosint-dist-${GITHUB_REF_NAME}.tar.gz dist shasum -a 256 euosint-dist-${GITHUB_REF_NAME}.tar.gz > euosint-dist-${GITHUB_REF_NAME}.tar.gz.sha256 - name: Prepare image name id: image - run: echo "name=ghcr.io/${GITHUB_REPOSITORY@L}" >> 
"$GITHUB_OUTPUT" + run: echo "name=ghcr.io/${GITHUB_REPOSITORY@L}${{ matrix.image.image_suffix }}" >> "$GITHUB_OUTPUT" - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -64,7 +75,7 @@ jobs: uses: docker/build-push-action@v6 with: context: . - file: ./Dockerfile + file: ./${{ matrix.image.dockerfile }} push: true provenance: false tags: ${{ steps.meta.outputs.tags }} @@ -73,6 +84,7 @@ jobs: cache-to: type=gha,mode=max - name: Publish GitHub release + if: matrix.image.name == 'web' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | diff --git a/Dockerfile.collector b/Dockerfile.collector new file mode 100644 index 0000000..40c62b0 --- /dev/null +++ b/Dockerfile.collector @@ -0,0 +1,21 @@ +FROM golang:1.25-alpine AS build + +WORKDIR /app + +COPY go.mod ./ +COPY cmd ./cmd +COPY internal ./internal +COPY registry ./registry + +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/euosint-collector ./cmd/euosint-collector + +FROM alpine:3.20 + +RUN apk add --no-cache ca-certificates + +WORKDIR /app + +COPY --from=build /out/euosint-collector /usr/local/bin/euosint-collector +COPY registry ./registry + +ENTRYPOINT ["euosint-collector"] diff --git a/Makefile b/Makefile index a060aba..7d0f9e4 100644 --- a/Makefile +++ b/Makefile @@ -2,17 +2,27 @@ SHELL := /bin/bash NODE_VERSION := 25.8.1 NPM_VERSION := 11.11.0 +GO_VERSION := 1.25.2 +GO_COVERAGE_THRESHOLD ?= 45 +GO_COVER_PROFILE ?= .tmp/go-coverage.out +GO_COVER_REPORT ?= .tmp/go-coverage.txt +GOCACHE_DIR ?= $(CURDIR)/.tmp/go-build-cache +GOMODCACHE_DIR ?= $(CURDIR)/.tmp/go-mod-cache +CODEQL_RAM_MB ?= 4096 DOCKER_IMAGE ?= euosint IMAGE_TAG ?= local BUILDER ?= colima DOCKER_COMPOSE ?= $(shell if command -v docker-compose >/dev/null 2>&1; then echo docker-compose; else echo "docker compose"; fi) CODEQL_DIR ?= .tmp/codeql -CODEQL_DB ?= $(CODEQL_DIR)/db -CODEQL_OUT ?= $(CODEQL_DIR)/javascript.sarif +CODEQL_JS_DB ?= $(CODEQL_DIR)/js-db +CODEQL_GO_DB ?= $(CODEQL_DIR)/go-db +CODEQL_JS_OUT ?= 
$(CODEQL_DIR)/javascript.sarif +CODEQL_GO_OUT ?= $(CODEQL_DIR)/go.sarif BRANCH ?= main RELEASE_LEVEL ?= patch -.PHONY: help check install clean lint typecheck test build ci \ +.PHONY: help check check-commit install clean lint typecheck test build ci \ + go-fmt go-test go-race go-cover go-vet go-codeql commit-check \ docker-build docker-up docker-down docker-logs docker-shell \ code-ql code-ql-summary \ release-patch release-minor release-major \ @@ -34,6 +44,17 @@ check: ## Validate local toolchain @echo " compose $$($(DOCKER_COMPOSE) version 2>/dev/null | head -n 1)" @echo " gh $$(gh --version | head -n 1 | sed 's/gh version //')" +check-commit: ## Validate local toolchain for commit checks + @echo "Checking commit prerequisites..." + @command -v node >/dev/null 2>&1 || { echo "Node.js is required"; exit 1; } + @command -v npm >/dev/null 2>&1 || { echo "npm is required"; exit 1; } + @command -v go >/dev/null 2>&1 || { echo "Go is required"; exit 1; } + @command -v codeql >/dev/null 2>&1 || { echo "codeql CLI is required"; exit 1; } + @echo " Node $$(node -v) — expected $(NODE_VERSION)" + @echo " npm $$(npm -v) — expected $(NPM_VERSION)" + @echo " go $$(go version | awk '{print $$3}') — expected go$(GO_VERSION)" + @echo " codeql $$(codeql version | head -n 1 | awk '{print $$5}')" + install: ## Install project dependencies npm install @@ -54,6 +75,29 @@ build: ## Build the production bundle ci: check lint test build ## Run the full local CI suite +go-fmt: ## Auto-format Go code + @mkdir -p $(GOCACHE_DIR) $(GOMODCACHE_DIR) + GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) gofmt -w $$(find cmd internal -name '*.go' -type f | sort) + +go-test: ## Run Go tests + @mkdir -p $(GOCACHE_DIR) $(GOMODCACHE_DIR) + GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) go test ./... + +go-race: ## Run Go race detector + @mkdir -p $(GOCACHE_DIR) $(GOMODCACHE_DIR) + GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) go test -race ./... 
+ +go-cover: ## Enforce Go coverage threshold + @mkdir -p .tmp $(GOCACHE_DIR) $(GOMODCACHE_DIR) + GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) go test -covermode=atomic -coverprofile=$(GO_COVER_PROFILE) ./... + GOCACHE=$(GOCACHE_DIR) go tool cover -func=$(GO_COVER_PROFILE) | tee $(GO_COVER_REPORT) + @coverage=$$(GOCACHE=$(GOCACHE_DIR) go tool cover -func=$(GO_COVER_PROFILE) | awk '/^total:/ {gsub("%","",$$3); print $$3}'); \ + awk -v coverage="$$coverage" -v threshold="$(GO_COVERAGE_THRESHOLD)" 'BEGIN { if (coverage + 0 < threshold + 0) { printf("coverage %.1f%% is below threshold %.1f%%\n", coverage, threshold); exit 1 } }' + +go-vet: ## Run go vet + @mkdir -p $(GOCACHE_DIR) $(GOMODCACHE_DIR) + GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) go vet ./... + docker-build: ## Build the Docker image with buildx docker buildx build --builder $(BUILDER) --load -t $(DOCKER_IMAGE):$(IMAGE_TAG) . @@ -71,20 +115,66 @@ docker-shell: ## Open a shell in the running container code-ql: ## Run CodeQL CLI locally for JavaScript/TypeScript @command -v codeql >/dev/null 2>&1 || { echo "codeql CLI is required"; exit 1; } - rm -rf $(CODEQL_DIR) + rm -rf $(CODEQL_JS_DB) mkdir -p $(CODEQL_DIR) - codeql database create $(CODEQL_DB) \ + codeql database create $(CODEQL_JS_DB) \ --language=javascript-typescript \ --source-root=. \ --command="npm ci && npm run build" - codeql database analyze $(CODEQL_DB) \ - javascript-security-and-quality.qls \ + codeql database analyze $(CODEQL_JS_DB) \ + codeql/javascript-queries:codeql-suites/javascript-security-and-quality.qls \ + --ram=$(CODEQL_RAM_MB) \ + --format=sarif-latest \ + --output=$(CODEQL_JS_OUT) + @echo "Wrote $(CODEQL_JS_OUT)" + +go-codeql: ## Run CodeQL CLI locally for Go + @command -v codeql >/dev/null 2>&1 || { echo "codeql CLI is required"; exit 1; } + rm -rf $(CODEQL_GO_DB) + mkdir -p $(CODEQL_DIR) + codeql database create $(CODEQL_GO_DB) \ + --language=go \ + --source-root=. 
\ + --command="env GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) go build ./cmd/euosint-collector" + codeql database analyze $(CODEQL_GO_DB) \ + codeql/go-queries:codeql-suites/go-security-and-quality.qls \ + --ram=$(CODEQL_RAM_MB) \ --format=sarif-latest \ - --output=$(CODEQL_OUT) - @echo "Wrote $(CODEQL_OUT)" + --output=$(CODEQL_GO_OUT) + @echo "Wrote $(CODEQL_GO_OUT)" code-ql-summary: ## Summarize the local CodeQL SARIF output - python3 scripts/codeql_summary.py $(CODEQL_OUT) + python3 scripts/codeql_summary.py $(CODEQL_JS_OUT) + +commit-check: ## Run the full local quality gate with auto-formatting + @set -euo pipefail; \ + steps=( \ + "check-commit:toolchain" \ + "go-fmt:go format" \ + "lint:ui lint" \ + "typecheck:ui typecheck" \ + "build:ui build" \ + "go-test:go test" \ + "go-race:go race" \ + "go-cover:go coverage" \ + "go-vet:go vet" \ + "code-ql:js codeql" \ + "go-codeql:go codeql" \ + ); \ + total=$${#steps[@]}; \ + index=0; \ + for entry in "$${steps[@]}"; do \ + index=$$((index + 1)); \ + target=$${entry%%:*}; \ + label=$${entry#*:}; \ + printf '\n[%d/%d] %s\n' "$$index" "$$total" "$$label"; \ + $(MAKE) --no-print-directory "$$target"; \ + printf '[ok] %s\n' "$$label"; \ + done; \ + if ! git diff --quiet -- cmd internal; then \ + printf '\n[info] gofmt rewrote Go files under cmd/ or internal/\n'; \ + fi; \ + printf '\n[done] commit-check passed\n' release-patch: ## Create and push the next patch release tag bash scripts/release-tag.sh patch diff --git a/README.md b/README.md index 31be40e..fff4868 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,9 @@ docker-compose up --build The application will be available at `http://localhost:8080`. +- The release pipeline now builds two images: a web image and a Go collector image. +- The scheduled feed refresh workflow still uses the legacy Node collector until the Go collector reaches parity with the reference implementation. 
+ ## Run Locally Without Docker ```bash @@ -38,6 +41,14 @@ Tuning examples: INTERVAL_MS=120000 MAX_PER_SOURCE=80 npm run collector:run ``` +Legacy reference collector commands remain available during parity work: + +```bash +npm run fetch:alerts:legacy +npm run fetch:alerts:watch:legacy +npm run collector:run:legacy +``` + ## Operations ```bash @@ -49,9 +60,11 @@ make docker-build - `make release-patch`, `make release-minor`, and `make release-major` create and push semver tags that trigger the release workflow. - `.github/workflows/branch-protection.yml` applies protection to `main` using the `ADMIN_GITHUB_TOKEN` repository secret. - Docker validation runs through `buildx`, and release images publish to GHCR on semver tags. +- Release images are published as `ghcr.io/OWNER/REPO-web` and `ghcr.io/OWNER/REPO-collector`. ## Notes - Local toolchain is pinned to Node `25.8.1` and npm `11.11.0` via `package.json`, `.nvmrc`, and `.node-version`. +- The Go collector is the target backend, but `scripts/fetch-alerts.mjs` remains the operational parity reference until the Go output is validated against it. - The imported application still reflects upstream geographic coverage and source selection; EU-specific source tuning is a follow-up change. - The root `LICENSE` applies to repository-local materials and modifications added here. Upstream repository metadata should be reviewed separately for inherited code provenance. 
diff --git a/cmd/euosint-collector/main.go b/cmd/euosint-collector/main.go new file mode 100644 index 0000000..3155a0c --- /dev/null +++ b/cmd/euosint-collector/main.go @@ -0,0 +1,20 @@ +package main + +import ( + "context" + "log" + "os" + "os/signal" + "syscall" + + "github.com/scalytics/euosint/internal/collector/app" +) + +func main() { + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + + if err := app.Run(ctx, os.Args[1:], os.Stdout, os.Stderr); err != nil { + log.Fatal(err) + } +} diff --git a/docs/collector-migration.md b/docs/collector-migration.md new file mode 100644 index 0000000..0bdda37 --- /dev/null +++ b/docs/collector-migration.md @@ -0,0 +1,31 @@ +# Collector Migration + +The existing Node collector in `scripts/fetch-alerts.mjs` remains the reference implementation until the Go collector reaches output parity. + +Operational rule: scheduled feed generation must stay on the legacy Node collector until parity is explicitly verified. The Go collector can ship in images and local tooling before it becomes the production feed generator. + +## Goals + +- Isolate the operational collector from the npm dependency tree. +- Keep the React dashboard unchanged while the ingestion engine is migrated. +- Port behavior in small slices with parity checks against the current JSON outputs. + +## Initial Go Boundary + +- CLI entrypoint: `cmd/euosint-collector` +- Config and runtime wiring: `internal/collector/app`, `internal/collector/config` +- Domain models: `internal/collector/model` +- Registry loading and validation: `internal/collector/registry` + +## Migration Order + +1. Registry loading and source validation +2. Source fetchers by transport type +3. Parser and normalization pipeline +4. Deduplication and scoring parity +5. Output writers for alerts, state, filtered alerts, and source health +6. 
Watch mode and retry orchestration + +## Coexistence Rule + +Until the Go collector can reproduce the Node collector outputs for a representative fixture set, production collection stays on Node. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..c8aaa81 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/scalytics/euosint + +go 1.25 diff --git a/internal/collector/app/app.go b/internal/collector/app/app.go new file mode 100644 index 0000000..1c3f1a8 --- /dev/null +++ b/internal/collector/app/app.go @@ -0,0 +1,42 @@ +package app + +import ( + "context" + "flag" + "io" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/run" +) + +func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) error { + fs := flag.NewFlagSet("euosint-collector", flag.ContinueOnError) + fs.SetOutput(stderr) + + cfg := config.FromEnv() + fs.StringVar(&cfg.RegistryPath, "registry", cfg.RegistryPath, "Path to the collector source registry JSON file") + fs.StringVar(&cfg.OutputPath, "output", cfg.OutputPath, "Path for active alerts output JSON file") + fs.StringVar(&cfg.FilteredOutputPath, "filtered-output", cfg.FilteredOutputPath, "Path for filtered alerts output JSON file") + fs.StringVar(&cfg.StateOutputPath, "state-output", cfg.StateOutputPath, "Path for collector state JSON file") + fs.StringVar(&cfg.SourceHealthOutputPath, "source-health-output", cfg.SourceHealthOutputPath, "Path for source health JSON file") + fs.BoolVar(&cfg.Watch, "watch", cfg.Watch, "Run continuously with the configured interval") + fs.IntVar(&cfg.IntervalMS, "interval-ms", cfg.IntervalMS, "Polling interval in milliseconds when watch mode is enabled") + fs.IntVar(&cfg.MaxPerSource, "max-per-source", cfg.MaxPerSource, "Maximum items retained per source fetch") + fs.IntVar(&cfg.MaxAgeDays, "max-age-days", cfg.MaxAgeDays, "Maximum item age in days") + fs.IntVar(&cfg.RemovedRetentionDays, "removed-retention-days", 
cfg.RemovedRetentionDays, "Retention in days for removed alerts") + fs.Float64Var(&cfg.IncidentRelevanceThreshold, "incident-threshold", cfg.IncidentRelevanceThreshold, "Default relevance threshold for active alerts") + fs.Float64Var(&cfg.MissingPersonRelevanceThreshold, "missing-person-threshold", cfg.MissingPersonRelevanceThreshold, "Relevance threshold for missing person alerts") + fs.BoolVar(&cfg.FailOnCriticalSourceGap, "fail-on-critical-source-gap", cfg.FailOnCriticalSourceGap, "Fail the run when critical sources fetch zero records") + + if err := fs.Parse(args); err != nil { + return err + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + return run.New(stdout, stderr).Run(ctx, cfg) +} diff --git a/internal/collector/app/app_test.go b/internal/collector/app/app_test.go new file mode 100644 index 0000000..9e540a9 --- /dev/null +++ b/internal/collector/app/app_test.go @@ -0,0 +1,35 @@ +package app + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" +) + +func TestRunWritesOutputs(t *testing.T) { + dir := t.TempDir() + registryPath := filepath.Join(dir, "registry.json") + content := `[ + {"type":"rss","feed_url":"https://invalid.example/feed","category":"cyber_advisory","source":{"source_id":"test","authority_name":"Test","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://invalid.example"}} + ]` + if err := os.WriteFile(registryPath, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + err := Run(context.Background(), []string{ + "--registry", registryPath, + "--output", filepath.Join(dir, "alerts.json"), + "--filtered-output", filepath.Join(dir, "filtered.json"), + "--state-output", filepath.Join(dir, "state.json"), + "--source-health-output", filepath.Join(dir, "health.json"), + }, io.Discard, io.Discard) + if err != nil { + t.Fatal(err) + } + for _, path := range []string{"alerts.json", "filtered.json", "state.json", "health.json"} { + if _, err := 
os.Stat(filepath.Join(dir, path)); err != nil { + t.Fatalf("expected %s to be written: %v", path, err) + } + } +} diff --git a/internal/collector/config/config.go b/internal/collector/config/config.go new file mode 100644 index 0000000..73756d5 --- /dev/null +++ b/internal/collector/config/config.go @@ -0,0 +1,143 @@ +package config + +import ( + "os" + "strconv" + "strings" +) + +const ( + defaultOutputPath = "public/alerts.json" + defaultFilteredPath = "public/alerts-filtered.json" + defaultStatePath = "public/alerts-state.json" + defaultSourceHealthPath = "public/source-health.json" + defaultRegistryPath = "registry/source_registry.json" + defaultTimeoutMS = 15000 + defaultIntervalMS = 900000 + defaultMaxPerSource = 20 + defaultMaxAgeDays = 180 + defaultRemovedDays = 14 + defaultMaxBodyBytes = 2 * 1024 * 1024 +) + +type Config struct { + RegistryPath string + OutputPath string + FilteredOutputPath string + StateOutputPath string + SourceHealthOutputPath string + MaxPerSource int + MaxAgeDays int + RemovedRetentionDays int + IncidentRelevanceThreshold float64 + MissingPersonRelevanceThreshold float64 + FailOnCriticalSourceGap bool + CriticalSourcePrefixes []string + Watch bool + IntervalMS int + HTTPTimeoutMS int + MaxResponseBodyBytes int64 + UserAgent string +} + +func Default() Config { + return Config{ + RegistryPath: defaultRegistryPath, + OutputPath: defaultOutputPath, + FilteredOutputPath: defaultFilteredPath, + StateOutputPath: defaultStatePath, + SourceHealthOutputPath: defaultSourceHealthPath, + MaxPerSource: defaultMaxPerSource, + MaxAgeDays: defaultMaxAgeDays, + RemovedRetentionDays: defaultRemovedDays, + IncidentRelevanceThreshold: 0.42, + MissingPersonRelevanceThreshold: 0, + FailOnCriticalSourceGap: false, + CriticalSourcePrefixes: []string{"cisa"}, + Watch: false, + IntervalMS: defaultIntervalMS, + HTTPTimeoutMS: defaultTimeoutMS, + MaxResponseBodyBytes: defaultMaxBodyBytes, + UserAgent: "euosint-bot/1.0", + } +} + +func FromEnv() Config { + cfg 
:= Default() + cfg.OutputPath = envString("OUTPUT_PATH", cfg.OutputPath) + cfg.FilteredOutputPath = envString("FILTERED_OUTPUT_PATH", cfg.FilteredOutputPath) + cfg.StateOutputPath = envString("STATE_OUTPUT_PATH", cfg.StateOutputPath) + cfg.SourceHealthOutputPath = envString("SOURCE_HEALTH_OUTPUT_PATH", cfg.SourceHealthOutputPath) + cfg.RegistryPath = envString("SOURCE_REGISTRY_PATH", cfg.RegistryPath) + cfg.MaxPerSource = envInt("MAX_PER_SOURCE", cfg.MaxPerSource) + cfg.MaxAgeDays = envInt("MAX_AGE_DAYS", cfg.MaxAgeDays) + cfg.RemovedRetentionDays = envInt("REMOVED_RETENTION_DAYS", cfg.RemovedRetentionDays) + cfg.IncidentRelevanceThreshold = envFloat("INCIDENT_RELEVANCE_THRESHOLD", cfg.IncidentRelevanceThreshold) + cfg.MissingPersonRelevanceThreshold = envFloat("MISSING_PERSON_RELEVANCE_THRESHOLD", cfg.MissingPersonRelevanceThreshold) + cfg.FailOnCriticalSourceGap = envBool("FAIL_ON_CRITICAL_SOURCE_GAP", cfg.FailOnCriticalSourceGap) + cfg.CriticalSourcePrefixes = envCSV("CRITICAL_SOURCE_PREFIXES", cfg.CriticalSourcePrefixes) + cfg.Watch = envBool("WATCH", cfg.Watch) + cfg.IntervalMS = envInt("INTERVAL_MS", cfg.IntervalMS) + cfg.HTTPTimeoutMS = envInt("HTTP_TIMEOUT_MS", cfg.HTTPTimeoutMS) + cfg.MaxResponseBodyBytes = int64(envInt("MAX_RESPONSE_BODY_BYTES", int(cfg.MaxResponseBodyBytes))) + cfg.UserAgent = envString("USER_AGENT", cfg.UserAgent) + return cfg +} + +func envString(key, fallback string) string { + if value := strings.TrimSpace(os.Getenv(key)); value != "" { + return value + } + return fallback +} + +func envInt(key string, fallback int) int { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return fallback + } + parsed, err := strconv.Atoi(value) + if err != nil { + return fallback + } + return parsed +} + +func envFloat(key string, fallback float64) float64 { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return fallback + } + parsed, err := strconv.ParseFloat(value, 64) + if err != nil { + return fallback + } + 
return parsed +} + +func envBool(key string, fallback bool) bool { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return fallback + } + return value == "1" || strings.EqualFold(value, "true") +} + +func envCSV(key string, fallback []string) []string { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return fallback + } + parts := strings.Split(value, ",") + out := make([]string, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part != "" { + out = append(out, part) + } + } + if len(out) == 0 { + return fallback + } + return out +} diff --git a/internal/collector/config/config_test.go b/internal/collector/config/config_test.go new file mode 100644 index 0000000..59f6cb0 --- /dev/null +++ b/internal/collector/config/config_test.go @@ -0,0 +1,13 @@ +package config + +import "testing" + +func TestDefaultConfig(t *testing.T) { + cfg := Default() + if cfg.OutputPath == "" || cfg.RegistryPath == "" { + t.Fatalf("default config should populate output and registry paths: %#v", cfg) + } + if cfg.MaxPerSource <= 0 { + t.Fatalf("unexpected max per source %d", cfg.MaxPerSource) + } +} diff --git a/internal/collector/fetch/client.go b/internal/collector/fetch/client.go new file mode 100644 index 0000000..e2f27d7 --- /dev/null +++ b/internal/collector/fetch/client.go @@ -0,0 +1,80 @@ +package fetch + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/scalytics/euosint/internal/collector/config" +) + +type Client struct { + httpClient *http.Client + userAgent string + maxBodyBytes int64 +} + +func New(cfg config.Config) *Client { + return NewWithHTTPClient(cfg, &http.Client{ + Timeout: time.Duration(cfg.HTTPTimeoutMS) * time.Millisecond, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return errors.New("stopped after 10 redirects") + } + return nil + }, + }) +} + +func NewWithHTTPClient(cfg config.Config, httpClient 
*http.Client) *Client { + return &Client{ + httpClient: httpClient, + userAgent: cfg.UserAgent, + maxBodyBytes: cfg.MaxResponseBodyBytes, + } +} + +func (c *Client) Text(ctx context.Context, url string, followRedirects bool, accept string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("build request %s: %w", url, err) + } + req.Header.Set("User-Agent", c.userAgent) + if strings.TrimSpace(accept) != "" { + req.Header.Set("Accept", accept) + } + + client := c.httpClient + if !followRedirects { + copyClient := *c.httpClient + copyClient.CheckRedirect = func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + } + client = &copyClient + } + + res, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("fetch %s: %w", url, err) + } + defer res.Body.Close() + + if res.StatusCode < 200 || res.StatusCode >= 300 { + return nil, fmt.Errorf("fetch %s: status %d", url, res.StatusCode) + } + + reader := io.LimitReader(res.Body, c.maxBodyBytes+1) + body, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("read %s: %w", url, err) + } + if int64(len(body)) > c.maxBodyBytes { + return nil, fmt.Errorf("response too large for %s", url) + } + + return body, nil +} diff --git a/internal/collector/fetch/client_test.go b/internal/collector/fetch/client_test.go new file mode 100644 index 0000000..b6a7670 --- /dev/null +++ b/internal/collector/fetch/client_test.go @@ -0,0 +1,37 @@ +package fetch + +import ( + "context" + "io" + "net/http" + "strings" + "testing" + + "github.com/scalytics/euosint/internal/collector/config" +) + +func TestClientText(t *testing.T) { + cfg := config.Default() + client := NewWithHTTPClient(cfg, &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader("ok")), + Header: make(http.Header), + }, nil + }), + }) + 
body, err := client.Text(context.Background(), "https://collector.test", true, "text/plain") + if err != nil { + t.Fatal(err) + } + if string(body) != "ok" { + t.Fatalf("unexpected body %q", string(body)) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return fn(req) +} diff --git a/internal/collector/model/alert.go b/internal/collector/model/alert.go new file mode 100644 index 0000000..10a734b --- /dev/null +++ b/internal/collector/model/alert.go @@ -0,0 +1,72 @@ +package model + +type Alert struct { + AlertID string `json:"alert_id"` + SourceID string `json:"source_id"` + Source SourceMetadata `json:"source"` + Title string `json:"title"` + CanonicalURL string `json:"canonical_url"` + FirstSeen string `json:"first_seen"` + LastSeen string `json:"last_seen"` + Status string `json:"status"` + Category string `json:"category"` + Severity string `json:"severity"` + RegionTag string `json:"region_tag"` + Lat float64 `json:"lat"` + Lng float64 `json:"lng"` + FreshnessHours int `json:"freshness_hours"` + Reporting ReportingMetadata `json:"reporting,omitempty"` + Triage *Triage `json:"triage,omitempty"` +} + +type Triage struct { + RelevanceScore float64 `json:"relevance_score"` + Threshold float64 `json:"threshold,omitempty"` + Confidence string `json:"confidence,omitempty"` + Disposition string `json:"disposition,omitempty"` + PublicationType string `json:"publication_type,omitempty"` + WeakSignals []string `json:"weak_signals,omitempty"` + Metadata *TriageMetadata `json:"metadata,omitempty"` + Reasoning string `json:"reasoning,omitempty"` +} + +type TriageMetadata struct { + Author string `json:"author,omitempty"` + Tags []string `json:"tags,omitempty"` +} + +type DuplicateSample struct { + Title string `json:"title"` + Count int `json:"count"` +} + +type DuplicateAudit struct { + SuppressedVariantDuplicates int `json:"suppressed_variant_duplicates"` + 
RepeatedTitleGroupsInActive int `json:"repeated_title_groups_in_active"` + RepeatedTitleSamples []DuplicateSample `json:"repeated_title_samples"` +} + +type SourceHealthEntry struct { + SourceID string `json:"source_id"` + AuthorityName string `json:"authority_name"` + Type string `json:"type"` + Status string `json:"status"` + FetchedCount int `json:"fetched_count"` + FeedURL string `json:"feed_url"` + Error string `json:"error,omitempty"` + StartedAt string `json:"started_at"` + FinishedAt string `json:"finished_at"` + ActiveCount int `json:"active_count,omitempty"` + FilteredCount int `json:"filtered_count,omitempty"` +} + +type SourceHealthDocument struct { + GeneratedAt string `json:"generated_at"` + CriticalSourcePrefixes []string `json:"critical_source_prefixes"` + FailOnCriticalSourceGap bool `json:"fail_on_critical_source_gap"` + TotalSources int `json:"total_sources"` + SourcesOK int `json:"sources_ok"` + SourcesError int `json:"sources_error"` + DuplicateAudit DuplicateAudit `json:"duplicate_audit"` + Sources []SourceHealthEntry `json:"sources"` +} diff --git a/internal/collector/model/source.go b/internal/collector/model/source.go new file mode 100644 index 0000000..a0c7107 --- /dev/null +++ b/internal/collector/model/source.go @@ -0,0 +1,34 @@ +package model + +type RegistrySource struct { + Type string `json:"type"` + FollowRedirects bool `json:"followRedirects"` + FeedURL string `json:"feed_url"` + FeedURLs []string `json:"feed_urls,omitempty"` + Category string `json:"category"` + RegionTag string `json:"region_tag"` + Lat float64 `json:"lat"` + Lng float64 `json:"lng"` + MaxItems int `json:"max_items"` + IncludeKeywords []string `json:"include_keywords,omitempty"` + ExcludeKeywords []string `json:"exclude_keywords,omitempty"` + Reporting ReportingMetadata `json:"reporting"` + Source SourceMetadata `json:"source"` +} + +type ReportingMetadata struct { + Label string `json:"label,omitempty"` + URL string `json:"url,omitempty"` + Phone string 
`json:"phone,omitempty"` + Notes string `json:"notes,omitempty"` +} + +type SourceMetadata struct { + SourceID string `json:"source_id"` + AuthorityName string `json:"authority_name"` + Country string `json:"country"` + CountryCode string `json:"country_code"` + Region string `json:"region"` + AuthorityType string `json:"authority_type"` + BaseURL string `json:"base_url"` +} diff --git a/internal/collector/normalize/normalize.go b/internal/collector/normalize/normalize.go new file mode 100644 index 0000000..90afd1f --- /dev/null +++ b/internal/collector/normalize/normalize.go @@ -0,0 +1,760 @@ +package normalize + +import ( + "crypto/sha1" + "encoding/hex" + "math" + "net/url" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/collector/parse" +) + +var ( + technicalSignalPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\bcve-\d{4}-\d{4,7}\b`), + regexp.MustCompile(`(?i)\b(?:ioc|iocs|indicator(?:s)? 
of compromise)\b`), + regexp.MustCompile(`(?i)\b(?:tactic|technique|ttp|mitre)\b`), + regexp.MustCompile(`(?i)\b(?:hash|sha-?256|sha-?1|md5|yara|sigma)\b`), + regexp.MustCompile(`(?i)\b(?:ip(?:v4|v6)?|domain|url|hostname|command and control|c2)\b`), + regexp.MustCompile(`(?i)\b(?:vulnerability|exploit(?:ation)?|zero-?day|patch|mitigation|workaround)\b`), + } + incidentDisclosurePatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:breach|data leak|compromis(?:e|ed)|intrusion|unauthori[sz]ed access)\b`), + regexp.MustCompile(`(?i)\b(?:ransomware|malware|botnet|ddos|phishing|credential theft)\b`), + regexp.MustCompile(`(?i)\b(?:attack|attacked|target(?:ed|ing)|incident response|security incident)\b`), + regexp.MustCompile(`(?i)\b(?:arrest(?:ed)?|charged|indicted|wanted|fugitive|missing person|kidnapp(?:ed|ing)|homicide)\b`), + } + actionablePatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:report|submit (?:a )?tip|contact|hotline|phone|email)\b`), + regexp.MustCompile(`(?i)\b(?:apply update|upgrade|disable|block|monitor|detect|investigate)\b`), + regexp.MustCompile(`(?i)\b(?:advisory|alert|warning|incident notice|public appeal)\b`), + } + narrativePatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:opinion|editorial|commentary|analysis|explainer|podcast|interview)\b`), + regexp.MustCompile(`(?i)\b(?:what we know|live updates|behind the scenes|feature story)\b`), + regexp.MustCompile(`(?i)\b(?:market reaction|share price|investor)\b`), + } + generalNewsPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:announces?|launche[sd]?|conference|summit|webinar|event|awareness month)\b`), + regexp.MustCompile(`(?i)\b(?:ceremony|speech|statement|newsletter|weekly roundup)\b`), + regexp.MustCompile(`(?i)\b(?:partnership|memorandum|mou|initiative|campaign)\b`), + } + securityContextPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:cyber|cybersecurity|infosec|information security|it security)\b`), + regexp.MustCompile(`(?i)\b(?:security 
posture|security controls?|threat intelligence)\b`), + regexp.MustCompile(`(?i)\b(?:vulnerability|exploit|patch|advisory|defend|defensive)\b`), + regexp.MustCompile(`(?i)\b(?:soc|siem|incident response|malware analysis)\b`), + } + assistancePatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:report(?:\s+a)?(?:\s+crime)?|submit (?:a )?tip|tip[-\s]?off)\b`), + regexp.MustCompile(`(?i)\b(?:contact (?:police|authorities|law enforcement)|hotline|helpline)\b`), + regexp.MustCompile(`(?i)\b(?:if you have information|seeking information|appeal for help)\b`), + regexp.MustCompile(`(?i)\b(?:missing|wanted|fugitive|amber alert)\b`), + } + impactSpecificityPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(?:affected|impact(?:ed)?|disrupt(?:ed|ion)|outage|service interruption)\b`), + regexp.MustCompile(`(?i)\b(?:records|accounts|systems|devices|endpoints|victims|organizations)\b`), + regexp.MustCompile(`(?i)\b(?:on\s+\d{1,2}\s+\w+\s+\d{4}|timeline|between\s+\d{1,2}:\d{2})\b`), + regexp.MustCompile(`(?i)\b\d{2,}\s+(?:records|users|systems|devices|victims|organizations)\b`), + } + newsMediaDomains = []string{ + "channelnewsasia.com", + "yna.co.kr", + "nhk.or.jp", + "scmp.com", + "jamaicaobserver.com", + "straitstimes.com", + } + newsMediaIDs = map[string]struct{}{ + "cna-sg-crime": {}, + "yonhap-kr": {}, + "nhk-jp": {}, + "scmp-hk": {}, + "jamaica-observer": {}, + "straitstimes-sg": {}, + } + blogFilterExempt = map[string]struct{}{ + "bleepingcomputer": {}, + "krebsonsecurity": {}, + "thehackernews": {}, + "databreaches-net": {}, + "cbc-canada": {}, + "globalnews-ca": {}, + } +) + +type Context struct { + Config config.Config + Now time.Time +} + +type FeedContext struct { + Summary string + Author string + Tags []string + FeedType string +} + +func RSSItem(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model.Alert { + publishedAt := parseDate(item.Published) + if publishedAt.IsZero() { + publishedAt = ctx.Now + } + if !isFresh(ctx.Config, 
publishedAt, ctx.Now) { + return nil + } + alert := baseAlert(ctx, meta, item.Title, item.Link, publishedAt) + triage := score(ctx.Config, alert, FeedContext{ + Summary: item.Summary, + Author: item.Author, + Tags: item.Tags, + FeedType: meta.Type, + }) + alert.Triage = triage + alert = normalizeInformational(ctx.Config, alert, FeedContext{ + Summary: item.Summary, + Author: item.Author, + Tags: item.Tags, + FeedType: meta.Type, + }) + return &alert +} + +func HTMLItem(ctx Context, meta model.RegistrySource, item parse.FeedItem) *model.Alert { + alert := baseAlert(ctx, meta, item.Title, item.Link, ctx.Now) + triage := score(ctx.Config, alert, FeedContext{ + Summary: item.Summary, + Tags: item.Tags, + FeedType: meta.Type, + }) + alert.Triage = triage + alert = normalizeInformational(ctx.Config, alert, FeedContext{ + Summary: item.Summary, + Tags: item.Tags, + FeedType: meta.Type, + }) + return &alert +} + +func KEVAlert(ctx Context, meta model.RegistrySource, cveID string, vulnName string, description string, dateAdded string, knownRansomware bool) *model.Alert { + publishedAt := parseDate(dateAdded) + if publishedAt.IsZero() || !isFresh(ctx.Config, publishedAt, ctx.Now) { + return nil + } + title := cveID + ": " + firstNonEmpty(vulnName, "Known Exploited Vulnerability") + link := meta.Source.BaseURL + if strings.TrimSpace(cveID) != "" { + link = "https://nvd.nist.gov/vuln/detail/" + strings.TrimSpace(cveID) + } + alert := baseAlert(ctx, meta, title, link, publishedAt) + if hoursBetween(ctx.Now, publishedAt) <= 72 { + alert.Severity = "critical" + } else if hoursBetween(ctx.Now, publishedAt) <= 168 { + alert.Severity = "high" + } + tags := []string{} + if knownRansomware { + tags = append(tags, "known-ransomware-campaign") + } + alert.Triage = score(ctx.Config, alert, FeedContext{ + Summary: strings.TrimSpace(vulnName + " " + description), + Tags: tags, + FeedType: meta.Type, + }) + return &alert +} + +func InterpolAlert(ctx Context, meta model.RegistrySource, title 
string, link string, countryCode string, summary string, tags []string) *model.Alert { + if strings.TrimSpace(title) == "" { + return nil + } + alert := baseAlert(ctx, meta, title, firstNonEmpty(link, meta.Source.BaseURL), ctx.Now) + alert.Severity = "critical" + alert.RegionTag = firstNonEmpty(countryCode, alert.RegionTag) + if strings.TrimSpace(countryCode) != "" { + alert.Source.CountryCode = strings.ToUpper(strings.TrimSpace(countryCode)) + } + alert.Triage = score(ctx.Config, alert, FeedContext{ + Summary: summary, + Tags: tags, + FeedType: meta.Type, + }) + return &alert +} + +func StaticInterpolEntry(now time.Time) model.Alert { + return model.Alert{ + AlertID: "interpol-hub-static", + SourceID: "interpol-hub", + Source: model.SourceMetadata{SourceID: "interpol-hub", AuthorityName: "INTERPOL Notices Hub", Country: "France", CountryCode: "FR", Region: "International", AuthorityType: "police", BaseURL: "https://www.interpol.int"}, + Title: "INTERPOL Red & Yellow Notices - Browse Wanted & Missing Persons", + CanonicalURL: "https://www.interpol.int/How-we-work/Notices/View-Red-Notices", + FirstSeen: now.UTC().Format(time.RFC3339), + LastSeen: now.UTC().Format(time.RFC3339), + Status: "active", + Category: "wanted_suspect", + Severity: "critical", + RegionTag: "INT", + Lat: 45.764, + Lng: 4.8357, + FreshnessHours: 1, + Reporting: model.ReportingMetadata{ + Label: "Browse INTERPOL Notices", + URL: "https://www.interpol.int/How-we-work/Notices/View-Red-Notices", + Notes: "Red Notices: wanted persons. Yellow Notices: missing persons. 
Browse directly.", + }, + Triage: &model.Triage{RelevanceScore: 1, Reasoning: "Permanent INTERPOL hub link"}, + } +} + +func baseAlert(ctx Context, meta model.RegistrySource, title string, link string, publishedAt time.Time) model.Alert { + lat, lng := jitter(meta.Lat, meta.Lng, meta.Source.SourceID+":"+link) + return model.Alert{ + AlertID: meta.Source.SourceID + "-" + hashID(link), + SourceID: meta.Source.SourceID, + Source: meta.Source, + Title: strings.TrimSpace(title), + CanonicalURL: strings.TrimSpace(link), + FirstSeen: publishedAt.UTC().Format(time.RFC3339), + LastSeen: ctx.Now.UTC().Format(time.RFC3339), + Status: "active", + Category: meta.Category, + Severity: inferSeverity(title, defaultSeverity(meta.Category)), + RegionTag: meta.RegionTag, + Lat: lat, + Lng: lng, + FreshnessHours: hoursBetween(ctx.Now, publishedAt), + Reporting: meta.Reporting, + } +} + +func score(cfg config.Config, alert model.Alert, feed FeedContext) *model.Triage { + text := strings.ToLower(strings.Join([]string{ + alert.Title, + feed.Summary, + feed.Author, + strings.Join(feed.Tags, " "), + alert.CanonicalURL, + }, "\n")) + publicationType := inferPublicationType(alert, feed.FeedType) + score := 0.5 + signals := []string{} + add := func(delta float64, reason string) { + score += delta + if delta >= 0 { + signals = append(signals, "+"+formatDelta(delta)+" "+reason) + return + } + signals = append(signals, formatDelta(delta)+" "+reason) + } + + switch publicationType { + case "news_media": + add(-0.16, "publication type leans general-news") + case "cert_advisory", "structured_incident_feed": + add(0.08, "source metadata is incident-oriented") + case "law_enforcement": + add(0.06, "law-enforcement source metadata") + } + + switch alert.Category { + case "cyber_advisory": + add(0.09, "cyber advisory category") + case "wanted_suspect", "missing_person": + add(0.09, "law-enforcement incident category") + case "humanitarian_tasking", "conflict_monitoring", "humanitarian_security": + 
add(0.08, "humanitarian incident/tasking category") + case "education_digital_capacity": + add(0.07, "education and digital capacity category") + case "fraud_alert": + add(0.07, "fraud incident category") + } + + hasTechnical := hasAny(text, technicalSignalPatterns) + hasIncident := hasAny(text, incidentDisclosurePatterns) + hasActionable := hasAny(text, actionablePatterns) + hasSpecificImpact := hasAny(text, impactSpecificityPatterns) + hasNarrative := hasAny(text, narrativePatterns) + hasGeneral := hasAny(text, generalNewsPatterns) + looksLikeBlog := isBlog(alert) + + if hasTechnical { + add(0.16, "technical indicators or tactics present") + } + if hasIncident { + add(0.16, "incident/crime disclosure language") + } + if hasActionable { + add(0.10, "contains response/reporting actions") + } + if hasSpecificImpact { + add(0.08, "specific impact/timeline/system details") + } + if hasNarrative { + add(-0.18, "opinion/commentary phrasing") + } + if hasGeneral { + add(-0.12, "general institutional/news language") + } + if looksLikeBlog { + add(-0.10, "blog-style structure") + } + if !hasTechnical && !hasIncident && (hasNarrative || hasGeneral) { + add(-0.08, "weak incident evidence relative to narrative cues") + } + if alert.FreshnessHours > 0 && alert.FreshnessHours <= 24 && (hasIncident || hasTechnical) { + add(0.04, "fresh post with potential early-warning signal") + } + + threshold := clamp01(cfg.IncidentRelevanceThreshold) + relevance := round3(clamp01(score)) + distance := math.Abs(relevance - threshold) + confidence := "low" + if distance >= 0.25 { + confidence = "high" + } else if distance >= 0.1 { + confidence = "medium" + } + disposition := "filtered_review" + if relevance >= threshold { + disposition = "retained" + } + return &model.Triage{ + RelevanceScore: relevance, + Threshold: threshold, + Confidence: confidence, + Disposition: disposition, + PublicationType: publicationType, + WeakSignals: limitStrings(signals, 12), + Metadata: &model.TriageMetadata{ + 
// defaultSeverity maps an alert category to the severity used when the title
// gives no stronger hint. The category is compared case-insensitively with
// surrounding whitespace ignored; categories not listed here yield "medium".
func defaultSeverity(category string) string {
	normalized := strings.ToLower(strings.TrimSpace(category))
	severity := "medium"
	switch normalized {
	case "wanted_suspect", "missing_person":
		severity = "critical"
	case "cyber_advisory", "public_appeal", "humanitarian_tasking", "humanitarian_security", "private_sector":
		severity = "high"
	case "informational":
		severity = "info"
	}
	return severity
}
// dateLayouts lists the timestamp formats parseDate tries, in order.
//
// The standard RFC1123/RFC822 layouts use "02" for the day, which only
// matches two-digit days when parsing. The "2" variants additionally accept
// the one-digit days that RFC 822 style feed dates may carry, with or without
// a leading weekday.
var dateLayouts = []string{
	time.RFC3339,
	time.RFC1123Z,
	time.RFC1123,
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006 15:04:05 MST",
	time.RFC822Z,
	time.RFC822,
	time.RFC850,
	"2006-01-02",
}

// parseDate converts a feed timestamp into a time.Time.
//
// Surrounding whitespace is ignored. The zero time is returned when value is
// blank or matches none of dateLayouts, so callers can detect failure with
// IsZero.
func parseDate(value string) time.Time {
	value = strings.TrimSpace(value)
	if value == "" {
		return time.Time{}
	}
	for _, layout := range dateLayouts {
		if parsed, err := time.Parse(layout, value); err == nil {
			return parsed
		}
	}
	return time.Time{}
}
// jitter shifts a coordinate by an offset derived from the SHA-1 digest of
// seed and returns the new latitude and longitude rounded by round5. The same
// lat, lng and seed always produce the same result.
func jitter(lat float64, lng float64, seed string) (float64, float64) {
	sum := sha1.Sum([]byte(seed))
	// Digest byte 0 sets the direction over a full turn; byte 1 adds up to
	// one more radian on top of it.
	angle := float64(sum[0])/255*math.Pi*2 + float64(sum[1])/255
	// Digest byte 2 scales the offset distance between 22 and 77.
	// NOTE(review): the 111.32 divisor suggests kilometres (km per degree of
	// latitude) — confirm the intended unit.
	radius := 22 + float64(sum[2])/255*55
	dLat := (radius / 111.32) * math.Cos(angle)
	// Floor the cosine at 0.2 so the longitude offset stays bounded as the
	// latitude approaches the poles.
	cosLat := math.Max(0.2, math.Cos((lat*math.Pi)/180))
	dLng := (radius / (111.32 * cosLat)) * math.Sin(angle)
	// Keep the latitude within [-89.5, 89.5].
	outLat := math.Max(-89.5, math.Min(89.5, lat+dLat))
	outLng := lng + dLng
	// Wrap a longitude that crossed the ±180 boundary back into range.
	if outLng > 180 {
		outLng -= 360
	}
	if outLng < -180 {
		outLng += 360
	}
	return round5(outLat), round5(outLng)
}
// limitStrings returns at most limit entries of values, each trimmed of
// surrounding whitespace, skipping entries that are blank after trimming.
//
// The result is nil when nothing is kept, which includes an empty input and a
// limit of zero or less.
func limitStrings(values []string, limit int) []string {
	if limit <= 0 || len(values) == 0 {
		return nil
	}
	// Never reserve more room than the input can fill.
	capacity := limit
	if len(values) < capacity {
		capacity = len(values)
	}
	out := make([]string, 0, capacity)
	for _, value := range values {
		value = strings.TrimSpace(value)
		if value == "" {
			continue
		}
		out = append(out, value)
		if len(out) == limit {
			break
		}
	}
	if len(out) == 0 {
		return nil
	}
	return out
}
sort.Slice(deduped, func(i, j int) bool { return deduped[i].Title < deduped[j].Title }) + kept, suppressed := collapseVariants(deduped) + duplicates := summarizeTitleDuplicates(kept) + return kept, model.DuplicateAudit{ + SuppressedVariantDuplicates: len(suppressed), + RepeatedTitleGroupsInActive: len(duplicates), + RepeatedTitleSamples: duplicates, + } +} + +func FilterActive(cfg config.Config, alerts []model.Alert) (active []model.Alert, filtered []model.Alert) { + for _, alert := range alerts { + threshold := thresholdForAlert(cfg, alert) + score := 0.0 + if alert.Triage != nil { + score = alert.Triage.RelevanceScore + } + if score >= threshold { + active = append(active, alert) + continue + } + filtered = append(filtered, alert) + } + sortAlerts(active, true) + sortAlerts(filtered, false) + return active, filtered +} + +func sortAlerts(alerts []model.Alert, active bool) { + sort.Slice(alerts, func(i, j int) bool { + if !active { + scoreDelta := alertScore(alerts[j]) - alertScore(alerts[i]) + if scoreDelta != 0 { + return scoreDelta > 0 + } + } + return alerts[i].FirstSeen > alerts[j].FirstSeen + }) +} + +func alertScore(alert model.Alert) float64 { + if alert.Triage == nil { + return -1 + } + return alert.Triage.RelevanceScore +} + +func collapseVariants(alerts []model.Alert) ([]model.Alert, []model.Alert) { + byVariant := make(map[string][]model.Alert) + passthrough := make([]model.Alert, 0, len(alerts)) + for _, alert := range alerts { + key := buildVariantKey(alert) + if key == "" { + passthrough = append(passthrough, alert) + continue + } + byVariant[key] = append(byVariant[key], alert) + } + kept := append([]model.Alert{}, passthrough...) + suppressed := []model.Alert{} + for _, group := range byVariant { + if len(group) == 1 { + kept = append(kept, group[0]) + continue + } + sort.Slice(group, func(i, j int) bool { + return comparePreference(group[i], group[j]) < 0 + }) + kept = append(kept, group[0]) + suppressed = append(suppressed, group[1:]...) 
// headlineSeparatorRe matches each run of characters other than lowercase
// ASCII letters and digits. It is compiled once at package scope because
// normalizeHeadline is called for every alert.
var headlineSeparatorRe = regexp.MustCompile(`[^a-z0-9]+`)

// normalizeHeadline reduces a title to a comparison key: the text is
// lowercased, every run of characters outside a-z and 0-9 becomes a single
// space, and leading and trailing whitespace is removed. A title with no
// ASCII letters or digits yields the empty string.
func normalizeHeadline(value string) string {
	lowered := strings.ToLower(value)
	return strings.TrimSpace(headlineSeparatorRe.ReplaceAllString(lowered, " "))
}
0000000..c635e38 --- /dev/null +++ b/internal/collector/normalize/normalize_test.go @@ -0,0 +1,39 @@ +package normalize + +import ( + "testing" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" +) + +func TestDeduplicatePrefersHigherScore(t *testing.T) { + alerts := []model.Alert{ + {Title: "A", CanonicalURL: "https://x", Triage: &model.Triage{RelevanceScore: 0.2}}, + {Title: "A", CanonicalURL: "https://x", Triage: &model.Triage{RelevanceScore: 0.8}}, + } + deduped, _ := Deduplicate(alerts) + if len(deduped) != 1 { + t.Fatalf("expected 1 alert, got %d", len(deduped)) + } + if deduped[0].Triage.RelevanceScore != 0.8 { + t.Fatalf("expected highest score to win, got %.3f", deduped[0].Triage.RelevanceScore) + } +} + +func TestFilterActiveUsesMissingPersonThreshold(t *testing.T) { + cfg := config.Default() + cfg.IncidentRelevanceThreshold = 0.5 + cfg.MissingPersonRelevanceThreshold = 0.1 + alerts := []model.Alert{ + {Category: "missing_person", Triage: &model.Triage{RelevanceScore: 0.2}}, + {Category: "cyber_advisory", Triage: &model.Triage{RelevanceScore: 0.2}}, + } + active, filtered := FilterActive(cfg, alerts) + if len(active) != 1 || active[0].Category != "missing_person" { + t.Fatalf("unexpected active alerts %#v", active) + } + if len(filtered) != 1 || filtered[0].Category != "cyber_advisory" { + t.Fatalf("unexpected filtered alerts %#v", filtered) + } +} diff --git a/internal/collector/output/write.go b/internal/collector/output/write.go new file mode 100644 index 0000000..8dc179d --- /dev/null +++ b/internal/collector/output/write.go @@ -0,0 +1,60 @@ +package output + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" +) + +func Write(cfg config.Config, active []model.Alert, filtered []model.Alert, state []model.Alert, sourceHealth []model.SourceHealthEntry, 
duplicateAudit model.DuplicateAudit) error { + paths := []string{cfg.OutputPath, cfg.FilteredOutputPath, cfg.StateOutputPath, cfg.SourceHealthOutputPath} + for _, path := range paths { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return err + } + } + if err := writeJSON(cfg.OutputPath, active); err != nil { + return err + } + if err := writeJSON(cfg.FilteredOutputPath, filtered); err != nil { + return err + } + if err := writeJSON(cfg.StateOutputPath, state); err != nil { + return err + } + doc := model.SourceHealthDocument{ + GeneratedAt: time.Now().UTC().Format(time.RFC3339), + CriticalSourcePrefixes: cfg.CriticalSourcePrefixes, + FailOnCriticalSourceGap: cfg.FailOnCriticalSourceGap, + TotalSources: len(sourceHealth), + SourcesOK: countStatus(sourceHealth, "ok"), + SourcesError: countStatus(sourceHealth, "error"), + DuplicateAudit: duplicateAudit, + Sources: sourceHealth, + } + return writeJSON(cfg.SourceHealthOutputPath, doc) +} + +func writeJSON(path string, value any) error { + data, err := json.MarshalIndent(value, "", " ") + if err != nil { + return fmt.Errorf("marshal %s: %w", path, err) + } + data = append(data, '\n') + return os.WriteFile(path, data, 0o644) +} + +func countStatus(entries []model.SourceHealthEntry, status string) int { + total := 0 + for _, entry := range entries { + if entry.Status == status { + total++ + } + } + return total +} diff --git a/internal/collector/output/write_test.go b/internal/collector/output/write_test.go new file mode 100644 index 0000000..41c4f7a --- /dev/null +++ b/internal/collector/output/write_test.go @@ -0,0 +1,29 @@ +package output + +import ( + "os" + "path/filepath" + "testing" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" +) + +func TestWriteOutputs(t *testing.T) { + dir := t.TempDir() + cfg := config.Default() + cfg.OutputPath = filepath.Join(dir, "alerts.json") + cfg.FilteredOutputPath = filepath.Join(dir, 
"filtered.json") + cfg.StateOutputPath = filepath.Join(dir, "state.json") + cfg.SourceHealthOutputPath = filepath.Join(dir, "health.json") + + err := Write(cfg, []model.Alert{{AlertID: "a"}}, []model.Alert{{AlertID: "b"}}, []model.Alert{{AlertID: "c"}}, []model.SourceHealthEntry{{SourceID: "s", Status: "ok"}}, model.DuplicateAudit{}) + if err != nil { + t.Fatal(err) + } + for _, path := range []string{cfg.OutputPath, cfg.FilteredOutputPath, cfg.StateOutputPath, cfg.SourceHealthOutputPath} { + if _, err := os.Stat(path); err != nil { + t.Fatalf("expected output file %s: %v", path, err) + } + } +} diff --git a/internal/collector/parse/html.go b/internal/collector/parse/html.go new file mode 100644 index 0000000..c28a3f1 --- /dev/null +++ b/internal/collector/parse/html.go @@ -0,0 +1,53 @@ +package parse + +import ( + "html" + "net/url" + "regexp" + "strings" +) + +var anchorRe = regexp.MustCompile(`(?is)]*href=["']([^"']+)["'][^>]*>([\s\S]*?)`) +var tagStripRe = regexp.MustCompile(`(?is)<[^>]+>`) +var scriptStripRe = regexp.MustCompile(`(?is)|`) + +func ParseHTMLAnchors(body string, baseURL string) []FeedItem { + matches := anchorRe.FindAllStringSubmatch(body, -1) + seen := make(map[string]struct{}, len(matches)) + out := make([]FeedItem, 0, len(matches)) + for _, match := range matches { + if len(match) < 3 { + continue + } + rawHref := strings.TrimSpace(match[1]) + if rawHref == "" || strings.HasPrefix(rawHref, "#") { + continue + } + link, err := url.Parse(rawHref) + if err != nil { + continue + } + resolved, err := url.Parse(baseURL) + if err != nil { + continue + } + title := stripHTML(match[2]) + if len(title) < 8 { + continue + } + finalURL := resolved.ResolveReference(link).String() + if _, ok := seen[finalURL]; ok { + continue + } + seen[finalURL] = struct{}{} + out = append(out, FeedItem{Title: title, Link: finalURL}) + } + return out +} + +func stripHTML(value string) string { + value = scriptStripRe.ReplaceAllString(value, " ") + value = 
tagStripRe.ReplaceAllString(value, " ") + value = html.UnescapeString(value) + return strings.Join(strings.Fields(value), " ") +} diff --git a/internal/collector/parse/html_test.go b/internal/collector/parse/html_test.go new file mode 100644 index 0000000..3b38b84 --- /dev/null +++ b/internal/collector/parse/html_test.go @@ -0,0 +1,14 @@ +package parse + +import "testing" + +func TestParseHTMLAnchors(t *testing.T) { + body := `Wanted PersonSkipDuplicate` + items := ParseHTMLAnchors(body, "https://agency.example.org/news") + if len(items) != 1 { + t.Fatalf("expected 1 item, got %d", len(items)) + } + if items[0].Link != "https://agency.example.org/wanted/1" { + t.Fatalf("unexpected link %q", items[0].Link) + } +} diff --git a/internal/collector/parse/rss.go b/internal/collector/parse/rss.go new file mode 100644 index 0000000..efd29c2 --- /dev/null +++ b/internal/collector/parse/rss.go @@ -0,0 +1,148 @@ +package parse + +import ( + "html" + "regexp" + "strings" +) + +type FeedItem struct { + Title string + Link string + Published string + Author string + Summary string + Tags []string +} + +var ( + entryRe = regexp.MustCompile(`(?is)`) + itemRe = regexp.MustCompile(`(?is)`) + tagCache = map[string]*regexp.Regexp{} + tagValuesCache = map[string]*regexp.Regexp{} + atomLinkRe = regexp.MustCompile(`(?is)]*rel=["']alternate["'][^>]*>|]*>`) + hrefRe = regexp.MustCompile(`(?i)href=["']([^"']+)["']`) + atomAuthorRe = regexp.MustCompile(`(?is)]*>[\s\S]*?]*>([\s\S]*?)[\s\S]*?`) + atomCategoryRe = regexp.MustCompile(`(?is)]*term=["']([^"']+)["'][^>]*/?>`) +) + +func ParseFeed(xml string) []FeedItem { + if strings.Contains(xml, "]*>([\s\S]*?)`) + tagCache[tag] = re + } + match := re.FindStringSubmatch(block) + if len(match) < 2 { + return "" + } + return decodeXML(match[1]) +} + +func getTagValues(block, tag string) []string { + re, ok := tagValuesCache[tag] + if !ok { + re = regexp.MustCompile(`(?is)<` + regexp.QuoteMeta(tag) + `[^>]*>([\s\S]*?)`) + tagValuesCache[tag] = re + } 
+ matches := re.FindAllStringSubmatch(block, -1) + out := make([]string, 0, len(matches)) + for _, match := range matches { + if len(match) < 2 { + continue + } + value := decodeXML(match[1]) + if value != "" { + out = append(out, value) + } + } + return out +} + +func getAtomLink(block string) string { + linkTag := atomLinkRe.FindString(block) + if linkTag == "" { + return "" + } + match := hrefRe.FindStringSubmatch(linkTag) + if len(match) < 2 { + return "" + } + return decodeXML(match[1]) +} + +func getAuthor(block string) string { + if match := atomAuthorRe.FindStringSubmatch(block); len(match) > 1 { + return decodeXML(match[1]) + } + return firstNonEmpty(getTag(block, "author"), getTag(block, "dc:creator"), getTag(block, "creator")) +} + +func getSummary(block string) string { + return firstNonEmpty( + getTag(block, "description"), + getTag(block, "summary"), + getTag(block, "content"), + getTag(block, "content:encoded"), + ) +} + +func getCategories(block string) []string { + out := getTagValues(block, "category") + matches := atomCategoryRe.FindAllStringSubmatch(block, -1) + for _, match := range matches { + if len(match) < 2 { + continue + } + value := decodeXML(match[1]) + if value != "" { + out = append(out, value) + } + } + return out +} + +func decodeXML(value string) string { + return strings.TrimSpace(html.UnescapeString(value)) +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if strings.TrimSpace(value) != "" { + return strings.TrimSpace(value) + } + } + return "" +} diff --git a/internal/collector/parse/rss_test.go b/internal/collector/parse/rss_test.go new file mode 100644 index 0000000..581d6ce --- /dev/null +++ b/internal/collector/parse/rss_test.go @@ -0,0 +1,37 @@ +package parse + +import "testing" + +func TestParseFeedRSS(t *testing.T) { + xml := `Alert Onehttps://example.com/1Mon, 02 Jan 2006 15:04:05 MSTBodycrime` + items := ParseFeed(xml) + if len(items) != 1 { + t.Fatalf("expected 1 item, got %d", 
len(items)) + } + if items[0].Title != "Alert One" { + t.Fatalf("unexpected title %q", items[0].Title) + } + if items[0].Link != "https://example.com/1" { + t.Fatalf("unexpected link %q", items[0].Link) + } + if items[0].Summary != "Body" { + t.Fatalf("unexpected summary %q", items[0].Summary) + } +} + +func TestParseFeedAtom(t *testing.T) { + xml := `Entry One2026-01-02T03:04:05ZOpsSummary` + items := ParseFeed(xml) + if len(items) != 1 { + t.Fatalf("expected 1 item, got %d", len(items)) + } + if items[0].Author != "Ops" { + t.Fatalf("unexpected author %q", items[0].Author) + } + if items[0].Link != "https://example.com/a" { + t.Fatalf("unexpected link %q", items[0].Link) + } + if len(items[0].Tags) != 1 || items[0].Tags[0] != "cyber" { + t.Fatalf("unexpected tags %#v", items[0].Tags) + } +} diff --git a/internal/collector/registry/registry.go b/internal/collector/registry/registry.go new file mode 100644 index 0000000..d38e0ff --- /dev/null +++ b/internal/collector/registry/registry.go @@ -0,0 +1,73 @@ +package registry + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "strings" + + "github.com/scalytics/euosint/internal/collector/model" +) + +func Load(path string) ([]model.RegistrySource, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read registry %s: %w", path, err) + } + + var raw []model.RegistrySource + if err := json.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("decode registry %s: %w", path, err) + } + + seen := make(map[string]struct{}, len(raw)) + out := make([]model.RegistrySource, 0, len(raw)) + for _, entry := range raw { + normalized, ok := normalize(entry) + if !ok { + continue + } + if _, exists := seen[normalized.Source.SourceID]; exists { + continue + } + seen[normalized.Source.SourceID] = struct{}{} + out = append(out, normalized) + } + + sort.Slice(out, func(i, j int) bool { + return out[i].Source.SourceID < out[j].Source.SourceID + }) + return out, nil +} + +func normalize(entry 
model.RegistrySource) (model.RegistrySource, bool) { + entry.Type = strings.TrimSpace(entry.Type) + entry.Category = strings.TrimSpace(entry.Category) + entry.RegionTag = strings.TrimSpace(entry.RegionTag) + entry.FeedURL = strings.TrimSpace(entry.FeedURL) + entry.Source.SourceID = strings.TrimSpace(entry.Source.SourceID) + entry.Source.AuthorityName = strings.TrimSpace(entry.Source.AuthorityName) + entry.Source.Country = fallback(entry.Source.Country, "Unknown") + entry.Source.CountryCode = fallback(strings.ToUpper(entry.Source.CountryCode), "XX") + entry.Source.Region = fallback(entry.Source.Region, "International") + entry.Source.AuthorityType = fallback(entry.Source.AuthorityType, "public_safety_program") + entry.Source.BaseURL = fallback(entry.Source.BaseURL, entry.FeedURL) + if entry.Type == "" || entry.Category == "" || entry.Source.SourceID == "" || entry.Source.AuthorityName == "" { + return model.RegistrySource{}, false + } + if entry.FeedURL == "" && len(entry.FeedURLs) == 0 { + return model.RegistrySource{}, false + } + if entry.MaxItems <= 0 { + entry.MaxItems = 20 + } + return entry, true +} + +func fallback(value, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return strings.TrimSpace(value) +} diff --git a/internal/collector/registry/registry_test.go b/internal/collector/registry/registry_test.go new file mode 100644 index 0000000..bb2a014 --- /dev/null +++ b/internal/collector/registry/registry_test.go @@ -0,0 +1,33 @@ +package registry + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadRegistryDeduplicatesAndNormalizes(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "registry.json") + content := `[ + {"type":"rss","feed_url":"https://one.example/feed","category":"cyber_advisory","source":{"source_id":"dup","authority_name":"One","country":"France","country_code":"fr","region":"Europe","authority_type":"cert","base_url":"https://one.example"}}, + 
{"type":"rss","feed_url":"https://two.example/feed","category":"cyber_advisory","source":{"source_id":"dup","authority_name":"Two"}}, + {"type":"html-list","feed_url":"https://three.example/list","category":"wanted_suspect","source":{"source_id":"three","authority_name":"Three"}} + ]` + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + sources, err := Load(path) + if err != nil { + t.Fatal(err) + } + if len(sources) != 2 { + t.Fatalf("expected 2 sources, got %d", len(sources)) + } + if sources[0].Source.SourceID != "dup" { + t.Fatalf("unexpected source ordering %#v", sources) + } + if sources[0].Source.CountryCode != "FR" { + t.Fatalf("expected normalized country code, got %q", sources[0].Source.CountryCode) + } +} diff --git a/internal/collector/run/run.go b/internal/collector/run/run.go new file mode 100644 index 0000000..0d2c92d --- /dev/null +++ b/internal/collector/run/run.go @@ -0,0 +1,383 @@ +package run + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/url" + "strings" + "time" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/fetch" + "github.com/scalytics/euosint/internal/collector/model" + "github.com/scalytics/euosint/internal/collector/normalize" + "github.com/scalytics/euosint/internal/collector/output" + "github.com/scalytics/euosint/internal/collector/parse" + "github.com/scalytics/euosint/internal/collector/registry" + "github.com/scalytics/euosint/internal/collector/state" +) + +type Runner struct { + stdout io.Writer + stderr io.Writer + clientFactory func(config.Config) *fetch.Client +} + +func New(stdout io.Writer, stderr io.Writer) Runner { + return Runner{ + stdout: stdout, + stderr: stderr, + clientFactory: fetch.New, + } +} + +func (r Runner) Run(ctx context.Context, cfg config.Config) error { + if cfg.Watch { + return r.watch(ctx, cfg) + } + return r.runOnce(ctx, cfg) +} + +func (r Runner) watch(ctx context.Context, cfg 
config.Config) error { + ticker := time.NewTicker(time.Duration(cfg.IntervalMS) * time.Millisecond) + defer ticker.Stop() + + for { + if err := r.runOnce(ctx, cfg); err != nil { + fmt.Fprintf(r.stderr, "collector run failed: %v\n", err) + } + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + } + } +} + +func (r Runner) runOnce(ctx context.Context, cfg config.Config) error { + sources, err := registry.Load(cfg.RegistryPath) + if err != nil { + return err + } + client := r.clientFactory(cfg) + now := time.Now().UTC() + nctx := normalize.Context{Config: cfg, Now: now} + + alerts := []model.Alert{normalize.StaticInterpolEntry(now)} + sourceHealth := make([]model.SourceHealthEntry, 0, len(sources)) + for _, source := range sources { + startedAt := time.Now().UTC() + batch, err := r.fetchSource(ctx, client, nctx, source) + entry := model.SourceHealthEntry{ + SourceID: source.Source.SourceID, + AuthorityName: source.Source.AuthorityName, + Type: source.Type, + FeedURL: source.FeedURL, + StartedAt: startedAt.Format(time.RFC3339), + FinishedAt: time.Now().UTC().Format(time.RFC3339), + } + if err != nil { + entry.Status = "error" + entry.Error = err.Error() + sourceHealth = append(sourceHealth, entry) + fmt.Fprintf(r.stderr, "WARN %s: %v\n", source.Source.AuthorityName, err) + continue + } + entry.Status = "ok" + entry.FetchedCount = len(batch) + sourceHealth = append(sourceHealth, entry) + alerts = append(alerts, batch...) 
+ } + + deduped, duplicateAudit := normalize.Deduplicate(alerts) + active, filtered := normalize.FilterActive(cfg, deduped) + populateSourceHealth(sourceHealth, active, filtered) + if err := assertCriticalSourceCoverage(cfg, sourceHealth); err != nil { + return err + } + + previous := state.Read(cfg.StateOutputPath) + if len(previous) == 0 { + previous = state.Read(cfg.OutputPath) + } + currentActive, currentFiltered, fullState := state.Reconcile(cfg, active, filtered, previous, now) + if err := output.Write(cfg, currentActive, currentFiltered, fullState, sourceHealth, duplicateAudit); err != nil { + return err + } + _, err = fmt.Fprintf(r.stdout, "Wrote %d active alerts -> %s (%d filtered in %s)\n", len(currentActive), cfg.OutputPath, len(currentFiltered), cfg.FilteredOutputPath) + return err +} + +func (r Runner) fetchSource(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + switch source.Type { + case "rss": + return r.fetchRSS(ctx, client, nctx, source) + case "html-list": + return r.fetchHTML(ctx, client, nctx, source) + case "kev-json": + return r.fetchKEV(ctx, client, nctx, source) + case "interpol-red-json", "interpol-yellow-json": + return r.fetchInterpol(ctx, client, nctx, source) + default: + return nil, fmt.Errorf("unsupported source type %s", source.Type) + } +} + +func (r Runner) fetchRSS(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := fetchWithFallback(ctx, client, source, "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8") + if err != nil { + return nil, err + } + items := parse.ParseFeed(string(body)) + limit := perSourceLimit(nctx.Config, source) + out := make([]model.Alert, 0, limit) + for _, item := range items { + if len(out) == limit { + break + } + if strings.TrimSpace(item.Title) == "" || strings.TrimSpace(item.Link) == "" { + continue + } + alert 
:= normalize.RSSItem(nctx, source, item) + if alert != nil { + out = append(out, *alert) + } + } + return out, nil +} + +func (r Runner) fetchHTML(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, finalURL, err := fetchWithFallbackURL(ctx, client, source, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + if err != nil { + return nil, err + } + items := parse.ParseHTMLAnchors(string(body), finalURL) + items = filterKeywords(items, source.IncludeKeywords, source.ExcludeKeywords) + limit := perSourceLimit(nctx.Config, source) + if len(items) > limit { + items = items[:limit] + } + out := make([]model.Alert, 0, len(items)) + for _, item := range items { + alert := normalize.HTMLItem(nctx, source, item) + if alert != nil { + out = append(out, *alert) + } + } + return out, nil +} + +func (r Runner) fetchKEV(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := client.Text(ctx, source.FeedURL, source.FollowRedirects, "application/json") + if err != nil { + return nil, err + } + var doc struct { + Vulnerabilities []struct { + CVEID string `json:"cveID"` + CVEIDAlt string `json:"cveId"` + CVE string `json:"cve"` + VulnerabilityName string `json:"vulnerabilityName"` + ShortDescription string `json:"shortDescription"` + DateAdded string `json:"dateAdded"` + KnownRansomwareCampaign bool `json:"knownRansomwareCampaign"` + } `json:"vulnerabilities"` + } + if err := json.Unmarshal(body, &doc); err != nil { + return nil, err + } + limit := perSourceLimit(nctx.Config, source) + out := []model.Alert{} + for _, vuln := range doc.Vulnerabilities { + if len(out) == limit { + break + } + cveID := firstNonEmpty(vuln.CVEID, vuln.CVEIDAlt, vuln.CVE) + alert := normalize.KEVAlert(nctx, source, cveID, vuln.VulnerabilityName, vuln.ShortDescription, vuln.DateAdded, vuln.KnownRansomwareCampaign) + if alert != nil { + 
out = append(out, *alert) + } + } + return out, nil +} + +func (r Runner) fetchInterpol(ctx context.Context, client *fetch.Client, nctx normalize.Context, source model.RegistrySource) ([]model.Alert, error) { + body, err := client.Text(ctx, source.FeedURL, source.FollowRedirects, "application/json") + if err != nil { + return nil, err + } + var doc struct { + Embedded struct { + Notices []struct { + Forename string `json:"forename"` + Name string `json:"name"` + PlaceOfBirth string `json:"place_of_birth"` + IssuingEntity string `json:"issuing_entity"` + Nationalities []string `json:"nationalities"` + CountriesLikelyToVisit []string `json:"countries_likely_to_be_visited"` + Links struct { + Self struct { + Href string `json:"href"` + } `json:"self"` + } `json:"_links"` + } `json:"notices"` + } `json:"_embedded"` + } + if err := json.Unmarshal(body, &doc); err != nil { + return nil, err + } + limit := perSourceLimit(nctx.Config, source) + out := []model.Alert{} + for _, notice := range doc.Embedded.Notices { + if len(out) == limit { + break + } + titlePrefix := "INTERPOL Red Notice" + if source.Type == "interpol-yellow-json" { + titlePrefix = "INTERPOL Yellow Notice" + } + label := strings.TrimSpace(strings.TrimSpace(notice.Forename) + " " + strings.TrimSpace(notice.Name)) + title := titlePrefix + if label != "" { + title = titlePrefix + ": " + label + } + link := notice.Links.Self.Href + if strings.TrimSpace(link) != "" { + if _, err := url.Parse(link); err == nil && !strings.HasPrefix(link, "http") { + link = (&url.URL{Scheme: "https", Host: "ws-public.interpol.int", Path: link}).String() + } + } + countryCode := "" + if len(notice.CountriesLikelyToVisit) > 0 { + countryCode = notice.CountriesLikelyToVisit[0] + } else if len(notice.Nationalities) > 0 { + countryCode = notice.Nationalities[0] + } + summary := strings.TrimSpace(notice.IssuingEntity + " " + notice.PlaceOfBirth) + tags := append([]string{}, notice.Nationalities...) 
+ tags = append(tags, notice.CountriesLikelyToVisit...) + alert := normalize.InterpolAlert(nctx, source, title, link, countryCode, summary, tags) + if alert != nil { + out = append(out, *alert) + } + } + return out, nil +} + +func fetchWithFallback(ctx context.Context, client *fetch.Client, source model.RegistrySource, accept string) ([]byte, error) { + body, _, err := fetchWithFallbackURL(ctx, client, source, accept) + return body, err +} + +func fetchWithFallbackURL(ctx context.Context, client *fetch.Client, source model.RegistrySource, accept string) ([]byte, string, error) { + candidates := []string{} + if strings.TrimSpace(source.FeedURL) != "" { + candidates = append(candidates, source.FeedURL) + } + candidates = append(candidates, source.FeedURLs...) + var lastErr error + for _, candidate := range candidates { + body, err := client.Text(ctx, candidate, source.FollowRedirects, accept) + if err == nil { + return body, candidate, nil + } + lastErr = err + } + if lastErr == nil { + lastErr = fmt.Errorf("no feed URLs available") + } + return nil, "", lastErr +} + +func filterKeywords(items []parse.FeedItem, include []string, exclude []string) []parse.FeedItem { + include = normalizeKeywords(include) + exclude = normalizeKeywords(exclude) + out := []parse.FeedItem{} + for _, item := range items { + hay := strings.ToLower(item.Title + " " + item.Link) + if len(include) > 0 && !containsKeyword(hay, include) { + continue + } + if len(exclude) > 0 && containsKeyword(hay, exclude) { + continue + } + out = append(out, item) + } + return out +} + +func normalizeKeywords(values []string) []string { + out := make([]string, 0, len(values)) + for _, value := range values { + value = strings.ToLower(strings.TrimSpace(value)) + if value != "" { + out = append(out, value) + } + } + return out +} + +func containsKeyword(hay string, needles []string) bool { + for _, needle := range needles { + if strings.Contains(hay, needle) { + return true + } + } + return false +} + +func 
populateSourceHealth(entries []model.SourceHealthEntry, active []model.Alert, filtered []model.Alert) { + activeBySource := map[string]int{} + filteredBySource := map[string]int{} + for _, alert := range active { + activeBySource[alert.SourceID]++ + } + for _, alert := range filtered { + filteredBySource[alert.SourceID]++ + } + for i := range entries { + entries[i].ActiveCount = activeBySource[entries[i].SourceID] + entries[i].FilteredCount = filteredBySource[entries[i].SourceID] + } +} + +func assertCriticalSourceCoverage(cfg config.Config, entries []model.SourceHealthEntry) error { + if !cfg.FailOnCriticalSourceGap || len(cfg.CriticalSourcePrefixes) == 0 { + return nil + } + missing := []string{} + for _, prefix := range cfg.CriticalSourcePrefixes { + total := 0 + for _, entry := range entries { + if entry.SourceID == prefix || strings.HasPrefix(entry.SourceID, prefix+"-") { + total += entry.FetchedCount + } + } + if total == 0 { + missing = append(missing, prefix) + } + } + if len(missing) == 0 { + return nil + } + return fmt.Errorf("critical source coverage gap: no records fetched for %s", strings.Join(missing, ", ")) +} + +func perSourceLimit(cfg config.Config, source model.RegistrySource) int { + if source.MaxItems > 0 { + return source.MaxItems + } + return cfg.MaxPerSource +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + value = strings.TrimSpace(value) + if value != "" { + return value + } + } + return "" +} diff --git a/internal/collector/run/run_test.go b/internal/collector/run/run_test.go new file mode 100644 index 0000000..b639647 --- /dev/null +++ b/internal/collector/run/run_test.go @@ -0,0 +1,93 @@ +package run + +import ( + "context" + "encoding/json" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/fetch" + "github.com/scalytics/euosint/internal/collector/model" +) + +func 
TestRunnerRunOnceWritesOutputs(t *testing.T) { + dir := t.TempDir() + registryPath := filepath.Join(dir, "registry.json") + registry := []byte(`[ + {"type":"rss","feed_url":"https://collector.test/rss","category":"cyber_advisory","region_tag":"INT","lat":48.8,"lng":2.3,"source":{"source_id":"rss-source","authority_name":"RSS Source","country":"France","country_code":"FR","region":"Europe","authority_type":"cert","base_url":"https://collector.test"}}, + {"type":"html-list","feed_url":"https://collector.test/html","category":"wanted_suspect","region_tag":"FR","lat":48.8,"lng":2.3,"include_keywords":["wanted"],"source":{"source_id":"html-source","authority_name":"HTML Source","country":"France","country_code":"FR","region":"Europe","authority_type":"police","base_url":"https://collector.test"}}, + {"type":"kev-json","feed_url":"https://collector.test/kev","category":"cyber_advisory","region_tag":"US","lat":38.8,"lng":-77.0,"source":{"source_id":"kev-source","authority_name":"KEV Source","country":"United States","country_code":"US","region":"North America","authority_type":"cert","base_url":"https://www.cisa.gov"}}, + {"type":"interpol-red-json","feed_url":"https://collector.test/interpol","category":"wanted_suspect","region_tag":"INT","lat":45.7,"lng":4.8,"source":{"source_id":"interpol-red","authority_name":"Interpol Red","country":"France","country_code":"FR","region":"International","authority_type":"police","base_url":"https://www.interpol.int"}} + ]`) + if err := os.WriteFile(registryPath, registry, 0o644); err != nil { + t.Fatal(err) + } + + cfg := config.Default() + cfg.RegistryPath = registryPath + cfg.OutputPath = filepath.Join(dir, "alerts.json") + cfg.FilteredOutputPath = filepath.Join(dir, "filtered.json") + cfg.StateOutputPath = filepath.Join(dir, "state.json") + cfg.SourceHealthOutputPath = filepath.Join(dir, "health.json") + cfg.MaxAgeDays = 10000 + + runner := New(io.Discard, io.Discard) + runner.clientFactory = func(cfg config.Config) *fetch.Client { 
+ return fetch.NewWithHTTPClient(cfg, &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + var body string + switch req.URL.Path { + case "/rss": + body = `Critical cyber advisoryhttps://collector.test/rss-itemMon, 02 Jan 2026 15:04:05 MSTCVE-2026-1234 patch advisory` + case "/html": + body = `Wanted suspect public appeal` + case "/kev": + body = `{"vulnerabilities":[{"cveID":"CVE-2026-9999","vulnerabilityName":"Test vuln","shortDescription":"Known exploited issue","dateAdded":"2026-01-01","knownRansomwareCampaign":true}]}` + case "/interpol": + body = `{"_embedded":{"notices":[{"forename":"Jane","name":"Doe","issuing_entity":"Interpol","place_of_birth":"Paris","nationalities":["FR"],"_links":{"self":{"href":"https://ws-public.interpol.int/notices/v1/red/123"}}}]}}` + default: + return &http.Response{StatusCode: 404, Body: io.NopCloser(strings.NewReader("not found")), Header: make(http.Header)}, nil + } + return &http.Response{StatusCode: 200, Body: io.NopCloser(strings.NewReader(body)), Header: make(http.Header)}, nil + }), + }) + } + if err := runner.Run(context.Background(), cfg); err != nil { + t.Fatal(err) + } + + rawAlerts, err := os.ReadFile(cfg.OutputPath) + if err != nil { + t.Fatal(err) + } + var alerts []model.Alert + if err := json.Unmarshal(rawAlerts, &alerts); err != nil { + t.Fatal(err) + } + if len(alerts) == 0 { + t.Fatalf("expected active alerts, got %#v", alerts) + } + + rawHealth, err := os.ReadFile(cfg.SourceHealthOutputPath) + if err != nil { + t.Fatal(err) + } + var health model.SourceHealthDocument + if err := json.Unmarshal(rawHealth, &health); err != nil { + t.Fatal(err) + } + if health.TotalSources != 4 { + t.Fatalf("expected 4 sources in health document, got %d", health.TotalSources) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return fn(req) +} diff --git 
a/internal/collector/state/state.go b/internal/collector/state/state.go new file mode 100644 index 0000000..b74ead7 --- /dev/null +++ b/internal/collector/state/state.go @@ -0,0 +1,80 @@ +package state + +import ( + "encoding/json" + "os" + "sort" + "time" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" +) + +func Read(path string) []model.Alert { + data, err := os.ReadFile(path) + if err != nil { + return nil + } + var alerts []model.Alert + if err := json.Unmarshal(data, &alerts); err != nil { + return nil + } + return alerts +} + +func Reconcile(cfg config.Config, active []model.Alert, filtered []model.Alert, previous []model.Alert, now time.Time) ([]model.Alert, []model.Alert, []model.Alert) { + nowISO := now.UTC().Format(time.RFC3339) + retentionCutoff := now.Add(-time.Duration(cfg.RemovedRetentionDays) * 24 * time.Hour) + previousByID := map[string]model.Alert{} + presentByID := map[string]struct{}{} + for _, alert := range previous { + previousByID[alert.AlertID] = alert + } + for _, alert := range append(append([]model.Alert{}, active...), filtered...) 
{ + presentByID[alert.AlertID] = struct{}{} + } + + currentActive := make([]model.Alert, 0, len(active)) + for _, alert := range active { + if prev, ok := previousByID[alert.AlertID]; ok && prev.FirstSeen != "" { + alert.FirstSeen = prev.FirstSeen + } + alert.Status = "active" + alert.LastSeen = nowISO + currentActive = append(currentActive, alert) + } + + currentFiltered := make([]model.Alert, 0, len(filtered)) + for _, alert := range filtered { + if prev, ok := previousByID[alert.AlertID]; ok && prev.FirstSeen != "" { + alert.FirstSeen = prev.FirstSeen + } + alert.Status = "filtered" + alert.LastSeen = nowISO + currentFiltered = append(currentFiltered, alert) + } + + removed := []model.Alert{} + for _, prev := range previous { + if _, ok := presentByID[prev.AlertID]; ok { + continue + } + if prev.Status == "removed" { + lastSeen, err := time.Parse(time.RFC3339, prev.LastSeen) + if err == nil && !lastSeen.Before(retentionCutoff) { + removed = append(removed, prev) + } + continue + } + if prev.Status == "filtered" { + continue + } + prev.Status = "removed" + prev.LastSeen = nowISO + removed = append(removed, prev) + } + + fullState := append(append(append([]model.Alert{}, currentActive...), currentFiltered...), removed...) 
+ sort.Slice(fullState, func(i, j int) bool { return fullState[i].LastSeen > fullState[j].LastSeen }) + return currentActive, currentFiltered, fullState +} diff --git a/internal/collector/state/state_test.go b/internal/collector/state/state_test.go new file mode 100644 index 0000000..42217e4 --- /dev/null +++ b/internal/collector/state/state_test.go @@ -0,0 +1,37 @@ +package state + +import ( + "testing" + "time" + + "github.com/scalytics/euosint/internal/collector/config" + "github.com/scalytics/euosint/internal/collector/model" +) + +func TestReconcileCarriesForwardAndRemoves(t *testing.T) { + cfg := config.Default() + now := time.Date(2026, 1, 2, 3, 4, 5, 0, time.UTC) + active := []model.Alert{{AlertID: "a", FirstSeen: now.Add(-time.Hour).Format(time.RFC3339), LastSeen: now.Format(time.RFC3339)}} + filtered := []model.Alert{{AlertID: "b", FirstSeen: now.Add(-2 * time.Hour).Format(time.RFC3339)}} + previous := []model.Alert{ + {AlertID: "a", FirstSeen: now.Add(-24 * time.Hour).Format(time.RFC3339), Status: "active", LastSeen: now.Add(-time.Hour).Format(time.RFC3339)}, + {AlertID: "c", FirstSeen: now.Add(-24 * time.Hour).Format(time.RFC3339), Status: "active", LastSeen: now.Add(-time.Hour).Format(time.RFC3339)}, + } + + currentActive, currentFiltered, fullState := Reconcile(cfg, active, filtered, previous, now) + if currentActive[0].FirstSeen != previous[0].FirstSeen { + t.Fatalf("expected first_seen to carry forward, got %q", currentActive[0].FirstSeen) + } + if currentFiltered[0].Status != "filtered" { + t.Fatalf("expected filtered status, got %q", currentFiltered[0].Status) + } + foundRemoved := false + for _, alert := range fullState { + if alert.AlertID == "c" && alert.Status == "removed" { + foundRemoved = true + } + } + if !foundRemoved { + t.Fatalf("expected removed alert in state %#v", fullState) + } +} diff --git a/package.json b/package.json index e91922d..3db7792 100644 --- a/package.json +++ b/package.json @@ -14,9 +14,12 @@ }, "scripts": { "dev": 
"vite", - "fetch:alerts": "node scripts/fetch-alerts.mjs", - "fetch:alerts:watch": "node scripts/fetch-alerts.mjs --watch", - "collector:run": "node scripts/continuous-collector.mjs", + "fetch:alerts": "go run ./cmd/euosint-collector", + "fetch:alerts:watch": "go run ./cmd/euosint-collector --watch", + "collector:run": "go run ./cmd/euosint-collector --watch", + "fetch:alerts:legacy": "node scripts/fetch-alerts.mjs", + "fetch:alerts:watch:legacy": "node scripts/fetch-alerts.mjs --watch", + "collector:run:legacy": "node scripts/continuous-collector.mjs", "typecheck": "tsc -b --pretty false", "test": "npm run typecheck", "build": "tsc -b && vite build", From e05d181bc554fd1eaae20c0d52c1c3d10c753523 Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 09:49:09 +0100 Subject: [PATCH 2/7] feat: retire node collector and switch web runtime to caddy --- .env.example | 3 + .github/workflows/alerts-feed.yml | 15 +- Dockerfile | 10 +- Dockerfile.collector | 7 +- Makefile | 20 +- README.md | 31 +- docker-compose.yml | 42 +- docker/Caddyfile | 40 + docker/collector-entrypoint.sh | 20 + docker/nginx.conf | 11 - docs/collector-migration.md | 10 +- docs/euosint.service | 16 + docs/operations.md | 74 + internal/collector/app/app.go | 1 + internal/collector/config/config.go | 3 + internal/collector/run/run.go | 8 + internal/collector/translate/google.go | 73 + package.json | 5 +- registry/source_registry.json | 2892 ++++++++++++++- scripts/continuous-collector.mjs | 93 - scripts/fetch-alerts.mjs | 4575 ------------------------ 21 files changed, 3148 insertions(+), 4801 deletions(-) create mode 100644 .env.example create mode 100644 docker/Caddyfile create mode 100644 docker/collector-entrypoint.sh delete mode 100644 docker/nginx.conf create mode 100644 docs/euosint.service create mode 100644 docs/operations.md create mode 100644 internal/collector/translate/google.go delete mode 100644 scripts/continuous-collector.mjs delete mode 100644 scripts/fetch-alerts.mjs diff --git 
a/.env.example b/.env.example new file mode 100644 index 0000000..2084099 --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +EUOSINT_SITE_ADDRESS=:80 +EUOSINT_HTTP_PORT=8080 +EUOSINT_HTTPS_PORT=8443 diff --git a/.github/workflows/alerts-feed.yml b/.github/workflows/alerts-feed.yml index ae195f5..c3dbf3c 100644 --- a/.github/workflows/alerts-feed.yml +++ b/.github/workflows/alerts-feed.yml @@ -22,17 +22,14 @@ jobs: with: fetch-depth: 0 - - name: Setup Node - uses: actions/setup-node@v4 + - name: Setup Go + uses: actions/setup-go@v5 with: - node-version-file: .nvmrc - cache: npm + go-version-file: go.mod + cache: true - - name: Install dependencies - run: npm ci - - - name: Fetch alerts with parity reference collector - run: node scripts/fetch-alerts.mjs + - name: Fetch alerts with Go collector + run: go run ./cmd/euosint-collector env: MAX_PER_SOURCE: "20" OUTPUT_PATH: "public/alerts.json" diff --git a/Dockerfile b/Dockerfile index 0004c4d..7027da3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,11 +8,11 @@ RUN npm install -g npm@11.11.0 && npm ci COPY . . 
RUN npm run build -FROM nginx:1.27-alpine +FROM caddy:2.10-alpine -COPY docker/nginx.conf /etc/nginx/conf.d/default.conf -COPY --from=build /app/dist /usr/share/nginx/html +COPY docker/Caddyfile /etc/caddy/Caddyfile +COPY --from=build /app/dist /srv -EXPOSE 8080 +EXPOSE 80 443 -CMD ["nginx", "-g", "daemon off;"] +CMD ["caddy", "run", "--config", "/etc/caddy/Caddyfile", "--adapter", "caddyfile"] diff --git a/Dockerfile.collector b/Dockerfile.collector index 40c62b0..bfb8012 100644 --- a/Dockerfile.collector +++ b/Dockerfile.collector @@ -6,6 +6,7 @@ COPY go.mod ./ COPY cmd ./cmd COPY internal ./internal COPY registry ./registry +COPY public ./public RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/euosint-collector ./cmd/euosint-collector @@ -17,5 +18,9 @@ WORKDIR /app COPY --from=build /out/euosint-collector /usr/local/bin/euosint-collector COPY registry ./registry +COPY public ./public-defaults +COPY docker/collector-entrypoint.sh /usr/local/bin/collector-entrypoint.sh -ENTRYPOINT ["euosint-collector"] +RUN chmod +x /usr/local/bin/collector-entrypoint.sh + +ENTRYPOINT ["/usr/local/bin/collector-entrypoint.sh"] diff --git a/Makefile b/Makefile index 7d0f9e4..444d9bb 100644 --- a/Makefile +++ b/Makefile @@ -22,8 +22,9 @@ BRANCH ?= main RELEASE_LEVEL ?= patch .PHONY: help check check-commit install clean lint typecheck test build ci \ - go-fmt go-test go-race go-cover go-vet go-codeql commit-check \ + go-fmt go-test go-race go-cover go-vet go-codeql collector-parity commit-check \ docker-build docker-up docker-down docker-logs docker-shell \ + dev-start dev-stop dev-restart dev-logs \ code-ql code-ql-summary \ release-patch release-minor release-major \ branch-protection @@ -113,6 +114,23 @@ docker-logs: ## Tail Docker logs docker-shell: ## Open a shell in the running container $(DOCKER_COMPOSE) exec euosint sh +dev-start: ## Start the local HTTP dev stack on localhost + $(DOCKER_COMPOSE) up --build -d + @echo "EUOSINT available at 
http://localhost:$${EUOSINT_HTTP_PORT:-8080}" + @open "http://localhost:$${EUOSINT_HTTP_PORT:-8080}" + +dev-stop: ## Stop the local dev stack + $(DOCKER_COMPOSE) down --remove-orphans + +dev-restart: ## Restart the local dev stack + $(DOCKER_COMPOSE) down --remove-orphans + $(DOCKER_COMPOSE) up --build -d + @echo "EUOSINT available at http://localhost:$${EUOSINT_HTTP_PORT:-8080}" + @open "http://localhost:$${EUOSINT_HTTP_PORT:-8080}" + +dev-logs: ## Tail local dev stack logs + $(DOCKER_COMPOSE) logs -f --tail=200 + code-ql: ## Run CodeQL CLI locally for JavaScript/TypeScript @command -v codeql >/dev/null 2>&1 || { echo "codeql CLI is required"; exit 1; } rm -rf $(CODEQL_JS_DB) diff --git a/README.md b/README.md index fff4868..2a486c4 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,28 @@ This project is based on the work of `cyberdude88/osint-siem` and the downstream ## Run With Docker ```bash -docker-compose up --build +if command -v docker-compose >/dev/null 2>&1; then + docker-compose up --build +else + docker compose up --build +fi ``` The application will be available at `http://localhost:8080`. +You can also use the Make targets for local HTTP development: + +```bash +make dev-start +make dev-stop +make dev-restart +make dev-logs +``` + - The release pipeline now builds two images: a web image and a Go collector image. -- The scheduled feed refresh workflow still uses the legacy Node collector until the Go collector reaches parity with the reference implementation. +- The scheduled feed refresh workflow now runs the Go collector. +- The web image now uses Caddy instead of nginx, with the collector output mounted into the web container at runtime. +- In Docker dev mode, the collector seeds the shared feed volume with the repository snapshots first, then replaces them with live output on the first successful run. 
## Run Locally Without Docker @@ -41,14 +56,6 @@ Tuning examples: INTERVAL_MS=120000 MAX_PER_SOURCE=80 npm run collector:run ``` -Legacy reference collector commands remain available during parity work: - -```bash -npm run fetch:alerts:legacy -npm run fetch:alerts:watch:legacy -npm run collector:run:legacy -``` - ## Operations ```bash @@ -61,10 +68,12 @@ make docker-build - `.github/workflows/branch-protection.yml` applies protection to `main` using the `ADMIN_GITHUB_TOKEN` repository secret. - Docker validation runs through `buildx`, and release images publish to GHCR on semver tags. - Release images are published as `ghcr.io//-web` and `ghcr.io//-collector`. +- `docker-compose up --build` or `docker compose up --build` now runs the Go collector as a background refresh service and serves the generated JSON through the Caddy web container. +- VM/domain deployment instructions live in [docs/operations.md](docs/operations.md). ## Notes - Local toolchain is pinned to Node `25.8.1` and npm `11.11.0` via `package.json`, `.nvmrc`, and `.node-version`. -- The Go collector is the target backend, but `scripts/fetch-alerts.mjs` remains the operational parity reference until the Go output is validated against it. +- The Go collector is now the operational backend for scheduled feed refreshes, Docker runtime, and local commands. - The imported application still reflects upstream geographic coverage and source selection; EU-specific source tuning is a follow-up change. - The root `LICENSE` applies to repository-local materials and modifications added here. Upstream repository metadata should be reviewed separately for inherited code provenance. diff --git a/docker-compose.yml b/docker-compose.yml index 4caf5f5..6f25178 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,48 @@ services: + collector: + build: + context: . 
+ dockerfile: Dockerfile.collector + command: + - --watch + - --registry + - /app/registry/source_registry.json + - --output + - /data/alerts.json + - --filtered-output + - /data/alerts-filtered.json + - --state-output + - /data/alerts-state.json + - --source-health-output + - /data/source-health.json + environment: + INTERVAL_MS: "900000" + MAX_PER_SOURCE: "20" + MISSING_PERSON_RELEVANCE_THRESHOLD: "0" + FAIL_ON_CRITICAL_SOURCE_GAP: "0" + CRITICAL_SOURCE_PREFIXES: "interpol-red,interpol-yellow" + volumes: + - feed-data:/data + restart: unless-stopped + euosint: build: context: . dockerfile: Dockerfile + depends_on: + - collector + environment: + EUOSINT_SITE_ADDRESS: "${EUOSINT_SITE_ADDRESS:-:80}" ports: - - "8080:8080" + - "${EUOSINT_HTTP_PORT:-8080}:80" + - "${EUOSINT_HTTPS_PORT:-8443}:443" + volumes: + - feed-data:/var/lib/euosint:ro + - caddy-data:/data + - caddy-config:/config restart: unless-stopped + +volumes: + feed-data: + caddy-data: + caddy-config: diff --git a/docker/Caddyfile b/docker/Caddyfile new file mode 100644 index 0000000..4f230c1 --- /dev/null +++ b/docker/Caddyfile @@ -0,0 +1,40 @@ +{ + admin off +} + +{$EUOSINT_SITE_ADDRESS::80} { + root * /srv + encode zstd gzip + + header { + Permissions-Policy "camera=(), geolocation=(), microphone=()" + Referrer-Policy "strict-origin-when-cross-origin" + X-Content-Type-Options "nosniff" + X-Frame-Options "DENY" + } + + handle /alerts.json { + root * /var/lib/euosint + file_server + } + + handle /alerts-filtered.json { + root * /var/lib/euosint + file_server + } + + handle /alerts-state.json { + root * /var/lib/euosint + file_server + } + + handle /source-health.json { + root * /var/lib/euosint + file_server + } + + handle { + try_files {path} /index.html + file_server + } +} diff --git a/docker/collector-entrypoint.sh b/docker/collector-entrypoint.sh new file mode 100644 index 0000000..e7f010f --- /dev/null +++ b/docker/collector-entrypoint.sh @@ -0,0 +1,20 @@ +#!/bin/sh +set -eu + +seed_if_missing() { + 
source_path="$1" + target_path="$2" + + if [ ! -f "$target_path" ] && [ -f "$source_path" ]; then + cp "$source_path" "$target_path" + fi +} + +mkdir -p /data + +seed_if_missing /app/public-defaults/alerts.json /data/alerts.json +seed_if_missing /app/public-defaults/alerts-filtered.json /data/alerts-filtered.json +seed_if_missing /app/public-defaults/alerts-state.json /data/alerts-state.json +seed_if_missing /app/public-defaults/source-health.json /data/source-health.json + +exec euosint-collector "$@" diff --git a/docker/nginx.conf b/docker/nginx.conf deleted file mode 100644 index cac4dcd..0000000 --- a/docker/nginx.conf +++ /dev/null @@ -1,11 +0,0 @@ -server { - listen 8080; - server_name _; - - root /usr/share/nginx/html; - index index.html; - - location / { - try_files $uri $uri/ /index.html; - } -} diff --git a/docs/collector-migration.md b/docs/collector-migration.md index 0bdda37..e226b73 100644 --- a/docs/collector-migration.md +++ b/docs/collector-migration.md @@ -1,8 +1,6 @@ # Collector Migration -The existing Node collector in `scripts/fetch-alerts.mjs` remains the reference implementation until the Go collector reaches output parity. - -Operational rule: scheduled feed generation must stay on the legacy Node collector until parity is explicitly verified. The Go collector can ship in images and local tooling before it becomes the production feed generator. +The collector runtime is now fully Go-based. The Node collector has been retired from operational paths, and scheduled feed generation, Docker runtime, and local commands all run through `cmd/euosint-collector`. ## Goals @@ -26,6 +24,8 @@ Operational rule: scheduled feed generation must stay on the legacy Node collect 5. Output writers for alerts, state, filtered alerts, and source health 6. Watch mode and retry orchestration -## Coexistence Rule +## Outcome -Until the Go collector can reproduce the Node collector outputs for a representative fixture set, production collection stays on Node. 
+- Source registry remains external in `registry/source_registry.json` +- Scheduled feed generation runs through the Go collector +- Docker runtime runs the Go collector sidecar plus the Caddy-served UI diff --git a/docs/euosint.service b/docs/euosint.service new file mode 100644 index 0000000..6c6d78e --- /dev/null +++ b/docs/euosint.service @@ -0,0 +1,16 @@ +[Unit] +Description=EUOSINT Docker Compose Stack +Requires=docker.service +After=docker.service network-online.target +Wants=network-online.target + +[Service] +Type=oneshot +WorkingDirectory=/opt/euosint +RemainAfterExit=yes +ExecStart=/usr/bin/docker compose up -d --build +ExecStop=/usr/bin/docker compose down --remove-orphans +TimeoutStartSec=0 + +[Install] +WantedBy=multi-user.target diff --git a/docs/operations.md b/docs/operations.md new file mode 100644 index 0000000..9a11c1e --- /dev/null +++ b/docs/operations.md @@ -0,0 +1,74 @@ +# Operations + +## Runtime Model + +The production stack has two containers: + +- `collector`: the Go collector running in watch mode and writing refreshed JSON feeds into a shared Docker volume +- `euosint`: the React bundle served by Caddy, reading the shared JSON volume and serving the UI plus feed files + +The web service no longer uses nginx. Caddy serves the SPA, exposes `/alerts.json`, `/alerts-filtered.json`, `/alerts-state.json`, and `/source-health.json`, and can manage TLS automatically when you give it a real domain. 
+ +## Local Compose + +Copy the example environment file and start the stack: + +```bash +cp .env.example .env +docker compose up --build -d +``` + +If your host only has the legacy standalone `docker-compose` binary installed, use: + +```bash +docker-compose up --build -d +``` + +Default local behavior: + +- HTTP on `http://localhost:8080` +- HTTPS listener mapped to `https://localhost:8443` but not used unless `EUOSINT_SITE_ADDRESS` is changed to a hostname that enables TLS +- The collector seeds the shared feed volume with a bundled JSON snapshot for each feed file that is missing, so the UI has data immediately while the first live refresh runs + +## Domain Setup For A VM + +1. Provision a VM with Docker Engine and Docker Compose available. +2. Point a DNS `A` record for your chosen hostname to the VM public IPv4 address. +3. Open inbound TCP `80` and `443` on the VM firewall and any cloud security group. +4. Copy the repository to the VM. +5. Create a `.env` file in the repo root: + +```dotenv +EUOSINT_SITE_ADDRESS=alerts.example.com +EUOSINT_HTTP_PORT=80 +EUOSINT_HTTPS_PORT=443 +``` + +6. Start the stack: + +```bash +docker compose up --build -d +``` + +With a real domain in `EUOSINT_SITE_ADDRESS`, Caddy will request and renew TLS certificates automatically and store them in the `caddy-data` volume. + +## VM Service With systemd + +Use the checked-in unit at [docs/euosint.service](euosint.service) so the stack comes back after host reboots. + +Install it on the VM: + +```bash +sudo cp docs/euosint.service /etc/systemd/system/euosint.service +sudo systemctl daemon-reload +sudo systemctl enable --now euosint.service +``` + +If the VM only has `docker-compose`, adjust the unit commands accordingly. + +## Operational Notes + +- The collector writes feed output into the `feed-data` volume shared with the web container. +- TLS state and certificates persist in the `caddy-data` volume. +- Caddy runtime state persists in the `caddy-config` volume. 
+- Scheduled refreshes, Docker runtime, and local collection commands all run through the Go collector. diff --git a/internal/collector/app/app.go b/internal/collector/app/app.go index 1c3f1a8..b4371fc 100644 --- a/internal/collector/app/app.go +++ b/internal/collector/app/app.go @@ -27,6 +27,7 @@ func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) fs.Float64Var(&cfg.IncidentRelevanceThreshold, "incident-threshold", cfg.IncidentRelevanceThreshold, "Default relevance threshold for active alerts") fs.Float64Var(&cfg.MissingPersonRelevanceThreshold, "missing-person-threshold", cfg.MissingPersonRelevanceThreshold, "Relevance threshold for missing person alerts") fs.BoolVar(&cfg.FailOnCriticalSourceGap, "fail-on-critical-source-gap", cfg.FailOnCriticalSourceGap, "Fail the run when critical sources fetch zero records") + fs.BoolVar(&cfg.TranslateEnabled, "translate", cfg.TranslateEnabled, "Translate non-Latin RSS titles and summaries to English") if err := fs.Parse(args); err != nil { return err diff --git a/internal/collector/config/config.go b/internal/collector/config/config.go index 73756d5..af5608a 100644 --- a/internal/collector/config/config.go +++ b/internal/collector/config/config.go @@ -38,6 +38,7 @@ type Config struct { HTTPTimeoutMS int MaxResponseBodyBytes int64 UserAgent string + TranslateEnabled bool } func Default() Config { @@ -59,6 +60,7 @@ func Default() Config { HTTPTimeoutMS: defaultTimeoutMS, MaxResponseBodyBytes: defaultMaxBodyBytes, UserAgent: "euosint-bot/1.0", + TranslateEnabled: true, } } @@ -81,6 +83,7 @@ func FromEnv() Config { cfg.HTTPTimeoutMS = envInt("HTTP_TIMEOUT_MS", cfg.HTTPTimeoutMS) cfg.MaxResponseBodyBytes = int64(envInt("MAX_RESPONSE_BODY_BYTES", int(cfg.MaxResponseBodyBytes))) cfg.UserAgent = envString("USER_AGENT", cfg.UserAgent) + cfg.TranslateEnabled = envBool("TRANSLATE_ENABLED", cfg.TranslateEnabled) return cfg } diff --git a/internal/collector/run/run.go b/internal/collector/run/run.go index 
0d2c92d..78ef974 100644 --- a/internal/collector/run/run.go +++ b/internal/collector/run/run.go @@ -17,6 +17,7 @@ import ( "github.com/scalytics/euosint/internal/collector/parse" "github.com/scalytics/euosint/internal/collector/registry" "github.com/scalytics/euosint/internal/collector/state" + "github.com/scalytics/euosint/internal/collector/translate" ) type Runner struct { @@ -131,6 +132,13 @@ func (r Runner) fetchRSS(ctx context.Context, client *fetch.Client, nctx normali return nil, err } items := parse.ParseFeed(string(body)) + if nctx.Config.TranslateEnabled { + if translated, err := translate.Batch(ctx, client, items); err == nil { + items = translated + } else { + fmt.Fprintf(r.stderr, "WARN %s: translate batch failed: %v\n", source.Source.AuthorityName, err) + } + } limit := perSourceLimit(nctx.Config, source) out := make([]model.Alert, 0, limit) for _, item := range items { diff --git a/internal/collector/translate/google.go b/internal/collector/translate/google.go new file mode 100644 index 0000000..786091f --- /dev/null +++ b/internal/collector/translate/google.go @@ -0,0 +1,73 @@ +package translate + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "regexp" + "strings" + + "github.com/scalytics/euosint/internal/collector/fetch" + "github.com/scalytics/euosint/internal/collector/parse" +) + +var nonLatinRE = regexp.MustCompile(`[\p{Han}\p{Hangul}\p{Cyrillic}\p{Arabic}\p{Thai}]`) + +func Batch(ctx context.Context, client *fetch.Client, items []parse.FeedItem) ([]parse.FeedItem, error) { + out := make([]parse.FeedItem, 0, len(items)) + for _, item := range items { + next := item + var err error + if nonLatinRE.MatchString(next.Title) { + next.Title, err = toEnglish(ctx, client, next.Title) + if err != nil { + return nil, err + } + } + if nonLatinRE.MatchString(next.Summary) { + next.Summary, err = toEnglish(ctx, client, next.Summary) + if err != nil { + return nil, err + } + } + out = append(out, next) + } + return out, nil +} + +func 
toEnglish(ctx context.Context, client *fetch.Client, text string) (string, error) { + text = strings.TrimSpace(text) + if text == "" { + return text, nil + } + endpoint := "https://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl=en&dt=t&q=" + url.QueryEscape(text) + body, err := client.Text(ctx, endpoint, true, "application/json") + if err != nil { + return text, err + } + var doc []any + if err := json.Unmarshal(body, &doc); err != nil { + return text, fmt.Errorf("decode translate response: %w", err) + } + first, ok := doc[0].([]any) + if !ok { + return text, nil + } + var builder strings.Builder + for _, segment := range first { + pair, ok := segment.([]any) + if !ok || len(pair) == 0 { + continue + } + value, ok := pair[0].(string) + if ok { + builder.WriteString(value) + } + } + translated := strings.TrimSpace(builder.String()) + if translated == "" { + return text, nil + } + return translated, nil +} diff --git a/package.json b/package.json index 3db7792..cd3847b 100644 --- a/package.json +++ b/package.json @@ -17,16 +17,13 @@ "fetch:alerts": "go run ./cmd/euosint-collector", "fetch:alerts:watch": "go run ./cmd/euosint-collector --watch", "collector:run": "go run ./cmd/euosint-collector --watch", - "fetch:alerts:legacy": "node scripts/fetch-alerts.mjs", - "fetch:alerts:watch:legacy": "node scripts/fetch-alerts.mjs --watch", - "collector:run:legacy": "node scripts/continuous-collector.mjs", "typecheck": "tsc -b --pretty false", "test": "npm run typecheck", "build": "tsc -b && vite build", "lint": "eslint .", "preview": "vite preview", "ci": "npm run lint && npm run test && npm run build", - "docker:run": "docker-compose up --build" + "docker:run": "sh -c 'if command -v docker-compose >/dev/null 2>&1; then docker-compose up --build; else docker compose up --build; fi'" }, "dependencies": { "@types/three": "^0.182.0", diff --git a/registry/source_registry.json b/registry/source_registry.json index 4fe2fe0..c5dc9a9 100644 --- 
a/registry/source_registry.json +++ b/registry/source_registry.json @@ -1,4 +1,2383 @@ [ + { + "type": "kev-json", + "source": { + "source_id": "cisa-kev", + "authority_name": "CISA", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "cert", + "base_url": "https://www.cisa.gov" + }, + "feed_url": "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json", + "category": "cyber_advisory", + "region_tag": "US", + "lat": 38.88, + "lng": -77.02, + "reporting": { + "label": "Report to CISA", + "url": "https://www.cisa.gov/report", + "notes": "Use 911 for emergencies." + } + }, + { + "type": "rss", + "source": { + "source_id": "fbi", + "authority_name": "FBI", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://www.fbi.gov/feeds/fbi-top-stories/rss.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.9, + "lng": -77, + "reporting": { + "label": "Report to FBI", + "url": "https://tips.fbi.gov/", + "phone": "1-800-CALL-FBI (1-800-225-5324)", + "notes": "Use 911 for emergencies." + } + }, + { + "type": "rss", + "source": { + "source_id": "fbi-wanted", + "authority_name": "FBI Wanted", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://www.fbi.gov/feeds/all-wanted/rss.xml", + "category": "wanted_suspect", + "region_tag": "US", + "lat": 38.9, + "lng": -77, + "reporting": { + "label": "Submit a Tip to FBI", + "url": "https://tips.fbi.gov/", + "phone": "1-800-CALL-FBI (1-800-225-5324)", + "notes": "Use 911 for emergencies." 
+ } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "europol", + "authority_name": "Europol", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.europol.europa.eu" + }, + "feed_url": "https://www.europol.europa.eu/rss.xml", + "category": "public_appeal", + "region_tag": "EU", + "lat": 52.09, + "lng": 4.27, + "reporting": { + "label": "Report to Europol", + "url": "https://www.europol.europa.eu/report-a-crime" + } + }, + { + "type": "rss", + "source": { + "source_id": "ncsc-uk", + "authority_name": "NCSC UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.ncsc.gov.uk" + }, + "feed_url": "https://www.ncsc.gov.uk/api/1/services/v1/report-rss-feed.xml", + "category": "cyber_advisory", + "region_tag": "GB", + "lat": 51.5, + "lng": -0.13, + "reporting": { + "label": "Report to NCSC", + "url": "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website" + } + }, + { + "type": "rss", + "source": { + "source_id": "ncsc-uk-all", + "authority_name": "NCSC UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.ncsc.gov.uk" + }, + "feed_url": "https://www.ncsc.gov.uk/api/1/services/v1/all-rss-feed.xml", + "category": "cyber_advisory", + "region_tag": "GB", + "lat": 51.51, + "lng": -0.1, + "reporting": { + "label": "Report to NCSC", + "url": "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website" + } + }, + { + "type": "rss", + "source": { + "source_id": "nz-police-news", + "authority_name": "NZ Police", + "country": "New Zealand", + "country_code": "NZ", + "region": "Oceania", + "authority_type": "police", + "base_url": "https://www.police.govt.nz" + }, + "feed_url": "https://www.police.govt.nz/rss/news", + "category": "public_safety", + "region_tag": "NZ", + "lat": -41.29, 
+ "lng": 174.78, + "reporting": { + "label": "Report to NZ Police", + "url": "https://www.police.govt.nz/use-105", + "phone": "111 (Emergency) / 105 (Non-emergency)" + } + }, + { + "type": "rss", + "source": { + "source_id": "nz-police-alerts", + "authority_name": "NZ Police", + "country": "New Zealand", + "country_code": "NZ", + "region": "Oceania", + "authority_type": "police", + "base_url": "https://www.police.govt.nz" + }, + "feed_url": "https://www.police.govt.nz/rss/alerts", + "category": "public_appeal", + "region_tag": "NZ", + "lat": -41.29, + "lng": 174.78, + "reporting": { + "label": "Report to NZ Police", + "url": "https://www.police.govt.nz/use-105", + "phone": "111 (Emergency) / 105 (Non-emergency)" + } + }, + { + "type": "rss", + "source": { + "source_id": "ncmec", + "authority_name": "NCMEC", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.missingkids.org" + }, + "feed_url": "https://api.missingkids.org/missingkids/servlet/XmlServlet?LanguageCountry=en_US&act=rss&orgPrefix=NCMC", + "category": "missing_person", + "region_tag": "US", + "lat": 39.83, + "lng": -98.58, + "reporting": { + "label": "Report to NCMEC", + "url": "https://report.cybertip.org/", + "phone": "1-800-THE-LOST (1-800-843-5678)", + "notes": "Use 911 for immediate danger." 
+ } + }, + { + "type": "rss", + "source": { + "source_id": "cis-msisac", + "authority_name": "CIS MS-ISAC", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "cert", + "base_url": "https://www.cisecurity.org" + }, + "feed_url": "https://www.cisecurity.org/feed/advisories", + "category": "cyber_advisory", + "region_tag": "US", + "lat": 42.65, + "lng": -73.76, + "reporting": { + "label": "Report to MS-ISAC", + "url": "https://www.cisecurity.org/ms-isac/services/soc", + "phone": "1-866-787-4722", + "email": "soc@cisecurity.org", + "notes": "24/7 Security Operations Center for state, local, tribal, and territorial governments." + } + }, + { + "type": "rss", + "source": { + "source_id": "ca-oag", + "authority_name": "California AG", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://oag.ca.gov" + }, + "feed_url": "https://oag.ca.gov/news/feed", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.58, + "lng": -121.49, + "reporting": { + "label": "Report to CA Attorney General", + "url": "https://oag.ca.gov/contact/consumer-complaint-against-business-or-company", + "phone": "1-800-952-5225" + } + }, + { + "type": "rss", + "source": { + "source_id": "cert-fr", + "authority_name": "CERT-FR", + "country": "France", + "country_code": "FR", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.cert.ssi.gouv.fr" + }, + "feed_url": "https://www.cert.ssi.gouv.fr/feed/", + "category": "cyber_advisory", + "region_tag": "FR", + "lat": 48.86, + "lng": 2.35, + "reporting": { + "label": "Report to CERT-FR", + "url": "https://www.cert.ssi.gouv.fr/contact/", + "email": "cert-fr@ssi.gouv.fr" + } + }, + { + "type": "rss", + "source": { + "source_id": "ncsc-nl", + "authority_name": "NCSC-NL", + "country": "Netherlands", + "country_code": "NL", + "region": "Europe", + "authority_type": "cert", + "base_url": 
"https://advisories.ncsc.nl" + }, + "feed_url": "https://advisories.ncsc.nl/rss/advisories", + "category": "cyber_advisory", + "region_tag": "NL", + "lat": 52.07, + "lng": 4.3, + "reporting": { + "label": "Report to NCSC-NL", + "url": "https://www.ncsc.nl/contact/kwetsbaarheid-melden", + "email": "cert@ncsc.nl" + } + }, + { + "type": "rss", + "source": { + "source_id": "jpcert", + "authority_name": "JPCERT/CC", + "country": "Japan", + "country_code": "JP", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.jpcert.or.jp" + }, + "feed_url": "https://www.jpcert.or.jp/english/rss/jpcert-en.rdf", + "category": "cyber_advisory", + "region_tag": "JP", + "lat": 35.68, + "lng": 139.69, + "reporting": { + "label": "Report to JPCERT/CC", + "url": "https://www.jpcert.or.jp/english/ir/form.html", + "email": "info@jpcert.or.jp" + } + }, + { + "type": "rss", + "source": { + "source_id": "policia-colombia", + "authority_name": "Policía Nacional de Colombia", + "country": "Colombia", + "country_code": "CO", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.policia.gov.co" + }, + "feed_url": "https://www.policia.gov.co/rss.xml", + "category": "public_appeal", + "region_tag": "CO", + "lat": 4.71, + "lng": -74.07, + "reporting": { + "label": "Report to Policía Nacional", + "url": "https://www.policia.gov.co/denuncia-virtual", + "phone": "123 (Emergency) / 112 (Línea única)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "cisa-alerts", + "authority_name": "CISA Alerts", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "cert", + "base_url": "https://www.cisa.gov" + }, + "feed_url": "https://www.cisa.gov/cybersecurity-advisories/all.xml", + "category": "cyber_advisory", + "region_tag": "US", + "lat": 38.89, + "lng": -77.03, + "reporting": { + "label": "Report to CISA", + "url": "https://www.cisa.gov/report", + "phone": "1-888-282-0870", 
+ "email": "central@cisa.dhs.gov", + "notes": "Use 911 for emergencies." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "dhs", + "authority_name": "DHS", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "national_security", + "base_url": "https://www.dhs.gov" + }, + "feed_url": "https://www.dhs.gov/news/rss.xml", + "category": "public_safety", + "region_tag": "US", + "lat": 38.886, + "lng": -77.015, + "reporting": { + "label": "Report to DHS", + "url": "https://www.dhs.gov/see-something-say-something/how-to-report-suspicious-activity", + "phone": "1-866-347-2423" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "usss", + "authority_name": "US Secret Service", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.secretservice.gov" + }, + "feed_url": "https://www.secretservice.gov/rss.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.899, + "lng": -77.034, + "reporting": { + "label": "Report to Secret Service", + "url": "https://www.secretservice.gov/contact", + "phone": "1-202-406-5708" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "dea", + "authority_name": "DEA", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.dea.gov" + }, + "feed_url": "https://www.dea.gov/press-releases/rss.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.871, + "lng": -77.053, + "reporting": { + "label": "Report to DEA", + "url": "https://www.dea.gov/submit-tip", + "phone": "1-877-792-2873" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "atf", + "authority_name": "ATF", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": 
"police", + "base_url": "https://www.atf.gov" + }, + "feed_url": "https://www.atf.gov/news/rss.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.893, + "lng": -77.025, + "reporting": { + "label": "Report to ATF", + "url": "https://www.atf.gov/contact/atf-tips", + "phone": "1-888-283-8477", + "email": "atftips@atf.gov" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "usms", + "authority_name": "US Marshals", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.usmarshals.gov" + }, + "feed_url": "https://www.usmarshals.gov/news/news-releases.rss", + "category": "wanted_suspect", + "region_tag": "US", + "lat": 38.895, + "lng": -77.021, + "reporting": { + "label": "Report to US Marshals", + "url": "https://www.usmarshals.gov/tips", + "phone": "1-877-926-8332" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "nca-uk", + "authority_name": "NCA UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.nationalcrimeagency.gov.uk" + }, + "feed_url": "https://nationalcrimeagency.gov.uk/news?format=feed&type=rss", + "category": "public_appeal", + "region_tag": "GB", + "lat": 51.49, + "lng": -0.11, + "reporting": { + "label": "Report to NCA", + "url": "https://www.nationalcrimeagency.gov.uk/what-we-do/crime-threats/cyber-crime/reporting-cyber-crime", + "phone": "0370 496 7622" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "gmp-uk", + "authority_name": "Greater Manchester Police", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.gmp.police.uk" + }, + "feed_url": "https://www.gmp.police.uk/news/greater-manchester/rss/", + "category": "public_appeal", + "region_tag": "GB", + "lat": 53.48, + "lng": 
-2.24, + "reporting": { + "label": "Report to GMP", + "url": "https://www.gmp.police.uk/ro/report/", + "phone": "999 (Emergency) / 101 (Non-emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "met-police-uk", + "authority_name": "Met Police UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "police", + "base_url": "https://news.met.police.uk" + }, + "feed_url": "https://news.met.police.uk/feeds/rss", + "category": "public_appeal", + "region_tag": "GB", + "lat": 51.51, + "lng": -0.14, + "reporting": { + "label": "Report to Met Police", + "url": "https://www.met.police.uk/ro/report/", + "phone": "999 (Emergency) / 101 (Non-emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "bsi-de", + "authority_name": "BSI Germany", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "cert", + "base_url": "https://www.bsi.bund.de" + }, + "feed_url": "https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed.xml", + "category": "cyber_advisory", + "region_tag": "DE", + "lat": 50.73, + "lng": 7.1, + "reporting": { + "label": "Report to BSI", + "url": "https://www.bsi.bund.de/EN/Service-Navi/Contact/contact_node.html", + "email": "certbund@bsi.bund.de" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "bka-de", + "authority_name": "BKA Germany", + "country": "Germany", + "country_code": "DE", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.bka.de" + }, + "feed_url": "https://www.bka.de/SharedDocs/Kurzmeldungen/DE/Warnhinweise/RSS/BKA_Pressemitteilungen_RSS.xml", + "category": "wanted_suspect", + "region_tag": "DE", + "lat": 50.12, + "lng": 8.68, + "reporting": { + "label": "Report to BKA", + "url": "https://www.bka.de/DE/KontaktAufnehmen/Hinweisportal/hinweisportal_node.html", + "phone": "+49 611 55-0" + } + }, + { + 
"type": "rss", + "followRedirects": true, + "source": { + "source_id": "acsc-au", + "authority_name": "ACSC Australia", + "country": "Australia", + "country_code": "AU", + "region": "Oceania", + "authority_type": "cert", + "base_url": "https://www.cyber.gov.au" + }, + "feed_url": "https://www.cyber.gov.au/advisories/feed", + "feed_urls": [ + "https://www.cyber.gov.au/advisories/feed", + "https://www.cyber.gov.au/about-us/advisories/rss.xml", + "https://www.cyber.gov.au/alerts/feed" + ], + "category": "cyber_advisory", + "region_tag": "AU", + "lat": -35.28, + "lng": 149.13, + "reporting": { + "label": "Report to ACSC", + "url": "https://www.cyber.gov.au/report-and-recover/report", + "phone": "1300 292 371" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "afp-au", + "authority_name": "AFP Australia", + "country": "Australia", + "country_code": "AU", + "region": "Oceania", + "authority_type": "police", + "base_url": "https://www.afp.gov.au" + }, + "feed_url": "https://www.afp.gov.au/news-centre/media-releases/rss.xml", + "feed_urls": [ + "https://www.afp.gov.au/news-centre/media-releases/rss.xml", + "https://www.afp.gov.au/news-centre/media-release/rss.xml", + "https://www.afp.gov.au/news-centre/media-releases/feed" + ], + "category": "public_appeal", + "region_tag": "AU", + "lat": -35.31, + "lng": 149.14, + "reporting": { + "label": "Report to AFP", + "url": "https://www.afp.gov.au/what-we-do/crime-types/report-crime", + "phone": "131 237" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "qps-au", + "authority_name": "Queensland Police", + "country": "Australia", + "country_code": "AU", + "region": "Oceania", + "authority_type": "police", + "base_url": "https://mypolice.qld.gov.au" + }, + "feed_url": "https://mypolice.qld.gov.au/feed/", + "feed_urls": [ + "https://mypolice.qld.gov.au/feed/", + "https://mypolice.qld.gov.au/category/alert/feed/", + 
"https://mypolice.qld.gov.au/category/my-police-news/feed/" + ], + "category": "public_appeal", + "region_tag": "AU", + "lat": -27.47, + "lng": 153.03, + "reporting": { + "label": "Report to Queensland Police", + "url": "https://www.police.qld.gov.au/policelink-reporting", + "phone": "000 (Emergency) / 131 444 (Policelink)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "nsw-police-au", + "authority_name": "NSW Police", + "country": "Australia", + "country_code": "AU", + "region": "Oceania", + "authority_type": "police", + "base_url": "https://www.police.nsw.gov.au" + }, + "feed_url": "https://www.police.nsw.gov.au/news/rss", + "feed_urls": [ + "https://www.police.nsw.gov.au/news/rss", + "https://www.police.nsw.gov.au/rss/news", + "https://www.police.nsw.gov.au/news/feed" + ], + "category": "public_appeal", + "region_tag": "AU", + "lat": -33.87, + "lng": 151.21, + "reporting": { + "label": "Report to NSW Police", + "url": "https://portal.police.nsw.gov.au/s/online-services", + "phone": "000 (Emergency) / 131 444 (Police Assistance Line)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "cccs-ca", + "authority_name": "Canada Cyber Centre", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "cert", + "base_url": "https://www.cyber.gc.ca" + }, + "feed_url": "https://www.cyber.gc.ca/en/alerts-advisories/feed", + "category": "cyber_advisory", + "region_tag": "CA", + "lat": 45.42, + "lng": -75.69, + "reporting": { + "label": "Report to Cyber Centre", + "url": "https://www.cyber.gc.ca/en/incident-management", + "email": "contact@cyber.gc.ca", + "phone": "1-833-292-3722" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "rcmp-ca", + "authority_name": "RCMP Canada", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.rcmp-grc.gc.ca" + }, + 
"feed_url": "https://www.rcmp-grc.gc.ca/en/news/rss", + "category": "public_appeal", + "region_tag": "CA", + "lat": 45.4, + "lng": -75.7, + "reporting": { + "label": "Report to RCMP", + "url": "https://www.rcmp-grc.gc.ca/en/report-information-online", + "phone": "1-800-771-5401" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "cnp-es", + "authority_name": "Policía Nacional Spain", + "country": "Spain", + "country_code": "ES", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.policia.es" + }, + "feed_url": "https://www.policia.es/rss/rss_prensa.xml", + "category": "public_appeal", + "region_tag": "ES", + "lat": 40.42, + "lng": -3.7, + "reporting": { + "label": "Report to Policía Nacional", + "url": "https://www.policia.es/colabora.php", + "phone": "091" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "cert-in", + "authority_name": "CERT-In", + "country": "India", + "country_code": "IN", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.cert-in.org.in" + }, + "feed_url": "https://www.cert-in.org.in/s2cMainServlet?pageid=RSSFEED", + "category": "cyber_advisory", + "region_tag": "IN", + "lat": 28.61, + "lng": 77.21, + "reporting": { + "label": "Report to CERT-In", + "url": "https://www.cert-in.org.in/", + "email": "incident@cert-in.org.in", + "phone": "+91-11-24368572" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "singcert", + "authority_name": "SingCERT", + "country": "Singapore", + "country_code": "SG", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.csa.gov.sg" + }, + "feed_url": "https://www.csa.gov.sg/singcert/Alerts/rss", + "feed_urls": [ + "https://www.csa.gov.sg/singcert/Alerts/rss", + "https://www.csa.gov.sg/alerts-and-advisories/alerts/rss", + "https://www.csa.gov.sg/alerts-and-advisories/advisories/rss" + ], + "category": "cyber_advisory", + "region_tag": "SG", + 
"lat": 1.29, + "lng": 103.85, + "reporting": { + "label": "Report to SingCERT", + "url": "https://www.csa.gov.sg/singcert/reporting", + "email": "singcert@csa.gov.sg", + "phone": "+65 6323 5052" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "spf-sg", + "authority_name": "Singapore Police", + "country": "Singapore", + "country_code": "SG", + "region": "Asia", + "authority_type": "police", + "base_url": "https://www.police.gov.sg" + }, + "feed_url": "https://www.police.gov.sg/media-room/news/feed", + "feed_urls": [ + "https://www.police.gov.sg/media-room/news/feed", + "https://www.police.gov.sg/rss", + "https://www.police.gov.sg/media-room/news/rss.xml" + ], + "category": "public_appeal", + "region_tag": "SG", + "lat": 1.31, + "lng": 103.84, + "reporting": { + "label": "Report to Singapore Police", + "url": "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", + "phone": "999 (Emergency) / 1800-255-0000 (Police Hotline)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "hkcert", + "authority_name": "HKCERT", + "country": "Hong Kong", + "country_code": "HK", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.hkcert.org" + }, + "feed_url": "https://www.hkcert.org/rss", + "category": "cyber_advisory", + "region_tag": "HK", + "lat": 22.32, + "lng": 114.17, + "reporting": { + "label": "Report to HKCERT", + "url": "https://www.hkcert.org/report-incident", + "email": "hkcert@hkcert.org", + "phone": "+852 8105 6060" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "saps-za", + "authority_name": "SAPS South Africa", + "country": "South Africa", + "country_code": "ZA", + "region": "Africa", + "authority_type": "police", + "base_url": "https://www.saps.gov.za" + }, + "feed_url": "https://www.saps.gov.za/newsroom/rss.php", + "category": "public_appeal", + "region_tag": "ZA", + "lat": -25.75, + "lng": 28.19, + 
"reporting": { + "label": "Report to SAPS", + "url": "https://www.saps.gov.za/resource_centre/contacts/contacts.php", + "phone": "10111 (Emergency) / 08600 10111 (Crime Stop)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "missing-children-za", + "authority_name": "Missing Children South Africa", + "country": "South Africa", + "country_code": "ZA", + "region": "Africa", + "authority_type": "public_safety_program", + "base_url": "https://missingchildren.org.za" + }, + "feed_url": "https://missingchildren.org.za/feed/", + "feed_urls": [ + "https://missingchildren.org.za/feed/", + "https://missingchildren.org.za/category/missing-children/feed/", + "https://missingchildren.org.za/category/cases/feed/" + ], + "category": "missing_person", + "region_tag": "ZA", + "lat": -29, + "lng": 24, + "reporting": { + "label": "Report to Missing Children SA", + "url": "https://missingchildren.org.za/report/", + "phone": "+27 72 647 7464", + "notes": "Coordinate directly with SAPS in emergency situations." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "crimestoppers-uk", + "authority_name": "Crimestoppers UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "public_safety_program", + "base_url": "https://www.crimestoppers-uk.org" + }, + "feed_url": "https://www.crimestoppers-uk.org/give-information/latest-news-feeds/rss", + "category": "public_appeal", + "region_tag": "GB", + "lat": 51.52, + "lng": -0.08, + "reporting": { + "label": "Report to Crimestoppers", + "url": "https://crimestoppers-uk.org/give-information", + "phone": "0800 555 111", + "notes": "100% anonymous. You can also report online." 
+ } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "npa-jp", + "authority_name": "Japan NPA", + "country": "Japan", + "country_code": "JP", + "region": "Asia", + "authority_type": "police", + "base_url": "https://www.npa.go.jp" + }, + "feed_url": "https://www.npa.go.jp/rss/index.xml", + "category": "public_safety", + "region_tag": "JP", + "lat": 35.69, + "lng": 139.75, + "reporting": { + "label": "Report to NPA Japan", + "url": "https://www.npa.go.jp/english/index.html", + "phone": "110 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "gendarmerie-fr", + "authority_name": "Gendarmerie France", + "country": "France", + "country_code": "FR", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.gendarmerie.interieur.gouv.fr" + }, + "feed_url": "https://www.gendarmerie.interieur.gouv.fr/rss", + "category": "public_appeal", + "region_tag": "FR", + "lat": 48.85, + "lng": 2.3, + "reporting": { + "label": "Report to Gendarmerie", + "url": "https://www.pre-plainte-en-ligne.gouv.fr/", + "phone": "17 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "polisen-se", + "authority_name": "Polisen Sweden", + "country": "Sweden", + "country_code": "SE", + "region": "Europe", + "authority_type": "police", + "base_url": "https://polisen.se" + }, + "feed_url": "https://polisen.se/aktuellt/rss/hela-landet/", + "category": "public_appeal", + "region_tag": "SE", + "lat": 59.33, + "lng": 18.07, + "reporting": { + "label": "Report to Polisen", + "url": "https://polisen.se/en/victims-of-crime/report-a-crime-online/", + "phone": "112 (Emergency) / 114 14 (Non-emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "politiet-no", + "authority_name": "Politiet Norway", + "country": "Norway", + "country_code": "NO", + "region": "Europe", + "authority_type": "police", + "base_url": 
"https://www.politiet.no" + }, + "feed_url": "https://www.politiet.no/rss/", + "category": "public_appeal", + "region_tag": "NO", + "lat": 59.91, + "lng": 10.75, + "reporting": { + "label": "Report to Politiet", + "url": "https://www.politiet.no/en/services/report-an-offence/", + "phone": "112 (Emergency) / 02800 (Non-emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "pf-br", + "authority_name": "Polícia Federal Brazil", + "country": "Brazil", + "country_code": "BR", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.gov.br/pf" + }, + "feed_url": "https://www.gov.br/pf/pt-br/assuntos/noticias/@@rss", + "feed_urls": [ + "https://www.gov.br/pf/pt-br/assuntos/noticias/@@rss", + "https://www.gov.br/pf/pt-br/rss", + "https://www.gov.br/pf/pt-br/@@search?sort_on=Date&Subject:list=noticias&b_size=100&format=rss" + ], + "category": "public_appeal", + "region_tag": "BR", + "lat": -15.79, + "lng": -47.88, + "reporting": { + "label": "Report to Polícia Federal", + "url": "https://www.gov.br/pf/pt-br/canais_atendimento/denuncia", + "phone": "190 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "carabineros-cl", + "authority_name": "Carabineros Chile", + "country": "Chile", + "country_code": "CL", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.carabineros.cl" + }, + "feed_url": "https://www.carabineros.cl/feed/", + "feed_urls": [ + "https://www.carabineros.cl/feed/", + "https://www.carabineros.cl/rss", + "https://www.carabineros.cl/index.php/feed/" + ], + "category": "public_appeal", + "region_tag": "CL", + "lat": -33.45, + "lng": -70.67, + "reporting": { + "label": "Report to Carabineros", + "url": "https://www.carabineros.cl/", + "phone": "133 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "pnp-pe", + "authority_name": "Policía Nacional del Perú", + 
"country": "Peru", + "country_code": "PE", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.policia.gob.pe" + }, + "feed_url": "https://www.policia.gob.pe/feed/", + "feed_urls": [ + "https://www.policia.gob.pe/feed/", + "https://www.policia.gob.pe/rss", + "https://www.gob.pe/institucion/pnp/noticias.rss" + ], + "category": "public_appeal", + "region_tag": "PE", + "lat": -12.05, + "lng": -77.04, + "reporting": { + "label": "Report to PNP Peru", + "url": "https://www.policia.gob.pe/denuncia/", + "phone": "105 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "policia-ec", + "authority_name": "Policía Nacional Ecuador", + "country": "Ecuador", + "country_code": "EC", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.policia.gob.ec" + }, + "feed_url": "https://www.policia.gob.ec/feed/", + "feed_urls": [ + "https://www.policia.gob.ec/feed/", + "https://www.policia.gob.ec/rss", + "https://www.policia.gob.ec/category/noticias/feed/" + ], + "category": "public_appeal", + "region_tag": "EC", + "lat": -0.18, + "lng": -78.47, + "reporting": { + "label": "Report to Policía Ecuador", + "url": "https://www.policia.gob.ec/servicios/", + "phone": "911 (Emergency) / 1800-DELITO" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "policia-bo", + "authority_name": "Policía Boliviana", + "country": "Bolivia", + "country_code": "BO", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.policia.bo" + }, + "feed_url": "https://www.policia.bo/feed/", + "feed_urls": [ + "https://www.policia.bo/feed/", + "https://www.policia.bo/rss", + "https://www.policia.bo/category/noticias/feed/" + ], + "category": "public_appeal", + "region_tag": "BO", + "lat": -16.5, + "lng": -68.15, + "reporting": { + "label": "Report to Policía Boliviana", + "url": "https://www.policia.bo/", + "phone": "110 (Emergency)" + } + }, 
+ { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "policia-py", + "authority_name": "Policía Nacional Paraguay", + "country": "Paraguay", + "country_code": "PY", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.policianacional.gov.py" + }, + "feed_url": "https://www.policianacional.gov.py/feed/", + "feed_urls": [ + "https://www.policianacional.gov.py/feed/", + "https://www.policianacional.gov.py/rss", + "https://www.policianacional.gov.py/category/noticias/feed/" + ], + "category": "public_appeal", + "region_tag": "PY", + "lat": -25.29, + "lng": -57.64, + "reporting": { + "label": "Report to Policía Paraguay", + "url": "https://www.policianacional.gov.py/", + "phone": "911 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "pdi-ciberchile", + "authority_name": "PDI Chile Cibercrimen", + "country": "Chile", + "country_code": "CL", + "region": "South America", + "authority_type": "police", + "base_url": "https://www.pdichile.cl" + }, + "feed_url": "https://www.pdichile.cl/feed/", + "feed_urls": [ + "https://www.pdichile.cl/feed/", + "https://www.pdichile.cl/rss", + "https://www.pdichile.cl/instituci%C3%B3n/noticias/feed" + ], + "category": "cyber_advisory", + "region_tag": "CL", + "lat": -33.45, + "lng": -70.66, + "reporting": { + "label": "Report Cybercrime to PDI", + "url": "https://www.pdichile.cl/", + "phone": "134 (PDI Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "fiscales-ar", + "authority_name": "Ministerio Público Fiscal Argentina", + "country": "Argentina", + "country_code": "AR", + "region": "South America", + "authority_type": "regulatory", + "base_url": "https://www.fiscales.gob.ar" + }, + "feed_url": "https://www.fiscales.gob.ar/feed/", + "feed_urls": [ + "https://www.fiscales.gob.ar/feed/", + "https://www.fiscales.gob.ar/category/noticias/feed/", + 
"https://www.fiscales.gob.ar/category/cibercrimen/feed/" + ], + "category": "public_safety", + "region_tag": "AR", + "lat": -34.61, + "lng": -58.38, + "reporting": { + "label": "Report to Fiscalía Argentina", + "url": "https://www.mpf.gob.ar/", + "phone": "137 (Emergency advisory line)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "missing-cl-ngo", + "authority_name": "Fundación Extraviados Chile", + "country": "Chile", + "country_code": "CL", + "region": "South America", + "authority_type": "public_safety_program", + "base_url": "https://www.extraviados.cl" + }, + "feed_url": "https://www.extraviados.cl/feed/", + "feed_urls": [ + "https://www.extraviados.cl/feed/", + "https://www.extraviados.cl/category/casos-vigentes/feed/" + ], + "category": "missing_person", + "region_tag": "CL", + "lat": -33.43, + "lng": -70.65, + "reporting": { + "label": "Report Missing Person in Chile", + "url": "https://www.extraviados.cl/", + "notes": "Coordinate with local police for urgent leads." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "fbi-seeking", + "authority_name": "FBI Seeking Info", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://www.fbi.gov/feeds/seeking-information/rss.xml", + "category": "public_appeal", + "region_tag": "US", + "lat": 38.91, + "lng": -77.01, + "reporting": { + "label": "Submit a Tip to FBI", + "url": "https://tips.fbi.gov/", + "phone": "1-800-CALL-FBI (1-800-225-5324)", + "notes": "The FBI is seeking the public's assistance. If you have information, submit a tip." 
+ } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "fbi-mostwanted", + "authority_name": "FBI Most Wanted", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.fbi.gov" + }, + "feed_url": "https://www.fbi.gov/feeds/fbi-most-wanted/rss.xml", + "category": "wanted_suspect", + "region_tag": "US", + "lat": 38.89, + "lng": -77.02, + "reporting": { + "label": "Report Sighting to FBI", + "url": "https://tips.fbi.gov/", + "phone": "1-800-CALL-FBI (1-800-225-5324)", + "notes": "Do NOT attempt to apprehend. Call 911 immediately if in danger." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "actionfraud-uk", + "authority_name": "Action Fraud UK", + "country": "United Kingdom", + "country_code": "GB", + "region": "Europe", + "authority_type": "police", + "base_url": "https://www.actionfraud.police.uk" + }, + "feed_url": "https://www.actionfraud.police.uk/rss", + "category": "fraud_alert", + "region_tag": "GB", + "lat": 51.5, + "lng": -0.12, + "reporting": { + "label": "Report Fraud to Action Fraud", + "url": "https://www.actionfraud.police.uk/reporting-fraud-and-cyber-crime", + "phone": "0300 123 2040" + } + }, + { + "type": "rss", + "source": { + "source_id": "cna-sg-crime", + "authority_name": "CNA Singapore Crime", + "country": "Singapore", + "country_code": "SG", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.channelnewsasia.com" + }, + "feed_url": "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=6511", + "category": "public_safety", + "region_tag": "SG", + "lat": 1.35, + "lng": 103.82, + "reporting": { + "label": "Report Crime in Singapore", + "url": "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", + "phone": "999 (Emergency) / 1800-255-0000 (Police Hotline)" + } + }, + { + "type": "rss", + "source": { + 
"source_id": "yonhap-kr", + "authority_name": "Yonhap News Korea", + "country": "South Korea", + "country_code": "KR", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://en.yna.co.kr" + }, + "feed_url": "https://en.yna.co.kr/RSS/news.xml", + "category": "public_safety", + "region_tag": "KR", + "lat": 37.57, + "lng": 126.98, + "reporting": { + "label": "Report Crime in South Korea", + "url": "https://www.police.go.kr/eng/index.do", + "phone": "112 (Emergency)" + } + }, + { + "type": "rss", + "source": { + "source_id": "nhk-jp", + "authority_name": "NHK Japan", + "country": "Japan", + "country_code": "JP", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www3.nhk.or.jp" + }, + "feed_url": "https://www3.nhk.or.jp/rss/news/cat1.xml", + "category": "public_safety", + "region_tag": "JP", + "lat": 35.67, + "lng": 139.71, + "reporting": { + "label": "Report to Japan Police", + "url": "https://www.npa.go.jp/english/index.html", + "phone": "110 (Emergency)" + } + }, + { + "type": "rss", + "source": { + "source_id": "scmp-hk", + "authority_name": "SCMP Hong Kong", + "country": "Hong Kong", + "country_code": "HK", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.scmp.com" + }, + "feed_url": "https://www.scmp.com/rss/5/feed", + "followRedirects": true, + "category": "public_safety", + "region_tag": "HK", + "lat": 22.28, + "lng": 114.16, + "reporting": { + "label": "Report Crime in Hong Kong", + "url": "https://www.police.gov.hk/ppp_en/contact_us.html", + "phone": "999 (Emergency)" + } + }, + { + "type": "rss", + "source": { + "source_id": "straitstimes-sg", + "authority_name": "Straits Times Singapore", + "country": "Singapore", + "country_code": "SG", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.straitstimes.com" + }, + "feed_url": "https://www.straitstimes.com/news/singapore/rss.xml", + "category": 
"public_safety", + "region_tag": "SG", + "lat": 1.3, + "lng": 103.84, + "reporting": { + "label": "Report Crime in Singapore", + "url": "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", + "phone": "999 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "pnp-ph", + "authority_name": "PNP Philippines", + "country": "Philippines", + "country_code": "PH", + "region": "Asia", + "authority_type": "police", + "base_url": "https://www.pnp.gov.ph" + }, + "feed_url": "https://www.pnp.gov.ph/rss", + "feed_urls": [ + "https://www.pnp.gov.ph/rss", + "https://www.pnp.gov.ph/feed/", + "https://www.pnp.gov.ph/category/press-release/feed/" + ], + "category": "public_appeal", + "region_tag": "PH", + "lat": 14.6, + "lng": 120.98, + "reporting": { + "label": "Report to PNP", + "url": "https://www.pnp.gov.ph/", + "phone": "117 (Emergency) / 8722-0650" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "pdrm-my", + "authority_name": "PDRM Malaysia", + "country": "Malaysia", + "country_code": "MY", + "region": "Asia", + "authority_type": "police", + "base_url": "https://www.pdrm.gov.my" + }, + "feed_url": "https://www.pdrm.gov.my/rss", + "feed_urls": [ + "https://www.pdrm.gov.my/rss", + "https://www.rmp.gov.my/rss", + "https://www.rmp.gov.my/feed/" + ], + "category": "public_appeal", + "region_tag": "MY", + "lat": 3.14, + "lng": 101.69, + "reporting": { + "label": "Report to PDRM", + "url": "https://semakonline.rmp.gov.my/", + "phone": "999 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "ttps", + "authority_name": "Trinidad & Tobago Police", + "country": "Trinidad and Tobago", + "country_code": "TT", + "region": "Caribbean", + "authority_type": "police", + "base_url": "https://www.ttps.gov.tt" + }, + "feed_url": "https://www.ttps.gov.tt/rss", + "category": "public_appeal", + "region_tag": "TT", + "lat": 10.65, + "lng": 
-61.5, + "reporting": { + "label": "Report to TTPS", + "url": "https://www.ttps.gov.tt/", + "phone": "999 (Emergency) / 555 (Crime Stoppers)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "jcf-jm", + "authority_name": "JCF Jamaica", + "country": "Jamaica", + "country_code": "JM", + "region": "Caribbean", + "authority_type": "police", + "base_url": "https://www.jcf.gov.jm" + }, + "feed_url": "https://www.jcf.gov.jm/rss", + "category": "public_appeal", + "region_tag": "JM", + "lat": 18, + "lng": -76.79, + "reporting": { + "label": "Report to JCF", + "url": "https://www.jcf.gov.jm/", + "phone": "119 (Emergency) / 311 (Crime Stop)" + } + }, + { + "type": "html-list", + "followRedirects": true, + "source": { + "source_id": "fgr-mx", + "authority_name": "FGR Mexico", + "country": "Mexico", + "country_code": "MX", + "region": "North America", + "authority_type": "police", + "base_url": "https://www.gob.mx/fgr" + }, + "feed_url": "https://www.gob.mx/fgr/archivo/prensa", + "feed_urls": [ + "https://www.gob.mx/fgr/archivo/prensa", + "https://www.gob.mx/fgr/es/archivo/prensa", + "https://www.gob.mx/fgr" + ], + "include_keywords": [ + "desaparec", + "se busca", + "ficha", + "recompensa", + "secuestro", + "privación de la libertad", + "denuncia", + "información", + "investigación", + "captura", + "homicidio", + "víctima", + "feminicidio", + "trata", + "delincuencia", + "cártel" + ], + "exclude_keywords": [ + "agenda", + "discurso", + "evento", + "licitación", + "transparencia" + ], + "category": "public_appeal", + "region_tag": "MX", + "lat": 19.43, + "lng": -99.13, + "reporting": { + "label": "Report to FGR Mexico", + "url": "https://www.gob.mx/fgr", + "phone": "800-008-5400", + "notes": "Denuncia anónima / Anonymous tip line." 
+ } + }, + { + "type": "html-list", + "followRedirects": true, + "source": { + "source_id": "amber-mx", + "authority_name": "AMBER Alert Mexico", + "country": "Mexico", + "country_code": "MX", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.gob.mx/amber" + }, + "feed_url": "https://www.gob.mx/amber/archivo/acciones_y_programas", + "feed_urls": [ + "https://www.gob.mx/amber/archivo/acciones_y_programas", + "https://www.gob.mx/amber/es/archivo/acciones_y_programas", + "https://www.gob.mx/amber" + ], + "include_keywords": [ + "alerta amber", + "desaparec", + "no localizado", + "se busca", + "ficha", + "menor", + "niña", + "niño", + "adolescente", + "auxilio", + "información" + ], + "exclude_keywords": [ + "evento", + "campaña", + "conferencia", + "manual", + "material" + ], + "category": "missing_person", + "region_tag": "MX", + "lat": 19.44, + "lng": -99.14, + "reporting": { + "label": "Report Missing Child Mexico", + "url": "https://www.gob.mx/amber", + "phone": "800-008-5400", + "notes": "Alerta AMBER México" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "missing-ca", + "authority_name": "Canada Missing Children", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.canadasmissing.ca" + }, + "feed_url": "https://www.canadasmissing.ca/rss/index-eng.xml", + "category": "missing_person", + "region_tag": "CA", + "lat": 45.43, + "lng": -75.68, + "reporting": { + "label": "Report Missing Person Canada", + "url": "https://www.canadasmissing.ca/index-eng.htm", + "phone": "1-866-KID-TIPS (1-866-543-8477)", + "notes": "Canadian Centre for Child Protection" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "knpa-kr", + "authority_name": "Korea National Police", + "country": "South Korea", + "country_code": "KR", + "region": "Asia", + "authority_type": 
"police", + "base_url": "https://www.police.go.kr" + }, + "feed_url": "https://www.police.go.kr/eng/portal/rss/rss.do", + "category": "public_safety", + "region_tag": "KR", + "lat": 37.58, + "lng": 126.97, + "reporting": { + "label": "Report to Korean Police", + "url": "https://www.police.go.kr/eng/index.do", + "phone": "112 (Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "thaicert", + "authority_name": "ThaiCERT", + "country": "Thailand", + "country_code": "TH", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.thaicert.or.th" + }, + "feed_url": "https://www.thaicert.or.th/RSS/feed-en.xml", + "feed_urls": [ + "https://www.thaicert.or.th/RSS/feed-en.xml", + "https://www.thaicert.or.th/feed/" + ], + "category": "cyber_advisory", + "region_tag": "TH", + "lat": 13.76, + "lng": 100.5, + "reporting": { + "label": "Report to ThaiCERT", + "url": "https://www.thaicert.or.th/", + "email": "op@thaicert.or.th" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "mycert-my", + "authority_name": "MyCERT Malaysia", + "country": "Malaysia", + "country_code": "MY", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.mycert.org.my" + }, + "feed_url": "https://www.mycert.org.my/portal/rss", + "feed_urls": [ + "https://www.mycert.org.my/portal/rss", + "https://www.mycert.org.my/feed" + ], + "category": "cyber_advisory", + "region_tag": "MY", + "lat": 3.15, + "lng": 101.7, + "reporting": { + "label": "Report to MyCERT", + "url": "https://www.mycert.org.my/portal/report-incident", + "email": "mycert@cybersecurity.my" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "bssn-id", + "authority_name": "BSSN Indonesia", + "country": "Indonesia", + "country_code": "ID", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://bssn.go.id" + }, + "feed_url": "https://bssn.go.id/feed/", + "feed_urls": [ + 
"https://bssn.go.id/feed/", + "https://bssn.go.id/category/peringatan-keamanan/feed/" + ], + "category": "cyber_advisory", + "region_tag": "ID", + "lat": -6.2, + "lng": 106.82, + "reporting": { + "label": "Report to BSSN", + "url": "https://bssn.go.id/", + "notes": "Use official BSSN contact channels for incident reporting." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "bleepingcomputer", + "authority_name": "BleepingComputer", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "private_sector", + "base_url": "https://www.bleepingcomputer.com" + }, + "feed_url": "https://www.bleepingcomputer.com/feed/", + "category": "private_sector", + "region_tag": "US", + "lat": 40.71, + "lng": -74.01, + "reporting": { + "label": "Read Full Report", + "url": "https://www.bleepingcomputer.com", + "notes": "Private-sector cybersecurity news. Report incidents to relevant authorities." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "krebsonsecurity", + "authority_name": "Krebs on Security", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "private_sector", + "base_url": "https://krebsonsecurity.com" + }, + "feed_url": "https://krebsonsecurity.com/feed/", + "category": "private_sector", + "region_tag": "US", + "lat": 38.9, + "lng": -77.04, + "reporting": { + "label": "Read Full Report", + "url": "https://krebsonsecurity.com", + "notes": "Investigative cybersecurity journalism by Brian Krebs." 
+ } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "thehackernews", + "authority_name": "The Hacker News", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "private_sector", + "base_url": "https://thehackernews.com" + }, + "feed_url": "https://feeds.feedburner.com/TheHackersNews", + "category": "private_sector", + "region_tag": "US", + "lat": 37.39, + "lng": -122.08, + "reporting": { + "label": "Read Full Report", + "url": "https://thehackernews.com", + "notes": "Cybersecurity news and analysis." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "databreaches-net", + "authority_name": "DataBreaches.net", + "country": "United States", + "country_code": "US", + "region": "North America", + "authority_type": "private_sector", + "base_url": "https://databreaches.net" + }, + "feed_url": "https://databreaches.net/feed/", + "category": "private_sector", + "region_tag": "US", + "lat": 39.83, + "lng": -98.58, + "reporting": { + "label": "Read Full Report", + "url": "https://databreaches.net", + "notes": "Data breach tracking and reporting." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "vpd-ca", + "authority_name": "Vancouver Police Department", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "police", + "base_url": "https://vpd.ca" + }, + "feed_url": "https://vpd.ca/feed/", + "category": "public_appeal", + "region_tag": "CA", + "lat": 49.2827, + "lng": -123.1207, + "reporting": { + "label": "Submit a Tip to VPD", + "url": "https://vpd.ca/report-a-crime/", + "phone": "604-717-3321 (Non-Emergency)", + "notes": "911 for emergencies." 
+ } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "calgary-police-ca", + "authority_name": "Calgary Police Service", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "police", + "base_url": "https://newsroom.calgary.ca" + }, + "feed_url": "https://newsroom.calgary.ca/feed/", + "category": "public_appeal", + "region_tag": "CA", + "lat": 51.0447, + "lng": -114.0719, + "reporting": { + "label": "Submit a Tip to Calgary Police", + "url": "https://www.calgarypolice.ca/contact-us", + "phone": "403-266-1234 (Non-Emergency)", + "notes": "911 for emergencies." + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "cccs-ca-api", + "authority_name": "Canadian Centre for Cyber Security (Alerts)", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "cert", + "base_url": "https://www.cyber.gc.ca" + }, + "feed_url": "https://www.cyber.gc.ca/api/cccs/rss/v1/get?feed=alerts_advisories&lang=en", + "category": "cyber_advisory", + "region_tag": "CA", + "lat": 45.4215, + "lng": -75.6972, + "reporting": { + "label": "Report a Cyber Incident", + "url": "https://www.cyber.gc.ca/en/incident-management", + "phone": "1-833-CYBER-88" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "cbc-canada", + "authority_name": "CBC Canada News", + "country": "Canada", + "country_code": "CA", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://www.cbc.ca" + }, + "feed_url": "https://www.cbc.ca/webfeed/rss/rss-canada", + "category": "public_safety", + "region_tag": "CA", + "lat": 43.6532, + "lng": -79.3832, + "reporting": { + "label": "CBC News Tips", + "url": "https://www.cbc.ca/news/tips" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "globalnews-ca", + "authority_name": "Global News Canada", + "country": "Canada", + 
"country_code": "CA", + "region": "North America", + "authority_type": "public_safety_program", + "base_url": "https://globalnews.ca" + }, + "feed_url": "https://globalnews.ca/feed/", + "category": "public_safety", + "region_tag": "CA", + "lat": 45.5017, + "lng": -73.5673, + "reporting": { + "label": "Global News Tips", + "url": "https://globalnews.ca/pages/contact-us/" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "usom-tr", + "authority_name": "TR-CERT / USOM", + "country": "Turkey", + "country_code": "TR", + "region": "Asia", + "authority_type": "cert", + "base_url": "https://www.usom.gov.tr" + }, + "feed_url": "https://www.usom.gov.tr/rss/tehdit.rss", + "category": "cyber_advisory", + "region_tag": "TR", + "lat": 39.9334, + "lng": 32.8597, + "reporting": { + "label": "Report Cyber Incident to USOM", + "url": "https://www.usom.gov.tr/bildirim" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "timesofisrael-il", + "authority_name": "Times of Israel", + "country": "Israel", + "country_code": "IL", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.timesofisrael.com" + }, + "feed_url": "https://www.timesofisrael.com/feed/", + "category": "public_safety", + "region_tag": "IL", + "lat": 31.7683, + "lng": 35.2137, + "reporting": { + "label": "Israel Police Tips", + "url": "https://www.police.gov.il/en", + "phone": "100 (Israel Police)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "middleeasteye", + "authority_name": "Middle East Eye", + "country": "Qatar", + "country_code": "QA", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.middleeasteye.net" + }, + "feed_url": "https://www.middleeasteye.net/rss", + "category": "public_safety", + "region_tag": "ME", + "lat": 25.2854, + "lng": 51.531, + "reporting": { + "label": "Middle East Eye Tips", + "url": 
"https://www.middleeasteye.net/contact" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "dailysabah-tr", + "authority_name": "Daily Sabah Turkey", + "country": "Turkey", + "country_code": "TR", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.dailysabah.com" + }, + "feed_url": "https://www.dailysabah.com/rssFeed/turkey", + "category": "public_safety", + "region_tag": "TR", + "lat": 41.0082, + "lng": 28.9784, + "reporting": { + "label": "Daily Sabah Contact", + "url": "https://www.dailysabah.com/contact" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "globaltimes-cn", + "authority_name": "Global Times China", + "country": "China", + "country_code": "CN", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.globaltimes.cn" + }, + "feed_url": "https://www.globaltimes.cn/rss/outbrain.xml", + "category": "public_safety", + "region_tag": "CN", + "lat": 39.9042, + "lng": 116.4074, + "reporting": { + "label": "Global Times Contact", + "url": "https://www.globaltimes.cn/about-us/contact-us.html" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "indiatoday-crime", + "authority_name": "India Today Crime", + "country": "India", + "country_code": "IN", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.indiatoday.in" + }, + "feed_url": "https://www.indiatoday.in/rss/1786661", + "category": "public_safety", + "region_tag": "IN", + "lat": 28.6139, + "lng": 77.209, + "reporting": { + "label": "India Crime Tips", + "url": "https://cybercrime.gov.in/", + "phone": "112 (India Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "ndtv-in", + "authority_name": "NDTV India News", + "country": "India", + "country_code": "IN", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": 
"https://www.ndtv.com" + }, + "feed_url": "https://feeds.feedburner.com/ndtvnews-india-news", + "category": "public_safety", + "region_tag": "IN", + "lat": 19.076, + "lng": 72.8777, + "reporting": { + "label": "NDTV News Tips", + "url": "https://www.ndtv.com/page/contact-us", + "phone": "112 (India Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "hindustantimes-in", + "authority_name": "Hindustan Times India", + "country": "India", + "country_code": "IN", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.hindustantimes.com" + }, + "feed_url": "https://www.hindustantimes.com/feeds/rss/india-news/rssfeed.xml", + "category": "public_safety", + "region_tag": "IN", + "lat": 12.9716, + "lng": 77.5946, + "reporting": { + "label": "Hindustan Times Tips", + "url": "https://www.hindustantimes.com/contact-us", + "phone": "112 (India Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "vnexpress-vn", + "authority_name": "VnExpress International", + "country": "Vietnam", + "country_code": "VN", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://e.vnexpress.net" + }, + "feed_url": "https://e.vnexpress.net/rss/news.rss", + "category": "public_safety", + "region_tag": "VN", + "lat": 21.0278, + "lng": 105.8342, + "reporting": { + "label": "Vietnam Police Tips", + "url": "https://congan.com.vn/", + "phone": "113 (Vietnam Police)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "laotiantimes-la", + "authority_name": "Laotian Times", + "country": "Laos", + "country_code": "LA", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://laotiantimes.com" + }, + "feed_url": "https://laotiantimes.com/feed/", + "category": "public_safety", + "region_tag": "LA", + "lat": 17.9757, + "lng": 102.6331, + "reporting": { + "label": "Laotian Times Contact", + 
"url": "https://laotiantimes.com/contact/" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "bangkokpost-th", + "authority_name": "Bangkok Post", + "country": "Thailand", + "country_code": "TH", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.bangkokpost.com" + }, + "feed_url": "https://www.bangkokpost.com/rss/data/topstories.xml", + "category": "public_safety", + "region_tag": "TH", + "lat": 13.7563, + "lng": 100.5018, + "reporting": { + "label": "Thailand Police Tips", + "url": "https://www.royalthaipolice.go.th/", + "phone": "191 (Thailand Police)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "rappler-ph", + "authority_name": "Rappler Philippines", + "country": "Philippines", + "country_code": "PH", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://www.rappler.com" + }, + "feed_url": "https://www.rappler.com/feed/", + "category": "public_safety", + "region_tag": "PH", + "lat": 14.5995, + "lng": 120.9842, + "reporting": { + "label": "PNP Philippines Tips", + "url": "https://www.pnp.gov.ph/", + "phone": "117 (PH Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "tempo-id", + "authority_name": "Tempo Indonesia", + "country": "Indonesia", + "country_code": "ID", + "region": "Asia", + "authority_type": "public_safety_program", + "base_url": "https://en.tempo.co" + }, + "feed_url": "https://rss.tempo.co/en/", + "category": "public_safety", + "region_tag": "ID", + "lat": -6.2088, + "lng": 106.8456, + "reporting": { + "label": "Indonesia Police Tips", + "url": "https://www.polri.go.id/", + "phone": "110 (Indonesia Police)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "postcourier-pg", + "authority_name": "Post-Courier PNG", + "country": "Papua New Guinea", + "country_code": "PG", + "region": "Oceania", + "authority_type": 
"public_safety_program", + "base_url": "https://www.postcourier.com.pg" + }, + "feed_url": "https://www.postcourier.com.pg/feed/", + "category": "public_safety", + "region_tag": "PG", + "lat": -6.3149, + "lng": 147.1802, + "reporting": { + "label": "PNG Police", + "url": "https://www.rpngc.gov.pg/", + "phone": "000 (PNG Emergency)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "fijitimes-fj", + "authority_name": "Fiji Times", + "country": "Fiji", + "country_code": "FJ", + "region": "Oceania", + "authority_type": "public_safety_program", + "base_url": "https://www.fijitimes.com" + }, + "feed_url": "https://www.fijitimes.com/feed/", + "category": "public_safety", + "region_tag": "FJ", + "lat": -18.1416, + "lng": 178.4419, + "reporting": { + "label": "Fiji Police", + "url": "https://www.police.gov.fj/", + "phone": "917 (Fiji Police)" + } + }, + { + "type": "rss", + "followRedirects": true, + "source": { + "source_id": "rnz-pacific", + "authority_name": "RNZ Pacific", + "country": "New Zealand", + "country_code": "NZ", + "region": "Oceania", + "authority_type": "public_safety_program", + "base_url": "https://www.rnz.co.nz" + }, + "feed_url": "https://www.rnz.co.nz/rss/pacific.xml", + "category": "public_safety", + "region_tag": "NZ", + "lat": -15.3767, + "lng": 166.9592, + "reporting": { + "label": "RNZ Pacific Contact", + "url": "https://www.rnz.co.nz/about/contact" + } + }, { "type": "html-list", "followRedirects": true, @@ -17,7 +2396,13 @@ "lat": -1.286389, "lng": 36.817223, "max_items": 80, - "include_keywords": ["wanted", "suspect", "fugitive", "appeal", "missing"], + "include_keywords": [ + "wanted", + "suspect", + "fugitive", + "appeal", + "missing" + ], "reporting": { "label": "Report to Kenya DCI", "url": "https://www.dci.go.ke/contact-us/", @@ -42,7 +2427,13 @@ "lat": 5.603717, "lng": -0.186964, "max_items": 80, - "include_keywords": ["wanted", "suspect", "fraud", "crime", "appeal"], + "include_keywords": [ + "wanted", 
+ "suspect", + "fraud", + "crime", + "appeal" + ], "reporting": { "label": "Report to EOCO Ghana", "url": "https://www.eoco.gov.gh/contact-us/" @@ -66,7 +2457,12 @@ "lat": 5.603717, "lng": -0.186964, "max_items": 80, - "include_keywords": ["wanted", "case", "appeal", "fugitive"], + "include_keywords": [ + "wanted", + "case", + "appeal", + "fugitive" + ], "reporting": { "label": "Report to Ghana OSP", "url": "https://osp.gov.gh/contact/" @@ -90,7 +2486,13 @@ "lat": 9.082, "lng": 8.6753, "max_items": 80, - "include_keywords": ["wanted", "suspect", "fraud", "money laundering", "appeal"], + "include_keywords": [ + "wanted", + "suspect", + "fraud", + "money laundering", + "appeal" + ], "reporting": { "label": "Report to EFCC", "url": "https://www.efcc.gov.ng/efcc/records/red-alert" @@ -114,7 +2516,12 @@ "lat": 9.082, "lng": 8.6753, "max_items": 80, - "include_keywords": ["wanted", "persons", "suspect", "fugitive"], + "include_keywords": [ + "wanted", + "persons", + "suspect", + "fugitive" + ], "reporting": { "label": "Report to ICPC", "url": "https://icpc.gov.ng/report-corruption/" @@ -138,7 +2545,15 @@ "lat": -25.747868, "lng": 28.229271, "max_items": 120, - "include_keywords": ["wanted", "missing", "suspect", "appeal", "crime", "investigation", "tip-off"], + "include_keywords": [ + "wanted", + "missing", + "suspect", + "appeal", + "crime", + "investigation", + "tip-off" + ], "reporting": { "label": "Report to SAPS Crime Stop", "url": "https://www.saps.gov.za/crimestop/", @@ -164,11 +2579,21 @@ ], "category": "missing_person", "region_tag": "ZA", - "lat": -29.0, - "lng": 24.0, + "lat": -29, + "lng": 24, "max_items": 120, - "include_keywords": ["missing", "child", "teen", "appeal", "case"], - "exclude_keywords": ["donate", "event", "newsletter"], + "include_keywords": [ + "missing", + "child", + "teen", + "appeal", + "case" + ], + "exclude_keywords": [ + "donate", + "event", + "newsletter" + ], "reporting": { "label": "Report Missing Person to MCSA", "url": 
"https://missingchildren.org.za/report/" @@ -242,7 +2667,13 @@ "lat": 9.082, "lng": 8.6753, "max_items": 100, - "include_keywords": ["advisory", "alert", "threat", "vulnerability", "incident"], + "include_keywords": [ + "advisory", + "alert", + "threat", + "vulnerability", + "incident" + ], "reporting": { "label": "Report Incident to ngCERT", "url": "https://www.cert.gov.ng/" @@ -266,7 +2697,14 @@ "lat": 33.886917, "lng": 9.537499, "max_items": 100, - "include_keywords": ["alerte", "advisory", "vuln", "threat", "incident", "cyber"], + "include_keywords": [ + "alerte", + "advisory", + "vuln", + "threat", + "incident", + "cyber" + ], "reporting": { "label": "Report Incident to CERT-TN", "url": "https://www.cert.tn/" @@ -295,8 +2733,22 @@ "lat": 14.634915, "lng": -90.506882, "max_items": 120, - "include_keywords": ["desaparec", "secuestro", "se busca", "captura", "investigacion", "denuncia", "fiscalia", "homicidio"], - "exclude_keywords": ["evento", "licitacion", "boletin", "agenda"], + "include_keywords": [ + "desaparec", + "secuestro", + "se busca", + "captura", + "investigacion", + "denuncia", + "fiscalia", + "homicidio" + ], + "exclude_keywords": [ + "evento", + "licitacion", + "boletin", + "agenda" + ], "reporting": { "label": "Report to MP Guatemala", "url": "https://www.mp.gob.gt" @@ -325,8 +2777,20 @@ "lat": 14.0818, "lng": -87.2068, "max_items": 120, - "include_keywords": ["desaparec", "secuestro", "captura", "investigacion", "denuncia", "homicidio", "fiscalia"], - "exclude_keywords": ["evento", "agenda", "licitacion"], + "include_keywords": [ + "desaparec", + "secuestro", + "captura", + "investigacion", + "denuncia", + "homicidio", + "fiscalia" + ], + "exclude_keywords": [ + "evento", + "agenda", + "licitacion" + ], "reporting": { "label": "Report to MP Honduras", "url": "https://www.mp.hn" @@ -355,8 +2819,21 @@ "lat": 13.6929, "lng": -89.2182, "max_items": 120, - "include_keywords": ["desaparec", "secuestro", "se busca", "captura", "informacion", 
"denuncia", "fiscalia", "homicidio"], - "exclude_keywords": ["evento", "agenda", "acto protocolario"], + "include_keywords": [ + "desaparec", + "secuestro", + "se busca", + "captura", + "informacion", + "denuncia", + "fiscalia", + "homicidio" + ], + "exclude_keywords": [ + "evento", + "agenda", + "acto protocolario" + ], "reporting": { "label": "Report to FGR El Salvador", "url": "https://www.fiscalia.gob.sv" @@ -385,8 +2862,19 @@ "lat": 9.9325, "lng": -84.0833, "max_items": 120, - "include_keywords": ["desaparec", "persona", "se busca", "informacion", "auxilio", "oij"], - "exclude_keywords": ["licitacion", "convocatoria", "boletin"], + "include_keywords": [ + "desaparec", + "persona", + "se busca", + "informacion", + "auxilio", + "oij" + ], + "exclude_keywords": [ + "licitacion", + "convocatoria", + "boletin" + ], "reporting": { "label": "Report to OIJ Costa Rica", "url": "https://sitiooij.poder-judicial.go.cr" @@ -415,8 +2903,20 @@ "lat": 8.9824, "lng": -79.5199, "max_items": 120, - "include_keywords": ["desaparec", "secuestro", "se busca", "captura", "informacion", "denuncia", "investigacion"], - "exclude_keywords": ["evento", "agenda", "licitacion"], + "include_keywords": [ + "desaparec", + "secuestro", + "se busca", + "captura", + "informacion", + "denuncia", + "investigacion" + ], + "exclude_keywords": [ + "evento", + "agenda", + "licitacion" + ], "reporting": { "label": "Report to MP Panama", "url": "https://ministeriopublico.gob.pa" @@ -444,8 +2944,20 @@ "lat": 12.1364, "lng": -86.2514, "max_items": 120, - "include_keywords": ["desaparec", "secuestro", "captura", "investigacion", "denuncia", "fiscalia", "homicidio"], - "exclude_keywords": ["agenda", "evento", "boletin"], + "include_keywords": [ + "desaparec", + "secuestro", + "captura", + "investigacion", + "denuncia", + "fiscalia", + "homicidio" + ], + "exclude_keywords": [ + "agenda", + "evento", + "boletin" + ], "reporting": { "label": "Report to MP Nicaragua", "url": 
"https://www.ministeriopublico.gob.ni" @@ -470,11 +2982,22 @@ ], "category": "conflict_monitoring", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 100, - "include_keywords": ["conflict", "violence", "protest", "incident", "dashboard", "data"], - "exclude_keywords": ["careers", "donate", "newsletter"], + "include_keywords": [ + "conflict", + "violence", + "protest", + "incident", + "dashboard", + "data" + ], + "exclude_keywords": [ + "careers", + "donate", + "newsletter" + ], "reporting": { "label": "ACLED Data Access", "url": "https://acleddata.com/data-export-tool/" @@ -499,11 +3022,21 @@ ], "category": "humanitarian_tasking", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 100, - "include_keywords": ["mapping", "task", "response", "disaster", "humanitarian", "project"], - "exclude_keywords": ["careers", "donate"], + "include_keywords": [ + "mapping", + "task", + "response", + "disaster", + "humanitarian", + "project" + ], + "exclude_keywords": [ + "careers", + "donate" + ], "reporting": { "label": "Join HOT Mapping Tasks", "url": "https://tasks.hotosm.org/" @@ -528,11 +3061,21 @@ ], "category": "humanitarian_tasking", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 80, - "include_keywords": ["mapathon", "mapping", "task", "response", "field", "vulnerable"], - "exclude_keywords": ["donate", "shop"], + "include_keywords": [ + "mapathon", + "mapping", + "task", + "response", + "field", + "vulnerable" + ], + "exclude_keywords": [ + "donate", + "shop" + ], "reporting": { "label": "Join Missing Maps", "url": "https://www.missingmaps.org/get-involved/" @@ -553,11 +3096,22 @@ "feed_url": "https://familylinks.icrc.org/", "category": "humanitarian_security", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 80, - "include_keywords": ["missing", "family", "search", "crisis", "restore", "trace"], - "exclude_keywords": 
["privacy", "policy", "terms"], + "include_keywords": [ + "missing", + "family", + "search", + "crisis", + "restore", + "trace" + ], + "exclude_keywords": [ + "privacy", + "policy", + "terms" + ], "reporting": { "label": "ICRC Family Links", "url": "https://familylinks.icrc.org/" @@ -582,11 +3136,21 @@ ], "category": "humanitarian_security", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 100, - "include_keywords": ["missing migrants", "incident", "data", "route", "deaths", "disappearances"], - "exclude_keywords": ["publication", "about us"], + "include_keywords": [ + "missing migrants", + "incident", + "data", + "route", + "deaths", + "disappearances" + ], + "exclude_keywords": [ + "publication", + "about us" + ], "reporting": { "label": "IOM Missing Migrants Data", "url": "https://missingmigrants.iom.int/" @@ -611,11 +3175,21 @@ ], "category": "humanitarian_tasking", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 80, - "include_keywords": ["crisis", "humanitarian", "technology", "coordination", "response"], - "exclude_keywords": ["about", "board", "contact"], + "include_keywords": [ + "crisis", + "humanitarian", + "technology", + "coordination", + "response" + ], + "exclude_keywords": [ + "about", + "board", + "contact" + ], "reporting": { "label": "ICT4Peace Publications", "url": "https://ict4peace.org/" @@ -643,8 +3217,20 @@ "lat": -26.2041, "lng": 28.0473, "max_items": 80, - "include_keywords": ["cyber", "security", "incident", "resilience", "capacity", "training", "civil society"], - "exclude_keywords": ["privacy policy", "terms", "donate"], + "include_keywords": [ + "cyber", + "security", + "incident", + "resilience", + "capacity", + "training", + "civil society" + ], + "exclude_keywords": [ + "privacy policy", + "terms", + "donate" + ], "reporting": { "label": "Contact CIDA Africa", "url": "https://cida-africa.org/contact/" @@ -672,8 +3258,20 @@ "lat": -1.2864, "lng": 36.8172, 
"max_items": 80, - "include_keywords": ["digital security", "cyber", "safety", "threat", "awareness", "training", "response"], - "exclude_keywords": ["about us", "privacy policy", "terms"], + "include_keywords": [ + "digital security", + "cyber", + "safety", + "threat", + "awareness", + "training", + "response" + ], + "exclude_keywords": [ + "about us", + "privacy policy", + "terms" + ], "reporting": { "label": "Contact Digital Society Africa", "url": "https://digitalsociety.africa/contact/" @@ -697,8 +3295,19 @@ "lat": 9.082, "lng": 8.6753, "max_items": 80, - "include_keywords": ["cyber", "resilience", "program", "community", "security", "incident", "capacity"], - "exclude_keywords": ["privacy", "terms"], + "include_keywords": [ + "cyber", + "resilience", + "program", + "community", + "security", + "incident", + "capacity" + ], + "exclude_keywords": [ + "privacy", + "terms" + ], "reporting": { "label": "Contact Resilio Africa", "url": "https://resilio.cybersafefoundation.org/" @@ -726,8 +3335,19 @@ "lat": -33.4489, "lng": -70.6693, "max_items": 100, - "include_keywords": ["seguridad digital", "resiliencia", "proteccion", "ciber", "sociedad civil", "amenaza"], - "exclude_keywords": ["donar", "newsletter", "equipo"], + "include_keywords": [ + "seguridad digital", + "resiliencia", + "proteccion", + "ciber", + "sociedad civil", + "amenaza" + ], + "exclude_keywords": [ + "donar", + "newsletter", + "equipo" + ], "reporting": { "label": "Contact Derechos Digitales", "url": "https://www.derechosdigitales.org/contacto/" @@ -755,8 +3375,19 @@ "lat": -25.2637, "lng": -57.5759, "max_items": 100, - "include_keywords": ["seguridad digital", "ciber", "proteccion", "sociedad civil", "capacitacion", "riesgo"], - "exclude_keywords": ["donar", "equipo", "convocatoria laboral"], + "include_keywords": [ + "seguridad digital", + "ciber", + "proteccion", + "sociedad civil", + "capacitacion", + "riesgo" + ], + "exclude_keywords": [ + "donar", + "equipo", + "convocatoria laboral" + ], 
"reporting": { "label": "Contact TEDIC", "url": "https://www.tedic.org/contacto/" @@ -784,8 +3415,19 @@ "lat": 4.711, "lng": -74.0721, "max_items": 100, - "include_keywords": ["seguridad digital", "ciberseguridad", "proteccion", "organizaciones", "amenaza", "riesgo"], - "exclude_keywords": ["donaciones", "equipo", "vacante"], + "include_keywords": [ + "seguridad digital", + "ciberseguridad", + "proteccion", + "organizaciones", + "amenaza", + "riesgo" + ], + "exclude_keywords": [ + "donaciones", + "equipo", + "vacante" + ], "reporting": { "label": "Contact Fundacion Karisma", "url": "https://web.karisma.org.co/contacto/" @@ -811,11 +3453,22 @@ ], "category": "education_digital_capacity", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 100, - "include_keywords": ["school connectivity", "internet", "digital divide", "schools", "education", "mapping"], - "exclude_keywords": ["privacy policy", "terms", "careers"], + "include_keywords": [ + "school connectivity", + "internet", + "digital divide", + "schools", + "education", + "mapping" + ], + "exclude_keywords": [ + "privacy policy", + "terms", + "careers" + ], "reporting": { "label": "UNICEF Giga Opportunities", "url": "https://giga.global/" @@ -841,11 +3494,21 @@ ], "category": "education_digital_capacity", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 120, - "include_keywords": ["digital skills", "capacity", "connectivity", "cyber capacity", "education", "training"], - "exclude_keywords": ["meeting calendar", "press release"], + "include_keywords": [ + "digital skills", + "capacity", + "connectivity", + "cyber capacity", + "education", + "training" + ], + "exclude_keywords": [ + "meeting calendar", + "press release" + ], "reporting": { "label": "ITU Development Programs", "url": "https://www.itu.int/en/ITU-D/Pages/default.aspx" @@ -870,11 +3533,21 @@ ], "category": "education_digital_capacity", "region_tag": "INT", - "lat": 20.0, - 
"lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 120, - "include_keywords": ["education", "digital", "school", "ICT", "skills", "capacity building"], - "exclude_keywords": ["annual report", "procurement policy"], + "include_keywords": [ + "education", + "digital", + "school", + "ICT", + "skills", + "capacity building" + ], + "exclude_keywords": [ + "annual report", + "procurement policy" + ], "reporting": { "label": "World Bank Education Programs", "url": "https://www.worldbank.org/en/topic/education" @@ -900,10 +3573,21 @@ "category": "education_digital_capacity", "region_tag": "AFR", "lat": 1.65, - "lng": 17.0, + "lng": 17, "max_items": 100, - "include_keywords": ["mentor", "teacher", "volunteer", "digital literacy", "coding", "training"], - "exclude_keywords": ["privacy", "terms", "sponsor package"], + "include_keywords": [ + "mentor", + "teacher", + "volunteer", + "digital literacy", + "coding", + "training" + ], + "exclude_keywords": [ + "privacy", + "terms", + "sponsor package" + ], "reporting": { "label": "Join Africa Code Week", "url": "https://africacodeweek.org/" @@ -928,11 +3612,21 @@ ], "category": "education_digital_capacity", "region_tag": "INT", - "lat": 20.0, - "lng": 0.0, + "lat": 20, + "lng": 0, "max_items": 100, - "include_keywords": ["digital skills", "training", "youth", "employment", "mentor", "capacity"], - "exclude_keywords": ["donate", "annual report"], + "include_keywords": [ + "digital skills", + "training", + "youth", + "employment", + "mentor", + "capacity" + ], + "exclude_keywords": [ + "donate", + "annual report" + ], "reporting": { "label": "DOT Programs and Partnerships", "url": "https://www.dotrust.org/" @@ -960,8 +3654,17 @@ "lat": 50.85, "lng": 4.35, "max_items": 100, - "include_keywords": ["digital education", "teacher training", "skills", "school", "capacity"], - "exclude_keywords": ["privacy policy", "cookie policy"], + "include_keywords": [ + "digital education", + "teacher training", + "skills", + "school", + "capacity" + 
], + "exclude_keywords": [ + "privacy policy", + "cookie policy" + ], "reporting": { "label": "European Schoolnet Initiatives", "url": "https://www.eun.org/" @@ -989,8 +3692,18 @@ "lat": 50.85, "lng": 4.35, "max_items": 120, - "include_keywords": ["digital", "education", "training", "call", "project", "skills"], - "exclude_keywords": ["about", "legal notice"], + "include_keywords": [ + "digital", + "education", + "training", + "call", + "project", + "skills" + ], + "exclude_keywords": [ + "about", + "legal notice" + ], "reporting": { "label": "Erasmus+ Opportunities", "url": "https://erasmus-plus.ec.europa.eu/opportunities" @@ -1018,8 +3731,17 @@ "lat": 48.58, "lng": 7.75, "max_items": 100, - "include_keywords": ["digital citizenship", "education", "training", "school", "online safety"], - "exclude_keywords": ["press room", "vacancy"], + "include_keywords": [ + "digital citizenship", + "education", + "training", + "school", + "online safety" + ], + "exclude_keywords": [ + "press room", + "vacancy" + ], "reporting": { "label": "Council of Europe Education Programs", "url": "https://www.coe.int/en/web/education" diff --git a/scripts/continuous-collector.mjs b/scripts/continuous-collector.mjs deleted file mode 100644 index 59fbc8c..0000000 --- a/scripts/continuous-collector.mjs +++ /dev/null @@ -1,93 +0,0 @@ -/* - * EUOSINT - * Portions derived from novatechflow/osint-siem and cyberdude88/osint-siem. - * See NOTICE for provenance and LICENSE for repository-local terms. - */ - -import { spawn } from "node:child_process"; - -const RESTART_DELAY_MIN_MS = Number.parseInt( - process.env.COLLECTOR_RESTART_DELAY_MIN_MS ?? "5000", - 10 -); -const RESTART_DELAY_MAX_MS = Number.parseInt( - process.env.COLLECTOR_RESTART_DELAY_MAX_MS ?? "60000", - 10 -); -const DEFAULT_INTERVAL_MS = process.env.INTERVAL_MS ?? "180000"; -const DEFAULT_MAX_PER_SOURCE = process.env.MAX_PER_SOURCE ?? 
"60"; - -let stopping = false; -let child = null; -let restartAttempt = 0; - -function clamp(value, min, max) { - return Math.max(min, Math.min(max, value)); -} - -function computeBackoff(attempt) { - const raw = RESTART_DELAY_MIN_MS * Math.pow(1.7, attempt); - return clamp(Math.round(raw), RESTART_DELAY_MIN_MS, RESTART_DELAY_MAX_MS); -} - -function launchCollector() { - const env = { - ...process.env, - WATCH: "1", - INTERVAL_MS: DEFAULT_INTERVAL_MS, - MAX_PER_SOURCE: DEFAULT_MAX_PER_SOURCE, - MISSING_PERSON_RELEVANCE_THRESHOLD: - process.env.MISSING_PERSON_RELEVANCE_THRESHOLD ?? "0", - }; - - console.log( - `[collector] starting feed watcher (INTERVAL_MS=${env.INTERVAL_MS}, MAX_PER_SOURCE=${env.MAX_PER_SOURCE})` - ); - - child = spawn("node", ["scripts/fetch-alerts.mjs", "--watch"], { - env, - stdio: "inherit", - cwd: process.cwd(), - }); - - child.on("exit", (code, signal) => { - child = null; - if (stopping) return; - const delay = computeBackoff(restartAttempt); - restartAttempt += 1; - console.warn( - `[collector] watcher exited (code=${code ?? "null"}, signal=${signal ?? 
"null"}); restarting in ${delay}ms` - ); - setTimeout(() => { - if (!stopping) launchCollector(); - }, delay); - }); - - child.on("error", (error) => { - console.error(`[collector] failed to launch watcher: ${error.message}`); - }); - - restartAttempt = 0; -} - -function shutdown(signal) { - if (stopping) return; - stopping = true; - console.log(`[collector] stopping due to ${signal}`); - if (!child) { - process.exit(0); - return; - } - child.once("exit", () => process.exit(0)); - child.kill("SIGTERM"); - setTimeout(() => { - if (child) { - child.kill("SIGKILL"); - } - }, 4000); -} - -process.on("SIGINT", () => shutdown("SIGINT")); -process.on("SIGTERM", () => shutdown("SIGTERM")); - -launchCollector(); diff --git a/scripts/fetch-alerts.mjs b/scripts/fetch-alerts.mjs deleted file mode 100644 index 7013541..0000000 --- a/scripts/fetch-alerts.mjs +++ /dev/null @@ -1,4575 +0,0 @@ -/* - * EUOSINT - * Portions derived from novatechflow/osint-siem and cyberdude88/osint-siem. - * See NOTICE for provenance and LICENSE for repository-local terms. - */ - -import { mkdir, readFile, writeFile } from "node:fs/promises"; -import { dirname } from "node:path"; -import crypto from "node:crypto"; - -const MAX_PER_SOURCE = Number.parseInt(process.env.MAX_PER_SOURCE ?? "20", 10); -const OUTPUT_PATH = process.env.OUTPUT_PATH ?? "public/alerts.json"; -const STATE_OUTPUT_PATH = process.env.STATE_OUTPUT_PATH ?? "public/alerts-state.json"; -const FILTERED_OUTPUT_PATH = - process.env.FILTERED_OUTPUT_PATH ?? "public/alerts-filtered.json"; -const SOURCE_HEALTH_OUTPUT_PATH = - process.env.SOURCE_HEALTH_OUTPUT_PATH ?? "public/source-health.json"; -const SOURCE_REGISTRY_PATH = - process.env.SOURCE_REGISTRY_PATH ?? "registry/source_registry.json"; -const MAX_AGE_DAYS = Number.parseInt(process.env.MAX_AGE_DAYS ?? "180", 10); -const REMOVED_RETENTION_DAYS = Number.parseInt( - process.env.REMOVED_RETENTION_DAYS ?? 
"14", - 10 -); -const INCIDENT_RELEVANCE_THRESHOLD = Number.parseFloat( - process.env.INCIDENT_RELEVANCE_THRESHOLD ?? "0.42" -); -const MISSING_PERSON_RELEVANCE_THRESHOLD = Number.parseFloat( - process.env.MISSING_PERSON_RELEVANCE_THRESHOLD ?? "0" -); -const FAIL_ON_CRITICAL_SOURCE_GAP = - process.env.FAIL_ON_CRITICAL_SOURCE_GAP === "1"; -const CRITICAL_SOURCE_PREFIXES = (process.env.CRITICAL_SOURCE_PREFIXES ?? - "cisa") - .split(",") - .map((value) => value.trim()) - .filter(Boolean); -const WATCH = - process.argv.includes("--watch") || process.env.WATCH === "1"; -const INTERVAL_MS = Number.parseInt(process.env.INTERVAL_MS ?? "900000", 10); -let externalSourcesCache = null; - -const US_STATE_CENTROIDS = { - alabama: [32.806671, -86.79113], - alaska: [61.370716, -152.404419], - arizona: [33.729759, -111.431221], - arkansas: [34.969704, -92.373123], - california: [36.116203, -119.681564], - colorado: [39.059811, -105.311104], - connecticut: [41.597782, -72.755371], - delaware: [39.318523, -75.507141], - florida: [27.766279, -81.686783], - georgia: [33.040619, -83.643074], - hawaii: [21.094318, -157.498337], - idaho: [44.240459, -114.478828], - illinois: [40.349457, -88.986137], - indiana: [39.849426, -86.258278], - iowa: [42.011539, -93.210526], - kansas: [38.5266, -96.726486], - kentucky: [37.66814, -84.670067], - louisiana: [31.169546, -91.867805], - maine: [44.693947, -69.381927], - maryland: [39.063946, -76.802101], - massachusetts: [42.230171, -71.530106], - michigan: [43.326618, -84.536095], - minnesota: [45.694454, -93.900192], - mississippi: [32.741646, -89.678696], - missouri: [38.456085, -92.288368], - montana: [46.921925, -110.454353], - nebraska: [41.12537, -98.268082], - nevada: [38.313515, -117.055374], - "new hampshire": [43.452492, -71.563896], - "new jersey": [40.298904, -74.521011], - "new mexico": [34.840515, -106.248482], - "new york": [42.165726, -74.948051], - "north carolina": [35.630066, -79.806419], - "north dakota": [47.528912, -99.784012], 
- ohio: [40.388783, -82.764915], - oklahoma: [35.565342, -96.928917], - oregon: [44.572021, -122.070938], - pennsylvania: [40.590752, -77.209755], - "rhode island": [41.680893, -71.51178], - "south carolina": [33.856892, -80.945007], - "south dakota": [44.299782, -99.438828], - tennessee: [35.747845, -86.692345], - texas: [31.054487, -97.563461], - utah: [40.150032, -111.862434], - vermont: [44.045876, -72.710686], - virginia: [37.769337, -78.169968], - washington: [47.400902, -121.490494], - "west virginia": [38.491226, -80.954453], - wisconsin: [44.268543, -89.616508], - wyoming: [42.755966, -107.30249], - "district of columbia": [38.9072, -77.0369], - "washington dc": [38.9072, -77.0369], -}; - -const US_STATE_ABBR_TO_NAME = { - AL: "alabama", - AK: "alaska", - AZ: "arizona", - AR: "arkansas", - CA: "california", - CO: "colorado", - CT: "connecticut", - DE: "delaware", - FL: "florida", - GA: "georgia", - HI: "hawaii", - ID: "idaho", - IL: "illinois", - IN: "indiana", - IA: "iowa", - KS: "kansas", - KY: "kentucky", - LA: "louisiana", - ME: "maine", - MD: "maryland", - MA: "massachusetts", - MI: "michigan", - MN: "minnesota", - MS: "mississippi", - MO: "missouri", - MT: "montana", - NE: "nebraska", - NV: "nevada", - NH: "new hampshire", - NJ: "new jersey", - NM: "new mexico", - NY: "new york", - NC: "north carolina", - ND: "north dakota", - OH: "ohio", - OK: "oklahoma", - OR: "oregon", - PA: "pennsylvania", - RI: "rhode island", - SC: "south carolina", - SD: "south dakota", - TN: "tennessee", - TX: "texas", - UT: "utah", - VT: "vermont", - VA: "virginia", - WA: "washington", - WV: "west virginia", - WI: "wisconsin", - WY: "wyoming", - DC: "district of columbia", -}; - -const US_STATE_ALT_TOKENS = { - fla: "florida", - calif: "california", - penn: "pennsylvania", - penna: "pennsylvania", - wisc: "wisconsin", - minn: "minnesota", - colo: "colorado", - ariz: "arizona", - mich: "michigan", - mass: "massachusetts", - conn: "connecticut", - ill: "illinois", - tex: 
"texas", - wash: "washington", - ore: "oregon", - okla: "oklahoma", - "n mex": "new mexico", - "n dak": "north dakota", - "s dak": "south dakota", - "n car": "north carolina", - "s car": "south carolina", - "w va": "west virginia", -}; - -const COUNTRY_CENTROIDS = { - "south africa": [-30.5595, 22.9375], - egypt: [26.8206, 30.8025], - nigeria: [9.082, 8.6753], - kenya: [-0.0236, 37.9062], - tanzania: [-6.369, 34.8888], - madagascar: [-18.7669, 46.8691], - uganda: [1.3733, 32.2903], - rwanda: [-1.9403, 29.8739], - zambia: [-13.1339, 27.8493], - zimbabwe: [-19.0154, 29.1549], - botswana: [-22.3285, 24.6849], - namibia: [-22.9576, 18.4904], - mozambique: [-18.6657, 35.5296], - morocco: [31.7917, -7.0926], - algeria: [28.0339, 1.6596], - ghana: [7.9465, -1.0232], - ethiopia: [9.145, 40.4897], - argentina: [-38.4161, -63.6167], - chile: [-35.6751, -71.543], - colombia: [4.5709, -74.2973], - peru: [-9.19, -75.0152], - uruguay: [-32.5228, -55.7658], - paraguay: [-23.4425, -58.4438], - bolivia: [-16.2902, -63.5887], - venezuela: [6.4238, -66.5897], - mexico: [23.6345, -102.5528], - guatemala: [15.7835, -90.2308], - belize: [17.1899, -88.4976], - honduras: [15.2, -86.2419], - "el salvador": [13.7942, -88.8965], - nicaragua: [12.8654, -85.2072], - "costa rica": [9.7489, -83.7534], - panama: [8.538, -80.7821], - "south korea": [35.9078, 127.7669], - malaysia: [4.2105, 101.9758], - thailand: [15.87, 100.9925], - vietnam: [14.0583, 108.2772], - indonesia: [-0.7893, 113.9213], - philippines: [12.8797, 121.774], - bangladesh: [23.685, 90.3563], - "sri lanka": [7.8731, 80.7718], - "united arab emirates": [23.4241, 53.8478], - "saudi arabia": [23.8859, 45.0792], - qatar: [25.3548, 51.1839], - kuwait: [29.3117, 47.4818], - bahrain: [26.0667, 50.5577], - oman: [21.4735, 55.9754], - jordan: [30.5852, 36.2384], - lebanon: [33.8547, 35.8623], - israel: [31.0461, 34.8516], - iran: [32.4279, 53.688], - iraq: [33.2232, 43.6793], - france: [46.2276, 2.2137], - germany: [51.1657, 10.4515], - 
netherlands: [52.1326, 5.2913], - belgium: [50.5039, 4.4699], - spain: [40.4637, -3.7492], - italy: [41.8719, 12.5674], - sweden: [60.1282, 18.6435], - poland: [51.9194, 19.1451], - bulgaria: [42.7339, 25.4858], - romania: [45.9432, 24.9668], - greece: [39.0742, 21.8243], - portugal: [39.3999, -8.2245], - ireland: [53.1424, -7.6921], - switzerland: [46.8182, 8.2275], - austria: [47.5162, 14.5501], - ukraine: [48.3794, 31.1656], - turkey: [38.9637, 35.2433], - "united kingdom": [55.3781, -3.436], - england: [52.3555, -1.1743], - scotland: [56.4907, -4.2026], - wales: [52.1307, -3.7837], - "new zealand": [-41.5, 172.8], - australia: [-25.2744, 133.7751], - canada: [56.1304, -106.3468], - "united states": [39.8283, -98.5795], - usa: [39.8283, -98.5795], - brazil: [-14.235, -51.9253], - india: [20.5937, 78.9629], - china: [35.8617, 104.1954], - russia: [61.524, 105.3188], - japan: [36.2048, 138.2529], - colombia: [4.5709, -74.2973], - "south korea": [35.9078, 127.7669], - singapore: [1.3521, 103.8198], - "hong kong": [22.3193, 114.1694], - "south africa": [-30.5595, 22.9375], - nigeria: [9.082, 8.6753], - kenya: [-0.0236, 37.9062], - mexico: [23.6345, -102.5528], - chile: [-35.6751, -71.543], - argentina: [-38.4161, -63.6167], - norway: [60.472, 8.4689], - sweden: [60.1282, 18.6435], - denmark: [56.2639, 9.5018], - finland: [61.9241, 25.7482], - jamaica: [18.1096, -77.2975], - bahamas: [25.0343, -77.3963], - barbados: [13.1939, -59.5432], - "dominican republic": [18.7357, -70.1627], - haiti: [18.9712, -72.2852], - cuba: [21.5218, -77.7812], - "trinidad and tobago": [10.6918, -61.2225], - philippines: [12.8797, 121.774], - malaysia: [4.2105, 101.9758], - thailand: [15.87, 100.9925], - vietnam: [14.0583, 108.2772], - indonesia: [-0.7893, 113.9213], - taiwan: [23.6978, 120.9605], -}; - -const CITY_CENTROIDS = { - harrisburg: [40.2732, -76.8867], - philadelphia: [39.9526, -75.1652], - pittsburgh: [40.4406, -79.9959], - allentown: [40.6023, -75.4714], - scranton: [41.4089, 
-75.6624], - erie: [42.1292, -80.0851], - york: [39.9626, -76.7277], - lancaster: [40.0379, -76.3055], - richmond: [37.5407, -77.436], - norfolk: [36.8508, -76.2859], - alexandria: [38.8048, -77.0469], - arlington: [38.8816, -77.091], - baltimore: [39.2904, -76.6122], - washington: [38.9072, -77.0369], - "washington dc": [38.9072, -77.0369], - "new york city": [40.7128, -74.006], - "los angeles": [34.0522, -118.2437], - chicago: [41.8781, -87.6298], - miami: [25.7617, -80.1918], - houston: [29.7604, -95.3698], - dallas: [32.7767, -96.797], - auckland: [-36.8485, 174.7633], - wellington: [-41.2865, 174.7762], - christchurch: [-43.5321, 172.6362], - hamilton: [-37.787, 175.2793], - tauranga: [-37.6878, 176.1651], - dunedin: [-45.8788, 170.5028], - queenstown: [-45.0312, 168.6626], - whangarei: [-35.7251, 174.3237], - taupo: [-38.6869, 176.0702], - "raumati beach": [-40.9398, 174.9768], - lyon: [45.764, 4.8357], - paris: [48.8566, 2.3522], - london: [51.5072, -0.1276], - amsterdam: [52.3676, 4.9041], - brussels: [50.8503, 4.3517], - sofia: [42.6977, 23.3219], - warsaw: [52.2297, 21.0122], - stockholm: [59.3293, 18.0686], - berlin: [52.52, 13.405], - madrid: [40.4168, -3.7038], - rome: [41.9028, 12.4964], - vienna: [48.2082, 16.3738], - dublin: [53.3498, -6.2603], - sydney: [-33.8688, 151.2093], - melbourne: [-37.8136, 144.9631], - tokyo: [35.6762, 139.6503], - osaka: [34.6937, 135.5023], - bogota: [4.711, -74.0721], - medellin: [6.2442, -75.5812], - cali: [3.4516, -76.532], - "the hague": [52.0705, 4.3007], - rotterdam: [51.9225, 4.4792], - sacramento: [38.5816, -121.4944], - "san francisco": [37.7749, -122.4194], - "san diego": [32.7157, -117.1611], - "san jose": [37.3382, -121.8863], -}; - -const ISO2_COUNTRY_HINTS = { - ZA: "south africa", - EG: "egypt", - NG: "nigeria", - KE: "kenya", - TZ: "tanzania", - MG: "madagascar", - UG: "uganda", - RW: "rwanda", - ZM: "zambia", - ZW: "zimbabwe", - BW: "botswana", - NA: "namibia", - MZ: "mozambique", - MA: "morocco", - DZ: 
"algeria", - GH: "ghana", - ET: "ethiopia", - BR: "brazil", - AR: "argentina", - CL: "chile", - CO: "colombia", - PE: "peru", - UY: "uruguay", - PY: "paraguay", - BO: "bolivia", - VE: "venezuela", - MX: "mexico", - GT: "guatemala", - BZ: "belize", - HN: "honduras", - SV: "el salvador", - NI: "nicaragua", - CR: "costa rica", - PA: "panama", - JM: "jamaica", - TT: "trinidad and tobago", - BS: "bahamas", - BB: "barbados", - DO: "dominican republic", - HT: "haiti", - CU: "cuba", - JP: "japan", - IN: "india", - SG: "singapore", - KR: "south korea", - MY: "malaysia", - TH: "thailand", - VN: "vietnam", - ID: "indonesia", - PH: "philippines", - BD: "bangladesh", - LK: "sri lanka", - AE: "united arab emirates", - SA: "saudi arabia", - QA: "qatar", - KW: "kuwait", - BH: "bahrain", - OM: "oman", - JO: "jordan", - LB: "lebanon", - IL: "israel", - TR: "turkey", - IR: "iran", - IQ: "iraq", - FR: "france", - DE: "germany", - NL: "netherlands", - ES: "spain", - IT: "italy", - GB: "united kingdom", - US: "united states", - CA: "canada", - AU: "australia", - NZ: "new zealand", -}; - -// ─── AGENCY FEEDS ─────────────────────────────────────────────── -// Organized by: CISA | FBI | INTERPOL | EUROPOL | NCSC | POLICE (region) | PUBLIC SAFETY -// Only confirmed-working feeds are included. 
- -const sources = [ - // ── CISA (US / North America) ───────────────────────────────── - { - type: "kev-json", - source: { - source_id: "cisa-kev", - authority_name: "CISA", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "cert", - base_url: "https://www.cisa.gov", - }, - feed_url: "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json", - category: "cyber_advisory", - region_tag: "US", - lat: 38.88, - lng: -77.02, - reporting: { - label: "Report to CISA", - url: "https://www.cisa.gov/report", - notes: "Use 911 for emergencies.", - }, - }, - - // ── FBI (US / North America) ────────────────────────────────── - { - type: "rss", - source: { - source_id: "fbi", - authority_name: "FBI", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.fbi.gov", - }, - feed_url: "https://www.fbi.gov/feeds/fbi-top-stories/rss.xml", - category: "public_appeal", - region_tag: "US", - lat: 38.9, - lng: -77.0, - reporting: { - label: "Report to FBI", - url: "https://tips.fbi.gov/", - phone: "1-800-CALL-FBI (1-800-225-5324)", - notes: "Use 911 for emergencies.", - }, - }, - { - type: "rss", - source: { - source_id: "fbi-wanted", - authority_name: "FBI Wanted", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.fbi.gov", - }, - feed_url: "https://www.fbi.gov/feeds/all-wanted/rss.xml", - category: "wanted_suspect", - region_tag: "US", - lat: 38.9, - lng: -77.0, - reporting: { - label: "Submit a Tip to FBI", - url: "https://tips.fbi.gov/", - phone: "1-800-CALL-FBI (1-800-225-5324)", - notes: "Use 911 for emergencies.", - }, - }, - - // ── INTERPOL: Removed — replaced by static hub entry in buildAlerts() ── - // ── EUROPOL (EU / Europe) ───────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "europol", - 
authority_name: "Europol", - country: "Netherlands", - country_code: "NL", - region: "Europe", - authority_type: "police", - base_url: "https://www.europol.europa.eu", - }, - feed_url: "https://www.europol.europa.eu/rss.xml", - category: "public_appeal", - region_tag: "EU", - lat: 52.09, - lng: 4.27, - reporting: { - label: "Report to Europol", - url: "https://www.europol.europa.eu/report-a-crime", - }, - }, - - // ── NCSC UK (UK / Europe) ───────────────────────────────────── - { - type: "rss", - source: { - source_id: "ncsc-uk", - authority_name: "NCSC UK", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "cert", - base_url: "https://www.ncsc.gov.uk", - }, - feed_url: "https://www.ncsc.gov.uk/api/1/services/v1/report-rss-feed.xml", - category: "cyber_advisory", - region_tag: "GB", - lat: 51.5, - lng: -0.13, - reporting: { - label: "Report to NCSC", - url: "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website", - }, - }, - { - type: "rss", - source: { - source_id: "ncsc-uk-all", - authority_name: "NCSC UK", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "cert", - base_url: "https://www.ncsc.gov.uk", - }, - feed_url: "https://www.ncsc.gov.uk/api/1/services/v1/all-rss-feed.xml", - category: "cyber_advisory", - region_tag: "GB", - lat: 51.51, - lng: -0.1, - reporting: { - label: "Report to NCSC", - url: "https://www.ncsc.gov.uk/section/about-this-website/report-scam-website", - }, - }, - - // ── POLICE: New Zealand (Oceania) ───────────────────────────── - { - type: "rss", - source: { - source_id: "nz-police-news", - authority_name: "NZ Police", - country: "New Zealand", - country_code: "NZ", - region: "Oceania", - authority_type: "police", - base_url: "https://www.police.govt.nz", - }, - feed_url: "https://www.police.govt.nz/rss/news", - category: "public_safety", - region_tag: "NZ", - lat: -41.29, - lng: 174.78, - reporting: { - label: "Report to NZ Police", - url: 
"https://www.police.govt.nz/use-105", - phone: "111 (Emergency) / 105 (Non-emergency)", - }, - }, - { - type: "rss", - source: { - source_id: "nz-police-alerts", - authority_name: "NZ Police", - country: "New Zealand", - country_code: "NZ", - region: "Oceania", - authority_type: "police", - base_url: "https://www.police.govt.nz", - }, - feed_url: "https://www.police.govt.nz/rss/alerts", - category: "public_appeal", - region_tag: "NZ", - lat: -41.29, - lng: 174.78, - reporting: { - label: "Report to NZ Police", - url: "https://www.police.govt.nz/use-105", - phone: "111 (Emergency) / 105 (Non-emergency)", - }, - }, - - // ── PUBLIC SAFETY: NCMEC (US / North America) ───────────────── - { - type: "rss", - source: { - source_id: "ncmec", - authority_name: "NCMEC", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "public_safety_program", - base_url: "https://www.missingkids.org", - }, - feed_url: - "https://api.missingkids.org/missingkids/servlet/XmlServlet?LanguageCountry=en_US&act=rss&orgPrefix=NCMC", - category: "missing_person", - region_tag: "US", - lat: 39.83, - lng: -98.58, - reporting: { - label: "Report to NCMEC", - url: "https://report.cybertip.org/", - phone: "1-800-THE-LOST (1-800-843-5678)", - notes: "Use 911 for immediate danger.", - }, - }, - - // ── CIS MS-ISAC (US / North America) ──────────────────────────── - { - type: "rss", - source: { - source_id: "cis-msisac", - authority_name: "CIS MS-ISAC", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "cert", - base_url: "https://www.cisecurity.org", - }, - feed_url: "https://www.cisecurity.org/feed/advisories", - category: "cyber_advisory", - region_tag: "US", - lat: 42.65, - lng: -73.76, - reporting: { - label: "Report to MS-ISAC", - url: "https://www.cisecurity.org/ms-isac/services/soc", - phone: "1-866-787-4722", - email: "soc@cisecurity.org", - notes: "24/7 Security Operations Center for state, local, tribal, and 
territorial governments.", - }, - }, - - // ── California Attorney General (US / North America) ──────────── - { - type: "rss", - source: { - source_id: "ca-oag", - authority_name: "California AG", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://oag.ca.gov", - }, - feed_url: "https://oag.ca.gov/news/feed", - category: "public_appeal", - region_tag: "US", - lat: 38.58, - lng: -121.49, - reporting: { - label: "Report to CA Attorney General", - url: "https://oag.ca.gov/contact/consumer-complaint-against-business-or-company", - phone: "1-800-952-5225", - }, - }, - - // ── CERT-FR (France / Europe) ─────────────────────────────────── - { - type: "rss", - source: { - source_id: "cert-fr", - authority_name: "CERT-FR", - country: "France", - country_code: "FR", - region: "Europe", - authority_type: "cert", - base_url: "https://www.cert.ssi.gouv.fr", - }, - feed_url: "https://www.cert.ssi.gouv.fr/feed/", - category: "cyber_advisory", - region_tag: "FR", - lat: 48.86, - lng: 2.35, - reporting: { - label: "Report to CERT-FR", - url: "https://www.cert.ssi.gouv.fr/contact/", - email: "cert-fr@ssi.gouv.fr", - }, - }, - - // ── NCSC-NL (Netherlands / Europe) ────────────────────────────── - { - type: "rss", - source: { - source_id: "ncsc-nl", - authority_name: "NCSC-NL", - country: "Netherlands", - country_code: "NL", - region: "Europe", - authority_type: "cert", - base_url: "https://advisories.ncsc.nl", - }, - feed_url: "https://advisories.ncsc.nl/rss/advisories", - category: "cyber_advisory", - region_tag: "NL", - lat: 52.07, - lng: 4.30, - reporting: { - label: "Report to NCSC-NL", - url: "https://www.ncsc.nl/contact/kwetsbaarheid-melden", - email: "cert@ncsc.nl", - }, - }, - - // ── JPCERT/CC (Japan / Asia) ──────────────────────────────────── - { - type: "rss", - source: { - source_id: "jpcert", - authority_name: "JPCERT/CC", - country: "Japan", - country_code: "JP", - region: "Asia", - 
authority_type: "cert", - base_url: "https://www.jpcert.or.jp", - }, - feed_url: "https://www.jpcert.or.jp/english/rss/jpcert-en.rdf", - category: "cyber_advisory", - region_tag: "JP", - lat: 35.68, - lng: 139.69, - reporting: { - label: "Report to JPCERT/CC", - url: "https://www.jpcert.or.jp/english/ir/form.html", - email: "info@jpcert.or.jp", - }, - }, - - // ── Colombia National Police (South America) ──────────────────── - { - type: "rss", - source: { - source_id: "policia-colombia", - authority_name: "Policía Nacional de Colombia", - country: "Colombia", - country_code: "CO", - region: "South America", - authority_type: "police", - base_url: "https://www.policia.gov.co", - }, - feed_url: "https://www.policia.gov.co/rss.xml", - category: "public_appeal", - region_tag: "CO", - lat: 4.71, - lng: -74.07, - reporting: { - label: "Report to Policía Nacional", - url: "https://www.policia.gov.co/denuncia-virtual", - phone: "123 (Emergency) / 112 (Línea única)", - }, - }, - - // ── CISA Alerts RSS (US / North America) ───────────────────────── - // May return 403 locally but works from GitHub Actions - { - type: "rss", - followRedirects: true, - source: { - source_id: "cisa-alerts", - authority_name: "CISA Alerts", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "cert", - base_url: "https://www.cisa.gov", - }, - feed_url: "https://www.cisa.gov/cybersecurity-advisories/all.xml", - category: "cyber_advisory", - region_tag: "US", - lat: 38.89, - lng: -77.03, - reporting: { - label: "Report to CISA", - url: "https://www.cisa.gov/report", - phone: "1-888-282-0870", - email: "central@cisa.dhs.gov", - notes: "Use 911 for emergencies.", - }, - }, - - // ── DHS (US / North America) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "dhs", - authority_name: "DHS", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "national_security", 
- base_url: "https://www.dhs.gov", - }, - feed_url: "https://www.dhs.gov/news/rss.xml", - category: "public_safety", - region_tag: "US", - lat: 38.886, - lng: -77.015, - reporting: { - label: "Report to DHS", - url: "https://www.dhs.gov/see-something-say-something/how-to-report-suspicious-activity", - phone: "1-866-347-2423", - }, - }, - - // ── US Secret Service (US / North America) ────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "usss", - authority_name: "US Secret Service", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.secretservice.gov", - }, - feed_url: "https://www.secretservice.gov/rss.xml", - category: "public_appeal", - region_tag: "US", - lat: 38.899, - lng: -77.034, - reporting: { - label: "Report to Secret Service", - url: "https://www.secretservice.gov/contact", - phone: "1-202-406-5708", - }, - }, - - // ── DEA (US / North America) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "dea", - authority_name: "DEA", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.dea.gov", - }, - feed_url: "https://www.dea.gov/press-releases/rss.xml", - category: "public_appeal", - region_tag: "US", - lat: 38.871, - lng: -77.053, - reporting: { - label: "Report to DEA", - url: "https://www.dea.gov/submit-tip", - phone: "1-877-792-2873", - }, - }, - - // ── ATF (US / North America) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "atf", - authority_name: "ATF", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.atf.gov", - }, - feed_url: "https://www.atf.gov/news/rss.xml", - category: "public_appeal", - region_tag: "US", - lat: 38.893, - lng: -77.025, - reporting: { - label: 
"Report to ATF", - url: "https://www.atf.gov/contact/atf-tips", - phone: "1-888-283-8477", - email: "atftips@atf.gov", - }, - }, - - // ── US Marshals (US / North America) ──────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "usms", - authority_name: "US Marshals", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.usmarshals.gov", - }, - feed_url: "https://www.usmarshals.gov/news/news-releases.rss", - category: "wanted_suspect", - region_tag: "US", - lat: 38.895, - lng: -77.021, - reporting: { - label: "Report to US Marshals", - url: "https://www.usmarshals.gov/tips", - phone: "1-877-926-8332", - }, - }, - - // ── NCA UK (UK / Europe) ──────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "nca-uk", - authority_name: "NCA UK", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "police", - base_url: "https://www.nationalcrimeagency.gov.uk", - }, - feed_url: "https://nationalcrimeagency.gov.uk/news?format=feed&type=rss", - category: "public_appeal", - region_tag: "GB", - lat: 51.49, - lng: -0.11, - reporting: { - label: "Report to NCA", - url: "https://www.nationalcrimeagency.gov.uk/what-we-do/crime-threats/cyber-crime/reporting-cyber-crime", - phone: "0370 496 7622", - }, - }, - - // ── GMP Manchester UK (UK / Europe) ───────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "gmp-uk", - authority_name: "Greater Manchester Police", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "police", - base_url: "https://www.gmp.police.uk", - }, - feed_url: "https://www.gmp.police.uk/news/greater-manchester/rss/", - category: "public_appeal", - region_tag: "GB", - lat: 53.48, - lng: -2.24, - reporting: { - label: "Report to GMP", - url: "https://www.gmp.police.uk/ro/report/", - phone: 
"999 (Emergency) / 101 (Non-emergency)", - }, - }, - - // ── Met Police UK (UK / Europe) ───────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "met-police-uk", - authority_name: "Met Police UK", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "police", - base_url: "https://news.met.police.uk", - }, - feed_url: "https://news.met.police.uk/feeds/rss", - category: "public_appeal", - region_tag: "GB", - lat: 51.51, - lng: -0.14, - reporting: { - label: "Report to Met Police", - url: "https://www.met.police.uk/ro/report/", - phone: "999 (Emergency) / 101 (Non-emergency)", - }, - }, - - // ── BSI Germany (Germany / Europe) ────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "bsi-de", - authority_name: "BSI Germany", - country: "Germany", - country_code: "DE", - region: "Europe", - authority_type: "cert", - base_url: "https://www.bsi.bund.de", - }, - feed_url: "https://www.bsi.bund.de/SiteGlobals/Functions/RSSFeed/RSSNewsfeed/RSSNewsfeed.xml", - category: "cyber_advisory", - region_tag: "DE", - lat: 50.73, - lng: 7.10, - reporting: { - label: "Report to BSI", - url: "https://www.bsi.bund.de/EN/Service-Navi/Contact/contact_node.html", - email: "certbund@bsi.bund.de", - }, - }, - - // ── BKA Germany (Germany / Europe) ────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "bka-de", - authority_name: "BKA Germany", - country: "Germany", - country_code: "DE", - region: "Europe", - authority_type: "police", - base_url: "https://www.bka.de", - }, - feed_url: "https://www.bka.de/SharedDocs/Kurzmeldungen/DE/Warnhinweise/RSS/BKA_Pressemitteilungen_RSS.xml", - category: "wanted_suspect", - region_tag: "DE", - lat: 50.12, - lng: 8.68, - reporting: { - label: "Report to BKA", - url: "https://www.bka.de/DE/KontaktAufnehmen/Hinweisportal/hinweisportal_node.html", - phone: "+49 611 55-0", - }, - }, - - // 
── ACSC Australia (Oceania) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "acsc-au", - authority_name: "ACSC Australia", - country: "Australia", - country_code: "AU", - region: "Oceania", - authority_type: "cert", - base_url: "https://www.cyber.gov.au", - }, - feed_url: "https://www.cyber.gov.au/advisories/feed", - feed_urls: [ - "https://www.cyber.gov.au/advisories/feed", - "https://www.cyber.gov.au/about-us/advisories/rss.xml", - "https://www.cyber.gov.au/alerts/feed", - ], - category: "cyber_advisory", - region_tag: "AU", - lat: -35.28, - lng: 149.13, - reporting: { - label: "Report to ACSC", - url: "https://www.cyber.gov.au/report-and-recover/report", - phone: "1300 292 371", - }, - }, - - // ── AFP Australia (Oceania) ───────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "afp-au", - authority_name: "AFP Australia", - country: "Australia", - country_code: "AU", - region: "Oceania", - authority_type: "police", - base_url: "https://www.afp.gov.au", - }, - feed_url: "https://www.afp.gov.au/news-centre/media-releases/rss.xml", - feed_urls: [ - "https://www.afp.gov.au/news-centre/media-releases/rss.xml", - "https://www.afp.gov.au/news-centre/media-release/rss.xml", - "https://www.afp.gov.au/news-centre/media-releases/feed", - ], - category: "public_appeal", - region_tag: "AU", - lat: -35.31, - lng: 149.14, - reporting: { - label: "Report to AFP", - url: "https://www.afp.gov.au/what-we-do/crime-types/report-crime", - phone: "131 237", - }, - }, - - // ── Queensland Police Service (Oceania) ──────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "qps-au", - authority_name: "Queensland Police", - country: "Australia", - country_code: "AU", - region: "Oceania", - authority_type: "police", - base_url: "https://mypolice.qld.gov.au", - }, - feed_url: "https://mypolice.qld.gov.au/feed/", - feed_urls: [ - 
"https://mypolice.qld.gov.au/feed/", - "https://mypolice.qld.gov.au/category/alert/feed/", - "https://mypolice.qld.gov.au/category/my-police-news/feed/", - ], - category: "public_appeal", - region_tag: "AU", - lat: -27.47, - lng: 153.03, - reporting: { - label: "Report to Queensland Police", - url: "https://www.police.qld.gov.au/policelink-reporting", - phone: "000 (Emergency) / 131 444 (Policelink)", - }, - }, - - // ── New South Wales Police (Oceania) ─────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "nsw-police-au", - authority_name: "NSW Police", - country: "Australia", - country_code: "AU", - region: "Oceania", - authority_type: "police", - base_url: "https://www.police.nsw.gov.au", - }, - feed_url: "https://www.police.nsw.gov.au/news/rss", - feed_urls: [ - "https://www.police.nsw.gov.au/news/rss", - "https://www.police.nsw.gov.au/rss/news", - "https://www.police.nsw.gov.au/news/feed", - ], - category: "public_appeal", - region_tag: "AU", - lat: -33.87, - lng: 151.21, - reporting: { - label: "Report to NSW Police", - url: "https://portal.police.nsw.gov.au/s/online-services", - phone: "000 (Emergency) / 131 444 (Police Assistance Line)", - }, - }, - - // ── Canada Cyber Centre (North America) ───────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "cccs-ca", - authority_name: "Canada Cyber Centre", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "cert", - base_url: "https://www.cyber.gc.ca", - }, - feed_url: "https://www.cyber.gc.ca/en/alerts-advisories/feed", - category: "cyber_advisory", - region_tag: "CA", - lat: 45.42, - lng: -75.69, - reporting: { - label: "Report to Cyber Centre", - url: "https://www.cyber.gc.ca/en/incident-management", - email: "contact@cyber.gc.ca", - phone: "1-833-292-3722", - }, - }, - - // ── RCMP Canada (North America) ───────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - 
source_id: "rcmp-ca", - authority_name: "RCMP Canada", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "police", - base_url: "https://www.rcmp-grc.gc.ca", - }, - feed_url: "https://www.rcmp-grc.gc.ca/en/news/rss", - category: "public_appeal", - region_tag: "CA", - lat: 45.40, - lng: -75.70, - reporting: { - label: "Report to RCMP", - url: "https://www.rcmp-grc.gc.ca/en/report-information-online", - phone: "1-800-771-5401", - }, - }, - - // ── Policia Nacional Spain (Europe) ───────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "cnp-es", - authority_name: "Policía Nacional Spain", - country: "Spain", - country_code: "ES", - region: "Europe", - authority_type: "police", - base_url: "https://www.policia.es", - }, - feed_url: "https://www.policia.es/rss/rss_prensa.xml", - category: "public_appeal", - region_tag: "ES", - lat: 40.42, - lng: -3.70, - reporting: { - label: "Report to Policía Nacional", - url: "https://www.policia.es/colabora.php", - phone: "091", - }, - }, - - // ── CERT-In India (Asia) ──────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "cert-in", - authority_name: "CERT-In", - country: "India", - country_code: "IN", - region: "Asia", - authority_type: "cert", - base_url: "https://www.cert-in.org.in", - }, - feed_url: "https://www.cert-in.org.in/s2cMainServlet?pageid=RSSFEED", - category: "cyber_advisory", - region_tag: "IN", - lat: 28.61, - lng: 77.21, - reporting: { - label: "Report to CERT-In", - url: "https://www.cert-in.org.in/", - email: "incident@cert-in.org.in", - phone: "+91-11-24368572", - }, - }, - - // ── SingCERT Singapore (Asia) ─────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "singcert", - authority_name: "SingCERT", - country: "Singapore", - country_code: "SG", - region: "Asia", - authority_type: "cert", - base_url: "https://www.csa.gov.sg", - 
}, - feed_url: "https://www.csa.gov.sg/singcert/Alerts/rss", - feed_urls: [ - "https://www.csa.gov.sg/singcert/Alerts/rss", - "https://www.csa.gov.sg/alerts-and-advisories/alerts/rss", - "https://www.csa.gov.sg/alerts-and-advisories/advisories/rss", - ], - category: "cyber_advisory", - region_tag: "SG", - lat: 1.29, - lng: 103.85, - reporting: { - label: "Report to SingCERT", - url: "https://www.csa.gov.sg/singcert/reporting", - email: "singcert@csa.gov.sg", - phone: "+65 6323 5052", - }, - }, - - // ── Singapore Police Force (Asia) ─────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "spf-sg", - authority_name: "Singapore Police", - country: "Singapore", - country_code: "SG", - region: "Asia", - authority_type: "police", - base_url: "https://www.police.gov.sg", - }, - feed_url: "https://www.police.gov.sg/media-room/news/feed", - feed_urls: [ - "https://www.police.gov.sg/media-room/news/feed", - "https://www.police.gov.sg/rss", - "https://www.police.gov.sg/media-room/news/rss.xml", - ], - category: "public_appeal", - region_tag: "SG", - lat: 1.31, - lng: 103.84, - reporting: { - label: "Report to Singapore Police", - url: "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", - phone: "999 (Emergency) / 1800-255-0000 (Police Hotline)", - }, - }, - - // ── HKCERT Hong Kong (Asia) ───────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "hkcert", - authority_name: "HKCERT", - country: "Hong Kong", - country_code: "HK", - region: "Asia", - authority_type: "cert", - base_url: "https://www.hkcert.org", - }, - feed_url: "https://www.hkcert.org/rss", - category: "cyber_advisory", - region_tag: "HK", - lat: 22.32, - lng: 114.17, - reporting: { - label: "Report to HKCERT", - url: "https://www.hkcert.org/report-incident", - email: "hkcert@hkcert.org", - phone: "+852 8105 6060", - }, - }, - - // ── SAPS South Africa (Africa) 
────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "saps-za", - authority_name: "SAPS South Africa", - country: "South Africa", - country_code: "ZA", - region: "Africa", - authority_type: "police", - base_url: "https://www.saps.gov.za", - }, - feed_url: "https://www.saps.gov.za/newsroom/rss.php", - category: "public_appeal", - region_tag: "ZA", - lat: -25.75, - lng: 28.19, - reporting: { - label: "Report to SAPS", - url: "https://www.saps.gov.za/resource_centre/contacts/contacts.php", - phone: "10111 (Emergency) / 08600 10111 (Crime Stop)", - }, - }, - { - type: "rss", - followRedirects: true, - source: { - source_id: "missing-children-za", - authority_name: "Missing Children South Africa", - country: "South Africa", - country_code: "ZA", - region: "Africa", - authority_type: "public_safety_program", - base_url: "https://missingchildren.org.za", - }, - feed_url: "https://missingchildren.org.za/feed/", - feed_urls: [ - "https://missingchildren.org.za/feed/", - "https://missingchildren.org.za/category/missing-children/feed/", - "https://missingchildren.org.za/category/cases/feed/", - ], - category: "missing_person", - region_tag: "ZA", - lat: -29.0, - lng: 24.0, - reporting: { - label: "Report to Missing Children SA", - url: "https://missingchildren.org.za/report/", - phone: "+27 72 647 7464", - notes: "Coordinate directly with SAPS in emergency situations.", - }, - }, - - // ── Crimestoppers UK (Europe) ─────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "crimestoppers-uk", - authority_name: "Crimestoppers UK", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "public_safety_program", - base_url: "https://www.crimestoppers-uk.org", - }, - feed_url: "https://www.crimestoppers-uk.org/give-information/latest-news-feeds/rss", - category: "public_appeal", - region_tag: "GB", - lat: 51.52, - lng: -0.08, - reporting: { - label: 
"Report to Crimestoppers", - url: "https://crimestoppers-uk.org/give-information", - phone: "0800 555 111", - notes: "100% anonymous. You can also report online.", - }, - }, - - // ── Japan NPA (Asia) ──────────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "npa-jp", - authority_name: "Japan NPA", - country: "Japan", - country_code: "JP", - region: "Asia", - authority_type: "police", - base_url: "https://www.npa.go.jp", - }, - feed_url: "https://www.npa.go.jp/rss/index.xml", - category: "public_safety", - region_tag: "JP", - lat: 35.69, - lng: 139.75, - reporting: { - label: "Report to NPA Japan", - url: "https://www.npa.go.jp/english/index.html", - phone: "110 (Emergency)", - }, - }, - - // ── Gendarmerie France (Europe) ───────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "gendarmerie-fr", - authority_name: "Gendarmerie France", - country: "France", - country_code: "FR", - region: "Europe", - authority_type: "police", - base_url: "https://www.gendarmerie.interieur.gouv.fr", - }, - feed_url: "https://www.gendarmerie.interieur.gouv.fr/rss", - category: "public_appeal", - region_tag: "FR", - lat: 48.85, - lng: 2.30, - reporting: { - label: "Report to Gendarmerie", - url: "https://www.pre-plainte-en-ligne.gouv.fr/", - phone: "17 (Emergency)", - }, - }, - - // ── Polisen Sweden (Europe) ───────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "polisen-se", - authority_name: "Polisen Sweden", - country: "Sweden", - country_code: "SE", - region: "Europe", - authority_type: "police", - base_url: "https://polisen.se", - }, - feed_url: "https://polisen.se/aktuellt/rss/hela-landet/", - category: "public_appeal", - region_tag: "SE", - lat: 59.33, - lng: 18.07, - reporting: { - label: "Report to Polisen", - url: "https://polisen.se/en/victims-of-crime/report-a-crime-online/", - phone: "112 (Emergency) / 114 14 
(Non-emergency)", - }, - }, - - // ── Politiet Norway (Europe) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "politiet-no", - authority_name: "Politiet Norway", - country: "Norway", - country_code: "NO", - region: "Europe", - authority_type: "police", - base_url: "https://www.politiet.no", - }, - feed_url: "https://www.politiet.no/rss/", - category: "public_appeal", - region_tag: "NO", - lat: 59.91, - lng: 10.75, - reporting: { - label: "Report to Politiet", - url: "https://www.politiet.no/en/services/report-an-offence/", - phone: "112 (Emergency) / 02800 (Non-emergency)", - }, - }, - - // ── Policia Federal Brazil (South America) ────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "pf-br", - authority_name: "Polícia Federal Brazil", - country: "Brazil", - country_code: "BR", - region: "South America", - authority_type: "police", - base_url: "https://www.gov.br/pf", - }, - feed_url: "https://www.gov.br/pf/pt-br/assuntos/noticias/@@rss", - feed_urls: [ - "https://www.gov.br/pf/pt-br/assuntos/noticias/@@rss", - "https://www.gov.br/pf/pt-br/rss", - "https://www.gov.br/pf/pt-br/@@search?sort_on=Date&Subject:list=noticias&b_size=100&format=rss", - ], - category: "public_appeal", - region_tag: "BR", - lat: -15.79, - lng: -47.88, - reporting: { - label: "Report to Polícia Federal", - url: "https://www.gov.br/pf/pt-br/canais_atendimento/denuncia", - phone: "190 (Emergency)", - }, - }, - - // ── Carabineros Chile (South America) ─────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "carabineros-cl", - authority_name: "Carabineros Chile", - country: "Chile", - country_code: "CL", - region: "South America", - authority_type: "police", - base_url: "https://www.carabineros.cl", - }, - feed_url: "https://www.carabineros.cl/feed/", - feed_urls: [ - "https://www.carabineros.cl/feed/", - "https://www.carabineros.cl/rss", - 
"https://www.carabineros.cl/index.php/feed/", - ], - category: "public_appeal", - region_tag: "CL", - lat: -33.45, - lng: -70.67, - reporting: { - label: "Report to Carabineros", - url: "https://www.carabineros.cl/", - phone: "133 (Emergency)", - }, - }, - - // ── Policía Nacional del Perú (South America) ─────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "pnp-pe", - authority_name: "Policía Nacional del Perú", - country: "Peru", - country_code: "PE", - region: "South America", - authority_type: "police", - base_url: "https://www.policia.gob.pe", - }, - feed_url: "https://www.policia.gob.pe/feed/", - feed_urls: [ - "https://www.policia.gob.pe/feed/", - "https://www.policia.gob.pe/rss", - "https://www.gob.pe/institucion/pnp/noticias.rss", - ], - category: "public_appeal", - region_tag: "PE", - lat: -12.05, - lng: -77.04, - reporting: { - label: "Report to PNP Peru", - url: "https://www.policia.gob.pe/denuncia/", - phone: "105 (Emergency)", - }, - }, - - // ── Policía Nacional Ecuador (South America) ──────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "policia-ec", - authority_name: "Policía Nacional Ecuador", - country: "Ecuador", - country_code: "EC", - region: "South America", - authority_type: "police", - base_url: "https://www.policia.gob.ec", - }, - feed_url: "https://www.policia.gob.ec/feed/", - feed_urls: [ - "https://www.policia.gob.ec/feed/", - "https://www.policia.gob.ec/rss", - "https://www.policia.gob.ec/category/noticias/feed/", - ], - category: "public_appeal", - region_tag: "EC", - lat: -0.18, - lng: -78.47, - reporting: { - label: "Report to Policía Ecuador", - url: "https://www.policia.gob.ec/servicios/", - phone: "911 (Emergency) / 1800-DELITO", - }, - }, - - // ── Policía Boliviana (South America) ─────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "policia-bo", - authority_name: "Policía Boliviana", - country: "Bolivia", - 
country_code: "BO", - region: "South America", - authority_type: "police", - base_url: "https://www.policia.bo", - }, - feed_url: "https://www.policia.bo/feed/", - feed_urls: [ - "https://www.policia.bo/feed/", - "https://www.policia.bo/rss", - "https://www.policia.bo/category/noticias/feed/", - ], - category: "public_appeal", - region_tag: "BO", - lat: -16.5, - lng: -68.15, - reporting: { - label: "Report to Policía Boliviana", - url: "https://www.policia.bo/", - phone: "110 (Emergency)", - }, - }, - - // ── Policía Nacional Paraguay (South America) ─────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "policia-py", - authority_name: "Policía Nacional Paraguay", - country: "Paraguay", - country_code: "PY", - region: "South America", - authority_type: "police", - base_url: "https://www.policianacional.gov.py", - }, - feed_url: "https://www.policianacional.gov.py/feed/", - feed_urls: [ - "https://www.policianacional.gov.py/feed/", - "https://www.policianacional.gov.py/rss", - "https://www.policianacional.gov.py/category/noticias/feed/", - ], - category: "public_appeal", - region_tag: "PY", - lat: -25.29, - lng: -57.64, - reporting: { - label: "Report to Policía Paraguay", - url: "https://www.policianacional.gov.py/", - phone: "911 (Emergency)", - }, - }, - - // ── Cibercrimen Chile / PDI (South America) ───────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "pdi-ciberchile", - authority_name: "PDI Chile Cibercrimen", - country: "Chile", - country_code: "CL", - region: "South America", - authority_type: "police", - base_url: "https://www.pdichile.cl", - }, - feed_url: "https://www.pdichile.cl/feed/", - feed_urls: [ - "https://www.pdichile.cl/feed/", - "https://www.pdichile.cl/rss", - "https://www.pdichile.cl/instituci%C3%B3n/noticias/feed", - ], - category: "cyber_advisory", - region_tag: "CL", - lat: -33.45, - lng: -70.66, - reporting: { - label: "Report Cybercrime to PDI", - url: 
"https://www.pdichile.cl/", - phone: "134 (PDI Emergency)", - }, - }, - - // ── Fiscalía Argentina (South America) ────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "fiscales-ar", - authority_name: "Ministerio Público Fiscal Argentina", - country: "Argentina", - country_code: "AR", - region: "South America", - authority_type: "regulatory", - base_url: "https://www.fiscales.gob.ar", - }, - feed_url: "https://www.fiscales.gob.ar/feed/", - feed_urls: [ - "https://www.fiscales.gob.ar/feed/", - "https://www.fiscales.gob.ar/category/noticias/feed/", - "https://www.fiscales.gob.ar/category/cibercrimen/feed/", - ], - category: "public_safety", - region_tag: "AR", - lat: -34.61, - lng: -58.38, - reporting: { - label: "Report to Fiscalía Argentina", - url: "https://www.mpf.gob.ar/", - phone: "137 (Emergency advisory line)", - }, - }, - - // ── NGO / Nonprofit: Missing Children Chile ───────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "missing-cl-ngo", - authority_name: "Fundación Extraviados Chile", - country: "Chile", - country_code: "CL", - region: "South America", - authority_type: "public_safety_program", - base_url: "https://www.extraviados.cl", - }, - feed_url: "https://www.extraviados.cl/feed/", - feed_urls: [ - "https://www.extraviados.cl/feed/", - "https://www.extraviados.cl/category/casos-vigentes/feed/", - ], - category: "missing_person", - region_tag: "CL", - lat: -33.43, - lng: -70.65, - reporting: { - label: "Report Missing Person in Chile", - url: "https://www.extraviados.cl/", - notes: "Coordinate with local police for urgent leads.", - }, - }, - - // ── FBI Seeking Information (US / North America) ──────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "fbi-seeking", - authority_name: "FBI Seeking Info", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.fbi.gov", - 
}, - feed_url: "https://www.fbi.gov/feeds/seeking-information/rss.xml", - category: "public_appeal", - region_tag: "US", - lat: 38.91, - lng: -77.01, - reporting: { - label: "Submit a Tip to FBI", - url: "https://tips.fbi.gov/", - phone: "1-800-CALL-FBI (1-800-225-5324)", - notes: "The FBI is seeking the public's assistance. If you have information, submit a tip.", - }, - }, - - // ── FBI Most Wanted (US / North America) ──────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "fbi-mostwanted", - authority_name: "FBI Most Wanted", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "police", - base_url: "https://www.fbi.gov", - }, - feed_url: "https://www.fbi.gov/feeds/fbi-most-wanted/rss.xml", - category: "wanted_suspect", - region_tag: "US", - lat: 38.89, - lng: -77.02, - reporting: { - label: "Report Sighting to FBI", - url: "https://tips.fbi.gov/", - phone: "1-800-CALL-FBI (1-800-225-5324)", - notes: "Do NOT attempt to apprehend. 
Call 911 immediately if in danger.", - }, - }, - - // ── Action Fraud UK (Europe) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "actionfraud-uk", - authority_name: "Action Fraud UK", - country: "United Kingdom", - country_code: "GB", - region: "Europe", - authority_type: "police", - base_url: "https://www.actionfraud.police.uk", - }, - feed_url: "https://www.actionfraud.police.uk/rss", - category: "fraud_alert", - region_tag: "GB", - lat: 51.50, - lng: -0.12, - reporting: { - label: "Report Fraud to Action Fraud", - url: "https://www.actionfraud.police.uk/reporting-fraud-and-cyber-crime", - phone: "0300 123 2040", - }, - }, - - // ── CNA Singapore Crime (Asia) ────────────────────────────────── - { - type: "rss", - source: { - source_id: "cna-sg-crime", - authority_name: "CNA Singapore Crime", - country: "Singapore", - country_code: "SG", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.channelnewsasia.com", - }, - feed_url: "https://www.channelnewsasia.com/api/v1/rss-outbound-feed?_format=xml&category=6511", - category: "public_safety", - region_tag: "SG", - lat: 1.35, - lng: 103.82, - reporting: { - label: "Report Crime in Singapore", - url: "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", - phone: "999 (Emergency) / 1800-255-0000 (Police Hotline)", - }, - }, - - // ── Yonhap News Korea (Asia) ──────────────────────────────────── - { - type: "rss", - source: { - source_id: "yonhap-kr", - authority_name: "Yonhap News Korea", - country: "South Korea", - country_code: "KR", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://en.yna.co.kr", - }, - feed_url: "https://en.yna.co.kr/RSS/news.xml", - category: "public_safety", - region_tag: "KR", - lat: 37.57, - lng: 126.98, - reporting: { - label: "Report Crime in South Korea", - url: "https://www.police.go.kr/eng/index.do", - phone: "112 (Emergency)", - }, - 
}, - - // ── NHK Japan News (Asia) ────────────────────────────────────── - // In Japanese - auto-translated to English - { - type: "rss", - source: { - source_id: "nhk-jp", - authority_name: "NHK Japan", - country: "Japan", - country_code: "JP", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www3.nhk.or.jp", - }, - feed_url: "https://www3.nhk.or.jp/rss/news/cat1.xml", - category: "public_safety", - region_tag: "JP", - lat: 35.67, - lng: 139.71, - reporting: { - label: "Report to Japan Police", - url: "https://www.npa.go.jp/english/index.html", - phone: "110 (Emergency)", - }, - }, - - // ── SCMP Hong Kong (Asia) ────────────────────────────────────── - { - type: "rss", - source: { - source_id: "scmp-hk", - authority_name: "SCMP Hong Kong", - country: "Hong Kong", - country_code: "HK", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.scmp.com", - }, - feed_url: "https://www.scmp.com/rss/5/feed", - followRedirects: true, - category: "public_safety", - region_tag: "HK", - lat: 22.28, - lng: 114.16, - reporting: { - label: "Report Crime in Hong Kong", - url: "https://www.police.gov.hk/ppp_en/contact_us.html", - phone: "999 (Emergency)", - }, - }, - - // ── Straits Times Singapore (Asia) ────────────────────────────── - { - type: "rss", - source: { - source_id: "straitstimes-sg", - authority_name: "Straits Times Singapore", - country: "Singapore", - country_code: "SG", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.straitstimes.com", - }, - feed_url: "https://www.straitstimes.com/news/singapore/rss.xml", - category: "public_safety", - region_tag: "SG", - lat: 1.30, - lng: 103.84, - reporting: { - label: "Report Crime in Singapore", - url: "https://eservices.police.gov.sg/content/policehubhome/homepage/police-report.html", - phone: "999 (Emergency)", - }, - }, - - // ── Philippine National Police (Asia) ─────────────────────────── - { - type: "rss", - 
followRedirects: true, - source: { - source_id: "pnp-ph", - authority_name: "PNP Philippines", - country: "Philippines", - country_code: "PH", - region: "Asia", - authority_type: "police", - base_url: "https://www.pnp.gov.ph", - }, - feed_url: "https://www.pnp.gov.ph/rss", - feed_urls: [ - "https://www.pnp.gov.ph/rss", - "https://www.pnp.gov.ph/feed/", - "https://www.pnp.gov.ph/category/press-release/feed/", - ], - category: "public_appeal", - region_tag: "PH", - lat: 14.60, - lng: 120.98, - reporting: { - label: "Report to PNP", - url: "https://www.pnp.gov.ph/", - phone: "117 (Emergency) / 8722-0650", - }, - }, - - // ── Royal Malaysia Police (Asia) ──────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "pdrm-my", - authority_name: "PDRM Malaysia", - country: "Malaysia", - country_code: "MY", - region: "Asia", - authority_type: "police", - base_url: "https://www.pdrm.gov.my", - }, - feed_url: "https://www.pdrm.gov.my/rss", - feed_urls: [ - "https://www.pdrm.gov.my/rss", - "https://www.rmp.gov.my/rss", - "https://www.rmp.gov.my/feed/", - ], - category: "public_appeal", - region_tag: "MY", - lat: 3.14, - lng: 101.69, - reporting: { - label: "Report to PDRM", - url: "https://semakonline.rmp.gov.my/", - phone: "999 (Emergency)", - }, - }, - - // ── Trinidad & Tobago Police (Caribbean) ──────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "ttps", - authority_name: "Trinidad & Tobago Police", - country: "Trinidad and Tobago", - country_code: "TT", - region: "Caribbean", - authority_type: "police", - base_url: "https://www.ttps.gov.tt", - }, - feed_url: "https://www.ttps.gov.tt/rss", - category: "public_appeal", - region_tag: "TT", - lat: 10.65, - lng: -61.50, - reporting: { - label: "Report to TTPS", - url: "https://www.ttps.gov.tt/", - phone: "999 (Emergency) / 555 (Crime Stoppers)", - }, - }, - - // ── Jamaica Constabulary Force (Caribbean) ────────────────────── - { - type: "rss", 
- followRedirects: true, - source: { - source_id: "jcf-jm", - authority_name: "JCF Jamaica", - country: "Jamaica", - country_code: "JM", - region: "Caribbean", - authority_type: "police", - base_url: "https://www.jcf.gov.jm", - }, - feed_url: "https://www.jcf.gov.jm/rss", - category: "public_appeal", - region_tag: "JM", - lat: 18.00, - lng: -76.79, - reporting: { - label: "Report to JCF", - url: "https://www.jcf.gov.jm/", - phone: "119 (Emergency) / 311 (Crime Stop)", - }, - }, - - // ── Mexico FGR / Fiscalía (North America) ────────────────────── - { - type: "html-list", - followRedirects: true, - source: { - source_id: "fgr-mx", - authority_name: "FGR Mexico", - country: "Mexico", - country_code: "MX", - region: "North America", - authority_type: "police", - base_url: "https://www.gob.mx/fgr", - }, - feed_url: "https://www.gob.mx/fgr/archivo/prensa", - feed_urls: [ - "https://www.gob.mx/fgr/archivo/prensa", - "https://www.gob.mx/fgr/es/archivo/prensa", - "https://www.gob.mx/fgr", - ], - include_keywords: [ - "desaparec", - "se busca", - "ficha", - "recompensa", - "secuestro", - "privación de la libertad", - "denuncia", - "información", - "investigación", - "captura", - "homicidio", - "víctima", - "feminicidio", - "trata", - "delincuencia", - "cártel", - ], - exclude_keywords: ["agenda", "discurso", "evento", "licitación", "transparencia"], - category: "public_appeal", - region_tag: "MX", - lat: 19.43, - lng: -99.13, - reporting: { - label: "Report to FGR Mexico", - url: "https://www.gob.mx/fgr", - phone: "800-008-5400", - notes: "Denuncia anónima / Anonymous tip line.", - }, - }, - - // ── Mexico AMBER Alert (North America) ────────────────────────── - { - type: "html-list", - followRedirects: true, - source: { - source_id: "amber-mx", - authority_name: "AMBER Alert Mexico", - country: "Mexico", - country_code: "MX", - region: "North America", - authority_type: "public_safety_program", - base_url: "https://www.gob.mx/amber", - }, - feed_url: 
"https://www.gob.mx/amber/archivo/acciones_y_programas", - feed_urls: [ - "https://www.gob.mx/amber/archivo/acciones_y_programas", - "https://www.gob.mx/amber/es/archivo/acciones_y_programas", - "https://www.gob.mx/amber", - ], - include_keywords: [ - "alerta amber", - "desaparec", - "no localizado", - "se busca", - "ficha", - "menor", - "niña", - "niño", - "adolescente", - "auxilio", - "información", - ], - exclude_keywords: ["evento", "campaña", "conferencia", "manual", "material"], - category: "missing_person", - region_tag: "MX", - lat: 19.44, - lng: -99.14, - reporting: { - label: "Report Missing Child Mexico", - url: "https://www.gob.mx/amber", - phone: "800-008-5400", - notes: "Alerta AMBER México", - }, - }, - - // ── Canada Missing Children (North America) ──────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "missing-ca", - authority_name: "Canada Missing Children", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "public_safety_program", - base_url: "https://www.canadasmissing.ca", - }, - feed_url: "https://www.canadasmissing.ca/rss/index-eng.xml", - category: "missing_person", - region_tag: "CA", - lat: 45.43, - lng: -75.68, - reporting: { - label: "Report Missing Person Canada", - url: "https://www.canadasmissing.ca/index-eng.htm", - phone: "1-866-KID-TIPS (1-866-543-8477)", - notes: "Canadian Centre for Child Protection", - }, - }, - - // ── Korea Police (Asia) ───────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "knpa-kr", - authority_name: "Korea National Police", - country: "South Korea", - country_code: "KR", - region: "Asia", - authority_type: "police", - base_url: "https://www.police.go.kr", - }, - feed_url: "https://www.police.go.kr/eng/portal/rss/rss.do", - category: "public_safety", - region_tag: "KR", - lat: 37.58, - lng: 126.97, - reporting: { - label: "Report to Korean Police", - url: 
"https://www.police.go.kr/eng/index.do", - phone: "112 (Emergency)", - }, - }, - - // ── Thai CERT (Asia) ──────────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "thaicert", - authority_name: "ThaiCERT", - country: "Thailand", - country_code: "TH", - region: "Asia", - authority_type: "cert", - base_url: "https://www.thaicert.or.th", - }, - feed_url: "https://www.thaicert.or.th/RSS/feed-en.xml", - feed_urls: [ - "https://www.thaicert.or.th/RSS/feed-en.xml", - "https://www.thaicert.or.th/feed/", - ], - category: "cyber_advisory", - region_tag: "TH", - lat: 13.76, - lng: 100.50, - reporting: { - label: "Report to ThaiCERT", - url: "https://www.thaicert.or.th/", - email: "op@thaicert.or.th", - }, - }, - - // ── MyCERT Malaysia (Asia) ────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "mycert-my", - authority_name: "MyCERT Malaysia", - country: "Malaysia", - country_code: "MY", - region: "Asia", - authority_type: "cert", - base_url: "https://www.mycert.org.my", - }, - feed_url: "https://www.mycert.org.my/portal/rss", - feed_urls: [ - "https://www.mycert.org.my/portal/rss", - "https://www.mycert.org.my/feed", - ], - category: "cyber_advisory", - region_tag: "MY", - lat: 3.15, - lng: 101.70, - reporting: { - label: "Report to MyCERT", - url: "https://www.mycert.org.my/portal/report-incident", - email: "mycert@cybersecurity.my", - }, - }, - - // ── BSSN Indonesia (Asia) ────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "bssn-id", - authority_name: "BSSN Indonesia", - country: "Indonesia", - country_code: "ID", - region: "Asia", - authority_type: "cert", - base_url: "https://bssn.go.id", - }, - feed_url: "https://bssn.go.id/feed/", - feed_urls: [ - "https://bssn.go.id/feed/", - "https://bssn.go.id/category/peringatan-keamanan/feed/", - ], - category: "cyber_advisory", - region_tag: "ID", - lat: -6.20, - 
lng: 106.82, - reporting: { - label: "Report to BSSN", - url: "https://bssn.go.id/", - notes: "Use official BSSN contact channels for incident reporting.", - }, - }, - - // ── PRIVATE SECTOR: BleepingComputer (Global) ────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "bleepingcomputer", - authority_name: "BleepingComputer", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "private_sector", - base_url: "https://www.bleepingcomputer.com", - }, - feed_url: "https://www.bleepingcomputer.com/feed/", - category: "private_sector", - region_tag: "US", - lat: 40.71, - lng: -74.01, - reporting: { - label: "Read Full Report", - url: "https://www.bleepingcomputer.com", - notes: "Private-sector cybersecurity news. Report incidents to relevant authorities.", - }, - }, - - // ── PRIVATE SECTOR: Krebs on Security (Global) ──────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "krebsonsecurity", - authority_name: "Krebs on Security", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "private_sector", - base_url: "https://krebsonsecurity.com", - }, - feed_url: "https://krebsonsecurity.com/feed/", - category: "private_sector", - region_tag: "US", - lat: 38.90, - lng: -77.04, - reporting: { - label: "Read Full Report", - url: "https://krebsonsecurity.com", - notes: "Investigative cybersecurity journalism by Brian Krebs.", - }, - }, - - // ── PRIVATE SECTOR: The Hacker News (Global) ────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "thehackernews", - authority_name: "The Hacker News", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "private_sector", - base_url: "https://thehackernews.com", - }, - feed_url: "https://feeds.feedburner.com/TheHackersNews", - category: "private_sector", - region_tag: "US", - lat: 37.39, - lng: -122.08, - reporting: { 
- label: "Read Full Report", - url: "https://thehackernews.com", - notes: "Cybersecurity news and analysis.", - }, - }, - - // ── PRIVATE SECTOR: DataBreaches.net (Global) ───────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "databreaches-net", - authority_name: "DataBreaches.net", - country: "United States", - country_code: "US", - region: "North America", - authority_type: "private_sector", - base_url: "https://databreaches.net", - }, - feed_url: "https://databreaches.net/feed/", - category: "private_sector", - region_tag: "US", - lat: 39.83, - lng: -98.58, - reporting: { - label: "Read Full Report", - url: "https://databreaches.net", - notes: "Data breach tracking and reporting.", - }, - }, - - // ═══════════════════════════════════════════════════════════════════ - // EXPANDED COVERAGE — sources that openly ask for public help - // ═══════════════════════════════════════════════════════════════════ - - // ── Canada: Vancouver Police (North America) ────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "vpd-ca", - authority_name: "Vancouver Police Department", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "police", - base_url: "https://vpd.ca", - }, - feed_url: "https://vpd.ca/feed/", - category: "public_appeal", - region_tag: "CA", - lat: 49.2827, - lng: -123.1207, - reporting: { - label: "Submit a Tip to VPD", - url: "https://vpd.ca/report-a-crime/", - phone: "604-717-3321 (Non-Emergency)", - notes: "911 for emergencies.", - }, - }, - - // ── Canada: Calgary Police Newsroom (North America) ─────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "calgary-police-ca", - authority_name: "Calgary Police Service", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "police", - base_url: "https://newsroom.calgary.ca", - }, - feed_url: "https://newsroom.calgary.ca/feed/", - category: 
"public_appeal", - region_tag: "CA", - lat: 51.0447, - lng: -114.0719, - reporting: { - label: "Submit a Tip to Calgary Police", - url: "https://www.calgarypolice.ca/contact-us", - phone: "403-266-1234 (Non-Emergency)", - notes: "911 for emergencies.", - }, - }, - - // ── Canada: CCCS Cyber Alerts API (North America) ───────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "cccs-ca-api", - authority_name: "Canadian Centre for Cyber Security (Alerts)", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "cert", - base_url: "https://www.cyber.gc.ca", - }, - feed_url: "https://www.cyber.gc.ca/api/cccs/rss/v1/get?feed=alerts_advisories&lang=en", - category: "cyber_advisory", - region_tag: "CA", - lat: 45.4215, - lng: -75.6972, - reporting: { - label: "Report a Cyber Incident", - url: "https://www.cyber.gc.ca/en/incident-management", - phone: "1-833-CYBER-88", - }, - }, - - // ── Canada: CBC News (North America) ────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "cbc-canada", - authority_name: "CBC Canada News", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "public_safety_program", - base_url: "https://www.cbc.ca", - }, - feed_url: "https://www.cbc.ca/webfeed/rss/rss-canada", - category: "public_safety", - region_tag: "CA", - lat: 43.6532, - lng: -79.3832, - reporting: { - label: "CBC News Tips", - url: "https://www.cbc.ca/news/tips", - }, - }, - - // ── Canada: Global News (North America) ─────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "globalnews-ca", - authority_name: "Global News Canada", - country: "Canada", - country_code: "CA", - region: "North America", - authority_type: "public_safety_program", - base_url: "https://globalnews.ca", - }, - feed_url: "https://globalnews.ca/feed/", - category: "public_safety", - region_tag: "CA", - lat: 45.5017, - lng: -73.5673, 
- reporting: { - label: "Global News Tips", - url: "https://globalnews.ca/pages/contact-us/", - }, - }, - - // ── Turkey: USOM / TR-CERT Cyber Alerts (Asia) ─────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "usom-tr", - authority_name: "TR-CERT / USOM", - country: "Turkey", - country_code: "TR", - region: "Asia", - authority_type: "cert", - base_url: "https://www.usom.gov.tr", - }, - feed_url: "https://www.usom.gov.tr/rss/tehdit.rss", - category: "cyber_advisory", - region_tag: "TR", - lat: 39.9334, - lng: 32.8597, - reporting: { - label: "Report Cyber Incident to USOM", - url: "https://www.usom.gov.tr/bildirim", - }, - }, - - // ── Israel: Times of Israel (Asia) ──────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "timesofisrael-il", - authority_name: "Times of Israel", - country: "Israel", - country_code: "IL", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.timesofisrael.com", - }, - feed_url: "https://www.timesofisrael.com/feed/", - category: "public_safety", - region_tag: "IL", - lat: 31.7683, - lng: 35.2137, - reporting: { - label: "Israel Police Tips", - url: "https://www.police.gov.il/en", - phone: "100 (Israel Police)", - }, - }, - - // ── Middle East Eye (Asia) ──────────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "middleeasteye", - authority_name: "Middle East Eye", - country: "Qatar", - country_code: "QA", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.middleeasteye.net", - }, - feed_url: "https://www.middleeasteye.net/rss", - category: "public_safety", - region_tag: "ME", - lat: 25.2854, - lng: 51.531, - reporting: { - label: "Middle East Eye Tips", - url: "https://www.middleeasteye.net/contact", - }, - }, - - // ── Turkey: Daily Sabah (Asia) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - 
source_id: "dailysabah-tr", - authority_name: "Daily Sabah Turkey", - country: "Turkey", - country_code: "TR", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.dailysabah.com", - }, - feed_url: "https://www.dailysabah.com/rssFeed/turkey", - category: "public_safety", - region_tag: "TR", - lat: 41.0082, - lng: 28.9784, - reporting: { - label: "Daily Sabah Contact", - url: "https://www.dailysabah.com/contact", - }, - }, - - // ── China: Global Times (Asia) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "globaltimes-cn", - authority_name: "Global Times China", - country: "China", - country_code: "CN", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.globaltimes.cn", - }, - feed_url: "https://www.globaltimes.cn/rss/outbrain.xml", - category: "public_safety", - region_tag: "CN", - lat: 39.9042, - lng: 116.4074, - reporting: { - label: "Global Times Contact", - url: "https://www.globaltimes.cn/about-us/contact-us.html", - }, - }, - - // ── India: India Today Crime (Asia) ─────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "indiatoday-crime", - authority_name: "India Today Crime", - country: "India", - country_code: "IN", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.indiatoday.in", - }, - feed_url: "https://www.indiatoday.in/rss/1786661", - category: "public_safety", - region_tag: "IN", - lat: 28.6139, - lng: 77.209, - reporting: { - label: "India Crime Tips", - url: "https://cybercrime.gov.in/", - phone: "112 (India Emergency)", - }, - }, - - // ── India: NDTV India News (Asia) ───────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "ndtv-in", - authority_name: "NDTV India News", - country: "India", - country_code: "IN", - region: "Asia", - authority_type: "public_safety_program", - base_url: 
"https://www.ndtv.com", - }, - feed_url: "https://feeds.feedburner.com/ndtvnews-india-news", - category: "public_safety", - region_tag: "IN", - lat: 19.076, - lng: 72.8777, - reporting: { - label: "NDTV News Tips", - url: "https://www.ndtv.com/page/contact-us", - phone: "112 (India Emergency)", - }, - }, - - // ── India: Hindustan Times (Asia) ───────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "hindustantimes-in", - authority_name: "Hindustan Times India", - country: "India", - country_code: "IN", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.hindustantimes.com", - }, - feed_url: "https://www.hindustantimes.com/feeds/rss/india-news/rssfeed.xml", - category: "public_safety", - region_tag: "IN", - lat: 12.9716, - lng: 77.5946, - reporting: { - label: "Hindustan Times Tips", - url: "https://www.hindustantimes.com/contact-us", - phone: "112 (India Emergency)", - }, - }, - - // ── Vietnam: VnExpress International (Asia) ─────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "vnexpress-vn", - authority_name: "VnExpress International", - country: "Vietnam", - country_code: "VN", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://e.vnexpress.net", - }, - feed_url: "https://e.vnexpress.net/rss/news.rss", - category: "public_safety", - region_tag: "VN", - lat: 21.0278, - lng: 105.8342, - reporting: { - label: "Vietnam Police Tips", - url: "https://congan.com.vn/", - phone: "113 (Vietnam Police)", - }, - }, - - // ── Laos: Laotian Times (Asia) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "laotiantimes-la", - authority_name: "Laotian Times", - country: "Laos", - country_code: "LA", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://laotiantimes.com", - }, - feed_url: "https://laotiantimes.com/feed/", - category: "public_safety", 
- region_tag: "LA", - lat: 17.9757, - lng: 102.6331, - reporting: { - label: "Laotian Times Contact", - url: "https://laotiantimes.com/contact/", - }, - }, - - // ── Thailand: Bangkok Post (Asia) ───────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "bangkokpost-th", - authority_name: "Bangkok Post", - country: "Thailand", - country_code: "TH", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.bangkokpost.com", - }, - feed_url: "https://www.bangkokpost.com/rss/data/topstories.xml", - category: "public_safety", - region_tag: "TH", - lat: 13.7563, - lng: 100.5018, - reporting: { - label: "Thailand Police Tips", - url: "https://www.royalthaipolice.go.th/", - phone: "191 (Thailand Police)", - }, - }, - - // ── Philippines: Rappler (Asia) ─────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "rappler-ph", - authority_name: "Rappler Philippines", - country: "Philippines", - country_code: "PH", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://www.rappler.com", - }, - feed_url: "https://www.rappler.com/feed/", - category: "public_safety", - region_tag: "PH", - lat: 14.5995, - lng: 120.9842, - reporting: { - label: "PNP Philippines Tips", - url: "https://www.pnp.gov.ph/", - phone: "117 (PH Emergency)", - }, - }, - - // ── Indonesia: Tempo English (Asia) ─────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "tempo-id", - authority_name: "Tempo Indonesia", - country: "Indonesia", - country_code: "ID", - region: "Asia", - authority_type: "public_safety_program", - base_url: "https://en.tempo.co", - }, - feed_url: "https://rss.tempo.co/en/", - category: "public_safety", - region_tag: "ID", - lat: -6.2088, - lng: 106.8456, - reporting: { - label: "Indonesia Police Tips", - url: "https://www.polri.go.id/", - phone: "110 (Indonesia Police)", - }, - }, - - // ── Papua 
New Guinea: Post-Courier (Oceania) ───────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "postcourier-pg", - authority_name: "Post-Courier PNG", - country: "Papua New Guinea", - country_code: "PG", - region: "Oceania", - authority_type: "public_safety_program", - base_url: "https://www.postcourier.com.pg", - }, - feed_url: "https://www.postcourier.com.pg/feed/", - category: "public_safety", - region_tag: "PG", - lat: -6.3149, - lng: 147.1802, - reporting: { - label: "PNG Police", - url: "https://www.rpngc.gov.pg/", - phone: "000 (PNG Emergency)", - }, - }, - - // ── Fiji: Fiji Times (Oceania) ──────────────────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "fijitimes-fj", - authority_name: "Fiji Times", - country: "Fiji", - country_code: "FJ", - region: "Oceania", - authority_type: "public_safety_program", - base_url: "https://www.fijitimes.com", - }, - feed_url: "https://www.fijitimes.com/feed/", - category: "public_safety", - region_tag: "FJ", - lat: -18.1416, - lng: 178.4419, - reporting: { - label: "Fiji Police", - url: "https://www.police.gov.fj/", - phone: "917 (Fiji Police)", - }, - }, - - // ── Pacific Islands: RNZ Pacific (Oceania) ──────────────────────── - { - type: "rss", - followRedirects: true, - source: { - source_id: "rnz-pacific", - authority_name: "RNZ Pacific", - country: "New Zealand", - country_code: "NZ", - region: "Oceania", - authority_type: "public_safety_program", - base_url: "https://www.rnz.co.nz", - }, - feed_url: "https://www.rnz.co.nz/rss/pacific.xml", - category: "public_safety", - region_tag: "NZ", - lat: -15.3767, - lng: 166.9592, - reporting: { - label: "RNZ Pacific Contact", - url: "https://www.rnz.co.nz/about/contact", - }, - }, -]; - -function decodeXml(value) { - if (!value) return ""; - return value - .replace(//g, "") - .replace(/&/g, "&") - .replace(/</g, "<") - .replace(/>/g, ">") - .replace(/"/g, '"') - .replace(/'/g, "'") - .trim(); -} - 
-function getTag(block, tag) { - const regex = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, "i"); - const match = block.match(regex); - return match ? decodeXml(match[1]) : ""; -} - -function getAtomLink(block) { - const alternate = block.match(/]*rel=["']alternate["'][^>]*>/i); - const linkTag = alternate?.[0] ?? block.match(/]*>/i)?.[0]; - if (!linkTag) return ""; - const hrefMatch = linkTag.match(/href=["']([^"']+)["']/i); - return hrefMatch ? decodeXml(hrefMatch[1]) : ""; -} - -function getTagValues(block, tag) { - const regex = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, "gi"); - return [...block.matchAll(regex)] - .map((match) => decodeXml(match[1])) - .filter(Boolean); -} - -function getAuthor(block) { - const atomAuthor = block.match( - /]*>[\s\S]*?]*>([\s\S]*?)<\/name>[\s\S]*?<\/author>/i - ); - if (atomAuthor?.[1]) { - return decodeXml(atomAuthor[1]); - } - return getTag(block, "author") || getTag(block, "dc:creator") || getTag(block, "creator"); -} - -function getSummary(block) { - return ( - getTag(block, "description") || - getTag(block, "summary") || - getTag(block, "content") || - getTag(block, "content:encoded") - ); -} - -function getCategories(block) { - const rssCategories = getTagValues(block, "category"); - const atomCategories = [...block.matchAll(/]*term=["']([^"']+)["'][^>]*\/?>/gi)] - .map((match) => decodeXml(match[1])) - .filter(Boolean); - return [...rssCategories, ...atomCategories]; -} - -function parseItems(xml) { - if (xml.includes("/gi)].map((m) => m[0]); - return entries.map((entry) => ({ - title: getTag(entry, "title"), - link: getAtomLink(entry), - published: getTag(entry, "published") || getTag(entry, "updated"), - author: getAuthor(entry), - summary: getSummary(entry), - tags: getCategories(entry), - })); - } - - const items = [...xml.matchAll(//gi)].map((m) => m[0]); - return items.map((item) => ({ - title: getTag(item, "title"), - link: getTag(item, "link") || getTag(item, "guid"), - published: getTag(item, 
"pubDate") || getTag(item, "dc:date"), - author: getAuthor(item), - summary: getSummary(item), - tags: getCategories(item), - })); -} - -const NEWS_MEDIA_SOURCE_IDS = new Set([ - "cna-sg-crime", - "yonhap-kr", - "nhk-jp", - "scmp-hk", - "jamaica-observer", - "straitstimes-sg", -]); - -const NEWS_MEDIA_DOMAINS = [ - "channelnewsasia.com", - "yna.co.kr", - "nhk.or.jp", - "scmp.com", - "jamaicaobserver.com", - "straitstimes.com", -]; - -const TECHNICAL_SIGNAL_PATTERNS = [ - /\bcve-\d{4}-\d{4,7}\b/i, - /\b(?:ioc|iocs|indicator(?:s)? of compromise)\b/i, - /\b(?:tactic|technique|ttp|mitre)\b/i, - /\b(?:hash|sha-?256|sha-?1|md5|yara|sigma)\b/i, - /\b(?:ip(?:v4|v6)?|domain|url|hostname|command and control|c2)\b/i, - /\b(?:vulnerability|exploit(?:ation)?|zero-?day|patch|mitigation|workaround)\b/i, -]; - -const INCIDENT_DISCLOSURE_PATTERNS = [ - /\b(?:breach|data leak|compromis(?:e|ed)|intrusion|unauthori[sz]ed access)\b/i, - /\b(?:ransomware|malware|botnet|ddos|phishing|credential theft)\b/i, - /\b(?:attack|attacked|target(?:ed|ing)|incident response|security incident)\b/i, - /\b(?:arrest(?:ed)?|charged|indicted|wanted|fugitive|missing person|kidnapp(?:ed|ing)|homicide)\b/i, -]; - -const ACTIONABLE_PATTERNS = [ - /\b(?:report|submit (?:a )?tip|contact|hotline|phone|email)\b/i, - /\b(?:apply update|upgrade|disable|block|monitor|detect|investigate)\b/i, - /\b(?:advisory|alert|warning|incident notice|public appeal)\b/i, -]; - -const NARRATIVE_NEWS_PATTERNS = [ - /\b(?:opinion|editorial|commentary|analysis|explainer|podcast|interview)\b/i, - /\b(?:what we know|live updates|behind the scenes|feature story)\b/i, - /\b(?:market reaction|share price|investor)\b/i, -]; - -const GENERAL_NEWS_PATTERNS = [ - /\b(?:announces?|launche[sd]?|conference|summit|webinar|event|awareness month)\b/i, - /\b(?:ceremony|speech|statement|newsletter|weekly roundup)\b/i, - /\b(?:partnership|memorandum|mou|initiative|campaign)\b/i, -]; - -const SECURITY_CONTEXT_PATTERNS = [ - 
/\b(?:cyber|cybersecurity|infosec|information security|it security)\b/i, - /\b(?:security posture|security controls?|threat intelligence)\b/i, - /\b(?:vulnerability|exploit|patch|advisory|defend|defensive)\b/i, - /\b(?:soc|siem|incident response|malware analysis)\b/i, -]; - -const ASSISTANCE_REQUEST_PATTERNS = [ - /\b(?:report(?:\s+a)?(?:\s+crime)?|submit (?:a )?tip|tip[-\s]?off)\b/i, - /\b(?:contact (?:police|authorities|law enforcement)|hotline|helpline)\b/i, - /\b(?:if you have information|seeking information|appeal for help)\b/i, - /\b(?:missing|wanted|fugitive|amber alert)\b/i, -]; - -const IMPACT_SPECIFICITY_PATTERNS = [ - /\b(?:affected|impact(?:ed)?|disrupt(?:ed|ion)|outage|service interruption)\b/i, - /\b(?:records|accounts|systems|devices|endpoints|victims|organizations)\b/i, - /\b(?:on\s+\d{1,2}\s+\w+\s+\d{4}|timeline|between\s+\d{1,2}:\d{2})\b/i, - /\b\d{2,}\s+(?:records|users|systems|devices|victims|organizations)\b/i, -]; - -function clamp01(value) { - const numeric = Number.isFinite(value) ? value : 0.42; - return Math.max(0, Math.min(1, numeric)); -} - -function thresholdForAlert(alert, defaultThreshold) { - const category = String(alert?.category ?? "").toLowerCase(); - if (category === "missing_person") { - return clamp01(MISSING_PERSON_RELEVANCE_THRESHOLD); - } - return defaultThreshold; -} - -function extractDomain(urlValue) { - try { - return new URL(String(urlValue)).hostname.toLowerCase(); - } catch { - return ""; - } -} - -function isNewsMediaSource(alert) { - const sourceId = String(alert?.source_id ?? "").toLowerCase(); - if (NEWS_MEDIA_SOURCE_IDS.has(sourceId)) { - return true; - } - const host = extractDomain(alert?.canonical_url); - return NEWS_MEDIA_DOMAINS.some((domain) => host.includes(domain)); -} - -function inferPublicationType(alert, metaHints = {}) { - const authorityType = String(alert?.source?.authority_type ?? 
"").toLowerCase(); - if (isNewsMediaSource(alert)) return "news_media"; - if (authorityType === "cert") return "cert_advisory"; - if (authorityType === "police") return "law_enforcement"; - if (authorityType === "intelligence" || authorityType === "national_security") { - return "security_bulletin"; - } - if (authorityType === "public_safety_program") return "public_safety_bulletin"; - if (metaHints.feedType === "kev-json" || metaHints.feedType === "interpol-red-json") { - return "structured_incident_feed"; - } - return "official_update"; -} - -function hasAnyPattern(text, patterns) { - return patterns.some((pattern) => pattern.test(text)); -} - -function scoreIncidentRelevance(alert, context = {}) { - const title = String(alert?.title ?? ""); - const summary = String(context.summary ?? ""); - const author = String(context.author ?? ""); - const tags = Array.isArray(context.tags) ? context.tags.map((t) => String(t)) : []; - const text = `${title}\n${summary}\n${author}\n${tags.join(" ")}\n${alert?.canonical_url ?? ""}`.toLowerCase(); - const publicationType = inferPublicationType(alert, context.metaHints ?? {}); - const signals = []; - let score = 0.5; - - const addSignal = (delta, reason) => { - score += delta; - signals.push(`${delta >= 0 ? 
"+" : ""}${delta.toFixed(2)} ${reason}`); - }; - - if (publicationType === "news_media") { - addSignal(-0.16, "publication type leans general-news"); - } else if (publicationType === "cert_advisory" || publicationType === "structured_incident_feed") { - addSignal(0.08, "source metadata is incident-oriented"); - } else if (publicationType === "law_enforcement") { - addSignal(0.06, "law-enforcement source metadata"); - } - - if (alert.category === "cyber_advisory") addSignal(0.09, "cyber advisory category"); - if (alert.category === "wanted_suspect" || alert.category === "missing_person") { - addSignal(0.09, "law-enforcement incident category"); - } - if ( - alert.category === "humanitarian_tasking" || - alert.category === "conflict_monitoring" || - alert.category === "humanitarian_security" - ) { - addSignal(0.08, "humanitarian incident/tasking category"); - } - if (alert.category === "education_digital_capacity") { - addSignal(0.07, "education and digital capacity category"); - } - if (alert.category === "fraud_alert") addSignal(0.07, "fraud incident category"); - - const hasTechnical = hasAnyPattern(text, TECHNICAL_SIGNAL_PATTERNS); - const hasIncidentDisclosure = hasAnyPattern(text, INCIDENT_DISCLOSURE_PATTERNS); - const hasActionable = hasAnyPattern(text, ACTIONABLE_PATTERNS); - const hasSpecificImpact = hasAnyPattern(text, IMPACT_SPECIFICITY_PATTERNS); - const hasNarrative = hasAnyPattern(text, NARRATIVE_NEWS_PATTERNS); - const hasGeneralNews = hasAnyPattern(text, GENERAL_NEWS_PATTERNS); - const looksLikeBlog = isBlogAlert(alert); - - if (hasTechnical) addSignal(0.16, "technical indicators or tactics present"); - if (hasIncidentDisclosure) addSignal(0.16, "incident/crime disclosure language"); - if (hasActionable) addSignal(0.1, "contains response/reporting actions"); - if (hasSpecificImpact) addSignal(0.08, "specific impact/timeline/system details"); - - if (hasNarrative) addSignal(-0.18, "opinion/commentary phrasing"); - if (hasGeneralNews) addSignal(-0.12, 
"general institutional/news language"); - if (looksLikeBlog) addSignal(-0.1, "blog-style structure"); - - if (!hasTechnical && !hasIncidentDisclosure && (hasNarrative || hasGeneralNews)) { - addSignal(-0.08, "weak incident evidence relative to narrative cues"); - } - - const freshnessHours = Number(alert?.freshness_hours ?? 0); - if (freshnessHours > 0 && freshnessHours <= 24 && (hasIncidentDisclosure || hasTechnical)) { - addSignal(0.04, "fresh post with potential early-warning signal"); - } - - const threshold = clamp01(INCIDENT_RELEVANCE_THRESHOLD); - const relevance = Number(clamp01(score).toFixed(3)); - const distance = Math.abs(relevance - threshold); - const confidence = - distance >= 0.25 ? "high" : distance >= 0.1 ? "medium" : "low"; - const disposition = relevance >= threshold ? "retained" : "filtered_review"; - - return { - relevance_score: relevance, - threshold, - confidence, - disposition, - publication_type: publicationType, - weak_signals: signals.slice(0, 12), - metadata: { - author: author || undefined, - tags: tags.slice(0, 8), - }, - }; -} - -const BLOG_FILTER_EXEMPT_SOURCES = new Set([ - "bleepingcomputer", - "krebsonsecurity", - "thehackernews", - "databreaches-net", - // News sources that amplify calls for help - "cbc-canada", - "globalnews-ca", - "timesofisrael-il", - "middleeasteye", - "dailysabah-tr", - "globaltimes-cn", - "indiatoday-crime", - "ndtv-in", - "hindustantimes-in", - "vnexpress-vn", - "laotiantimes-la", - "bangkokpost-th", - "rappler-ph", - "tempo-id", - "postcourier-pg", - "fijitimes-fj", - "rnz-pacific", -]); - -function isBlogContent(item, sourceId) { - if (sourceId && BLOG_FILTER_EXEMPT_SOURCES.has(sourceId)) return false; - const title = String(item?.title ?? "").toLowerCase(); - const link = String(item?.link ?? 
"").toLowerCase(); - if (/\bblog\b/.test(title)) return true; - if (/\/blog(s)?(\/|$)/.test(link)) return true; - if (link.includes("medium.com")) return true; - if (link.includes("wordpress.com")) return true; - return false; -} - -function isBlogAlert(alert) { - if (BLOG_FILTER_EXEMPT_SOURCES.has(alert?.source_id)) return false; - const title = String(alert?.title ?? "").toLowerCase(); - const link = String(alert?.canonical_url ?? "").toLowerCase(); - if (/\bblog\b/.test(title)) return true; - if (/\/blog(s)?(\/|$)/.test(link)) return true; - if (link.includes("medium.com")) return true; - if (link.includes("wordpress.com")) return true; - return false; -} - -function isInformational(title) { - const t = title.toLowerCase(); - const keywords = [ - "traffic", - "road", - "highway", - "motorway", - "lane", - "closure", - "closed", - "detour", - "accident", - "crash", - "collision", - "vehicle", - "multi-vehicle", - "rollover", - "roadworks", - "road work", - ]; - return keywords.some((word) => t.includes(word)); -} - -function isSecurityInformationalNews(alert, context = {}) { - const title = String(alert?.title ?? ""); - const summary = String(context.summary ?? ""); - const author = String(context.author ?? ""); - const tags = Array.isArray(context.tags) ? context.tags.map((t) => String(t)) : []; - const text = `${title}\n${summary}\n${author}\n${tags.join(" ")}\n${alert?.canonical_url ?? ""}`.toLowerCase(); - const publicationType = inferPublicationType(alert, context.metaHints ?? {}); - const authorityType = String(alert?.source?.authority_type ?? 
"").toLowerCase(); - - const hasSecurityContext = hasAnyPattern(text, SECURITY_CONTEXT_PATTERNS); - const hasIncidentOrCrime = hasAnyPattern(text, INCIDENT_DISCLOSURE_PATTERNS); - const hasHelpRequest = hasAnyPattern(text, ASSISTANCE_REQUEST_PATTERNS); - const hasGeneralNews = hasAnyPattern(text, GENERAL_NEWS_PATTERNS); - const hasNarrative = hasAnyPattern(text, NARRATIVE_NEWS_PATTERNS); - const hasImpactSpecifics = hasAnyPattern(text, IMPACT_SPECIFICITY_PATTERNS); - - const sourceIsSecurityRelevant = - alert?.category === "cyber_advisory" || - alert?.category === "private_sector" || - publicationType === "cert_advisory" || - authorityType === "cert" || - authorityType === "private_sector" || - authorityType === "regulatory"; - - return ( - sourceIsSecurityRelevant && - hasSecurityContext && - !hasIncidentOrCrime && - !hasHelpRequest && - !hasImpactSpecifics && - (hasGeneralNews || hasNarrative || publicationType === "news_media") - ); -} - -function normalizeInformationalSecurityAlert(alert, context = {}) { - if (!isSecurityInformationalNews(alert, context)) return alert; - const baseThreshold = clamp01(INCIDENT_RELEVANCE_THRESHOLD); - const currentScore = Number(alert?.triage?.relevance_score ?? 0); - const nextScore = Math.max(currentScore, baseThreshold); - return { - ...alert, - category: "informational", - severity: "info", - triage: { - ...(alert?.triage ?? {}), - relevance_score: Number(nextScore.toFixed(3)), - threshold: baseThreshold, - confidence: "medium", - disposition: "retained", - weak_signals: [ - "reclassified as informational security/cybersecurity update", - ...((alert?.triage?.weak_signals ?? 
[]).slice(0, 10)), - ], - }, - }; -} - -function inferSeverity(title, fallback) { - const t = title.toLowerCase(); - if (isInformational(t)) return "info"; - // Explicit severity keywords - if (t.includes("critical") || t.includes("emergency") || t.includes("zero-day") || t.includes("0-day")) return "critical"; - if (t.includes("ransomware") || t.includes("actively exploited") || t.includes("exploitation")) return "critical"; - if (t.includes("breach") || t.includes("data leak") || t.includes("crypto heist") || t.includes("million stolen")) return "critical"; - if (t.includes("hack") || t.includes("compromise") || t.includes("vulnerability")) return "high"; - if (t.includes("high") || t.includes("severe") || t.includes("urgent")) return "high"; - if (t.includes("wanted") || t.includes("fugitive") || t.includes("murder") || t.includes("homicide")) return "critical"; - if (t.includes("missing") || t.includes("amber alert") || t.includes("kidnap")) return "critical"; - if (t.includes("fatal") || t.includes("death") || t.includes("shooting")) return "high"; - if (t.includes("fraud") || t.includes("scam") || t.includes("phishing")) return "high"; - if (t.includes("arrested") || t.includes("charged") || t.includes("sentenced")) return "medium"; - if (t.includes("medium") || t.includes("moderate")) return "medium"; - if (t.includes("low") || t.includes("informational")) return "info"; - return fallback; -} - -function defaultSeverity(category) { - switch (category) { - case "informational": - return "info"; - case "cyber_advisory": - return "high"; - case "wanted_suspect": - return "critical"; - case "missing_person": - return "critical"; - case "public_appeal": - return "high"; - case "humanitarian_tasking": - return "high"; - case "conflict_monitoring": - return "medium"; - case "humanitarian_security": - return "high"; - case "education_digital_capacity": - return "medium"; - case "public_safety": - return "medium"; - case "private_sector": - return "high"; - default: 
- return "medium"; - } -} - -function parseDate(value) { - if (!value) return null; - const date = new Date(value); - return Number.isNaN(date.getTime()) ? null : date; -} - -function isFresh(date, now) { - const cutoff = now.getTime() - MAX_AGE_DAYS * 86400000; - return date.getTime() >= cutoff; -} - -function hashId(value) { - return crypto.createHash("sha1").update(value).digest("hex").slice(0, 12); -} - -function hashToUnit(value) { - const hex = crypto.createHash("sha1").update(value).digest("hex").slice(0, 8); - return Number.parseInt(hex, 16) / 0xffffffff; -} - -function normalizeHeadline(value) { - return String(value ?? "") - .toLowerCase() - .replace(/[^a-z0-9]+/g, " ") - .trim(); -} - -function compareAlertPreference(a, b) { - const scoreA = Number(a?.triage?.relevance_score ?? 0); - const scoreB = Number(b?.triage?.relevance_score ?? 0); - if (scoreA !== scoreB) return scoreB - scoreA; - const seenA = new Date(a?.first_seen ?? 0).getTime(); - const seenB = new Date(b?.first_seen ?? 0).getTime(); - if (seenA !== seenB) return seenB - seenA; - const urlLenA = String(a?.canonical_url ?? "").length; - const urlLenB = String(b?.canonical_url ?? "").length; - return urlLenA - urlLenB; -} - -function buildVariantCollapseKey(alert) { - const titleNorm = normalizeHeadline(alert?.title); - if (!titleNorm || titleNorm.length < 24) return null; - const sourceId = String(alert?.source_id ?? "").toLowerCase(); - if (!sourceId) return null; - try { - const url = new URL(String(alert?.canonical_url ?? "")); - const host = url.hostname.toLowerCase().replace(/^www\./, ""); - const path = url.pathname.replace(/\/+$/, ""); - const segments = path.split("/").filter(Boolean); - const leaf = segments[segments.length - 1] ?? 
""; - if (!/-\d+$/.test(leaf)) return null; - const familyLeaf = leaf.replace(/-\d+$/, ""); - const familyPath = `/${segments.slice(0, -1).concat(familyLeaf).join("/")}`; - return `${sourceId}|${host}${familyPath}|${titleNorm}`; - } catch { - return null; - } -} - -function collapseRecurringHeadlineVariants(alerts) { - const byVariant = new Map(); - const passthrough = []; - for (const alert of alerts) { - const key = buildVariantCollapseKey(alert); - if (!key) { - passthrough.push(alert); - continue; - } - const list = byVariant.get(key) ?? []; - list.push(alert); - byVariant.set(key, list); - } - - const kept = [...passthrough]; - const suppressed = []; - for (const list of byVariant.values()) { - if (list.length === 1) { - kept.push(list[0]); - continue; - } - list.sort(compareAlertPreference); - kept.push(list[0]); - suppressed.push(...list.slice(1)); - } - return { kept, suppressed }; -} - -function summarizeTitleDuplicates(alerts) { - const counts = new Map(); - for (const alert of alerts) { - const key = normalizeHeadline(alert?.title); - if (!key) continue; - counts.set(key, (counts.get(key) ?? 0) + 1); - } - return [...counts.entries()] - .filter(([, count]) => count > 1) - .sort((a, b) => b[1] - a[1]) - .slice(0, 25) - .map(([title, count]) => ({ title, count })); -} - -function jitterCoords(lat, lng, seed, minRadiusKm = 22, maxRadiusKm = 77) { - // Spread alerts around a base point so multiple notices don't collapse into one dot. 
- const angle = hashToUnit(`${seed}:a`) * Math.PI * 2; - const radiusKm = minRadiusKm + hashToUnit(`${seed}:r`) * Math.max(1, maxRadiusKm - minRadiusKm); - const dLat = (radiusKm / 111.32) * Math.cos(angle); - const cosLat = Math.max(0.2, Math.cos((lat * Math.PI) / 180)); - const dLng = (radiusKm / (111.32 * cosLat)) * Math.sin(angle); - const outLat = Math.max(-89.5, Math.min(89.5, lat + dLat)); - let outLng = lng + dLng; - if (outLng > 180) outLng -= 360; - if (outLng < -180) outLng += 360; - return { lat: Number(outLat.toFixed(5)), lng: Number(outLng.toFixed(5)) }; -} - -function escapeRegex(text) { - return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - -function inferUSStateCoords(text) { - const haystackRaw = ` ${String(text ?? "").toLowerCase()} `; - const haystack = haystackRaw.replace(/\./g, " "); - - // Match common two-letter forms like (FL), , FL, " - FL " - for (const [abbr, name] of Object.entries(US_STATE_ABBR_TO_NAME)) { - const abbrPattern = new RegExp(`(?:^|[^a-z])\\(?${abbr.toLowerCase()}\\)?(?:[^a-z]|$)`, "i"); - if (abbrPattern.test(haystack)) { - const coords = US_STATE_CENTROIDS[name]; - if (coords) return { lat: coords[0], lng: coords[1] }; - } - } - - // Match short textual forms like "Fla", "Calif", etc. - for (const [token, name] of Object.entries(US_STATE_ALT_TOKENS)) { - const altPattern = new RegExp(`\\b${escapeRegex(token).replace(/\s+/g, "\\s+")}\\b`, "i"); - if (altPattern.test(haystack)) { - const coords = US_STATE_CENTROIDS[name]; - if (coords) return { lat: coords[0], lng: coords[1] }; - } - } - - const entries = Object.entries(US_STATE_CENTROIDS).sort((a, b) => b[0].length - a[0].length); - for (const [name, [lat, lng]] of entries) { - const pattern = new RegExp(`\\b${escapeRegex(name).replace(/\s+/g, "\\s+")}\\b`, "i"); - if (pattern.test(haystack)) { - return { lat, lng }; - } - } - return null; -} - -function inferCityCoords(text) { - const haystack = ` ${String(text ?? 
"").toLowerCase().replace(/\./g, " ")} `; - const entries = Object.entries(CITY_CENTROIDS).sort((a, b) => b[0].length - a[0].length); - for (const [name, [lat, lng]] of entries) { - const pattern = new RegExp(`\\b${escapeRegex(name).replace(/\s+/g, "\\s+")}\\b`, "i"); - if (pattern.test(haystack)) return { lat, lng }; - } - return null; -} - -function inferCountryCoords(text) { - const haystack = ` ${String(text ?? "").toLowerCase()} `; - const entries = Object.entries(COUNTRY_CENTROIDS).sort((a, b) => b[0].length - a[0].length); - for (const [name, [lat, lng]] of entries) { - const pattern = new RegExp(`\\b${escapeRegex(name).replace(/\s+/g, "\\s+")}\\b`, "i"); - if (pattern.test(haystack)) return { lat, lng }; - } - return null; -} - -function inferCountryFromIsoCodes(values) { - const list = Array.isArray(values) ? values : [values]; - for (const value of list) { - const code = String(value ?? "").trim().toUpperCase(); - const name = ISO2_COUNTRY_HINTS[code]; - if (!name) continue; - const coords = COUNTRY_CENTROIDS[name]; - if (coords) return { lat: coords[0], lng: coords[1] }; - } - return null; -} - -function inferCountryHintFromIsoCodes(values) { - const list = Array.isArray(values) ? values : [values]; - for (const value of list) { - const code = String(value ?? "").trim().toUpperCase(); - const name = ISO2_COUNTRY_HINTS[code]; - if (!name) continue; - return { code, name }; - } - return null; -} - -function toDisplayCountryName(countryName) { - return String(countryName ?? 
"") - .split(/\s+/) - .filter(Boolean) - .map((token) => token.charAt(0).toUpperCase() + token.slice(1)) - .join(" "); -} - -function inferRegionFromCoords(lat, lng) { - if (lat >= 7 && lat <= 83 && lng >= -168 && lng <= -52) return "North America"; - if (lat >= -56 && lat <= 13 && lng >= -82 && lng <= -35) return "South America"; - if (lat >= 35 && lat <= 72 && lng >= -11 && lng <= 40) return "Europe"; - if (lat >= -35 && lat <= 37 && lng >= -17 && lng <= 51) return "Africa"; - if (lat >= 5 && lat <= 77 && lng >= 40 && lng <= 180) return "Asia"; - if (lat >= -50 && lat <= 10 && lng >= 110 && lng <= 180) return "Oceania"; - if (lat < -60) return "Antarctica"; - return "International"; -} - -function extractUrlLocationText(urlValue) { - try { - const url = new URL(String(urlValue)); - const decodedPath = decodeURIComponent(url.pathname); - const query = decodeURIComponent(url.search.replace(/^\?/, "")); - return `${url.hostname} ${decodedPath} ${query}` - .replace(/[._/+?=&%-]+/g, " ") - .replace(/\s+/g, " ") - .trim(); - } catch { - return String(urlValue ?? ""); - } -} - -function resolveCoords(meta, text, seed) { - const inferredUS = - meta.source.country_code === "US" ? inferUSStateCoords(text) : null; - if (inferredUS) { - return jitterCoords(inferredUS.lat, inferredUS.lng, seed, 10, 35); - } - const inferredCity = inferCityCoords(text); - if (inferredCity) { - return jitterCoords(inferredCity.lat, inferredCity.lng, seed, 5, 24); - } - const inferredCountry = inferCountryCoords(text); - if (inferredCountry) { - return jitterCoords(inferredCountry.lat, inferredCountry.lng, seed, 12, 52); - } - return jitterCoords(meta.lat, meta.lng, seed); -} - -function resolveInterpolNoticeCoords(meta, notice, title, seed) { - const textHints = [ - title, - notice?.place_of_birth, - notice?.issuing_entity, - notice?.forename, - notice?.name, - ...(Array.isArray(notice?.nationalities) ? notice.nationalities : []), - ...(Array.isArray(notice?.countries_likely_to_be_visited) - ? 
notice.countries_likely_to_be_visited - : []), - ] - .filter(Boolean) - .join(" "); - - const isoCoords = - inferCountryFromIsoCodes(notice?.countries_likely_to_be_visited) || - inferCountryFromIsoCodes(notice?.nationalities); - if (isoCoords) { - return jitterCoords(isoCoords.lat, isoCoords.lng, seed, 10, 45); - } - return resolveCoords(meta, textHints, seed); -} - -function kevItemToAlert(entry, meta) { - const cve = entry.cveID ?? entry.cveId ?? entry.cve; - const title = `${cve ?? "CVE"}: ${entry.vulnerabilityName ?? "Known Exploited Vulnerability"}`; - const nvdLink = cve ? `https://nvd.nist.gov/vuln/detail/${cve}` : meta.source.base_url; - const now = new Date(); - const publishedAt = parseDate(entry.dateAdded); - if (!publishedAt || !isFresh(publishedAt, now)) { - return null; - } - const hours = Math.max(1, Math.round((now - publishedAt) / 36e5)); - const kevSeverity = hours <= 72 ? "critical" : hours <= 168 ? "high" : "medium"; - const jitter = resolveCoords( - meta, - `${title} ${nvdLink} ${extractUrlLocationText(nvdLink)}`, - `${meta.source.source_id}:${nvdLink}:${cve ?? ""}` - ); - const alert = { - alert_id: `${meta.source.source_id}-${hashId(nvdLink)}`, - source_id: meta.source.source_id, - source: meta.source, - title, - canonical_url: nvdLink, - first_seen: publishedAt.toISOString(), - last_seen: now.toISOString(), - status: "active", - category: meta.category, - severity: kevSeverity, - region_tag: meta.region_tag, - lat: jitter.lat, - lng: jitter.lng, - freshness_hours: hours, - reporting: meta.reporting, - }; - return { - ...alert, - triage: scoreIncidentRelevance(alert, { - summary: `${entry.vulnerabilityName ?? ""} ${entry.shortDescription ?? ""}`.trim(), - tags: [entry.knownRansomwareCampaign ? "known-ransomware-campaign" : ""].filter(Boolean), - metaHints: { feedType: meta.type }, - }), - }; -} - -// ─── AUTO-TRANSLATION ───────────────────────────────────────── -// Detect non-Latin text and translate to English via free Google Translate API. 
-const NON_LATIN_RE = /[\u3000-\u9FFF\uAC00-\uD7AF\u0400-\u04FF\u0600-\u06FF\u0E00-\u0E7F\u1100-\u11FF\uA960-\uA97F\uD7B0-\uD7FF]/; - -async function translateToEnglish(text) { - if (!text || !NON_LATIN_RE.test(text)) return text; - try { - const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), 5000); - const url = `https://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl=en&dt=t&q=${encodeURIComponent(text)}`; - const res = await fetch(url, { signal: controller.signal }); - clearTimeout(timer); - if (!res.ok) return text; - const data = await res.json(); - const translated = data?.[0]?.map((s) => s?.[0] ?? "").join("") ?? text; - return translated || text; - } catch { - return text; - } -} - -async function translateBatch(items) { - const results = []; - for (const item of items) { - if (NON_LATIN_RE.test(item.title)) { - item.title = await translateToEnglish(item.title); - } - if (item.summary && NON_LATIN_RE.test(item.summary)) { - item.summary = await translateToEnglish(item.summary); - } - results.push(item); - } - return results; -} - -function stripHtmlTags(value) { - return String(value ?? "") - .replace(//gi, " ") - .replace(//gi, " ") - .replace(/<[^>]+>/g, " ") - .replace(/ /gi, " ") - .replace(/&/gi, "&") - .replace(/"/gi, "\"") - .replace(/'|'/gi, "'") - .replace(/</gi, "<") - .replace(/>/gi, ">") - .replace(/\s+/g, " ") - .trim(); -} - -function parseHtmlAnchors(html, baseUrl) { - const anchors = []; - const seen = new Set(); - const anchorRe = /]*href=(["'])(.*?)\1[^>]*>([\s\S]*?)<\/a>/gi; - let match; - while ((match = anchorRe.exec(html)) !== null) { - const rawHref = String(match[2] ?? "").trim(); - if (!rawHref || rawHref.startsWith("#")) continue; - const title = stripHtmlTags(match[3] ?? 
""); - if (!title || title.length < 8) continue; - let link; - try { - link = new URL(rawHref, baseUrl).toString(); - } catch { - continue; - } - if (seen.has(link)) continue; - seen.add(link); - anchors.push({ title, link, summary: "" }); - } - return anchors; -} - -function normalizeExternalSource(entry) { - if (!entry || typeof entry !== "object") return null; - const source = entry.source; - if (!source || typeof source !== "object") return null; - if (!source.source_id || !source.authority_name || !entry.type || !entry.category) { - return null; - } - const normalized = { - ...entry, - source: { - ...source, - source_id: String(source.source_id), - authority_name: String(source.authority_name), - country: String(source.country ?? "Unknown"), - country_code: String(source.country_code ?? "XX"), - region: String(source.region ?? "International"), - authority_type: String(source.authority_type ?? "public_safety_program"), - base_url: String(source.base_url ?? entry.feed_url ?? ""), - }, - }; - return normalized; -} - -async function loadExternalSources() { - if (externalSourcesCache) return externalSourcesCache; - try { - const raw = await readFile(SOURCE_REGISTRY_PATH, "utf8"); - const parsed = JSON.parse(raw); - const list = Array.isArray(parsed) ? parsed : []; - const normalized = list - .map(normalizeExternalSource) - .filter(Boolean); - externalSourcesCache = normalized; - return normalized; - } catch (error) { - console.warn(`WARN source registry: ${error.message}`); - externalSourcesCache = []; - return []; - } -} - -async function getAllSources() { - const extra = await loadExternalSources(); - if (extra.length === 0) return sources; - const seen = new Set(); - const merged = []; - for (const entry of [...sources, ...extra]) { - const id = String(entry?.source?.source_id ?? 
""); - if (!id || seen.has(id)) continue; - seen.add(id); - merged.push(entry); - } - return merged; -} - -async function fetchFeed(url, followRedirects = false) { - const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), 15000); - try { - const response = await fetch(url, { - redirect: followRedirects ? "follow" : "manual", - signal: controller.signal, - headers: { - "User-Agent": "Mozilla/5.0 (compatible; euosint-bot/1.0)", - Accept: "application/rss+xml, application/atom+xml, application/xml, text/xml;q=0.9, */*;q=0.8", - }, - }); - if (!response.ok) { - throw new Error(`feed fetch failed ${response.status} ${url}`); - } - return response.text(); - } finally { - clearTimeout(timer); - } -} - -async function fetchFeedWithFallback(urls, followRedirects = false) { - const candidates = Array.isArray(urls) ? urls.filter(Boolean) : [urls].filter(Boolean); - let lastError = null; - for (const url of candidates) { - try { - const xml = await fetchFeed(url, followRedirects); - return { xml, feedUrl: url }; - } catch (error) { - lastError = error; - } - } - throw lastError ?? new Error("no feed URLs available"); -} - -async function fetchRss(meta, now) { - const limit = Math.max(1, Number(meta?.max_items ?? MAX_PER_SOURCE)); - const { xml } = await fetchFeedWithFallback( - meta.feed_urls ?? [meta.feed_url], - meta.followRedirects - ); - let items = parseItems(xml) - .filter((item) => item.title && item.link) - .slice(0, limit); - - // Auto-translate non-English titles - items = await translateBatch(items); - - return items.map((item) => { - const publishedAt = parseDate(item.published) ?? 
now; - if (!isFresh(publishedAt, now)) { - return null; - } - const hours = Math.max(1, Math.round((now - publishedAt) / 36e5)); - const jitter = resolveCoords( - meta, - `${item.title} ${item.link} ${extractUrlLocationText(item.link)}`, - `${meta.source.source_id}:${item.link}` - ); - const alert = { - alert_id: `${meta.source.source_id}-${hashId(item.link)}`, - source_id: meta.source.source_id, - source: meta.source, - title: item.title, - canonical_url: item.link, - first_seen: publishedAt.toISOString(), - last_seen: now.toISOString(), - status: "active", - category: meta.category, - severity: inferSeverity(item.title, defaultSeverity(meta.category)), - region_tag: meta.region_tag, - lat: jitter.lat, - lng: jitter.lng, - freshness_hours: hours, - reporting: meta.reporting, - }; - const scored = { - ...alert, - triage: scoreIncidentRelevance(alert, { - summary: item.summary, - author: item.author, - tags: item.tags, - metaHints: { feedType: meta.type }, - }), - }; - return normalizeInformationalSecurityAlert(scored, { - summary: item.summary, - author: item.author, - tags: item.tags, - metaHints: { feedType: meta.type }, - }); - }).filter(Boolean); -} - -async function fetchHtmlList(meta, now) { - const limit = Math.max(1, Number(meta?.max_items ?? MAX_PER_SOURCE)); - const { xml: html, feedUrl } = await fetchFeedWithFallback( - meta.feed_urls ?? [meta.feed_url], - meta.followRedirects ?? true - ); - let items = parseHtmlAnchors(html, feedUrl); - const includeKeywords = Array.isArray(meta?.include_keywords) - ? meta.include_keywords.map((value) => String(value).toLowerCase()).filter(Boolean) - : []; - const excludeKeywords = Array.isArray(meta?.exclude_keywords) - ? 
meta.exclude_keywords.map((value) => String(value).toLowerCase()).filter(Boolean) - : []; - if (includeKeywords.length > 0) { - items = items.filter((item) => { - const hay = `${item.title} ${item.link}`.toLowerCase(); - return includeKeywords.some((keyword) => hay.includes(keyword)); - }); - } - if (excludeKeywords.length > 0) { - items = items.filter((item) => { - const hay = `${item.title} ${item.link}`.toLowerCase(); - return !excludeKeywords.some((keyword) => hay.includes(keyword)); - }); - } - items = items.slice(0, limit); - - return items - .map((item) => { - const publishedAt = now; - const hours = Math.max(1, Math.round((now - publishedAt) / 36e5)); - const jitter = resolveCoords( - meta, - `${item.title} ${item.link} ${extractUrlLocationText(item.link)}`, - `${meta.source.source_id}:${item.link}` - ); - const alert = { - alert_id: `${meta.source.source_id}-${hashId(item.link)}`, - source_id: meta.source.source_id, - source: meta.source, - title: item.title, - canonical_url: item.link, - first_seen: publishedAt.toISOString(), - last_seen: now.toISOString(), - status: "active", - category: meta.category, - severity: inferSeverity(item.title, defaultSeverity(meta.category)), - region_tag: meta.region_tag, - lat: jitter.lat, - lng: jitter.lng, - freshness_hours: hours, - reporting: meta.reporting, - }; - const scored = { - ...alert, - triage: scoreIncidentRelevance(alert, { - summary: item.summary, - tags: [], - metaHints: { feedType: meta.type }, - }), - }; - return normalizeInformationalSecurityAlert(scored, { - summary: item.summary, - tags: [], - metaHints: { feedType: meta.type }, - }); - }) - .filter(Boolean); -} - -async function fetchKev(meta) { - const limit = Math.max(1, Number(meta?.max_items ?? 
MAX_PER_SOURCE)); - const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), 15000); - let response; - try { - response = await fetch(meta.feed_url, { - signal: controller.signal, - headers: { - "User-Agent": "euosint-bot/1.0", - Accept: "application/json", - }, - }); - } finally { - clearTimeout(timer); - } - if (!response.ok) { - throw new Error(`kev fetch failed ${response.status} ${meta.feed_url}`); - } - const data = await response.json(); - const vulnerabilities = Array.isArray(data?.vulnerabilities) ? data.vulnerabilities : []; - // Sort by dateAdded descending (newest first) then take top N - vulnerabilities.sort((a, b) => new Date(b.dateAdded).getTime() - new Date(a.dateAdded).getTime()); - return vulnerabilities - .slice(0, limit) - .map((entry) => kevItemToAlert(entry, meta)) - .filter(Boolean); -} - -function interpolNoticeMatchesCountryCode(notice, countryCode) { - const normalizedCode = String(countryCode ?? "").trim().toUpperCase(); - if (!normalizedCode) return false; - const values = [ - ...(Array.isArray(notice?.nationalities) ? notice.nationalities : []), - ...(Array.isArray(notice?.countries_likely_to_be_visited) - ? notice.countries_likely_to_be_visited - : []), - ]; - return values.some((value) => String(value ?? 
"").trim().toUpperCase() === normalizedCode); -} - -async function fetchInterpolPages(startUrl, limit, headers) { - const seenPageUrls = new Set(); - const notices = []; - let nextPageUrl = startUrl; - let pageCount = 0; - const MAX_INTERPOL_PAGES = 200; - - while ( - nextPageUrl && - notices.length < limit && - pageCount < MAX_INTERPOL_PAGES && - !seenPageUrls.has(nextPageUrl) - ) { - seenPageUrls.add(nextPageUrl); - const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), 15000); - let response; - try { - response = await fetch(nextPageUrl, { - signal: controller.signal, - headers, - }); - } finally { - clearTimeout(timer); - } - if (!response.ok) { - throw new Error(`interpol fetch failed ${response.status} ${nextPageUrl}`); - } - const data = await response.json(); - const pageNotices = Array.isArray(data?._embedded?.notices) ? data._embedded.notices : []; - if (pageNotices.length === 0) break; - notices.push(...pageNotices); - pageCount += 1; - const nextHref = String(data?._links?.next?.href ?? "").trim(); - nextPageUrl = nextHref - ? new URL(nextHref, "https://ws-public.interpol.int").toString() - : null; - } - - return notices; -} - -async function fetchInterpolNotices(meta, now) { - const limit = Math.max(1, Number(meta?.max_items ?? MAX_PER_SOURCE)); - const headers = { - "User-Agent": "euosint-bot/1.0", - Accept: "application/json", - }; - const primaryNotices = await fetchInterpolPages(meta.feed_url, limit, headers); - let notices = primaryNotices; - let fallbackUsed = false; - - // Some nationality-filtered INTERPOL queries can return empty despite matching notices. - // Fallback: query the parent feed and client-filter by nationality code. - const url = new URL(meta.feed_url); - const nationalityCode = String(url.searchParams.get("nationality") ?? 
"") - .trim() - .toUpperCase(); - if (notices.length === 0 && nationalityCode) { - url.searchParams.delete("nationality"); - const fallbackPoolLimit = Math.max(limit * 5, 1000); - const fallbackNotices = await fetchInterpolPages( - url.toString(), - fallbackPoolLimit, - headers - ); - const filteredFallback = fallbackNotices.filter((notice) => - interpolNoticeMatchesCountryCode(notice, nationalityCode) - ); - if (filteredFallback.length > 0) { - notices = filteredFallback; - fallbackUsed = true; - } - } - - if (fallbackUsed) { - console.warn( - `WARN ${meta.source.authority_name}: primary nationality query returned empty; used client-side filtered fallback` - ); - } - - const noticeTitlePrefix = - meta.type === "interpol-yellow-json" - ? "INTERPOL Yellow Notice" - : "INTERPOL Red Notice"; - - return notices.slice(0, limit).map((notice) => { - const forename = String(notice.forename ?? "").trim(); - const name = String(notice.name ?? "").trim(); - const label = [forename, name].filter(Boolean).join(" "); - const rawHref = String(notice?._links?.self?.href ?? "").trim(); - const canonicalUrl = rawHref - ? new URL(rawHref, "https://ws-public.interpol.int").toString() - : meta.source.base_url; - const title = label ? `${noticeTitlePrefix}: ${label}` : noticeTitlePrefix; - const jitter = resolveInterpolNoticeCoords( - meta, - notice, - `${title} ${extractUrlLocationText(canonicalUrl)}`, - `${meta.source.source_id}:${canonicalUrl}` - ); - const countryHint = - inferCountryHintFromIsoCodes(notice?.countries_likely_to_be_visited) || - inferCountryHintFromIsoCodes(notice?.nationalities); - const derivedRegion = inferRegionFromCoords(jitter.lat, jitter.lng); - const derivedSource = { - ...meta.source, - country: countryHint ? toDisplayCountryName(countryHint.name) : meta.source.country, - country_code: countryHint?.code ?? 
meta.source.country_code, - region: derivedRegion || meta.source.region, - }; - const alert = { - alert_id: `${meta.source.source_id}-${hashId(canonicalUrl + title)}`, - source_id: meta.source.source_id, - source: derivedSource, - title, - canonical_url: canonicalUrl, - first_seen: now.toISOString(), - last_seen: now.toISOString(), - status: "active", - category: meta.category, - severity: "critical", - region_tag: countryHint?.code ?? meta.region_tag, - lat: jitter.lat, - lng: jitter.lng, - freshness_hours: 1, - reporting: meta.reporting, - }; - return { - ...alert, - triage: scoreIncidentRelevance(alert, { - summary: `${notice?.issuing_entity ?? ""} ${notice?.place_of_birth ?? ""}`.trim(), - tags: [ - ...(Array.isArray(notice?.nationalities) ? notice.nationalities : []), - ...(Array.isArray(notice?.countries_likely_to_be_visited) - ? notice.countries_likely_to_be_visited - : []), - ], - metaHints: { feedType: meta.type }, - }), - }; - }); -} - -function createStaticInterpolEntry(now) { - return { - alert_id: "interpol-hub-static", - source_id: "interpol-hub", - source: { - source_id: "interpol-hub", - authority_name: "INTERPOL Notices Hub", - country: "France", - country_code: "FR", - region: "International", - authority_type: "police", - base_url: "https://www.interpol.int", - }, - title: "INTERPOL Red & Yellow Notices — Browse Wanted & Missing Persons", - canonical_url: "https://www.interpol.int/How-we-work/Notices/View-Red-Notices", - first_seen: now.toISOString(), - last_seen: now.toISOString(), - status: "active", - category: "wanted_suspect", - severity: "critical", - region_tag: "INT", - lat: 45.764, - lng: 4.8357, - freshness_hours: 1, - reporting: { - label: "Browse INTERPOL Notices", - url: "https://www.interpol.int/How-we-work/Notices/View-Red-Notices", - notes: - "Red Notices: wanted persons. Yellow Notices: missing persons. 
" + - "Browse directly — https://www.interpol.int/How-we-work/Notices/View-Yellow-Notices", - }, - triage: { relevance_score: 1, reasoning: "Permanent INTERPOL hub link" }, - }; -} - -async function buildAlerts() { - const now = new Date(); - const alerts = [createStaticInterpolEntry(now)]; - const sourceHealth = []; - const sourceEntries = await getAllSources(); - - for (const entry of sourceEntries) { - const sourceId = entry.source.source_id; - const startedAt = new Date().toISOString(); - try { - const batch = - entry.type === "kev-json" - ? await fetchKev(entry) - : entry.type === "interpol-red-json" || entry.type === "interpol-yellow-json" - ? await fetchInterpolNotices(entry, now) - : entry.type === "html-list" - ? await fetchHtmlList(entry, now) - : await fetchRss(entry, now); - alerts.push(...batch); - sourceHealth.push({ - source_id: sourceId, - authority_name: entry.source.authority_name, - type: entry.type, - status: "ok", - fetched_count: batch.length, - feed_url: entry.feed_url, - started_at: startedAt, - finished_at: new Date().toISOString(), - }); - } catch (error) { - console.warn(`WARN ${entry.source.authority_name}: ${error.message}`); - sourceHealth.push({ - source_id: sourceId, - authority_name: entry.source.authority_name, - type: entry.type, - status: "error", - fetched_count: 0, - feed_url: entry.feed_url, - error: error.message, - started_at: startedAt, - finished_at: new Date().toISOString(), - }); - } - } - - const dedupedByKey = new Map(); - for (const alert of alerts) { - const key = `${alert.canonical_url}|${alert.title}`.toLowerCase(); - const current = dedupedByKey.get(key); - const currentScore = Number(current?.triage?.relevance_score ?? -1); - const nextScore = Number(alert?.triage?.relevance_score ?? 
-1); - if (!current || nextScore > currentScore) { - dedupedByKey.set(key, alert); - } - } - const deduped = [...dedupedByKey.values()]; - const { - kept: variantCollapsed, - suppressed: suppressedVariants, - } = collapseRecurringHeadlineVariants(deduped); - const threshold = clamp01(INCIDENT_RELEVANCE_THRESHOLD); - const active = variantCollapsed.filter( - (alert) => - Number(alert?.triage?.relevance_score ?? 0) >= - thresholdForAlert(alert, threshold) - ); - const filtered = variantCollapsed.filter( - (alert) => - Number(alert?.triage?.relevance_score ?? 0) < - thresholdForAlert(alert, threshold) - ); - active.sort((a, b) => new Date(b.first_seen).getTime() - new Date(a.first_seen).getTime()); - filtered.sort((a, b) => { - const scoreDelta = - Number(b?.triage?.relevance_score ?? 0) - Number(a?.triage?.relevance_score ?? 0); - if (scoreDelta !== 0) return scoreDelta; - return new Date(b.first_seen).getTime() - new Date(a.first_seen).getTime(); - }); - const activeBySource = active.reduce((acc, alert) => { - acc[alert.source_id] = (acc[alert.source_id] ?? 0) + 1; - return acc; - }, {}); - const filteredBySource = filtered.reduce((acc, alert) => { - acc[alert.source_id] = (acc[alert.source_id] ?? 0) + 1; - return acc; - }, {}); - sourceHealth.forEach((entry) => { - entry.active_count = activeBySource[entry.source_id] ?? 0; - entry.filtered_count = filteredBySource[entry.source_id] ?? 
0; - }); - const duplicateHeadlineSamples = summarizeTitleDuplicates(active); - const duplicateAudit = { - suppressed_variant_duplicates: suppressedVariants.length, - repeated_title_groups_in_active: duplicateHeadlineSamples.length, - repeated_title_samples: duplicateHeadlineSamples, - }; - if (suppressedVariants.length > 0) { - console.log( - `Suppressed ${suppressedVariants.length} recurring headline variants` - ); - } - return { active, filtered, sourceHealth, duplicateAudit }; -} - -async function readAlertsFile(path) { - try { - const raw = await readFile(path, "utf8"); - const parsed = JSON.parse(raw); - return Array.isArray(parsed) ? parsed : []; - } catch { - return []; - } -} - -function reconcileAlerts(activeAlerts, filteredAlerts, previousState, now) { - const nowIso = now.toISOString(); - const nowMs = now.getTime(); - const retentionCutoff = nowMs - REMOVED_RETENTION_DAYS * 86400000; - const previousById = new Map(previousState.map((a) => [a.alert_id, a])); - const presentById = new Map( - [...activeAlerts, ...filteredAlerts].map((a) => [a.alert_id, a]) - ); - - const currentActive = activeAlerts.map((a) => { - const prev = previousById.get(a.alert_id); - return { - ...a, - status: "active", - first_seen: prev?.first_seen ?? a.first_seen, - last_seen: nowIso, - }; - }); - - const currentFiltered = filteredAlerts.map((a) => { - const prev = previousById.get(a.alert_id); - return { - ...a, - status: "filtered", - first_seen: prev?.first_seen ?? 
a.first_seen, - last_seen: nowIso, - }; - }); - - const removedNew = previousState - .filter((prev) => prev.status !== "removed" && prev.status !== "filtered") - .filter((prev) => !presentById.has(prev.alert_id)) - .map((prev) => ({ - ...prev, - status: "removed", - last_seen: nowIso, - })); - - const removedCarry = previousState - .filter((prev) => prev.status === "removed") - .filter((prev) => !presentById.has(prev.alert_id)) - .filter((prev) => { - const t = new Date(prev.last_seen).getTime(); - return Number.isFinite(t) && t >= retentionCutoff; - }); - - const state = [...currentActive, ...currentFiltered, ...removedNew, ...removedCarry].sort( - (a, b) => new Date(b.last_seen).getTime() - new Date(a.last_seen).getTime() - ); - - return { currentActive, currentFiltered, state }; -} - -function assertCriticalSourceCoverage(sourceHealth) { - if (!FAIL_ON_CRITICAL_SOURCE_GAP || CRITICAL_SOURCE_PREFIXES.length === 0) return; - const missingPrefixes = CRITICAL_SOURCE_PREFIXES.filter((prefix) => { - const matched = sourceHealth.filter( - (entry) => - entry.source_id === prefix || entry.source_id.startsWith(`${prefix}-`) - ); - const totalFetched = matched.reduce( - (sum, entry) => sum + Number(entry.fetched_count ?? 
0), - 0 - ); - return totalFetched === 0; - }); - if (missingPrefixes.length > 0) { - throw new Error( - `critical source coverage gap: no records fetched for ${missingPrefixes.join(", ")}` - ); - } -} - -async function writeAlerts( - activeAlerts, - filteredAlerts, - stateAlerts, - sourceHealth, - duplicateAudit -) { - await mkdir(dirname(OUTPUT_PATH), { recursive: true }); - await mkdir(dirname(STATE_OUTPUT_PATH), { recursive: true }); - await mkdir(dirname(FILTERED_OUTPUT_PATH), { recursive: true }); - await mkdir(dirname(SOURCE_HEALTH_OUTPUT_PATH), { recursive: true }); - await writeFile(OUTPUT_PATH, JSON.stringify(activeAlerts, null, 2) + "\n", "utf8"); - await writeFile( - FILTERED_OUTPUT_PATH, - JSON.stringify(filteredAlerts, null, 2) + "\n", - "utf8" - ); - await writeFile(STATE_OUTPUT_PATH, JSON.stringify(stateAlerts, null, 2) + "\n", "utf8"); - const healthDoc = { - generated_at: new Date().toISOString(), - critical_source_prefixes: CRITICAL_SOURCE_PREFIXES, - fail_on_critical_source_gap: FAIL_ON_CRITICAL_SOURCE_GAP, - total_sources: sourceHealth.length, - sources_ok: sourceHealth.filter((entry) => entry.status === "ok").length, - sources_error: sourceHealth.filter((entry) => entry.status === "error").length, - duplicate_audit: duplicateAudit, - sources: sourceHealth, - }; - await writeFile( - SOURCE_HEALTH_OUTPUT_PATH, - JSON.stringify(healthDoc, null, 2) + "\n", - "utf8" - ); - const removedCount = stateAlerts.filter((a) => a.status === "removed").length; - const filteredCount = filteredAlerts.length; - console.log( - `Wrote ${activeAlerts.length} active alerts -> ${OUTPUT_PATH} (${filteredCount} filtered in ${FILTERED_OUTPUT_PATH}, ${removedCount} removed tracked in ${STATE_OUTPUT_PATH}, source health in ${SOURCE_HEALTH_OUTPUT_PATH})` - ); -} - -async function main() { - const { active, filtered, sourceHealth, duplicateAudit } = await buildAlerts(); - assertCriticalSourceCoverage(sourceHealth); - const previous = - (await 
readAlertsFile(STATE_OUTPUT_PATH)).length > 0 - ? await readAlertsFile(STATE_OUTPUT_PATH) - : await readAlertsFile(OUTPUT_PATH); - const { currentActive, currentFiltered, state } = reconcileAlerts( - active, - filtered, - previous, - new Date() - ); - await writeAlerts( - currentActive, - currentFiltered, - state, - sourceHealth, - duplicateAudit - ); - - if (WATCH) { - console.log(`Watching feeds every ${Math.round(INTERVAL_MS / 1000)}s...`); - setInterval(async () => { - try { - const { - active: nextActive, - filtered: nextFiltered, - sourceHealth: nextSourceHealth, - duplicateAudit: nextDuplicateAudit, - } = await buildAlerts(); - assertCriticalSourceCoverage(nextSourceHealth); - const prevState = - (await readAlertsFile(STATE_OUTPUT_PATH)).length > 0 - ? await readAlertsFile(STATE_OUTPUT_PATH) - : await readAlertsFile(OUTPUT_PATH); - const { - currentActive: activeNow, - currentFiltered: filteredNow, - state: stateNow, - } = reconcileAlerts( - nextActive, - nextFiltered, - prevState, - new Date() - ); - await writeAlerts( - activeNow, - filteredNow, - stateNow, - nextSourceHealth, - nextDuplicateAudit - ); - } catch (error) { - console.warn(`WARN refresh: ${error.message}`); - } - }, INTERVAL_MS); - } -} - -main().catch((error) => { - console.error(error); - process.exit(1); -}); From a927533da328476854bfdb43467317345a66dd8d Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 09:49:47 +0100 Subject: [PATCH 3/7] chore: ignore local collector build artifact --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 26cf504..fe14f37 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ dist/ dist-ssr/ coverage/ .tmp/ +euosint-collector # Runtime logs logs/ From c3578ccf3dec0c02059d179564e75cbf71151833 Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 09:52:45 +0100 Subject: [PATCH 4/7] ci: split github checks from local commit gate --- .github/workflows/ci.yml | 25 +++++++++++++++++++++++-- Makefile | 10 
+++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3b57331..ad99b3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,5 +36,26 @@ jobs: - name: Install dependencies run: npm ci - - name: Commit check - run: make commit-check + - name: UI lint + run: npm run lint + + - name: UI typecheck + run: npm run typecheck + + - name: UI build + run: npm run build + + - name: Go format check + run: make go-fmt-check + + - name: Go tests + run: make go-test + + - name: Go race + run: make go-race + + - name: Go coverage + run: make go-cover + + - name: Go vet + run: make go-vet diff --git a/Makefile b/Makefile index 444d9bb..c7c6b7d 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ BRANCH ?= main RELEASE_LEVEL ?= patch .PHONY: help check check-commit install clean lint typecheck test build ci \ - go-fmt go-test go-race go-cover go-vet go-codeql collector-parity commit-check \ + go-fmt go-fmt-check go-test go-race go-cover go-vet go-codeql commit-check \ docker-build docker-up docker-down docker-logs docker-shell \ dev-start dev-stop dev-restart dev-logs \ code-ql code-ql-summary \ @@ -80,6 +80,14 @@ go-fmt: ## Auto-format Go code @mkdir -p $(GOCACHE_DIR) $(GOMODCACHE_DIR) GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) gofmt -w $$(find cmd internal -name '*.go' -type f | sort) +go-fmt-check: ## Fail if Go files are not formatted + @unformatted=$$(gofmt -l $$(find cmd internal -name '*.go' -type f | sort)); \ + if [ -n "$$unformatted" ]; then \ + echo "gofmt needs to be run for:"; \ + echo "$$unformatted"; \ + exit 1; \ + fi + go-test: ## Run Go tests @mkdir -p $(GOCACHE_DIR) $(GOMODCACHE_DIR) GOCACHE=$(GOCACHE_DIR) GOMODCACHE=$(GOMODCACHE_DIR) go test ./... 
From 00d0dae5458e63cb861290b894f1fa5594414e90 Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 09:58:14 +0100 Subject: [PATCH 5/7] ci: tolerate codeql upload on repos without code security --- .github/workflows/codeql.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ea36aaa..3c065f2 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -50,3 +50,4 @@ jobs: - name: Analyze uses: github/codeql-action/analyze@v3 + continue-on-error: true From e698f0e80846572df6c6ccb2d8a43c36a7743db9 Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 10:04:44 +0100 Subject: [PATCH 6/7] chore: add apache headers to new collector files --- .env.example | 3 +++ .github/workflows/ci.yml | 3 +++ Dockerfile.collector | 3 +++ cmd/euosint-collector/main.go | 3 +++ docker/Caddyfile | 3 +++ docker/collector-entrypoint.sh | 3 +++ docs/collector-migration.md | 5 +++++ docs/euosint.service | 3 +++ docs/operations.md | 5 +++++ go.mod | 3 +++ internal/collector/app/app.go | 3 +++ internal/collector/app/app_test.go | 3 +++ internal/collector/config/config.go | 3 +++ internal/collector/config/config_test.go | 3 +++ internal/collector/fetch/client.go | 3 +++ internal/collector/fetch/client_test.go | 3 +++ internal/collector/model/alert.go | 3 +++ internal/collector/model/source.go | 3 +++ internal/collector/normalize/normalize.go | 3 +++ internal/collector/normalize/normalize_test.go | 3 +++ internal/collector/output/write.go | 3 +++ internal/collector/output/write_test.go | 3 +++ internal/collector/parse/html.go | 3 +++ internal/collector/parse/html_test.go | 3 +++ internal/collector/parse/rss.go | 3 +++ internal/collector/parse/rss_test.go | 3 +++ internal/collector/registry/registry.go | 3 +++ internal/collector/registry/registry_test.go | 3 +++ internal/collector/run/run.go | 3 +++ internal/collector/run/run_test.go | 3 +++ internal/collector/state/state.go | 3 +++ 
internal/collector/state/state_test.go | 3 +++ internal/collector/translate/google.go | 3 +++ 33 files changed, 103 insertions(+) diff --git a/.env.example b/.env.example index 2084099..e63a653 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,6 @@ +# Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + EUOSINT_SITE_ADDRESS=:80 EUOSINT_HTTP_PORT=8080 EUOSINT_HTTPS_PORT=8443 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad99b3b..2e4c16f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,3 +1,6 @@ +# Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + name: CI on: diff --git a/Dockerfile.collector b/Dockerfile.collector index bfb8012..0edd46c 100644 --- a/Dockerfile.collector +++ b/Dockerfile.collector @@ -1,3 +1,6 @@ +# Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + FROM golang:1.25-alpine AS build WORKDIR /app diff --git a/cmd/euosint-collector/main.go b/cmd/euosint-collector/main.go index 3155a0c..60f54cf 100644 --- a/cmd/euosint-collector/main.go +++ b/cmd/euosint-collector/main.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package main import ( diff --git a/docker/Caddyfile b/docker/Caddyfile index 4f230c1..5d803af 100644 --- a/docker/Caddyfile +++ b/docker/Caddyfile @@ -1,3 +1,6 @@ +# Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + { admin off } diff --git a/docker/collector-entrypoint.sh b/docker/collector-entrypoint.sh index e7f010f..8db33ef 100644 --- a/docker/collector-entrypoint.sh +++ b/docker/collector-entrypoint.sh @@ -1,4 +1,7 @@ #!/bin/sh +# Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + set -eu seed_if_missing() { diff --git a/docs/collector-migration.md b/docs/collector-migration.md index e226b73..a207f5d 100644 --- a/docs/collector-migration.md +++ b/docs/collector-migration.md @@ -1,3 +1,8 @@ + + # Collector Migration The collector runtime is now fully Go-based. The Node collector has been retired from operational paths, and scheduled feed generation, Docker runtime, and local commands all run through `cmd/euosint-collector`. diff --git a/docs/euosint.service b/docs/euosint.service index 6c6d78e..8ce5c02 100644 --- a/docs/euosint.service +++ b/docs/euosint.service @@ -1,3 +1,6 @@ +# Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +# SPDX-License-Identifier: Apache-2.0 + [Unit] Description=EUOSINT Docker Compose Stack Requires=docker.service diff --git a/docs/operations.md b/docs/operations.md index 9a11c1e..6db1814 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -1,3 +1,8 @@ + + # Operations ## Runtime Model diff --git a/go.mod b/go.mod index c8aaa81..4e8c44c 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + module github.com/scalytics/euosint go 1.25 diff --git a/internal/collector/app/app.go b/internal/collector/app/app.go index b4371fc..677066c 100644 --- a/internal/collector/app/app.go +++ b/internal/collector/app/app.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package app import ( diff --git a/internal/collector/app/app_test.go b/internal/collector/app/app_test.go index 9e540a9..78d2a94 100644 --- a/internal/collector/app/app_test.go +++ b/internal/collector/app/app_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package app import ( diff --git a/internal/collector/config/config.go b/internal/collector/config/config.go index af5608a..c2f4736 100644 --- a/internal/collector/config/config.go +++ b/internal/collector/config/config.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package config import ( diff --git a/internal/collector/config/config_test.go b/internal/collector/config/config_test.go index 59f6cb0..359895c 100644 --- a/internal/collector/config/config_test.go +++ b/internal/collector/config/config_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package config import "testing" diff --git a/internal/collector/fetch/client.go b/internal/collector/fetch/client.go index e2f27d7..78c3470 100644 --- a/internal/collector/fetch/client.go +++ b/internal/collector/fetch/client.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package fetch import ( diff --git a/internal/collector/fetch/client_test.go b/internal/collector/fetch/client_test.go index b6a7670..da6b66b 100644 --- a/internal/collector/fetch/client_test.go +++ b/internal/collector/fetch/client_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package fetch import ( diff --git a/internal/collector/model/alert.go b/internal/collector/model/alert.go index 10a734b..73fd02f 100644 --- a/internal/collector/model/alert.go +++ b/internal/collector/model/alert.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package model type Alert struct { diff --git a/internal/collector/model/source.go b/internal/collector/model/source.go index a0c7107..6e35c80 100644 --- a/internal/collector/model/source.go +++ b/internal/collector/model/source.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package model type RegistrySource struct { diff --git a/internal/collector/normalize/normalize.go b/internal/collector/normalize/normalize.go index 90afd1f..8875cf0 100644 --- a/internal/collector/normalize/normalize.go +++ b/internal/collector/normalize/normalize.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package normalize import ( diff --git a/internal/collector/normalize/normalize_test.go b/internal/collector/normalize/normalize_test.go index c635e38..55454a8 100644 --- a/internal/collector/normalize/normalize_test.go +++ b/internal/collector/normalize/normalize_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package normalize import ( diff --git a/internal/collector/output/write.go b/internal/collector/output/write.go index 8dc179d..c88e7c5 100644 --- a/internal/collector/output/write.go +++ b/internal/collector/output/write.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package output import ( diff --git a/internal/collector/output/write_test.go b/internal/collector/output/write_test.go index 41c4f7a..bde54c6 100644 --- a/internal/collector/output/write_test.go +++ b/internal/collector/output/write_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package output import ( diff --git a/internal/collector/parse/html.go b/internal/collector/parse/html.go index c28a3f1..5d1e3b8 100644 --- a/internal/collector/parse/html.go +++ b/internal/collector/parse/html.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package parse import ( diff --git a/internal/collector/parse/html_test.go b/internal/collector/parse/html_test.go index 3b38b84..97984ad 100644 --- a/internal/collector/parse/html_test.go +++ b/internal/collector/parse/html_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package parse import "testing" diff --git a/internal/collector/parse/rss.go b/internal/collector/parse/rss.go index efd29c2..402e844 100644 --- a/internal/collector/parse/rss.go +++ b/internal/collector/parse/rss.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package parse import ( diff --git a/internal/collector/parse/rss_test.go b/internal/collector/parse/rss_test.go index 581d6ce..6486884 100644 --- a/internal/collector/parse/rss_test.go +++ b/internal/collector/parse/rss_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package parse import "testing" diff --git a/internal/collector/registry/registry.go b/internal/collector/registry/registry.go index d38e0ff..3d1cbcf 100644 --- a/internal/collector/registry/registry.go +++ b/internal/collector/registry/registry.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package registry import ( diff --git a/internal/collector/registry/registry_test.go b/internal/collector/registry/registry_test.go index bb2a014..1b4ce08 100644 --- a/internal/collector/registry/registry_test.go +++ b/internal/collector/registry/registry_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package registry import ( diff --git a/internal/collector/run/run.go b/internal/collector/run/run.go index 78ef974..3f18e25 100644 --- a/internal/collector/run/run.go +++ b/internal/collector/run/run.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package run import ( diff --git a/internal/collector/run/run_test.go b/internal/collector/run/run_test.go index b639647..6fb3af6 100644 --- a/internal/collector/run/run_test.go +++ b/internal/collector/run/run_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package run import ( diff --git a/internal/collector/state/state.go b/internal/collector/state/state.go index b74ead7..80682d8 100644 --- a/internal/collector/state/state.go +++ b/internal/collector/state/state.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package state import ( diff --git a/internal/collector/state/state_test.go b/internal/collector/state/state_test.go index 42217e4..ffe5119 100644 --- a/internal/collector/state/state_test.go +++ b/internal/collector/state/state_test.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. 
- https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package state import ( diff --git a/internal/collector/translate/google.go b/internal/collector/translate/google.go index 786091f..6a4a6e4 100644 --- a/internal/collector/translate/google.go +++ b/internal/collector/translate/google.go @@ -1,3 +1,6 @@ +// Copyright 2026 ff, Scalytics, Inc. - https://www.scalytics.io +// SPDX-License-Identifier: Apache-2.0 + package translate import ( From e2b1d17b9b22fc42b5f8de13d2fd7ed8850b2e87 Mon Sep 17 00:00:00 2001 From: 2pk03 Date: Mon, 16 Mar 2026 10:05:52 +0100 Subject: [PATCH 7/7] chore: scope collector binary ignore to repo root --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fe14f37..453d796 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ dist/ dist-ssr/ coverage/ .tmp/ -euosint-collector +/euosint-collector # Runtime logs logs/