Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ jobs:
steps:
- uses: actions/checkout@v4

- uses: dtolnay/rust-toolchain@stable
# Pinned to match the Rust version in the Dockerfile. Bump both
# together when upgrading.
- uses: dtolnay/rust-toolchain@1.88

# Caches ~/.cargo and api/target between runs, keyed off Cargo.lock.
# Roughly halves CI time on the second run onward.
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rust:1.81.0
FROM rust:1.88

RUN apt-get update -y && apt-get install -y nano curl wget libhdf5-serial-dev libnetcdff-dev netcdf-bin

Expand Down
7 changes: 6 additions & 1 deletion api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ serde = "1.0.130"
once_cell = "1.8.0"
futures = "0.3.15"
chrono = "0.4.38"
tokio = "1.40.0"
tokio = { version = "1.40.0", features = ["macros", "rt-multi-thread"] }
tokio-stream = "0.1.16"
lazy_static = "1.4.0"

[dev-dependencies]
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
# `api` binary is auto-discovered from src/main.rs; `seed_test_db` is
# auto-discovered from src/bin/seed_test_db.rs.
82 changes: 82 additions & 0 deletions api/fixtures/bsose.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
[
{
"_id": "bsose_doc_001",
"metadata": ["bsose-profile-meta-2020"],
"basin": 1.0,
"geolocation": { "type": "Point", "coordinates": [20.0, 10.0] },
"level": 10.0,
"cell_vertical_fraction": 1.0,
"sea_binary_mask_at_t_locaiton": true,
"cell_z_size": 5.0,
"reference_density_profile": 1027.0,
"data": [
[1.1, 2.2, 3.3, 4.4],
[34.50, 34.60, 34.70, 34.80]
],
"data_info": [
["temp", "salinity"],
["units", "long_name"],
[["degC", "Temperature"], ["psu", "Salinity"]]
]
},
{
"_id": "bsose_doc_002",
"metadata": ["bsose-profile-meta-2020"],
"basin": 1.0,
"geolocation": { "type": "Point", "coordinates": [40.0, 30.0] },
"level": 10.0,
"cell_vertical_fraction": 1.0,
"sea_binary_mask_at_t_locaiton": true,
"cell_z_size": 5.0,
"reference_density_profile": 1027.0,
"data": [
[5.0, 6.0, 7.0, 8.0],
[35.00, 35.10, 35.20, 35.30]
],
"data_info": [
["temp", "salinity"],
["units", "long_name"],
[["degC", "Temperature"], ["psu", "Salinity"]]
]
},
{
"_id": "bsose_doc_003",
"metadata": ["bsose-profile-meta-2020"],
"basin": 2.0,
"geolocation": { "type": "Point", "coordinates": [-170.0, 50.0] },
"level": 20.0,
"cell_vertical_fraction": 1.0,
"sea_binary_mask_at_t_locaiton": true,
"cell_z_size": 5.0,
"reference_density_profile": 1026.5,
"data": [
[-1.0, 0.0, 1.0, 2.0],
[33.10, 33.20, 33.30, 33.40]
],
"data_info": [
["temp", "salinity"],
["units", "long_name"],
[["degC", "Temperature"], ["psu", "Salinity"]]
]
},
{
"_id": "bsose_doc_004",
"metadata": ["bsose-profile-meta-2020"],
"basin": 1.0,
"geolocation": { "type": "Point", "coordinates": [20.0, 10.0] },
"level": 50.0,
"cell_vertical_fraction": 1.0,
"sea_binary_mask_at_t_locaiton": true,
"cell_z_size": 10.0,
"reference_density_profile": 1028.0,
"data": [
[0.5, 0.6, 0.7, 0.8],
[34.10, 34.20, 34.30, 34.40]
],
"data_info": [
["temp", "salinity"],
["units", "long_name"],
[["degC", "Temperature"], ["psu", "Salinity"]]
]
}
]
21 changes: 21 additions & 0 deletions api/fixtures/timeseriesMeta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[
{
"_id": "bsose-profile-meta-2020",
"data_type": "BSOSE-profile",
"date_updated_argovis": "2026-01-01T00:00:00Z",
"timeseries": [
"2020-01-15T00:00:00Z",
"2020-04-15T00:00:00Z",
"2020-07-15T00:00:00Z",
"2020-10-15T00:00:00Z"
],
"source": [
{ "source": ["bsose"], "iter": "156" }
],
"cell_area": 10000000000.0,
"ocean_depth": 4500.0,
"depth_r0_to_bottom": 4000.0,
"interior_2d_mask": true,
"depth_r0_to_ref_surface": 100.0
}
]
123 changes: 123 additions & 0 deletions api/src/bin/seed_test_db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Seeds a MongoDB instance with the test fixtures used by the integration tests.
//
// Run before starting the API container so the API picks up the right
// `timeseriesMeta` document at startup:
//
// MONGODB_URI=mongodb://localhost:27017 cargo run --bin seed_test_db
//
// What it does:
// * drops the `argo.bsose` and `argo.timeseriesMeta` collections
// * loads the JSON fixtures embedded at compile time
// * converts ISO-8601 strings in known date fields to BSON DateTimes
// * inserts the resulting documents
// * creates a 2dsphere index on `geolocation` for the bsose collection
//
// Date fields in the fixtures are written as ISO-8601 strings to keep the
// JSON readable; the seeder converts them to BSON DateTimes here, since
// MongoDB's geo and time queries depend on the typed representation.

use mongodb::{
bson::{self, Bson, Document, DateTime as BsonDateTime},
options::ClientOptions,
Client, IndexModel,
};
use std::env;

// Fixture JSON embedded at compile time so the seeder is a single
// self-contained binary with no runtime file paths to configure.
const TIMESERIES_META_FIXTURE: &str =
include_str!("../../fixtures/timeseriesMeta.json");
// Grid documents destined for the `bsose` collection.
const BSOSE_FIXTURE: &str = include_str!("../../fixtures/bsose.json");

// Target database — presumably the one the API reads at startup (see the
// header comment); confirm against the API's configuration if it changes.
const DB_NAME: &str = "argo";

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Connection string comes from the environment so the same binary works
    // against a local mongod or a container.
    let uri = env::var("MONGODB_URI")
        .expect("MONGODB_URI must be set (e.g. mongodb://localhost:27017)");
    let client = Client::with_options(ClientOptions::parse(&uri).await?)?;
    let db = client.database(DB_NAME);

    // `timeseriesMeta` carries ISO-8601 strings in exactly these two fields;
    // the seeder promotes them to BSON DateTimes.
    let meta_date_fields = ["date_updated_argovis", "timeseries"];
    seed_collection(&db, "timeseriesMeta", TIMESERIES_META_FIXTURE, &meta_date_fields)
        .await?;

    // The grid documents have no top-level date fields to convert.
    seed_collection(&db, "bsose", BSOSE_FIXTURE, &[]).await?;

    // `$geoWithin` / `$near` queries require a 2dsphere index on the GeoJSON
    // field; MongoDB derives a default index name from the key spec.
    let geo_index = IndexModel::builder()
        .keys(bson::doc! { "geolocation": "2dsphere" })
        .build();
    let bsose = db.collection::<Document>("bsose");
    bsose.create_index(geo_index, None).await?;

    println!("Seed complete: {} populated.", DB_NAME);
    Ok(())
}

/// Drops collection `name`, parses `json_str` (a JSON array of objects),
/// promotes the listed `date_fields` from ISO-8601 strings to BSON
/// DateTimes, and inserts the resulting documents.
///
/// # Errors
/// Returns an error if the fixture is not a JSON array of objects, or if
/// any MongoDB operation (drop/insert) fails.
async fn seed_collection(
    db: &mongodb::Database,
    name: &str,
    json_str: &str,
    date_fields: &[&str],
) -> Result<(), Box<dyn std::error::Error>> {
    let coll = db.collection::<Document>(name);
    // Drop first so reseeding is idempotent.
    coll.drop(None).await?;

    let value: serde_json::Value = serde_json::from_str(json_str)?;
    let array = value
        .as_array()
        .ok_or_else(|| format!("fixture for {} must be a JSON array", name))?;

    let mut docs: Vec<Document> = Vec::with_capacity(array.len());
    for item in array {
        let bson_val: Bson = bson::to_bson(item)?;
        let mut doc: Document = match bson_val {
            Bson::Document(d) => d,
            other => {
                return Err(format!(
                    "fixture entry for {} must be an object, got {:?}",
                    name, other
                )
                .into())
            }
        };
        convert_date_fields(&mut doc, date_fields);
        docs.push(doc);
    }

    // Record the count up front so `docs` can be moved into the driver —
    // the previous version cloned the entire vector just to report its
    // length after the insert.
    let count = docs.len();
    if !docs.is_empty() {
        coll.insert_many(docs, None).await?;
    }
    println!(" seeded {}: {} documents", name, count);
    Ok(())
}

/// For each named field, convert ISO-8601 strings (or arrays of them) to
/// BSON DateTimes. Anything that doesn't parse is left alone so the failure
/// surfaces during query rather than during seed.
/// Rewrites each listed field in place, promoting ISO-8601 strings (or
/// arrays of them) to BSON DateTimes. Values that fail to parse are left
/// untouched so the problem surfaces at query time rather than seed time.
fn convert_date_fields(doc: &mut Document, fields: &[&str]) {
    for &key in fields {
        if let Some(raw) = doc.remove(key) {
            doc.insert(key, convert_value(raw));
        }
    }
}

/// Recursively maps RFC 3339 strings to BSON DateTimes. Arrays are
/// converted element-wise; every other BSON variant passes through
/// unchanged, including strings that fail to parse as dates.
fn convert_value(val: Bson) -> Bson {
    match val {
        Bson::String(text) => chrono::DateTime::parse_from_rfc3339(&text)
            .map(|dt| Bson::DateTime(BsonDateTime::from_millis(dt.timestamp_millis())))
            .unwrap_or(Bson::String(text)),
        Bson::Array(items) => {
            Bson::Array(items.into_iter().map(convert_value).collect())
        }
        other => other,
    }
}
79 changes: 79 additions & 0 deletions api/src/helpers/filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,83 @@ fn vertical_range_filter(vertical_range: &str, mut filter: mongodb::bson::Docume
let vertical_range: Vec<f64> = serde_json::from_str(vertical_range).unwrap();
filter.insert("level", mongodb::bson::doc! { "$gte": vertical_range[0], "$lt": vertical_range[1] });
filter
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn empty_params_produce_empty_filter() {
        // No query parameters means no constraints at all.
        let filter = filter_timeseries(json!({}));
        assert_eq!(filter.len(), 0);
    }

    #[test]
    fn id_filter_sets_id_equality() {
        let filter = filter_timeseries(json!({"id": "doc1"}));
        assert_eq!(filter.get_str("_id").unwrap(), "doc1");
    }

    #[test]
    fn vertical_range_filter_uses_gte_and_lt() {
        let filter = filter_timeseries(json!({"verticalRange": "[5.0, 50.0]"}));
        let level_clause = filter.get_document("level").unwrap();
        let lower = level_clause.get_f64("$gte").unwrap();
        let upper = level_clause.get_f64("$lt").unwrap();
        assert!((lower - 5.0).abs() < 1e-9);
        assert!((upper - 50.0).abs() < 1e-9);
    }

    #[test]
    fn polygon_filter_builds_geowithin_geometry() {
        let filter = filter_timeseries(json!({
            "polygon": "[[0,0],[10,0],[10,10],[0,10],[0,0]]"
        }));
        let geometry = filter
            .get_document("geolocation")
            .unwrap()
            .get_document("$geoWithin")
            .unwrap()
            .get_document("$geometry")
            .unwrap();
        assert_eq!(geometry.get_str("type").unwrap(), "Polygon");
        // A GeoJSON Polygon holds an array of rings; a simple polygon has one.
        let rings = geometry.get_array("coordinates").unwrap();
        assert_eq!(rings.len(), 1);
    }

    #[test]
    fn center_filter_builds_geonear() {
        let filter = filter_timeseries(json!({
            "center": "[10.0, 20.0]",
            "radius": "5000"
        }));
        let near_clause = filter
            .get_document("geolocation")
            .unwrap()
            .get_document("$near")
            .unwrap();
        let point = near_clause.get_document("$geometry").unwrap();
        assert_eq!(point.get_str("type").unwrap(), "Point");
        let max_dist = near_clause.get_f64("$maxDistance").unwrap();
        assert!((max_dist - 5000.0).abs() < 1e-9);
    }

    #[test]
    fn box_filter_single_box_when_not_crossing_dateline() {
        // SW [10,10] to NE [20,20] stays on one side of the antimeridian.
        let filter = filter_timeseries(json!({"box": "[[10,10],[20,20]]"}));
        let branches = filter.get_array("$or").unwrap();
        assert_eq!(branches.len(), 1, "non-crossing box should produce a single $or branch");
    }

    #[test]
    fn box_filter_splits_when_crossing_dateline() {
        // SW longitude (170) exceeds NE longitude (-170), so the box wraps.
        let filter = filter_timeseries(json!({"box": "[[170,10],[-170,20]]"}));
        let branches = filter.get_array("$or").unwrap();
        assert_eq!(branches.len(), 2, "dateline-crossing box should split into two branches");
    }

    #[test]
    fn id_and_vertical_range_compose() {
        // Multiple params should stack into one conjunctive filter.
        let filter = filter_timeseries(json!({
            "id": "doc1",
            "verticalRange": "[0, 100]"
        }));
        assert_eq!(filter.get_str("_id").unwrap(), "doc1");
        assert!(filter.get_document("level").is_ok());
    }
}
Loading
Loading