diff --git a/docs/source/getting-started.md b/docs/source/getting-started.md index fa7d8a7..6419d9f 100644 --- a/docs/source/getting-started.md +++ b/docs/source/getting-started.md @@ -76,12 +76,7 @@ vessel_search_result = await gfw_client.vessels.search_vessels( query="412331038", ) -vessel_search_ids = [ - self_reported_info.id - for vessel_search_item in vessel_search_result.data() - if vessel_search_item.registry_info_total_records >= 1 - for self_reported_info in vessel_search_item.self_reported_info -] +vessel_search_ids = vessel_search_result.vessel_ids print(vessel_search_ids) ``` @@ -89,10 +84,12 @@ print(vessel_search_ids) **Output:** ``` -['755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] +['da2b09b31-127e-27e0-fe5f-d6d87e96de6a', + '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', + '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] ``` -**Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information. +> **Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information. See how the [Vessels API](https://globalfishingwatch.org/our-apis/documentation#vessels-api) is used in the [Vessel Viewer](https://globalfishingwatch.org/map/) [here](https://globalfishingwatch.org/our-apis/assets/2024_Vessel_Viewer_and_APIs_behind_It.pdf). 
### Getting Details of Vessels Filtered by Vessel Searched IDs @@ -101,15 +98,7 @@ vessels_result = await gfw_client.vessels.get_vessels_by_ids( ids=vessel_search_ids, ) -vessel_self_reported_infos = [ - self_reported_info - for vessel_item in vessels_result.data() - for self_reported_info in vessel_item.self_reported_info -] - -vessel_ids = [ - self_reported_info.id for self_reported_info in vessel_self_reported_infos -] +vessel_ids = vessels_result.vessel_ids print(vessel_ids) ``` @@ -117,42 +106,26 @@ print(vessel_ids) **Output:** ``` -['755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] +['da2b09b31-127e-27e0-fe5f-d6d87e96de6a', + '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', + '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] ``` ### Getting Insights Related to Fishing Events for the Vessel Searched -**Important:** `start_date` must be on or after `January 1, 2020` +> **Important:** `start_date` must be on or after `January 1, 2020`. [Insights](https://globalfishingwatch.org/our-apis/documentation#insights-api) are available from `January 1, 2020` onwards. 
```python -start_datetime = min( - [ - self_reported_info.transmission_date_from - for self_reported_info in vessel_self_reported_infos - ] -) -start_date = start_datetime.date() +start_date = min(vessels_result.transmission_dates_from) start_date = max(start_date, datetime.date.fromisoformat("2020-01-01")) - -end_datetime = max( - [ - self_reported_info.transmission_date_to - for self_reported_info in vessel_self_reported_infos - ] -) -end_date = end_datetime.date() - -dataset_id = "public-global-vessel-identity:latest" -dataset_ids_vessel_ids = [ - {"dataset_id": dataset_id, "vessel_id": vessel_id} for vessel_id in vessel_ids -] +end_date = max(vessels_result.transmission_dates_to) insights_result = await gfw_client.insights.get_vessel_insights( includes=["FISHING"], start_date=start_date, end_date=end_date, - vessels=dataset_ids_vessel_ids, + vessels=vessel_ids, ) insights_df = insights_result.df() @@ -163,7 +136,7 @@ print(insights_df.info()) **Output:** ``` - + RangeIndex: 1 entries, 0 to 0 Data columns (total 6 columns): # Column Non-Null Count Dtype @@ -187,7 +160,10 @@ print(dict(insights_data.apparent_fishing.period_selected_counters)) **Output:** ``` -{'events': 398, 'events_gap_off': None, 'events_in_rfmo_without_known_authorization': 144, 'events_in_no_take_mpas': 0} +{'events': 263, + 'events_gap_off': None, + 'events_in_rfmo_without_known_authorization': 163, + 'events_in_no_take_mpas': 0} ``` ### Getting Fishing Events for the Vessels Searched @@ -208,27 +184,27 @@ print(events_df.info()) **Output:** ``` - -RangeIndex: 398 entries, 0 to 397 + +RangeIndex: 263 entries, 0 to 262 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 start 398 non-null datetime64[ns, UTC] - 1 end 398 non-null datetime64[ns, UTC] - 2 id 398 non-null object - 3 type 398 non-null object - 4 position 398 non-null object - 5 regions 398 non-null object - 6 bounding_box 398 non-null object - 7 distances 398 non-null 
object - 8 vessel 398 non-null object + 0 start 263 non-null datetime64[us, UTC] + 1 end 263 non-null datetime64[us, UTC] + 2 id 263 non-null str + 3 type 263 non-null str + 4 position 263 non-null object + 5 regions 263 non-null object + 6 bounding_box 263 non-null object + 7 distances 263 non-null object + 8 vessel 263 non-null object 9 encounter 0 non-null object - 10 fishing 398 non-null object + 10 fishing 263 non-null object 11 gap 0 non-null object 12 loitering 0 non-null object 13 port_visit 0 non-null object -dtypes: datetime64[ns, UTC](2), object(12) -memory usage: 43.7+ KB +dtypes: datetime64[us, UTC](2), object(10), str(2) +memory usage: 28.9+ KB ``` ## Next Steps diff --git a/docs/source/usage-guides/4wings-api.md b/docs/source/usage-guides/4wings-api.md index 19ff85b..f866022 100644 --- a/docs/source/usage-guides/4wings-api.md +++ b/docs/source/usage-guides/4wings-api.md @@ -17,6 +17,8 @@ To interact with the 4Wings endpoints, you first need to instantiate the `gfw.Cl ```python import os +import geopandas as gpd + import gfwapiclient as gfw @@ -38,6 +40,21 @@ The `gfw_client.fourwings` object provides methods to generate reports, retrieve Generates **AIS (Automatic Identification System) apparent fishing effort** reports to visualize fishing activity. Please [learn more about apparent fishing effort here](https://globalfishingwatch.org/our-apis/documentation#ais-apparent-fishing-effort) and [check its data caveats here](https://globalfishingwatch.org/our-apis/documentation#apparent-fishing-effort). +> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). 
+ +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="RUS") +rus_eez_roi = eez_rois_result.data()[0] + +print((rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3)) +``` + +**Output:** + +``` +('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS') +``` + ```python fishing_effort_report_result = await gfw_client.fourwings.create_fishing_effort_report( spatial_resolution="LOW", @@ -45,10 +62,7 @@ fishing_effort_report_result = await gfw_client.fourwings.create_fishing_effort_ group_by="FLAG", start_date="2022-01-01", end_date="2022-05-01", - region={ - "dataset": "public-eez-areas", - "id": "5690", - }, + region=rus_eez_roi, ) ``` @@ -122,6 +136,21 @@ Generates **AIS (Automatic Identification System) vessel presence** reports to v > **Disclaimer:** AIS vessel presence is one of the largest datasets available. To prevent timeouts and ensure optimal performance, keep requests manageable: prefer simple, small regions and shorter time ranges (e.g., a few days). +> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). 
+ +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="RUS") +rus_eez_roi = eez_rois_result.data()[0] + +print((rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3)) +``` + +**Output:** + +``` +('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS') +``` + ```python ais_presence_report_result = await gfw_client.fourwings.create_ais_presence_report( spatial_resolution="LOW", @@ -129,10 +158,7 @@ ais_presence_report_result = await gfw_client.fourwings.create_ais_presence_repo group_by="FLAG", start_date="2022-01-01", end_date="2022-05-01", - region={ - "dataset": "public-eez-areas", - "id": "5690", - }, + region=rus_eez_roi, ) ``` @@ -206,6 +232,21 @@ Generates **SAR (Synthetic-Aperture Radar) vessel detections** reports to identi > **Important:** **AIS vessel presence** shows where vessels **reported their positions** via the **Automatic Identification System (AIS)**. **SAR vessel detection** shows where **Synthetic Aperture Radar (SAR) satellites detected** vessels on the ocean surface, even if they **weren't transmitting AIS**. +> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). 
+ +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="RUS") +rus_eez_roi = eez_rois_result.data()[0] + +print((rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3)) +``` + +**Output:** + +``` +('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS') +``` + ```python sar_presence_report_result = await gfw_client.fourwings.create_sar_presence_report( spatial_resolution="LOW", @@ -213,10 +254,7 @@ sar_presence_report_result = await gfw_client.fourwings.create_sar_presence_repo group_by="GEARTYPE", start_date="2022-01-01", end_date="2022-05-01", - region={ - "dataset": "public-eez-areas", - "id": "5690", - }, + region=rus_eez_roi, ) ``` @@ -284,14 +322,29 @@ dtypes: float64(2), int64(2), object(16) memory usage: 624.3+ KB ``` -## Creating a Report (`create_report`) +## Creating a Generic Report from Predefined Region (`create_report`) Generates a report for any [supported datasets](https://globalfishingwatch.org/our-apis/documentation#supported-datasets), using fully customizable parameters. [Please check the data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat). > **Note:** AIS vessel presence (i.e., `"public-global-sar-presence:latest"` dataset) does **not** support `"GEARTYPE"` or `"FLAGANDGEARTYPE"` as `group_by` criteria. +> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). 
+ +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="RUS") +rus_eez_roi = eez_rois_result.data()[0] + +print((rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3)) +``` + +**Output:** + +``` +('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS') +``` + ```python -report_result = await gfw_client.fourwings.create_report( +predefined_report_result = await gfw_client.fourwings.create_report( spatial_resolution="LOW", temporal_resolution="MONTHLY", group_by="FLAG", @@ -302,29 +355,25 @@ report_result = await gfw_client.fourwings.create_report( ], start_date="2022-01-01", end_date="2022-05-01", - region={ - "dataset": "public-eez-areas", - "id": "5690", - }, + region=rus_eez_roi, ) ``` ### Access the report data as Pydantic models ```python -report_data = report_result.data() +predefined_report_data = predefined_report_result.data() -report_item = report_data[-1] +predefined_report_item = predefined_report_data[-1] print(( - report_item.date, - report_item.flag, - report_item.hours, - report_item.vessel_ids, - report_item.lat, - report_item.lon, + predefined_report_item.date, + predefined_report_item.flag, + predefined_report_item.hours, + predefined_report_item.vessel_ids, + predefined_report_item.lat, + predefined_report_item.lon, )) -print(report_item.model_dump()) ``` **Output:** @@ -336,23 +385,22 @@ print(report_item.model_dump()) ### Access the report data as a DataFrame ```python -report_df = report_result.df() +predefined_report_df = predefined_report_result.df() -print(report_df.info()) -print(report_df[["date", "flag", "hours", "lat", "lon"]].head()) +print(predefined_report_df.info()) ``` **Output:** ``` - + RangeIndex: 310599 entries, 0 to 310598 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 date 310599 non-null object + 0 date 310599 non-null str 1 detections 3995 non-null float64 - 2 flag 310599 non-null object + 2 flag 310599 non-null 
str 3 gear_type 0 non-null object 4 hours 306604 non-null float64 5 vessel_ids 310599 non-null int64 @@ -366,14 +414,107 @@ Data columns (total 20 columns): 13 mmsi 0 non-null object 14 call_sign 0 non-null object 15 dataset 0 non-null object - 16 report_dataset 310599 non-null object + 16 report_dataset 310599 non-null str 17 ship_name 0 non-null object 18 lat 310599 non-null float64 19 lon 310599 non-null float64 -dtypes: float64(4), int64(1), object(15) +dtypes: float64(4), int64(1), object(12), str(3) memory usage: 47.4+ MB ``` +## Creating a Generic Report from Custom Region (`create_report`) + +Generates a report for any [supported datasets](https://globalfishingwatch.org/our-apis/documentation#supported-datasets), using fully customizable parameters. [Please check the data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat). + +> **Note:** AIS vessel presence (i.e., `"public-global-sar-presence:latest"` dataset) does **not** support `"GEARTYPE"` or `"FLAGANDGEARTYPE"` as `group_by` criteria. + +> **Note:** A custom region can be either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely` geometry, an object implementing `__geo_interface__`, etc.), or a `GeoJson` model instance. Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html), and the supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). 
+ +```python +filename = "https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/fourwings/geojson/geojson.shp" + +custom_roi_gdf = gpd.read_file(filename) +``` + +```python +custom_report_result = await gfw_client.fourwings.create_report( + spatial_resolution="LOW", + temporal_resolution="MONTHLY", + group_by="FLAG", + datasets=[ + "public-global-fishing-effort:latest", + "public-global-sar-presence:latest", + "public-global-presence:latest", + ], + start_date="2022-01-01", + end_date="2022-05-01", + geojson=custom_roi_gdf, +) +``` + +### Access the report data as Pydantic models + +```python +custom_report_data = custom_report_result.data() + +custom_report_item = custom_report_data[-1] + +print(( + custom_report_item.date, + custom_report_item.flag, + custom_report_item.hours, + custom_report_item.vessel_ids, + custom_report_item.lat, + custom_report_item.lon, +)) +``` + +**Output:** + +``` +('2022-01', 'NOR', 1.0, 1, -25.9, -76.3) +``` + +### Access the report data as a DataFrame + +```python +custom_report_df = custom_report_result.df() + +print(custom_report_df.info()) +``` + +**Output:** + +``` + +RangeIndex: 6740 entries, 0 to 6739 +Data columns (total 20 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 date 6740 non-null str + 1 detections 0 non-null object + 2 flag 6740 non-null str + 3 gear_type 0 non-null object + 4 hours 6740 non-null float64 + 5 vessel_ids 6740 non-null int64 + 6 vessel_id 0 non-null object + 7 vessel_type 0 non-null object + 8 entry_timestamp 0 non-null object + 9 exit_timestamp 0 non-null object + 10 first_transmission_date 0 non-null object + 11 last_transmission_date 0 non-null object + 12 imo 0 non-null object + 13 mmsi 0 non-null object + 14 call_sign 0 non-null object + 15 dataset 0 non-null object + 16 report_dataset 6740 non-null str + 17 ship_name 0 non-null object + 18 lat 6740 non-null float64 + 19 lon 6740 non-null float64 +dtypes: 
float64(3), int64(1), object(13), str(3) +memory usage: 1.0+ MB +``` + ## Reference Data The 4Wings API often requires specifying geographic regions. You can use the [Reference Data API](references-data-api) to retrieve the `dataset` and `id` of various regions (e.g., EEZs, MPAs, RFMOs) that can then be used in the `create_report()` method. diff --git a/docs/source/usage-guides/bulk-downloads-api.md b/docs/source/usage-guides/bulk-downloads-api.md index e199643..e49cc14 100644 --- a/docs/source/usage-guides/bulk-downloads-api.md +++ b/docs/source/usage-guides/bulk-downloads-api.md @@ -15,8 +15,10 @@ This guide provides detailed instructions on how to use the [gfw-api-python-clie To interact with the Bulk Download endpoints, you first need to instantiate the `gfw.Client` and then access the `bulk_downloads` resource: ```python -import time import os +import time + +import geopandas as gpd import gfwapiclient as gfw @@ -43,80 +45,199 @@ These methods return a `result` object, which offers convenient ways to access t > **Tip:** Use [IPython](https://ipython.readthedocs.io/en/stable/) or Python 3.11+ with `python -m asyncio` to run `gfw-api-python-client` code interactively, as these environments support executing `async` / `await` expressions directly in the console. -## Create a Bulk Report (`create_bulk_report`) +## Create a Bulk Report from Predefined Region (`create_bulk_report`) The `create_bulk_report()` method allows you create a bulk report based on specified filters and spatial parameters. The `name` parameter is mandatory. Please [learn more about create a bulk report here](https://globalfishingwatch.org/our-apis/documentation#create-a-bulk-report) and [check its data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat) and [here](https://globalfishingwatch.org/our-apis/documentation#sar-fixed-infrastructure-data-caveats). 
+> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). + +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="ARG") +arg_eez_roi = eez_rois_result.data()[0] + +print((arg_eez_roi.id, arg_eez_roi.dataset, arg_eez_roi.label, arg_eez_roi.iso3)) +``` + +**Output:** + +``` +('8466', 'public-eez-areas', 'Argentinian Exclusive Economic Zone', 'ARG') +``` + ```python timestamp = int(time.time() * 1000) dataset = "public-fixed-infrastructure-data:latest" -region_dataset = "public-eez-areas" -region_id = "8466" # Argentinian Exclusive Economic Zone -name = f"{dataset.split(':')[0]}_{region_dataset}__{region_id}_{timestamp}" +name = f"{dataset.split(':')[0]}-python-package-example-{timestamp}-predefined_region" + +print(name) +``` -create_bulk_report_result = await gfw_client.bulk_downloads.create_bulk_report( +**Output:** + +``` +'public-fixed-infrastructure-data-python-package-example-1782384022398-predefined_region' +``` + +```python +create_predefined_bulk_report_result = ( + await gfw_client.bulk_downloads.create_bulk_report( + name=name, + dataset=dataset, + region=arg_eez_roi, + filters=[ + "label = 'oil'", + "label_confidence = 'high'", + "structure_start_date between '2020-01-01' and '2025-01-01'", + ], + ) +) +``` + +### Access Create a Bulk Report Result as Pydantic models + +```python +create_predefined_bulk_report_data = create_predefined_bulk_report_result.data() +print(( + create_predefined_bulk_report_data.id, + create_predefined_bulk_report_data.name, + create_predefined_bulk_report_data.status, + create_predefined_bulk_report_data.created_at, +)) +``` + 
+**Output:** + +``` +('ea21f550-780b-4fa6-aa8e-158f85289492', + 'public-fixed-infrastructure-data-python-package-example-1782384022398-predefined_region', + 'pending', + datetime.datetime(2026, 6, 25, 10, 40, 25, 113000, tzinfo=TzInfo(0))) +``` + +### Access Create a Bulk Report Result as a DataFrame + +```python +create_predefined_bulk_report_df = create_predefined_bulk_report_result.df() + +print(create_predefined_bulk_report_df.info()) +``` + +**Output:** + +``` + +RangeIndex: 1 entries, 0 to 0 +Data columns (total 13 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 id 1 non-null str + 1 dataset 1 non-null str + 2 name 1 non-null str + 3 file_path 1 non-null str + 4 format 1 non-null str + 5 filters 1 non-null object + 6 geom 1 non-null object + 7 status 1 non-null str + 8 owner_id 1 non-null int64 + 9 owner_type 1 non-null str + 10 created_at 1 non-null datetime64[us, UTC] + 11 updated_at 1 non-null datetime64[us, UTC] + 12 file_size 0 non-null object +dtypes: datetime64[us, UTC](2), int64(1), object(3), str(7) +memory usage: 236.0+ bytes +``` + +## Create a Bulk Report from Custom Region (`create_bulk_report`) + +The `create_bulk_report()` method allows you to create a bulk report based on specified filters and spatial parameters. The `name` parameter is mandatory. Please [learn more about creating a bulk report here](https://globalfishingwatch.org/our-apis/documentation#create-a-bulk-report) and [check its data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat) and [here](https://globalfishingwatch.org/our-apis/documentation#sar-fixed-infrastructure-data-caveats). + +> **Note:** A custom region can be either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely` geometry, an object implementing `__geo_interface__`, etc.), or a `GeoJson` model instance. 
Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) and supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + +```python +filename = "https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/bulk_downloads/geojson/geojson.shp" + +custom_roi_gdf = gpd.read_file(filename) +``` + +```python +timestamp = int(time.time() * 1000) +dataset = "public-fixed-infrastructure-data:latest" +name = f"{dataset.split(':')[0]}-python-package-example-{timestamp}-custom_region" + +print(name) +``` + +**Output:** + +``` +'public-fixed-infrastructure-data-python-package-example-1782384029540-custom_region' +``` + +```python +create_custom_bulk_report_result = await gfw_client.bulk_downloads.create_bulk_report( name=name, dataset=dataset, - region={ - "dataset": region_dataset, - "id": region_id, - }, - filters=["label = 'oil'", "label_confidence = 'high'"], + geojson=custom_roi_gdf, + filters=[ + "label = 'oil'", + "label_confidence = 'high'", + "structure_start_date between '2020-01-01' and '2025-01-01'" + ], ) ``` ### Access Create a Bulk Report Result as Pydantic models ```python -create_bulk_report_data = create_bulk_report_result.data() +create_custom_bulk_report_data = create_custom_bulk_report_result.data() print(( - create_bulk_report_data.id, - create_bulk_report_data.name, - create_bulk_report_data.status, - create_bulk_report_data.created_at, + create_custom_bulk_report_data.id, + create_custom_bulk_report_data.name, + create_custom_bulk_report_data.status, + create_custom_bulk_report_data.created_at, )) ``` **Output:** ``` -('c5e32895-4374-41d2-8b2e-ac414ed6757f', - 'public-fixed-infrastructure-data_public-eez-areas__8466_1768085547174', +('f0a39c14-1756-4f75-9150-0ada6b29eadf', + 
'public-fixed-infrastructure-data-python-package-example-1782384029540-custom_region', 'pending', - datetime.datetime(2026, 1, 10, 22, 52, 30, 9000, tzinfo=TzInfo(0))) + datetime.datetime(2026, 6, 25, 10, 40, 30, 740000, tzinfo=TzInfo(0))) ``` ### Access Create a Bulk Report Result as a DataFrame ```python -create_bulk_report_df = create_bulk_report_result.df() +create_custom_bulk_report_df = create_custom_bulk_report_result.df() -print(create_bulk_report_df.info()) -print(create_bulk_report_df.head()) +print(create_custom_bulk_report_df.info()) ``` **Output:** ``` - + RangeIndex: 1 entries, 0 to 0 -Data columns (total 12 columns): +Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 id 1 non-null object - 1 name 1 non-null object - 2 file_path 1 non-null object - 3 format 1 non-null object - 4 filters 1 non-null object - 5 geom 1 non-null object - 6 status 1 non-null object - 7 owner_id 1 non-null int64 - 8 owner_type 1 non-null object - 9 created_at 1 non-null datetime64[ns, UTC] - 10 updated_at 1 non-null datetime64[ns, UTC] - 11 file_size 0 non-null object -dtypes: datetime64[ns, UTC](2), int64(1), object(9) -memory usage: 228.0+ bytes + 0 id 1 non-null str + 1 dataset 1 non-null str + 2 name 1 non-null str + 3 file_path 1 non-null str + 4 format 1 non-null str + 5 filters 1 non-null object + 6 geom 1 non-null object + 7 status 1 non-null str + 8 owner_id 1 non-null int64 + 9 owner_type 1 non-null str + 10 created_at 1 non-null datetime64[us, UTC] + 11 updated_at 1 non-null datetime64[us, UTC] + 12 file_size 0 non-null object +dtypes: datetime64[us, UTC](2), int64(1), object(3), str(7) +memory usage: 236.0+ bytes ``` ## Get Bulk Report by ID (`get_bulk_report_by_id`) @@ -127,7 +248,7 @@ The `get_bulk_report_by_id()` method allows you retrieves metadata and status of ```python bulk_report_result = await gfw_client.bulk_downloads.get_bulk_report_by_id( - id=create_bulk_report_data.id + 
id=create_predefined_bulk_report_data.id ) ``` @@ -137,20 +258,20 @@ bulk_report_result = await gfw_client.bulk_downloads.get_bulk_report_by_id( bulk_report_data = bulk_report_result.data() print(( - create_bulk_report_data.id, - create_bulk_report_data.name, - create_bulk_report_data.status, - create_bulk_report_data.created_at, + bulk_report_data.id, + bulk_report_data.name, + bulk_report_data.status, + bulk_report_data.created_at, )) ``` **Output:** ``` -('c5e32895-4374-41d2-8b2e-ac414ed6757f', - 'public-fixed-infrastructure-data_public-eez-areas__8466_1768085547174', - 'pending', - datetime.datetime(2026, 1, 10, 22, 52, 30, 9000, tzinfo=TzInfo(0))) +('ea21f550-780b-4fa6-aa8e-158f85289492', + 'public-fixed-infrastructure-data-python-package-example-1782384022398-predefined_region', + 'done', + datetime.datetime(2026, 6, 25, 10, 40, 25, 113000, tzinfo=TzInfo(0))) ``` ### Access Get Bulk Report by ID Result as a DataFrame @@ -193,6 +314,7 @@ The `get_all_bulk_reports()` method allows you retrieves a list of **metadata an ```python bulk_reports_result = await gfw_client.bulk_downloads.get_all_bulk_reports( status="done", + dataset=dataset, ) ``` @@ -307,7 +429,8 @@ The `query_bulk_fixed_infrastructure_data_report()` method allows you retrieves ```python bulk_fixed_infrastructure_data_report_result = ( await gfw_client.bulk_downloads.query_bulk_fixed_infrastructure_data_report( - id=bulk_reports_data[0].id + id=bulk_reports_data[0].id, + sort="-structure_start_date", ) ) ``` diff --git a/docs/source/usage-guides/events-api.md b/docs/source/usage-guides/events-api.md index 1da9787..c4761d5 100644 --- a/docs/source/usage-guides/events-api.md +++ b/docs/source/usage-guides/events-api.md @@ -17,6 +17,8 @@ To interact with the Events endpoints, you first need to instantiate the `gfw.Cl ```python import os +import geopandas as gpd + import gfwapiclient as gfw @@ -34,22 +36,33 @@ The `gfw_client.events` object provides methods to retrieve event data and stati > **Tip:** 
Use [IPython](https://ipython.readthedocs.io/en/stable/) or Python 3.11+ with `python -m asyncio` to run `gfw-api-python-client` code interactively, as these environments support executing `async` / `await` expressions directly in the console. -## Retrieving All Events (`get_all_events`) +## Retrieving All Events from Predefined Region (`get_all_events`) The `get_all_events()` method allows you to retrieve a list of events based on specified criteria. The `datasets` parameter is mandatory. +> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). + +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="CHN") +chn_eez_roi = eez_rois_result.data()[0] +print((chn_eez_roi.id, chn_eez_roi.dataset, chn_eez_roi.label, chn_eez_roi.iso3)) +``` + +**Output:** + +``` +('8486', 'public-eez-areas', 'Chinese Exclusive Economic Zone', 'CHN') +``` + ```python events_result = await gfw_client.events.get_all_events( datasets=["public-global-fishing-events:latest"], - start_date="2020-10-01", - end_date="2020-12-31", - region={ - "dataset": "public-eez-areas", - "id": "8371", - }, + start_date="2017-01-01", + end_date="2017-01-31", + region=chn_eez_roi, limit=5, ) -``` +``` ### Access the list of event as Pydantic models @@ -57,13 +70,14 @@ events_result = await gfw_client.events.get_all_events( events_data = events_result.data() event = events_data[-1] print((event.id, event.type, event.vessel.id)) -print(event.model_dump()) ``` **Output:** ``` -('bbbf5d0cfa9639e5eac0130fc2b742e9', 'fishing', '7374d1988-87f8-6037-66b4-59854a026efb') +('54e1b8739c8ef032f2384e866b56077b', + 'fishing', + 
'de2fb30db-b118-8a4e-edac-3764639a0d9e') ``` ### Access the events as a DataFrame @@ -71,21 +85,20 @@ print(event.model_dump()) ```python events_df = events_result.df() print(events_df.info()) -print(events_df[["id", "type"]].head()) ``` **Output:** ``` - + RangeIndex: 5 entries, 0 to 4 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 start 5 non-null datetime64[ns, UTC] - 1 end 5 non-null datetime64[ns, UTC] - 2 id 5 non-null object - 3 type 5 non-null object + 0 start 5 non-null datetime64[us, UTC] + 1 end 5 non-null datetime64[us, UTC] + 2 id 5 non-null str + 3 type 5 non-null str 4 position 5 non-null object 5 regions 5 non-null object 6 bounding_box 5 non-null object @@ -96,7 +109,76 @@ Data columns (total 14 columns): 11 gap 0 non-null object 12 loitering 0 non-null object 13 port_visit 0 non-null object -dtypes: datetime64[ns, UTC](2), object(12) +dtypes: datetime64[us, UTC](2), object(10), str(2) +memory usage: 692.0+ bytes +``` + +## Retrieving All Events from Custom Region (`get_all_events`) + +> **Note:** A custom region can be either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely` geometry, an object implementing `__geo_interface__`, etc.), or a `GeoJson` model instance. Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html), and the supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). 
+ +```python +filename = "https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/events/geometry/geometry.shp" + +custom_roi_gdf = gpd.read_file(filename) +``` + +```python +custom_events_result = await gfw_client.events.get_all_events( + datasets=["public-global-fishing-events:latest"], + start_date="2017-01-01", + end_date="2017-01-31", + geometry=custom_roi_gdf, + limit=5, +) +``` + +### Access the list of event as Pydantic models + +```python +custom_events_data = custom_events_result.data() +custom_event = custom_events_data[-1] +print((custom_event.id, custom_event.type, custom_event.vessel.id)) +``` + +**Output:** + +``` +('5c03609c64d96c6ca5bfaaca0e9d9b6c', + 'fishing', + 'c01e0a0d2-20d9-7cc6-e04e-449dae2fbd95') +``` + +### Access the events as a DataFrame + +```python +custom_events_df = custom_events_result.df() +print(custom_events_df.info()) +``` + +**Output:** + +``` + +RangeIndex: 5 entries, 0 to 4 +Data columns (total 14 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 start 5 non-null datetime64[us, UTC] + 1 end 5 non-null datetime64[us, UTC] + 2 id 5 non-null str + 3 type 5 non-null str + 4 position 5 non-null object + 5 regions 5 non-null object + 6 bounding_box 5 non-null object + 7 distances 5 non-null object + 8 vessel 5 non-null object + 9 encounter 0 non-null object + 10 fishing 5 non-null object + 11 gap 0 non-null object + 12 loitering 0 non-null object + 13 port_visit 0 non-null object +dtypes: datetime64[us, UTC](2), object(10), str(2) memory usage: 692.0+ bytes ``` @@ -159,12 +241,12 @@ dtypes: datetime64[ns, UTC](2), object(12) memory usage: 244.0+ bytes ``` -## Getting Event Statistics (`get_events_stats`) +## Getting Event Statistics Worldwide (`get_events_stats`) The `get_events_stats()` method allows you to retrieve statistics on event occurrences based on specified criteria and a time series interval. 
```python -event_stats_result = await gfw_client.events.get_events_stats( +worldwide_event_stats_result = await gfw_client.events.get_events_stats( datasets=["public-global-encounters-events:latest"], encounter_types=["CARRIER-FISHING", "FISHING-CARRIER"], vessel_types=["CARRIER"], @@ -179,29 +261,166 @@ event_stats_result = await gfw_client.events.get_events_stats( ### Access the statistics as Pydantic models ```python -event_stat = event_stats_result.data() -print((event_stat.num_events, event_stat.num_flags, event_stat.num_vessels)) -print(event_stat.model_dump()) +worldwide_event_stat = worldwide_event_stats_result.data() +print(( + worldwide_event_stat.num_events, + worldwide_event_stat.num_flags, + worldwide_event_stat.num_vessels, +)) ``` **Output:** ``` -(24770, 1, 196) +(24819, 1, 194) ``` ### Access the statistics as a DataFrame ```python -event_stat_df = event_stats_result.df() -print(event_stat_df.info()) -print(event_stat_df[["num_events", "num_flags", "num_vessels"]].head()) +worldwide_event_stat_df = worldwide_event_stats_result.df() +print(worldwide_event_stat_df.info()) ``` **Output:** ``` - + +RangeIndex: 1 entries, 0 to 0 +Data columns (total 5 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 num_events 1 non-null int64 + 1 num_flags 1 non-null int64 + 2 num_vessels 1 non-null int64 + 3 flags 1 non-null object + 4 timeseries 1 non-null object +dtypes: int64(3), object(2) +memory usage: 172.0+ bytes +``` + +## Getting Event Statistics from Predefined Region (`get_events_stats`) + +> **Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**). 
+ +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="SEN") +sen_eez_roi = eez_rois_result.data()[0] + +print((sen_eez_roi.id, sen_eez_roi.dataset, sen_eez_roi.label, sen_eez_roi.iso3)) +``` + +**Output:** + +``` +('8371', 'public-eez-areas', 'Senegalese Exclusive Economic Zone', 'SEN') +``` + +```python +predefined_event_stats_result = await gfw_client.events.get_events_stats( + datasets=["public-global-port-visits-events:latest"], + start_date="2018-01-01", + end_date="2019-01-31", + timeseries_interval="YEAR", + region=sen_eez_roi, + confidences=["3", "4"], +) +``` + +### Access the statistics as Pydantic models + +```python +predefined_event_stat = predefined_event_stats_result.data() + +print(( + predefined_event_stat.num_events, + predefined_event_stat.num_flags, + predefined_event_stat.num_vessels, +)) +``` + +**Output:** + +``` +(4528, 75, 1464) +``` + +### Access the statistics as a DataFrame + +```python +predefined_event_stat_df = predefined_event_stats_result.df() + +print(predefined_event_stat_df.info()) +``` + +**Output:** + +``` + +RangeIndex: 1 entries, 0 to 0 +Data columns (total 5 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 num_events 1 non-null int64 + 1 num_flags 1 non-null int64 + 2 num_vessels 1 non-null int64 + 3 flags 1 non-null object + 4 timeseries 1 non-null object +dtypes: int64(3), object(2) +memory usage: 172.0+ bytes +``` + +## Getting Event Statistics from Custom Region (`get_events_stats`) + +> **Note:** A custom region can be either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, an object implementing `__geo_interface__`, etc.), or a `GeoJson` model instance.
Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) and supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + +```python +filename = "https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/events/geometry/geometry.shp" + +custom_stats_roi_gdf = gpd.read_file(filename) +``` + +```python +custom_event_stats_result = await gfw_client.events.get_events_stats( + datasets=["public-global-port-visits-events:latest"], + start_date="2018-01-01", + end_date="2019-01-31", + timeseries_interval="YEAR", + geometry=custom_stats_roi_gdf, + confidences=["3", "4"], +) +``` + +### Access the statistics as Pydantic models + +```python +custom_event_stat = custom_event_stats_result.data() + +print(( + custom_event_stat.num_events, + custom_event_stat.num_flags, + custom_event_stat.num_vessels, +)) +``` + +**Output:** + +``` +(301548, 162, 40996) +``` + +### Access the statistics as a DataFrame + +```python +custom_event_stat_df = custom_event_stats_result.df() + +print(custom_event_stat_df.info()) +``` + +**Output:** + +``` + RangeIndex: 1 entries, 0 to 0 Data columns (total 5 columns): # Column Non-Null Count Dtype diff --git a/docs/source/usage-guides/insights-api.md b/docs/source/usage-guides/insights-api.md index 3b4f3bf..e0a6e02 100644 --- a/docs/source/usage-guides/insights-api.md +++ b/docs/source/usage-guides/insights-api.md @@ -40,7 +40,7 @@ The `gfw_client.insights` object provides methods for retrieving insights data f The `get_vessel_insights()` method allows you to retrieve aggregated insights for a specific vessel within a given time range. -**Important:** `start_date` must be on or after `January 1, 2020` +**Important:** `start_date` must be on or after `January 1, 2020`. 
[Insights](https://globalfishingwatch.org/our-apis/documentation#insights-api) are available from `January 1, 2020` onwards. ```python insights_result = await gfw_client.insights.get_vessel_insights( diff --git a/docs/source/usage-guides/references-data-api.md b/docs/source/usage-guides/references-data-api.md index 57927ba..2eb1dc2 100644 --- a/docs/source/usage-guides/references-data-api.md +++ b/docs/source/usage-guides/references-data-api.md @@ -32,6 +32,8 @@ gfw_client = gfw.Client( The `gfw_client.references` object provides methods to retrieve different types of geographic regions. Each of these methods returns a `result` object, which offers convenient ways to access the data as Pydantic models using `.data()` or as pandas DataFrames using `.df()`. +**Note:** Use `gfw_client.references` methods to obtain the **Region of Interest (ROI)**, i.e., `region`, which can then be passed directly to the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. + > **Tip:** Use [IPython](https://ipython.readthedocs.io/en/stable/) or Python 3.11+ with `python -m asyncio` to run `gfw-api-python-client` code interactively, as these environments support executing `async` / `await` expressions directly in the console. 
## Retrieving Exclusive Economic Zones (EEZs) @@ -47,15 +49,13 @@ eez_regions_result = await gfw_client.references.get_eez_regions() ```python eez_regions_data = eez_regions_result.data() eez_region = eez_regions_data[-1] -print((eez_region.id, eez_region.dataset)) -print(eez_region.model_dump()) +print((eez_region.id, eez_region.dataset, eez_region.label, eez_region.iso3)) ``` **Output:** ``` -(48999, 'public-eez-areas') -{'id': 48999, 'label': 'Overlapping claim Peñón de Vélez de la Gomera: Spain / Morocco', 'iso3': None, 'dataset': 'public-eez-areas'} +('8489', 'public-eez-areas', 'Antartic 200NM zone beyond the coastline', 'ATA') ``` ### Access the EEZ regions as a DataFrame @@ -63,25 +63,46 @@ print(eez_region.model_dump()) ```python eez_regions_df = eez_regions_result.df() print(eez_regions_df.info()) -print(eez_regions_df[["id", "dataset"]].head()) ``` **Output:** ``` - -RangeIndex: 285 entries, 0 to 284 -Data columns (total 4 columns): - # Column Non-Null Count Dtype ---- ------ -------------- ----- - 0 id 285 non-null int64 - 1 label 285 non-null object - 2 iso3 234 non-null object - 3 dataset 285 non-null object -dtypes: int64(1), object(3) -memory usage: 17.9+ KB + +RangeIndex: 286 entries, 0 to 285 +Data columns (total 8 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 dataset 286 non-null str + 1 id 286 non-null str + 2 label 286 non-null str + 3 iso3 235 non-null str + 4 iso_sov_1 285 non-null str + 5 iso_sov_2 56 non-null str + 6 iso_sov_3 6 non-null str + 7 territory_1 285 non-null str +dtypes: str(8) +memory usage: 18.0 KB +``` + +### Filter the list of EEZ regions to Obtain the Region of Interest (ROI) + +```python +eez_rois_result = await gfw_client.references.get_eez_regions(iso3="SEN") + +eez_roi = eez_rois_result.data()[0] + +print((eez_roi.id, eez_roi.dataset, eez_roi.label, eez_roi.iso3)) +``` + +**Output:** + +``` +('8371', 'public-eez-areas', 'Senegalese Exclusive Economic Zone', 'SEN') ``` +> **Note:** Pass 
`eez_roi` directly to `region` parameter of the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. + ## Retrieving Marine Protected Areas (MPAs) To get a list of available Marine Protected Area (MPA) regions, use the `get_mpa_regions()` method: @@ -95,15 +116,15 @@ mpa_regions_result = await gfw_client.references.get_mpa_regions() ```python mpa_regions_data = mpa_regions_result.data() mpa_region = mpa_regions_data[-1] -print((mpa_region.id, mpa_region.dataset)) -print(mpa_region.model_dump()) +print((mpa_region.id, mpa_region.dataset, mpa_region.label)) ``` **Output:** ``` -('555799979', 'public-mpa-all') -{'id': '555799979', 'label': 'NAF Marine Protected Area - Marine Protected Area', 'name': None, 'dataset': 'public-mpa-all'} +('555882474', + 'public-mpa-all', + 'Humedal Tubul Raqui - Santuario de la Naturaleza') ``` ### Access the MPA regions as a DataFrame @@ -111,24 +132,40 @@ print(mpa_region.model_dump()) ```python mpa_regions_df = mpa_regions_result.df() print(mpa_regions_df.info()) -print(mpa_regions_df[["id", "dataset"]].head()) ``` **Output:** ``` - -RangeIndex: 16591 entries, 0 to 16590 -Data columns (total 4 columns): + +RangeIndex: 17172 entries, 0 to 17171 +Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 id 16591 non-null object - 1 label 16591 non-null object - 2 name 0 non-null object - 3 dataset 16591 non-null object -dtypes: object(4) -memory usage: 518.6+ KB + 0 dataset 17172 non-null str + 1 id 17172 non-null str + 2 label 17172 non-null str +dtypes: str(3) +memory usage: 402.6 KB +``` + +### Filter the list of MPA regions to Obtain the Region of Interest (ROI) + +```python 
+mpa_rois_result = await gfw_client.references.get_mpa_regions(id="555745302") + +mpa_roi = mpa_rois_result.data()[0] + +print((mpa_roi.id, mpa_roi.dataset, mpa_roi.label)) +``` + +**Output:** + ``` +('555745302', 'public-mpa-all', 'Dorsal de Nasca - Reserva Nacional') +``` + +> **Note:** Pass `mpa_roi` directly to `region` parameter of the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. ## Retrieving Regional Fisheries Management Organizations (RFMOs) @@ -143,16 +180,13 @@ rfmo_regions_result = await gfw_client.references.get_rfmo_regions() ```python rfmo_regions_data = rfmo_regions_result.data() rfmo_region = rfmo_regions_data[-1] -print((rfmo_region.id, rfmo_region.dataset)) -print(rfmo_region.model_dump()) +print((rfmo_region.id, rfmo_region.dataset, rfmo_region.label)) ``` **Output:** ``` -('BOBP-IGO', 'public-rfmo') -{'id': 'WCPFC', 'label': 'WCPFC', 'rfb': None, 'dataset': 'public-rfmo'} -{'id': 'BOBP-IGO', 'label': 'BOBP-IGO', 'rfb': None, 'dataset': 'public-rfmo', 'ID': 'BOBP-IGO'} +('BOBP-IGO', 'public-rfmo', 'BOBP-IGO') ``` ### Access the RFMO regions as a DataFrame @@ -160,25 +194,41 @@ print(rfmo_region.model_dump()) ```python rfmo_regions_df = rfmo_regions_result.df() print(rfmo_regions_df.info()) -print(rfmo_regions_df[["id", "dataset"]].head()) ``` **Output:** ``` - + RangeIndex: 42 entries, 0 to 41 -Data columns (total 5 columns): +Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- - 0 id 42 non-null object - 1 label 42 non-null object - 2 rfb 0 non-null object - 3 dataset 42 non-null object - 4 ID 42 non-null object -dtypes: object(5) -memory usage: 1.8+ KB + 0 dataset 42 non-null str + 1 id 
42 non-null str + 2 label 42 non-null str + 3 id_ 42 non-null str +dtypes: str(4) +memory usage: 1.4 KB +``` + +### Filter the list of RFMO regions to Obtain the Region of Interest (ROI) + +```python +rfmo_rois_result = await gfw_client.references.get_rfmo_regions(id="WCPFC") + +rfmo_roi = rfmo_rois_result.data()[0] + +print((rfmo_roi.id, rfmo_roi.dataset, rfmo_roi.label)) +``` + +**Output:** + ``` +('WCPFC', 'public-rfmo', 'WCPFC') +``` + +> **Note:** Pass `rfmo_roi` directly to `region` parameter of the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. ## Next Steps diff --git a/docs/source/usage-guides/vessels-api.md b/docs/source/usage-guides/vessels-api.md index 2196890..d8071b4 100644 --- a/docs/source/usage-guides/vessels-api.md +++ b/docs/source/usage-guides/vessels-api.md @@ -32,6 +32,8 @@ gfw_client = gfw.Client( The `gfw_client.vessels` object provides methods to search for and retrieve vessel information. Each of these methods returns a `result` object, which offers convenient ways to access the data as Pydantic models using `.data()` or as pandas DataFrames using `.df()`. +> **Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information. See how the [Vessels API](https://globalfishingwatch.org/our-apis/documentation#vessels-api) is used in the [Vessel Viewer](https://globalfishingwatch.org/map/) [here](https://globalfishingwatch.org/our-apis/assets/2024_Vessel_Viewer_and_APIs_behind_It.pdf). 
Use the `vessel_ids` property of the `result` object returned by `gfw_client.vessels` methods as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. + > **Tip:** Use [IPython](https://ipython.readthedocs.io/en/stable/) or Python 3.11+ with `python -m asyncio` to run `gfw-api-python-client` code interactively, as these environments support executing `async` / `await` expressions directly in the console. ## Searching for Vessels (`search_vessels`) @@ -40,11 +42,27 @@ The `search_vessels()` method allows you to find vessels based on a query and va ```python vessel_search_result = await gfw_client.vessels.search_vessels( - where="ssvid='775998121' AND shipname='DON TITO'", + where="ssvid='412331038' AND imo='8775637'", includes=["MATCH_CRITERIA", "OWNERSHIP"], ) ``` +### Get List of Matched Vessel IDs + +```python +print(vessel_search_result.vessel_ids) +``` + +**Output:** + +``` +['da2b09b31-127e-27e0-fe5f-d6d87e96de6a', + '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', + '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] +``` + +> **Note:** Use the `vessel_search_result.vessel_ids` as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. 
+ ### Access the list of vessel as Pydantic models ```python @@ -95,13 +113,29 @@ The `get_vessels_by_ids()` method retrieves information for a list of vessels gi ```python vessels_result = await gfw_client.vessels.get_vessels_by_ids( ids=[ - "8c7304226-6c71-edbe-0b63-c246734b3c01", - "6583c51e3-3626-5638-866a-f47c3bc7ef7c", - "71e7da672-2451-17da-b239-857831602eca", - ], + "da2b09b31-127e-27e0-fe5f-d6d87e96de6a", + "755a48dd4-4bee-4bcf-7b5f-9baea058fc7b", + "3dad49b0b-b2e0-9347-0c4c-e39fea560f9f", + ] ) ``` +### Get List of Matched Vessel IDs + +```python +print(vessels_result.vessel_ids) +``` + +**Output:** + +``` +['da2b09b31-127e-27e0-fe5f-d6d87e96de6a', + '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', + '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] +``` + +> **Note:** Use the `vessels_result.vessel_ids` as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods. + ### Access the list of vessel as Pydantic models ```python @@ -150,10 +184,26 @@ The `get_vessel_by_id()` method retrieves detailed information for a specific ve ```python vessel_result = await gfw_client.vessels.get_vessel_by_id( - id="c54923e64-46f3-9338-9dcb-ff09724077a3", + id="da2b09b31-127e-27e0-fe5f-d6d87e96de6a", ) ``` +### Get List of Matched Vessel IDs + +```python +print(vessel_result.vessel_ids) +``` + +**Output:** + +``` +['da2b09b31-127e-27e0-fe5f-d6d87e96de6a', + '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b', + '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'] +``` + +> **Note:** Use the `vessel_result.vessel_ids` as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods.
+ ### Access the vessel as Pydantic model ```python diff --git a/notebooks/getting-started.ipynb b/notebooks/getting-started.ipynb index d764948..f49aa0b 100644 --- a/notebooks/getting-started.ipynb +++ b/notebooks/getting-started.ipynb @@ -213,12 +213,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 5 entries, 0 to 4\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 5 non-null object\n", + " 0 dataset 5 non-null str \n", " 1 registry_info_total_records 5 non-null int64 \n", " 2 registry_info 5 non-null object\n", " 3 registry_owners 5 non-null object\n", @@ -226,7 +226,7 @@ " 5 combined_sources_info 5 non-null object\n", " 6 self_reported_info 5 non-null object\n", " 7 matchCriteria 5 non-null object\n", - "dtypes: int64(1), object(7)\n", + "dtypes: int64(1), object(6), str(1)\n", "memory usage: 452.0+ bytes\n" ] } @@ -275,7 +275,7 @@ " \n", " \n", " 0\n", - " public-global-vessel-identity:v3.0\n", + " public-global-vessel-identity:v4.0\n", " 0\n", " []\n", " []\n", @@ -286,18 +286,29 @@ " \n", " \n", " 1\n", - " public-global-vessel-identity:v3.0\n", + " public-global-vessel-identity:v4.0\n", " 0\n", " []\n", " []\n", " []\n", - " [{'vessel_id': '91df2f8c7-74fd-5b5a-60f5-3d86f...\n", - " [{'id': '91df2f8c7-74fd-5b5a-60f5-3d86f9c51ff2...\n", - " [{'reference': '91df2f8c7-74fd-5b5a-60f5-3d86f...\n", + " [{'vessel_id': '108c5510b-b6aa-9cf2-8b86-0598d...\n", + " [{'id': '108c5510b-b6aa-9cf2-8b86-0598de546741...\n", + " [{'reference': '108c5510b-b6aa-9cf2-8b86-0598d...\n", " \n", " \n", " 2\n", - " public-global-vessel-identity:v3.0\n", + " public-global-vessel-identity:v4.0\n", + " 1\n", + " [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's...\n", + " [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',...\n", + " [{'date_from': 2017-01-04 00:00:00+00:00, 'dat...\n", + " [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87...\n", + " [{'id': 
'755a48dd4-4bee-4bcf-7b5f-9baea058fc7b...\n", + " [{'reference': '755a48dd4-4bee-4bcf-7b5f-9baea...\n", + " \n", + " \n", + " 3\n", + " public-global-vessel-identity:v4.0\n", " 0\n", " []\n", " []\n", @@ -307,26 +318,15 @@ " [{'reference': 'd2fbd05f5-57dd-1faa-c384-67a75...\n", " \n", " \n", - " 3\n", - " public-global-vessel-identity:v3.0\n", - " 1\n", - " [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's...\n", - " [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',...\n", - " [{'date_from': 2017-01-04 00:00:00+00:00, 'dat...\n", - " [{'vessel_id': '755a48dd4-4bee-4bcf-7b5f-9baea...\n", - " [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b...\n", - " [{'reference': '755a48dd4-4bee-4bcf-7b5f-9baea...\n", - " \n", - " \n", " 4\n", - " public-global-vessel-identity:v3.0\n", + " public-global-vessel-identity:v4.0\n", " 0\n", " []\n", " []\n", " []\n", - " [{'vessel_id': '108c5510b-b6aa-9cf2-8b86-0598d...\n", - " [{'id': '108c5510b-b6aa-9cf2-8b86-0598de546741...\n", - " [{'reference': '108c5510b-b6aa-9cf2-8b86-0598d...\n", + " [{'vessel_id': '91df2f8c7-74fd-5b5a-60f5-3d86f...\n", + " [{'id': '91df2f8c7-74fd-5b5a-60f5-3d86f9c51ff2...\n", + " [{'reference': '91df2f8c7-74fd-5b5a-60f5-3d86f...\n", " \n", " \n", "\n", @@ -334,53 +334,53 @@ ], "text/plain": [ " dataset registry_info_total_records \\\n", - "0 public-global-vessel-identity:v3.0 0 \n", - "1 public-global-vessel-identity:v3.0 0 \n", - "2 public-global-vessel-identity:v3.0 0 \n", - "3 public-global-vessel-identity:v3.0 1 \n", - "4 public-global-vessel-identity:v3.0 0 \n", + "0 public-global-vessel-identity:v4.0 0 \n", + "1 public-global-vessel-identity:v4.0 0 \n", + "2 public-global-vessel-identity:v4.0 1 \n", + "3 public-global-vessel-identity:v4.0 0 \n", + "4 public-global-vessel-identity:v4.0 0 \n", "\n", " registry_info \\\n", "0 [] \n", "1 [] \n", - "2 [] \n", - "3 [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's... \n", + "2 [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's... 
\n", + "3 [] \n", "4 [] \n", "\n", " registry_owners \\\n", "0 [] \n", "1 [] \n", - "2 [] \n", - "3 [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',... \n", + "2 [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',... \n", + "3 [] \n", "4 [] \n", "\n", " registry_public_authorizations \\\n", "0 [] \n", "1 [] \n", - "2 [] \n", - "3 [{'date_from': 2017-01-04 00:00:00+00:00, 'dat... \n", + "2 [{'date_from': 2017-01-04 00:00:00+00:00, 'dat... \n", + "3 [] \n", "4 [] \n", "\n", " combined_sources_info \\\n", "0 [{'vessel_id': 'a5ef97d59-9f40-3bdc-e247-daf9c... \n", - "1 [{'vessel_id': '91df2f8c7-74fd-5b5a-60f5-3d86f... \n", - "2 [{'vessel_id': 'd2fbd05f5-57dd-1faa-c384-67a75... \n", - "3 [{'vessel_id': '755a48dd4-4bee-4bcf-7b5f-9baea... \n", - "4 [{'vessel_id': '108c5510b-b6aa-9cf2-8b86-0598d... \n", + "1 [{'vessel_id': '108c5510b-b6aa-9cf2-8b86-0598d... \n", + "2 [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87... \n", + "3 [{'vessel_id': 'd2fbd05f5-57dd-1faa-c384-67a75... \n", + "4 [{'vessel_id': '91df2f8c7-74fd-5b5a-60f5-3d86f... \n", "\n", " self_reported_info \\\n", "0 [{'id': 'a5ef97d59-9f40-3bdc-e247-daf9c892ceff... \n", - "1 [{'id': '91df2f8c7-74fd-5b5a-60f5-3d86f9c51ff2... \n", - "2 [{'id': 'd2fbd05f5-57dd-1faa-c384-67a75f27fc80... \n", - "3 [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b... \n", - "4 [{'id': '108c5510b-b6aa-9cf2-8b86-0598de546741... \n", + "1 [{'id': '108c5510b-b6aa-9cf2-8b86-0598de546741... \n", + "2 [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b... \n", + "3 [{'id': 'd2fbd05f5-57dd-1faa-c384-67a75f27fc80... \n", + "4 [{'id': '91df2f8c7-74fd-5b5a-60f5-3d86f9c51ff2... \n", "\n", " matchCriteria \n", "0 [{'reference': 'a5ef97d59-9f40-3bdc-e247-daf9c... \n", - "1 [{'reference': '91df2f8c7-74fd-5b5a-60f5-3d86f... \n", - "2 [{'reference': 'd2fbd05f5-57dd-1faa-c384-67a75... \n", - "3 [{'reference': '755a48dd4-4bee-4bcf-7b5f-9baea... \n", - "4 [{'reference': '108c5510b-b6aa-9cf2-8b86-0598d... " + "1 [{'reference': '108c5510b-b6aa-9cf2-8b86-0598d... 
\n", + "2 [{'reference': '755a48dd4-4bee-4bcf-7b5f-9baea... \n", + "3 [{'reference': 'd2fbd05f5-57dd-1faa-c384-67a75... \n", + "4 [{'reference': '91df2f8c7-74fd-5b5a-60f5-3d86f... " ] }, "execution_count": 8, @@ -392,12 +392,20 @@ "vessel_search_df.head()" ] }, + { + "cell_type": "markdown", + "id": "26cfcee0-ce10-4f79-966d-d20e326cdc37", + "metadata": {}, + "source": [ + "#### Get List of Vessel Searched IDs" + ] + }, { "cell_type": "markdown", "id": "3b34b0b1-c631-42c0-8950-d87234e68e87", "metadata": {}, "source": [ - "**Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information." + "**Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information. See how the [Vessels API](https://globalfishingwatch.org/our-apis/documentation#vessels-api) is used in the [Vessel Viewer](https://globalfishingwatch.org/map/) [here](https://globalfishingwatch.org/our-apis/assets/2024_Vessel_Viewer_and_APIs_behind_It.pdf)." 
] }, { @@ -407,12 +415,7 @@ "metadata": {}, "outputs": [], "source": [ - "vessel_search_ids = [\n", - " self_reported_info.id\n", - " for vessel_search_item in vessel_search_result.data()\n", - " if vessel_search_item.registry_info_total_records >= 1\n", - " for self_reported_info in vessel_search_item.self_reported_info\n", - "]" + "vessel_search_ids = vessel_search_result.vessel_ids" ] }, { @@ -424,7 +427,8 @@ { "data": { "text/plain": [ - "['755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", + "['da2b09b31-127e-27e0-fe5f-d6d87e96de6a',\n", + " '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", " '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f']" ] }, @@ -477,19 +481,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 1 non-null object\n", + " 0 dataset 1 non-null str \n", " 1 registry_info_total_records 1 non-null int64 \n", " 2 registry_info 1 non-null object\n", " 3 registry_owners 1 non-null object\n", " 4 registry_public_authorizations 1 non-null object\n", " 5 combined_sources_info 1 non-null object\n", " 6 self_reported_info 1 non-null object\n", - "dtypes: int64(1), object(6)\n", + "dtypes: int64(1), object(5), str(1)\n", "memory usage: 188.0+ bytes\n" ] } @@ -537,12 +541,12 @@ " \n", " \n", " 0\n", - " public-global-vessel-identity:v3.0\n", - " 1\n", + " public-global-vessel-identity:v4.0\n", + " 3\n", " [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's...\n", " [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',...\n", " [{'date_from': 2017-01-04 00:00:00+00:00, 'dat...\n", - " [{'vessel_id': '755a48dd4-4bee-4bcf-7b5f-9baea...\n", + " [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87...\n", " [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b...\n", " \n", " \n", @@ -551,7 +555,7 @@ ], "text/plain": [ " dataset registry_info_total_records \\\n", - "0 public-global-vessel-identity:v3.0 1 \n", + "0 
public-global-vessel-identity:v4.0 3 \n", "\n", " registry_info \\\n", "0 [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's... \n", @@ -563,7 +567,7 @@ "0 [{'date_from': 2017-01-04 00:00:00+00:00, 'dat... \n", "\n", " combined_sources_info \\\n", - "0 [{'vessel_id': '755a48dd4-4bee-4bcf-7b5f-9baea... \n", + "0 [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87... \n", "\n", " self_reported_info \n", "0 [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b... " @@ -579,51 +583,46 @@ ] }, { - "cell_type": "code", - "execution_count": 15, - "id": "522a618d-140e-4d36-9e18-b71e81d1c40e", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "522a618d-140e-4d36-9e18-b71e81d1c40e", - "outputId": "ee9a5ca8-f994-4357-f748-d1140bb29f9a" - }, - "outputs": [], + "cell_type": "markdown", + "id": "ec13bc79-6a7d-4f21-94c9-321f7f77db59", + "metadata": {}, + "source": [ + "#### Get List of Vessel IDs" + ] + }, + { + "cell_type": "markdown", + "id": "642ec205-e8f6-4054-91d3-c5868376e90a", + "metadata": {}, "source": [ - "vessel_self_reported_infos = [\n", - " self_reported_info\n", - " for vessel_item in vessels_result.data()\n", - " for self_reported_info in vessel_item.self_reported_info\n", - "]" + "**Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information. See how the [Vessels API](https://globalfishingwatch.org/our-apis/documentation#vessels-api) is used in the [Vessel Viewer](https://globalfishingwatch.org/map/) [here](https://globalfishingwatch.org/our-apis/assets/2024_Vessel_Viewer_and_APIs_behind_It.pdf)." 
] }, { "cell_type": "code", - "execution_count": 16, - "id": "e35272d9-7758-4abc-bb24-a112647c8fc9", + "execution_count": 15, + "id": "006357be-7f16-4a99-9fb5-bfe94781c8cb", "metadata": {}, "outputs": [], "source": [ - "vessel_ids = [\n", - " self_reported_info.id for self_reported_info in vessel_self_reported_infos\n", - "]" + "vessel_ids = vessels_result.vessel_ids" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "05a96294-7009-477f-9175-f2653d10b989", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", + "['da2b09b31-127e-27e0-fe5f-d6d87e96de6a',\n", + " '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", " '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f']" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -642,18 +641,33 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "89f02bda-8318-43e3-8b28-fa85d42772e9", + "execution_count": 17, + "id": "9114d04b-155e-45c7-a11e-6e889d18b9e6", "metadata": {}, "outputs": [], "source": [ - "start_datetime = min(\n", - " [\n", - " self_reported_info.transmission_date_from\n", - " for self_reported_info in vessel_self_reported_infos\n", - " ]\n", - ")\n", - "start_date = start_datetime.date()" + "start_date = min(vessels_result.transmission_dates_from)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "30a78bcf-dcb2-4d16-afdd-935e3ba231b4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.date(2013, 11, 15)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start_date" ] }, { @@ -661,7 +675,7 @@ "id": "4fee4e40-4ec0-406d-aa4e-3de9e6224c55", "metadata": {}, "source": [ - "**Important:** `start_date` must be on or after `January 1, 2020`" + "**Important:** `start_date` must be on or after `January 1, 2020`. 
[Insights](https://globalfishingwatch.org/our-apis/documentation#insights-api) are available from `January 1, 2020` onwards." ] }, { @@ -681,13 +695,7 @@ "metadata": {}, "outputs": [], "source": [ - "end_datetime = max(\n", - " [\n", - " self_reported_info.transmission_date_to\n", - " for self_reported_info in vessel_self_reported_infos\n", - " ]\n", - ")\n", - "end_date = end_datetime.date()" + "end_date = max(vessels_result.transmission_dates_to)" ] }, { @@ -699,7 +707,7 @@ { "data": { "text/plain": [ - "(datetime.date(2020, 1, 1), datetime.date(2026, 1, 4))" + "(datetime.date(2020, 1, 1), datetime.date(2026, 6, 21))" ] }, "execution_count": 21, @@ -714,19 +722,6 @@ { "cell_type": "code", "execution_count": 22, - "id": "2d331aeb-370e-4039-aa46-75a10e2d0a18", - "metadata": {}, - "outputs": [], - "source": [ - "dataset_id = \"public-global-vessel-identity:latest\"\n", - "dataset_ids_vessel_ids = [\n", - " {\"dataset_id\": dataset_id, \"vessel_id\": vessel_id} for vessel_id in vessel_ids\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 23, "id": "cd5725f5-57ca-4620-9946-d3cf4ea153b1", "metadata": {}, "outputs": [], @@ -735,13 +730,13 @@ " includes=[\"FISHING\"],\n", " start_date=start_date,\n", " end_date=end_date,\n", - " vessels=dataset_ids_vessel_ids,\n", + " vessels=vessel_ids,\n", ")" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "15f1a28c-215d-4cef-b62e-84f029fecfd1", "metadata": {}, "outputs": [], @@ -751,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "cca3b40f-8f19-4715-a91e-0e86c2d2588e", "metadata": {}, "outputs": [ @@ -759,7 +754,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", @@ -781,7 +776,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "be8dd7ac-b8d7-4c3d-9fe6-157831a8a7b9", 
"metadata": {}, "outputs": [ @@ -817,11 +812,11 @@ " \n", " \n", " 0\n", - " {'start_date': 2020-01-01, 'end_date': 2026-01...\n", + " {'start_date': 2020-01-01, 'end_date': 2026-06...\n", " None\n", " None\n", " None\n", - " {'datasets': ['public-global-fishing-events:v3...\n", + " {'datasets': ['public-global-fishing-events:v4...\n", " None\n", " \n", " \n", @@ -830,16 +825,16 @@ ], "text/plain": [ " period \\\n", - "0 {'start_date': 2020-01-01, 'end_date': 2026-01... \n", + "0 {'start_date': 2020-01-01, 'end_date': 2026-06... \n", "\n", " vessel_ids_without_identity gap coverage \\\n", "0 None None None \n", "\n", " apparent_fishing vessel_identity \n", - "0 {'datasets': ['public-global-fishing-events:v3... None " + "0 {'datasets': ['public-global-fishing-events:v4... None " ] }, - "execution_count": 26, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -850,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "7eb5f9e3-57b8-44b7-bf5e-3091cb9c0033", "metadata": {}, "outputs": [], @@ -860,20 +855,20 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "53704e62-9e21-4182-bab6-058e4e297fe6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'events': 398,\n", + "{'events': 263,\n", " 'events_gap_off': None,\n", - " 'events_in_rfmo_without_known_authorization': 144,\n", + " 'events_in_rfmo_without_known_authorization': 163,\n", " 'events_in_no_take_mpas': 0}" ] }, - "execution_count": 28, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -894,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "b078ce74-f2d1-4ec3-a163-b3961407e497", "metadata": { "id": "b078ce74-f2d1-4ec3-a163-b3961407e497" @@ -911,7 +906,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "8b1c4289-5896-4590-8d6e-7405bb0037f8", "metadata": { "id": 
"8b1c4289-5896-4590-8d6e-7405bb0037f8" @@ -923,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "8a1f2985-e849-4e7c-8a7e-77afcba17764", "metadata": { "colab": { @@ -937,27 +932,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 398 entries, 0 to 397\n", + "\n", + "RangeIndex: 263 entries, 0 to 262\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 start 398 non-null datetime64[ns, UTC]\n", - " 1 end 398 non-null datetime64[ns, UTC]\n", - " 2 id 398 non-null object \n", - " 3 type 398 non-null object \n", - " 4 position 398 non-null object \n", - " 5 regions 398 non-null object \n", - " 6 bounding_box 398 non-null object \n", - " 7 distances 398 non-null object \n", - " 8 vessel 398 non-null object \n", + " 0 start 263 non-null datetime64[us, UTC]\n", + " 1 end 263 non-null datetime64[us, UTC]\n", + " 2 id 263 non-null str \n", + " 3 type 263 non-null str \n", + " 4 position 263 non-null object \n", + " 5 regions 263 non-null object \n", + " 6 bounding_box 263 non-null object \n", + " 7 distances 263 non-null object \n", + " 8 vessel 263 non-null object \n", " 9 encounter 0 non-null object \n", - " 10 fishing 398 non-null object \n", + " 10 fishing 263 non-null object \n", " 11 gap 0 non-null object \n", " 12 loitering 0 non-null object \n", " 13 port_visit 0 non-null object \n", - "dtypes: datetime64[ns, UTC](2), object(12)\n", - "memory usage: 43.7+ KB\n" + "dtypes: datetime64[us, UTC](2), object(10), str(2)\n", + "memory usage: 28.9+ KB\n" ] } ], @@ -967,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "ff424bc9-bb00-4bc6-a8e3-ee01346786c8", "metadata": { "colab": { @@ -1018,85 +1013,85 @@ " \n", " \n", " 0\n", - " 2022-10-10 15:49:06+00:00\n", - " 2022-10-10 18:27:46+00:00\n", - " da35d28bfc433f251fe03c2ded3a5e23\n", + " 2020-05-27 12:08:47+00:00\n", + " 2020-05-27 
16:52:19+00:00\n", + " a3967b8c86143d60ece964e0d55dd410\n", " fishing\n", - " {'lat': 42.9677, 'lon': 156.9126}\n", - " {'mpa': [], 'eez': [], 'rfmo': ['ACAP', 'NPFC'...\n", - " [156.90262, 42.93749, 156.92824, 42.979495]\n", - " {'start_distance_from_shore_km': 585.0, 'end_d...\n", + " {'lat': 41.2464, 'lon': 165.0914}\n", + " {'mpa': [], 'eez': [], 'rfmo': ['PICES', 'ACAP...\n", + " [165.04957333333334, 41.22549333333333, 165.11...\n", + " {'start_distance_from_shore_km': 1195.0, 'end_...\n", " {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'...\n", " None\n", - " {'total_distance_km': 5.313381753510874, 'aver...\n", + " {'total_distance_km': 7.727278367334163, 'aver...\n", " None\n", " None\n", " None\n", " \n", " \n", " 1\n", - " 2022-10-08 23:28:16+00:00\n", - " 2022-10-09 04:06:15+00:00\n", - " 080bcd728a84f62fe901a3e5b02c6014\n", + " 2020-05-29 08:23:50+00:00\n", + " 2020-05-29 08:48:39+00:00\n", + " d93fab941578e08fbd0bbcfd6237be5c\n", " fishing\n", - " {'lat': 43.0665, 'lon': 156.4381}\n", - " {'mpa': [], 'eez': [], 'rfmo': ['IWC', 'WCPFC'...\n", - " [156.38127166666666, 43.05014166666667, 156.49...\n", - " {'start_distance_from_shore_km': 549.0, 'end_d...\n", + " {'lat': 41.4293, 'lon': 165.5407}\n", + " {'mpa': [], 'eez': [], 'rfmo': ['IWC', 'NPAFC'...\n", + " [165.53578666666667, 41.42429333333333, 165.54...\n", + " {'start_distance_from_shore_km': 1209.0, 'end_...\n", " {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'...\n", " None\n", - " {'total_distance_km': 10.200405590755741, 'ave...\n", + " {'total_distance_km': 1.6521146705457104, 'ave...\n", " None\n", " None\n", " None\n", " \n", " \n", " 2\n", - " 2022-10-30 23:36:05+00:00\n", - " 2022-10-31 11:25:45+00:00\n", - " c79bf8fbab9e58bb3b279bf757bae197\n", + " 2020-11-23 06:45:07+00:00\n", + " 2020-11-23 07:27:03+00:00\n", + " f9a5a8a73b8c43feaea8e2416a4f45dc\n", " fishing\n", - " {'lat': 41.0574, 'lon': 149.7023}\n", - " {'mpa': [], 'eez': ['48950'], 'rfmo': ['PICES'...\n", - " [149.66486666666665, 
40.94216166666667, 149.73...\n", - " {'start_distance_from_shore_km': 392.0, 'end_d...\n", + " {'lat': 41.2036, 'lon': 150.862}\n", + " {'mpa': [], 'eez': [], 'rfmo': ['NPAFC', 'PICE...\n", + " [150.86170666666666, 41.2018016667, 150.862258...\n", + " {'start_distance_from_shore_km': 436.0, 'end_d...\n", " {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'...\n", " None\n", - " {'total_distance_km': 31.90361153133585, 'aver...\n", + " {'total_distance_km': 0.40525710130028725, 'av...\n", " None\n", " None\n", " None\n", " \n", " \n", " 3\n", - " 2021-02-21 23:05:34+00:00\n", - " 2021-02-22 08:36:32+00:00\n", - " 3cdbebbfbf2b7c926158fa6955eecb30\n", + " 2020-11-05 06:39:22+00:00\n", + " 2020-11-05 12:23:27+00:00\n", + " c360eece02f0b6c0a9961da4f9d14338\n", " fishing\n", - " {'lat': -45.8329, 'lon': -60.603}\n", - " {'mpa': [], 'eez': [], 'rfmo': ['CCSBT', 'ICCA...\n", - " [-60.5669816667, -45.836216666666665, -60.6110...\n", - " {'start_distance_from_shore_km': 399.0, 'end_d...\n", + " {'lat': 43.8173, 'lon': 154.7858}\n", + " {'mpa': [], 'eez': [], 'rfmo': ['WCPFC', 'IWC'...\n", + " [154.78373333333334, 43.8052266667, 154.797158...\n", + " {'start_distance_from_shore_km': 404.0, 'end_d...\n", " {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'...\n", " None\n", - " {'total_distance_km': 5.725649380735478, 'aver...\n", + " {'total_distance_km': 2.763389184389981, 'aver...\n", " None\n", " None\n", " None\n", " \n", " \n", " 4\n", - " 2020-05-26 16:43:39+00:00\n", - " 2020-05-26 18:25:46+00:00\n", - " 9b6a1c84d8daf3359a0d9917ff138d39\n", + " 2020-11-22 11:02:58+00:00\n", + " 2020-11-22 20:54:28+00:00\n", + " 7bf028bb91043ebc9c76def4184c9839\n", " fishing\n", - " {'lat': 41.4176, 'lon': 165.3167}\n", - " {'mpa': [], 'eez': [], 'rfmo': ['IWC', 'NPFC',...\n", - " [165.315, 41.409706666666665, 165.31824, 41.42...\n", - " {'start_distance_from_shore_km': 1198.0, 'end_...\n", + " {'lat': 41.2789, 'lon': 150.7685}\n", + " {'mpa': [], 'eez': [], 'rfmo': ['NPFC', 'IWC',...\n", + " 
[150.72186666666667, 41.271973333333335, 150.8...\n", + " {'start_distance_from_shore_km': 422.0, 'end_d...\n", " {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'...\n", " None\n", - " {'total_distance_km': 2.3609454154613645, 'ave...\n", + " {'total_distance_km': 10.085051379476868, 'ave...\n", " None\n", " None\n", " None\n", @@ -1107,46 +1102,46 @@ ], "text/plain": [ " start end \\\n", - "0 2022-10-10 15:49:06+00:00 2022-10-10 18:27:46+00:00 \n", - "1 2022-10-08 23:28:16+00:00 2022-10-09 04:06:15+00:00 \n", - "2 2022-10-30 23:36:05+00:00 2022-10-31 11:25:45+00:00 \n", - "3 2021-02-21 23:05:34+00:00 2021-02-22 08:36:32+00:00 \n", - "4 2020-05-26 16:43:39+00:00 2020-05-26 18:25:46+00:00 \n", + "0 2020-05-27 12:08:47+00:00 2020-05-27 16:52:19+00:00 \n", + "1 2020-05-29 08:23:50+00:00 2020-05-29 08:48:39+00:00 \n", + "2 2020-11-23 06:45:07+00:00 2020-11-23 07:27:03+00:00 \n", + "3 2020-11-05 06:39:22+00:00 2020-11-05 12:23:27+00:00 \n", + "4 2020-11-22 11:02:58+00:00 2020-11-22 20:54:28+00:00 \n", "\n", " id type \\\n", - "0 da35d28bfc433f251fe03c2ded3a5e23 fishing \n", - "1 080bcd728a84f62fe901a3e5b02c6014 fishing \n", - "2 c79bf8fbab9e58bb3b279bf757bae197 fishing \n", - "3 3cdbebbfbf2b7c926158fa6955eecb30 fishing \n", - "4 9b6a1c84d8daf3359a0d9917ff138d39 fishing \n", + "0 a3967b8c86143d60ece964e0d55dd410 fishing \n", + "1 d93fab941578e08fbd0bbcfd6237be5c fishing \n", + "2 f9a5a8a73b8c43feaea8e2416a4f45dc fishing \n", + "3 c360eece02f0b6c0a9961da4f9d14338 fishing \n", + "4 7bf028bb91043ebc9c76def4184c9839 fishing \n", "\n", " position \\\n", - "0 {'lat': 42.9677, 'lon': 156.9126} \n", - "1 {'lat': 43.0665, 'lon': 156.4381} \n", - "2 {'lat': 41.0574, 'lon': 149.7023} \n", - "3 {'lat': -45.8329, 'lon': -60.603} \n", - "4 {'lat': 41.4176, 'lon': 165.3167} \n", + "0 {'lat': 41.2464, 'lon': 165.0914} \n", + "1 {'lat': 41.4293, 'lon': 165.5407} \n", + "2 {'lat': 41.2036, 'lon': 150.862} \n", + "3 {'lat': 43.8173, 'lon': 154.7858} \n", + "4 {'lat': 41.2789, 'lon': 
150.7685} \n", "\n", " regions \\\n", - "0 {'mpa': [], 'eez': [], 'rfmo': ['ACAP', 'NPFC'... \n", - "1 {'mpa': [], 'eez': [], 'rfmo': ['IWC', 'WCPFC'... \n", - "2 {'mpa': [], 'eez': ['48950'], 'rfmo': ['PICES'... \n", - "3 {'mpa': [], 'eez': [], 'rfmo': ['CCSBT', 'ICCA... \n", - "4 {'mpa': [], 'eez': [], 'rfmo': ['IWC', 'NPFC',... \n", + "0 {'mpa': [], 'eez': [], 'rfmo': ['PICES', 'ACAP... \n", + "1 {'mpa': [], 'eez': [], 'rfmo': ['IWC', 'NPAFC'... \n", + "2 {'mpa': [], 'eez': [], 'rfmo': ['NPAFC', 'PICE... \n", + "3 {'mpa': [], 'eez': [], 'rfmo': ['WCPFC', 'IWC'... \n", + "4 {'mpa': [], 'eez': [], 'rfmo': ['NPFC', 'IWC',... \n", "\n", " bounding_box \\\n", - "0 [156.90262, 42.93749, 156.92824, 42.979495] \n", - "1 [156.38127166666666, 43.05014166666667, 156.49... \n", - "2 [149.66486666666665, 40.94216166666667, 149.73... \n", - "3 [-60.5669816667, -45.836216666666665, -60.6110... \n", - "4 [165.315, 41.409706666666665, 165.31824, 41.42... \n", + "0 [165.04957333333334, 41.22549333333333, 165.11... \n", + "1 [165.53578666666667, 41.42429333333333, 165.54... \n", + "2 [150.86170666666666, 41.2018016667, 150.862258... \n", + "3 [154.78373333333334, 43.8052266667, 154.797158... \n", + "4 [150.72186666666667, 41.271973333333335, 150.8... \n", "\n", " distances \\\n", - "0 {'start_distance_from_shore_km': 585.0, 'end_d... \n", - "1 {'start_distance_from_shore_km': 549.0, 'end_d... \n", - "2 {'start_distance_from_shore_km': 392.0, 'end_d... \n", - "3 {'start_distance_from_shore_km': 399.0, 'end_d... \n", - "4 {'start_distance_from_shore_km': 1198.0, 'end_... \n", + "0 {'start_distance_from_shore_km': 1195.0, 'end_... \n", + "1 {'start_distance_from_shore_km': 1209.0, 'end_... \n", + "2 {'start_distance_from_shore_km': 436.0, 'end_d... \n", + "3 {'start_distance_from_shore_km': 404.0, 'end_d... \n", + "4 {'start_distance_from_shore_km': 422.0, 'end_d... \n", "\n", " vessel encounter \\\n", "0 {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'... 
None \n", @@ -1156,11 +1151,11 @@ "4 {'id': '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f'... None \n", "\n", " fishing gap loitering \\\n", - "0 {'total_distance_km': 5.313381753510874, 'aver... None None \n", - "1 {'total_distance_km': 10.200405590755741, 'ave... None None \n", - "2 {'total_distance_km': 31.90361153133585, 'aver... None None \n", - "3 {'total_distance_km': 5.725649380735478, 'aver... None None \n", - "4 {'total_distance_km': 2.3609454154613645, 'ave... None None \n", + "0 {'total_distance_km': 7.727278367334163, 'aver... None None \n", + "1 {'total_distance_km': 1.6521146705457104, 'ave... None None \n", + "2 {'total_distance_km': 0.40525710130028725, 'av... None None \n", + "3 {'total_distance_km': 2.763389184389981, 'aver... None None \n", + "4 {'total_distance_km': 10.085051379476868, 'ave... None None \n", "\n", " port_visit \n", "0 None \n", @@ -1170,7 +1165,7 @@ "4 None " ] }, - "execution_count": 32, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/usage-guides/4wings-api.ipynb b/notebooks/usage-guides/4wings-api.ipynb index b523b61..59b4bc4 100644 --- a/notebooks/usage-guides/4wings-api.ipynb +++ b/notebooks/usage-guides/4wings-api.ipynb @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 27, "id": "2420e772-c8c6-4eec-bbab-db0708733b50", "metadata": { "id": "2420e772-c8c6-4eec-bbab-db0708733b50" @@ -120,12 +120,14 @@ "source": [ "import os\n", "\n", + "import geopandas as gpd\n", + "\n", "import gfwapiclient as gfw" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 28, "id": "c1fae7c8-2806-4468-84f0-2ca90b4f5653", "metadata": { "id": "c1fae7c8-2806-4468-84f0-2ca90b4f5653" @@ -144,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 29, "id": "08c9daef-552d-4906-ac21-eaedcea7b102", "metadata": { "id": "08c9daef-552d-4906-ac21-eaedcea7b102" @@ -172,9 +174,49 @@ "Generates **AIS (Automatic Identification 
System) apparent fishing effort** reports to visualize fishing activity. Please [learn more about apparent fishing effort here](https://globalfishingwatch.org/our-apis/documentation#ais-apparent-fishing-effort) and [check its data caveats here](https://globalfishingwatch.org/our-apis/documentation#apparent-fishing-effort)." ] }, + { + "cell_type": "markdown", + "id": "66211f98-d7ec-4af2-9f54-b04fcea62e36", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." + ] + }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 64, + "id": "443536d7-48e1-45f0-ae8f-7ae0d9a5b0cf", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"RUS\")\n", + "rus_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "ee4696d5-a325-4dd4-a564-b89fc0b1443d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS')" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3" + ] + }, + { + "cell_type": "code", + "execution_count": 33, "id": "338223ff-9c23-47f6-bf33-2aaaefc35081", "metadata": {}, "outputs": [], @@ -185,10 +227,7 @@ " group_by=\"FLAG\",\n", " start_date=\"2022-01-01\",\n", " end_date=\"2022-05-01\",\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"5690\",\n", - " },\n", + " region=rus_eez_roi,\n", ")" 
] }, @@ -202,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 34, "id": "3bff3854-9b54-44c9-a7f8-3ef2bf1b7719", "metadata": {}, "outputs": [], @@ -212,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 35, "id": "21e8b553-20dd-43bb-92c3-0e7f5f6f29c3", "metadata": {}, "outputs": [], @@ -222,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 36, "id": "29bd8b84-1f22-4a11-9861-548237926012", "metadata": {}, "outputs": [ @@ -232,7 +271,7 @@ "('2022-03', 'RUS', 7.109166666666667, 3, 75.8, 44.0)" ] }, - "execution_count": 41, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -258,7 +297,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 37, "id": "a778ef19-f68d-4e0b-8b37-539db79868cb", "metadata": {}, "outputs": [], @@ -268,7 +307,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 38, "id": "271d446a-702d-42bb-93a9-e576741a3fdd", "metadata": {}, "outputs": [ @@ -276,14 +315,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 32271 entries, 0 to 32270\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 32271 non-null object \n", + " 0 date 32271 non-null str \n", " 1 detections 0 non-null object \n", - " 2 flag 32271 non-null object \n", + " 2 flag 32271 non-null str \n", " 3 gear_type 0 non-null object \n", " 4 hours 32271 non-null float64\n", " 5 vessel_ids 32271 non-null int64 \n", @@ -297,11 +336,11 @@ " 13 mmsi 0 non-null object \n", " 14 call_sign 0 non-null object \n", " 15 dataset 0 non-null object \n", - " 16 report_dataset 32271 non-null object \n", + " 16 report_dataset 32271 non-null str \n", " 17 ship_name 0 non-null object \n", " 18 lat 32271 non-null float64\n", " 19 lon 32271 non-null float64\n", - "dtypes: float64(3), int64(1), object(16)\n", + "dtypes: float64(3), 
int64(1), object(13), str(3)\n", "memory usage: 4.9+ MB\n" ] } @@ -312,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 39, "id": "5145b645-c0fe-4780-b94f-261b534b7f04", "metadata": {}, "outputs": [ @@ -362,12 +401,12 @@ " \n", " \n", " 0\n", - " 2022-02\n", + " 2022-03\n", " None\n", " RUS\n", " None\n", - " 58.298253\n", - " 18\n", + " 3.416944\n", + " 2\n", " None\n", " None\n", " None\n", @@ -378,10 +417,10 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 54.5\n", - " 151.4\n", + " 75.6\n", + " 50.3\n", " \n", " \n", " 1\n", @@ -389,8 +428,8 @@ " None\n", " RUS\n", " None\n", - " 53.717894\n", - " 9\n", + " 2.144444\n", + " 3\n", " None\n", " None\n", " None\n", @@ -401,19 +440,19 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 70.4\n", - " 32.3\n", + " 51.4\n", + " 155.4\n", " \n", " \n", " 2\n", - " 2022-02\n", + " 2022-03\n", " None\n", " RUS\n", " None\n", - " 1.083333\n", - " 1\n", + " 5.436944\n", + " 2\n", " None\n", " None\n", " None\n", @@ -424,10 +463,10 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 56.2\n", - " 155.4\n", + " 58.6\n", + " 155.8\n", " \n", " \n", " 3\n", @@ -435,8 +474,8 @@ " None\n", " RUS\n", " None\n", - " 5.364167\n", - " 2\n", + " 4.435556\n", + " 1\n", " None\n", " None\n", " None\n", @@ -447,10 +486,10 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 50.0\n", - " 141.8\n", + " 45.7\n", + " 149.8\n", " \n", " \n", " 4\n", @@ -458,8 +497,8 @@ " None\n", " RUS\n", " None\n", - " 2.393056\n", - " 2\n", + " 0.362778\n", + " 1\n", " None\n", " None\n", " None\n", @@ -470,22 +509,22 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", 
+ " public-global-fishing-effort:v4.0\n", " None\n", - " 56.2\n", - " 162.5\n", + " 46.7\n", + " 141.9\n", " \n", " \n", "\n", "" ], "text/plain": [ - " date detections flag gear_type hours vessel_ids vessel_id \\\n", - "0 2022-02 None RUS None 58.298253 18 None \n", - "1 2022-02 None RUS None 53.717894 9 None \n", - "2 2022-02 None RUS None 1.083333 1 None \n", - "3 2022-01 None RUS None 5.364167 2 None \n", - "4 2022-03 None RUS None 2.393056 2 None \n", + " date detections flag gear_type hours vessel_ids vessel_id \\\n", + "0 2022-03 None RUS None 3.416944 2 None \n", + "1 2022-02 None RUS None 2.144444 3 None \n", + "2 2022-03 None RUS None 5.436944 2 None \n", + "3 2022-01 None RUS None 4.435556 1 None \n", + "4 2022-03 None RUS None 0.362778 1 None \n", "\n", " vessel_type entry_timestamp exit_timestamp first_transmission_date \\\n", "0 None None None None \n", @@ -502,14 +541,14 @@ "4 None None None None None \n", "\n", " report_dataset ship_name lat lon \n", - "0 public-global-fishing-effort:v3.0 None 54.5 151.4 \n", - "1 public-global-fishing-effort:v3.0 None 70.4 32.3 \n", - "2 public-global-fishing-effort:v3.0 None 56.2 155.4 \n", - "3 public-global-fishing-effort:v3.0 None 50.0 141.8 \n", - "4 public-global-fishing-effort:v3.0 None 56.2 162.5 " + "0 public-global-fishing-effort:v4.0 None 75.6 50.3 \n", + "1 public-global-fishing-effort:v4.0 None 51.4 155.4 \n", + "2 public-global-fishing-effort:v4.0 None 58.6 155.8 \n", + "3 public-global-fishing-effort:v4.0 None 45.7 149.8 \n", + "4 public-global-fishing-effort:v4.0 None 46.7 141.9 " ] }, - "execution_count": 44, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -542,9 +581,49 @@ "**Disclaimer:** AIS vessel presence is one of the largest datasets available. To prevent timeouts and ensure optimal performance, keep requests manageable: prefer simple, small regions and shorter time ranges (e.g., a few days)." 
] }, + { + "cell_type": "markdown", + "id": "ce279bd2-efbc-4984-9e32-3ad634e16d2b", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." + ] + }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 40, + "id": "995bd5b0-c76e-4a64-8553-35ae289a9f82", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"RUS\")\n", + "rus_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "07f0a97a-0f1f-4fba-ae34-b542cabb5eb8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS')" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3" + ] + }, + { + "cell_type": "code", + "execution_count": 42, "id": "0946a5ab-5090-4920-94dd-88c733528f5b", "metadata": {}, "outputs": [], @@ -555,10 +634,7 @@ " group_by=\"FLAG\",\n", " start_date=\"2022-01-01\",\n", " end_date=\"2022-05-01\",\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"5690\",\n", - " },\n", + " region=rus_eez_roi,\n", ")" ] }, @@ -572,7 +648,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 43, "id": "5a5d349d-f60f-4fd3-997e-f74c82d2d4f9", "metadata": {}, "outputs": [], @@ -582,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 44, "id": "f4435324-5b39-40dd-897e-6a7ef7620a93", 
"metadata": {}, "outputs": [], @@ -592,7 +668,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, "id": "b2990454-d924-4288-8e4f-90e4ab60015b", "metadata": {}, "outputs": [ @@ -602,7 +678,7 @@ "('2022-03', 'RUS', 1.0, 1, 52.1, 153.2)" ] }, - "execution_count": 49, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -628,7 +704,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 46, "id": "4ad188a4-fbaf-41ee-ab4b-6aeda56e0e57", "metadata": {}, "outputs": [], @@ -638,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 47, "id": "71bee1f2-2316-4908-9fdf-668854cd3c49", "metadata": {}, "outputs": [ @@ -646,14 +722,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 274333 entries, 0 to 274332\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 274333 non-null object \n", + " 0 date 274333 non-null str \n", " 1 detections 0 non-null object \n", - " 2 flag 274333 non-null object \n", + " 2 flag 274333 non-null str \n", " 3 gear_type 0 non-null object \n", " 4 hours 274333 non-null float64\n", " 5 vessel_ids 274333 non-null int64 \n", @@ -667,11 +743,11 @@ " 13 mmsi 0 non-null object \n", " 14 call_sign 0 non-null object \n", " 15 dataset 0 non-null object \n", - " 16 report_dataset 274333 non-null object \n", + " 16 report_dataset 274333 non-null str \n", " 17 ship_name 0 non-null object \n", " 18 lat 274333 non-null float64\n", " 19 lon 274333 non-null float64\n", - "dtypes: float64(3), int64(1), object(16)\n", + "dtypes: float64(3), int64(1), object(13), str(3)\n", "memory usage: 41.9+ MB\n" ] } @@ -682,7 +758,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 48, "id": "b8c04941-74fc-4a46-bc00-9e66afc59e10", "metadata": {}, "outputs": [ @@ -732,12 +808,12 @@ " \n", " \n", " 0\n", - " 2022-04\n", + " 2022-01\n", " None\n", - 
" PAN\n", + " MLT\n", " None\n", - " 3.0\n", - " 2\n", + " 1.0\n", + " 1\n", " None\n", " None\n", " None\n", @@ -748,10 +824,10 @@ " None\n", " None\n", " None\n", - " public-global-presence:v3.0\n", + " public-global-presence:v4.0\n", " None\n", - " 47.8\n", - " 154.1\n", + " 48.0\n", + " 156.1\n", " \n", " \n", " 1\n", @@ -759,8 +835,8 @@ " None\n", " RUS\n", " None\n", - " 4.0\n", - " 1\n", + " 7.0\n", + " 6\n", " None\n", " None\n", " None\n", @@ -771,19 +847,19 @@ " None\n", " None\n", " None\n", - " public-global-presence:v3.0\n", + " public-global-presence:v4.0\n", " None\n", - " 77.6\n", - " 70.6\n", + " 58.2\n", + " 154.7\n", " \n", " \n", " 2\n", - " 2022-02\n", + " 2022-01\n", " None\n", " RUS\n", " None\n", - " 1.0\n", - " 1\n", + " 5.0\n", + " 5\n", " None\n", " None\n", " None\n", @@ -794,19 +870,19 @@ " None\n", " None\n", " None\n", - " public-global-presence:v3.0\n", + " public-global-presence:v4.0\n", " None\n", - " 75.0\n", - " 159.2\n", + " 50.7\n", + " 157.2\n", " \n", " \n", " 3\n", - " 2022-03\n", + " 2022-01\n", " None\n", " RUS\n", " None\n", " 2.0\n", - " 2\n", + " 1\n", " None\n", " None\n", " None\n", @@ -817,16 +893,16 @@ " None\n", " None\n", " None\n", - " public-global-presence:v3.0\n", + " public-global-presence:v4.0\n", " None\n", - " 68.4\n", - " 49.0\n", + " 43.7\n", + " 38.8\n", " \n", " \n", " 4\n", - " 2022-02\n", + " 2022-04\n", " None\n", - " PAN\n", + " SGP\n", " None\n", " 1.0\n", " 1\n", @@ -840,10 +916,10 @@ " None\n", " None\n", " None\n", - " public-global-presence:v3.0\n", + " public-global-presence:v4.0\n", " None\n", - " 50.0\n", - " 157.0\n", + " 50.4\n", + " 157.9\n", " \n", " \n", "\n", @@ -851,11 +927,11 @@ ], "text/plain": [ " date detections flag gear_type hours vessel_ids vessel_id vessel_type \\\n", - "0 2022-04 None PAN None 3.0 2 None None \n", - "1 2022-02 None RUS None 4.0 1 None None \n", - "2 2022-02 None RUS None 1.0 1 None None \n", - "3 2022-03 None RUS None 2.0 2 None None \n", - "4 2022-02 None 
PAN None 1.0 1 None None \n", + "0 2022-01 None MLT None 1.0 1 None None \n", + "1 2022-02 None RUS None 7.0 6 None None \n", + "2 2022-01 None RUS None 5.0 5 None None \n", + "3 2022-01 None RUS None 2.0 1 None None \n", + "4 2022-04 None SGP None 1.0 1 None None \n", "\n", " entry_timestamp exit_timestamp first_transmission_date \\\n", "0 None None None \n", @@ -872,14 +948,14 @@ "4 None None None None None \n", "\n", " report_dataset ship_name lat lon \n", - "0 public-global-presence:v3.0 None 47.8 154.1 \n", - "1 public-global-presence:v3.0 None 77.6 70.6 \n", - "2 public-global-presence:v3.0 None 75.0 159.2 \n", - "3 public-global-presence:v3.0 None 68.4 49.0 \n", - "4 public-global-presence:v3.0 None 50.0 157.0 " + "0 public-global-presence:v4.0 None 48.0 156.1 \n", + "1 public-global-presence:v4.0 None 58.2 154.7 \n", + "2 public-global-presence:v4.0 None 50.7 157.2 \n", + "3 public-global-presence:v4.0 None 43.7 38.8 \n", + "4 public-global-presence:v4.0 None 50.4 157.9 " ] }, - "execution_count": 52, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -912,9 +988,49 @@ "**Important:** **AIS vessel presence** shows where vessels **reported their positions** via the **Automatic Identification System (AIS)**. **SAR vessel detection** shows where **Synthetic Aperture Radar (SAR) satellites detected** vessels on the ocean surface, even if they **weren't transmitting AIS**." ] }, + { + "cell_type": "markdown", + "id": "35570f97-c7f0-4634-bfca-92786b89e8b0", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "7b6b6f91-a218-4636-abdd-c0100cebb99a", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"RUS\")\n", + "rus_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "f6289183-3109-4dfd-9c73-76933e64ff6f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS')" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3" + ] + }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 51, "id": "45789061-c4e1-4938-b061-79122adb25da", "metadata": {}, "outputs": [], @@ -925,10 +1041,7 @@ " group_by=\"FLAG\",\n", " start_date=\"2022-01-01\",\n", " end_date=\"2022-05-01\",\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"5690\",\n", - " },\n", + " region=rus_eez_roi,\n", ")" ] }, @@ -942,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 52, "id": "c8f3894f-9d4a-4b2a-8f18-751c8155fddb", "metadata": {}, "outputs": [], @@ -952,7 +1065,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 53, "id": "c138eca3-4921-4056-8b68-6d658901fd17", "metadata": {}, "outputs": [], @@ -962,7 +1075,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 54, "id": "50d60501-e2c2-4b41-a341-845440c8cc8f", "metadata": {}, "outputs": [ @@ -972,7 +1085,7 @@ "('2022-04', '', 1, 1, 46.6, 142.6)" ] }, - "execution_count": 55, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -998,7 +1111,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 55, "id": "05179b03-f391-45c7-86ce-c26e3f27eef0", "metadata": {}, "outputs": [], @@ -1008,7 
+1121,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 56, "id": "e8fa4e6f-efb9-4e5a-96f9-679bbc3578f8", "metadata": {}, "outputs": [ @@ -1016,14 +1129,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 3995 entries, 0 to 3994\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 3995 non-null object \n", + " 0 date 3995 non-null str \n", " 1 detections 3995 non-null int64 \n", - " 2 flag 3995 non-null object \n", + " 2 flag 3995 non-null str \n", " 3 gear_type 0 non-null object \n", " 4 hours 0 non-null object \n", " 5 vessel_ids 3995 non-null int64 \n", @@ -1037,11 +1150,11 @@ " 13 mmsi 0 non-null object \n", " 14 call_sign 0 non-null object \n", " 15 dataset 0 non-null object \n", - " 16 report_dataset 3995 non-null object \n", + " 16 report_dataset 3995 non-null str \n", " 17 ship_name 0 non-null object \n", " 18 lat 3995 non-null float64\n", " 19 lon 3995 non-null float64\n", - "dtypes: float64(2), int64(2), object(16)\n", + "dtypes: float64(2), int64(2), object(13), str(3)\n", "memory usage: 624.3+ KB\n" ] } @@ -1052,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 57, "id": "6b1fee79-c5ab-42f6-98fe-7c0be9312b1e", "metadata": {}, "outputs": [ @@ -1102,9 +1215,9 @@ " \n", " \n", " 0\n", - " 2022-02\n", + " 2022-03\n", " 1\n", - " LBR\n", + " RUS\n", " None\n", " None\n", " 1\n", @@ -1118,16 +1231,16 @@ " None\n", " None\n", " None\n", - " public-global-sar-presence:v3.0\n", + " public-global-sar-presence:v4.0\n", " None\n", - " 47.2\n", - " 153.0\n", + " 50.3\n", + " 156.7\n", " \n", " \n", " 1\n", " 2022-02\n", " 1\n", - " BHS\n", + " RUS\n", " None\n", " None\n", " 1\n", @@ -1141,16 +1254,16 @@ " None\n", " None\n", " None\n", - " public-global-sar-presence:v3.0\n", + " public-global-sar-presence:v4.0\n", " None\n", - " 44.2\n", - " 37.3\n", + " 55.1\n", + " 19.8\n", " \n", " \n", " 
2\n", - " 2022-02\n", + " 2022-04\n", " 1\n", - " \n", + " RUS\n", " None\n", " None\n", " 1\n", @@ -1164,19 +1277,19 @@ " None\n", " None\n", " None\n", - " public-global-sar-presence:v3.0\n", + " public-global-sar-presence:v4.0\n", " None\n", - " 46.3\n", - " 142.4\n", + " 45.6\n", + " 149.4\n", " \n", " \n", " 3\n", - " 2022-04\n", - " 17\n", + " 2022-02\n", + " 4\n", " RUS\n", " None\n", " None\n", - " 14\n", + " 4\n", " None\n", " None\n", " None\n", @@ -1187,16 +1300,16 @@ " None\n", " None\n", " None\n", - " public-global-sar-presence:v3.0\n", + " public-global-sar-presence:v4.0\n", " None\n", - " 45.5\n", - " 36.7\n", + " 47.1\n", + " 142.0\n", " \n", " \n", " 4\n", " 2022-04\n", " 1\n", - " SGP\n", + " RUS\n", " None\n", " None\n", " 1\n", @@ -1210,10 +1323,10 @@ " None\n", " None\n", " None\n", - " public-global-sar-presence:v3.0\n", + " public-global-sar-presence:v4.0\n", " None\n", - " 46.3\n", - " 151.9\n", + " 46.8\n", + " 144.2\n", " \n", " \n", "\n", @@ -1221,11 +1334,11 @@ ], "text/plain": [ " date detections flag gear_type hours vessel_ids vessel_id vessel_type \\\n", - "0 2022-02 1 LBR None None 1 None None \n", - "1 2022-02 1 BHS None None 1 None None \n", - "2 2022-02 1 None None 1 None None \n", - "3 2022-04 17 RUS None None 14 None None \n", - "4 2022-04 1 SGP None None 1 None None \n", + "0 2022-03 1 RUS None None 1 None None \n", + "1 2022-02 1 RUS None None 1 None None \n", + "2 2022-04 1 RUS None None 1 None None \n", + "3 2022-02 4 RUS None None 4 None None \n", + "4 2022-04 1 RUS None None 1 None None \n", "\n", " entry_timestamp exit_timestamp first_transmission_date \\\n", "0 None None None \n", @@ -1242,14 +1355,14 @@ "4 None None None None None \n", "\n", " report_dataset ship_name lat lon \n", - "0 public-global-sar-presence:v3.0 None 47.2 153.0 \n", - "1 public-global-sar-presence:v3.0 None 44.2 37.3 \n", - "2 public-global-sar-presence:v3.0 None 46.3 142.4 \n", - "3 public-global-sar-presence:v3.0 None 45.5 36.7 \n", - "4 
public-global-sar-presence:v3.0 None 46.3 151.9 " + "0 public-global-sar-presence:v4.0 None 50.3 156.7 \n", + "1 public-global-sar-presence:v4.0 None 55.1 19.8 \n", + "2 public-global-sar-presence:v4.0 None 45.6 149.4 \n", + "3 public-global-sar-presence:v4.0 None 47.1 142.0 \n", + "4 public-global-sar-presence:v4.0 None 46.8 144.2 " ] }, - "execution_count": 58, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -1265,7 +1378,7 @@ "id": "33fcd10f-c3dc-49e4-b35c-07c24765e27b" }, "source": [ - "## Creating a Generic Report (`create_report`)" + "## Creating a Generic Report from Predefined Region (`create_report`)" ] }, { @@ -1284,16 +1397,56 @@ "**Note:** AIS vessel presence (i.e., `\"public-global-sar-presence:latest\"` dataset) does **not** support `\"GEARTYPE\"` or `\"FLAGANDGEARTYPE\"` as `group_by` criteria." ] }, + { + "cell_type": "markdown", + "id": "608e93f3-0964-4234-9e56-f9efa5f7af9c", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." 
+ ] + }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 58, + "id": "05a5af7d-9090-41a9-9868-8c34c119294f", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"RUS\")\n", + "rus_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "2d04deaa-3d91-431a-b800-78a0119457a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('5690', 'public-eez-areas', 'Russian Exclusive Economic Zone', 'RUS')" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rus_eez_roi.id, rus_eez_roi.dataset, rus_eez_roi.label, rus_eez_roi.iso3" + ] + }, + { + "cell_type": "code", + "execution_count": 61, "id": "d446f679-746a-4942-a5e0-ef1ca1324d65", "metadata": { "id": "d446f679-746a-4942-a5e0-ef1ca1324d65" }, "outputs": [], "source": [ - "report_result = await gfw_client.fourwings.create_report(\n", + "predefined_report_result = await gfw_client.fourwings.create_report(\n", " spatial_resolution=\"LOW\",\n", " temporal_resolution=\"MONTHLY\",\n", " group_by=\"FLAG\",\n", @@ -1304,10 +1457,7 @@ " ],\n", " start_date=\"2022-01-01\",\n", " end_date=\"2022-05-01\",\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"5690\",\n", - " },\n", + " region=rus_eez_roi,\n", ")" ] }, @@ -1323,31 +1473,31 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 8, "id": "cc7947e2-d2cb-45b1-b47b-818bf4bebc95", "metadata": { "id": "cc7947e2-d2cb-45b1-b47b-818bf4bebc95" }, "outputs": [], "source": [ - "report_data = report_result.data()" + "predefined_report_data = predefined_report_result.data()" ] }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 9, "id": "198ad2ee-a199-4d9e-8e76-0deb8395461f", "metadata": { "id": "198ad2ee-a199-4d9e-8e76-0deb8395461f" }, "outputs": [], "source": [ - "report_item = report_data[-1]" + 
"predefined_report_item = predefined_report_data[-1]" ] }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 10, "id": "2ba5f27b-f881-40a2-a75b-f00a139f1d34", "metadata": { "colab": { @@ -1363,19 +1513,19 @@ "('2022-03', 'RUS', 1.0, 1, 52.1, 153.2)" ] }, - "execution_count": 69, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", - " report_item.date,\n", - " report_item.flag,\n", - " report_item.hours,\n", - " report_item.vessel_ids,\n", - " report_item.lat,\n", - " report_item.lon,\n", + " predefined_report_item.date,\n", + " predefined_report_item.flag,\n", + " predefined_report_item.hours,\n", + " predefined_report_item.vessel_ids,\n", + " predefined_report_item.lat,\n", + " predefined_report_item.lon,\n", ")" ] }, @@ -1391,19 +1541,19 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 11, "id": "3720799b-192f-4b77-9ca2-a3614942d2d7", "metadata": { "id": "3720799b-192f-4b77-9ca2-a3614942d2d7" }, "outputs": [], "source": [ - "report_df = report_result.df()" + "predefined_report_df = predefined_report_result.df()" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 12, "id": "7f298c61-d038-4d2e-86f2-2d5a554aa267", "metadata": {}, "outputs": [ @@ -1411,14 +1561,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 310599 entries, 0 to 310598\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 310599 non-null object \n", + " 0 date 310599 non-null str \n", " 1 detections 3995 non-null float64\n", - " 2 flag 310599 non-null object \n", + " 2 flag 310599 non-null str \n", " 3 gear_type 0 non-null object \n", " 4 hours 306604 non-null float64\n", " 5 vessel_ids 310599 non-null int64 \n", @@ -1432,22 +1582,22 @@ " 13 mmsi 0 non-null object \n", " 14 call_sign 0 non-null object \n", " 15 dataset 0 non-null object \n", - " 16 report_dataset 310599 
non-null object \n", + " 16 report_dataset 310599 non-null str \n", " 17 ship_name 0 non-null object \n", " 18 lat 310599 non-null float64\n", " 19 lon 310599 non-null float64\n", - "dtypes: float64(4), int64(1), object(15)\n", + "dtypes: float64(4), int64(1), object(12), str(3)\n", "memory usage: 47.4+ MB\n" ] } ], "source": [ - "report_df.info()" + "predefined_report_df.info()" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 13, "id": "8cce33de-675d-42c6-8eac-36974a9e7f48", "metadata": { "colab": { @@ -1504,12 +1654,12 @@ " \n", " \n", " 0\n", - " 2022-03\n", + " 2022-02\n", " NaN\n", " RUS\n", " None\n", - " 3.416944\n", - " 2\n", + " 58.298253\n", + " 18\n", " None\n", " None\n", " None\n", @@ -1520,10 +1670,10 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 75.6\n", - " 50.3\n", + " 54.5\n", + " 151.4\n", " \n", " \n", " 1\n", @@ -1531,8 +1681,8 @@ " NaN\n", " RUS\n", " None\n", - " 2.144444\n", - " 3\n", + " 53.717894\n", + " 9\n", " None\n", " None\n", " None\n", @@ -1543,19 +1693,19 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 51.4\n", - " 155.4\n", + " 70.4\n", + " 32.3\n", " \n", " \n", " 2\n", - " 2022-03\n", + " 2022-02\n", " NaN\n", " RUS\n", " None\n", - " 5.436944\n", - " 2\n", + " 1.083333\n", + " 1\n", " None\n", " None\n", " None\n", @@ -1566,10 +1716,10 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 58.6\n", - " 155.8\n", + " 56.2\n", + " 155.4\n", " \n", " \n", " 3\n", @@ -1577,8 +1727,8 @@ " NaN\n", " RUS\n", " None\n", - " 4.435556\n", - " 1\n", + " 5.364167\n", + " 2\n", " None\n", " None\n", " None\n", @@ -1589,10 +1739,10 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 
45.7\n", - " 149.8\n", + " 50.0\n", + " 141.8\n", " \n", " \n", " 4\n", @@ -1600,8 +1750,8 @@ " NaN\n", " RUS\n", " None\n", - " 0.362778\n", - " 1\n", + " 2.393056\n", + " 2\n", " None\n", " None\n", " None\n", @@ -1612,22 +1762,22 @@ " None\n", " None\n", " None\n", - " public-global-fishing-effort:v3.0\n", + " public-global-fishing-effort:v4.0\n", " None\n", - " 46.7\n", - " 141.9\n", + " 56.2\n", + " 162.5\n", " \n", " \n", "\n", "" ], "text/plain": [ - " date detections flag gear_type hours vessel_ids vessel_id \\\n", - "0 2022-03 NaN RUS None 3.416944 2 None \n", - "1 2022-02 NaN RUS None 2.144444 3 None \n", - "2 2022-03 NaN RUS None 5.436944 2 None \n", - "3 2022-01 NaN RUS None 4.435556 1 None \n", - "4 2022-03 NaN RUS None 0.362778 1 None \n", + " date detections flag gear_type hours vessel_ids vessel_id \\\n", + "0 2022-02 NaN RUS None 58.298253 18 None \n", + "1 2022-02 NaN RUS None 53.717894 9 None \n", + "2 2022-02 NaN RUS None 1.083333 1 None \n", + "3 2022-01 NaN RUS None 5.364167 2 None \n", + "4 2022-03 NaN RUS None 2.393056 2 None \n", "\n", " vessel_type entry_timestamp exit_timestamp first_transmission_date \\\n", "0 None None None None \n", @@ -1644,20 +1794,420 @@ "4 None None None None None \n", "\n", " report_dataset ship_name lat lon \n", - "0 public-global-fishing-effort:v3.0 None 75.6 50.3 \n", - "1 public-global-fishing-effort:v3.0 None 51.4 155.4 \n", - "2 public-global-fishing-effort:v3.0 None 58.6 155.8 \n", - "3 public-global-fishing-effort:v3.0 None 45.7 149.8 \n", - "4 public-global-fishing-effort:v3.0 None 46.7 141.9 " + "0 public-global-fishing-effort:v4.0 None 54.5 151.4 \n", + "1 public-global-fishing-effort:v4.0 None 70.4 32.3 \n", + "2 public-global-fishing-effort:v4.0 None 56.2 155.4 \n", + "3 public-global-fishing-effort:v4.0 None 50.0 141.8 \n", + "4 public-global-fishing-effort:v4.0 None 56.2 162.5 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"predefined_report_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "9efc3b43-a37b-479a-9173-b64063b04b9b", + "metadata": {}, + "source": [ + "## Creating a Generic Report from Custom Region (`create_report`)" + ] + }, + { + "cell_type": "markdown", + "id": "b3e72232-675c-4481-9931-ed890f12c7c1", + "metadata": {}, + "source": [ + "Generates a report for any [supported datasets](https://globalfishingwatch.org/our-apis/documentation#supported-datasets), using fully customizable parameters. [Please check the data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat)." + ] + }, + { + "cell_type": "markdown", + "id": "824717e0-3a5f-4bcd-8adc-5ff741bcf7bf", + "metadata": {}, + "source": [ + "**Note:** AIS vessel presence (i.e., `\"public-global-sar-presence:latest\"` dataset) does **not** support `\"GEARTYPE\"` or `\"FLAGANDGEARTYPE\"` as `group_by` criteria." + ] + }, + { + "cell_type": "markdown", + "id": "2b392df6-98a7-4a6f-aa77-d505f3bbf376", + "metadata": {}, + "source": [ + "**Note:** A custom region can be a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, an object implementing `__geo_interface__`, etc.), or a `GeoJson` model instance. Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) and supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "b8b02a0b-9fef-4639-a32c-755520bfa6cc", + "metadata": {}, + "outputs": [], + "source": [ + "filename = \"https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/fourwings/geojson/geojson.shp\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "264b23ec-6ae5-4334-88ec-fd847bbeba4d", + "metadata": {}, + "outputs": [], + "source": [ + "custom_roi_gdf = gpd.read_file(filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1e6b9c4d-1a62-4edc-95a3-083883fbfd50", + "metadata": {}, + "outputs": [], + "source": [ + "custom_report_result = await gfw_client.fourwings.create_report(\n", + " spatial_resolution=\"LOW\",\n", + " temporal_resolution=\"MONTHLY\",\n", + " group_by=\"FLAG\",\n", + " datasets=[\n", + " \"public-global-fishing-effort:latest\",\n", + " \"public-global-sar-presence:latest\",\n", + " \"public-global-presence:latest\",\n", + " ],\n", + " start_date=\"2022-01-01\",\n", + " end_date=\"2022-05-01\",\n", + " geojson=custom_roi_gdf,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5d219ae7-7cdd-4eb7-ac2f-c93bc7f3cef8", + "metadata": {}, + "source": [ + "### Access the report data as Pydantic models" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a5905325-39b0-4618-88e5-75e72d90e5a3", + "metadata": {}, + "outputs": [], + "source": [ + "custom_report_data = custom_report_result.data()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "03a99701-bf20-46d9-b1c0-e5e5ca9769a7", + "metadata": {}, + "outputs": [], + "source": [ + "custom_report_item = custom_report_data[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f50338f9-ca12-4a7d-aa97-040132168053", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('2022-01', 'NOR', 1.0, 1, -25.9, -76.3)" + ] + }, + "execution_count": 23, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "(\n", + " custom_report_item.date,\n", + " custom_report_item.flag,\n", + " custom_report_item.hours,\n", + " custom_report_item.vessel_ids,\n", + " custom_report_item.lat,\n", + " custom_report_item.lon,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4a7b8513-9a5d-4832-9807-6012680412fc", + "metadata": {}, + "source": [ + "### Access the report data as a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "fe482000-3e68-4cda-aab9-f4773f1f0eff", + "metadata": {}, + "outputs": [], + "source": [ + "custom_report_df = custom_report_result.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "0f1c4961-f6c5-4432-82f6-72acee88966a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 6740 entries, 0 to 6739\n", + "Data columns (total 20 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 6740 non-null str \n", + " 1 detections 0 non-null object \n", + " 2 flag 6740 non-null str \n", + " 3 gear_type 0 non-null object \n", + " 4 hours 6740 non-null float64\n", + " 5 vessel_ids 6740 non-null int64 \n", + " 6 vessel_id 0 non-null object \n", + " 7 vessel_type 0 non-null object \n", + " 8 entry_timestamp 0 non-null object \n", + " 9 exit_timestamp 0 non-null object \n", + " 10 first_transmission_date 0 non-null object \n", + " 11 last_transmission_date 0 non-null object \n", + " 12 imo 0 non-null object \n", + " 13 mmsi 0 non-null object \n", + " 14 call_sign 0 non-null object \n", + " 15 dataset 0 non-null object \n", + " 16 report_dataset 6740 non-null str \n", + " 17 ship_name 0 non-null object \n", + " 18 lat 6740 non-null float64\n", + " 19 lon 6740 non-null float64\n", + "dtypes: float64(3), int64(1), object(13), str(3)\n", + "memory usage: 1.0+ MB\n" + ] + } + ], + "source": [ + "custom_report_df.info()" + ] + }, + { + "cell_type": 
"code", + "execution_count": 26, + "id": "35c50f54-be82-4c6a-a7c7-46e37ac38a4c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datedetectionsflaggear_typehoursvessel_idsvessel_idvessel_typeentry_timestampexit_timestampfirst_transmission_datelast_transmission_dateimommsicall_signdatasetreport_datasetship_namelatlon
02022-03NoneCHNNone1.01NoneNoneNoneNoneNoneNoneNoneNoneNoneNonepublic-global-presence:v4.0None-24.0-78.5
12022-03NoneMHLNone1.01NoneNoneNoneNoneNoneNoneNoneNoneNoneNonepublic-global-presence:v4.0None-25.6-79.4
22022-02NoneMLTNone1.01NoneNoneNoneNoneNoneNoneNoneNoneNoneNonepublic-global-presence:v4.0None-27.0-78.0
32022-03NoneMHLNone1.01NoneNoneNoneNoneNoneNoneNoneNoneNoneNonepublic-global-presence:v4.0None-28.6-79.9
42022-04NoneLBRNone1.01NoneNoneNoneNoneNoneNoneNoneNoneNoneNonepublic-global-presence:v4.0None-29.3-81.6
\n", + "
" + ], + "text/plain": [ + " date detections flag gear_type hours vessel_ids vessel_id vessel_type \\\n", + "0 2022-03 None CHN None 1.0 1 None None \n", + "1 2022-03 None MHL None 1.0 1 None None \n", + "2 2022-02 None MLT None 1.0 1 None None \n", + "3 2022-03 None MHL None 1.0 1 None None \n", + "4 2022-04 None LBR None 1.0 1 None None \n", + "\n", + " entry_timestamp exit_timestamp first_transmission_date \\\n", + "0 None None None \n", + "1 None None None \n", + "2 None None None \n", + "3 None None None \n", + "4 None None None \n", + "\n", + " last_transmission_date imo mmsi call_sign dataset \\\n", + "0 None None None None None \n", + "1 None None None None None \n", + "2 None None None None None \n", + "3 None None None None None \n", + "4 None None None None None \n", + "\n", + " report_dataset ship_name lat lon \n", + "0 public-global-presence:v4.0 None -24.0 -78.5 \n", + "1 public-global-presence:v4.0 None -25.6 -79.4 \n", + "2 public-global-presence:v4.0 None -27.0 -78.0 \n", + "3 public-global-presence:v4.0 None -28.6 -79.9 \n", + "4 public-global-presence:v4.0 None -29.3 -81.6 " ] }, - "execution_count": 64, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "report_df.head()" + "custom_report_df.head()" ] } ], diff --git a/notebooks/usage-guides/bulk-downloads-api.ipynb b/notebooks/usage-guides/bulk-downloads-api.ipynb index 10461bc..a9ceef9 100644 --- a/notebooks/usage-guides/bulk-downloads-api.ipynb +++ b/notebooks/usage-guides/bulk-downloads-api.ipynb @@ -121,6 +121,8 @@ "import os\n", "import time\n", "\n", + "import geopandas as gpd\n", + "\n", "import gfwapiclient as gfw" ] }, @@ -162,7 +164,7 @@ "id": "1528b997-f892-46d3-976f-4b81794b1f3a", "metadata": {}, "source": [ - "## Create a Bulk Report (`create_bulk_report`)" + "## Create a Bulk Report from Predefined Region (`create_bulk_report`)" ] }, { @@ -173,30 +175,35 @@ "The `create_bulk_report()` method allows you **create** a bulk report based on 
specified filters and spatial parameters. The `name` parameter is mandatory. Please [learn more about create a bulk report here](https://globalfishingwatch.org/our-apis/documentation#create-a-bulk-report) and [check its data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat) and [here](https://globalfishingwatch.org/our-apis/documentation#sar-fixed-infrastructure-data-caveats)." ] }, + { + "cell_type": "markdown", + "id": "de836e6d-b8e9-4b2f-8579-faecbf4164b3", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." + ] + }, { "cell_type": "code", "execution_count": 5, - "id": "a0496b62-ca37-4d36-adb8-42ab8b69690e", + "id": "41033ef2-9d29-4709-a24a-dded78e43ca1", "metadata": {}, "outputs": [], "source": [ - "timestamp = int(time.time() * 1000)\n", - "dataset = \"public-fixed-infrastructure-data:latest\"\n", - "region_dataset = \"public-eez-areas\"\n", - "region_id = \"8466\" # Argentinian Exclusive Economic Zone\n", - "name = f\"{dataset.split(':')[0]}_{region_dataset}__{region_id}_{timestamp}\"" + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"ARG\")\n", + "arg_eez_roi = eez_rois_result.data()[0]" ] }, { "cell_type": "code", "execution_count": 6, - "id": "aebdbd17-f0da-4f76-929d-ad26a53d5e1b", + "id": "bb0da781-be12-4cc6-8016-ee933194aceb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'public-fixed-infrastructure-data_public-eez-areas__8466_1768085547174'" + "('8466', 'public-eez-areas', 'Argentinian Exclusive Economic Zone', 'ARG')" ] }, "execution_count": 6, @@ -205,24 +212,60 @@ } ], 
"source": [ - "name" + "arg_eez_roi.id, arg_eez_roi.dataset, arg_eez_roi.label, arg_eez_roi.iso3" ] }, { "cell_type": "code", "execution_count": 7, + "id": "aebdbd17-f0da-4f76-929d-ad26a53d5e1b", + "metadata": {}, + "outputs": [], + "source": [ + "timestamp = int(time.time() * 1000)\n", + "dataset = \"public-fixed-infrastructure-data:latest\"\n", + "name = f\"{dataset.split(':')[0]}-python-package-example-{timestamp}-predefined_region\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f4e71ffa-274c-470e-b2f3-ea404035609f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'public-fixed-infrastructure-data-python-package-example-1782384022398-predefined_region'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "name" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "802695e0-d728-4194-967d-7a6eba3a5916", "metadata": {}, "outputs": [], "source": [ - "create_bulk_report_result = await gfw_client.bulk_downloads.create_bulk_report(\n", - " name=name,\n", - " dataset=dataset,\n", - " region={\n", - " \"dataset\": region_dataset,\n", - " \"id\": region_id,\n", - " },\n", - " filters=[\"label = 'oil'\", \"label_confidence = 'high'\"],\n", + "create_predefined_bulk_report_result = (\n", + " await gfw_client.bulk_downloads.create_bulk_report(\n", + " name=name,\n", + " dataset=dataset,\n", + " region=arg_eez_roi,\n", + " filters=[\n", + " \"label = 'oil'\",\n", + " \"label_confidence = 'high'\",\n", + " \"structure_start_date between '2020-01-01' and '2025-01-01'\",\n", + " ],\n", + " )\n", ")" ] }, @@ -236,40 +279,40 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "fb381ffe-1441-4543-866d-f47ea4675f40", "metadata": {}, "outputs": [], "source": [ - "create_bulk_report_data = create_bulk_report_result.data()" + "create_predefined_bulk_report_data = create_predefined_bulk_report_result.data()" ] }, { "cell_type": "code", - 
"execution_count": 9, + "execution_count": 11, "id": "d2dc3e1e-c332-4d91-9209-a0680a67d31c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('c5e32895-4374-41d2-8b2e-ac414ed6757f',\n", - " 'public-fixed-infrastructure-data_public-eez-areas__8466_1768085547174',\n", + "('ea21f550-780b-4fa6-aa8e-158f85289492',\n", + " 'public-fixed-infrastructure-data-python-package-example-1782384022398-predefined_region',\n", " 'pending',\n", - " datetime.datetime(2026, 1, 10, 22, 52, 30, 9000, tzinfo=TzInfo(0)))" + " datetime.datetime(2026, 6, 25, 10, 40, 25, 113000, tzinfo=TzInfo(0)))" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", - " create_bulk_report_data.id,\n", - " create_bulk_report_data.name,\n", - " create_bulk_report_data.status,\n", - " create_bulk_report_data.created_at,\n", + " create_predefined_bulk_report_data.id,\n", + " create_predefined_bulk_report_data.name,\n", + " create_predefined_bulk_report_data.status,\n", + " create_predefined_bulk_report_data.created_at,\n", ")" ] }, @@ -283,17 +326,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "f68b5906-3811-47f1-ad3d-a4a3ab14b374", "metadata": {}, "outputs": [], "source": [ - "create_bulk_report_df = create_bulk_report_result.df()" + "create_predefined_bulk_report_df = create_predefined_bulk_report_result.df()" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "6744d8c2-a550-4635-8aed-4a58c0f2d64d", "metadata": {}, "outputs": [ @@ -301,35 +344,36 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", - "Data columns (total 12 columns):\n", + "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 1 non-null object \n", - " 1 name 1 non-null object \n", - " 2 file_path 1 non-null object \n", - " 3 format 1 non-null object \n", - " 4 
filters 1 non-null object \n", - " 5 geom 1 non-null object \n", - " 6 status 1 non-null object \n", - " 7 owner_id 1 non-null int64 \n", - " 8 owner_type 1 non-null object \n", - " 9 created_at 1 non-null datetime64[ns, UTC]\n", - " 10 updated_at 1 non-null datetime64[ns, UTC]\n", - " 11 file_size 0 non-null object \n", - "dtypes: datetime64[ns, UTC](2), int64(1), object(9)\n", - "memory usage: 228.0+ bytes\n" + " 0 id 1 non-null str \n", + " 1 dataset 1 non-null str \n", + " 2 name 1 non-null str \n", + " 3 file_path 1 non-null str \n", + " 4 format 1 non-null str \n", + " 5 filters 1 non-null object \n", + " 6 geom 1 non-null object \n", + " 7 status 1 non-null str \n", + " 8 owner_id 1 non-null int64 \n", + " 9 owner_type 1 non-null str \n", + " 10 created_at 1 non-null datetime64[us, UTC]\n", + " 11 updated_at 1 non-null datetime64[us, UTC]\n", + " 12 file_size 0 non-null object \n", + "dtypes: datetime64[us, UTC](2), int64(1), object(3), str(7)\n", + "memory usage: 236.0+ bytes\n" ] } ], "source": [ - "create_bulk_report_df.info()" + "create_predefined_bulk_report_df.info()" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "d152ac16-99f6-4675-8f6e-a7f92302e4c2", "metadata": {}, "outputs": [ @@ -355,6 +399,7 @@ " \n", " \n", " id\n", + " dataset\n", " name\n", " file_path\n", " format\n", @@ -371,17 +416,18 @@ " \n", " \n", " 0\n", - " c5e32895-4374-41d2-8b2e-ac414ed6757f\n", - " public-fixed-infrastructure-data_public-eez-ar...\n", + " ea21f550-780b-4fa6-aa8e-158f85289492\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label = 'oil', label_confidence = 'high']\n", + " [label = 'oil', label_confidence = 'high', str...\n", " {'type': 'dataset', 'dataset': 'public-eez-are...\n", " pending\n", " 39\n", " application\n", - " 2026-01-10 22:52:30.009000+00:00\n", - " 2026-01-10 22:52:30.009000+00:00\n", + " 
2026-06-25 10:40:25.113000+00:00\n", + " 2026-06-25 10:40:25.113000+00:00\n", + " None\n", + " \n", + " \n", @@ -390,34 +436,333 @@ ], "text/plain": [ " id \\\n", - "0 c5e32895-4374-41d2-8b2e-ac414ed6757f \n", + "0 ea21f550-780b-4fa6-aa8e-158f85289492 \n", + "\n", + " dataset \\\n", + "0 public-fixed-infrastructure-data:v1.1 \n", "\n", " name \\\n", - "0 public-fixed-infrastructure-data_public-eez-ar... \n", + "0 public-fixed-infrastructure-data-python-packag... \n", "\n", " file_path format \\\n", "0 sar_fixed_infrastructure_202409.csv JSON \n", "\n", - " filters \\\n", - "0 [label = 'oil', label_confidence = 'high'] \n", + " filters \\\n", + "0 [label = 'oil', label_confidence = 'high', str... \n", "\n", " geom status owner_id \\\n", "0 {'type': 'dataset', 'dataset': 'public-eez-are... pending 39 \n", "\n", " owner_type created_at \\\n", - "0 application 2026-01-10 22:52:30.009000+00:00 \n", + "0 application 2026-06-25 10:40:25.113000+00:00 \n", + "\n", + " updated_at file_size \n", + "0 2026-06-25 10:40:25.113000+00:00 None " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "create_predefined_bulk_report_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "f7903c11-ebed-4a21-b6e3-6470f501a507", + "metadata": {}, + "source": [ + "## Create a Bulk Report from Custom Region (`create_bulk_report`)" + ] + }, + { + "cell_type": "markdown", + "id": "71edd1ee-c802-4d2a-8bce-ac226382a3dc", + "metadata": {}, + "source": [ + "The `create_bulk_report()` method allows you to **create** a bulk report based on specified filters and spatial parameters. The `name` parameter is mandatory. Please [learn more about creating a bulk report here](https://globalfishingwatch.org/our-apis/documentation#create-a-bulk-report) and [check its data caveats here](https://globalfishingwatch.org/our-apis/documentation#data-caveat) and [here](https://globalfishingwatch.org/our-apis/documentation#sar-fixed-infrastructure-data-caveats)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "669b36eb-604c-4a01-9dcb-c53b78c1bea3", + "metadata": {}, + "source": [ + "**Note:** A custom region can be either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, an object implementing `__geo_interface__` etc.) or a `GeoJson` model instance. Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) and supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip)." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "428561cc-06f5-44fc-8177-859cfbdfe503", + "metadata": {}, + "outputs": [], + "source": [ + "filename = \"https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/bulk_downloads/geojson/geojson.shp\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "11ff5032-15fd-4079-9122-9398eaaf921b", + "metadata": {}, + "outputs": [], + "source": [ + "custom_roi_gdf = gpd.read_file(filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9918b6c5-b641-4b3e-b9f3-4109de9f6785", + "metadata": {}, + "outputs": [], + "source": [ + "timestamp = int(time.time() * 1000)\n", + "dataset = \"public-fixed-infrastructure-data:latest\"\n", + "name = f\"{dataset.split(':')[0]}-python-package-example-{timestamp}-custom_region\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "72b71e64-7976-463b-9cb0-17b48a934c14", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'public-fixed-infrastructure-data-python-package-example-1782384029540-custom_region'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "name" + ] + }, + { + "cell_type": "code", + "execution_count": 
19, + "id": "d551a9a5-e22c-4bbf-a804-ff1cc1fd8f3d", + "metadata": {}, + "outputs": [], + "source": [ + "create_custom_bulk_report_result = await gfw_client.bulk_downloads.create_bulk_report(\n", + " name=name,\n", + " dataset=dataset,\n", + " geojson=custom_roi_gdf,\n", + " filters=[\n", + " \"label = 'oil'\",\n", + " \"label_confidence = 'high'\",\n", + " \"structure_start_date between '2020-01-01' and '2025-01-01'\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d8c2d60b-e9fd-4af7-9e23-40f9958f17d7", + "metadata": {}, + "source": [ + "### Access Create a Bulk Report Result as Pydantic models" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "0e1fd9c1-96f1-4158-a88f-84821b5b707e", + "metadata": {}, + "outputs": [], + "source": [ + "create_custom_bulk_report_data = create_custom_bulk_report_result.data()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "294323b4-7369-420a-8dbf-7b5945597cea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('f0a39c14-1756-4f75-9150-0ada6b29eadf',\n", + " 'public-fixed-infrastructure-data-python-package-example-1782384029540-custom_region',\n", + " 'pending',\n", + " datetime.datetime(2026, 6, 25, 10, 40, 30, 740000, tzinfo=TzInfo(0)))" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " create_custom_bulk_report_data.id,\n", + " create_custom_bulk_report_data.name,\n", + " create_custom_bulk_report_data.status,\n", + " create_custom_bulk_report_data.created_at,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "20b91174-8d15-45e3-b3f3-00a85cbb6916", + "metadata": {}, + "source": [ + "### Access Create a Bulk Report Result as a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "67cd65b6-0fc7-4c22-af09-48874ef5bb7d", + "metadata": {}, + "outputs": [], + "source": [ + "create_custom_bulk_report_df = create_custom_bulk_report_result.df()" + ] + 
}, + { + "cell_type": "code", + "execution_count": 23, + "id": "15d740b3-7774-45f4-ad94-d31344dc3212", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1 entries, 0 to 0\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 1 non-null str \n", + " 1 dataset 1 non-null str \n", + " 2 name 1 non-null str \n", + " 3 file_path 1 non-null str \n", + " 4 format 1 non-null str \n", + " 5 filters 1 non-null object \n", + " 6 geom 1 non-null object \n", + " 7 status 1 non-null str \n", + " 8 owner_id 1 non-null int64 \n", + " 9 owner_type 1 non-null str \n", + " 10 created_at 1 non-null datetime64[us, UTC]\n", + " 11 updated_at 1 non-null datetime64[us, UTC]\n", + " 12 file_size 0 non-null object \n", + "dtypes: datetime64[us, UTC](2), int64(1), object(3), str(7)\n", + "memory usage: 236.0+ bytes\n" + ] + } + ], + "source": [ + "create_custom_bulk_report_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "311e9272-d561-4354-8220-ff728400677a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatasetnamefile_pathformatfiltersgeomstatusowner_idowner_typecreated_atupdated_atfile_size
0f0a39c14-1756-4f75-9150-0ada6b29eadfpublic-fixed-infrastructure-data:v1.1public-fixed-infrastructure-data-python-packag...sar_fixed_infrastructure_202409.csvJSON[label = 'oil', label_confidence = 'high', str...{'type': 'custom', 'dataset': None, 'id': None}pending39application2026-06-25 10:40:30.740000+00:002026-06-25 10:40:30.740000+00:00None
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 f0a39c14-1756-4f75-9150-0ada6b29eadf \n", + "\n", + " dataset \\\n", + "0 public-fixed-infrastructure-data:v1.1 \n", + "\n", + " name \\\n", + "0 public-fixed-infrastructure-data-python-packag... \n", + "\n", + " file_path format \\\n", + "0 sar_fixed_infrastructure_202409.csv JSON \n", + "\n", + " filters \\\n", + "0 [label = 'oil', label_confidence = 'high', str... \n", + "\n", + " geom status owner_id \\\n", + "0 {'type': 'custom', 'dataset': None, 'id': None} pending 39 \n", + "\n", + " owner_type created_at \\\n", + "0 application 2026-06-25 10:40:30.740000+00:00 \n", "\n", " updated_at file_size \n", - "0 2026-01-10 22:52:30.009000+00:00 None " + "0 2026-06-25 10:40:30.740000+00:00 None " ] }, - "execution_count": 12, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "create_bulk_report_df.head()" + "create_custom_bulk_report_df.head()" ] }, { @@ -446,13 +791,13 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 25, "id": "1a05b7ff-26fb-40ad-b578-74e7647eb687", "metadata": {}, "outputs": [], "source": [ "bulk_report_result = await gfw_client.bulk_downloads.get_bulk_report_by_id(\n", - " id=create_bulk_report_data.id\n", + " id=create_predefined_bulk_report_data.id\n", ")" ] }, @@ -466,7 +811,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 26, "id": "2f064920-a66c-48d8-9f20-2fb36b7dc4bc", "metadata": {}, "outputs": [], @@ -476,30 +821,30 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 27, "id": "a966e319-3214-410e-b063-e0f51f68d04c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('c5e32895-4374-41d2-8b2e-ac414ed6757f',\n", - " 'public-fixed-infrastructure-data_public-eez-areas__8466_1768085547174',\n", - " 'pending',\n", - " datetime.datetime(2026, 1, 10, 22, 52, 30, 9000, tzinfo=TzInfo(0)))" + "('ea21f550-780b-4fa6-aa8e-158f85289492',\n", + " 
'public-fixed-infrastructure-data-python-package-example-1782384022398-predefined_region',\n", + " 'done',\n", + " datetime.datetime(2026, 6, 25, 10, 40, 25, 113000, tzinfo=TzInfo(0)))" ] }, - "execution_count": 15, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", - " create_bulk_report_data.id,\n", - " create_bulk_report_data.name,\n", - " create_bulk_report_data.status,\n", - " create_bulk_report_data.created_at,\n", + " bulk_report_data.id,\n", + " bulk_report_data.name,\n", + " bulk_report_data.status,\n", + " bulk_report_data.created_at,\n", ")" ] }, @@ -513,7 +858,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 28, "id": "7cf258fa-e0d0-45c3-9f39-06f547f61529", "metadata": {}, "outputs": [], @@ -523,7 +868,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 29, "id": "57381d93-fdf1-4fd3-aec2-71bbdf877bc8", "metadata": {}, "outputs": [ @@ -531,25 +876,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", - "Data columns (total 12 columns):\n", + "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 1 non-null object \n", - " 1 name 1 non-null object \n", - " 2 file_path 1 non-null object \n", - " 3 format 1 non-null object \n", - " 4 filters 1 non-null object \n", - " 5 geom 1 non-null object \n", - " 6 status 1 non-null object \n", - " 7 owner_id 1 non-null int64 \n", - " 8 owner_type 1 non-null object \n", - " 9 created_at 1 non-null datetime64[ns, UTC]\n", - " 10 updated_at 1 non-null datetime64[ns, UTC]\n", - " 11 file_size 0 non-null object \n", - "dtypes: datetime64[ns, UTC](2), int64(1), object(9)\n", - "memory usage: 228.0+ bytes\n" + " 0 id 1 non-null str \n", + " 1 dataset 1 non-null str \n", + " 2 name 1 non-null str \n", + " 3 file_path 1 non-null str \n", + " 4 format 1 non-null str \n", + " 5 filters 1 non-null object \n", + 
" 6 geom 1 non-null object \n", + " 7 status 1 non-null str \n", + " 8 owner_id 1 non-null int64 \n", + " 9 owner_type 1 non-null str \n", + " 10 created_at 1 non-null datetime64[us, UTC]\n", + " 11 updated_at 1 non-null datetime64[us, UTC]\n", + " 12 file_size 1 non-null float64 \n", + "dtypes: datetime64[us, UTC](2), float64(1), int64(1), object(2), str(7)\n", + "memory usage: 236.0+ bytes\n" ] } ], @@ -559,7 +905,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 30, "id": "89f76538-0bc2-4258-bb27-410cd9cc5b85", "metadata": {}, "outputs": [ @@ -585,6 +931,7 @@ " \n", " \n", " id\n", + " dataset\n", " name\n", " file_path\n", " format\n", @@ -601,18 +948,19 @@ " \n", " \n", " 0\n", - " c5e32895-4374-41d2-8b2e-ac414ed6757f\n", - " public-fixed-infrastructure-data_public-eez-ar...\n", + " ea21f550-780b-4fa6-aa8e-158f85289492\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label = 'oil', label_confidence = 'high']\n", + " [label = 'oil', label_confidence = 'high', str...\n", " {'type': 'dataset', 'dataset': 'public-eez-are...\n", - " pending\n", + " done\n", " 39\n", " application\n", - " 2026-01-10 22:52:30.009000+00:00\n", - " 2026-01-10 22:52:30.009000+00:00\n", - " None\n", + " 2026-06-25 10:40:25.113000+00:00\n", + " 2026-06-25 10:40:25.113000+00:00\n", + " 907.0\n", " \n", " \n", "\n", @@ -620,28 +968,31 @@ ], "text/plain": [ " id \\\n", - "0 c5e32895-4374-41d2-8b2e-ac414ed6757f \n", + "0 ea21f550-780b-4fa6-aa8e-158f85289492 \n", + "\n", + " dataset \\\n", + "0 public-fixed-infrastructure-data:v1.1 \n", "\n", " name \\\n", - "0 public-fixed-infrastructure-data_public-eez-ar... \n", + "0 public-fixed-infrastructure-data-python-packag... 
\n", "\n", " file_path format \\\n", "0 sar_fixed_infrastructure_202409.csv JSON \n", "\n", - " filters \\\n", - "0 [label = 'oil', label_confidence = 'high'] \n", + " filters \\\n", + "0 [label = 'oil', label_confidence = 'high', str... \n", "\n", - " geom status owner_id \\\n", - "0 {'type': 'dataset', 'dataset': 'public-eez-are... pending 39 \n", + " geom status owner_id \\\n", + "0 {'type': 'dataset', 'dataset': 'public-eez-are... done 39 \n", "\n", " owner_type created_at \\\n", - "0 application 2026-01-10 22:52:30.009000+00:00 \n", + "0 application 2026-06-25 10:40:25.113000+00:00 \n", "\n", - " updated_at file_size \n", - "0 2026-01-10 22:52:30.009000+00:00 None " + " updated_at file_size \n", + "0 2026-06-25 10:40:25.113000+00:00 907.0 " ] }, - "execution_count": 18, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -668,13 +1019,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 31, "id": "338223ff-9c23-47f6-bf33-2aaaefc35081", "metadata": {}, "outputs": [], "source": [ "bulk_reports_result = await gfw_client.bulk_downloads.get_all_bulk_reports(\n", " status=\"done\",\n", + " dataset=dataset,\n", ")" ] }, @@ -688,7 +1040,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 32, "id": "3bff3854-9b54-44c9-a7f8-3ef2bf1b7719", "metadata": {}, "outputs": [], @@ -698,7 +1050,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 33, "id": "21e8b553-20dd-43bb-92c3-0e7f5f6f29c3", "metadata": {}, "outputs": [], @@ -708,7 +1060,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 34, "id": "29bd8b84-1f22-4a11-9861-548237926012", "metadata": {}, "outputs": [ @@ -721,7 +1073,7 @@ " datetime.datetime(2025, 12, 7, 10, 3, 12, 371000, tzinfo=TzInfo(0)))" ] }, - "execution_count": 22, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -745,7 +1097,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 
35, "id": "a778ef19-f68d-4e0b-8b37-539db79868cb", "metadata": {}, "outputs": [], @@ -755,7 +1107,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 36, "id": "271d446a-702d-42bb-93a9-e576741a3fdd", "metadata": {}, "outputs": [ @@ -763,25 +1115,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 7 entries, 0 to 6\n", - "Data columns (total 12 columns):\n", + "\n", + "RangeIndex: 28 entries, 0 to 27\n", + "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 7 non-null object \n", - " 1 name 7 non-null object \n", - " 2 file_path 7 non-null object \n", - " 3 format 7 non-null object \n", - " 4 filters 7 non-null object \n", - " 5 geom 4 non-null object \n", - " 6 status 7 non-null object \n", - " 7 owner_id 7 non-null int64 \n", - " 8 owner_type 7 non-null object \n", - " 9 created_at 7 non-null datetime64[ns, UTC]\n", - " 10 updated_at 7 non-null datetime64[ns, UTC]\n", - " 11 file_size 7 non-null float64 \n", - "dtypes: datetime64[ns, UTC](2), float64(1), int64(1), object(8)\n", - "memory usage: 804.0+ bytes\n" + " 0 id 28 non-null str \n", + " 1 dataset 28 non-null str \n", + " 2 name 28 non-null str \n", + " 3 file_path 28 non-null str \n", + " 4 format 28 non-null str \n", + " 5 filters 28 non-null object \n", + " 6 geom 25 non-null object \n", + " 7 status 28 non-null str \n", + " 8 owner_id 28 non-null int64 \n", + " 9 owner_type 28 non-null str \n", + " 10 created_at 28 non-null datetime64[us, UTC]\n", + " 11 updated_at 28 non-null datetime64[us, UTC]\n", + " 12 file_size 28 non-null float64 \n", + "dtypes: datetime64[us, UTC](2), float64(1), int64(1), object(2), str(7)\n", + "memory usage: 3.0+ KB\n" ] } ], @@ -791,7 +1144,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 37, "id": "5145b645-c0fe-4780-b94f-261b534b7f04", "metadata": {}, "outputs": [ @@ -817,6 +1170,7 @@ " \n", " \n", " id\n", + " dataset\n", " 
name\n", " file_path\n", " format\n", @@ -833,78 +1187,83 @@ " \n", " \n", " 0\n", - " 705f2f9a-f695-43f1-a4bf-7746f3deb091\n", - " public-fixed-infrastructure-data_public-eez-ar...\n", + " ea21f550-780b-4fa6-aa8e-158f85289492\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label = 'oil', label_confidence = 'high']\n", + " [label = 'oil', label_confidence = 'high', str...\n", " {'type': 'dataset', 'dataset': 'public-eez-are...\n", " done\n", " 39\n", " application\n", - " 2026-01-10 22:47:37.506000+00:00\n", - " 2026-01-10 22:47:37.506000+00:00\n", - " 22224.0\n", + " 2026-06-25 10:40:25.113000+00:00\n", + " 2026-06-25 10:40:25.113000+00:00\n", + " 907.0\n", " \n", " \n", " 1\n", - " 89e29012-bd90-4718-a6ba-71c0e3ea6260\n", - " public-fixed-infrastructure-data_public-eez-ar...\n", + " 125894a9-1052-4e4a-aeea-c78da72f3b11\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label = 'oil', label_confidence = 'high']\n", - " {'type': 'dataset', 'dataset': 'public-eez-are...\n", + " [label = 'oil', label_confidence = 'high', str...\n", + " {'type': 'custom', 'dataset': None, 'id': None}\n", " done\n", " 39\n", " application\n", - " 2026-01-10 21:57:03.520000+00:00\n", - " 2026-01-10 21:57:03.520000+00:00\n", - " 22224.0\n", + " 2026-06-25 10:36:24.944000+00:00\n", + " 2026-06-25 10:36:24.944000+00:00\n", + " 25287.0\n", " \n", " \n", " 2\n", - " 9e8d0d20-a635-4d36-83f4-4179c3aad156\n", - " sar-fixed-infrastructure-data-20241207-all-oil...\n", + " e01ebf90-b3e7-4657-8890-1f633fb84829\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label = 'oil', label_confidence = 'high']\n", - " None\n", + " [label = 'oil', label_confidence = 
'high', str...\n", + " {'type': 'dataset', 'dataset': 'public-eez-are...\n", " done\n", " 39\n", " application\n", - " 2025-12-07 14:09:17.712000+00:00\n", - " 2025-12-07 14:09:17.712000+00:00\n", - " 18223879.0\n", + " 2026-06-25 10:35:58.345000+00:00\n", + " 2026-06-25 10:35:58.345000+00:00\n", + " 907.0\n", " \n", " \n", " 3\n", - " 7a036b8f-21fc-4c53-a6cb-2d90caf1c2a5\n", - " sar-fixed-infrastructure-data-20241207-all-lab...\n", + " c44e3b52-9d54-4ebe-b9de-6867a9092a48\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label_confidence = 'high' or label_confidence...\n", - " None\n", + " [label = 'oil', label_confidence = 'high', str...\n", + " {'type': 'custom', 'dataset': None, 'id': None}\n", " done\n", " 39\n", " application\n", - " 2025-12-07 12:21:33.424000+00:00\n", - " 2025-12-07 12:21:33.424000+00:00\n", - " 122605450.0\n", + " 2026-06-25 10:26:37.104000+00:00\n", + " 2026-06-25 10:26:37.104000+00:00\n", + " 25.0\n", " \n", " \n", " 4\n", - " 568d2d08-82f8-4243-a120-103c6417eef2\n", - " sar-fixed-infrastructure-data-20241207-all-1\n", + " 1188b98f-3b10-428f-9c2e-e9a51e66279c\n", + " public-fixed-infrastructure-data:v1.1\n", + " public-fixed-infrastructure-data-python-packag...\n", " sar_fixed_infrastructure_202409.csv\n", " JSON\n", - " [label_confidence = 'high']\n", - " None\n", + " [label = 'oil', label_confidence = 'high', str...\n", + " {'type': 'dataset', 'dataset': 'public-eez-are...\n", " done\n", " 39\n", " application\n", - " 2025-12-07 10:12:40.192000+00:00\n", - " 2025-12-07 10:12:40.192000+00:00\n", - " 117135274.0\n", + " 2026-06-25 10:25:30.863000+00:00\n", + " 2026-06-25 10:25:30.863000+00:00\n", + " 298.0\n", " \n", " \n", "\n", @@ -912,18 +1271,25 @@ ], "text/plain": [ " id \\\n", - "0 705f2f9a-f695-43f1-a4bf-7746f3deb091 \n", - "1 89e29012-bd90-4718-a6ba-71c0e3ea6260 \n", - "2 9e8d0d20-a635-4d36-83f4-4179c3aad156 \n", - 
"3 7a036b8f-21fc-4c53-a6cb-2d90caf1c2a5 \n", - "4 568d2d08-82f8-4243-a120-103c6417eef2 \n", + "0 ea21f550-780b-4fa6-aa8e-158f85289492 \n", + "1 125894a9-1052-4e4a-aeea-c78da72f3b11 \n", + "2 e01ebf90-b3e7-4657-8890-1f633fb84829 \n", + "3 c44e3b52-9d54-4ebe-b9de-6867a9092a48 \n", + "4 1188b98f-3b10-428f-9c2e-e9a51e66279c \n", + "\n", + " dataset \\\n", + "0 public-fixed-infrastructure-data:v1.1 \n", + "1 public-fixed-infrastructure-data:v1.1 \n", + "2 public-fixed-infrastructure-data:v1.1 \n", + "3 public-fixed-infrastructure-data:v1.1 \n", + "4 public-fixed-infrastructure-data:v1.1 \n", "\n", " name \\\n", - "0 public-fixed-infrastructure-data_public-eez-ar... \n", - "1 public-fixed-infrastructure-data_public-eez-ar... \n", - "2 sar-fixed-infrastructure-data-20241207-all-oil... \n", - "3 sar-fixed-infrastructure-data-20241207-all-lab... \n", - "4 sar-fixed-infrastructure-data-20241207-all-1 \n", + "0 public-fixed-infrastructure-data-python-packag... \n", + "1 public-fixed-infrastructure-data-python-packag... \n", + "2 public-fixed-infrastructure-data-python-packag... \n", + "3 public-fixed-infrastructure-data-python-packag... \n", + "4 public-fixed-infrastructure-data-python-packag... \n", "\n", " file_path format \\\n", "0 sar_fixed_infrastructure_202409.csv JSON \n", @@ -933,35 +1299,35 @@ "4 sar_fixed_infrastructure_202409.csv JSON \n", "\n", " filters \\\n", - "0 [label = 'oil', label_confidence = 'high'] \n", - "1 [label = 'oil', label_confidence = 'high'] \n", - "2 [label = 'oil', label_confidence = 'high'] \n", - "3 [label_confidence = 'high' or label_confidence... \n", - "4 [label_confidence = 'high'] \n", + "0 [label = 'oil', label_confidence = 'high', str... \n", + "1 [label = 'oil', label_confidence = 'high', str... \n", + "2 [label = 'oil', label_confidence = 'high', str... \n", + "3 [label = 'oil', label_confidence = 'high', str... \n", + "4 [label = 'oil', label_confidence = 'high', str... 
\n", "\n", " geom status owner_id \\\n", "0 {'type': 'dataset', 'dataset': 'public-eez-are... done 39 \n", - "1 {'type': 'dataset', 'dataset': 'public-eez-are... done 39 \n", - "2 None done 39 \n", - "3 None done 39 \n", - "4 None done 39 \n", + "1 {'type': 'custom', 'dataset': None, 'id': None} done 39 \n", + "2 {'type': 'dataset', 'dataset': 'public-eez-are... done 39 \n", + "3 {'type': 'custom', 'dataset': None, 'id': None} done 39 \n", + "4 {'type': 'dataset', 'dataset': 'public-eez-are... done 39 \n", "\n", " owner_type created_at \\\n", - "0 application 2026-01-10 22:47:37.506000+00:00 \n", - "1 application 2026-01-10 21:57:03.520000+00:00 \n", - "2 application 2025-12-07 14:09:17.712000+00:00 \n", - "3 application 2025-12-07 12:21:33.424000+00:00 \n", - "4 application 2025-12-07 10:12:40.192000+00:00 \n", + "0 application 2026-06-25 10:40:25.113000+00:00 \n", + "1 application 2026-06-25 10:36:24.944000+00:00 \n", + "2 application 2026-06-25 10:35:58.345000+00:00 \n", + "3 application 2026-06-25 10:26:37.104000+00:00 \n", + "4 application 2026-06-25 10:25:30.863000+00:00 \n", "\n", - " updated_at file_size \n", - "0 2026-01-10 22:47:37.506000+00:00 22224.0 \n", - "1 2026-01-10 21:57:03.520000+00:00 22224.0 \n", - "2 2025-12-07 14:09:17.712000+00:00 18223879.0 \n", - "3 2025-12-07 12:21:33.424000+00:00 122605450.0 \n", - "4 2025-12-07 10:12:40.192000+00:00 117135274.0 " + " updated_at file_size \n", + "0 2026-06-25 10:40:25.113000+00:00 907.0 \n", + "1 2026-06-25 10:36:24.944000+00:00 25287.0 \n", + "2 2026-06-25 10:35:58.345000+00:00 907.0 \n", + "3 2026-06-25 10:26:37.104000+00:00 25.0 \n", + "4 2026-06-25 10:25:30.863000+00:00 298.0 " ] }, - "execution_count": 25, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -988,7 +1354,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 38, "id": "70c51654-a7b9-46a9-991b-c9c97dcfe41e", "metadata": {}, "outputs": [], @@ -1010,7 +1376,7 @@ }, { "cell_type": "code", - 
"execution_count": 27, + "execution_count": 39, "id": "5330ef22-3ef7-4482-ae34-f1e9f6f48ccf", "metadata": {}, "outputs": [], @@ -1020,17 +1386,17 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 40, "id": "4e3d4575-4c59-4c8e-b15e-2206bdb995ec", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'https://storage.googleapis.com/gfw-api-bulk-pro-us-central1/705f2f9a-f695-43f1-a4bf-7746f3deb091/data.json.gz?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=api-bulk-pro%40gfw-production.iam.gserviceaccount.com%2F20260110%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260110T225232Z&X-Goog-Expires=60&X-Goog-SignedHeaders=host&X-Goog-Signature=481a4ff7244b7286f303b37bb7941c291a26d1e3502debdb7611b8cb2d5edf37bc7aa0287b15a11c2f69f72e88791da3f76873a2fd7d08f911691c35ee8e095b825615510de8256f8cd275211997141e026837e118d86e01c026c457dc1f47d43ff2cb07131c3d21e7908c847bf1e3d87cd4773f02e8e4512a7c15e93799de186b9ea004be50cd3e53292f01e9393595a81c42cc3686f65d280f4f16076759da4722c17c2a6a698393c919cdd083402421a1bbf425b618244b3a9b30e48b770a9dc7f9eed8e63af04f8e31f0b6723fdf76fa7262ded89e7a375fbaea3b031bf29db22b1961878facd79c92d633ab6aa2309c0ce3982104d9835058ecd829bee8'" + 
"'https://storage.googleapis.com/gfw-api-bulk-pro-us-central1/ea21f550-780b-4fa6-aa8e-158f85289492/data.json.gz?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=api-bulk-pro%40gfw-production.iam.gserviceaccount.com%2F20260625%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260625T104032Z&X-Goog-Expires=60&X-Goog-SignedHeaders=host&X-Goog-Signature=6b9b7940e26598c8dab6cf969889d4235ceea4e59ea5412f18d40165d81891b667f9bf339ba41486d856820b0173ce430a4cd64a26b4977cb2e5856c975228e3927710078bc1b374896fc02061ab4d5ab44760299164ca4558c632200706518498507dfd6cc54bbae34bb707f3b6f91ffa1eb0d74d0fc8c0f6e34ea0a13669bb368a3c77550d143569f0722e91dd818a6d9be7fcec57036eaed7aa04cec7b1bc354308fecc623369e644839577aefae587347c395891679a1bdd3f3392de2b4edfa6e1cc6b0894da7b3f2a2ba7df5271e92801eb2479493299b0e9cba5b9b9ac604b8f18a4fb3d6f62cd6bfe56450e83fd23e8221c1afbf516abfe63081e76d9'" ] }, - "execution_count": 28, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1049,7 +1415,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 41, "id": "7750da1f-e4cd-4fcd-9fe2-472e2d4e98b9", "metadata": {}, "outputs": [], @@ -1059,7 +1425,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 42, "id": "ac6aada1-e0a2-4a31-9421-ce88c55aebb4", "metadata": {}, "outputs": [ @@ -1067,14 +1433,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 1 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 url 1 non-null object\n", - "dtypes: object(1)\n", - "memory usage: 140.0+ bytes\n" + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 url 1 non-null str \n", + "dtypes: str(1)\n", + "memory usage: 140.0 bytes\n" ] } ], @@ -1084,17 +1450,17 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 43, "id": "b6fe6c39-3e0a-43c2-8279-8fa1eebb18e6", "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ - "'https://storage.googleapis.com/gfw-api-bulk-pro-us-central1/705f2f9a-f695-43f1-a4bf-7746f3deb091/data.json.gz?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=api-bulk-pro%40gfw-production.iam.gserviceaccount.com%2F20260110%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260110T225232Z&X-Goog-Expires=60&X-Goog-SignedHeaders=host&X-Goog-Signature=481a4ff7244b7286f303b37bb7941c291a26d1e3502debdb7611b8cb2d5edf37bc7aa0287b15a11c2f69f72e88791da3f76873a2fd7d08f911691c35ee8e095b825615510de8256f8cd275211997141e026837e118d86e01c026c457dc1f47d43ff2cb07131c3d21e7908c847bf1e3d87cd4773f02e8e4512a7c15e93799de186b9ea004be50cd3e53292f01e9393595a81c42cc3686f65d280f4f16076759da4722c17c2a6a698393c919cdd083402421a1bbf425b618244b3a9b30e48b770a9dc7f9eed8e63af04f8e31f0b6723fdf76fa7262ded89e7a375fbaea3b031bf29db22b1961878facd79c92d633ab6aa2309c0ce3982104d9835058ecd829bee8'" + "'https://storage.googleapis.com/gfw-api-bulk-pro-us-central1/ea21f550-780b-4fa6-aa8e-158f85289492/data.json.gz?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=api-bulk-pro%40gfw-production.iam.gserviceaccount.com%2F20260625%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260625T104032Z&X-Goog-Expires=60&X-Goog-SignedHeaders=host&X-Goog-Signature=6b9b7940e26598c8dab6cf969889d4235ceea4e59ea5412f18d40165d81891b667f9bf339ba41486d856820b0173ce430a4cd64a26b4977cb2e5856c975228e3927710078bc1b374896fc02061ab4d5ab44760299164ca4558c632200706518498507dfd6cc54bbae34bb707f3b6f91ffa1eb0d74d0fc8c0f6e34ea0a13669bb368a3c77550d143569f0722e91dd818a6d9be7fcec57036eaed7aa04cec7b1bc354308fecc623369e644839577aefae587347c395891679a1bdd3f3392de2b4edfa6e1cc6b0894da7b3f2a2ba7df5271e92801eb2479493299b0e9cba5b9b9ac604b8f18a4fb3d6f62cd6bfe56450e83fd23e8221c1afbf516abfe63081e76d9'" ] }, - "execution_count": 31, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1121,14 +1487,15 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 44, "id": 
"cd2f2b83-c2c6-4729-b229-1564ff9b4bbe", "metadata": {}, "outputs": [], "source": [ "bulk_fixed_infrastructure_data_report_result = (\n", " await gfw_client.bulk_downloads.query_bulk_fixed_infrastructure_data_report(\n", - " id=bulk_reports_data[0].id\n", + " id=bulk_reports_data[0].id,\n", + " sort=\"-structure_start_date\",\n", " )\n", ")" ] @@ -1143,7 +1510,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 45, "id": "fc0e5713-8747-4afe-b7de-6cd5de4527af", "metadata": {}, "outputs": [], @@ -1155,7 +1522,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 46, "id": "d45b9472-9051-4c15-9085-a4d904baef67", "metadata": {}, "outputs": [], @@ -1167,17 +1534,17 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 47, "id": "032db92c-3a8d-4f73-a5cb-b567abe5de03", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('1051638', -53.0895574340617, -67.32289149541135, 'oil', 'high')" + "('960880', -50.9024067539338, -69.09472856180871, 'oil', 'high')" ] }, - "execution_count": 35, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -1202,7 +1569,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 48, "id": "b38b2a5b-bfd7-4308-a8fa-1efb609d70c8", "metadata": {}, "outputs": [], @@ -1214,7 +1581,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 49, "id": "a10e881e-928c-41aa-b47e-c33e9fdfb95d", "metadata": {}, "outputs": [ @@ -1222,22 +1589,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 1238 entries, 0 to 1237\n", + "\n", + "RangeIndex: 31 entries, 0 to 30\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 detection_id 1237 non-null object \n", - " 1 detection_date 1238 non-null datetime64[ns]\n", - " 2 structure_id 1238 non-null object \n", - " 3 lat 1238 non-null float64 \n", - " 4 lon 1238 non-null float64 \n", - " 
5 structure_start_date 1238 non-null datetime64[ns]\n", - " 6 structure_end_date 7 non-null datetime64[ns]\n", - " 7 label 1238 non-null object \n", - " 8 label_confidence 1238 non-null object \n", - "dtypes: datetime64[ns](3), float64(2), object(4)\n", - "memory usage: 87.2+ KB\n" + " 0 detection_id 30 non-null str \n", + " 1 detection_date 31 non-null datetime64[us]\n", + " 2 structure_id 31 non-null str \n", + " 3 lat 31 non-null float64 \n", + " 4 lon 31 non-null float64 \n", + " 5 structure_start_date 31 non-null datetime64[us]\n", + " 6 structure_end_date 7 non-null datetime64[us]\n", + " 7 label 31 non-null str \n", + " 8 label_confidence 31 non-null str \n", + "dtypes: datetime64[us](3), float64(2), str(4)\n", + "memory usage: 2.3 KB\n" ] } ], @@ -1247,7 +1614,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 50, "id": "2c0bb2d5-c119-4a74-800b-aef1efbc5df4", "metadata": {}, "outputs": [ @@ -1286,60 +1653,60 @@ " \n", " \n", " 0\n", - " S1AB_AD_MEDIAN_COMP_20170902T000000_20180301T0...\n", - " 2017-12-01\n", - " 313068\n", - " -52.612197\n", - " -68.449292\n", - " 2017-01-01\n", + " S1AB_AD_MEDIAN_COMP_20240601T000000_20241201T0...\n", + " 2024-09-01\n", + " 1051638\n", + " -53.089557\n", + " -67.322891\n", + " 2024-02-01\n", " NaT\n", " oil\n", " high\n", " \n", " \n", " 1\n", - " S1AB_AD_MEDIAN_COMP_20231101T000000_20240501T0...\n", + " S1AB_AD_MEDIAN_COMP_20240401T000000_20241001T0...\n", + " 2024-07-01\n", + " 1051638\n", + " -53.089557\n", + " -67.322891\n", " 2024-02-01\n", - " 313068\n", - " -52.612197\n", - " -68.449292\n", - " 2017-01-01\n", " NaT\n", " oil\n", " high\n", " \n", " \n", " 2\n", - " S1AB_AD_MEDIAN_COMP_20230501T000000_20231101T0...\n", - " 2023-08-01\n", - " 313068\n", - " -52.612197\n", - " -68.449292\n", - " 2017-01-01\n", + " S1AB_AD_MEDIAN_COMP_20240301T000000_20240901T0...\n", + " 2024-06-01\n", + " 1051638\n", + " -53.089557\n", + " -67.322891\n", + " 2024-02-01\n", " NaT\n", " oil\n", " high\n", " 
\n", " \n", " 3\n", - " S1AB_AD_MEDIAN_COMP_20220901T000000_20230301T0...\n", - " 2022-12-01\n", - " 313068\n", - " -52.612197\n", - " -68.449292\n", - " 2017-01-01\n", + " S1AB_AD_MEDIAN_COMP_20240101T000000_20240701T0...\n", + " 2024-04-01\n", + " 1051638\n", + " -53.089557\n", + " -67.322891\n", + " 2024-02-01\n", " NaT\n", " oil\n", " high\n", " \n", " \n", " 4\n", - " S1AB_AD_MEDIAN_COMP_20220701T000000_20230101T0...\n", - " 2022-10-01\n", - " 313068\n", - " -52.612197\n", - " -68.449292\n", - " 2017-01-01\n", + " S1AB_AD_MEDIAN_COMP_20231101T000000_20240501T0...\n", + " 2024-02-01\n", + " 1051638\n", + " -53.089557\n", + " -67.322891\n", + " 2024-02-01\n", " NaT\n", " oil\n", " high\n", @@ -1350,18 +1717,18 @@ ], "text/plain": [ " detection_id detection_date \\\n", - "0 S1AB_AD_MEDIAN_COMP_20170902T000000_20180301T0... 2017-12-01 \n", - "1 S1AB_AD_MEDIAN_COMP_20231101T000000_20240501T0... 2024-02-01 \n", - "2 S1AB_AD_MEDIAN_COMP_20230501T000000_20231101T0... 2023-08-01 \n", - "3 S1AB_AD_MEDIAN_COMP_20220901T000000_20230301T0... 2022-12-01 \n", - "4 S1AB_AD_MEDIAN_COMP_20220701T000000_20230101T0... 2022-10-01 \n", + "0 S1AB_AD_MEDIAN_COMP_20240601T000000_20241201T0... 2024-09-01 \n", + "1 S1AB_AD_MEDIAN_COMP_20240401T000000_20241001T0... 2024-07-01 \n", + "2 S1AB_AD_MEDIAN_COMP_20240301T000000_20240901T0... 2024-06-01 \n", + "3 S1AB_AD_MEDIAN_COMP_20240101T000000_20240701T0... 2024-04-01 \n", + "4 S1AB_AD_MEDIAN_COMP_20231101T000000_20240501T0... 
2024-02-01 \n", "\n", " structure_id lat lon structure_start_date structure_end_date \\\n", - "0 313068 -52.612197 -68.449292 2017-01-01 NaT \n", - "1 313068 -52.612197 -68.449292 2017-01-01 NaT \n", - "2 313068 -52.612197 -68.449292 2017-01-01 NaT \n", - "3 313068 -52.612197 -68.449292 2017-01-01 NaT \n", - "4 313068 -52.612197 -68.449292 2017-01-01 NaT \n", + "0 1051638 -53.089557 -67.322891 2024-02-01 NaT \n", + "1 1051638 -53.089557 -67.322891 2024-02-01 NaT \n", + "2 1051638 -53.089557 -67.322891 2024-02-01 NaT \n", + "3 1051638 -53.089557 -67.322891 2024-02-01 NaT \n", + "4 1051638 -53.089557 -67.322891 2024-02-01 NaT \n", "\n", " label label_confidence \n", "0 oil high \n", @@ -1371,7 +1738,7 @@ "4 oil high " ] }, - "execution_count": 38, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/usage-guides/events-api.ipynb b/notebooks/usage-guides/events-api.ipynb index bed1978..301a3df 100644 --- a/notebooks/usage-guides/events-api.ipynb +++ b/notebooks/usage-guides/events-api.ipynb @@ -120,6 +120,8 @@ "source": [ "import os\n", "\n", + "import geopandas as gpd\n", + "\n", "import gfwapiclient as gfw" ] }, @@ -163,12 +165,52 @@ "id": "f3329f1a-ba64-4cbc-b20d-d27a6e4fa41e" }, "source": [ - "## Retrieving All Events (`get_all_events`)" + "## Retrieving All Events from Predefined Region (`get_all_events`)" + ] + }, + { + "cell_type": "markdown", + "id": "f9a01cb5-37f0-4b48-8d28-650d2ea552ff", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." 
] }, { "cell_type": "code", "execution_count": 5, + "id": "b43a3c46-1048-4f7b-b163-27bedbbdd406", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"CHN\")\n", + "chn_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c4bba55f-3ea7-442c-b7de-3b2ff88c7690", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('8486', 'public-eez-areas', 'Chinese Exclusive Economic Zone', 'CHN')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chn_eez_roi.id, chn_eez_roi.dataset, chn_eez_roi.label, chn_eez_roi.iso3" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "id": "4312a999-d852-4d66-97d0-12f25e35d6fe", "metadata": { "id": "4312a999-d852-4d66-97d0-12f25e35d6fe" @@ -177,12 +219,9 @@ "source": [ "events_result = await gfw_client.events.get_all_events(\n", " datasets=[\"public-global-fishing-events:latest\"],\n", - " start_date=\"2020-10-01\",\n", - " end_date=\"2020-12-31\",\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"8371\",\n", - " },\n", + " start_date=\"2017-01-01\",\n", + " end_date=\"2017-01-31\",\n", + " region=chn_eez_roi,\n", " limit=5,\n", ")" ] @@ -199,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "ae264f6f-c7b6-4f48-918b-97b665b065cb", "metadata": { "id": "ae264f6f-c7b6-4f48-918b-97b665b065cb" @@ -211,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "8254e481-c277-48b9-a5f6-88c785345b75", "metadata": { "id": "8254e481-c277-48b9-a5f6-88c785345b75" @@ -223,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "61bd0002-c092-4a5b-bb31-10562cf2a2e1", "metadata": { "colab": { @@ -236,12 +275,12 @@ { "data": { "text/plain": [ - "('bbbf5d0cfa9639e5eac0130fc2b742e9',\n", + 
"('54e1b8739c8ef032f2384e866b56077b',\n", " 'fishing',\n", - " '7374d1988-87f8-6037-66b4-59854a026efb')" + " 'de2fb30db-b118-8a4e-edac-3764639a0d9e')" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -262,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "fb1d9156-c869-400a-aa09-328ec43cd4ff", "metadata": { "id": "fb1d9156-c869-400a-aa09-328ec43cd4ff" @@ -274,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "dafd51a8-8370-471e-9a2b-f523351a5b46", "metadata": {}, "outputs": [ @@ -282,15 +321,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 5 entries, 0 to 4\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 start 5 non-null datetime64[ns, UTC]\n", - " 1 end 5 non-null datetime64[ns, UTC]\n", - " 2 id 5 non-null object \n", - " 3 type 5 non-null object \n", + " 0 start 5 non-null datetime64[us, UTC]\n", + " 1 end 5 non-null datetime64[us, UTC]\n", + " 2 id 5 non-null str \n", + " 3 type 5 non-null str \n", " 4 position 5 non-null object \n", " 5 regions 5 non-null object \n", " 6 bounding_box 5 non-null object \n", @@ -301,7 +340,7 @@ " 11 gap 0 non-null object \n", " 12 loitering 0 non-null object \n", " 13 port_visit 0 non-null object \n", - "dtypes: datetime64[ns, UTC](2), object(12)\n", + "dtypes: datetime64[us, UTC](2), object(10), str(2)\n", "memory usage: 692.0+ bytes\n" ] } @@ -312,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "33328ed3-e2c0-456c-89d2-e13df6cb6911", "metadata": { "colab": { @@ -363,85 +402,85 @@ " \n", " \n", " 0\n", - " 2020-09-29 01:36:43+00:00\n", - " 2020-10-01 06:21:11+00:00\n", - " a0f5848d1a83b7f0b4b8cda6873699ba\n", + " 2016-12-28 22:53:16+00:00\n", + " 2017-01-01 05:17:36+00:00\n", + " 05cb42b98c7fcbb807527ef6749bdeda\n", " 
fishing\n", - " {'lat': 14.6865, 'lon': -17.4115}\n", - " {'mpa': ['555705172'], 'eez': ['8371'], 'rfmo'...\n", - " [-17.4119, 14.686378333333334, -17.41116833333...\n", - " {'start_distance_from_shore_km': 2.0, 'end_dis...\n", - " {'id': '9e01144bf-f383-e634-3178-ca7e34477f34'...\n", + " {'lat': 38.5911, 'lon': 118.3242}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['PICES',...\n", + " [118.1391833333, 38.4701833333, 118.5858933333...\n", + " {'start_distance_from_shore_km': 31.0, 'end_di...\n", + " {'id': '1460ebe3f-fe57-05ee-5df7-803252df3983'...\n", " None\n", - " {'total_distance_km': 3.4650098660546633, 'ave...\n", + " {'total_distance_km': 352.0775271758605, 'aver...\n", " None\n", " None\n", " None\n", " \n", " \n", " 1\n", - " 2020-09-29 10:27:14+00:00\n", - " 2020-10-01 06:16:35+00:00\n", - " 670e2c0d5e1423f7d63821200a140d6f\n", + " 2016-12-29 05:52:08+00:00\n", + " 2017-01-09 23:03:41+00:00\n", + " d0f7824acc0120713b806510205335c0\n", " fishing\n", - " {'lat': 13.7439, 'lon': -17.0539}\n", - " {'mpa': ['555651502'], 'eez': ['8371'], 'rfmo'...\n", - " [-16.967778333333342, 13.59392, -17.25, 13.910...\n", - " {'start_distance_from_shore_km': 29.0, 'end_di...\n", - " {'id': '56797171d-dc16-997d-5765-61029b1e0244'...\n", + " {'lat': 38.7679, 'lon': 122.9222}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['ACAP', ...\n", + " [122.8795516667, 38.73512, 122.967835, 38.8140...\n", + " {'start_distance_from_shore_km': 32.0, 'end_di...\n", + " {'id': 'b779f3880-0948-e042-79ac-7075cae0834a'...\n", " None\n", - " {'total_distance_km': 294.3073276985627, 'aver...\n", + " {'total_distance_km': 890.0985273915185, 'aver...\n", " None\n", " None\n", " None\n", " \n", " \n", " 2\n", - " 2020-09-30 07:13:23+00:00\n", - " 2020-10-01 05:15:51+00:00\n", - " d1d52b881f7c22df0dc289534679d647\n", + " 2016-12-29 06:02:29+00:00\n", + " 2017-01-07 15:27:55+00:00\n", + " 2e3edcbcf68a0be70c020d564a5bee3e\n", " fishing\n", - " {'lat': 14.3766, 'lon': -17.2489}\n", - " {'mpa': 
['555705172'], 'eez': ['8371'], 'rfmo'...\n", - " [-17.331973333333334, 14.21384, -17.1248533333...\n", - " {'start_distance_from_shore_km': 40.0, 'end_di...\n", - " {'id': 'd29f3a946-6ece-2c04-7c5f-43dcc3515707'...\n", + " {'lat': 38.8054, 'lon': 122.9166}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',...\n", + " [122.8849333333, 38.7806466667, 122.9357066667...\n", + " {'start_distance_from_shore_km': 28.0, 'end_di...\n", + " {'id': 'b36e8c96e-e858-c54e-fdd5-ddd9f12e446e'...\n", " None\n", - " {'total_distance_km': 130.6216027333083, 'aver...\n", + " {'total_distance_km': 700.8972967886531, 'aver...\n", " None\n", " None\n", " None\n", " \n", " \n", " 3\n", - " 2020-09-30 12:35:44+00:00\n", - " 2020-10-01 07:11:05+00:00\n", - " cebc86436ee6fb3e792a49ced7840ea4\n", + " 2016-12-29 06:20:32+00:00\n", + " 2017-01-08 00:45:54+00:00\n", + " 3d57f4311f1caebe1479057da1fb2d66\n", " fishing\n", - " {'lat': 15.501, 'lon': -17.2188}\n", - " {'mpa': [], 'eez': ['8371'], 'rfmo': ['SRFC', ...\n", - " [-17.1261683333, 15.2266266667, -17.3346466666...\n", - " {'start_distance_from_shore_km': 20.0, 'end_di...\n", - " {'id': '14fdac9e8-8a83-0d3b-0483-94e520ceaf80'...\n", + " {'lat': 38.7666, 'lon': 122.9176}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',...\n", + " [122.8741416667, 38.7374516667, 122.9684316667...\n", + " {'start_distance_from_shore_km': 32.0, 'end_di...\n", + " {'id': 'c5da36777-79a9-4eb8-09c6-7db3c7e8bd4f'...\n", " None\n", - " {'total_distance_km': 111.11378725724042, 'ave...\n", + " {'total_distance_km': 730.167140184076, 'avera...\n", " None\n", " None\n", " None\n", " \n", " \n", " 4\n", - " 2020-09-30 13:18:27+00:00\n", - " 2020-10-01 07:33:45+00:00\n", - " bbbf5d0cfa9639e5eac0130fc2b742e9\n", + " 2016-12-29 12:50:28+00:00\n", + " 2017-01-01 12:35:12+00:00\n", + " 54e1b8739c8ef032f2384e866b56077b\n", " fishing\n", - " {'lat': 14.9647, 'lon': -17.6039}\n", - " {'mpa': [], 'eez': ['8371'], 'rfmo': ['ACAP', ...\n", - " [-17.480231666700007, 
14.8685916667, -17.67519...\n", - " {'start_distance_from_shore_km': 11.0, 'end_di...\n", - " {'id': '7374d1988-87f8-6037-66b4-59854a026efb'...\n", + " {'lat': 38.9222, 'lon': 120.8962}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['IWC', '...\n", + " [120.8618383333, 38.86113, 120.9324016667, 38....\n", + " {'start_distance_from_shore_km': 14.0, 'end_di...\n", + " {'id': 'de2fb30db-b118-8a4e-edac-3764639a0d9e'...\n", " None\n", - " {'total_distance_km': 93.2534926758473, 'avera...\n", + " {'total_distance_km': 251.8224420985952, 'aver...\n", " None\n", " None\n", " None\n", @@ -452,60 +491,60 @@ ], "text/plain": [ " start end \\\n", - "0 2020-09-29 01:36:43+00:00 2020-10-01 06:21:11+00:00 \n", - "1 2020-09-29 10:27:14+00:00 2020-10-01 06:16:35+00:00 \n", - "2 2020-09-30 07:13:23+00:00 2020-10-01 05:15:51+00:00 \n", - "3 2020-09-30 12:35:44+00:00 2020-10-01 07:11:05+00:00 \n", - "4 2020-09-30 13:18:27+00:00 2020-10-01 07:33:45+00:00 \n", + "0 2016-12-28 22:53:16+00:00 2017-01-01 05:17:36+00:00 \n", + "1 2016-12-29 05:52:08+00:00 2017-01-09 23:03:41+00:00 \n", + "2 2016-12-29 06:02:29+00:00 2017-01-07 15:27:55+00:00 \n", + "3 2016-12-29 06:20:32+00:00 2017-01-08 00:45:54+00:00 \n", + "4 2016-12-29 12:50:28+00:00 2017-01-01 12:35:12+00:00 \n", "\n", " id type \\\n", - "0 a0f5848d1a83b7f0b4b8cda6873699ba fishing \n", - "1 670e2c0d5e1423f7d63821200a140d6f fishing \n", - "2 d1d52b881f7c22df0dc289534679d647 fishing \n", - "3 cebc86436ee6fb3e792a49ced7840ea4 fishing \n", - "4 bbbf5d0cfa9639e5eac0130fc2b742e9 fishing \n", + "0 05cb42b98c7fcbb807527ef6749bdeda fishing \n", + "1 d0f7824acc0120713b806510205335c0 fishing \n", + "2 2e3edcbcf68a0be70c020d564a5bee3e fishing \n", + "3 3d57f4311f1caebe1479057da1fb2d66 fishing \n", + "4 54e1b8739c8ef032f2384e866b56077b fishing \n", "\n", " position \\\n", - "0 {'lat': 14.6865, 'lon': -17.4115} \n", - "1 {'lat': 13.7439, 'lon': -17.0539} \n", - "2 {'lat': 14.3766, 'lon': -17.2489} \n", - "3 {'lat': 15.501, 'lon': -17.2188} \n", - "4 
{'lat': 14.9647, 'lon': -17.6039} \n", + "0 {'lat': 38.5911, 'lon': 118.3242} \n", + "1 {'lat': 38.7679, 'lon': 122.9222} \n", + "2 {'lat': 38.8054, 'lon': 122.9166} \n", + "3 {'lat': 38.7666, 'lon': 122.9176} \n", + "4 {'lat': 38.9222, 'lon': 120.8962} \n", "\n", " regions \\\n", - "0 {'mpa': ['555705172'], 'eez': ['8371'], 'rfmo'... \n", - "1 {'mpa': ['555651502'], 'eez': ['8371'], 'rfmo'... \n", - "2 {'mpa': ['555705172'], 'eez': ['8371'], 'rfmo'... \n", - "3 {'mpa': [], 'eez': ['8371'], 'rfmo': ['SRFC', ... \n", - "4 {'mpa': [], 'eez': ['8371'], 'rfmo': ['ACAP', ... \n", + "0 {'mpa': [], 'eez': ['8486'], 'rfmo': ['PICES',... \n", + "1 {'mpa': [], 'eez': ['8486'], 'rfmo': ['ACAP', ... \n", + "2 {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',... \n", + "3 {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',... \n", + "4 {'mpa': [], 'eez': ['8486'], 'rfmo': ['IWC', '... \n", "\n", " bounding_box \\\n", - "0 [-17.4119, 14.686378333333334, -17.41116833333... \n", - "1 [-16.967778333333342, 13.59392, -17.25, 13.910... \n", - "2 [-17.331973333333334, 14.21384, -17.1248533333... \n", - "3 [-17.1261683333, 15.2266266667, -17.3346466666... \n", - "4 [-17.480231666700007, 14.8685916667, -17.67519... \n", + "0 [118.1391833333, 38.4701833333, 118.5858933333... \n", + "1 [122.8795516667, 38.73512, 122.967835, 38.8140... \n", + "2 [122.8849333333, 38.7806466667, 122.9357066667... \n", + "3 [122.8741416667, 38.7374516667, 122.9684316667... \n", + "4 [120.8618383333, 38.86113, 120.9324016667, 38.... \n", "\n", " distances \\\n", - "0 {'start_distance_from_shore_km': 2.0, 'end_dis... \n", - "1 {'start_distance_from_shore_km': 29.0, 'end_di... \n", - "2 {'start_distance_from_shore_km': 40.0, 'end_di... \n", - "3 {'start_distance_from_shore_km': 20.0, 'end_di... \n", - "4 {'start_distance_from_shore_km': 11.0, 'end_di... \n", + "0 {'start_distance_from_shore_km': 31.0, 'end_di... \n", + "1 {'start_distance_from_shore_km': 32.0, 'end_di... 
\n", + "2 {'start_distance_from_shore_km': 28.0, 'end_di... \n", + "3 {'start_distance_from_shore_km': 32.0, 'end_di... \n", + "4 {'start_distance_from_shore_km': 14.0, 'end_di... \n", "\n", " vessel encounter \\\n", - "0 {'id': '9e01144bf-f383-e634-3178-ca7e34477f34'... None \n", - "1 {'id': '56797171d-dc16-997d-5765-61029b1e0244'... None \n", - "2 {'id': 'd29f3a946-6ece-2c04-7c5f-43dcc3515707'... None \n", - "3 {'id': '14fdac9e8-8a83-0d3b-0483-94e520ceaf80'... None \n", - "4 {'id': '7374d1988-87f8-6037-66b4-59854a026efb'... None \n", + "0 {'id': '1460ebe3f-fe57-05ee-5df7-803252df3983'... None \n", + "1 {'id': 'b779f3880-0948-e042-79ac-7075cae0834a'... None \n", + "2 {'id': 'b36e8c96e-e858-c54e-fdd5-ddd9f12e446e'... None \n", + "3 {'id': 'c5da36777-79a9-4eb8-09c6-7db3c7e8bd4f'... None \n", + "4 {'id': 'de2fb30db-b118-8a4e-edac-3764639a0d9e'... None \n", "\n", " fishing gap loitering \\\n", - "0 {'total_distance_km': 3.4650098660546633, 'ave... None None \n", - "1 {'total_distance_km': 294.3073276985627, 'aver... None None \n", - "2 {'total_distance_km': 130.6216027333083, 'aver... None None \n", - "3 {'total_distance_km': 111.11378725724042, 'ave... None None \n", - "4 {'total_distance_km': 93.2534926758473, 'avera... None None \n", + "0 {'total_distance_km': 352.0775271758605, 'aver... None None \n", + "1 {'total_distance_km': 890.0985273915185, 'aver... None None \n", + "2 {'total_distance_km': 700.8972967886531, 'aver... None None \n", + "3 {'total_distance_km': 730.167140184076, 'avera... None None \n", + "4 {'total_distance_km': 251.8224420985952, 'aver... 
None None \n", "\n", " port_visit \n", "0 None \n", @@ -515,7 +554,7 @@ "4 None " ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -526,152 +565,168 @@ }, { "cell_type": "markdown", - "id": "2b8b1aaf-d84b-4d32-9c32-8efaa582279a", - "metadata": { - "id": "2b8b1aaf-d84b-4d32-9c32-8efaa582279a" - }, + "id": "d236d94a-ba1b-49e5-8d8b-9166a6eb44f8", + "metadata": {}, "source": [ - "## Retrieving a Single Event by ID (`get_event_by_id`)" + "## Retrieving All Events from Custom Region (`get_all_events`)" + ] + }, + { + "cell_type": "markdown", + "id": "62c8f347-dfc7-43da-b6b2-96d2d1b05941", + "metadata": {}, + "source": [ + "**Note:** A custom region can be either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), a GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, an object implementing `__geo_interface__`, etc.), or a `GeoJson` model instance. Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html), and supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip)." 
] }, { "cell_type": "code", - "execution_count": 12, - "id": "7153066a-abe9-4b1a-9751-378f5486ed61", - "metadata": { - "id": "7153066a-abe9-4b1a-9751-378f5486ed61" - }, + "execution_count": 14, + "id": "10be7414-9152-4eec-8cbd-6942fdbb12b8", + "metadata": {}, "outputs": [], "source": [ - "event_result = await gfw_client.events.get_event_by_id(\n", - " id=\"c2f0967e061f99a01793edac065de003\",\n", - " dataset=\"public-global-port-visits-events:latest\",\n", + "filename = \"https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/events/geometry/geometry.shp\"" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5b85b9c8-af3d-41a2-91e2-899d0c9d44e9", + "metadata": {}, + "outputs": [], + "source": [ + "custom_roi_gdf = gpd.read_file(filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5c9e009a-9d98-4620-adbb-0cf788dc3e22", + "metadata": {}, + "outputs": [], + "source": [ + "custom_events_result = await gfw_client.events.get_all_events(\n", + " datasets=[\"public-global-fishing-events:latest\"],\n", + " start_date=\"2017-01-01\",\n", + " end_date=\"2017-01-31\",\n", + " geometry=custom_roi_gdf,\n", + " limit=5,\n", ")" ] }, { "cell_type": "markdown", - "id": "afec0170-02f9-4a8d-b307-d9d14c989e78", - "metadata": { - "id": "afec0170-02f9-4a8d-b307-d9d14c989e78" - }, + "id": "7c65b01d-fdbf-43bb-97dd-adc60d587370", + "metadata": {}, "source": [ - "### Access the event model as Pydantic model" + "### Access the list of events as Pydantic models" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "07f3cec3-752b-494e-be95-f96ff7cda93a", - "metadata": { - "id": "07f3cec3-752b-494e-be95-f96ff7cda93a" - }, + "execution_count": 17, + "id": "a76e24f4-e7b9-4a0b-880d-b9367fa98acf", + "metadata": {}, "outputs": [], "source": [ - "event = event_result.data()" + "custom_events_data = custom_events_result.data()" ] }, { "cell_type": "code", - "execution_count": 14, - "id": 
"1598ba96-1e0e-40ce-ade8-c2ccd721cad7", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "1598ba96-1e0e-40ce-ade8-c2ccd721cad7", - "outputId": "abedfe38-e1a1-4686-eb1e-b614e4a253e7" - }, + "execution_count": 18, + "id": "11c7b96a-7355-441c-942d-3f5f876e633b", + "metadata": {}, + "outputs": [], + "source": [ + "custom_event = custom_events_data[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0e6cc3a2-6ea0-4688-ac68-0272cf9ba1eb", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('c2f0967e061f99a01793edac065de003',\n", - " 'port_visit',\n", - " '8c7304226-6c71-edbe-0b63-c246734b3c01')" + "('5c03609c64d96c6ca5bfaaca0e9d9b6c',\n", + " 'fishing',\n", + " 'c01e0a0d2-20d9-7cc6-e04e-449dae2fbd95')" ] }, - "execution_count": 14, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "event.id, event.type, event.vessel.id" + "custom_event.id, custom_event.type, custom_event.vessel.id" ] }, { "cell_type": "markdown", - "id": "46620eff-1618-4db2-b79c-bd0092e1d6a6", - "metadata": { - "id": "46620eff-1618-4db2-b79c-bd0092e1d6a6" - }, + "id": "899dd9e4-1c20-487f-a01c-c9ee95f763f7", + "metadata": {}, "source": [ - "### Access the event as a DataFrame" + "### Access the events as a DataFrame" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "5924b8c6-da8e-42a9-94ee-9b67ebdb1e7e", - "metadata": { - "id": "5924b8c6-da8e-42a9-94ee-9b67ebdb1e7e" - }, + "execution_count": 20, + "id": "fa4b930d-344d-4d85-a294-906d94be86b5", + "metadata": {}, "outputs": [], "source": [ - "event_df = event_result.df()" + "custom_events_df = custom_events_result.df()" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "1754d20c-5a9c-42f1-8e02-d10fe24a43c3", + "execution_count": 21, + "id": "53764281-ad9d-4a9b-9f42-a4366e3cdb8f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 1 entries, 0 to 0\n", + "\n", + "RangeIndex: 
5 entries, 0 to 4\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 start 1 non-null datetime64[ns, UTC]\n", - " 1 end 1 non-null datetime64[ns, UTC]\n", - " 2 id 1 non-null object \n", - " 3 type 1 non-null object \n", - " 4 position 1 non-null object \n", - " 5 regions 1 non-null object \n", - " 6 bounding_box 1 non-null object \n", - " 7 distances 1 non-null object \n", - " 8 vessel 1 non-null object \n", + " 0 start 5 non-null datetime64[us, UTC]\n", + " 1 end 5 non-null datetime64[us, UTC]\n", + " 2 id 5 non-null str \n", + " 3 type 5 non-null str \n", + " 4 position 5 non-null object \n", + " 5 regions 5 non-null object \n", + " 6 bounding_box 5 non-null object \n", + " 7 distances 5 non-null object \n", + " 8 vessel 5 non-null object \n", " 9 encounter 0 non-null object \n", - " 10 fishing 0 non-null object \n", + " 10 fishing 5 non-null object \n", " 11 gap 0 non-null object \n", " 12 loitering 0 non-null object \n", - " 13 port_visit 1 non-null object \n", - "dtypes: datetime64[ns, UTC](2), object(12)\n", - "memory usage: 244.0+ bytes\n" + " 13 port_visit 0 non-null object \n", + "dtypes: datetime64[us, UTC](2), object(10), str(2)\n", + "memory usage: 692.0+ bytes\n" ] } ], "source": [ - "event_df.info()" + "custom_events_df.info()" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "9e6befd3-f970-4a05-b2d0-63ddd4adcfe0", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 153 - }, - "id": "9e6befd3-f970-4a05-b2d0-63ddd4adcfe0", - "outputId": "7a28167b-20af-4202-ed15-68d1293304b9" - }, + "execution_count": 22, + "id": "bd8c05f6-67c0-4b26-add7-15d91948253b", + "metadata": {}, "outputs": [ { "data": { @@ -713,20 +768,88 @@ " \n", " \n", " 0\n", - " 2020-01-26 05:52:47+00:00\n", - " 2020-01-29 14:39:33+00:00\n", - " c2f0967e061f99a01793edac065de003\n", - " port_visit\n", - " {'lat': 20.7288, 'lon': -17.0148}\n", - " {'mpa': [], 'eez': ['8369'], 
'rfmo': ['NAMMCO'...\n", - " [-17.014774393446658, 20.72879719687954, -17.0...\n", - " {'start_distance_from_shore_km': 7.0, 'end_dis...\n", - " {'id': '8c7304226-6c71-edbe-0b63-c246734b3c01'...\n", + " 2016-12-30 05:09:54+00:00\n", + " 2017-01-01 20:03:59+00:00\n", + " d6ee1cb7351fcb2c6b6e3b675335cb2c\n", + " fishing\n", + " {'lat': 27.1871, 'lon': 121.3279}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',...\n", + " [121.139415, 27.0787716667, 121.4224816667, 27...\n", + " {'start_distance_from_shore_km': 40.0, 'end_di...\n", + " {'id': '4de4289ac-cb8b-81de-4799-05b592602bf1'...\n", " None\n", + " {'total_distance_km': 208.03116341478372, 'ave...\n", + " None\n", + " None\n", + " None\n", + " \n", + " \n", + " 1\n", + " 2016-12-30 05:21:43+00:00\n", + " 2017-01-02 03:17:00+00:00\n", + " b5b0895669f4123b5da0eed20210342d\n", + " fishing\n", + " {'lat': 27.0703, 'lon': 121.2056}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['APFIC',...\n", + " [121.0888466667, 26.9642966667, 121.34091, 27....\n", + " {'start_distance_from_shore_km': 38.0, 'end_di...\n", + " {'id': '3ee872640-08b1-04cf-75f8-d90932461f6f'...\n", + " None\n", + " {'total_distance_km': 221.3299351192625, 'aver...\n", + " None\n", + " None\n", + " None\n", + " \n", + " \n", + " 2\n", + " 2016-12-30 06:26:05+00:00\n", + " 2017-01-02 02:38:20+00:00\n", + " 088ef328a8fe54bd46e6fccd539ae55a\n", + " fishing\n", + " {'lat': 27.0929, 'lon': 121.2196}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['APFIC',...\n", + " [121.1208266667, 27.00363, 121.3079333333, 27....\n", + " {'start_distance_from_shore_km': 40.0, 'end_di...\n", + " {'id': '55cf89775-54e1-89aa-6e92-fef0b8deb419'...\n", + " None\n", + " {'total_distance_km': 224.6395440027435, 'aver...\n", + " None\n", + " None\n", + " None\n", + " \n", + " \n", + " 3\n", + " 2016-12-30 07:28:06+00:00\n", + " 2017-01-02 03:49:40+00:00\n", + " 8326eb5fb66215de739f0c889a9b6a8e\n", + " fishing\n", + " {'lat': 27.0865, 'lon': 121.2232}\n", + " {'mpa': [], 'eez': 
['8486'], 'rfmo': ['WCPFC',...\n", + " [121.1017333333, 26.9795383333, 121.3309066667...\n", + " {'start_distance_from_shore_km': 38.0, 'end_di...\n", + " {'id': '3cedf7bd9-9800-1fc2-11d7-7985205d926e'...\n", + " None\n", + " {'total_distance_km': 235.3635712625424, 'aver...\n", + " None\n", + " None\n", + " None\n", + " \n", + " \n", + " 4\n", + " 2016-12-30 07:46:18+00:00\n", + " 2017-01-02 04:28:40+00:00\n", + " 5c03609c64d96c6ca5bfaaca0e9d9b6c\n", + " fishing\n", + " {'lat': 27.1714, 'lon': 121.3037}\n", + " {'mpa': [], 'eez': ['8486'], 'rfmo': ['ACAP', ...\n", + " [121.1050983333, 27.0637933333, 121.43991, 27....\n", + " {'start_distance_from_shore_km': 39.0, 'end_di...\n", + " {'id': 'c01e0a0d2-20d9-7cc6-e04e-449dae2fbd95'...\n", + " None\n", + " {'total_distance_km': 223.68434789759047, 'ave...\n", " None\n", " None\n", " None\n", - " {'visit_id': '38affb3e7bdc67e9c0c2e7e8f3b08da2...\n", " \n", " \n", "\n", @@ -734,65 +857,347 @@ ], "text/plain": [ " start end \\\n", - "0 2020-01-26 05:52:47+00:00 2020-01-29 14:39:33+00:00 \n", + "0 2016-12-30 05:09:54+00:00 2017-01-01 20:03:59+00:00 \n", + "1 2016-12-30 05:21:43+00:00 2017-01-02 03:17:00+00:00 \n", + "2 2016-12-30 06:26:05+00:00 2017-01-02 02:38:20+00:00 \n", + "3 2016-12-30 07:28:06+00:00 2017-01-02 03:49:40+00:00 \n", + "4 2016-12-30 07:46:18+00:00 2017-01-02 04:28:40+00:00 \n", "\n", - " id type \\\n", - "0 c2f0967e061f99a01793edac065de003 port_visit \n", + " id type \\\n", + "0 d6ee1cb7351fcb2c6b6e3b675335cb2c fishing \n", + "1 b5b0895669f4123b5da0eed20210342d fishing \n", + "2 088ef328a8fe54bd46e6fccd539ae55a fishing \n", + "3 8326eb5fb66215de739f0c889a9b6a8e fishing \n", + "4 5c03609c64d96c6ca5bfaaca0e9d9b6c fishing \n", "\n", " position \\\n", - "0 {'lat': 20.7288, 'lon': -17.0148} \n", + "0 {'lat': 27.1871, 'lon': 121.3279} \n", + "1 {'lat': 27.0703, 'lon': 121.2056} \n", + "2 {'lat': 27.0929, 'lon': 121.2196} \n", + "3 {'lat': 27.0865, 'lon': 121.2232} \n", + "4 {'lat': 27.1714, 'lon': 121.3037} 
\n", "\n", " regions \\\n", - "0 {'mpa': [], 'eez': ['8369'], 'rfmo': ['NAMMCO'... \n", + "0 {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',... \n", + "1 {'mpa': [], 'eez': ['8486'], 'rfmo': ['APFIC',... \n", + "2 {'mpa': [], 'eez': ['8486'], 'rfmo': ['APFIC',... \n", + "3 {'mpa': [], 'eez': ['8486'], 'rfmo': ['WCPFC',... \n", + "4 {'mpa': [], 'eez': ['8486'], 'rfmo': ['ACAP', ... \n", "\n", " bounding_box \\\n", - "0 [-17.014774393446658, 20.72879719687954, -17.0... \n", + "0 [121.139415, 27.0787716667, 121.4224816667, 27... \n", + "1 [121.0888466667, 26.9642966667, 121.34091, 27.... \n", + "2 [121.1208266667, 27.00363, 121.3079333333, 27.... \n", + "3 [121.1017333333, 26.9795383333, 121.3309066667... \n", + "4 [121.1050983333, 27.0637933333, 121.43991, 27.... \n", "\n", " distances \\\n", - "0 {'start_distance_from_shore_km': 7.0, 'end_dis... \n", + "0 {'start_distance_from_shore_km': 40.0, 'end_di... \n", + "1 {'start_distance_from_shore_km': 38.0, 'end_di... \n", + "2 {'start_distance_from_shore_km': 40.0, 'end_di... \n", + "3 {'start_distance_from_shore_km': 38.0, 'end_di... \n", + "4 {'start_distance_from_shore_km': 39.0, 'end_di... \n", "\n", - " vessel encounter fishing gap \\\n", - "0 {'id': '8c7304226-6c71-edbe-0b63-c246734b3c01'... None None None \n", + " vessel encounter \\\n", + "0 {'id': '4de4289ac-cb8b-81de-4799-05b592602bf1'... None \n", + "1 {'id': '3ee872640-08b1-04cf-75f8-d90932461f6f'... None \n", + "2 {'id': '55cf89775-54e1-89aa-6e92-fef0b8deb419'... None \n", + "3 {'id': '3cedf7bd9-9800-1fc2-11d7-7985205d926e'... None \n", + "4 {'id': 'c01e0a0d2-20d9-7cc6-e04e-449dae2fbd95'... None \n", "\n", - " loitering port_visit \n", - "0 None {'visit_id': '38affb3e7bdc67e9c0c2e7e8f3b08da2... " + " fishing gap loitering \\\n", + "0 {'total_distance_km': 208.03116341478372, 'ave... None None \n", + "1 {'total_distance_km': 221.3299351192625, 'aver... None None \n", + "2 {'total_distance_km': 224.6395440027435, 'aver... 
None None \n", + "3 {'total_distance_km': 235.3635712625424, 'aver... None None \n", + "4 {'total_distance_km': 223.68434789759047, 'ave... None None \n", + "\n", + " port_visit \n", + "0 None \n", + "1 None \n", + "2 None \n", + "3 None \n", + "4 None " ] }, - "execution_count": 17, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "event_df.head()" + "custom_events_df.head()" ] }, { "cell_type": "markdown", - "id": "907896fe-0855-44e3-ae38-bc93a4b9bc44", + "id": "2b8b1aaf-d84b-4d32-9c32-8efaa582279a", "metadata": { - "id": "907896fe-0855-44e3-ae38-bc93a4b9bc44" + "id": "2b8b1aaf-d84b-4d32-9c32-8efaa582279a" }, "source": [ - "## Getting Event Statistics (`get_events_stats`)" + "## Retrieving a Single Event by ID (`get_event_by_id`)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "aacbe1ad-9806-4f83-8d4c-e37d318e1a08", + "execution_count": 23, + "id": "7153066a-abe9-4b1a-9751-378f5486ed61", "metadata": { - "id": "aacbe1ad-9806-4f83-8d4c-e37d318e1a08" + "id": "7153066a-abe9-4b1a-9751-378f5486ed61" }, "outputs": [], "source": [ - "event_stats_result = await gfw_client.events.get_events_stats(\n", - " datasets=[\"public-global-encounters-events:latest\"],\n", - " encounter_types=[\"CARRIER-FISHING\", \"FISHING-CARRIER\"],\n", - " vessel_types=[\"CARRIER\"],\n", - " start_date=\"2018-01-01\",\n", - " end_date=\"2023-01-31\",\n", - " timeseries_interval=\"YEAR\",\n", + "event_result = await gfw_client.events.get_event_by_id(\n", + " id=\"c2f0967e061f99a01793edac065de003\",\n", + " dataset=\"public-global-port-visits-events:latest\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "afec0170-02f9-4a8d-b307-d9d14c989e78", + "metadata": { + "id": "afec0170-02f9-4a8d-b307-d9d14c989e78" + }, + "source": [ + "### Access the event as Pydantic model" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "07f3cec3-752b-494e-be95-f96ff7cda93a", + "metadata": { + "id": 
"07f3cec3-752b-494e-be95-f96ff7cda93a" + }, + "outputs": [], + "source": [ + "event = event_result.data()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "1598ba96-1e0e-40ce-ade8-c2ccd721cad7", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1598ba96-1e0e-40ce-ade8-c2ccd721cad7", + "outputId": "abedfe38-e1a1-4686-eb1e-b614e4a253e7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('c2f0967e061f99a01793edac065de003',\n", + " 'port_visit',\n", + " '8c7304226-6c71-edbe-0b63-c246734b3c01')" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "event.id, event.type, event.vessel.id" + ] + }, + { + "cell_type": "markdown", + "id": "46620eff-1618-4db2-b79c-bd0092e1d6a6", + "metadata": { + "id": "46620eff-1618-4db2-b79c-bd0092e1d6a6" + }, + "source": [ + "### Access the event as a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "5924b8c6-da8e-42a9-94ee-9b67ebdb1e7e", + "metadata": { + "id": "5924b8c6-da8e-42a9-94ee-9b67ebdb1e7e" + }, + "outputs": [], + "source": [ + "event_df = event_result.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "1754d20c-5a9c-42f1-8e02-d10fe24a43c3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1 entries, 0 to 0\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 start 1 non-null datetime64[us, UTC]\n", + " 1 end 1 non-null datetime64[us, UTC]\n", + " 2 id 1 non-null str \n", + " 3 type 1 non-null str \n", + " 4 position 1 non-null object \n", + " 5 regions 1 non-null object \n", + " 6 bounding_box 1 non-null object \n", + " 7 distances 1 non-null object \n", + " 8 vessel 1 non-null object \n", + " 9 encounter 0 non-null object \n", + " 10 fishing 0 non-null object \n", + " 11 gap 0 non-null object \n", + " 12 loitering 
0 non-null object \n", + " 13 port_visit 1 non-null object \n", + "dtypes: datetime64[us, UTC](2), object(10), str(2)\n", + "memory usage: 244.0+ bytes\n" + ] + } + ], + "source": [ + "event_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9e6befd3-f970-4a05-b2d0-63ddd4adcfe0", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "id": "9e6befd3-f970-4a05-b2d0-63ddd4adcfe0", + "outputId": "7a28167b-20af-4202-ed15-68d1293304b9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
startendidtypepositionregionsbounding_boxdistancesvesselencounterfishinggaploiteringport_visit
02020-01-26 05:52:47+00:002020-01-29 14:39:33+00:00c2f0967e061f99a01793edac065de003port_visit{'lat': 20.7288, 'lon': -17.0148}{'mpa': [], 'eez': ['8369'], 'rfmo': ['IWC', '...[-17.014774393446658, 20.72879719687954, -17.0...{'start_distance_from_shore_km': 7.0, 'end_dis...{'id': '8c7304226-6c71-edbe-0b63-c246734b3c01'...NoneNoneNoneNone{'visit_id': '38affb3e7bdc67e9c0c2e7e8f3b08da2...
\n", + "
" + ], + "text/plain": [ + " start end \\\n", + "0 2020-01-26 05:52:47+00:00 2020-01-29 14:39:33+00:00 \n", + "\n", + " id type \\\n", + "0 c2f0967e061f99a01793edac065de003 port_visit \n", + "\n", + " position \\\n", + "0 {'lat': 20.7288, 'lon': -17.0148} \n", + "\n", + " regions \\\n", + "0 {'mpa': [], 'eez': ['8369'], 'rfmo': ['IWC', '... \n", + "\n", + " bounding_box \\\n", + "0 [-17.014774393446658, 20.72879719687954, -17.0... \n", + "\n", + " distances \\\n", + "0 {'start_distance_from_shore_km': 7.0, 'end_dis... \n", + "\n", + " vessel encounter fishing gap \\\n", + "0 {'id': '8c7304226-6c71-edbe-0b63-c246734b3c01'... None None None \n", + "\n", + " loitering port_visit \n", + "0 None {'visit_id': '38affb3e7bdc67e9c0c2e7e8f3b08da2... " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "event_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "907896fe-0855-44e3-ae38-bc93a4b9bc44", + "metadata": { + "id": "907896fe-0855-44e3-ae38-bc93a4b9bc44" + }, + "source": [ + "## Getting Event Statistics Worldwide (`get_events_stats`)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "aacbe1ad-9806-4f83-8d4c-e37d318e1a08", + "metadata": { + "id": "aacbe1ad-9806-4f83-8d4c-e37d318e1a08" + }, + "outputs": [], + "source": [ + "worldwide_event_stats_result = await gfw_client.events.get_events_stats(\n", + " datasets=[\"public-global-encounters-events:latest\"],\n", + " encounter_types=[\"CARRIER-FISHING\", \"FISHING-CARRIER\"],\n", + " vessel_types=[\"CARRIER\"],\n", + " start_date=\"2018-01-01\",\n", + " end_date=\"2023-01-31\",\n", + " timeseries_interval=\"YEAR\",\n", " flags=[\"RUS\"],\n", " duration=60,\n", ")" @@ -810,19 +1215,19 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 30, "id": "20bbb945-6c45-4ae9-98c6-029a833f0aeb", "metadata": { "id": "20bbb945-6c45-4ae9-98c6-029a833f0aeb" }, "outputs": [], "source": [ - "event_stat = event_stats_result.data()" + 
"worldwide_event_stat = worldwide_event_stats_result.data()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 31, "id": "24742d8a-d55f-4f1d-8d97-556fb2ec6572", "metadata": { "colab": { @@ -835,16 +1240,20 @@ { "data": { "text/plain": [ - "(24770, 1, 196)" + "(24819, 1, 194)" ] }, - "execution_count": 20, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "event_stat.num_events, event_stat.num_flags, event_stat.num_vessels" + "(\n", + " worldwide_event_stat.num_events,\n", + " worldwide_event_stat.num_flags,\n", + " worldwide_event_stat.num_vessels,\n", + ")" ] }, { @@ -859,19 +1268,19 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 32, "id": "bcdf7d04-695c-4bd6-8cbe-6e522339ad58", "metadata": { "id": "bcdf7d04-695c-4bd6-8cbe-6e522339ad58" }, "outputs": [], "source": [ - "event_stat_df = event_stats_result.df()" + "worldwide_event_stat_df = worldwide_event_stats_result.df()" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 33, "id": "d97c2f90-6414-44f4-b82c-9a92731a18db", "metadata": {}, "outputs": [ @@ -879,7 +1288,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", @@ -895,12 +1304,12 @@ } ], "source": [ - "event_stat_df.info()" + "worldwide_event_stat_df.info()" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 34, "id": "0496f743-190f-497c-9433-a4981446c786", "metadata": { "colab": { @@ -942,9 +1351,9 @@ " \n", " \n", " 0\n", - " 24770\n", + " 24819\n", " 1\n", - " 196\n", + " 194\n", " [RUS]\n", " [{'date': 2018-01-01 00:00:00+00:00, 'value': ...\n", " \n", @@ -954,19 +1363,451 @@ ], "text/plain": [ " num_events num_flags num_vessels flags \\\n", - "0 24770 1 196 [RUS] \n", + "0 24819 1 194 [RUS] \n", + "\n", + " timeseries \n", + "0 [{'date': 2018-01-01 00:00:00+00:00, 'value': ... 
" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "worldwide_event_stat_df" + ] + }, + { + "cell_type": "markdown", + "id": "aa17e42d-189f-4882-8ded-f927c69b942a", + "metadata": {}, + "source": [ + "## Getting Event Statistics from Predefined Region (`get_events_stats`)" + ] + }, + { + "cell_type": "markdown", + "id": "142057a5-cb9c-4f36-bd8d-cbbc7faa853d", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "35b7452b-9d30-483c-a317-0d326c4cb0ca", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"SEN\")\n", + "sen_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "60e03dd6-124a-45db-905f-988c8ac2b9ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('8371', 'public-eez-areas', 'Senegalese Exclusive Economic Zone', 'SEN')" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sen_eez_roi.id, sen_eez_roi.dataset, sen_eez_roi.label, sen_eez_roi.iso3" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "9f218001-2c87-41d7-b2cb-7ff19d8ca7b5", + "metadata": {}, + "outputs": [], + "source": [ + "predefined_event_stats_result = await gfw_client.events.get_events_stats(\n", + " datasets=[\"public-global-port-visits-events:latest\"],\n", + " start_date=\"2018-01-01\",\n", + " end_date=\"2019-01-31\",\n", + " 
timeseries_interval=\"YEAR\",\n", + " region=sen_eez_roi,\n", + " confidences=[\"3\", \"4\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0c5c41b6-c806-4201-9c74-8f0db297fb25", + "metadata": {}, + "source": [ + "### Access the statistics as Pydantic models" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "39adc019-800f-4068-a756-678f9ba57913", + "metadata": {}, + "outputs": [], + "source": [ + "predefined_event_stat = predefined_event_stats_result.data()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "615a1458-7233-4e83-874e-655724a0afcd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4528, 75, 1464)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " predefined_event_stat.num_events,\n", + " predefined_event_stat.num_flags,\n", + " predefined_event_stat.num_vessels,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "27c55b10-f647-4666-ab6a-f0480a6baeb5", + "metadata": {}, + "source": [ + "### Access the statistics as a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "5892233c-e2cb-4590-89f4-5bc1ef1253b7", + "metadata": {}, + "outputs": [], + "source": [ + "predefined_event_stat_df = predefined_event_stats_result.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "8a07051a-79b6-4a54-b68d-c41eb10618d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1 entries, 0 to 0\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 num_events 1 non-null int64 \n", + " 1 num_flags 1 non-null int64 \n", + " 2 num_vessels 1 non-null int64 \n", + " 3 flags 1 non-null object\n", + " 4 timeseries 1 non-null object\n", + "dtypes: int64(3), object(2)\n", + "memory usage: 172.0+ bytes\n" + ] + } + ], + "source": [ + 
"predefined_event_stat_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "2c108fc9-6f85-41ee-b9b6-498095a99f05", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_eventsnum_flagsnum_vesselsflagstimeseries
04528751464[, PAN, BHS, CYP, SEN, SGP, CHN, ITA, RUS, ESP...[{'date': 2018-01-01 00:00:00+00:00, 'value': ...
\n", + "
" + ], + "text/plain": [ + " num_events num_flags num_vessels \\\n", + "0 4528 75 1464 \n", + "\n", + " flags \\\n", + "0 [, PAN, BHS, CYP, SEN, SGP, CHN, ITA, RUS, ESP... \n", + "\n", + " timeseries \n", + "0 [{'date': 2018-01-01 00:00:00+00:00, 'value': ... " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predefined_event_stat_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "0e0ce2c8-61a1-4617-8c3f-e4e69bff5f36", + "metadata": {}, + "source": [ + "## Getting Event Statistics from Custom Region (`get_events_stats`)" + ] + }, + { + "cell_type": "markdown", + "id": "2808bac3-2e68-4576-ade4-83b494bf91eb", + "metadata": {}, + "source": [ + "**Note:** Custom region can either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. Spatial files are loaded using [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) and supported formats depend on a properly configured [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "74ba1d58-0c9f-47f4-9b7a-880995fb7042", + "metadata": {}, + "outputs": [], + "source": [ + "filename = \"https://raw.githubusercontent.com/GlobalFishingWatch/gfw-api-python-client/refs/heads/develop/tests/fixtures/events/geometry/geometry.shp\"" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "fbb576b7-ec50-4342-80d2-c9697eb30629", + "metadata": {}, + "outputs": [], + "source": [ + "custom_stats_roi_gdf = gpd.read_file(filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "43ced81d-bc1b-4979-9a4f-401486562cb1", + "metadata": {}, + "outputs": [], + "source": [ + "custom_event_stats_result = await gfw_client.events.get_events_stats(\n", + " datasets=[\"public-global-port-visits-events:latest\"],\n", + " start_date=\"2018-01-01\",\n", + " end_date=\"2019-01-31\",\n", + " timeseries_interval=\"YEAR\",\n", + " geometry=custom_stats_roi_gdf,\n", + " confidences=[\"3\", \"4\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "865d1410-19b4-4a23-97a6-6d242226650a", + "metadata": {}, + "source": [ + "### Access the statistics as Pydantic models" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "8d1ff424-1128-473c-ae35-ecc78f5466e1", + "metadata": {}, + "outputs": [], + "source": [ + "custom_event_stat = custom_event_stats_result.data()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "d7d2bc8b-9e05-4f91-a913-e8e07de850f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(301548, 162, 40996)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " custom_event_stat.num_events,\n", + " custom_event_stat.num_flags,\n", + " custom_event_stat.num_vessels,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "76841b8d-6036-4a21-87db-abb23949bfe9", + "metadata": {}, + "source": [ + "### Access the statistics as a DataFrame" + 
] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "902d2287-ddf4-45fe-8301-857a08d93631", + "metadata": {}, + "outputs": [], + "source": [ + "custom_event_stat_df = custom_event_stats_result.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "ed82895b-d18b-42a7-82a4-076bf72a229b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1 entries, 0 to 0\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 num_events 1 non-null int64 \n", + " 1 num_flags 1 non-null int64 \n", + " 2 num_vessels 1 non-null int64 \n", + " 3 flags 1 non-null object\n", + " 4 timeseries 1 non-null object\n", + "dtypes: int64(3), object(2)\n", + "memory usage: 172.0+ bytes\n" + ] + } + ], + "source": [ + "custom_event_stat_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "e5ca97ad-64c9-4cfd-b253-44cde79b6e52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_eventsnum_flagsnum_vesselsflagstimeseries
030154816240996[CHN, , BTN, CCK, VAT, GRL, FRO, BLR, GEO, ALB...[{'date': 2018-01-01 00:00:00+00:00, 'value': ...
\n", + "
" + ], + "text/plain": [ + " num_events num_flags num_vessels \\\n", + "0 301548 162 40996 \n", + "\n", + " flags \\\n", + "0 [CHN, , BTN, CCK, VAT, GRL, FRO, BLR, GEO, ALB... \n", "\n", " timeseries \n", "0 [{'date': 2018-01-01 00:00:00+00:00, 'value': ... " ] }, - "execution_count": 23, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "event_stat_df" + "custom_event_stat_df.head()" ] } ], diff --git a/notebooks/usage-guides/insights-api.ipynb b/notebooks/usage-guides/insights-api.ipynb index be1aeaf..697239b 100644 --- a/notebooks/usage-guides/insights-api.ipynb +++ b/notebooks/usage-guides/insights-api.ipynb @@ -171,7 +171,7 @@ "id": "ec5549cf-873a-4f34-9dd5-fd651ff85ef5", "metadata": {}, "source": [ - "**Important:** `start_date` must be on or after `January 1, 2020`" + "**Important:** `start_date` must be on or after `January 1, 2020`. [Insights](https://globalfishingwatch.org/our-apis/documentation#insights-api) are available from `January 1, 2020` onwards." ] }, { diff --git a/notebooks/usage-guides/references-data-api.ipynb b/notebooks/usage-guides/references-data-api.ipynb index 1f2d52c..b5ae9db 100644 --- a/notebooks/usage-guides/references-data-api.ipynb +++ b/notebooks/usage-guides/references-data-api.ipynb @@ -156,6 +156,14 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "7964f8e5-52b7-4a0f-89b1-9b23a1b6809d", + "metadata": {}, + "source": [ + "**Note:** Use `gfw_client.references` methods to obtain the **Region of Interest (ROI)**, i.e., `region`, which can then be passed directly to the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." 
+ ] + }, { "cell_type": "markdown", "id": "40e01c84-9a16-4129-9436-1e8fb332f56a", @@ -227,7 +235,7 @@ { "data": { "text/plain": [ - "(48999, 'public-eez-areas', None)" + "('8489', 'public-eez-areas', 'Antartic 200NM zone beyond the coastline', 'ATA')" ] }, "execution_count": 8, @@ -236,7 +244,7 @@ } ], "source": [ - "eez_region.id, eez_region.dataset, eez_region.iso3" + "eez_region.id, eez_region.dataset, eez_region.label, eez_region.iso3" ] }, { @@ -271,21 +279,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 285 entries, 0 to 284\n", + "\n", + "RangeIndex: 286 entries, 0 to 285\n", "Data columns (total 8 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 285 non-null int64 \n", - " 1 label 285 non-null object\n", - " 2 iso3 234 non-null object\n", - " 3 dataset 285 non-null object\n", - " 4 isoSov1 285 non-null object\n", - " 5 isoSov2 56 non-null object\n", - " 6 isoSov3 6 non-null object\n", - " 7 territory1 285 non-null object\n", - "dtypes: int64(1), object(7)\n", - "memory usage: 17.9+ KB\n" + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 dataset 286 non-null str \n", + " 1 id 286 non-null str \n", + " 2 label 286 non-null str \n", + " 3 iso3 235 non-null str \n", + " 4 iso_sov_1 285 non-null str \n", + " 5 iso_sov_2 56 non-null str \n", + " 6 iso_sov_3 6 non-null str \n", + " 7 territory_1 285 non-null str \n", + "dtypes: str(8)\n", + "memory usage: 18.0 KB\n" ] } ], @@ -359,7 +367,7 @@ " 3\n", " 48950\n", " Overlapping claim Kuril Islands: Japan / Russia\n", - " None\n", + " NaN\n", " public-eez-areas\n", " \n", " \n", @@ -374,12 +382,12 @@ "" ], "text/plain": [ - " id label iso3 \\\n", - "0 8491 Jordanian Exclusive Economic Zone JOR \n", - "1 8467 Uruguayan Exclusive Economic Zone URY \n", - "2 5683 Latvian Exclusive Economic Zone LVA \n", - "3 48950 Overlapping claim Kuril Islands: Japan / Russia None \n", - "4 8327 South Korean 
Exclusive Economic Zone KOR \n", + " id label iso3 \\\n", + "0 8491 Jordanian Exclusive Economic Zone JOR \n", + "1 8467 Uruguayan Exclusive Economic Zone URY \n", + "2 5683 Latvian Exclusive Economic Zone LVA \n", + "3 48950 Overlapping claim Kuril Islands: Japan / Russia NaN \n", + "4 8327 South Korean Exclusive Economic Zone KOR \n", "\n", " dataset \n", "0 public-eez-areas \n", @@ -398,6 +406,63 @@ "eez_regions_df[[\"id\", \"label\", \"iso3\", \"dataset\"]].head()" ] }, + { + "cell_type": "markdown", + "id": "da77e69b-d15c-4d57-9022-c491d1d17b17", + "metadata": {}, + "source": [ + "### Filter the list of EEZ regions to Obtain the Region of Interest (ROI)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "81d23d30-24cd-4da2-9b53-e08730feb237", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"SEN\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "204b0444-3c94-4173-96ab-08ad631ce43d", + "metadata": {}, + "outputs": [], + "source": [ + "eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "da64338f-ec13-4e3c-b552-fd58394b3f9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('8371', 'public-eez-areas', 'Senegalese Exclusive Economic Zone', 'SEN')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eez_roi.id, eez_roi.dataset, eez_roi.label, eez_roi.iso3" + ] + }, + { + "cell_type": "markdown", + "id": "24257dc8-7739-40b1-86d2-d38ae5105ef3", + "metadata": {}, + "source": [ + "**Note:** Pass `eez_roi` directly to `region` parameter of the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and 
[Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." + ] + }, { "cell_type": "markdown", "id": "ce7ceffb-8ee3-4791-b279-bb6495b32078", @@ -410,7 +475,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "id": "71915ac9-7cb4-4a11-aee6-0df2ee6f9cd5", "metadata": { "id": "71915ac9-7cb4-4a11-aee6-0df2ee6f9cd5" @@ -432,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "id": "eee11224-fde4-4273-bfa7-da0b2c4bb9a2", "metadata": { "id": "eee11224-fde4-4273-bfa7-da0b2c4bb9a2" @@ -444,7 +509,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "id": "845f42d9-1abe-404c-ae2c-8fc07579a612", "metadata": { "id": "845f42d9-1abe-404c-ae2c-8fc07579a612" @@ -456,7 +521,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "id": "c8254eff-97da-4b10-b48d-bb8eea5845d8", "metadata": { "colab": { @@ -469,16 +534,18 @@ { "data": { "text/plain": [ - "('555799979', 'public-mpa-all')" + "('555882474',\n", + " 'public-mpa-all',\n", + " 'Humedal Tubul Raqui - Santuario de la Naturaleza')" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mpa_region.id, mpa_region.dataset" + "mpa_region.id, mpa_region.dataset, mpa_region.label" ] }, { @@ -493,7 +560,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "id": "cb8f7439-5cf8-4ab9-88e1-2a84795adbb2", "metadata": { "id": "cb8f7439-5cf8-4ab9-88e1-2a84795adbb2" @@ -505,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "id": "0e111e02-aed8-49fc-bbf4-5fd293bf58d1", "metadata": {}, "outputs": [ @@ -513,17 +580,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 16591 entries, 0 to 16590\n", - "Data columns (total 4 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 16591 non-null 
object\n", - " 1 label 16591 non-null object\n", - " 2 name 0 non-null object\n", - " 3 dataset 16591 non-null object\n", - "dtypes: object(4)\n", - "memory usage: 518.6+ KB\n" + "\n", + "RangeIndex: 17172 entries, 0 to 17171\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 dataset 17172 non-null str \n", + " 1 id 17172 non-null str \n", + " 2 label 17172 non-null str \n", + "dtypes: str(3)\n", + "memory usage: 402.6 KB\n" ] } ], @@ -533,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "id": "0dc3268f-e099-4fe2-821b-f17d579f9d68", "metadata": { "colab": { @@ -565,62 +631,56 @@ " \n", " \n", " \n", + " dataset\n", " id\n", " label\n", - " name\n", - " dataset\n", " \n", " \n", " \n", " \n", " 0\n", + " public-mpa-all\n", " 1\n", " Diamond Reef - Marine Reserve\n", - " None\n", - " public-mpa-all\n", " \n", " \n", " 1\n", + " public-mpa-all\n", " 2\n", " Palaster Reef - Marine Reserve\n", - " None\n", - " public-mpa-all\n", " \n", " \n", " 2\n", + " public-mpa-all\n", " 27\n", " Folkstone - Marine Reserve\n", - " None\n", - " public-mpa-all\n", " \n", " \n", " 3\n", + " public-mpa-all\n", " 46\n", " Reserva Biológica Atol Das Rocas - Reserva Bio...\n", - " None\n", - " public-mpa-all\n", " \n", " \n", " 4\n", + " public-mpa-all\n", " 57\n", " Parque Nacional Do Cabo Orange - Parque\n", - " None\n", - " public-mpa-all\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id label name dataset\n", - "0 1 Diamond Reef - Marine Reserve None public-mpa-all\n", - "1 2 Palaster Reef - Marine Reserve None public-mpa-all\n", - "2 27 Folkstone - Marine Reserve None public-mpa-all\n", - "3 46 Reserva Biológica Atol Das Rocas - Reserva Bio... 
None public-mpa-all\n", - "4 57 Parque Nacional Do Cabo Orange - Parque None public-mpa-all" + " dataset id label\n", + "0 public-mpa-all 1 Diamond Reef - Marine Reserve\n", + "1 public-mpa-all 2 Palaster Reef - Marine Reserve\n", + "2 public-mpa-all 27 Folkstone - Marine Reserve\n", + "3 public-mpa-all 46 Reserva Biológica Atol Das Rocas - Reserva Bio...\n", + "4 public-mpa-all 57 Parque Nacional Do Cabo Orange - Parque" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -629,6 +689,63 @@ "mpa_regions_df.head()" ] }, + { + "cell_type": "markdown", + "id": "51fd6af9-d2f9-4fe6-9cbc-a63a3b91d236", + "metadata": {}, + "source": [ + "### Filter the list of MPA regions to Obtain the Region of Interest (ROI)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8e131dd7-d44d-4ee2-8d35-96566386e2d1", + "metadata": {}, + "outputs": [], + "source": [ + "mpa_rois_result = await gfw_client.references.get_mpa_regions(id=\"555745302\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "4028e312-fb89-46f2-a689-f51596da030c", + "metadata": {}, + "outputs": [], + "source": [ + "mpa_roi = mpa_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "9494b8e3-7f93-4188-a04b-9007ae708cd2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('555745302', 'public-mpa-all', 'Dorsal de Nasca - Reserva Nacional')" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mpa_roi.id, mpa_roi.dataset, mpa_roi.label" + ] + }, + { + "cell_type": "markdown", + "id": "33acdade-2bec-40c1-9022-ee22d4bc6ad1", + "metadata": {}, + "source": [ + "**Note:** Pass `mpa_roi` directly to `region` parameter of the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), 
[Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." + ] + }, { "cell_type": "markdown", "id": "a4359a04-5e07-4bac-bf46-145461bc9a5c", @@ -641,7 +758,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "id": "ac6c6784-e839-44ed-8b47-50c9d7f0a68a", "metadata": { "id": "ac6c6784-e839-44ed-8b47-50c9d7f0a68a" @@ -663,7 +780,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "id": "377f6c90-275f-4dbb-81d2-d1dd43e65fdd", "metadata": { "id": "377f6c90-275f-4dbb-81d2-d1dd43e65fdd" @@ -675,7 +792,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 27, "id": "69605d0f-c97f-4e49-a4f5-d55121e9533a", "metadata": { "id": "69605d0f-c97f-4e49-a4f5-d55121e9533a" @@ -687,7 +804,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 28, "id": "281fe6ca-f993-4158-a225-930173c52550", "metadata": { "colab": { @@ -700,16 +817,16 @@ { "data": { "text/plain": [ - "('BOBP-IGO', 'public-rfmo')" + "('BOBP-IGO', 'public-rfmo', 'BOBP-IGO')" ] }, - "execution_count": 22, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "rfmo_region.id, rfmo_region.dataset" + "rfmo_region.id, rfmo_region.dataset, rfmo_region.label" ] }, { @@ -724,7 +841,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 29, "id": "cf0c95fd-911d-4c0a-a663-5430a9c3d073", "metadata": { "id": "cf0c95fd-911d-4c0a-a663-5430a9c3d073" @@ -736,7 +853,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 30, "id": "abcf14cc-8ddb-4b01-8a58-72885e55d435", "metadata": {}, "outputs": [ @@ -744,18 +861,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 42 entries, 0 to 41\n", - "Data columns (total 5 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", 
- " 0 id 42 non-null object\n", - " 1 label 42 non-null object\n", - " 2 rfb 0 non-null object\n", - " 3 dataset 42 non-null object\n", - " 4 ID 42 non-null object\n", - "dtypes: object(5)\n", - "memory usage: 1.8+ KB\n" + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 dataset 42 non-null str \n", + " 1 id 42 non-null str \n", + " 2 label 42 non-null str \n", + " 3 id_ 42 non-null str \n", + "dtypes: str(4)\n", + "memory usage: 1.4 KB\n" ] } ], @@ -765,7 +881,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 31, "id": "59d334e4-6b8a-469c-b401-72613b42bd68", "metadata": { "colab": { @@ -797,52 +913,46 @@ " \n", " \n", " \n", + " dataset\n", " id\n", " label\n", - " rfb\n", - " dataset\n", - " ID\n", + " id_\n", " \n", " \n", " \n", " \n", " 0\n", + " public-rfmo\n", " LTA\n", " LTA\n", - " None\n", - " public-rfmo\n", " LTA\n", " \n", " \n", " 1\n", + " public-rfmo\n", " SEAFDEC\n", " SEAFDEC\n", - " None\n", - " public-rfmo\n", " SEAFDEC\n", " \n", " \n", " 2\n", + " public-rfmo\n", " IATTC\n", " IATTC\n", - " None\n", - " public-rfmo\n", " IATTC\n", " \n", " \n", " 3\n", + " public-rfmo\n", " APFIC\n", " APFIC\n", - " None\n", - " public-rfmo\n", " APFIC\n", " \n", " \n", " 4\n", + " public-rfmo\n", " IPHC\n", " IPHC\n", - " None\n", - " public-rfmo\n", " IPHC\n", " \n", " \n", @@ -850,15 +960,15 @@ "" ], "text/plain": [ - " id label rfb dataset ID\n", - "0 LTA LTA None public-rfmo LTA\n", - "1 SEAFDEC SEAFDEC None public-rfmo SEAFDEC\n", - "2 IATTC IATTC None public-rfmo IATTC\n", - "3 APFIC APFIC None public-rfmo APFIC\n", - "4 IPHC IPHC None public-rfmo IPHC" + " dataset id label id_\n", + "0 public-rfmo LTA LTA LTA\n", + "1 public-rfmo SEAFDEC SEAFDEC SEAFDEC\n", + "2 public-rfmo IATTC IATTC IATTC\n", + "3 public-rfmo APFIC APFIC APFIC\n", + "4 public-rfmo IPHC IPHC IPHC" ] }, - "execution_count": 25, + "execution_count": 31, "metadata": {}, "output_type": 
"execute_result" } @@ -866,6 +976,63 @@ "source": [ "rfmo_regions_df.head()" ] + }, + { + "cell_type": "markdown", + "id": "998837d9-944d-4ee3-ad79-bd4f7e8a1ec6", + "metadata": {}, + "source": [ + "### Filter the List of RFMO Regions to Obtain the Region of Interest (ROI)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "ace1e3ed-23a0-4601-aae6-6ea240e08409", + "metadata": {}, + "outputs": [], + "source": [ + "rfmo_rois_result = await gfw_client.references.get_rfmo_regions(id=\"WCPFC\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "064937cb-66f3-4d69-9480-d59307c8c324", + "metadata": {}, + "outputs": [], + "source": [ + "rfmo_roi = rfmo_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "0c9a12a1-f861-4676-b156-080bc64c331d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('WCPFC', 'public-rfmo', 'WCPFC')" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfmo_roi.id, rfmo_roi.dataset, rfmo_roi.label" + ] + }, + { + "cell_type": "markdown", + "id": "bd7775a9-0731-4515-afa7-d4e6316b5854", + "metadata": {}, + "source": [ + "**Note:** Pass `rfmo_roi` directly to the `region` parameter of the [4Wings API](https://globalfishingwatch.org/our-apis/documentation#map-visualization-4wings-api), [Bulk Download API](https://globalfishingwatch.org/our-apis/documentation#bulk-download-api), [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api), and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." 
+ ] } ], "metadata": { diff --git a/notebooks/usage-guides/vessels-api.ipynb b/notebooks/usage-guides/vessels-api.ipynb index 1f6bcc7..aa4b506 100644 --- a/notebooks/usage-guides/vessels-api.ipynb +++ b/notebooks/usage-guides/vessels-api.ipynb @@ -156,6 +156,14 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "fb59a0f1-9108-43ee-9578-c41b33468ee1", + "metadata": {}, + "source": [ + "**Note:** It is recommended to prioritize vessels that include both `registry_info` and `self_reported_info` (AIS), as this indicates a successful match between registry data and AIS information. See how the [Vessels API](https://globalfishingwatch.org/our-apis/documentation#vessels-api) is used in the [Vessel Viewer](https://globalfishingwatch.org/map/) [here](https://globalfishingwatch.org/our-apis/assets/2024_Vessel_Viewer_and_APIs_behind_It.pdf). Use the `vessel_ids` property of the `result` object returned by `gfw_client.vessels` methods as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." 
+ ] + }, { "cell_type": "markdown", "id": "db719492-c622-4250-aee1-85e1276ee3bc", @@ -176,11 +184,50 @@ "outputs": [], "source": [ "vessel_search_result = await gfw_client.vessels.search_vessels(\n", - " where=\"ssvid='775998121' AND shipname='DON TITO'\",\n", + " where=\"ssvid='412331038' AND imo='8775637'\",\n", " includes=[\"MATCH_CRITERIA\", \"OWNERSHIP\"],\n", ")" ] }, + { + "cell_type": "markdown", + "id": "de135cee-48d9-4c54-9f9f-ee2e38877218", + "metadata": {}, + "source": [ + "### Get List of Matched Vessel IDs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "be45750b-8165-4c74-a453-f77b5e1cc8b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['da2b09b31-127e-27e0-fe5f-d6d87e96de6a',\n", + " '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", + " '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vessel_search_result.vessel_ids" + ] + }, + { + "cell_type": "markdown", + "id": "f36dda6e-8caa-4b26-85d9-0a0090395672", + "metadata": {}, + "source": [ + "**Note:** Use the `vessel_search_result.vessel_ids` as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." 
+ ] + }, { "cell_type": "markdown", "id": "d3278b94-d057-474d-bf9d-de3905ba90a0", @@ -193,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "6a23b5a1-747d-489a-b861-a2dd8001a60c", "metadata": { "id": "6a23b5a1-747d-489a-b861-a2dd8001a60c" @@ -205,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "2773afc5-dddc-436b-8e3b-483b88a1f7f5", "metadata": { "id": "2773afc5-dddc-436b-8e3b-483b88a1f7f5" @@ -217,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "fa2c1585-582b-478c-8e10-a249a09c4884", "metadata": { "colab": { @@ -230,10 +277,10 @@ { "data": { "text/plain": [ - "('public-global-vessel-identity:v3.0', 'c54923e64-46f3-9338-9dcb-ff09724077a3')" + "('public-global-vessel-identity:v4.0', 'da2b09b31-127e-27e0-fe5f-d6d87e96de6a')" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -254,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "5a005a8d-7c50-42c6-8c5d-de31f06c270f", "metadata": { "id": "5a005a8d-7c50-42c6-8c5d-de31f06c270f" @@ -266,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "e7641cd3-b14d-4f6d-8702-26be4400f2e7", "metadata": {}, "outputs": [ @@ -274,21 +321,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 2 entries, 0 to 1\n", + "\n", + "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 2 non-null object\n", - " 1 registry_info_total_records 2 non-null int64 \n", - " 2 registry_info 2 non-null object\n", - " 3 registry_owners 2 non-null object\n", + " 0 dataset 1 non-null str \n", + " 1 registry_info_total_records 1 non-null int64 \n", + " 2 registry_info 1 non-null object\n", + " 3 registry_owners 1 non-null object\n", " 4 
registry_public_authorizations 0 non-null object\n", - " 5 combined_sources_info 2 non-null object\n", - " 6 self_reported_info 2 non-null object\n", - " 7 matchCriteria 2 non-null object\n", - "dtypes: int64(1), object(7)\n", - "memory usage: 260.0+ bytes\n" + " 5 combined_sources_info 1 non-null object\n", + " 6 self_reported_info 1 non-null object\n", + " 7 matchCriteria 1 non-null object\n", + "dtypes: int64(1), object(6), str(1)\n", + "memory usage: 196.0+ bytes\n" ] } ], @@ -298,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "9f7237f0-1de2-4a21-834b-b692cf4452b5", "metadata": { "colab": { @@ -343,25 +390,14 @@ " \n", " \n", " 0\n", - " public-global-vessel-identity:v3.0\n", - " 0\n", - " []\n", - " []\n", - " None\n", - " [{'vessel_id': 'bae8f325c-cf0a-01fe-6d1a-9a275...\n", - " [{'id': 'bae8f325c-cf0a-01fe-6d1a-9a275588d4ff...\n", - " [{'reference': 'bae8f325c-cf0a-01fe-6d1a-9a275...\n", - " \n", - " \n", - " 1\n", - " public-global-vessel-identity:v3.0\n", - " 0\n", - " []\n", - " []\n", + " public-global-vessel-identity:v4.0\n", + " 1\n", + " [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's...\n", + " [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',...\n", " None\n", - " [{'vessel_id': 'c54923e64-46f3-9338-9dcb-ff097...\n", - " [{'id': 'c54923e64-46f3-9338-9dcb-ff09724077a3...\n", - " [{'reference': 'c54923e64-46f3-9338-9dcb-ff097...\n", + " [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87...\n", + " [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b...\n", + " [{'reference': '755a48dd4-4bee-4bcf-7b5f-9baea...\n", " \n", " \n", "\n", @@ -369,27 +405,28 @@ ], "text/plain": [ " dataset registry_info_total_records \\\n", - "0 public-global-vessel-identity:v3.0 0 \n", - "1 public-global-vessel-identity:v3.0 0 \n", + "0 public-global-vessel-identity:v4.0 1 \n", + "\n", + " registry_info \\\n", + "0 [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's... 
\n", + "\n", + " registry_owners \\\n", + "0 [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',... \n", "\n", - " registry_info registry_owners registry_public_authorizations \\\n", - "0 [] [] None \n", - "1 [] [] None \n", + " registry_public_authorizations \\\n", + "0 None \n", "\n", " combined_sources_info \\\n", - "0 [{'vessel_id': 'bae8f325c-cf0a-01fe-6d1a-9a275... \n", - "1 [{'vessel_id': 'c54923e64-46f3-9338-9dcb-ff097... \n", + "0 [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87... \n", "\n", " self_reported_info \\\n", - "0 [{'id': 'bae8f325c-cf0a-01fe-6d1a-9a275588d4ff... \n", - "1 [{'id': 'c54923e64-46f3-9338-9dcb-ff09724077a3... \n", + "0 [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b... \n", "\n", " matchCriteria \n", - "0 [{'reference': 'bae8f325c-cf0a-01fe-6d1a-9a275... \n", - "1 [{'reference': 'c54923e64-46f3-9338-9dcb-ff097... " + "0 [{'reference': '755a48dd4-4bee-4bcf-7b5f-9baea... " ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -410,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "03dbbff9-1768-4e0a-bcea-130ed46f96a0", "metadata": { "id": "03dbbff9-1768-4e0a-bcea-130ed46f96a0" @@ -419,13 +456,52 @@ "source": [ "vessels_result = await gfw_client.vessels.get_vessels_by_ids(\n", " ids=[\n", - " \"8c7304226-6c71-edbe-0b63-c246734b3c01\",\n", - " \"6583c51e3-3626-5638-866a-f47c3bc7ef7c\",\n", - " \"71e7da672-2451-17da-b239-857831602eca\",\n", - " ],\n", + " \"da2b09b31-127e-27e0-fe5f-d6d87e96de6a\",\n", + " \"755a48dd4-4bee-4bcf-7b5f-9baea058fc7b\",\n", + " \"3dad49b0b-b2e0-9347-0c4c-e39fea560f9f\",\n", + " ]\n", ")" ] }, + { + "cell_type": "markdown", + "id": "e34cf91c-33f8-4fa1-8150-4c571260e65d", + "metadata": {}, + "source": [ + "### Get List of Matched Vessel IDs" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "abce9d2c-f2b9-494c-afff-501edf4b3d04", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"['da2b09b31-127e-27e0-fe5f-d6d87e96de6a',\n", + " '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", + " '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vessels_result.vessel_ids" + ] + }, + { + "cell_type": "markdown", + "id": "c9429fec-dc4b-4789-8f25-d67b91e198b6", + "metadata": {}, + "source": [ + "**Note:** Use the `vessels_result.vessel_ids` as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." + ] + }, { "cell_type": "markdown", "id": "7427c7be-7dea-4097-a542-2db0e37a709a", @@ -438,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "a549433e-53ac-4b7c-969a-d6f20af18668", "metadata": { "id": "a549433e-53ac-4b7c-969a-d6f20af18668" @@ -450,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "3778b513-3cfe-49b5-9107-731d2f10d82e", "metadata": { "id": "3778b513-3cfe-49b5-9107-731d2f10d82e" @@ -462,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "8c107871-c3eb-4544-bb6c-87188fa68675", "metadata": { "colab": { @@ -475,10 +551,10 @@ { "data": { "text/plain": [ - "('public-global-vessel-identity:v3.0', 'aca119c29-95dd-f5c4-2057-ee45268dcd6f')" + "('public-global-vessel-identity:v4.0', 'da2b09b31-127e-27e0-fe5f-d6d87e96de6a')" ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -499,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "d792efd2-c741-41e1-91df-f3822cd7ce46", "metadata": { "id": "d792efd2-c741-41e1-91df-f3822cd7ce46" @@ -511,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": 
"608434ad-151f-4fec-b62b-0c1800427362", "metadata": {}, "outputs": [ @@ -519,20 +595,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 3 entries, 0 to 2\n", + "\n", + "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 3 non-null object\n", - " 1 registry_info_total_records 3 non-null int64 \n", - " 2 registry_info 3 non-null object\n", - " 3 registry_owners 3 non-null object\n", - " 4 registry_public_authorizations 3 non-null object\n", - " 5 combined_sources_info 3 non-null object\n", - " 6 self_reported_info 3 non-null object\n", - "dtypes: int64(1), object(6)\n", - "memory usage: 300.0+ bytes\n" + " 0 dataset 1 non-null str \n", + " 1 registry_info_total_records 1 non-null int64 \n", + " 2 registry_info 1 non-null object\n", + " 3 registry_owners 1 non-null object\n", + " 4 registry_public_authorizations 1 non-null object\n", + " 5 combined_sources_info 1 non-null object\n", + " 6 self_reported_info 1 non-null object\n", + "dtypes: int64(1), object(5), str(1)\n", + "memory usage: 188.0+ bytes\n" ] } ], @@ -542,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "3da7dec8-4f2b-4db9-aed0-38163d591af0", "metadata": { "colab": { @@ -586,33 +662,13 @@ " \n", " \n", " 0\n", - " public-global-vessel-identity:v3.0\n", - " 5\n", - " [{'id': 'a8d00ce54b37add7f85a35fcce8e7a1b', 's...\n", - " [{'name': 'COLINER', 'flag': 'RUS', 'ssvid': '...\n", - " [{'date_from': 2023-01-01 00:00:00+00:00, 'dat...\n", - " [{'vessel_id': 'da1cd7e1b-b8d0-539c-6581-2b3df...\n", - " [{'id': 'da1cd7e1b-b8d0-539c-6581-2b3df8d0a6af...\n", - " \n", - " \n", - " 1\n", - " public-global-vessel-identity:v3.0\n", - " 1\n", - " [{'id': '685862e0626f6234c844919bc738a83a', 's...\n", - " [{'name': 'TRANS PACIFIC JOURNEY FISHING', 'fl...\n", - " [{'date_from': 2012-01-01 00:00:00+00:00, 'dat...\n", - " [{'vessel_id': 
'55889aefb-bef9-224c-d2db-58ecd...\n", - " [{'id': '71e7da672-2451-17da-b239-857831602eca...\n", - " \n", - " \n", - " 2\n", - " public-global-vessel-identity:v3.0\n", - " 2\n", - " [{'id': 'b82d02e5c2c11e5fe5367c91194fc3ba', 's...\n", - " [{'name': 'DONGWON INDUSTRIES', 'flag': 'KOR, ...\n", - " [{'date_from': 2015-10-08 00:00:00+00:00, 'dat...\n", - " [{'vessel_id': 'aca119c29-95dd-f5c4-2057-ee452...\n", - " [{'id': '6583c51e3-3626-5638-866a-f47c3bc7ef7c...\n", + " public-global-vessel-identity:v4.0\n", + " 3\n", + " [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's...\n", + " [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',...\n", + " [{'date_from': 2017-01-04 00:00:00+00:00, 'dat...\n", + " [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87...\n", + " [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b...\n", " \n", " \n", "\n", @@ -620,37 +676,25 @@ ], "text/plain": [ " dataset registry_info_total_records \\\n", - "0 public-global-vessel-identity:v3.0 5 \n", - "1 public-global-vessel-identity:v3.0 1 \n", - "2 public-global-vessel-identity:v3.0 2 \n", + "0 public-global-vessel-identity:v4.0 3 \n", "\n", " registry_info \\\n", - "0 [{'id': 'a8d00ce54b37add7f85a35fcce8e7a1b', 's... \n", - "1 [{'id': '685862e0626f6234c844919bc738a83a', 's... \n", - "2 [{'id': 'b82d02e5c2c11e5fe5367c91194fc3ba', 's... \n", + "0 [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's... \n", "\n", " registry_owners \\\n", - "0 [{'name': 'COLINER', 'flag': 'RUS', 'ssvid': '... \n", - "1 [{'name': 'TRANS PACIFIC JOURNEY FISHING', 'fl... \n", - "2 [{'name': 'DONGWON INDUSTRIES', 'flag': 'KOR, ... \n", + "0 [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',... \n", "\n", " registry_public_authorizations \\\n", - "0 [{'date_from': 2023-01-01 00:00:00+00:00, 'dat... \n", - "1 [{'date_from': 2012-01-01 00:00:00+00:00, 'dat... \n", - "2 [{'date_from': 2015-10-08 00:00:00+00:00, 'dat... \n", + "0 [{'date_from': 2017-01-04 00:00:00+00:00, 'dat... 
\n", "\n", " combined_sources_info \\\n", - "0 [{'vessel_id': 'da1cd7e1b-b8d0-539c-6581-2b3df... \n", - "1 [{'vessel_id': '55889aefb-bef9-224c-d2db-58ecd... \n", - "2 [{'vessel_id': 'aca119c29-95dd-f5c4-2057-ee452... \n", + "0 [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87... \n", "\n", " self_reported_info \n", - "0 [{'id': 'da1cd7e1b-b8d0-539c-6581-2b3df8d0a6af... \n", - "1 [{'id': '71e7da672-2451-17da-b239-857831602eca... \n", - "2 [{'id': '6583c51e3-3626-5638-866a-f47c3bc7ef7c... " + "0 [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b... " ] }, - "execution_count": 18, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -671,7 +715,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "5b12f20c-9c0e-40f6-ac13-31ad7e0144ab", "metadata": { "id": "5b12f20c-9c0e-40f6-ac13-31ad7e0144ab" @@ -679,10 +723,49 @@ "outputs": [], "source": [ "vessel_result = await gfw_client.vessels.get_vessel_by_id(\n", - " id=\"c54923e64-46f3-9338-9dcb-ff09724077a3\",\n", + " id=\"da2b09b31-127e-27e0-fe5f-d6d87e96de6a\",\n", ")" ] }, + { + "cell_type": "markdown", + "id": "f9daf108-2074-4f01-983f-a9a9ee2da9c8", + "metadata": {}, + "source": [ + "### Get List of Matched Vessel IDs" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "464e587d-2faf-4b6b-9c9e-c560c52da1fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['da2b09b31-127e-27e0-fe5f-d6d87e96de6a',\n", + " '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b',\n", + " '3dad49b0b-b2e0-9347-0c4c-e39fea560f9f']" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vessel_result.vessel_ids" + ] + }, + { + "cell_type": "markdown", + "id": "73ee2939-c049-4bb0-a1fa-ad1624899c27", + "metadata": {}, + "source": [ + "**Note:** Use the `vessel_result.vessel_ids` as a shortcut to obtain the `matched vessel IDs`, which can then be passed directly to the [Insights 
API](https://globalfishingwatch.org/our-apis/documentation#insights-api) and [Events API](https://globalfishingwatch.org/our-apis/documentation#events-api) methods." + ] + }, { "cell_type": "markdown", "id": "988bb525-09c9-4f69-876d-8cf08b9f26e8", @@ -695,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "id": "ba62e001-a8e9-4dac-9674-461f9cff191f", "metadata": { "id": "ba62e001-a8e9-4dac-9674-461f9cff191f" @@ -707,7 +790,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "id": "4bf8c038-2aab-49d9-acfc-86949d265dfb", "metadata": { "colab": { @@ -720,10 +803,10 @@ { "data": { "text/plain": [ - "('public-global-vessel-identity:v3.0', 'c54923e64-46f3-9338-9dcb-ff09724077a3')" + "('public-global-vessel-identity:v4.0', 'da2b09b31-127e-27e0-fe5f-d6d87e96de6a')" ] }, - "execution_count": 21, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -744,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "id": "6f146ff6-f669-466a-b3fc-810a30f14a4d", "metadata": { "id": "6f146ff6-f669-466a-b3fc-810a30f14a4d" @@ -756,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "id": "591d90b1-58ca-4da6-8455-c506953824bf", "metadata": {}, "outputs": [ @@ -764,19 +847,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 1 non-null object\n", + " 0 dataset 1 non-null str \n", " 1 registry_info_total_records 1 non-null int64 \n", " 2 registry_info 1 non-null object\n", " 3 registry_owners 1 non-null object\n", " 4 registry_public_authorizations 1 non-null object\n", " 5 combined_sources_info 1 non-null object\n", " 6 self_reported_info 1 non-null object\n", - "dtypes: int64(1), object(6)\n", + "dtypes: int64(1), object(5), str(1)\n", "memory usage: 
188.0+ bytes\n" ] } @@ -787,7 +870,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "id": "f2b2b97d-bc5b-4dba-a17a-40eb9ada75ab", "metadata": { "colab": { @@ -831,13 +914,13 @@ " \n", " \n", " 0\n", - " public-global-vessel-identity:v3.0\n", - " 0\n", - " []\n", - " []\n", - " []\n", - " [{'vessel_id': 'c54923e64-46f3-9338-9dcb-ff097...\n", - " [{'id': 'c54923e64-46f3-9338-9dcb-ff09724077a3...\n", + " public-global-vessel-identity:v4.0\n", + " 3\n", + " [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's...\n", + " [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',...\n", + " [{'date_from': 2017-01-04 00:00:00+00:00, 'dat...\n", + " [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87...\n", + " [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b...\n", " \n", " \n", "\n", @@ -845,19 +928,25 @@ ], "text/plain": [ " dataset registry_info_total_records \\\n", - "0 public-global-vessel-identity:v3.0 0 \n", + "0 public-global-vessel-identity:v4.0 3 \n", + "\n", + " registry_info \\\n", + "0 [{'id': '4ef90bea19300c6a23f6ce627a80238b', 's... \n", "\n", - " registry_info registry_owners registry_public_authorizations \\\n", - "0 [] [] [] \n", + " registry_owners \\\n", + "0 [{'name': 'ZHOUSHAN SHUNHANG OCEAN FISHERIES',... \n", + "\n", + " registry_public_authorizations \\\n", + "0 [{'date_from': 2017-01-04 00:00:00+00:00, 'dat... \n", "\n", " combined_sources_info \\\n", - "0 [{'vessel_id': 'c54923e64-46f3-9338-9dcb-ff097... \n", + "0 [{'vessel_id': 'da2b09b31-127e-27e0-fe5f-d6d87... \n", "\n", " self_reported_info \n", - "0 [{'id': 'c54923e64-46f3-9338-9dcb-ff09724077a3... " + "0 [{'id': '755a48dd4-4bee-4bcf-7b5f-9baea058fc7b... 
" ] }, - "execution_count": 24, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/workflow-guides/workflow-01-analyze-apparent-fishing-effort-senegalese-eez.ipynb b/notebooks/workflow-guides/workflow-01-analyze-apparent-fishing-effort-senegalese-eez.ipynb index 765071b..b7b37a8 100644 --- a/notebooks/workflow-guides/workflow-01-analyze-apparent-fishing-effort-senegalese-eez.ipynb +++ b/notebooks/workflow-guides/workflow-01-analyze-apparent-fishing-effort-senegalese-eez.ipynb @@ -97,6 +97,7 @@ "metadata": {}, "outputs": [], "source": [ + "import datetime\n", "import os\n", "\n", "import pandas as pd\n", @@ -226,6 +227,46 @@ "For **[Senegalese EEZ, the region ID is 8371](https://www.marineregions.org/gazetteer.php?p=details&id=8371)** (public-eez-areas dataset)." ] }, + { + "cell_type": "markdown", + "id": "4a8ba667-84a6-4d42-9c3e-33bab3218e19", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9a2a1a57-e675-4648-b510-dd30590179e2", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"SEN\")\n", + "sen_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b12bf0ce-f48d-496b-bada-d17d79c61a61", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('8371', 'public-eez-areas', 'Senegalese Exclusive Economic Zone', 'SEN')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sen_eez_roi.id, sen_eez_roi.dataset, sen_eez_roi.label, sen_eez_roi.iso3" + ] + }, { "cell_type": "markdown", "id": "6fe5edf3-8e96-4147-8c59-a6bc8982662d", @@ -274,12 +315,53 @@ "\n", "- Gear types, such as **trawlers**, are inferred based on **Global Fishing Watch’s vessel classification system**, which relies on **AIS data and vessel public registries**. The **gear type associated with each vessel is not always 100% accurate**, as it may be derived from historical sources or inferred from movement patterns. See more details on [supported gear types here](https://globalfishingwatch.org/our-apis/documentation#gear-types-supported).\n", "- Also, please see data caveats regarding [vessel types and their classification here](https://globalfishingwatch.org/our-apis/documentation#vessel-types).\n", - "- See more details on retrieving [Region IDs here](https://globalfishingwatch.org/our-apis/documentation#regions)." + "- See [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) for more details on retrieving [Region IDs](https://globalfishingwatch.org/our-apis/documentation#regions)." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, + "id": "702b9209-90ac-494f-93c2-ffc6325ae746", + "metadata": {}, + "outputs": [], + "source": [ + "end_date = datetime.date.today()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e4f9262c-fdeb-4354-8151-8e4a17883f74", + "metadata": {}, + "outputs": [], + "source": [ + "start_date = end_date - datetime.timedelta(weeks=12)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6da47bd7-9cb6-4c09-95f3-b7d70816a749", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(datetime.date(2026, 4, 2), datetime.date(2026, 6, 25))" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start_date, end_date" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "db35d1b2-ba32-4a58-97f3-17059b3de164", "metadata": {}, "outputs": [], @@ -289,19 +371,16 @@ " group_by=\"VESSEL_ID\",\n", " temporal_resolution=\"MONTHLY\",\n", " filters=[\"geartype in ('trawlers')\"],\n", - " start_date=\"2024-11-01\",\n", - " end_date=\"2025-01-31\",\n", + " start_date=start_date,\n", + " end_date=end_date,\n", " spatial_aggregation=True,\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"8371\",\n", - " },\n", + " region=sen_eez_roi,\n", ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "301f888e-3b7e-4946-a7aa-496e05b53cbd", "metadata": {}, "outputs": [], @@ -311,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "866f84fb-50c4-47e2-95fa-43fbc06d17cd", "metadata": {}, "outputs": [ @@ -319,33 +398,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 170 entries, 0 to 169\n", + "\n", + "RangeIndex: 211 entries, 0 to 210\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 170 non-null object \n", 
+ " 0 date 211 non-null str \n", " 1 detections 0 non-null object \n", - " 2 flag 170 non-null object \n", - " 3 gear_type 170 non-null object \n", - " 4 hours 170 non-null float64 \n", + " 2 flag 211 non-null str \n", + " 3 gear_type 211 non-null str \n", + " 4 hours 211 non-null float64 \n", " 5 vessel_ids 0 non-null object \n", - " 6 vessel_id 170 non-null object \n", - " 7 vessel_type 170 non-null object \n", - " 8 entry_timestamp 170 non-null datetime64[ns, UTC]\n", - " 9 exit_timestamp 170 non-null datetime64[ns, UTC]\n", - " 10 first_transmission_date 170 non-null datetime64[ns, UTC]\n", - " 11 last_transmission_date 170 non-null datetime64[ns, UTC]\n", - " 12 imo 170 non-null object \n", - " 13 mmsi 170 non-null object \n", - " 14 call_sign 170 non-null object \n", - " 15 dataset 170 non-null object \n", - " 16 report_dataset 170 non-null object \n", - " 17 ship_name 170 non-null object \n", + " 6 vessel_id 211 non-null str \n", + " 7 vessel_type 211 non-null str \n", + " 8 entry_timestamp 211 non-null datetime64[us, UTC]\n", + " 9 exit_timestamp 211 non-null datetime64[us, UTC]\n", + " 10 first_transmission_date 211 non-null datetime64[us, UTC]\n", + " 11 last_transmission_date 211 non-null datetime64[us, UTC]\n", + " 12 imo 211 non-null str \n", + " 13 mmsi 211 non-null str \n", + " 14 call_sign 211 non-null str \n", + " 15 dataset 211 non-null str \n", + " 16 report_dataset 211 non-null str \n", + " 17 ship_name 211 non-null str \n", " 18 lat 0 non-null object \n", " 19 lon 0 non-null object \n", - "dtypes: datetime64[ns, UTC](4), float64(1), object(15)\n", - "memory usage: 26.7+ KB\n" + "dtypes: datetime64[us, UTC](4), float64(1), object(4), str(11)\n", + "memory usage: 33.1+ KB\n" ] } ], @@ -355,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "c76de7b9-260b-4469-aea5-0ad8fe2f25a4", "metadata": {}, "outputs": [ @@ -390,58 +469,58 @@ " \n", " \n", " 0\n", - " CHN\n", + " SEN\n", " TRAWLERS\n", - " 0.368056\n", 
- " 412209175\n", - " MENGXIN24\n", + " 528.425000\n", + " 663152000\n", + " KENTIA\n", " \n", " \n", " 1\n", - " ESP\n", + " GNB\n", " TRAWLERS\n", - " 1.306389\n", - " 225987981\n", - " CIUDAD DE HUELVA\n", + " 135.772500\n", + " 630124008\n", + " BACALAM\n", " \n", " \n", " 2\n", - " CHN\n", + " SEN\n", " TRAWLERS\n", - " 216.545000\n", - " 412549331\n", - " YUAN YU 886\n", + " 330.992222\n", + " 663092000\n", + " SOKONE\n", " \n", " \n", " 3\n", - " SEN\n", + " CHN\n", " TRAWLERS\n", - " 612.825556\n", - " 663123000\n", - " ILE AUX OISEAUX\n", + " 543.411111\n", + " 412549197\n", + " DAK  1372\n", " \n", " \n", " 4\n", - " CHN\n", + " SEN\n", " TRAWLERS\n", - " 31.888889\n", - " 412444322\n", - " MIN LONG YU61146\n", + " 324.623889\n", + " 663146000\n", + " F/V AUDREY-\n", " \n", " \n", "\n", "" ], "text/plain": [ - " flag gear_type hours mmsi ship_name\n", - "0 CHN TRAWLERS 0.368056 412209175 MENGXIN24\n", - "1 ESP TRAWLERS 1.306389 225987981 CIUDAD DE HUELVA\n", - "2 CHN TRAWLERS 216.545000 412549331 YUAN YU 886\n", - "3 SEN TRAWLERS 612.825556 663123000 ILE AUX OISEAUX\n", - "4 CHN TRAWLERS 31.888889 412444322 MIN LONG YU61146" + " flag gear_type hours mmsi ship_name\n", + "0 SEN TRAWLERS 528.425000 663152000 KENTIA\n", + "1 GNB TRAWLERS 135.772500 630124008 BACALAM\n", + "2 SEN TRAWLERS 330.992222 663092000 SOKONE\n", + "3 CHN TRAWLERS 543.411111 412549197 DAK 1372\n", + "4 SEN TRAWLERS 324.623889 663146000 F/V AUDREY-" ] }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -460,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "id": "c44d3d72-ebe5-4fe0-85fb-8e8675bd1922", "metadata": {}, "outputs": [], @@ -474,8 +553,49 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "545d63f0-e3b6-4e81-8c5c-55990eeec9b0", + "execution_count": 15, + "id": "59a8faad-3b37-4583-85c8-286463103439", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 
88.000000\n", + "mean 567.947727\n", + "std 612.831612\n", + "min 0.318611\n", + "25% 58.696042\n", + "50% 220.915694\n", + "75% 1254.456458\n", + "max 1680.045556\n", + "Name: hours, dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "step_1_agg_report_df[\"hours\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "955083e2-f441-4409-ac52-82f58a20e59c", + "metadata": {}, + "outputs": [], + "source": [ + "step_1_agg_report_mask = step_1_agg_report_df[\"hours\"] >= step_1_agg_report_df[\n", + " \"hours\"\n", + "].quantile(0.75)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5129d418-beac-4f2b-8f19-a4952ecfba95", "metadata": {}, "outputs": [ { @@ -508,65 +628,218 @@ " \n", " \n", " \n", - " 66\n", + " 54\n", + " SEN\n", + " TRAWLERS\n", + " 663093000\n", + " AMINE\n", + " 1680.045556\n", + " \n", + " \n", + " 59\n", + " SEN\n", + " TRAWLERS\n", + " 663111111\n", + " LAGUEM I\n", + " 1650.861667\n", + " \n", + " \n", + " 55\n", + " SEN\n", + " TRAWLERS\n", + " 663101000\n", + " CHIQUITA\n", + " 1629.070833\n", + " \n", + " \n", + " 57\n", + " SEN\n", + " TRAWLERS\n", + " 663103000\n", + " RIA  DE  DAKAR\n", + " 1614.329722\n", + " \n", + " \n", + " 77\n", + " SEN\n", + " TRAWLERS\n", + " 663176000\n", + " CARVISA DOS\n", + " 1596.626389\n", + " \n", + " \n", + " 69\n", + " SEN\n", + " TRAWLERS\n", + " 663131000\n", + " KANBAL II\n", + " 1588.290556\n", + " \n", + " \n", + " 48\n", + " SEN\n", + " TRAWLERS\n", + " 663010400\n", + " TOUBA\n", + " 1583.635278\n", + " \n", + " \n", + " 80\n", + " SEN\n", + " TRAWLERS\n", + " 663180000\n", + " F/V NATA\n", + " 1582.401944\n", + " \n", + " \n", + " 70\n", + " SEN\n", + " TRAWLERS\n", + " 663133000\n", + " KANBAL III        \"3\n", + " 1582.378056\n", + " \n", + " \n", + " 85\n", + " SEN\n", + " TRAWLERS\n", + " 663250000\n", + " PRAIA DA MAROSA\n", + " 1561.656944\n", + " \n", + " \n", + " 
21\n", + " CHN\n", + " TRAWLERS\n", + " 412549196\n", + " GUOJIN901\n", + " 1512.410000\n", + " \n", + " \n", + " 22\n", + " CHN\n", + " TRAWLERS\n", + " 412549197\n", + " DAK  1372\n", + " 1456.541667\n", + " \n", + " \n", + " 78\n", " SEN\n", " TRAWLERS\n", " 663178000\n", " NUEVONOSOLAR\n", - " 1678.888333\n", + " 1454.887500\n", " \n", " \n", - " 52\n", + " 15\n", + " CHN\n", + " TRAWLERS\n", + " 412420883\n", + " DAKA1366\n", + " 1452.546389\n", + " \n", + " \n", + " 62\n", " SEN\n", " TRAWLERS\n", - " 663115000\n", - " BETTY\n", - " 1648.771944\n", + " 663114000\n", + " ADRIMEX II\n", + " 1382.205556\n", + " \n", + " \n", + " 61\n", + " SEN\n", + " TRAWLERS\n", + " 663113000\n", + " SANTANA\n", + " 1376.565833\n", + " \n", + " \n", + " 50\n", + " SEN\n", + " TRAWLERS\n", + " 663073000\n", + " HISPASEN VI\n", + " 1373.737222\n", " \n", " \n", - " 49\n", + " 60\n", " SEN\n", " TRAWLERS\n", " 663112000\n", " TADORNE\n", - " 1610.828611\n", + " 1357.967778\n", " \n", " \n", - " 42\n", + " 58\n", " SEN\n", " TRAWLERS\n", - " 663039000\n", - " SEGUNDO SAN RAFAEL\n", - " 1595.538333\n", + " 663109000\n", + " PDT OMAR DIALLO\n", + " 1294.566389\n", " \n", " \n", - " 74\n", + " 63\n", " SEN\n", " TRAWLERS\n", - " 663250000\n", - " PRAIA DA MAROSA\n", - " 1573.587500\n", + " 663115000\n", + " BETTY\n", + " 1277.338611\n", + " \n", + " \n", + " 66\n", + " SEN\n", + " TRAWLERS\n", + " 663122000\n", + " ILES AUX MIMOSAS\n", + " 1262.438611\n", + " \n", + " \n", + " 73\n", + " SEN\n", + " TRAWLERS\n", + " 663152000\n", + " KENTIA\n", + " 1260.501667\n", " \n", " \n", "\n", "" ], "text/plain": [ - " flag gear_type mmsi ship_name hours\n", - "66 SEN TRAWLERS 663178000 NUEVONOSOLAR 1678.888333\n", - "52 SEN TRAWLERS 663115000 BETTY 1648.771944\n", - "49 SEN TRAWLERS 663112000 TADORNE 1610.828611\n", - "42 SEN TRAWLERS 663039000 SEGUNDO SAN RAFAEL 1595.538333\n", - "74 SEN TRAWLERS 663250000 PRAIA DA MAROSA 1573.587500" + " flag gear_type mmsi ship_name hours\n", + "54 SEN 
TRAWLERS 663093000 AMINE 1680.045556\n", + "59 SEN TRAWLERS 663111111 LAGUEM I 1650.861667\n", + "55 SEN TRAWLERS 663101000 CHIQUITA 1629.070833\n", + "57 SEN TRAWLERS 663103000 RIA DE DAKAR 1614.329722\n", + "77 SEN TRAWLERS 663176000 CARVISA DOS 1596.626389\n", + "69 SEN TRAWLERS 663131000 KANBAL II 1588.290556\n", + "48 SEN TRAWLERS 663010400 TOUBA 1583.635278\n", + "80 SEN TRAWLERS 663180000 F/V NATA 1582.401944\n", + "70 SEN TRAWLERS 663133000 KANBAL III \"3 1582.378056\n", + "85 SEN TRAWLERS 663250000 PRAIA DA MAROSA 1561.656944\n", + "21 CHN TRAWLERS 412549196 GUOJIN901 1512.410000\n", + "22 CHN TRAWLERS 412549197 DAK 1372 1456.541667\n", + "78 SEN TRAWLERS 663178000 NUEVONOSOLAR 1454.887500\n", + "15 CHN TRAWLERS 412420883 DAKA1366 1452.546389\n", + "62 SEN TRAWLERS 663114000 ADRIMEX II 1382.205556\n", + "61 SEN TRAWLERS 663113000 SANTANA 1376.565833\n", + "50 SEN TRAWLERS 663073000 HISPASEN VI 1373.737222\n", + "60 SEN TRAWLERS 663112000 TADORNE 1357.967778\n", + "58 SEN TRAWLERS 663109000 PDT OMAR DIALLO 1294.566389\n", + "63 SEN TRAWLERS 663115000 BETTY 1277.338611\n", + "66 SEN TRAWLERS 663122000 ILES AUX MIMOSAS 1262.438611\n", + "73 SEN TRAWLERS 663152000 KENTIA 1260.501667" ] }, - "execution_count": 10, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_1_agg_report_df.head()" + "step_1_agg_report_df[step_1_agg_report_mask]" ] }, { @@ -582,9 +855,7 @@ "id": "7d51287d-263b-4ff5-a40a-5efa7e7ea0a4", "metadata": {}, "source": [ - "- There are vessels appear to have been engaged in potential trawling activity in Senegalese EEZ over the past 3 months i.e.,:\n", - " - `NUEVONOSOLAR (mmsi: 663178000, flag: SEN)`\n", - " - `BETTY (mmsi: 663115000, flag: SEN)`\n", + "- There are vessels that appear to have been engaged in potential **trawling activity** in the Senegalese EEZ over the past 3 months.\n", "- We will retrieve these vessels' `ownership`, `flag history`, and `authorizations` in **Step 2 to validate** them." 
] }, @@ -626,27 +897,48 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "id": "5b507df7-99fe-4bb2-82da-7ed4ad4cfa8e", "metadata": {}, "outputs": [], "source": [ - "step_1_vessel_mmsis = list(step_1_agg_report_df[\"mmsi\"].head(n=2))" + "step_1_vessel_mmsis = list(step_1_agg_report_df[step_1_agg_report_mask][\"mmsi\"])" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "f87106f0-b2a0-482c-bd1f-d11119ee53a3", + "execution_count": 19, + "id": "6a6b3ae1-b813-41be-ae6d-41e6f4bfac0b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['663178000', '663115000']" + "['663093000',\n", + " '663111111',\n", + " '663101000',\n", + " '663103000',\n", + " '663176000',\n", + " '663131000',\n", + " '663010400',\n", + " '663180000',\n", + " '663133000',\n", + " '663250000',\n", + " '412549196',\n", + " '412549197',\n", + " '663178000',\n", + " '412420883',\n", + " '663114000',\n", + " '663113000',\n", + " '663073000',\n", + " '663112000',\n", + " '663109000',\n", + " '663115000',\n", + " '663122000',\n", + " '663152000']" ] }, - "execution_count": 12, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -657,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, "id": "1eb89d2a-4d53-4d67-a668-0cbb233e950c", "metadata": {}, "outputs": [], @@ -671,18 +963,38 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 21, "id": "e670a688-3af0-410d-b93b-ec1e06ec4e09", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['894bc3ec6-6ade-f09c-e792-ff2e947508d8',\n", - " 'bf28c5a58-8c83-8690-8689-7f2d520f926e']" + "['b289909a3-3fe7-d20b-ff71-e0faae7cfbd1',\n", + " '50009f324-4ad6-bd1f-55ce-91adcbe14835',\n", + " '3d4bfcfd2-23c1-3379-d4c2-63239c90b40e',\n", + " 'f5d810a8d-d406-64e0-fc57-8f2edbc0894c',\n", + " 'bf28c5a58-8c83-8690-8689-7f2d520f926e',\n", + " '26aa49b9d-d1eb-aa18-71a8-49cc406c4d0f',\n", + " '7bfa4e72a-aa54-4aae-0aa6-145fc83a25bb',\n", + " 
'56797171d-dc16-997d-5765-61029b1e0244',\n", + " '90ab31dfb-bcab-a05f-d12f-2544e1869205',\n", + " '84c3d602c-cc35-8137-7c15-e3ff55b8b3c7',\n", + " 'bd0fd660c-ce55-f36f-ebf5-5bd0126c1d3e',\n", + " '833e0dfb5-52fe-cf51-e0e5-c52f024ebdac',\n", + " 'e1978b237-794c-1a7d-171c-6c11f629c154',\n", + " '54423a274-4798-9a62-79f7-4c80605246ac',\n", + " '894bc3ec6-6ade-f09c-e792-ff2e947508d8',\n", + " '32e3e6c0a-aa3e-b7f8-7ac4-0b557fc601a8',\n", + " 'cee15c9d1-1057-e347-f773-8a64a32bc08a',\n", + " '0caeaa7c7-71e8-b911-3acb-8f01fc9eefe6',\n", + " '2dcdb9a93-3782-9140-78eb-9d55e8b4e3d3',\n", + " '8a228af50-03a0-6abc-8dd5-08699a638bcb',\n", + " 'd0e58b8b6-6c5d-c117-d258-c0a38bfdcff4',\n", + " 'c6ca30d4c-cca8-92c9-b821-6620ee18940b']" ] }, - "execution_count": 14, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -693,7 +1005,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, "id": "428889bd-4761-4301-b21e-fbc37eba5622", "metadata": {}, "outputs": [], @@ -705,7 +1017,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 23, "id": "c64d68dc-b952-412f-827c-df5236c98bd7", "metadata": {}, "outputs": [], @@ -715,7 +1027,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 24, "id": "757e6c3c-d0df-4b8f-839d-1e31518d42d7", "metadata": {}, "outputs": [ @@ -723,20 +1035,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 2 entries, 0 to 1\n", + "\n", + "RangeIndex: 23 entries, 0 to 22\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 2 non-null object\n", - " 1 registry_info_total_records 2 non-null int64 \n", - " 2 registry_info 2 non-null object\n", - " 3 registry_owners 2 non-null object\n", - " 4 registry_public_authorizations 2 non-null object\n", - " 5 combined_sources_info 2 non-null object\n", - " 6 self_reported_info 2 non-null object\n", - "dtypes: int64(1), 
object(6)\n", - "memory usage: 244.0+ bytes\n" + " 0 dataset 23 non-null str \n", + " 1 registry_info_total_records 23 non-null int64 \n", + " 2 registry_info 23 non-null object\n", + " 3 registry_owners 23 non-null object\n", + " 4 registry_public_authorizations 23 non-null object\n", + " 5 combined_sources_info 23 non-null object\n", + " 6 self_reported_info 23 non-null object\n", + "dtypes: int64(1), object(5), str(1)\n", + "memory usage: 1.4+ KB\n" ] } ], @@ -761,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 25, "id": "4767e025-e26e-4a61-8971-8f2cc90385cc", "metadata": {}, "outputs": [ @@ -794,35 +1106,224 @@ " \n", " \n", " 0\n", - " [{'id': '29fef17154387858d8d4c777311c57f7', 's...\n", - " [{'name': 'SENEVISA', 'flag': 'ESP', 'ssvid': ...\n", - " [{'id': 'bf28c5a58-8c83-8690-8689-7f2d520f926e...\n", + " []\n", + " []\n", + " [{'id': 'b289909a3-3fe7-d20b-ff71-e0faae7cfbd1...\n", " \n", " \n", " 1\n", - " [{'id': '199483471cd2da3717552fddb1a3172a', 's...\n", - " [{'name': 'ARMEMENT SOPASEN', 'flag': 'SEN', '...\n", + " []\n", + " []\n", + " [{'id': 'bf28c5a58-8c83-8690-8689-7f2d520f926e...\n", + " \n", + " \n", + " 2\n", + " [{'id': '4040e99d8c9daabbc124ad166611b46c', 's...\n", + " [{'name': 'SOPERKA', 'flag': 'SEN', 'ssvid': '...\n", + " [{'id': '8a228af50-03a0-6abc-8dd5-08699a638bcb...\n", + " \n", + " \n", + " 3\n", + " []\n", + " []\n", + " [{'id': 'e1978b237-794c-1a7d-171c-6c11f629c154...\n", + " \n", + " \n", + " 4\n", + " []\n", + " []\n", + " [{'id': '32e3e6c0a-aa3e-b7f8-7ac4-0b557fc601a8...\n", + " \n", + " \n", + " 5\n", + " []\n", + " []\n", + " [{'id': 'bd0fd660c-ce55-f36f-ebf5-5bd0126c1d3e...\n", + " \n", + " \n", + " 6\n", + " []\n", + " []\n", + " [{'id': '56797171d-dc16-997d-5765-61029b1e0244...\n", + " \n", + " \n", + " 7\n", + " []\n", + " []\n", + " [{'id': '56797171d-dc16-997d-5765-61029b1e0244...\n", + " \n", + " \n", + " 8\n", + " []\n", + " []\n", + " [{'id': 
'2dcdb9a93-3782-9140-78eb-9d55e8b4e3d3...\n", + " \n", + " \n", + " 9\n", + " []\n", + " []\n", + " [{'id': '833e0dfb5-52fe-cf51-e0e5-c52f024ebdac...\n", + " \n", + " \n", + " 10\n", + " []\n", + " []\n", + " [{'id': 'c6ca30d4c-cca8-92c9-b821-6620ee18940b...\n", + " \n", + " \n", + " 11\n", + " []\n", + " []\n", + " [{'id': '0caeaa7c7-71e8-b911-3acb-8f01fc9eefe6...\n", + " \n", + " \n", + " 12\n", + " []\n", + " []\n", + " [{'id': '84c3d602c-cc35-8137-7c15-e3ff55b8b3c7...\n", + " \n", + " \n", + " 13\n", + " []\n", + " []\n", + " [{'id': '90ab31dfb-bcab-a05f-d12f-2544e1869205...\n", + " \n", + " \n", + " 14\n", + " []\n", + " []\n", + " [{'id': '50009f324-4ad6-bd1f-55ce-91adcbe14835...\n", + " \n", + " \n", + " 15\n", + " []\n", + " []\n", + " [{'id': '26aa49b9d-d1eb-aa18-71a8-49cc406c4d0f...\n", + " \n", + " \n", + " 16\n", + " []\n", + " []\n", " [{'id': '894bc3ec6-6ade-f09c-e792-ff2e947508d8...\n", " \n", + " \n", + " 17\n", + " []\n", + " []\n", + " [{'id': '54423a274-4798-9a62-79f7-4c80605246ac...\n", + " \n", + " \n", + " 18\n", + " []\n", + " []\n", + " [{'id': '3d4bfcfd2-23c1-3379-d4c2-63239c90b40e...\n", + " \n", + " \n", + " 19\n", + " []\n", + " []\n", + " [{'id': '7bfa4e72a-aa54-4aae-0aa6-145fc83a25bb...\n", + " \n", + " \n", + " 20\n", + " [{'id': 'f9d5d75ccd1fe1610a555c887f171491', 's...\n", + " [{'name': 'ARMADORES DO MAROSA', 'flag': 'ESP'...\n", + " [{'id': 'f5d810a8d-d406-64e0-fc57-8f2edbc0894c...\n", + " \n", + " \n", + " 21\n", + " []\n", + " []\n", + " [{'id': 'cee15c9d1-1057-e347-f773-8a64a32bc08a...\n", + " \n", + " \n", + " 22\n", + " [{'id': '6b2903faedafd9744b62cfed11929a34', 's...\n", + " [{'name': 'SOPERKA', 'flag': 'SEN', 'ssvid': '...\n", + " [{'id': 'd0e58b8b6-6c5d-c117-d258-c0a38bfdcff4...\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " registry_info \\\n", - "0 [{'id': '29fef17154387858d8d4c777311c57f7', 's... \n", - "1 [{'id': '199483471cd2da3717552fddb1a3172a', 's... 
\n", + " registry_info \\\n", + "0 [] \n", + "1 [] \n", + "2 [{'id': '4040e99d8c9daabbc124ad166611b46c', 's... \n", + "3 [] \n", + "4 [] \n", + "5 [] \n", + "6 [] \n", + "7 [] \n", + "8 [] \n", + "9 [] \n", + "10 [] \n", + "11 [] \n", + "12 [] \n", + "13 [] \n", + "14 [] \n", + "15 [] \n", + "16 [] \n", + "17 [] \n", + "18 [] \n", + "19 [] \n", + "20 [{'id': 'f9d5d75ccd1fe1610a555c887f171491', 's... \n", + "21 [] \n", + "22 [{'id': '6b2903faedafd9744b62cfed11929a34', 's... \n", "\n", - " registry_owners \\\n", - "0 [{'name': 'SENEVISA', 'flag': 'ESP', 'ssvid': ... \n", - "1 [{'name': 'ARMEMENT SOPASEN', 'flag': 'SEN', '... \n", + " registry_owners \\\n", + "0 [] \n", + "1 [] \n", + "2 [{'name': 'SOPERKA', 'flag': 'SEN', 'ssvid': '... \n", + "3 [] \n", + "4 [] \n", + "5 [] \n", + "6 [] \n", + "7 [] \n", + "8 [] \n", + "9 [] \n", + "10 [] \n", + "11 [] \n", + "12 [] \n", + "13 [] \n", + "14 [] \n", + "15 [] \n", + "16 [] \n", + "17 [] \n", + "18 [] \n", + "19 [] \n", + "20 [{'name': 'ARMADORES DO MAROSA', 'flag': 'ESP'... \n", + "21 [] \n", + "22 [{'name': 'SOPERKA', 'flag': 'SEN', 'ssvid': '... \n", "\n", - " self_reported_info \n", - "0 [{'id': 'bf28c5a58-8c83-8690-8689-7f2d520f926e... \n", - "1 [{'id': '894bc3ec6-6ade-f09c-e792-ff2e947508d8... " + " self_reported_info \n", + "0 [{'id': 'b289909a3-3fe7-d20b-ff71-e0faae7cfbd1... \n", + "1 [{'id': 'bf28c5a58-8c83-8690-8689-7f2d520f926e... \n", + "2 [{'id': '8a228af50-03a0-6abc-8dd5-08699a638bcb... \n", + "3 [{'id': 'e1978b237-794c-1a7d-171c-6c11f629c154... \n", + "4 [{'id': '32e3e6c0a-aa3e-b7f8-7ac4-0b557fc601a8... \n", + "5 [{'id': 'bd0fd660c-ce55-f36f-ebf5-5bd0126c1d3e... \n", + "6 [{'id': '56797171d-dc16-997d-5765-61029b1e0244... \n", + "7 [{'id': '56797171d-dc16-997d-5765-61029b1e0244... \n", + "8 [{'id': '2dcdb9a93-3782-9140-78eb-9d55e8b4e3d3... \n", + "9 [{'id': '833e0dfb5-52fe-cf51-e0e5-c52f024ebdac... \n", + "10 [{'id': 'c6ca30d4c-cca8-92c9-b821-6620ee18940b... 
\n", + "11 [{'id': '0caeaa7c7-71e8-b911-3acb-8f01fc9eefe6... \n", + "12 [{'id': '84c3d602c-cc35-8137-7c15-e3ff55b8b3c7... \n", + "13 [{'id': '90ab31dfb-bcab-a05f-d12f-2544e1869205... \n", + "14 [{'id': '50009f324-4ad6-bd1f-55ce-91adcbe14835... \n", + "15 [{'id': '26aa49b9d-d1eb-aa18-71a8-49cc406c4d0f... \n", + "16 [{'id': '894bc3ec6-6ade-f09c-e792-ff2e947508d8... \n", + "17 [{'id': '54423a274-4798-9a62-79f7-4c80605246ac... \n", + "18 [{'id': '3d4bfcfd2-23c1-3379-d4c2-63239c90b40e... \n", + "19 [{'id': '7bfa4e72a-aa54-4aae-0aa6-145fc83a25bb... \n", + "20 [{'id': 'f5d810a8d-d406-64e0-fc57-8f2edbc0894c... \n", + "21 [{'id': 'cee15c9d1-1057-e347-f773-8a64a32bc08a... \n", + "22 [{'id': 'd0e58b8b6-6c5d-c117-d258-c0a38bfdcff4... " ] }, - "execution_count": 18, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -841,19 +1342,31 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 26, + "id": "61a01040-90d9-4b5d-a99e-ab8b1ef020ef", + "metadata": {}, + "outputs": [], + "source": [ + "step_2_has_registry_info_mask = step_2_vessels_df[\n", + " \"registry_info\"\n", + "].notna() & step_2_vessels_df[\"registry_info\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, "id": "7efdc6d6-52d8-40bb-9537-e09f242fdb25", "metadata": {}, "outputs": [], "source": [ "step_2_registry_info_df = pd.json_normalize(\n", - " step_2_vessels_df[\"registry_info\"].explode()\n", + " step_2_vessels_df[step_2_has_registry_info_mask][\"registry_info\"].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 28, "id": "27ab60a9-6496-4406-9ae4-57a7aa97f6fa", "metadata": {}, "outputs": [ @@ -888,60 +1401,71 @@ " \n", " \n", " \n", - " 0\n", - " 663178000\n", + " 2\n", + " 663131000\n", " SEN\n", - " NUEVO NOSO LAR\n", - " NUEVONOSOLAR\n", + " KANBAL II\n", + " KANBAL2\n", " [TRAWLERS]\n", - " [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " [IMO, SNP]\n", " \n", " \n", - " 1\n", - " 762178000\n", + 
" 2\n", + " 663000000\n", " SEN\n", - " NUEVO NOSO LAR\n", - " NUEVONOSOLAR\n", + " KANBAL II\n", + " KANBAL2\n", " [TRAWLERS]\n", - " [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " [IMO, SNP]\n", " \n", " \n", - " 2\n", - " 552178000\n", + " 20\n", + " 224097970\n", + " ESP\n", + " PRAIA DA MAROSA\n", + " PRAIADAMAROSA\n", + " [TRAWLERS]\n", + " [ESP, EU, ICCAT, IMO, ISSF, SNP]\n", + " \n", + " \n", + " 22\n", + " 663111111\n", " SEN\n", - " NUEVO NOSO LAR\n", - " NUEVONOSOLAR\n", + " LAGHEM I\n", + " LAGUEM1\n", " [TRAWLERS]\n", - " [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " [IMO, SNP]\n", " \n", " \n", - " 3\n", - " 663115000\n", + " 22\n", + " 663111111\n", " SEN\n", - " BETTY\n", - " BETTY\n", + " LAGHEM I\n", + " LAGHEM1\n", " [TRAWLERS]\n", - " [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " [IMO, SNP]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag ship_name n_ship_name gear_types \\\n", - "0 663178000 SEN NUEVO NOSO LAR NUEVONOSOLAR [TRAWLERS] \n", - "1 762178000 SEN NUEVO NOSO LAR NUEVONOSOLAR [TRAWLERS] \n", - "2 552178000 SEN NUEVO NOSO LAR NUEVONOSOLAR [TRAWLERS] \n", - "3 663115000 SEN BETTY BETTY [TRAWLERS] \n", + " ssvid flag ship_name n_ship_name gear_types \\\n", + "2 663131000 SEN KANBAL II KANBAL2 [TRAWLERS] \n", + "2 663000000 SEN KANBAL II KANBAL2 [TRAWLERS] \n", + "20 224097970 ESP PRAIA DA MAROSA PRAIADAMAROSA [TRAWLERS] \n", + "22 663111111 SEN LAGHEM I LAGUEM1 [TRAWLERS] \n", + "22 663111111 SEN LAGHEM I LAGHEM1 [TRAWLERS] \n", "\n", - " source_code \n", - "0 [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL] \n", - "1 [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL] \n", - "2 [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL] \n", - "3 [IMO, TMT_NATIONAL, TMT_OTHER_OFFICIAL] " + " source_code \n", + "2 [IMO, SNP] \n", + "2 [IMO, SNP] \n", + "20 [ESP, EU, ICCAT, IMO, ISSF, SNP] \n", + "22 [IMO, SNP] \n", + "22 [IMO, SNP] " ] }, - "execution_count": 20, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -962,19 +1486,43 @@ }, 
{ "cell_type": "code", - "execution_count": 21, + "execution_count": 29, + "id": "678d3b49-c434-4cac-90a1-9b4b2521d81d", + "metadata": {}, + "outputs": [], + "source": [ + "step_2_has_registry_owners_mask = step_2_vessels_df[\n", + " \"registry_owners\"\n", + "].notna() & step_2_vessels_df[\"registry_owners\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, "id": "24cec6f2-3e70-4396-a779-13ace779733f", "metadata": {}, "outputs": [], "source": [ "step_2_registry_owners_df = pd.json_normalize(\n", - " step_2_vessels_df[\"registry_owners\"].explode()\n", + " step_2_vessels_df[step_2_has_registry_owners_mask][\"registry_owners\"].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 31, + "id": "afc71dda-3665-4dbf-8ee8-f542108d1097", + "metadata": {}, + "outputs": [], + "source": [ + "step_2_registry_owners_match_registry_info_mask = step_2_registry_owners_df[\n", + " \"ssvid\"\n", + "].isin(step_2_registry_info_df[\"ssvid\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "id": "16f804f4-ea4f-4a07-8c53-497e09deddfd", "metadata": {}, "outputs": [ @@ -1007,60 +1555,54 @@ " \n", " \n", " \n", - " 0\n", - " 663178000\n", - " ESP\n", - " SENEVISA\n", - " [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", - " \n", - " \n", - " 1\n", - " 663176000\n", - " ESP\n", - " SENEVISA\n", - " [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " 2\n", + " 663131000\n", + " SEN\n", + " SOPERKA\n", + " [IMO, SNP]\n", " \n", " \n", " 2\n", - " 762178000\n", - " ESP\n", - " SENEVISA\n", - " [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " 663000000\n", + " SEN\n", + " SOPERKA\n", + " [IMO, SNP]\n", " \n", " \n", - " 3\n", - " 552178000\n", + " 20\n", + " 224097970\n", " ESP\n", - " SENEVISA\n", - " [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " ARMADORES DO MAROSA\n", + " [ICCAT, SNP]\n", " \n", " \n", - " 4\n", - " 663115000\n", + " 22\n", + " 663111111\n", " SEN\n", - " ARMEMENT SOPASEN\n", - " [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", + " 
SOPERKA\n", + " [IMO, SNP]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag name source_code\n", - "0 663178000 ESP SENEVISA [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", - "1 663176000 ESP SENEVISA [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", - "2 762178000 ESP SENEVISA [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", - "3 552178000 ESP SENEVISA [TMT_NATIONAL, TMT_OTHER_OFFICIAL]\n", - "4 663115000 SEN ARMEMENT SOPASEN [TMT_NATIONAL, TMT_OTHER_OFFICIAL]" + " ssvid flag name source_code\n", + "2 663131000 SEN SOPERKA [IMO, SNP]\n", + "2 663000000 SEN SOPERKA [IMO, SNP]\n", + "20 224097970 ESP ARMADORES DO MAROSA [ICCAT, SNP]\n", + "22 663111111 SEN SOPERKA [IMO, SNP]" ] }, - "execution_count": 22, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_2_registry_owners_df[[\"ssvid\", \"flag\", \"name\", \"source_code\"]]" + "step_2_registry_owners_df[step_2_registry_owners_match_registry_info_mask][\n", + " [\"ssvid\", \"flag\", \"name\", \"source_code\"]\n", + "]" ] }, { @@ -1073,20 +1615,46 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 33, + "id": "66cabce1-fe33-494a-bd6b-e2803dde3a45", + "metadata": {}, + "outputs": [], + "source": [ + "step_2_has_self_reported_info_mask = step_2_vessels_df[\n", + " \"self_reported_info\"\n", + "].notna() & step_2_vessels_df[\"self_reported_info\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, "id": "c2fa4bdd-1b51-4b37-abf5-aa18a1cab3e5", "metadata": {}, "outputs": [], "source": [ "step_2_self_reported_info_df = pd.json_normalize(\n", - " step_2_vessels_df[\"self_reported_info\"].explode()\n", + " step_2_vessels_df[step_2_has_self_reported_info_mask][\n", + " \"self_reported_info\"\n", + " ].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 24, - "id": "d1b559c7-4587-4019-a843-181017dd3f07", + "execution_count": 35, + "id": "ba204290-3dc9-4b63-8e3a-400d3ba5b27e", + "metadata": {}, + "outputs": [], + "source": [ + 
"step_2_self_reported_info_match_registry_info_mask = step_2_self_reported_info_df[\n", + " \"ssvid\"\n", + "].isin(step_2_registry_info_df[\"ssvid\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "1c1c03df-2c88-4a68-b0ff-062382c21b1b", "metadata": {}, "outputs": [ { @@ -1119,51 +1687,75 @@ " \n", " \n", " \n", - " 0\n", - " 663178000\n", + " 2\n", + " 663131000\n", " SEN\n", - " NUEVONOSOLAR\n", - " NUEVONOSOLAR\n", + " KANBAL II\n", + " KANBAL2\n", " [AIS]\n", " \n", " \n", - " 1\n", - " 663178000\n", + " 2\n", + " 663131000\n", " SEN\n", - " NUEVO=NOSOLAR+3&!U.?\n", - " NUEVONOSOLAR3U\n", + " KANBAL II\n", + " KANBAL2\n", " [AIS]\n", " \n", " \n", " 2\n", - " 762178000\n", - " None\n", - " NUEVO NOSOLAR\n", - " NUEVONOSOLAR\n", + " 663131000\n", + " SEN\n", + " KAMBAL  2\n", + " KAMBAL2\n", " [AIS]\n", " \n", " \n", - " 3\n", - " 552178000\n", - " None\n", - " NUEVO NOSOLAR\n", - " NUEVONOSOLAR\n", + " 2\n", + " 663000000\n", + " SEN\n", + " KING CRAB\n", + " KINGCRAB\n", " [AIS]\n", " \n", " \n", - " 4\n", - " 663178000\n", + " 20\n", + " 224097970\n", + " ESP\n", + " PRAIA DA MAROSA\n", + " PRAIADAMAROSA\n", + " [AIS]\n", + " \n", + " \n", + " 22\n", + " 663111111\n", " SEN\n", - " NUEVO NOSOLAR\n", - " NUEVONOSOLAR\n", + " LAGUEM I\n", + " LAGUEM1\n", " [AIS]\n", " \n", " \n", - " 5\n", - " 663115000\n", + " 22\n", + " 663111111\n", " SEN\n", - " BETTY\n", - " BETTY\n", + " LAGHEM 1\n", + " LAGHEM1\n", + " [AIS]\n", + " \n", + " \n", + " 22\n", + " 663111111\n", + " SEN\n", + " LAGHEM 1\n", + " LAGHEM1\n", + " [AIS]\n", + " \n", + " \n", + " 22\n", + " 663111111\n", + " SEN\n", + " LAGHEM 1\n", + " LAGHEM1\n", " [AIS]\n", " \n", " \n", @@ -1171,22 +1763,25 @@ "" ], "text/plain": [ - " ssvid flag ship_name n_ship_name source_code\n", - "0 663178000 SEN NUEVONOSOLAR NUEVONOSOLAR [AIS]\n", - "1 663178000 SEN NUEVO=NOSOLAR+3&!U.? 
NUEVONOSOLAR3U [AIS]\n", - "2 762178000 None NUEVO NOSOLAR NUEVONOSOLAR [AIS]\n", - "3 552178000 None NUEVO NOSOLAR NUEVONOSOLAR [AIS]\n", - "4 663178000 SEN NUEVO NOSOLAR NUEVONOSOLAR [AIS]\n", - "5 663115000 SEN BETTY BETTY [AIS]" + " ssvid flag ship_name n_ship_name source_code\n", + "2 663131000 SEN KANBAL II KANBAL2 [AIS]\n", + "2 663131000 SEN KANBAL II KANBAL2 [AIS]\n", + "2 663131000 SEN KAMBAL 2 KAMBAL2 [AIS]\n", + "2 663000000 SEN KING CRAB KINGCRAB [AIS]\n", + "20 224097970 ESP PRAIA DA MAROSA PRAIADAMAROSA [AIS]\n", + "22 663111111 SEN LAGUEM I LAGUEM1 [AIS]\n", + "22 663111111 SEN LAGHEM 1 LAGHEM1 [AIS]\n", + "22 663111111 SEN LAGHEM 1 LAGHEM1 [AIS]\n", + "22 663111111 SEN LAGHEM 1 LAGHEM1 [AIS]" ] }, - "execution_count": 24, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_2_self_reported_info_df[\n", + "step_2_self_reported_info_df[step_2_self_reported_info_match_registry_info_mask][\n", " [\"ssvid\", \"flag\", \"ship_name\", \"n_ship_name\", \"source_code\"]\n", "]" ] @@ -1208,8 +1803,8 @@ " - `NUEVONOSOLAR (mmsi: 663178000, flag: SEN)`- appears to be registered under Senegal (SEN)\n", " - `BETTY (mmsi: 663115000, flag: SEN)` - appears to be registered under Senegal (SEN)\n", "- **Ownership & Historical Changes:**\n", - " - `NUEVONOSOLAR (mmsi: 663178000, flag: SEN)` - **SENEVISA** appears to be listed as the registered owner.\n", - " - `BETTY (mmsi: 663115000, flag: SEN)`- **ARMEMENT SOPASEN** appears to be listed as the registered owner." + " - `KANBAL II (mmsi: 663131000, flag: SEN)` - **SOPERKA** appears to be listed as the registered owner.\n", + " - `LAGUEM I/LAGHEM 1 (mmsi: 663111111, flag: SEN)`- **SOPERKA** appears to be listed as the registered owner." 
] }, { diff --git a/notebooks/workflow-guides/workflow-02-analyze-apparent-fishing-effort-argentinian-eez.ipynb b/notebooks/workflow-guides/workflow-02-analyze-apparent-fishing-effort-argentinian-eez.ipynb index a146cfc..c9cd5ca 100644 --- a/notebooks/workflow-guides/workflow-02-analyze-apparent-fishing-effort-argentinian-eez.ipynb +++ b/notebooks/workflow-guides/workflow-02-analyze-apparent-fishing-effort-argentinian-eez.ipynb @@ -97,6 +97,7 @@ "metadata": {}, "outputs": [], "source": [ + "import datetime\n", "import os\n", "\n", "import pandas as pd\n", @@ -228,6 +229,46 @@ "For **[Argentinian EEZ, the region ID is 8466](https://www.marineregions.org/gazetteer.php?p=details&id=8466)** (public-eez-areas dataset)." ] }, + { + "cell_type": "markdown", + "id": "a7c447f6-7dfa-4ac6-aad7-cf47386eb14a", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "464338ff-dfa6-4790-8320-be01ee21be20", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"ARG\")\n", + "arg_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e79c3a0d-1ca4-420b-820c-c55aa11a0a00", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('8466', 'public-eez-areas', 'Argentinian Exclusive Economic Zone', 'ARG')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arg_eez_roi.id, arg_eez_roi.dataset, arg_eez_roi.label, arg_eez_roi.iso3" + ] + }, { "cell_type": "markdown", "id": "35a0691d-b747-48e6-8b02-5c2f54a5d536", @@ -269,7 +310,48 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, + "id": "3d230203-a39e-41ba-84ac-04e0d126b9aa", + "metadata": {}, + "outputs": [], + "source": [ + "end_date = datetime.date.today()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "95fbec7d-85ce-444a-92cb-af13b776e473", + "metadata": {}, + "outputs": [], + "source": [ + "start_date = end_date - datetime.timedelta(weeks=24)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3f9f0f6b-e250-4abe-844a-d63f216fc87b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(datetime.date(2026, 1, 9), datetime.date(2026, 6, 26))" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start_date, end_date" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "62b9b9e5-4170-40d0-9aef-7579beb89bc0", "metadata": {}, "outputs": [], @@ -278,19 +360,16 @@ " spatial_resolution=\"HIGH\",\n", " group_by=\"GEARTYPE\",\n", " temporal_resolution=\"MONTHLY\",\n", - " start_date=\"2024-08-01\",\n", - " end_date=\"2025-01-31\",\n", + " start_date=start_date,\n", + " 
end_date=end_date,\n", " spatial_aggregation=True,\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"8466\",\n", - " },\n", + " region=arg_eez_roi,\n", ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "4d6190fc-a8fa-4d59-914e-ebae41943381", "metadata": {}, "outputs": [], @@ -300,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "d98ca51f-bb46-4edc-9a61-3e26c56bbcc8", "metadata": {}, "outputs": [ @@ -308,17 +387,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 40 entries, 0 to 39\n", + "\n", + "RangeIndex: 51 entries, 0 to 50\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 40 non-null object \n", + " 0 date 51 non-null str \n", " 1 detections 0 non-null object \n", " 2 flag 0 non-null object \n", - " 3 gear_type 40 non-null object \n", - " 4 hours 40 non-null float64\n", - " 5 vessel_ids 40 non-null int64 \n", + " 3 gear_type 51 non-null str \n", + " 4 hours 51 non-null float64\n", + " 5 vessel_ids 51 non-null int64 \n", " 6 vessel_id 0 non-null object \n", " 7 vessel_type 0 non-null object \n", " 8 entry_timestamp 0 non-null object \n", @@ -329,12 +408,12 @@ " 13 mmsi 0 non-null object \n", " 14 call_sign 0 non-null object \n", " 15 dataset 0 non-null object \n", - " 16 report_dataset 40 non-null object \n", + " 16 report_dataset 51 non-null str \n", " 17 ship_name 0 non-null object \n", " 18 lat 0 non-null object \n", " 19 lon 0 non-null object \n", - "dtypes: float64(1), int64(1), object(18)\n", - "memory usage: 6.4+ KB\n" + "dtypes: float64(1), int64(1), object(15), str(3)\n", + "memory usage: 8.1+ KB\n" ] } ], @@ -344,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "ce844c77-6cf0-4648-bdfb-3a72589c0785", "metadata": {}, "outputs": [ @@ -377,48 +456,48 @@ " \n", " \n", " 0\n", - " pots_and_traps\n", 
- " 92.029722\n", - " 3\n", + " inconclusive\n", + " 334.308056\n", + " 7\n", " \n", " \n", " 1\n", - " inconclusive\n", - " 215.623889\n", - " 2\n", + " set_longlines\n", + " 108.396111\n", + " 7\n", " \n", " \n", " 2\n", - " inconclusive\n", - " 98.155833\n", - " 2\n", + " pole_and_line\n", + " 9.884444\n", + " 1\n", " \n", " \n", " 3\n", - " set_longlines\n", - " 17.925833\n", - " 1\n", + " inconclusive\n", + " 137.272500\n", + " 2\n", " \n", " \n", " 4\n", - " other_purse_seines\n", - " 57.377500\n", - " 1\n", + " trawlers\n", + " 40260.790000\n", + " 229\n", " \n", " \n", "\n", "" ], "text/plain": [ - " gear_type hours vessel_ids\n", - "0 pots_and_traps 92.029722 3\n", - "1 inconclusive 215.623889 2\n", - "2 inconclusive 98.155833 2\n", - "3 set_longlines 17.925833 1\n", - "4 other_purse_seines 57.377500 1" + " gear_type hours vessel_ids\n", + "0 inconclusive 334.308056 7\n", + "1 set_longlines 108.396111 7\n", + "2 pole_and_line 9.884444 1\n", + "3 inconclusive 137.272500 2\n", + "4 trawlers 40260.790000 229" ] }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -429,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "id": "45319c2a-e82e-4a9a-8a2d-2ea6d9cdada4", "metadata": {}, "outputs": [], @@ -443,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "id": "121465c6-65ef-4efa-9d8a-4a0a136564bd", "metadata": {}, "outputs": [ @@ -475,34 +554,34 @@ " \n", " \n", " \n", - " 8\n", + " 9\n", " trawlers\n", - " 240642.414444\n", - " 1315\n", + " 216296.657778\n", + " 1261\n", " \n", " \n", - " 1\n", - " fishing\n", - " 16023.201389\n", - " 94\n", + " 8\n", + " squid_jigger\n", + " 51908.746667\n", + " 339\n", " \n", " \n", - " 7\n", - " squid_jigger\n", - " 6124.062222\n", - " 44\n", + " 1\n", + " fishing\n", + " 8480.469444\n", + " 74\n", " \n", " \n", " 2\n", " fixed_gear\n", - " 1948.325556\n", - " 16\n", + " 5068.472500\n", + " 25\n", " 
\n", " \n", " 3\n", " inconclusive\n", - " 1072.351389\n", - " 14\n", + " 678.016944\n", + " 17\n", " \n", " \n", "\n", @@ -510,14 +589,14 @@ ], "text/plain": [ " gear_type hours vessel_ids\n", - "8 trawlers 240642.414444 1315\n", - "1 fishing 16023.201389 94\n", - "7 squid_jigger 6124.062222 44\n", - "2 fixed_gear 1948.325556 16\n", - "3 inconclusive 1072.351389 14" + "9 trawlers 216296.657778 1261\n", + "8 squid_jigger 51908.746667 339\n", + "1 fishing 8480.469444 74\n", + "2 fixed_gear 5068.472500 25\n", + "3 inconclusive 678.016944 17" ] }, - "execution_count": 10, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -584,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "db35d1b2-ba32-4a58-97f3-17059b3de164", "metadata": {}, "outputs": [], @@ -594,19 +673,16 @@ " group_by=\"VESSEL_ID\",\n", " temporal_resolution=\"ENTIRE\",\n", " filters=[\"geartype in ('trawlers')\"],\n", - " start_date=\"2024-08-01\",\n", - " end_date=\"2025-01-31\",\n", + " start_date=start_date,\n", + " end_date=end_date,\n", " spatial_aggregation=True,\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"8466\",\n", - " },\n", + " region=arg_eez_roi,\n", ")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "301f888e-3b7e-4946-a7aa-496e05b53cbd", "metadata": {}, "outputs": [], @@ -616,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "866f84fb-50c4-47e2-95fa-43fbc06d17cd", "metadata": {}, "outputs": [ @@ -624,33 +700,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 385 entries, 0 to 384\n", + "\n", + "RangeIndex: 410 entries, 0 to 409\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 385 non-null object \n", + " 0 date 410 non-null str \n", " 1 detections 0 non-null object \n", - " 2 flag 385 non-null object 
\n", - " 3 gear_type 385 non-null object \n", - " 4 hours 385 non-null float64 \n", + " 2 flag 410 non-null str \n", + " 3 gear_type 410 non-null str \n", + " 4 hours 410 non-null float64 \n", " 5 vessel_ids 0 non-null object \n", - " 6 vessel_id 385 non-null object \n", - " 7 vessel_type 385 non-null object \n", - " 8 entry_timestamp 385 non-null datetime64[ns, UTC]\n", - " 9 exit_timestamp 385 non-null datetime64[ns, UTC]\n", - " 10 first_transmission_date 385 non-null datetime64[ns, UTC]\n", - " 11 last_transmission_date 385 non-null datetime64[ns, UTC]\n", - " 12 imo 385 non-null object \n", - " 13 mmsi 385 non-null object \n", - " 14 call_sign 385 non-null object \n", - " 15 dataset 385 non-null object \n", - " 16 report_dataset 385 non-null object \n", - " 17 ship_name 385 non-null object \n", + " 6 vessel_id 410 non-null str \n", + " 7 vessel_type 410 non-null str \n", + " 8 entry_timestamp 410 non-null datetime64[us, UTC]\n", + " 9 exit_timestamp 410 non-null datetime64[us, UTC]\n", + " 10 first_transmission_date 406 non-null datetime64[us, UTC]\n", + " 11 last_transmission_date 406 non-null datetime64[us, UTC]\n", + " 12 imo 410 non-null str \n", + " 13 mmsi 410 non-null str \n", + " 14 call_sign 410 non-null str \n", + " 15 dataset 410 non-null str \n", + " 16 report_dataset 410 non-null str \n", + " 17 ship_name 410 non-null str \n", " 18 lat 0 non-null object \n", " 19 lon 0 non-null object \n", - "dtypes: datetime64[ns, UTC](4), float64(1), object(15)\n", - "memory usage: 60.3+ KB\n" + "dtypes: datetime64[us, UTC](4), float64(1), object(4), str(11)\n", + "memory usage: 64.2+ KB\n" ] } ], @@ -660,7 +736,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "id": "82a786a1-c437-430e-aeb5-f61d02a5f900", "metadata": {}, "outputs": [ @@ -697,56 +773,56 @@ " 0\n", " ARG\n", " TRAWLERS\n", - " 714.842500\n", - " 701000882\n", - " FELIX AUGUSTO\n", + " 190.147500\n", + " 701000714\n", + " EL MARISCO I\n", " \n", " \n", " 1\n", " 
ARG\n", " TRAWLERS\n", - " 641.522222\n", - " 701000932\n", - " ANTONIO ALVAREZ\n", + " 114.457500\n", + " 701147000\n", + " HUAFENG828\n", " \n", " \n", " 2\n", - " URY\n", + " CHN\n", " TRAWLERS\n", - " 10.023056\n", - " 770576463\n", - " KALATXORI\n", + " 32.674444\n", + " 412549071\n", + " LU QING YUAN YU 225\n", " \n", " \n", " 3\n", " ARG\n", " TRAWLERS\n", - " 326.160833\n", - " 701079000\n", - " ENTRENA UNO\n", + " 454.279722\n", + " 701006390\n", + " MADRE MARGARITA\n", " \n", " \n", " 4\n", " ARG\n", " TRAWLERS\n", - " 295.007500\n", - " 701000820\n", - " CORAJE\n", + " 1255.525278\n", + " 701006050\n", + " SAN MATIAS\n", " \n", " \n", "\n", "" ], "text/plain": [ - " flag gear_type hours mmsi ship_name\n", - "0 ARG TRAWLERS 714.842500 701000882 FELIX AUGUSTO\n", - "1 ARG TRAWLERS 641.522222 701000932 ANTONIO ALVAREZ\n", - "2 URY TRAWLERS 10.023056 770576463 KALATXORI\n", - "3 ARG TRAWLERS 326.160833 701079000 ENTRENA UNO\n", - "4 ARG TRAWLERS 295.007500 701000820 CORAJE" + " flag gear_type hours mmsi ship_name\n", + "0 ARG TRAWLERS 190.147500 701000714 EL MARISCO I\n", + "1 ARG TRAWLERS 114.457500 701147000 HUAFENG828\n", + "2 CHN TRAWLERS 32.674444 412549071 LU QING YUAN YU 225\n", + "3 ARG TRAWLERS 454.279722 701006390 MADRE MARGARITA\n", + "4 ARG TRAWLERS 1255.525278 701006050 SAN MATIAS" ] }, - "execution_count": 14, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -765,7 +841,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "id": "984052a3-3920-4fce-8894-4b67af4a8a69", "metadata": {}, "outputs": [], @@ -779,9 +855,50 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, + "id": "12aa2878-e988-416d-bcf7-a2db1e7f7f62", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 396.000000\n", + "mean 546.203681\n", + "std 565.570234\n", + "min 0.102500\n", + "25% 82.493125\n", + "50% 375.651944\n", + "75% 834.973889\n", + "max 2647.745000\n", + "Name: 
hours, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "step_2_agg_report_df[\"hours\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, "id": "83e4dafb-4491-40c9-9f4d-f4cc1e2b7766", "metadata": {}, + "outputs": [], + "source": [ + "step_2_agg_report_mask = step_2_agg_report_df[\"hours\"] >= step_2_agg_report_df[\n", + " \"hours\"\n", + "].quantile(0.99)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "22249cb6-e1b1-439d-be41-f50308b94700", + "metadata": {}, "outputs": [ { "data": { @@ -813,44 +930,36 @@ " \n", " \n", " \n", - " 297\n", - " ARG\n", - " TRAWLERS\n", - " 701024000\n", - " ATLANTIC SURF III\n", - " 3151.855278\n", - " \n", - " \n", - " 247\n", + " 21\n", " ARG\n", " TRAWLERS\n", - " 701006605\n", - " CAPESANTE\n", - " 2270.097500\n", + " 701000577\n", + " MISS TIDE\n", + " 2647.745000\n", " \n", " \n", - " 22\n", + " 231\n", " ARG\n", " TRAWLERS\n", - " 701000577\n", - " MISS TIDE\n", - " 2244.895833\n", + " 701006445\n", + " API V\n", + " 2573.038889\n", " \n", " \n", - " 296\n", + " 281\n", " ARG\n", " TRAWLERS\n", - " 701023000\n", - " CAROLINA P\n", - " 2065.586944\n", + " 701024000\n", + " ATLANTIC SURF III\n", + " 2461.395000\n", " \n", " \n", - " 301\n", + " 285\n", " ARG\n", " TRAWLERS\n", " 701037000\n", " DON PEDRO\n", - " 1807.146111\n", + " 2304.829444\n", " \n", " \n", "\n", @@ -858,20 +967,19 @@ ], "text/plain": [ " flag gear_type mmsi ship_name hours\n", - "297 ARG TRAWLERS 701024000 ATLANTIC SURF III 3151.855278\n", - "247 ARG TRAWLERS 701006605 CAPESANTE 2270.097500\n", - "22 ARG TRAWLERS 701000577 MISS TIDE 2244.895833\n", - "296 ARG TRAWLERS 701023000 CAROLINA P 2065.586944\n", - "301 ARG TRAWLERS 701037000 DON PEDRO 1807.146111" + "21 ARG TRAWLERS 701000577 MISS TIDE 2647.745000\n", + "231 ARG TRAWLERS 701006445 API V 2573.038889\n", + "281 ARG TRAWLERS 701024000 ATLANTIC SURF III 2461.395000\n", + "285 ARG 
TRAWLERS 701037000 DON PEDRO 2304.829444" ] }, - "execution_count": 16, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_2_agg_report_df.head()" + "step_2_agg_report_df[step_2_agg_report_mask]" ] }, { @@ -887,9 +995,7 @@ "id": "d4fb7cba-a577-45fe-88fa-8e5001bb9869", "metadata": {}, "source": [ - "- There are vessels appear to have been engaged in potential trawling activity in Argentinian EEZ over the past 6 months i.e.,:\n", - " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)`\n", - " - `CAPESANTE (mmsi: 701006605, flag: ARG)`\n", + "- There are vessels appear to have been engaged in potential **trawling activity** in Argentinian EEZ over the past 6 months.\n", "- We will retrieve these vessels' `ownership`, `flag history`, and `authorizations` in **Step 3 to validate** them." ] }, @@ -931,27 +1037,27 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 24, "id": "24e8452f-7848-4c88-89db-66e4bf182cd1", "metadata": {}, "outputs": [], "source": [ - "step_2_vessel_mmsis = list(step_2_agg_report_df[\"mmsi\"].head(n=2))" + "step_2_vessel_mmsis = list(step_2_agg_report_df[step_2_agg_report_mask][\"mmsi\"])" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 25, "id": "f25ceec0-3188-4584-bb95-f76c9e0ac578", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['701024000', '701006605']" + "['701000577', '701006445', '701024000', '701037000']" ] }, - "execution_count": 18, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -962,7 +1068,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 26, "id": "446b506a-a19f-49cd-9110-ed8b9103af8b", "metadata": {}, "outputs": [], @@ -976,18 +1082,20 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 27, "id": "6233b1eb-ed4b-4ba9-bce8-faa1ee027dcc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['de8a03acd-dc6c-8e08-2867-24e55ffc0017',\n", + 
"['4723e8576-6ec2-f4a7-2bc6-3bdb68f05a2a',\n", + " '42b038c49-9432-46d4-042a-a730333e6510',\n", + " '75184b8b0-0b20-6876-48e5-6582ba27ce50',\n", " '8e930bac5-594b-aa3f-081d-d12668819e1f']" ] }, - "execution_count": 20, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -998,7 +1106,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 28, "id": "428889bd-4761-4301-b21e-fbc37eba5622", "metadata": {}, "outputs": [], @@ -1010,7 +1118,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 29, "id": "c64d68dc-b952-412f-827c-df5236c98bd7", "metadata": {}, "outputs": [], @@ -1020,7 +1128,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 30, "id": "757e6c3c-d0df-4b8f-839d-1e31518d42d7", "metadata": {}, "outputs": [ @@ -1028,20 +1136,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 2 entries, 0 to 1\n", + "\n", + "RangeIndex: 4 entries, 0 to 3\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 2 non-null object\n", - " 1 registry_info_total_records 2 non-null int64 \n", - " 2 registry_info 2 non-null object\n", - " 3 registry_owners 2 non-null object\n", - " 4 registry_public_authorizations 2 non-null object\n", - " 5 combined_sources_info 2 non-null object\n", - " 6 self_reported_info 2 non-null object\n", - "dtypes: int64(1), object(6)\n", - "memory usage: 244.0+ bytes\n" + " 0 dataset 4 non-null str \n", + " 1 registry_info_total_records 4 non-null int64 \n", + " 2 registry_info 4 non-null object\n", + " 3 registry_owners 4 non-null object\n", + " 4 registry_public_authorizations 4 non-null object\n", + " 5 combined_sources_info 4 non-null object\n", + " 6 self_reported_info 4 non-null object\n", + "dtypes: int64(1), object(5), str(1)\n", + "memory usage: 356.0+ bytes\n" ] } ], @@ -1066,7 +1174,7 @@ }, { "cell_type": "code", - "execution_count": 24, + 
"execution_count": 31, "id": "4767e025-e26e-4a61-8971-8f2cc90385cc", "metadata": {}, "outputs": [ @@ -1099,35 +1207,53 @@ " \n", " \n", " 0\n", - " [{'id': '2d939efefd3f45788ed103ff0723f564', 's...\n", - " [{'name': 'CLEARWATER SEAFOODS', 'flag': 'CAN'...\n", - " [{'id': 'de8a03acd-dc6c-8e08-2867-24e55ffc0017...\n", - " \n", - " \n", - " 1\n", " [{'id': '45502524c9a150e77869ee647423dba1', 's...\n", " [{'name': 'GLACIAR PESQUERA', 'flag': 'ARG', '...\n", " [{'id': '8e930bac5-594b-aa3f-081d-d12668819e1f...\n", " \n", + " \n", + " 1\n", + " [{'id': '0b811955df98f5dbf52e5076e8ab5645', 's...\n", + " [{'name': 'PEDRO MOSCUZZA HIJOS', 'flag': 'ARG...\n", + " [{'id': '4723e8576-6ec2-f4a7-2bc6-3bdb68f05a2a...\n", + " \n", + " \n", + " 2\n", + " [{'id': '582ac67871378cea17051ee3fc7c2821', 's...\n", + " [{'name': 'WANCHESE ARGENTINA', 'flag': 'ARG',...\n", + " [{'id': '75184b8b0-0b20-6876-48e5-6582ba27ce50...\n", + " \n", + " \n", + " 3\n", + " [{'id': 'ac9183ec6e744b635009f58d7a7e3d92', 's...\n", + " [{'name': 'IBERCONSA DE ARGENTINA', 'flag': 'A...\n", + " [{'id': '42b038c49-9432-46d4-042a-a730333e6510...\n", + " \n", " \n", "\n", "" ], "text/plain": [ " registry_info \\\n", - "0 [{'id': '2d939efefd3f45788ed103ff0723f564', 's... \n", - "1 [{'id': '45502524c9a150e77869ee647423dba1', 's... \n", + "0 [{'id': '45502524c9a150e77869ee647423dba1', 's... \n", + "1 [{'id': '0b811955df98f5dbf52e5076e8ab5645', 's... \n", + "2 [{'id': '582ac67871378cea17051ee3fc7c2821', 's... \n", + "3 [{'id': 'ac9183ec6e744b635009f58d7a7e3d92', 's... \n", "\n", " registry_owners \\\n", - "0 [{'name': 'CLEARWATER SEAFOODS', 'flag': 'CAN'... \n", - "1 [{'name': 'GLACIAR PESQUERA', 'flag': 'ARG', '... \n", + "0 [{'name': 'GLACIAR PESQUERA', 'flag': 'ARG', '... \n", + "1 [{'name': 'PEDRO MOSCUZZA HIJOS', 'flag': 'ARG... \n", + "2 [{'name': 'WANCHESE ARGENTINA', 'flag': 'ARG',... \n", + "3 [{'name': 'IBERCONSA DE ARGENTINA', 'flag': 'A... 
\n", "\n", " self_reported_info \n", - "0 [{'id': 'de8a03acd-dc6c-8e08-2867-24e55ffc0017... \n", - "1 [{'id': '8e930bac5-594b-aa3f-081d-d12668819e1f... " + "0 [{'id': '8e930bac5-594b-aa3f-081d-d12668819e1f... \n", + "1 [{'id': '4723e8576-6ec2-f4a7-2bc6-3bdb68f05a2a... \n", + "2 [{'id': '75184b8b0-0b20-6876-48e5-6582ba27ce50... \n", + "3 [{'id': '42b038c49-9432-46d4-042a-a730333e6510... " ] }, - "execution_count": 24, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1146,19 +1272,31 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 32, + "id": "0a7b763a-9b37-4d90-9dc2-328f8055e568", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_has_registry_info_mask = step_3_vessels_df[\n", + " \"registry_info\"\n", + "].notna() & step_3_vessels_df[\"registry_info\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, "id": "7efdc6d6-52d8-40bb-9537-e09f242fdb25", "metadata": {}, "outputs": [], "source": [ "step_3_registry_info_df = pd.json_normalize(\n", - " step_3_vessels_df[\"registry_info\"].explode()\n", + " step_3_vessels_df[step_3_has_registry_info_mask][\"registry_info\"].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 34, "id": "27ab60a9-6496-4406-9ae4-57a7aa97f6fa", "metadata": {}, "outputs": [ @@ -1194,48 +1332,70 @@ " \n", " \n", " 0\n", - " 701006605\n", + " 701024000\n", " ARG\n", - " CAPESANTE\n", - " CAPESANTE\n", + " ATLANTIC SURF III\n", + " ATLANTICSURF3\n", " [TRAWLERS]\n", - " [GFW-REVIEW, IMO, RESEARCH-PAPER, TMT_OTHER_OF...\n", + " [IMO, SNP]\n", " \n", " \n", " 1\n", - " 316003980\n", - " CAN\n", - " ATLANTICLEADER\n", - " ATLANTICLEADER\n", + " 701037000\n", + " ARG\n", + " DON PEDRO.\n", + " DONPEDRO\n", " [TRAWLERS]\n", - " [IMO, TMT_OTHER_OFFICIAL]\n", + " [IMO, SNP]\n", " \n", " \n", " 2\n", - " 701024000\n", + " 701000577\n", " ARG\n", - " ATLANTIC SURF III\n", - " ATLANTICSURF3\n", + " MISS TIDE\n", + " MISSTIDE\n", " 
[TRAWLERS]\n", - " [IMO, TMT_OTHER_OFFICIAL]\n", + " [GFW-REVIEW, IMO]\n", + " \n", + " \n", + " 2\n", + " 700000577\n", + " ARG\n", + " MISS TIDE\n", + " MISSTIDE\n", + " [TRAWLERS]\n", + " [GFW-REVIEW, IMO, SNP]\n", + " \n", + " \n", + " 3\n", + " 701006445\n", + " ARG\n", + " API V\n", + " API5\n", + " [TRAWLERS]\n", + " [IMO, SNP]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag ship_name n_ship_name gear_types \\\n", - "0 701006605 ARG CAPESANTE CAPESANTE [TRAWLERS] \n", - "1 316003980 CAN ATLANTICLEADER ATLANTICLEADER [TRAWLERS] \n", - "2 701024000 ARG ATLANTIC SURF III ATLANTICSURF3 [TRAWLERS] \n", + " ssvid flag ship_name n_ship_name gear_types \\\n", + "0 701024000 ARG ATLANTIC SURF III ATLANTICSURF3 [TRAWLERS] \n", + "1 701037000 ARG DON PEDRO. DONPEDRO [TRAWLERS] \n", + "2 701000577 ARG MISS TIDE MISSTIDE [TRAWLERS] \n", + "2 700000577 ARG MISS TIDE MISSTIDE [TRAWLERS] \n", + "3 701006445 ARG API V API5 [TRAWLERS] \n", "\n", - " source_code \n", - "0 [GFW-REVIEW, IMO, RESEARCH-PAPER, TMT_OTHER_OF... 
\n", - "1 [IMO, TMT_OTHER_OFFICIAL] \n", - "2 [IMO, TMT_OTHER_OFFICIAL] " + " source_code \n", + "0 [IMO, SNP] \n", + "1 [IMO, SNP] \n", + "2 [GFW-REVIEW, IMO] \n", + "2 [GFW-REVIEW, IMO, SNP] \n", + "3 [IMO, SNP] " ] }, - "execution_count": 26, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1256,19 +1416,43 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 35, + "id": "343bf7e5-0bfe-4ffe-9a00-03cebf50250f", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_has_registry_owners_mask = step_3_vessels_df[\n", + " \"registry_owners\"\n", + "].notna() & step_3_vessels_df[\"registry_owners\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, "id": "afe07c2a-81cb-45b3-9e86-c23b4ef67fe9", "metadata": {}, "outputs": [], "source": [ "step_3_registry_owners_df = pd.json_normalize(\n", - " step_3_vessels_df[\"registry_owners\"].explode()\n", + " step_3_vessels_df[step_3_has_registry_owners_mask][\"registry_owners\"].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 37, + "id": "9ca347af-d3e8-4c77-9d3d-634044f84e88", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_registry_owners_match_registry_info_mask = step_3_registry_owners_df[\n", + " \"ssvid\"\n", + "].isin(step_3_registry_info_df[\"ssvid\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, "id": "df7e0b65-0ab0-4722-aaec-3d11c7674b8f", "metadata": {}, "outputs": [ @@ -1302,43 +1486,61 @@ " \n", " \n", " 0\n", - " 701006605\n", - " CAN\n", - " CLEARWATER SEAFOODS\n", - " [RESEARCH-PAPER]\n", + " 701024000\n", + " ARG\n", + " GLACIAR PESQUERA\n", + " [SNP, IMO]\n", " \n", " \n", " 1\n", - " 316003980\n", - " CAN\n", - " CS MANPAR\n", - " [TMT_OTHER_OFFICIAL]\n", + " 701037000\n", + " ARG\n", + " PEDRO MOSCUZZA HIJOS\n", + " [IMO, SNP]\n", " \n", " \n", " 2\n", - " 701024000\n", + " 701000577\n", " ARG\n", - " GLACIAR PESQUERA\n", - " [TMT_OTHER_OFFICIAL]\n", + " 
WANCHESE ARGENTINA\n", + " [IMO]\n", + " \n", + " \n", + " 2\n", + " 700000577\n", + " ARG\n", + " WANCHESE ARGENTINA\n", + " [SNP, IMO]\n", + " \n", + " \n", + " 3\n", + " 701006445\n", + " ARG\n", + " IBERCONSA DE ARGENTINA\n", + " [IMO, SNP]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag name source_code\n", - "0 701006605 CAN CLEARWATER SEAFOODS [RESEARCH-PAPER]\n", - "1 316003980 CAN CS MANPAR [TMT_OTHER_OFFICIAL]\n", - "2 701024000 ARG GLACIAR PESQUERA [TMT_OTHER_OFFICIAL]" + " ssvid flag name source_code\n", + "0 701024000 ARG GLACIAR PESQUERA [SNP, IMO]\n", + "1 701037000 ARG PEDRO MOSCUZZA HIJOS [IMO, SNP]\n", + "2 701000577 ARG WANCHESE ARGENTINA [IMO]\n", + "2 700000577 ARG WANCHESE ARGENTINA [SNP, IMO]\n", + "3 701006445 ARG IBERCONSA DE ARGENTINA [IMO, SNP]" ] }, - "execution_count": 28, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_3_registry_owners_df[[\"ssvid\", \"flag\", \"name\", \"source_code\"]]" + "step_3_registry_owners_df[step_3_registry_owners_match_registry_info_mask][\n", + " [\"ssvid\", \"flag\", \"name\", \"source_code\"]\n", + "]" ] }, { @@ -1351,19 +1553,45 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 39, + "id": "a3046adb-5d33-49e1-bca2-e3de3dbf340f", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_has_self_reported_info_mask = step_3_vessels_df[\n", + " \"self_reported_info\"\n", + "].notna() & step_3_vessels_df[\"self_reported_info\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, "id": "c2fa4bdd-1b51-4b37-abf5-aa18a1cab3e5", "metadata": {}, "outputs": [], "source": [ "step_3_self_reported_info_df = pd.json_normalize(\n", - " step_3_vessels_df[\"self_reported_info\"].explode()\n", + " step_3_vessels_df[step_3_has_self_reported_info_mask][\n", + " \"self_reported_info\"\n", + " ].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 41, + "id": 
"12298e45-ef2b-4b8b-ac6c-46c1d03f5490", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_self_reported_info_match_registry_info_mask = step_3_self_reported_info_df[\n", + " \"ssvid\"\n", + "].isin(step_3_registry_info_df[\"ssvid\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, "id": "d1b559c7-4587-4019-a843-181017dd3f07", "metadata": {}, "outputs": [ @@ -1398,26 +1626,74 @@ " \n", " \n", " 0\n", - " 701006605\n", + " 701024000\n", " ARG\n", - " CAPESANTE\n", - " CAPESANTE\n", + " ATLANTIC SURF III\n", + " ATLANTICSURF3\n", " [AIS]\n", " \n", " \n", " 1\n", - " 316003980\n", - " CAN\n", - " ATLANTIC LEADER\n", - " ATLANTICLEADER\n", + " 701037000\n", + " ARG\n", + " DON PEDRO\n", + " DONPEDRO\n", + " [AIS]\n", + " \n", + " \n", + " 1\n", + " 701037000\n", + " ARG\n", + " DP\n", + " DP\n", + " [AIS]\n", + " \n", + " \n", + " 1\n", + " 701037000\n", + " ARG\n", + " DON PEDRO\n", + " DONPEDRO\n", " [AIS]\n", " \n", " \n", " 2\n", - " 701024000\n", + " 701000577\n", " ARG\n", - " ATLANTIC SURF III\n", - " ATLANTICSURF3\n", + " MISS TIDE\n", + " MISSTIDE\n", + " [AIS]\n", + " \n", + " \n", + " 2\n", + " 700000577\n", + " NaN\n", + " MISS TIDE\n", + " MISSTIDE\n", + " [AIS]\n", + " \n", + " \n", + " 2\n", + " 701000577\n", + " ARG\n", + " MISS TIDE\n", + " MISSTIDE\n", + " [AIS]\n", + " \n", + " \n", + " 3\n", + " 701006445\n", + " ARG\n", + " API V\n", + " API5\n", + " [AIS]\n", + " \n", + " \n", + " 3\n", + " 701006445\n", + " ARG\n", + " API V\n", + " API5\n", " [AIS]\n", " \n", " \n", @@ -1425,19 +1701,25 @@ "" ], "text/plain": [ - " ssvid flag ship_name n_ship_name source_code\n", - "0 701006605 ARG CAPESANTE CAPESANTE [AIS]\n", - "1 316003980 CAN ATLANTIC LEADER ATLANTICLEADER [AIS]\n", - "2 701024000 ARG ATLANTIC SURF III ATLANTICSURF3 [AIS]" + " ssvid flag ship_name n_ship_name source_code\n", + "0 701024000 ARG ATLANTIC SURF III ATLANTICSURF3 [AIS]\n", + "1 701037000 ARG DON PEDRO DONPEDRO [AIS]\n", + "1 701037000 ARG DP DP [AIS]\n", + 
"1 701037000 ARG DON PEDRO DONPEDRO [AIS]\n", + "2 701000577 ARG MISS TIDE MISSTIDE [AIS]\n", + "2 700000577 NaN MISS TIDE MISSTIDE [AIS]\n", + "2 701000577 ARG MISS TIDE MISSTIDE [AIS]\n", + "3 701006445 ARG API V API5 [AIS]\n", + "3 701006445 ARG API V API5 [AIS]" ] }, - "execution_count": 30, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_3_self_reported_info_df[\n", + "step_3_self_reported_info_df[step_3_self_reported_info_match_registry_info_mask][\n", " [\"ssvid\", \"flag\", \"ship_name\", \"n_ship_name\", \"source_code\"]\n", "]" ] @@ -1456,11 +1738,15 @@ "metadata": {}, "source": [ "- **Vessel Identity:**\n", - " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)`- appears to be registered under Argentina (ARG)\n", - " - `CAPESANTE (mmsi: 701006605, flag: ARG)` - appears to be registered under Argentina (ARG)\n", + " - `MISS TIDE (mmsi: 701000577, flag: ARG)` - appears to be registered under Argentina (ARG)\n", + " - `API V (mmsi: 701006445, flag: ARG)` - appears to be registered under Argentina (ARG)\n", + " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)` - appears to be registered under Argentina (ARG)\n", + " - `DON PEDRO (mmsi: 701037000, flag: ARG)` - appears to be registered under Argentina (ARG)\n", "- **Ownership & Historical Changes:**\n", + " - `MISS TIDE (mmsi: 701000577, flag: ARG)` - **WANCHESE ARGENTINA** appears to be listed as the registered owner.\n", + " - `API V (mmsi: 701006445, flag: ARG)` - **IBERCONSA DE ARGENTINA** appears to be listed as the registered owner.\n", " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)` - **GLACIAR PESQUERA** appears to be listed as the registered owner.\n", - " - `CAPESANTE (mmsi: 701006605, flag: ARG)`- **CLEARWATER SEAFOODS** appears to be listed as the registered owner." + " - `DON PEDRO (mmsi: 701037000, flag: ARG)` - **PEDRO MOSCUZZA HIJOS** appears to be listed as the registered owner." 
] }, { @@ -1498,7 +1784,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 43, "id": "87c3b592-e431-457c-a5f4-5ce07711d52f", "metadata": {}, "outputs": [], @@ -1511,8 +1797,8 @@ " ],\n", " vessels=step_2_vessel_ids,\n", " types=[\"ENCOUNTER\", \"FISHING\", \"PORT_VISIT\"],\n", - " start_date=\"2024-08-01\",\n", - " end_date=\"2025-01-31\",\n", + " start_date=start_date,\n", + " end_date=end_date,\n", " encounter_types=[\"CARRIER-FISHING\"],\n", " sort=\"-start\",\n", ")" @@ -1520,7 +1806,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 44, "id": "64cb0776-7f0d-481e-be98-da00b448c720", "metadata": {}, "outputs": [], @@ -1530,7 +1816,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 45, "id": "4c82d647-3870-4a54-979b-7f586575e5c1", "metadata": {}, "outputs": [ @@ -1538,27 +1824,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 359 entries, 0 to 358\n", + "\n", + "RangeIndex: 929 entries, 0 to 928\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 start 359 non-null datetime64[ns, UTC]\n", - " 1 end 359 non-null datetime64[ns, UTC]\n", - " 2 id 359 non-null object \n", - " 3 type 359 non-null object \n", - " 4 position 359 non-null object \n", - " 5 regions 359 non-null object \n", - " 6 bounding_box 359 non-null object \n", - " 7 distances 359 non-null object \n", - " 8 vessel 359 non-null object \n", + " 0 start 929 non-null datetime64[us, UTC]\n", + " 1 end 929 non-null datetime64[us, UTC]\n", + " 2 id 929 non-null str \n", + " 3 type 929 non-null str \n", + " 4 position 929 non-null object \n", + " 5 regions 929 non-null object \n", + " 6 bounding_box 929 non-null object \n", + " 7 distances 929 non-null object \n", + " 8 vessel 929 non-null object \n", " 9 encounter 0 non-null object \n", - " 10 fishing 351 non-null object \n", + " 10 fishing 908 non-null object \n", " 11 gap 0 
non-null object \n", " 12 loitering 0 non-null object \n", - " 13 port_visit 8 non-null object \n", - "dtypes: datetime64[ns, UTC](2), object(12)\n", - "memory usage: 39.4+ KB\n" + " 13 port_visit 21 non-null object \n", + "dtypes: datetime64[us, UTC](2), object(10), str(2)\n", + "memory usage: 101.7+ KB\n" ] } ], @@ -1568,7 +1854,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 46, "id": "17d89920-cda2-497d-9797-1e05a84a0daa", "metadata": {}, "outputs": [ @@ -1576,12 +1862,12 @@ "data": { "text/plain": [ "type\n", - "fishing 351\n", - "port_visit 8\n", + "fishing 908\n", + "port_visit 21\n", "Name: count, dtype: int64" ] }, - "execution_count": 34, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -1600,7 +1886,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 47, "id": "4d360bc4-9762-4ae6-b5b1-1b937e2d2ed0", "metadata": {}, "outputs": [], @@ -1610,7 +1896,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 48, "id": "18ff38d6-9442-4742-9dc9-accfb1299e43", "metadata": {}, "outputs": [], @@ -1626,7 +1912,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 49, "id": "fc6ebae0-eba2-4a8a-892c-6dfc38822bd7", "metadata": {}, "outputs": [ @@ -1634,25 +1920,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 351 entries, 0 to 350\n", + "\n", + "Index: 908 entries, 10 to 927\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 351 non-null object \n", - " 1 name 351 non-null object \n", - " 2 ssvid 351 non-null object \n", - " 3 flag 351 non-null object \n", - " 4 type 351 non-null object \n", - " 5 public_authorizations 351 non-null object \n", + " 0 id 908 non-null str \n", + " 1 name 908 non-null str \n", + " 2 ssvid 908 non-null str \n", + " 3 flag 908 non-null str \n", + " 4 type 908 non-null str \n", + " 5 public_authorizations 908 
non-null object \n", " 6 nextPort 0 non-null object \n", - " 7 total_distance_km 351 non-null float64\n", - " 8 average_speed_knots 351 non-null float64\n", + " 7 total_distance_km 908 non-null float64\n", + " 8 average_speed_knots 908 non-null float64\n", " 9 average_duration_hours 0 non-null object \n", - " 10 potential_risk 351 non-null bool \n", - " 11 vessel_public_authorization_status 351 non-null object \n", - "dtypes: bool(1), float64(2), object(9)\n", - "memory usage: 30.6+ KB\n" + " 10 potential_risk 908 non-null bool \n", + " 11 vessel_public_authorization_status 908 non-null str \n", + "dtypes: bool(1), float64(2), object(3), str(6)\n", + "memory usage: 86.0+ KB\n" ] } ], @@ -1662,7 +1948,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 50, "id": "8799e988-56a2-41d5-b472-bc83064e1807", "metadata": {}, "outputs": [ @@ -1695,39 +1981,39 @@ " \n", " \n", " \n", - " 0\n", - " ATLANTIC SURF III\n", - " 701024000\n", - " 222.721610\n", - " 4.098386\n", + " 10\n", + " API V\n", + " 701006445\n", + " 21.281764\n", + " 4.159375\n", " \n", " \n", - " 1\n", - " ATLANTIC SURF III\n", - " 701024000\n", - " 7.905335\n", - " 4.590909\n", + " 11\n", + " API V\n", + " 701006445\n", + " 24.251514\n", + " 4.293750\n", " \n", " \n", - " 2\n", - " ATLANTIC SURF III\n", - " 701024000\n", - " 2.794681\n", - " 4.655555\n", + " 12\n", + " API V\n", + " 701006445\n", + " 59.370875\n", + " 4.660937\n", " \n", " \n", - " 3\n", - " ATLANTIC SURF III\n", - " 701024000\n", - " 2.012392\n", - " 2.953846\n", + " 13\n", + " API V\n", + " 701006445\n", + " 8.297152\n", + " 4.800000\n", " \n", " \n", - " 4\n", - " ATLANTIC SURF III\n", - " 701024000\n", - " 3.260040\n", - " 3.705556\n", + " 14\n", + " API V\n", + " 701006445\n", + " 49.013410\n", + " 3.661111\n", " \n", " \n", " ...\n", @@ -1737,63 +2023,63 @@ " ...\n", " \n", " \n", - " 346\n", - " CAPESANTE\n", - " 701006605\n", - " 25.568796\n", - " 4.074561\n", + " 923\n", + " ATLANTIC SURF III\n", + " 
701024000\n", + " 81.924980\n", + " 4.230000\n", " \n", " \n", - " 347\n", - " CAPESANTE\n", - " 701006605\n", - " 45.830980\n", - " 3.771852\n", + " 924\n", + " ATLANTIC SURF III\n", + " 701024000\n", + " 279.915578\n", + " 4.318182\n", " \n", " \n", - " 348\n", - " CAPESANTE\n", - " 701006605\n", - " 74.611830\n", - " 4.087912\n", + " 925\n", + " ATLANTIC SURF III\n", + " 701024000\n", + " 7.111741\n", + " 4.537500\n", " \n", " \n", - " 349\n", - " CAPESANTE\n", - " 701006605\n", - " 82.607525\n", - " 3.788187\n", + " 926\n", + " ATLANTIC SURF III\n", + " 701024000\n", + " 11.170620\n", + " 4.703226\n", " \n", " \n", - " 350\n", - " CAPESANTE\n", - " 701006605\n", - " 205.826282\n", - " 4.136848\n", + " 927\n", + " ATLANTIC SURF III\n", + " 701024000\n", + " 140.673516\n", + " 4.444764\n", " \n", " \n", "\n", - "

351 rows × 4 columns

\n", + "

908 rows × 4 columns

\n", "" ], "text/plain": [ " name ssvid total_distance_km average_speed_knots\n", - "0 ATLANTIC SURF III 701024000 222.721610 4.098386\n", - "1 ATLANTIC SURF III 701024000 7.905335 4.590909\n", - "2 ATLANTIC SURF III 701024000 2.794681 4.655555\n", - "3 ATLANTIC SURF III 701024000 2.012392 2.953846\n", - "4 ATLANTIC SURF III 701024000 3.260040 3.705556\n", + "10 API V 701006445 21.281764 4.159375\n", + "11 API V 701006445 24.251514 4.293750\n", + "12 API V 701006445 59.370875 4.660937\n", + "13 API V 701006445 8.297152 4.800000\n", + "14 API V 701006445 49.013410 3.661111\n", ".. ... ... ... ...\n", - "346 CAPESANTE 701006605 25.568796 4.074561\n", - "347 CAPESANTE 701006605 45.830980 3.771852\n", - "348 CAPESANTE 701006605 74.611830 4.087912\n", - "349 CAPESANTE 701006605 82.607525 3.788187\n", - "350 CAPESANTE 701006605 205.826282 4.136848\n", + "923 ATLANTIC SURF III 701024000 81.924980 4.230000\n", + "924 ATLANTIC SURF III 701024000 279.915578 4.318182\n", + "925 ATLANTIC SURF III 701024000 7.111741 4.537500\n", + "926 ATLANTIC SURF III 701024000 11.170620 4.703226\n", + "927 ATLANTIC SURF III 701024000 140.673516 4.444764\n", "\n", - "[351 rows x 4 columns]" + "[908 rows x 4 columns]" ] }, - "execution_count": 38, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1811,7 +2097,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 51, "id": "3659c048-c8bf-463c-a6cb-fc29f818ed77", "metadata": {}, "outputs": [ @@ -1819,12 +2105,14 @@ "data": { "text/plain": [ "ssvid\n", - "701024000 245\n", - "701006605 106\n", + "701024000 297\n", + "701037000 261\n", + "701000577 215\n", + "701006445 135\n", "Name: count, dtype: int64" ] }, - "execution_count": 39, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -1843,7 +2131,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 52, "id": "e806616b-6ed7-4c0c-9bf4-2b4a23de2046", "metadata": {}, "outputs": [], @@ -1853,7 
+2141,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 53, "id": "58a68605-9432-43fe-b8eb-4d0393f95f60", "metadata": {}, "outputs": [], @@ -1869,7 +2157,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 54, "id": "685a746e-8091-4321-b851-79a5f3ca0f81", "metadata": {}, "outputs": [ @@ -1877,50 +2165,50 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 8 entries, 0 to 7\n", + "\n", + "Index: 21 entries, 0 to 928\n", "Data columns (total 37 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 8 non-null object \n", - " 1 name 8 non-null object \n", - " 2 ssvid 8 non-null object \n", - " 3 flag 8 non-null object \n", - " 4 type 8 non-null object \n", - " 5 public_authorizations 8 non-null object \n", + " 0 id 21 non-null str \n", + " 1 name 21 non-null str \n", + " 2 ssvid 21 non-null str \n", + " 3 flag 21 non-null str \n", + " 4 type 21 non-null str \n", + " 5 public_authorizations 21 non-null object \n", " 6 nextPort 0 non-null object \n", - " 7 visit_id 8 non-null object \n", - " 8 confidence 8 non-null object \n", - " 9 duration_hrs 8 non-null float64\n", - " 10 start_anchorage_anchorage_id 8 non-null object \n", - " 11 start_anchorage_at_dock 8 non-null bool \n", - " 12 start_anchorage_distance_from_shore_km 8 non-null float64\n", - " 13 start_anchorage_flag 8 non-null object \n", - " 14 start_anchorage_id 8 non-null object \n", - " 15 start_anchorage_lat 8 non-null float64\n", - " 16 start_anchorage_lon 8 non-null float64\n", - " 17 start_anchorage_name 8 non-null object \n", - " 18 start_anchorage_top_destination 8 non-null object \n", - " 19 intermediate_anchorage_anchorage_id 8 non-null object \n", - " 20 intermediate_anchorage_at_dock 8 non-null bool \n", - " 21 intermediate_anchorage_distance_from_shore_km 8 non-null float64\n", - " 22 intermediate_anchorage_flag 8 non-null object \n", - " 23 intermediate_anchorage_id 8 non-null 
object \n", - " 24 intermediate_anchorage_lat 8 non-null float64\n", - " 25 intermediate_anchorage_lon 8 non-null float64\n", - " 26 intermediate_anchorage_name 8 non-null object \n", - " 27 intermediate_anchorage_top_destination 8 non-null object \n", - " 28 end_anchorage_anchorage_id 8 non-null object \n", - " 29 end_anchorage_at_dock 8 non-null bool \n", - " 30 end_anchorage_distance_from_shore_km 8 non-null float64\n", - " 31 end_anchorage_flag 8 non-null object \n", - " 32 end_anchorage_id 8 non-null object \n", - " 33 end_anchorage_lat 8 non-null float64\n", - " 34 end_anchorage_lon 8 non-null float64\n", - " 35 end_anchorage_name 8 non-null object \n", - " 36 end_anchorage_top_destination 8 non-null object \n", - "dtypes: bool(3), float64(10), object(24)\n", - "memory usage: 2.3+ KB\n" + " 7 visit_id 21 non-null str \n", + " 8 confidence 21 non-null str \n", + " 9 duration_hrs 21 non-null float64\n", + " 10 start_anchorage_anchorage_id 21 non-null str \n", + " 11 start_anchorage_at_dock 21 non-null bool \n", + " 12 start_anchorage_distance_from_shore_km 21 non-null float64\n", + " 13 start_anchorage_flag 21 non-null str \n", + " 14 start_anchorage_id 21 non-null str \n", + " 15 start_anchorage_lat 21 non-null float64\n", + " 16 start_anchorage_lon 21 non-null float64\n", + " 17 start_anchorage_name 17 non-null str \n", + " 18 start_anchorage_top_destination 21 non-null str \n", + " 19 intermediate_anchorage_anchorage_id 21 non-null str \n", + " 20 intermediate_anchorage_at_dock 21 non-null bool \n", + " 21 intermediate_anchorage_distance_from_shore_km 21 non-null float64\n", + " 22 intermediate_anchorage_flag 21 non-null str \n", + " 23 intermediate_anchorage_id 21 non-null str \n", + " 24 intermediate_anchorage_lat 21 non-null float64\n", + " 25 intermediate_anchorage_lon 21 non-null float64\n", + " 26 intermediate_anchorage_name 17 non-null str \n", + " 27 intermediate_anchorage_top_destination 21 non-null str \n", + " 28 end_anchorage_anchorage_id 21 
non-null str \n", + " 29 end_anchorage_at_dock 21 non-null bool \n", + " 30 end_anchorage_distance_from_shore_km 21 non-null float64\n", + " 31 end_anchorage_flag 21 non-null str \n", + " 32 end_anchorage_id 21 non-null str \n", + " 33 end_anchorage_lat 21 non-null float64\n", + " 34 end_anchorage_lon 21 non-null float64\n", + " 35 end_anchorage_name 17 non-null str \n", + " 36 end_anchorage_top_destination 21 non-null str \n", + "dtypes: bool(3), float64(10), object(2), str(22)\n", + "memory usage: 5.8+ KB\n" ] } ], @@ -1930,7 +2218,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 55, "id": "0e77b956-f0f8-4fbd-8a49-0116e4401c12", "metadata": {}, "outputs": [ @@ -1966,8 +2254,8 @@ " \n", " \n", " 0\n", - " ATLANTIC SURF III\n", - " 701024000\n", + " DON PEDRO\n", + " 701037000\n", " 4\n", " MAR DEL PLATA\n", " MAR DEL PLATA\n", @@ -1975,17 +2263,17 @@ " \n", " \n", " 1\n", - " CAPESANTE\n", - " 701006605\n", + " DON PEDRO\n", + " 701037000\n", " 4\n", - " USHUAIA\n", - " USHUAIA\n", - " USHUAIA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", " \n", " \n", " 2\n", - " ATLANTIC SURF III\n", - " 701024000\n", + " MISS TIDE\n", + " 701000577\n", " 4\n", " MAR DEL PLATA\n", " MAR DEL PLATA\n", @@ -1993,12 +2281,12 @@ " \n", " \n", " 3\n", - " CAPESANTE\n", - " 701006605\n", + " ATLANTIC SURF III\n", + " 701024000\n", " 4\n", - " USHUAIA\n", - " USHUAIA\n", - " USHUAIA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", " \n", " \n", " 4\n", @@ -2011,15 +2299,87 @@ " \n", " \n", " 5\n", - " CAPESANTE\n", - " 701006605\n", + " MISS TIDE\n", + " 701000577\n", " 4\n", - " USHUAIA\n", - " USHUAIA\n", - " USHUAIA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", " \n", " \n", " 6\n", + " MISS TIDE\n", + " 701000577\n", + " 4\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 7\n", + " MISS TIDE\n", + " 701000577\n", + " 4\n", + " MAR DEL PLATA\n", + 
" MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 8\n", + " MISS TIDE\n", + " 701000577\n", + " 4\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 9\n", + " MISS TIDE\n", + " 701000577\n", + " 4\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 146\n", + " API V\n", + " 701006445\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 147\n", + " API V\n", + " 701006445\n", + " 4\n", + " PUERTO DESEADO\n", + " PUERTO DESEADO\n", + " PUERTO DESEADO\n", + " \n", + " \n", + " 148\n", + " API V\n", + " 701006445\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 149\n", + " API V\n", + " 701006445\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 410\n", " ATLANTIC SURF III\n", " 701024000\n", " 4\n", @@ -2028,41 +2388,112 @@ " MAR DEL PLATA\n", " \n", " \n", - " 7\n", - " CAPESANTE\n", - " 701006605\n", + " 411\n", + " ATLANTIC SURF III\n", + " 701024000\n", + " 3\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 412\n", + " API V\n", + " 701006445\n", " 4\n", - " USHUAIA\n", - " USHUAIA\n", - " USHUAIA\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 413\n", + " API V\n", + " 701006445\n", + " 4\n", + " PUERTO DESEADO\n", + " PUERTO DESEADO\n", + " PUERTO DESEADO\n", + " \n", + " \n", + " 565\n", + " MISS TIDE\n", + " 701000577\n", + " 4\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 608\n", + " MISS TIDE\n", + " 701000577\n", + " 4\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " \n", + " \n", + " 928\n", + " ATLANTIC SURF III\n", + " 701024000\n", + " 4\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", + " MAR DEL PLATA\n", " \n", " \n", "\n", "" ], "text/plain": [ - " name ssvid confidence start_anchorage_name \\\n", - "0 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", - "1 CAPESANTE 
701006605 4 USHUAIA \n", - "2 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", - "3 CAPESANTE 701006605 4 USHUAIA \n", - "4 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", - "5 CAPESANTE 701006605 4 USHUAIA \n", - "6 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", - "7 CAPESANTE 701006605 4 USHUAIA \n", + " name ssvid confidence start_anchorage_name \\\n", + "0 DON PEDRO 701037000 4 MAR DEL PLATA \n", + "1 DON PEDRO 701037000 4 MAR DEL PLATA \n", + "2 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "3 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", + "4 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", + "5 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "6 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "7 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "8 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "9 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "146 API V 701006445 4 NaN \n", + "147 API V 701006445 4 PUERTO DESEADO \n", + "148 API V 701006445 4 NaN \n", + "149 API V 701006445 4 NaN \n", + "410 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", + "411 ATLANTIC SURF III 701024000 3 MAR DEL PLATA \n", + "412 API V 701006445 4 NaN \n", + "413 API V 701006445 4 PUERTO DESEADO \n", + "565 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "608 MISS TIDE 701000577 4 MAR DEL PLATA \n", + "928 ATLANTIC SURF III 701024000 4 MAR DEL PLATA \n", "\n", - " intermediate_anchorage_name end_anchorage_name \n", - "0 MAR DEL PLATA MAR DEL PLATA \n", - "1 USHUAIA USHUAIA \n", - "2 MAR DEL PLATA MAR DEL PLATA \n", - "3 USHUAIA USHUAIA \n", - "4 MAR DEL PLATA MAR DEL PLATA \n", - "5 USHUAIA USHUAIA \n", - "6 MAR DEL PLATA MAR DEL PLATA \n", - "7 USHUAIA USHUAIA " + " intermediate_anchorage_name end_anchorage_name \n", + "0 MAR DEL PLATA MAR DEL PLATA \n", + "1 MAR DEL PLATA MAR DEL PLATA \n", + "2 MAR DEL PLATA MAR DEL PLATA \n", + "3 MAR DEL PLATA MAR DEL PLATA \n", + "4 MAR DEL PLATA MAR DEL PLATA \n", + "5 MAR DEL PLATA MAR DEL PLATA \n", + "6 MAR DEL PLATA MAR DEL PLATA \n", + "7 MAR DEL PLATA MAR DEL PLATA 
\n", + "8 MAR DEL PLATA MAR DEL PLATA \n", + "9 MAR DEL PLATA MAR DEL PLATA \n", + "146 NaN NaN \n", + "147 PUERTO DESEADO PUERTO DESEADO \n", + "148 NaN NaN \n", + "149 NaN NaN \n", + "410 MAR DEL PLATA MAR DEL PLATA \n", + "411 MAR DEL PLATA MAR DEL PLATA \n", + "412 NaN NaN \n", + "413 PUERTO DESEADO PUERTO DESEADO \n", + "565 MAR DEL PLATA MAR DEL PLATA \n", + "608 MAR DEL PLATA MAR DEL PLATA \n", + "928 MAR DEL PLATA MAR DEL PLATA " ] }, - "execution_count": 43, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -2082,7 +2513,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 56, "id": "32ee1871-f1c7-4561-9384-5a681293e7e5", "metadata": {}, "outputs": [ @@ -2090,12 +2521,14 @@ "data": { "text/plain": [ "ssvid\n", - "701024000 4\n", - "701006605 4\n", + "701000577 8\n", + "701006445 6\n", + "701024000 5\n", + "701037000 2\n", "Name: count, dtype: int64" ] }, - "execution_count": 44, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -2118,11 +2551,15 @@ "metadata": {}, "source": [ "- **Apparent Fishing Events:**\n", - " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)`- has been detected in multiple apparent fishing events during the analyzed timeframe (August 2024 – January 2025)\n", - " - `CAPESANTE (mmsi: 701006605, flag: ARG)` - has been detected in multiple apparent fishing events during the analyzed timeframe (August 2024 – January 2025)\n", + " - `MISS TIDE (mmsi: 701000577, flag: ARG)` - has been detected in multiple apparent fishing events over the past 6 months.\n", + " - `API V (mmsi: 701006445, flag: ARG)` - has been detected in multiple apparent fishing events over the past 6 months.\n", + " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)` - has been detected in multiple apparent fishing events over the past 6 months.\n", + " - `DON PEDRO (mmsi: 701037000, flag: ARG)` - has been detected in multiple apparent fishing events over the past 6 months.\n", "- **Port Visit 
Events:**\n", + " - `MISS TIDE (mmsi: 701000577, flag: ARG)`- potentially made multiple port visits, including stops at `MAR DEL PLATA`\n", + " - `API V (mmsi: 701006445, flag: ARG)`- potentially made multiple port visits, including stops at `PUERTO DESEADO`\n", " - `ATLANTIC SURF III (mmsi: 701024000, flag: ARG)`- potentially made multiple port visits, including stops at `MAR DEL PLATA`\n", - " - `CAPESANTE (mmsi: 701006605, flag: ARG)` - potentially made multiple port visits, including stops at `USHUAIA`\n", + " - `DON PEDRO (mmsi: 701037000, flag: ARG)` - potentially made multiple port visits, including stops at `MAR DEL PLATA`\n", "- **ENCOUNTER Events:** No explicit **ENCOUNTER** events were returned in the response dataset. Check more details [here](https://globalfishingwatch.org/faqs/what-is-a-vessel-encounter/). You can read more about transshipment behavior from our [report](https://globalfishingwatch.org/wp-content/uploads/GlobalViewOfTransshipment_Aug2017.pdf) or [scientific publication](https://www.frontiersin.org/articles/10.3389/fmars.2018.00240/full)." ] }, diff --git a/notebooks/workflow-guides/workflow-03-analyze-fleet-in-ghanaian-eez.ipynb b/notebooks/workflow-guides/workflow-03-analyze-fleet-in-ghanaian-eez.ipynb index 76f7033..bf0ebad 100644 --- a/notebooks/workflow-guides/workflow-03-analyze-fleet-in-ghanaian-eez.ipynb +++ b/notebooks/workflow-guides/workflow-03-analyze-fleet-in-ghanaian-eez.ipynb @@ -97,6 +97,7 @@ "metadata": {}, "outputs": [], "source": [ + "import datetime\n", "import os\n", "\n", "import pandas as pd\n", @@ -227,6 +228,46 @@ "For **[Ghanaian EEZ, the region ID is 8400](https://www.marineregions.org/gazetteer.php?p=details&id=8400)** (public-eez-areas dataset)." 
] }, + { + "cell_type": "markdown", + "id": "e0829c64-bf58-4981-b779-3318f101a34c", + "metadata": {}, + "source": [ + "**Note:** See how to use the [Reference Data API - Usage Guides](https://globalfishingwatch.github.io/gfw-api-python-client/usage-guides/references-data-api.html) to obtain and filter predefined [**Regions of Interest (ROIs)**](https://globalfishingwatch.org/our-apis/documentation#regions), such as Exclusive Economic Zones (**EEZs**), Marine Protected Areas (**MPAs**), and Regional Fisheries Management Organizations (**RFMOs**)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b108e8da-3f58-4e20-a95c-52d2ce220c3a", + "metadata": {}, + "outputs": [], + "source": [ + "eez_rois_result = await gfw_client.references.get_eez_regions(iso3=\"GHA\")\n", + "gha_eez_roi = eez_rois_result.data()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c4eb8873-407b-4585-aa56-7db26452c7f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('8400', 'public-eez-areas', 'Ghanaian Exclusive Economic Zone', 'GHA')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gha_eez_roi.id, gha_eez_roi.dataset, gha_eez_roi.label, gha_eez_roi.iso3" + ] + }, { "cell_type": "markdown", "id": "6fe5edf3-8e96-4147-8c59-a6bc8982662d", @@ -257,7 +298,48 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, + "id": "03547f92-8e6f-453e-bfa0-319f7de19f57", + "metadata": {}, + "outputs": [], + "source": [ + "end_date = datetime.date.today()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ee5c713a-7338-4b23-a246-d1c867042370", + "metadata": {}, + "outputs": [], + "source": [ + "start_date = end_date - datetime.timedelta(weeks=52)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "40174e1d-3df3-4395-88aa-5556a2c56880", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(datetime.date(2025, 6, 
27), datetime.date(2026, 6, 26))" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start_date, end_date" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "38c3bc74-e0f6-4f81-b830-3e3393d6b3d0", "metadata": {}, "outputs": [], @@ -266,19 +348,16 @@ " spatial_resolution=\"LOW\",\n", " group_by=\"GEARTYPE\",\n", " temporal_resolution=\"ENTIRE\",\n", - " start_date=\"2024-01-01\",\n", - " end_date=\"2025-01-01\",\n", + " start_date=start_date, # \"2024-01-01\"\n", + " end_date=end_date, # \"2025-01-01\"\n", " spatial_aggregation=True,\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"8400\",\n", - " },\n", + " region=gha_eez_roi,\n", ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "5ee5daa3-6696-4552-8d1a-61df417b8873", "metadata": {}, "outputs": [], @@ -288,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "35dac242-6479-437a-a35f-249ad22cf191", "metadata": {}, "outputs": [ @@ -296,17 +375,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 9 entries, 0 to 8\n", + "\n", + "RangeIndex: 7 entries, 0 to 6\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 9 non-null object \n", + " 0 date 7 non-null str \n", " 1 detections 0 non-null object \n", " 2 flag 0 non-null object \n", - " 3 gear_type 9 non-null object \n", - " 4 hours 9 non-null float64\n", - " 5 vessel_ids 9 non-null int64 \n", + " 3 gear_type 7 non-null str \n", + " 4 hours 7 non-null float64\n", + " 5 vessel_ids 7 non-null int64 \n", " 6 vessel_id 0 non-null object \n", " 7 vessel_type 0 non-null object \n", " 8 entry_timestamp 0 non-null object \n", @@ -317,12 +396,12 @@ " 13 mmsi 0 non-null object \n", " 14 call_sign 0 non-null object \n", " 15 dataset 0 non-null object \n", - " 16 report_dataset 9 non-null object 
\n", + " 16 report_dataset 7 non-null str \n", " 17 ship_name 0 non-null object \n", " 18 lat 0 non-null object \n", " 19 lon 0 non-null object \n", - "dtypes: float64(1), int64(1), object(18)\n", - "memory usage: 1.5+ KB\n" + "dtypes: float64(1), int64(1), object(15), str(3)\n", + "memory usage: 1.2+ KB\n" ] } ], @@ -332,7 +411,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "6a245cc8-0236-4554-b4c8-d9bbc11db235", "metadata": {}, "outputs": [ @@ -368,36 +447,36 @@ " 0\n", " None\n", " drifting_longlines\n", - " 593.138333\n", - " 3\n", + " 663.434444\n", + " 2\n", " \n", " \n", " 1\n", " None\n", - " purse_seines\n", - " 6.340556\n", - " 1\n", + " fishing\n", + " 23077.302778\n", + " 30\n", " \n", " \n", " 2\n", " None\n", - " pole_and_line\n", - " 3481.509167\n", - " 5\n", + " inconclusive\n", + " 22606.803611\n", + " 27\n", " \n", " \n", " 3\n", " None\n", " other_purse_seines\n", - " 26.581111\n", + " 284.236944\n", " 1\n", " \n", " \n", " 4\n", " None\n", - " fishing\n", - " 20929.563333\n", - " 21\n", + " pole_and_line\n", + " 2955.103611\n", + " 3\n", " \n", " \n", "\n", @@ -405,14 +484,14 @@ ], "text/plain": [ " flag gear_type hours vessel_ids\n", - "0 None drifting_longlines 593.138333 3\n", - "1 None purse_seines 6.340556 1\n", - "2 None pole_and_line 3481.509167 5\n", - "3 None other_purse_seines 26.581111 1\n", - "4 None fishing 20929.563333 21" + "0 None drifting_longlines 663.434444 2\n", + "1 None fishing 23077.302778 30\n", + "2 None inconclusive 22606.803611 27\n", + "3 None other_purse_seines 284.236944 1\n", + "4 None pole_and_line 2955.103611 3" ] }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -423,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "id": "b56ec0e4-5f85-4a10-bd94-39ea55128920", "metadata": {}, "outputs": [], @@ -437,7 +516,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 
15, "id": "87289a41-0d54-44c9-a17a-f82b318af293", "metadata": {}, "outputs": [ @@ -469,57 +548,45 @@ " \n", " \n", " \n", - " 7\n", - " trawlers\n", - " 46704.797222\n", - " 26\n", + " 1\n", + " fishing\n", + " 23077.302778\n", + " 30\n", " \n", " \n", - " 3\n", + " 2\n", " inconclusive\n", - " 30496.173611\n", + " 22606.803611\n", " 27\n", " \n", " \n", - " 1\n", - " fishing\n", - " 20929.563333\n", - " 21\n", + " 5\n", + " trawlers\n", + " 22540.676667\n", + " 29\n", " \n", " \n", - " 8\n", + " 6\n", " tuna_purse_seines\n", - " 5146.623889\n", - " 23\n", + " 5324.287778\n", + " 20\n", " \n", " \n", - " 5\n", + " 4\n", " pole_and_line\n", - " 3481.509167\n", - " 5\n", + " 2955.103611\n", + " 3\n", " \n", " \n", " 0\n", " drifting_longlines\n", - " 593.138333\n", - " 3\n", + " 663.434444\n", + " 2\n", " \n", " \n", - " 4\n", + " 3\n", " other_purse_seines\n", - " 26.581111\n", - " 1\n", - " \n", - " \n", - " 6\n", - " purse_seines\n", - " 6.340556\n", - " 1\n", - " \n", - " \n", - " 2\n", - " fixed_gear\n", - " 0.163611\n", + " 284.236944\n", " 1\n", " \n", " \n", @@ -528,18 +595,16 @@ ], "text/plain": [ " gear_type hours vessel_ids\n", - "7 trawlers 46704.797222 26\n", - "3 inconclusive 30496.173611 27\n", - "1 fishing 20929.563333 21\n", - "8 tuna_purse_seines 5146.623889 23\n", - "5 pole_and_line 3481.509167 5\n", - "0 drifting_longlines 593.138333 3\n", - "4 other_purse_seines 26.581111 1\n", - "6 purse_seines 6.340556 1\n", - "2 fixed_gear 0.163611 1" + "1 fishing 23077.302778 30\n", + "2 inconclusive 22606.803611 27\n", + "5 trawlers 22540.676667 29\n", + "6 tuna_purse_seines 5324.287778 20\n", + "4 pole_and_line 2955.103611 3\n", + "0 drifting_longlines 663.434444 2\n", + "3 other_purse_seines 284.236944 1" ] }, - "execution_count": 10, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -562,7 +627,7 @@ "metadata": {}, "source": [ "1. Kwame now has apparent fishing effort data for multiple gear types.\n", - "2. 
There are **potential 3 vessels** operating as a **longliners (i.e., drifting_longlines)** in **[Ghanaian EEZ](https://www.marineregions.org/gazetteer.php?p=details&id=8400)** with `593.138333 hours` logged." + "2. There are **potential 2 vessels** operating as a **longliners (i.e., drifting_longlines)** in **[Ghanaian EEZ](https://www.marineregions.org/gazetteer.php?p=details&id=8400)** with `663.434444 hours` logged." ] }, { @@ -605,7 +670,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "0797f962-6582-4d3b-bfba-3597d5513a73", "metadata": {}, "outputs": [], @@ -615,19 +680,16 @@ " group_by=\"VESSEL_ID\",\n", " temporal_resolution=\"ENTIRE\",\n", " filters=[\"geartype in ('drifting_longlines')\"],\n", - " start_date=\"2024-01-01\",\n", - " end_date=\"2025-01-01\",\n", + " start_date=start_date,\n", + " end_date=end_date,\n", " spatial_aggregation=True,\n", - " region={\n", - " \"dataset\": \"public-eez-areas\",\n", - " \"id\": \"8400\",\n", - " },\n", + " region=gha_eez_roi,\n", ")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "8be53e48-4b93-4140-ae0e-aa910a993da7", "metadata": {}, "outputs": [], @@ -637,7 +699,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "061b9fbf-88cf-47d8-9b30-b878c0b867cf", "metadata": {}, "outputs": [ @@ -645,33 +707,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 3 entries, 0 to 2\n", + "\n", + "RangeIndex: 2 entries, 0 to 1\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 date 3 non-null object \n", + " 0 date 2 non-null str \n", " 1 detections 0 non-null object \n", - " 2 flag 3 non-null object \n", - " 3 gear_type 3 non-null object \n", - " 4 hours 3 non-null float64 \n", + " 2 flag 2 non-null str \n", + " 3 gear_type 2 non-null str \n", + " 4 hours 2 non-null float64 \n", " 5 vessel_ids 0 non-null object \n", - " 6 
vessel_id 3 non-null object \n", - " 7 vessel_type 3 non-null object \n", - " 8 entry_timestamp 3 non-null datetime64[ns, UTC]\n", - " 9 exit_timestamp 3 non-null datetime64[ns, UTC]\n", - " 10 first_transmission_date 3 non-null datetime64[ns, UTC]\n", - " 11 last_transmission_date 3 non-null datetime64[ns, UTC]\n", - " 12 imo 3 non-null object \n", - " 13 mmsi 3 non-null object \n", - " 14 call_sign 3 non-null object \n", - " 15 dataset 3 non-null object \n", - " 16 report_dataset 3 non-null object \n", - " 17 ship_name 3 non-null object \n", + " 6 vessel_id 2 non-null str \n", + " 7 vessel_type 2 non-null str \n", + " 8 entry_timestamp 2 non-null datetime64[us, UTC]\n", + " 9 exit_timestamp 2 non-null datetime64[us, UTC]\n", + " 10 first_transmission_date 2 non-null datetime64[us, UTC]\n", + " 11 last_transmission_date 2 non-null datetime64[us, UTC]\n", + " 12 imo 2 non-null str \n", + " 13 mmsi 2 non-null str \n", + " 14 call_sign 2 non-null str \n", + " 15 dataset 2 non-null str \n", + " 16 report_dataset 2 non-null str \n", + " 17 ship_name 2 non-null str \n", " 18 lat 0 non-null object \n", " 19 lon 0 non-null object \n", - "dtypes: datetime64[ns, UTC](4), float64(1), object(15)\n", - "memory usage: 612.0+ bytes\n" + "dtypes: datetime64[us, UTC](4), float64(1), object(4), str(11)\n", + "memory usage: 452.0+ bytes\n" ] } ], @@ -681,7 +743,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "id": "932bd692-c689-4e1d-985c-18eeccdf9ec3", "metadata": {}, "outputs": [ @@ -716,40 +778,31 @@ " \n", " \n", " 0\n", - " CHN\n", + " TWN\n", " DRIFTING_LONGLINES\n", - " 2.750556\n", - " 412331032\n", - " \n", + " 2.213056\n", + " 416004904\n", + " MAAN FARN NO.1\n", " \n", " \n", " 1\n", - " JPN\n", - " DRIFTING_LONGLINES\n", - " 588.879722\n", - " 431100690\n", - " SENSHU MARU NO.3\n", - " \n", - " \n", - " 2\n", - " TWN\n", + " \n", " DRIFTING_LONGLINES\n", - " 1.508056\n", - " 416007496\n", - " HUNG CHUAN SHUN\n", + " 661.221389\n", + " 
983110470\n", + " \n", " \n", " \n", "\n", "" ], "text/plain": [ - " flag gear_type hours mmsi ship_name\n", - "0 CHN DRIFTING_LONGLINES 2.750556 412331032 \n", - "1 JPN DRIFTING_LONGLINES 588.879722 431100690 SENSHU MARU NO.3\n", - "2 TWN DRIFTING_LONGLINES 1.508056 416007496 HUNG CHUAN SHUN" + " flag gear_type hours mmsi ship_name\n", + "0 TWN DRIFTING_LONGLINES 2.213056 416004904 MAAN FARN NO.1\n", + "1 DRIFTING_LONGLINES 661.221389 983110470 " ] }, - "execution_count": 14, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -771,9 +824,9 @@ "id": "72a9efeb-ae54-4bbd-ab85-77845f928376", "metadata": {}, "source": [ - "1. Kwame identifies `3 vessels` operating as longliners within **[Ghanaian EEZ](https://www.marineregions.org/gazetteer.php?p=details&id=8400)**.\n", - "2. The vessel `(mmsi: 431100690, ship_name: SENSHU MARU NO.3)` shows significant activity with `588.879722 hours` logged.\n", - "3. Other vessels `(mmsi: 416007496, ship_name: HUNG CHUAN SHUN)` and `(mmsi: 412331032)` shows apparent fishing effort over a short duration.\n", + "1. Kwame identifies `2 vessels` operating as longliners within **[Ghanaian EEZ](https://www.marineregions.org/gazetteer.php?p=details&id=8400)**.\n", + "2. The vessel `(mmsi: 983110470)` shows significant activity with `661.221389 hours` logged.\n", + "3. Other vessels `(mmsi: 416004904)` shows apparent fishing effort over a short duration.\n", "4. 
This response is based on **AIS self-reported** data and should be further validated.\n" ] }, @@ -815,7 +868,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "id": "15889e5c-dea7-499d-bd77-68a12aca8d98", "metadata": {}, "outputs": [], @@ -825,19 +878,18 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "id": "8477d36f-27ea-4ca9-aaad-dfaf8d94cd80", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['f37ebdc1b-be44-0740-7904-49397360e29d',\n", - " 'b1dad8628-8c9c-2ee7-258b-3d8fb747f1c8',\n", - " '60f7bb972-2c90-4553-650b-23c38f9521bf']" + "['dc4940890-0883-9dd7-797b-ce8edfc33b2d',\n", + " '5ad9cc284-44ca-8033-43f5-8ce653b05834']" ] }, - "execution_count": 16, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -848,7 +900,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "id": "33d970d9-1321-46ec-87bc-91aed1f446ff", "metadata": {}, "outputs": [], @@ -860,7 +912,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "id": "c1b9a6e9-683f-44fe-bb47-98825cf9d150", "metadata": {}, "outputs": [], @@ -870,7 +922,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "id": "d87c1cef-92d1-462d-a86e-c2af11912ed9", "metadata": {}, "outputs": [ @@ -878,20 +930,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 3 entries, 0 to 2\n", + "\n", + "RangeIndex: 2 entries, 0 to 1\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 dataset 3 non-null object\n", - " 1 registry_info_total_records 3 non-null int64 \n", - " 2 registry_info 3 non-null object\n", - " 3 registry_owners 3 non-null object\n", - " 4 registry_public_authorizations 3 non-null object\n", - " 5 combined_sources_info 3 non-null object\n", - " 6 self_reported_info 3 non-null object\n", - "dtypes: int64(1), object(6)\n", - "memory usage: 300.0+ 
bytes\n" + " 0 dataset 2 non-null str \n", + " 1 registry_info_total_records 2 non-null int64 \n", + " 2 registry_info 2 non-null object\n", + " 3 registry_owners 2 non-null object\n", + " 4 registry_public_authorizations 2 non-null object\n", + " 5 combined_sources_info 2 non-null object\n", + " 6 self_reported_info 2 non-null object\n", + "dtypes: int64(1), object(5), str(1)\n", + "memory usage: 244.0+ bytes\n" ] } ], @@ -916,7 +968,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "id": "348301a7-c290-4684-bb03-23298e8bc7e0", "metadata": {}, "outputs": [ @@ -949,21 +1001,15 @@ " \n", " \n", " 0\n", - " [{'id': '0b0dec977c1aad4ae6652c4076572cc7', 's...\n", - " [{'name': 'TOMIOKA FISHERIES', 'flag': 'JPN', ...\n", - " [{'id': 'c86d138c5-5d51-3620-fe01-a9e0ed37fb91...\n", + " [{'id': '05bb903aecec1cc5efc55708d7fd0868', 's...\n", + " [{'name': 'HASBRO FISHERIES GROUP', 'flag': 'T...\n", + " [{'id': 'dc4940890-0883-9dd7-797b-ce8edfc33b2d...\n", " \n", " \n", " 1\n", " []\n", " []\n", - " [{'id': 'f37ebdc1b-be44-0740-7904-49397360e29d...\n", - " \n", - " \n", - " 2\n", - " [{'id': '02eda7d2da02943eecd48813fb7d562a', 's...\n", - " [{'name': 'HER RONG SHUN FISHERIES', 'flag': '...\n", - " [{'id': '60f7bb972-2c90-4553-650b-23c38f9521bf...\n", + " [{'id': '5ad9cc284-44ca-8033-43f5-8ce653b05834...\n", " \n", " \n", "\n", @@ -971,22 +1017,19 @@ ], "text/plain": [ " registry_info \\\n", - "0 [{'id': '0b0dec977c1aad4ae6652c4076572cc7', 's... \n", + "0 [{'id': '05bb903aecec1cc5efc55708d7fd0868', 's... \n", "1 [] \n", - "2 [{'id': '02eda7d2da02943eecd48813fb7d562a', 's... \n", "\n", " registry_owners \\\n", - "0 [{'name': 'TOMIOKA FISHERIES', 'flag': 'JPN', ... \n", + "0 [{'name': 'HASBRO FISHERIES GROUP', 'flag': 'T... \n", "1 [] \n", - "2 [{'name': 'HER RONG SHUN FISHERIES', 'flag': '... \n", "\n", " self_reported_info \n", - "0 [{'id': 'c86d138c5-5d51-3620-fe01-a9e0ed37fb91... \n", - "1 [{'id': 'f37ebdc1b-be44-0740-7904-49397360e29d... 
\n", - "2 [{'id': '60f7bb972-2c90-4553-650b-23c38f9521bf... " + "0 [{'id': 'dc4940890-0883-9dd7-797b-ce8edfc33b2d... \n", + "1 [{'id': '5ad9cc284-44ca-8033-43f5-8ce653b05834... " ] }, - "execution_count": 20, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1005,20 +1048,31 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, + "id": "ca068396-9c53-4a2c-98b7-b2eee6ad4423", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_has_registry_info_mask = step_3_vessels_df[\n", + " \"registry_info\"\n", + "].notna() & step_3_vessels_df[\"registry_info\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, "id": "7e5cf6db-8f45-4c7e-a8c8-ccdb312eca7d", "metadata": {}, "outputs": [], "source": [ "step_3_registry_info_df = pd.json_normalize(\n", - " step_3_vessels_df[\"registry_info\"].explode()\n", - ")\n", - "step_3_registry_info_df = step_3_registry_info_df.dropna()" + " step_3_vessels_df[step_3_has_registry_info_mask][\"registry_info\"].explode()\n", + ")" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 28, "id": "b350572a-93ec-434f-8f6f-d0418f693b46", "metadata": {}, "outputs": [ @@ -1054,37 +1108,26 @@ " \n", " \n", " 0\n", - " 431100690\n", - " JPN\n", - " SENSHU MARU NO.3\n", - " SENSHUMARU3\n", - " [DRIFTING_LONGLINES]\n", - " [CCSBT, GFCM, IATTC, ICCAT, IMO, IOTC, OPRT, R...\n", - " \n", - " \n", - " 2\n", - " 416007496\n", + " 416004904\n", " TWN\n", - " HUNG CHUAN SHUN\n", - " HUNGCHUANSHUN\n", + " MAAN FARN 1\n", + " MAANFARN1\n", " [DRIFTING_LONGLINES]\n", - " [ICCAT, IMO, ISSF, OPRT, TMT_ICCAT, TMT_OTHER_...\n", + " [ICCAT, IMO, ISSF, OPRT, RESEARCH-PAPER, SNP]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag ship_name n_ship_name gear_types \\\n", - "0 431100690 JPN SENSHU MARU NO.3 SENSHUMARU3 [DRIFTING_LONGLINES] \n", - "2 416007496 TWN HUNG CHUAN SHUN HUNGCHUANSHUN [DRIFTING_LONGLINES] \n", + " ssvid flag ship_name n_ship_name 
gear_types \\\n", + "0 416004904 TWN MAAN FARN 1 MAANFARN1 [DRIFTING_LONGLINES] \n", "\n", - " source_code \n", - "0 [CCSBT, GFCM, IATTC, ICCAT, IMO, IOTC, OPRT, R... \n", - "2 [ICCAT, IMO, ISSF, OPRT, TMT_ICCAT, TMT_OTHER_... " + " source_code \n", + "0 [ICCAT, IMO, ISSF, OPRT, RESEARCH-PAPER, SNP] " ] }, - "execution_count": 22, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1105,20 +1148,43 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 29, + "id": "3a4c79b5-f7d3-46bb-9c78-a7db8900d924", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_has_registry_owners_mask = step_3_vessels_df[\n", + " \"registry_owners\"\n", + "].notna() & step_3_vessels_df[\"registry_owners\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, "id": "0ed351c2-104c-4132-bcae-a3c757ac2465", "metadata": {}, "outputs": [], "source": [ "step_3_registry_owners_df = pd.json_normalize(\n", - " step_3_vessels_df[\"registry_owners\"].explode()\n", - ")\n", - "step_3_registry_owners_df = step_3_registry_owners_df.dropna()" + " step_3_vessels_df[step_3_has_registry_owners_mask][\"registry_owners\"].explode()\n", + ")" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 31, + "id": "008ca5eb-b3a6-4e48-99f6-961b576790f2", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_registry_owners_match_registry_info_mask = step_3_registry_owners_df[\n", + " \"ssvid\"\n", + "].isin(step_3_registry_info_df[\"ssvid\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "id": "d6bbbc39-1928-42cd-9866-71fa95a601d8", "metadata": {}, "outputs": [ @@ -1152,66 +1218,29 @@ " \n", " \n", " 0\n", - " 431100690\n", - " JPN\n", - " TOMIOKA FISHERIES\n", - " [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF...\n", - " \n", - " \n", - " 1\n", - " 431100690\n", - " JPN\n", - " TOMIOKA\n", - " [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF...\n", - " \n", - " \n", - " 2\n", - " 431100690\n", - " 
JPN\n", - " YAMAMOTO YUUKI\n", - " [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF...\n", - " \n", - " \n", - " 3\n", - " 431100690\n", - " JPN\n", - " YAMAMOTO HIROKI\n", - " [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF...\n", - " \n", - " \n", - " 5\n", - " 416007496\n", + " 416004904\n", " TWN\n", - " HER RONG SHUN FISHERIES\n", - " [TMT_ICCAT, TMT_OTHER_OFFICIAL]\n", + " HASBRO FISHERIES GROUP\n", + " [RESEARCH-PAPER]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag name \\\n", - "0 431100690 JPN TOMIOKA FISHERIES \n", - "1 431100690 JPN TOMIOKA \n", - "2 431100690 JPN YAMAMOTO YUUKI \n", - "3 431100690 JPN YAMAMOTO HIROKI \n", - "5 416007496 TWN HER RONG SHUN FISHERIES \n", - "\n", - " source_code \n", - "0 [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF... \n", - "1 [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF... \n", - "2 [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF... \n", - "3 [TMT_CCSBT, TMT_IATTC, TMT_ICCAT, TMT_OTHER_OF... \n", - "5 [TMT_ICCAT, TMT_OTHER_OFFICIAL] " + " ssvid flag name source_code\n", + "0 416004904 TWN HASBRO FISHERIES GROUP [RESEARCH-PAPER]" ] }, - "execution_count": 24, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "step_3_registry_owners_df[[\"ssvid\", \"flag\", \"name\", \"source_code\"]]" + "step_3_registry_owners_df[step_3_registry_owners_match_registry_info_mask][\n", + " [\"ssvid\", \"flag\", \"name\", \"source_code\"]\n", + "]" ] }, { @@ -1224,19 +1253,33 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 33, + "id": "3c570f2e-dc0c-4f2f-b4b0-df16715ba5ad", + "metadata": {}, + "outputs": [], + "source": [ + "step_3_has_self_reported_info_mask = step_3_vessels_df[\n", + " \"self_reported_info\"\n", + "].notna() & step_3_vessels_df[\"self_reported_info\"].astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, "id": "add8a66e-9736-4cdb-a74d-0607fcc530a2", "metadata": {}, "outputs": [], "source": [ "step_3_self_reported_info_df 
= pd.json_normalize(\n", - " step_3_vessels_df[\"self_reported_info\"].explode()\n", + " step_3_vessels_df[step_3_has_self_reported_info_mask][\n", + " \"self_reported_info\"\n", + " ].explode()\n", ")" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 35, "id": "fd0831dc-e29e-4ccc-864f-704eebee2686", "metadata": {}, "outputs": [ @@ -1266,70 +1309,68 @@ " ship_name\n", " n_ship_name\n", " source_code\n", + " transmission_date_from\n", " \n", " \n", " \n", " \n", " 0\n", - " 431100690\n", - " JPN\n", - " None\n", - " None\n", - " [AIS]\n", - " \n", - " \n", - " 1\n", - " 431100690\n", - " JPN\n", - " SENSHU MARU NO.3\n", - " SENSHUMARU3\n", - " [AIS]\n", - " \n", - " \n", - " 2\n", - " 431100690\n", - " JPN\n", - " SENSHU MARU NO3\n", - " SENSHUMARU3\n", + " 416004904\n", + " TWN\n", + " MAAN FARN NO.1\n", + " MAANFARN1\n", " [AIS]\n", + " 2017-09-24 12:55:55+00:00\n", " \n", " \n", - " 3\n", - " 412331032\n", - " CHN\n", - " None\n", - " None\n", + " 0\n", + " 416004904\n", + " TWN\n", + " MAAN FAKN NO.1\n", + " MAANFAKN1\n", " [AIS]\n", + " 2015-04-22 01:47:27+00:00\n", " \n", " \n", - " 4\n", - " 416007496\n", - " TWN\n", - " HUNG CHUAN SHUN\n", - " HUNGCHUANSHUN\n", + " 1\n", + " 983110470\n", + " NaN\n", + " NaN\n", + " NaN\n", " [AIS]\n", + " 2022-11-16 11:32:26+00:00\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ssvid flag ship_name n_ship_name source_code\n", - "0 431100690 JPN None None [AIS]\n", - "1 431100690 JPN SENSHU MARU NO.3 SENSHUMARU3 [AIS]\n", - "2 431100690 JPN SENSHU MARU NO3 SENSHUMARU3 [AIS]\n", - "3 412331032 CHN None None [AIS]\n", - "4 416007496 TWN HUNG CHUAN SHUN HUNGCHUANSHUN [AIS]" + " ssvid flag ship_name n_ship_name source_code \\\n", + "0 416004904 TWN MAAN FARN NO.1 MAANFARN1 [AIS] \n", + "0 416004904 TWN MAAN FAKN NO.1 MAANFAKN1 [AIS] \n", + "1 983110470 NaN NaN NaN [AIS] \n", + "\n", + " transmission_date_from \n", + "0 2017-09-24 12:55:55+00:00 \n", + "0 2015-04-22 01:47:27+00:00 \n", + "1 2022-11-16 
11:32:26+00:00 " ] }, - "execution_count": 26, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "step_3_self_reported_info_df[\n", - " [\"ssvid\", \"flag\", \"ship_name\", \"n_ship_name\", \"source_code\"]\n", + " [\n", + " \"ssvid\",\n", + " \"flag\",\n", + " \"ship_name\",\n", + " \"n_ship_name\",\n", + " \"source_code\",\n", + " \"transmission_date_from\",\n", + " ]\n", "]" ] }, @@ -1346,10 +1387,10 @@ "id": "37e54952-ec16-4c85-a6c3-ee7e844d007b", "metadata": {}, "source": [ - "- The vessel `mmsi/ssvid: 412331032` appears to be a drifting longliner flagged under China.\n", + "- The vessel `mmsi/ssvid: 983110470` appears to be a drifting longliner flagged under China.\n", "- No public registry data is found for this vessel.\n", "- The vessel's identity information is based on `AIS self-reported data`, which may not always align with official registries.\n", - "- The vessel appears to have been active since `2014`, based on `self-reported AIS records`.\n", + "- The vessel appears to have been active since `2022`, based on `self-reported AIS records`.\n", "- This vessel's data needs further validation against official public sources" ] }, @@ -1378,7 +1419,7 @@ "\n", "1. **Vessel ID** from 4Wings API\n", "2. **[Event Types](https://globalfishingwatch.org/our-apis/documentation#events-post-body-parameters)** - Port visits, encounters (potential transshipment), and fishing events.\n", - "3. **Time Range** - Last 6 months.\n", + "3. **Time Range** - Last 12 months.\n", "4. 
**[Datasets](https://globalfishingwatch.org/our-apis/documentation#api-dataset)**:\n", " - `public-global-port-visits-events::latest` (Port Visits)\n", " - `public-global-encounters-events:latest` (Encounters between vessels)\n", @@ -1388,7 +1429,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 36, "id": "ae1567e5-0594-4eb3-a78a-5e8902deb2ac", "metadata": {}, "outputs": [], @@ -1401,15 +1442,15 @@ " ],\n", " vessels=step_2_vessel_ids,\n", " types=[\"ENCOUNTER\", \"FISHING\", \"PORT_VISIT\"],\n", - " start_date=\"2024-08-01\",\n", - " end_date=\"2025-01-31\",\n", + " start_date=start_date,\n", + " end_date=end_date,\n", " encounter_types=[\"FISHING-FISHING\"],\n", ")" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 37, "id": "904c3745-e999-4fc7-a3fe-dd639c6a87c2", "metadata": {}, "outputs": [], @@ -1419,7 +1460,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 38, "id": "7f2ef6a8-6eed-47c5-bb44-253bad118956", "metadata": {}, "outputs": [ @@ -1427,27 +1468,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 418 entries, 0 to 417\n", + "\n", + "RangeIndex: 501 entries, 0 to 500\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 start 418 non-null datetime64[ns, UTC]\n", - " 1 end 418 non-null datetime64[ns, UTC]\n", - " 2 id 418 non-null object \n", - " 3 type 418 non-null object \n", - " 4 position 418 non-null object \n", - " 5 regions 418 non-null object \n", - " 6 bounding_box 418 non-null object \n", - " 7 distances 418 non-null object \n", - " 8 vessel 418 non-null object \n", + " 0 start 501 non-null datetime64[us, UTC]\n", + " 1 end 501 non-null datetime64[us, UTC]\n", + " 2 id 501 non-null str \n", + " 3 type 501 non-null str \n", + " 4 position 501 non-null object \n", + " 5 regions 501 non-null object \n", + " 6 bounding_box 501 non-null object \n", + " 7 distances 501 non-null 
object \n", + " 8 vessel 501 non-null object \n", " 9 encounter 0 non-null object \n", - " 10 fishing 414 non-null object \n", + " 10 fishing 483 non-null object \n", " 11 gap 0 non-null object \n", " 12 loitering 0 non-null object \n", - " 13 port_visit 4 non-null object \n", - "dtypes: datetime64[ns, UTC](2), object(12)\n", - "memory usage: 45.8+ KB\n" + " 13 port_visit 18 non-null object \n", + "dtypes: datetime64[us, UTC](2), object(10), str(2)\n", + "memory usage: 54.9+ KB\n" ] } ], @@ -1457,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 39, "id": "39b4f990-1da9-4862-b9fc-554f7b4c7dc0", "metadata": {}, "outputs": [ @@ -1465,12 +1506,12 @@ "data": { "text/plain": [ "type\n", - "fishing 414\n", - "port_visit 4\n", + "fishing 483\n", + "port_visit 18\n", "Name: count, dtype: int64" ] }, - "execution_count": 30, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1489,7 +1530,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 40, "id": "87b32f2a-29ea-4f11-87c2-a16655ac5005", "metadata": {}, "outputs": [], @@ -1499,7 +1540,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 41, "id": "7ee57f28-eff1-40d7-865a-e3ad04103b4c", "metadata": {}, "outputs": [], @@ -1515,7 +1556,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 42, "id": "4a8deceb-aaef-4fe3-8a43-88b4c34d2a70", "metadata": {}, "outputs": [ @@ -1523,25 +1564,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 414 entries, 0 to 413\n", + "\n", + "Index: 483 entries, 0 to 493\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 414 non-null object \n", - " 1 name 389 non-null object \n", - " 2 ssvid 414 non-null object \n", - " 3 flag 414 non-null object \n", - " 4 type 414 non-null object \n", - " 5 public_authorizations 414 non-null object \n", + " 0 id 483 non-null 
str \n", + " 1 name 460 non-null str \n", + " 2 ssvid 483 non-null str \n", + " 3 flag 460 non-null str \n", + " 4 type 483 non-null str \n", + " 5 public_authorizations 483 non-null object \n", " 6 nextPort 0 non-null object \n", - " 7 total_distance_km 414 non-null float64\n", - " 8 average_speed_knots 414 non-null float64\n", + " 7 total_distance_km 483 non-null float64\n", + " 8 average_speed_knots 483 non-null float64\n", " 9 average_duration_hours 0 non-null object \n", - " 10 potential_risk 414 non-null bool \n", - " 11 vessel_public_authorization_status 414 non-null object \n", - "dtypes: bool(1), float64(2), object(9)\n", - "memory usage: 36.1+ KB\n" + " 10 potential_risk 483 non-null bool \n", + " 11 vessel_public_authorization_status 483 non-null str \n", + "dtypes: bool(1), float64(2), object(3), str(6)\n", + "memory usage: 45.8+ KB\n" ] } ], @@ -1551,7 +1592,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 43, "id": "0b95e76a-7fe8-45ba-87a3-ffab7d15c7d1", "metadata": {}, "outputs": [ @@ -1585,38 +1626,38 @@ " \n", " \n", " 0\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 44.026882\n", - " 3.808824\n", + " NaN\n", + " 983110470\n", + " 199.845790\n", + " 4.646491\n", " \n", " \n", " 1\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 9.353500\n", - " 8.200000\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 30.629027\n", + " 4.584615\n", " \n", " \n", " 2\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 9.868050\n", - " 5.494444\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 36.941824\n", + " 8.577778\n", " \n", " \n", " 3\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 10.955310\n", - " 8.555556\n", + " NaN\n", + " 983110470\n", + " 21.235980\n", + " 4.292857\n", " \n", " \n", " 4\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 9.630687\n", - " 8.942857\n", + " NaN\n", + " 983110470\n", + " 30.004830\n", + " 5.738462\n", " \n", " \n", " ...\n", @@ -1626,63 +1667,63 @@ " ...\n", " \n", " \n", - " 409\n", - " HUNG 
CHUAN SHUN\n", - " 416007496\n", - " 101.402263\n", - " 4.041791\n", + " 489\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 2.436448\n", + " 3.015385\n", " \n", " \n", - " 410\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 115.642159\n", - " 4.743357\n", + " 490\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 11.147134\n", + " 2.141176\n", " \n", " \n", - " 411\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 100.019456\n", - " 4.402564\n", + " 491\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 6.077442\n", + " 1.147541\n", " \n", " \n", - " 412\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 155.923138\n", - " 4.975000\n", + " 492\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 39.414255\n", + " 3.263636\n", " \n", " \n", - " 413\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", - " 173.253137\n", - " 4.869231\n", + " 493\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 64.456215\n", + " 3.900000\n", " \n", " \n", "\n", - "

414 rows × 4 columns

\n", + "

483 rows × 4 columns

\n", "" ], "text/plain": [ - " name ssvid total_distance_km average_speed_knots\n", - "0 HUNG CHUAN SHUN 416007496 44.026882 3.808824\n", - "1 HUNG CHUAN SHUN 416007496 9.353500 8.200000\n", - "2 HUNG CHUAN SHUN 416007496 9.868050 5.494444\n", - "3 HUNG CHUAN SHUN 416007496 10.955310 8.555556\n", - "4 HUNG CHUAN SHUN 416007496 9.630687 8.942857\n", - ".. ... ... ... ...\n", - "409 HUNG CHUAN SHUN 416007496 101.402263 4.041791\n", - "410 HUNG CHUAN SHUN 416007496 115.642159 4.743357\n", - "411 HUNG CHUAN SHUN 416007496 100.019456 4.402564\n", - "412 HUNG CHUAN SHUN 416007496 155.923138 4.975000\n", - "413 HUNG CHUAN SHUN 416007496 173.253137 4.869231\n", + " name ssvid total_distance_km average_speed_knots\n", + "0 NaN 983110470 199.845790 4.646491\n", + "1 MAAN FARN NO.1 416004904 30.629027 4.584615\n", + "2 MAAN FARN NO.1 416004904 36.941824 8.577778\n", + "3 NaN 983110470 21.235980 4.292857\n", + "4 NaN 983110470 30.004830 5.738462\n", + ".. ... ... ... ...\n", + "489 MAAN FARN NO.1 416004904 2.436448 3.015385\n", + "490 MAAN FARN NO.1 416004904 11.147134 2.141176\n", + "491 MAAN FARN NO.1 416004904 6.077442 1.147541\n", + "492 MAAN FARN NO.1 416004904 39.414255 3.263636\n", + "493 MAAN FARN NO.1 416004904 64.456215 3.900000\n", "\n", - "[414 rows x 4 columns]" + "[483 rows x 4 columns]" ] }, - "execution_count": 34, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1700,7 +1741,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 44, "id": "2416d761-b334-4995-ba67-81ff96c3dd57", "metadata": {}, "outputs": [ @@ -1708,13 +1749,12 @@ "data": { "text/plain": [ "ssvid\n", - "416007496 363\n", - "431100690 26\n", - "412331032 25\n", + "416004904 460\n", + "983110470 23\n", "Name: count, dtype: int64" ] }, - "execution_count": 35, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1733,7 +1773,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 45, "id": 
"337ddf63-42ba-4ea7-a56d-2d8d602f5a22", "metadata": {}, "outputs": [], @@ -1743,7 +1783,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 46, "id": "a77465d1-02ce-42bb-a0b3-7c243cf02a40", "metadata": {}, "outputs": [], @@ -1759,7 +1799,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 47, "id": "566e8a67-8fff-4d10-9749-83c6dae727b7", "metadata": {}, "outputs": [ @@ -1767,50 +1807,50 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 4 entries, 0 to 3\n", + "\n", + "Index: 18 entries, 25 to 500\n", "Data columns (total 37 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 id 4 non-null object \n", - " 1 name 3 non-null object \n", - " 2 ssvid 4 non-null object \n", - " 3 flag 4 non-null object \n", - " 4 type 4 non-null object \n", - " 5 public_authorizations 4 non-null object \n", + " 0 id 18 non-null str \n", + " 1 name 3 non-null str \n", + " 2 ssvid 18 non-null str \n", + " 3 flag 3 non-null str \n", + " 4 type 18 non-null str \n", + " 5 public_authorizations 18 non-null object \n", " 6 nextPort 0 non-null object \n", - " 7 visit_id 4 non-null object \n", - " 8 confidence 4 non-null object \n", - " 9 duration_hrs 4 non-null float64\n", - " 10 start_anchorage_anchorage_id 4 non-null object \n", - " 11 start_anchorage_at_dock 4 non-null bool \n", - " 12 start_anchorage_distance_from_shore_km 4 non-null float64\n", - " 13 start_anchorage_flag 4 non-null object \n", - " 14 start_anchorage_id 4 non-null object \n", - " 15 start_anchorage_lat 4 non-null float64\n", - " 16 start_anchorage_lon 4 non-null float64\n", - " 17 start_anchorage_name 4 non-null object \n", - " 18 start_anchorage_top_destination 4 non-null object \n", - " 19 intermediate_anchorage_anchorage_id 4 non-null object \n", - " 20 intermediate_anchorage_at_dock 4 non-null bool \n", - " 21 intermediate_anchorage_distance_from_shore_km 4 non-null float64\n", - " 22 
intermediate_anchorage_flag 4 non-null object \n", - " 23 intermediate_anchorage_id 4 non-null object \n", - " 24 intermediate_anchorage_lat 4 non-null float64\n", - " 25 intermediate_anchorage_lon 4 non-null float64\n", - " 26 intermediate_anchorage_name 4 non-null object \n", - " 27 intermediate_anchorage_top_destination 4 non-null object \n", - " 28 end_anchorage_anchorage_id 4 non-null object \n", - " 29 end_anchorage_at_dock 4 non-null bool \n", - " 30 end_anchorage_distance_from_shore_km 4 non-null float64\n", - " 31 end_anchorage_flag 4 non-null object \n", - " 32 end_anchorage_id 4 non-null object \n", - " 33 end_anchorage_lat 4 non-null float64\n", - " 34 end_anchorage_lon 4 non-null float64\n", - " 35 end_anchorage_name 4 non-null object \n", - " 36 end_anchorage_top_destination 4 non-null object \n", - "dtypes: bool(3), float64(10), object(24)\n", - "memory usage: 1.2+ KB\n" + " 7 visit_id 18 non-null str \n", + " 8 confidence 18 non-null str \n", + " 9 duration_hrs 18 non-null float64\n", + " 10 start_anchorage_anchorage_id 18 non-null str \n", + " 11 start_anchorage_at_dock 18 non-null bool \n", + " 12 start_anchorage_distance_from_shore_km 18 non-null float64\n", + " 13 start_anchorage_flag 18 non-null str \n", + " 14 start_anchorage_id 18 non-null str \n", + " 15 start_anchorage_lat 18 non-null float64\n", + " 16 start_anchorage_lon 18 non-null float64\n", + " 17 start_anchorage_name 13 non-null str \n", + " 18 start_anchorage_top_destination 18 non-null str \n", + " 19 intermediate_anchorage_anchorage_id 18 non-null str \n", + " 20 intermediate_anchorage_at_dock 18 non-null bool \n", + " 21 intermediate_anchorage_distance_from_shore_km 18 non-null float64\n", + " 22 intermediate_anchorage_flag 18 non-null str \n", + " 23 intermediate_anchorage_id 18 non-null str \n", + " 24 intermediate_anchorage_lat 18 non-null float64\n", + " 25 intermediate_anchorage_lon 18 non-null float64\n", + " 26 intermediate_anchorage_name 13 non-null str \n", + " 27 
intermediate_anchorage_top_destination 18 non-null str \n", + " 28 end_anchorage_anchorage_id 18 non-null str \n", + " 29 end_anchorage_at_dock 18 non-null bool \n", + " 30 end_anchorage_distance_from_shore_km 18 non-null float64\n", + " 31 end_anchorage_flag 18 non-null str \n", + " 32 end_anchorage_id 18 non-null str \n", + " 33 end_anchorage_lat 18 non-null float64\n", + " 34 end_anchorage_lon 18 non-null float64\n", + " 35 end_anchorage_name 13 non-null str \n", + " 36 end_anchorage_top_destination 18 non-null str \n", + "dtypes: bool(3), float64(10), object(2), str(22)\n", + "memory usage: 5.0+ KB\n" ] } ], @@ -1820,7 +1860,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 48, "id": "0381a7bd-cf81-4b58-9f8d-f01601121ad2", "metadata": {}, "outputs": [ @@ -1855,36 +1895,162 @@ " \n", " \n", " \n", - " 0\n", - " SENSHU MARU NO.3\n", - " 431100690\n", + " 25\n", + " NaN\n", + " 983110470\n", " 4\n", - " TEMA\n", - " TEMA\n", - " TEMA\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 1\n", - " HUNG CHUAN SHUN\n", - " 416007496\n", + " 66\n", + " NaN\n", + " 983110470\n", " 4\n", - " TEMA\n", - " TEMA\n", - " TEMA\n", + " KIZOMBA FPSO\n", + " KIZOMBA FPSO\n", + " KIZOMBA FPSO\n", " \n", " \n", - " 2\n", - " SENSHU MARU NO.3\n", - " 431100690\n", + " 67\n", + " NaN\n", + " 983110470\n", " 4\n", - " TEMA\n", - " TEMA\n", - " TEMA\n", + " LAGOS\n", + " LAGOS\n", + " LAGOS\n", " \n", " \n", - " 3\n", - " None\n", - " 412331032\n", + " 68\n", + " NaN\n", + " 983110470\n", + " 4\n", + " BLOCK 15\n", + " BLOCK 15\n", + " BLOCK 15\n", + " \n", + " \n", + " 69\n", + " NaN\n", + " 983110470\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 70\n", + " NaN\n", + " 983110470\n", + " 4\n", + " LUANDA\n", + " LUANDA\n", + " LUANDA\n", + " \n", + " \n", + " 71\n", + " NaN\n", + " 983110470\n", + " 4\n", + " COD-4\n", + " COD-4\n", + " COD-4\n", + " \n", + " \n", + " 72\n", + " NaN\n", + " 983110470\n", + " 4\n", + " NaN\n", + 
" NaN\n", + " NaN\n", + " \n", + " \n", + " 365\n", + " NaN\n", + " 983110470\n", + " 4\n", + " KIZOMBA FPSO\n", + " KIZOMBA FPSO\n", + " BLOCK 15\n", + " \n", + " \n", + " 366\n", + " NaN\n", + " 983110470\n", + " 4\n", + " KIZOMBA FPSO\n", + " KIZOMBA FPSO\n", + " KIZOMBA FPSO\n", + " \n", + " \n", + " 367\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 3\n", + " LAS PALMAS\n", + " LAS PALMAS\n", + " LAS PALMAS\n", + " \n", + " \n", + " 494\n", + " NaN\n", + " 983110470\n", + " 4\n", + " COD-4\n", + " COD-4\n", + " BLOCK 15\n", + " \n", + " \n", + " 495\n", + " NaN\n", + " 983110470\n", + " 4\n", + " BLOCK 15\n", + " BLOCK 15\n", + " BLOCK 15\n", + " \n", + " \n", + " 496\n", + " NaN\n", + " 983110470\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 497\n", + " NaN\n", + " 983110470\n", + " 4\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 498\n", + " NaN\n", + " 983110470\n", + " 4\n", + " COD-4\n", + " COD-4\n", + " COD-4\n", + " \n", + " \n", + " 499\n", + " MAAN FARN NO.1\n", + " 416004904\n", + " 4\n", + " DAKAR\n", + " DAKAR\n", + " DAKAR\n", + " \n", + " \n", + " 500\n", + " MAAN FARN NO.1\n", + " 416004904\n", " 4\n", " DAKAR\n", " DAKAR\n", @@ -1896,19 +2062,47 @@ ], "text/plain": [ " name ssvid confidence start_anchorage_name \\\n", - "0 SENSHU MARU NO.3 431100690 4 TEMA \n", - "1 HUNG CHUAN SHUN 416007496 4 TEMA \n", - "2 SENSHU MARU NO.3 431100690 4 TEMA \n", - "3 None 412331032 4 DAKAR \n", + "25 NaN 983110470 4 NaN \n", + "66 NaN 983110470 4 KIZOMBA FPSO \n", + "67 NaN 983110470 4 LAGOS \n", + "68 NaN 983110470 4 BLOCK 15 \n", + "69 NaN 983110470 4 NaN \n", + "70 NaN 983110470 4 LUANDA \n", + "71 NaN 983110470 4 COD-4 \n", + "72 NaN 983110470 4 NaN \n", + "365 NaN 983110470 4 KIZOMBA FPSO \n", + "366 NaN 983110470 4 KIZOMBA FPSO \n", + "367 MAAN FARN NO.1 416004904 3 LAS PALMAS \n", + "494 NaN 983110470 4 COD-4 \n", + "495 NaN 983110470 4 BLOCK 15 \n", + "496 NaN 983110470 4 NaN \n", + "497 NaN 983110470 4 NaN 
\n", + "498 NaN 983110470 4 COD-4 \n", + "499 MAAN FARN NO.1 416004904 4 DAKAR \n", + "500 MAAN FARN NO.1 416004904 4 DAKAR \n", + "\n", + " intermediate_anchorage_name end_anchorage_name \n", + "25 NaN NaN \n", + "66 KIZOMBA FPSO KIZOMBA FPSO \n", + "67 LAGOS LAGOS \n", + "68 BLOCK 15 BLOCK 15 \n", + "69 NaN NaN \n", + "70 LUANDA LUANDA \n", + "71 COD-4 COD-4 \n", + "72 NaN NaN \n", + "365 KIZOMBA FPSO BLOCK 15 \n", + "366 KIZOMBA FPSO KIZOMBA FPSO \n", + "367 LAS PALMAS LAS PALMAS \n", + "494 COD-4 BLOCK 15 \n", + "495 BLOCK 15 BLOCK 15 \n", + "496 NaN NaN \n", + "497 NaN NaN \n", + "498 COD-4 COD-4 \n", + "499 DAKAR DAKAR \n", + "500 DAKAR DAKAR " ] }, - "execution_count": 39, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -1939,7 +2133,7 @@ "id": "709d2a5c-5246-4995-ab68-0997adf62349", "metadata": {}, "source": [ - "- `4: port visits`, `0: encounters`, and `414: fishing` events were found for the queried vessels in the given date range.\n", + "- `18: port visits`, `0: encounters`, and `483: fishing` events were found for the queried vessels in the given date range.\n", "- Some events were missed due to **AIS data coverage gaps**.\n", "- Different filters may need to be applied to refine results." 
] diff --git a/pyproject.toml b/pyproject.toml index 3a22d42..1e33197 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -254,6 +254,7 @@ bump_message = "chore(release): bump to v$new_version" [tool.codespell] skip = ".git,env*,venv*,build*,tmp*" +ignore-words-list = "FRO" [tool.pydistcheck] inspect = false diff --git a/src/gfwapiclient/resources/bulk_downloads/resources.py b/src/gfwapiclient/resources/bulk_downloads/resources.py index 7f76c2c..fe24f65 100644 --- a/src/gfwapiclient/resources/bulk_downloads/resources.py +++ b/src/gfwapiclient/resources/bulk_downloads/resources.py @@ -131,9 +131,15 @@ async def create_bulk_report( Example: `"public-fixed-infrastructure-data:latest"`. geojson (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the bulk report. Either a path to a + Custom valid GeoJSON geometry to filter the bulk report. Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. + Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. 
format (Optional[Union[BulkReportFormat, str]], default="JSON"): @@ -187,9 +193,7 @@ async def get_bulk_report_by_id( self, *, id: str, - **kwargs: Dict[ - str, Any - ], # TODO: polling logics (throttled retry based on status) + **kwargs: Any, # TODO: polling logics (throttled retry based on status) ) -> BulkReportDetailResult: """Get a bulk report by ID. diff --git a/src/gfwapiclient/resources/events/resources.py b/src/gfwapiclient/resources/events/resources.py index 57a7a8e..f8c6eda 100644 --- a/src/gfwapiclient/resources/events/resources.py +++ b/src/gfwapiclient/resources/events/resources.py @@ -141,9 +141,15 @@ async def get_all_events( Example: `["USA", "CAN"]`. geometry (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the events. Either a path to a + Custom valid GeoJSON geometry to filter the events. Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. + Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. region (Optional[Union[EventRegion, Region, Dict[str, Any]]], default=None): @@ -348,9 +354,15 @@ async def get_events_stats( Example: `["USA", "CAN"]`. geometry (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the events statistics. 
Either a path to a + Custom valid GeoJSON geometry to filter the events statistics. Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. + Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. region (Optional[Union[EventRegion, Region, Dict[str, Any]]], default=None): diff --git a/src/gfwapiclient/resources/fourwings/resources.py b/src/gfwapiclient/resources/fourwings/resources.py index c501d6c..37be022 100644 --- a/src/gfwapiclient/resources/fourwings/resources.py +++ b/src/gfwapiclient/resources/fourwings/resources.py @@ -139,9 +139,15 @@ async def create_fishing_effort_report( Example: `3`. geojson (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the report. Either a path to a + Custom valid GeoJSON geometry to filter the report. Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. 
+ Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. region (Optional[Union[FourWingsReportRegion, Region, Dict[str, Any]]], default=None): @@ -262,9 +268,15 @@ async def create_ais_presence_report( Example: `True`. geojson (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the report. Either a path to a + Custom valid GeoJSON geometry to filter the report. Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. + Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. region (Optional[Union[FourWingsReportRegion, Region, Dict[str, Any]]], default=None): @@ -384,9 +396,15 @@ async def create_sar_presence_report( Example: `True`. geojson (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the report. Either a path to a + Custom valid GeoJSON geometry to filter the report. 
Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. + Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. region (Optional[Union[FourWingsReportRegion, Region, Dict[str, Any]]], default=None): @@ -535,9 +553,15 @@ async def create_report( Example: `3`. geojson (Optional[Union[GeoJson, str, Path, Dict[str, Any], SupportsGeoJsonInterface]], default=None): - Custom GeoJSON geometry to filter the report. Either a path to a + Custom valid GeoJSON geometry to filter the report. Either a path to a spatial file (e.g., GeoJSON, Shapefile, etc.), GeoJSON-like object - (e.g., JSON string or dictionary) or `GeoJson` model instance. Defaults to `None`. + (e.g., JSON string, dictionary, `geopandas.GeoDataFrame`, `shapely`, + an object implementing `__geo_interface__` etc.) or `GeoJson` model instance. + Spatial files are loaded using + [geopandas.read_file](https://geopandas.org/en/stable/docs/reference/api/geopandas.read_file.html) + and supported formats depend on a properly configured + [geopandas/GDAL installation](https://geopandas.org/en/stable/getting_started/install.html#installing-with-pip). + Defaults to `None`. Example: `{"type": "Polygon", "coordinates": [...]}`, or `/path/to/your/custom/region.shp`. region (Optional[Union[FourWingsReportRegion, Region, Dict[str, Any]]], default=None):