From 4de89c31254a76cc3e48533d56656de38353b2e0 Mon Sep 17 00:00:00 2001 From: honza Date: Thu, 19 Mar 2026 17:47:05 +0100 Subject: [PATCH] Support direct path to a raster file in file_path parameter --- CHANGELOG.md | 2 +- doc/rest.md | 9 +- src/layman/layer/filesystem/input_file.py | 10 +- src/layman/layer/rest_workspace_layers.py | 10 +- src/layman/layer/util.py | 138 ++++++++++++------ .../layer_local_path/test_file_path.py | 128 ++++++++++++++-- 6 files changed, 224 insertions(+), 73 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4567609b..6f8ffca64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ ### Changes - [#1168](https://github.com/LayerManager/layman/issues/1168) Extend [PATCH Workspace Layer](doc/rest.md#patch-workspace-layer) with ability of appending data to existing time-series layer. - When publishing a layer or map to Micka via CSW, Layman sends the creating user (Layman username) in the SOAP request header (`CreateUser`), so the metadata record in Micka is associated with the user who created the publication. -- [#1185](https://github.com/LayerManager/layman/issues/1185) POST Workspace [Layers](doc/rest.md#post-workspace-layers) supports import of raster layers from an existing server-side directory via the file_path parameter, including ImageMosaic timeseries layers. +- [#1185](https://github.com/LayerManager/layman/issues/1185) POST Workspace [Layers](doc/rest.md#post-workspace-layers) supports import of raster layers from existing server-side data via `file_path` (directory for ImageMosaic timeseries layers, or direct GeoTIFF file path). - [#1185](https://github.com/LayerManager/layman/issues/1185)[GET Workspace Layer](doc/rest.md#get-workspace-layer) returns `file_path` key for raster layers published using this parameter. 
## v2.3.0 diff --git a/doc/rest.md b/doc/rest.md index 6fc48c33f..fe8eb8354 100644 --- a/doc/rest.md +++ b/doc/rest.md @@ -167,13 +167,16 @@ Body parameters: - exactly one of `file`, `file_path`, or `external_table_uri` must be set - relative path to a directory that already exists on the server - the path must be relative to the root of the GeoServer data directory - - the referenced directory must be physically located inside the GeoServer data directory - - the directory must contain at least one GeoTIFF file (with extension `.tif` or `.tiff`) + - the referenced path must be physically located inside the GeoServer data directory + - if directory is used, it must contain at least one GeoTIFF file (with extension `.tif` or `.tiff`) + - if directory contains more than one raster file, `time_regex` parameter is required + - if file is used, it must be a GeoTIFF file (with extension `.tif` or `.tiff`) - for raster layers: - supported only for GeoTIFF files (`.tif` or `.tiff` extension) - raster files are not normalized when using `file_path` parameter - this may result in different styling behavior compared to layers published via `file` parameter + - may point directly to a single raster file (published as a single coverage) - may point to a directory containing a single raster file (published as a single coverage) - may point to a directory containing multiple raster files: - if `time_regex` parameter is provided, files are treated as a time series and published as an ImageMosaic @@ -346,7 +349,7 @@ JSON object with following structure: - *status*: Status information about publishing style. See [GET Workspace Layer](#get-workspace-layer) **wms** property for meaning. - *error*: If status is FAILURE, this may contain error object. - **original_data_source**: String. Either `file` if layer was published from file, or `database_table` if layer was published from external database table -- **file_path**: String. 
Available only for raster layers published using `file_path` parameter. Relative path to the directory containing raster files, relative to the root of the GeoServer data directory. +- **file_path**: String. Available only for raster layers published using `file_path` parameter. Path to a directory or to a single GeoTIFF file, relative to the root of the GeoServer data directory. - *metadata* - *identifier*: String. Identifier of metadata record in CSW instance. - *record_url*: String. URL of metadata record accessible by web browser, probably with some editing capabilities. diff --git a/src/layman/layer/filesystem/input_file.py b/src/layman/layer/filesystem/input_file.py index fcebe18f2..788c0c79c 100644 --- a/src/layman/layer/filesystem/input_file.py +++ b/src/layman/layer/filesystem/input_file.py @@ -86,15 +86,7 @@ def get_file_path_info(publ_uuid): abs_path = os.path.join(settings.GEOSERVER_DATADIR, file_path_relative) - if not os.path.isdir(abs_path): - raise LaymanError(2, { - 'parameter': 'file_path', - 'message': 'Path is not a directory', - 'expected': 'Relative path to directory containing raster files', - 'found': file_path_relative, - }) - - tifs = layer_util.get_geotiff_files(abs_path) + tifs = layer_util.get_file_path_geotiff_files(abs_path) if not tifs: return None return [{'absolute': tif, 'gdal': tif, 'file_path': file_path_relative} for tif in tifs] diff --git a/src/layman/layer/rest_workspace_layers.py b/src/layman/layer/rest_workspace_layers.py index 53977a5c1..1b80a336a 100644 --- a/src/layman/layer/rest_workspace_layers.py +++ b/src/layman/layer/rest_workspace_layers.py @@ -108,10 +108,15 @@ def post(workspace): time_regex = request.form.get('time_regex') or None time_regex_format = request.form.get('time_regex_format') or None - file_path_relative, file_path_absolute, _, _ = util.validate_and_process_file_path(file_path, check_crs=check_crs) + file_path_relative, file_path_absolute, file_path_type, tif_files = 
util.validate_and_process_file_path(file_path, check_crs=check_crs) if file_path_relative: file_path = file_path_absolute - util.validate_file_path_requires_time_regex(file_path_absolute, time_regex) + util.validate_file_path_requires_time_regex( + file_path_absolute, + time_regex, + file_path_type=file_path_type, + tif_files=tif_files, + ) util.validate_time_regex(time_regex, time_regex_format) slugified_time_regex = input_file.slugify_timeseries_filename_pattern(time_regex) if time_regex else None @@ -147,7 +152,6 @@ def post(workspace): if file_path: geodata_type = settings.GEODATA_TYPE_RASTER if not crs_id: - tif_files = util.get_geotiff_files(file_path) crs_id = input_file.get_raster_crs_id(tif_files[0]) if tif_files else None if not crs_id: raise LaymanError(4, {'found': None, 'supported_values': settings.INPUT_SRS_LIST}) diff --git a/src/layman/layer/util.py b/src/layman/layer/util.py index 2d1cdf36c..189c42d04 100644 --- a/src/layman/layer/util.py +++ b/src/layman/layer/util.py @@ -1,7 +1,6 @@ from functools import wraps, partial from urllib import parse import os -import glob import re import logging import shutil @@ -28,6 +27,9 @@ EXTERNAL_TABLE_URI_PATTERN = 'postgresql://:@:/?schema=&table=&geo_column=' logger = logging.getLogger(__name__) +FILE_PATH_TYPE_DIRECTORY = 'directory' +FILE_PATH_TYPE_FILE = 'file' +FILE_PATH_MAIN_EXTENSIONS = {ext.lower() for ext in settings.FILE_PATH_MAIN_FILE_EXTENSIONS} def to_safe_layer_name(value): @@ -254,11 +256,78 @@ def layer_info_to_metadata_properties(info): def get_geotiff_files(directory): - files = [] - for ext in settings.FILE_PATH_MAIN_FILE_EXTENSIONS: - files.extend(glob.glob(os.path.join(directory, f'*{ext}'))) - files.extend(glob.glob(os.path.join(directory, f'*{ext.upper()}'))) - return files + return [ + os.path.join(directory, f) + for f in os.listdir(directory) + if os.path.splitext(f)[1].lower() in FILE_PATH_MAIN_EXTENSIONS + ] + + +def get_file_path_type(file_path_absolute): + if 
os.path.isdir(file_path_absolute): + return FILE_PATH_TYPE_DIRECTORY + if os.path.isfile(file_path_absolute): + return FILE_PATH_TYPE_FILE + return None + + +def get_file_path_geotiff_files(file_path_absolute, *, file_path_type=None): + file_path_type = file_path_type or get_file_path_type(file_path_absolute) + if file_path_type == FILE_PATH_TYPE_DIRECTORY: + return get_geotiff_files(file_path_absolute) + if file_path_type == FILE_PATH_TYPE_FILE: + ext = os.path.splitext(file_path_absolute)[1].lower() + if ext in FILE_PATH_MAIN_EXTENSIONS: + return [file_path_absolute] + return [] + + +def validate_file_path_source(file_path, file_path_absolute): + file_path_type = get_file_path_type(file_path_absolute) + if file_path_type == FILE_PATH_TYPE_DIRECTORY: + if not ( + os.access(file_path_absolute, os.R_OK) + and os.access(file_path_absolute, os.X_OK) + ): + raise LaymanError(2, { + 'parameter': 'file_path', + 'message': 'Directory is not readable', + 'expected': 'Readable directory containing raster files', + 'found': file_path, + }) + elif file_path_type == FILE_PATH_TYPE_FILE: + ext = os.path.splitext(file_path_absolute)[1].lower() + if ext not in FILE_PATH_MAIN_EXTENSIONS: + raise LaymanError(2, { + 'parameter': 'file_path', + 'message': 'Path is not a supported raster file', + 'expected': f'Relative path to GeoTIFF file with extension in {settings.FILE_PATH_MAIN_FILE_EXTENSIONS}', + 'found': file_path, + }) + if not os.access(file_path_absolute, os.R_OK): + raise LaymanError(2, { + 'parameter': 'file_path', + 'message': 'Raster file is not readable', + 'expected': 'Readable GeoTIFF file', + 'found': file_path, + }) + else: + raise LaymanError(2, { + 'parameter': 'file_path', + 'message': 'Path is not a directory or file', + 'expected': 'Relative path to existing directory or GeoTIFF file', + 'found': file_path, + }) + + tif_files = get_file_path_geotiff_files(file_path_absolute, file_path_type=file_path_type) + if not tif_files: + raise LaymanError(2, { + 
'parameter': 'file_path', + 'message': 'Directory does not contain any raster files', + 'expected': 'Directory containing at least one GeoTIFF file (.tif or .tiff) or a GeoTIFF file path', + 'found': file_path, + }) + return file_path_type, tif_files def validate_and_process_file_path(file_path, *, check_crs=True): @@ -271,7 +340,7 @@ def validate_and_process_file_path(file_path, *, check_crs=True): raise LaymanError(2, { 'parameter': 'file_path', 'message': 'Absolute path is not allowed', - 'expected': 'Relative path to directory (relative to GEOSERVER_DATADIR)', + 'expected': 'Relative path to directory or GeoTIFF file (relative to GEOSERVER_DATADIR)', 'found': file_path, }) @@ -282,61 +351,46 @@ def validate_and_process_file_path(file_path, *, check_crs=True): raise LaymanError(2, { 'parameter': 'file_path', 'message': 'Path is outside GeoServer data directory', - 'expected': 'Relative path to directory inside GeoServer data directory', + 'expected': 'Relative path to directory or GeoTIFF file inside GeoServer data directory', 'found': file_path, }) if not os.path.exists(file_path_absolute): raise LaymanError(2, { 'parameter': 'file_path', - 'message': 'Directory does not exist', - 'expected': 'Relative path to existing directory on server', + 'message': 'Path does not exist', + 'expected': 'Relative path to existing directory or GeoTIFF file on server', 'found': file_path, }) - if not os.path.isdir(file_path_absolute): - raise LaymanError(2, { - 'parameter': 'file_path', - 'message': 'Path is not a directory', - 'expected': 'Relative path to existing directory containing raster files', - 'found': file_path, - }) - - if not ( - os.access(file_path_absolute, os.R_OK) - and os.access(file_path_absolute, os.X_OK) - ): - raise LaymanError(2, { - 'parameter': 'file_path', - 'message': 'Directory is not readable', - 'expected': 'Readable directory containing raster files', - 'found': file_path, - }) - - tif_files = get_geotiff_files(file_path_absolute) - if not 
tif_files: - raise LaymanError(2, { - 'parameter': 'file_path', - 'message': 'Directory does not contain any raster files', - 'expected': 'Directory containing at least one GeoTIFF file (.tif or .tiff)', - 'found': file_path, - }) + file_path_type, tif_files = validate_file_path_source(file_path, file_path_absolute) if check_crs: for tif_file in tif_files: input_file.check_raster_layer_crs(tif_file) - return file_path, file_path_absolute, None, None + return file_path, file_path_absolute, file_path_type, tif_files + +def validate_file_path_requires_time_regex(file_path_absolute, time_regex, *, file_path_type, tif_files): + if file_path_type == FILE_PATH_TYPE_FILE and time_regex: + file_path_relative = os.path.relpath(file_path_absolute, settings.GEOSERVER_DATADIR) + raise LaymanError(48, { + 'parameters': ['file_path', 'time_regex'], + 'message': 'time_regex is not allowed for file_path pointing to a file', + 'expected': 'Provide file_path to a directory when using time_regex', + 'found': { + 'file_path': file_path_relative, + 'time_regex': time_regex, + }, + }) -def validate_file_path_requires_time_regex(file_path_absolute, time_regex): - tif_files = get_geotiff_files(file_path_absolute) - if len(tif_files) > 1 and not time_regex: + if file_path_type == FILE_PATH_TYPE_DIRECTORY and len(tif_files) > 1 and not time_regex: file_path_relative = os.path.relpath(file_path_absolute, settings.GEOSERVER_DATADIR) raise LaymanError(48, { 'parameters': ['file_path', 'time_regex'], 'message': 'Directory contains multiple raster files, but time_regex is not provided', - 'expected': 'Provide time_regex parameter for image mosaic when directory contains multiple raster files', + 'expected': 'Provide time_regex for image mosaic or specify file_path to a single raster file', 'found': { 'file_path': file_path_relative, 'raster_files_count': len(tif_files), diff --git a/tests/dynamic_data/publications/layer_local_path/test_file_path.py 
b/tests/dynamic_data/publications/layer_local_path/test_file_path.py index c9afcc4a8..7a1358642 100644 --- a/tests/dynamic_data/publications/layer_local_path/test_file_path.py +++ b/tests/dynamic_data/publications/layer_local_path/test_file_path.py @@ -8,7 +8,6 @@ from tests.asserts.final import publication as asserts_publ from tests.asserts.final.publication import util as assert_util from tests.dynamic_data import base_test -from tests.dynamic_data.publications import common_publications from tests import Publication4Test pytest_generate_tests = base_test.pytest_generate_tests @@ -133,6 +132,38 @@ def generate_test_cases(): ) test_cases.append(test_case_single) + file_path_relative_single_file = os.path.join( + normalized_raster_data_dir_name, + 'layers', + TEST_UUID_SINGLE, + 'raster.tif', + ) + publication_single_file = Publication4Test( + type=process_client.LAYER_TYPE, + workspace=WORKSPACE, + name='test_file_path_single_file', + ) + test_case_single_file = base_test.TestCaseType( + key='file_path_single_file', + type=base_test.EnumTestTypes.MANDATORY, + publication=publication_single_file, + rest_method=base_test.RestMethod.POST, + rest_args={ + 'file_path': file_path_relative_single_file, + }, + params={ + 'exp_info': { + 'exp_publication_detail': { + 'geodata_type': 'raster', + 'image_mosaic': False, + }, + 'publ_type_detail': ('raster', 'sld'), + }, + 'exp_thumbnail': 'test_tools/data/thumbnail/raster_layer_tif.png', + }, + ) + test_cases.append(test_case_single_file) + return test_cases @@ -219,18 +250,18 @@ def generate_negative_test_cases(): ) test_cases.append(test_case_nonexistent) - publication_file = Publication4Test( + publication_no_tif = Publication4Test( type=process_client.LAYER_TYPE, workspace=WORKSPACE, - name='test_file_path_is_file', + name='test_file_path_no_tif', ) - test_case_file = base_test.TestCaseType( - key='file_path_is_file', + test_case_no_tif = base_test.TestCaseType( + key='file_path_no_tif', 
type=base_test.EnumTestTypes.MANDATORY, - publication=publication_file, + publication=publication_no_tif, rest_method=base_test.RestMethod.POST, rest_args={ - 'file_path': os.path.join(normalized_raster_data_dir_name, 'layers', TEST_UUID_SINGLE, 'raster.tif'), + 'file_path': os.path.join(normalized_raster_data_dir_name, 'layers', 'empty_dir'), }, params={ 'should_succeed': False, @@ -238,20 +269,20 @@ def generate_negative_test_cases(): 'expected_error_param': 'file_path', }, ) - test_cases.append(test_case_file) + test_cases.append(test_case_no_tif) - publication_no_tif = Publication4Test( + publication_file_unsupported = Publication4Test( type=process_client.LAYER_TYPE, workspace=WORKSPACE, - name='test_file_path_no_tif', + name='test_file_path_file_unsupported', ) - test_case_no_tif = base_test.TestCaseType( - key='file_path_no_tif', + test_case_file_unsupported = base_test.TestCaseType( + key='file_path_file_unsupported', type=base_test.EnumTestTypes.MANDATORY, - publication=publication_no_tif, + publication=publication_file_unsupported, rest_method=base_test.RestMethod.POST, rest_args={ - 'file_path': os.path.join(normalized_raster_data_dir_name, 'layers', 'empty_dir'), + 'file_path': os.path.join(normalized_raster_data_dir_name, 'layers', 'unsupported_file', 'raster.png'), }, params={ 'should_succeed': False, @@ -259,7 +290,28 @@ def generate_negative_test_cases(): 'expected_error_param': 'file_path', }, ) - test_cases.append(test_case_no_tif) + test_cases.append(test_case_file_unsupported) + + publication_vector_file = Publication4Test( + type=process_client.LAYER_TYPE, + workspace=WORKSPACE, + name='test_file_path_vector_file', + ) + test_case_vector_file = base_test.TestCaseType( + key='file_path_vector_file', + type=base_test.EnumTestTypes.MANDATORY, + publication=publication_vector_file, + rest_method=base_test.RestMethod.POST, + rest_args={ + 'file_path': os.path.join(normalized_raster_data_dir_name, 'layers', 'vector_file', 'sample.shp'), + }, + 
params={ + 'should_succeed': False, + 'expected_error_code': 2, + 'expected_error_param': 'file_path', + }, + ) + test_cases.append(test_case_vector_file) publication_no_regex = Publication4Test( type=process_client.LAYER_TYPE, @@ -282,6 +334,28 @@ def generate_negative_test_cases(): ) test_cases.append(test_case_no_regex) + publication_file_with_regex = Publication4Test( + type=process_client.LAYER_TYPE, + workspace=WORKSPACE, + name='test_file_path_file_with_regex', + ) + test_case_file_with_regex = base_test.TestCaseType( + key='file_path_file_with_regex', + type=base_test.EnumTestTypes.MANDATORY, + publication=publication_file_with_regex, + rest_method=base_test.RestMethod.POST, + rest_args={ + 'file_path': os.path.join(normalized_raster_data_dir_name, 'layers', TEST_UUID_SINGLE, 'raster.tif'), + 'time_regex': '[0-9]{8}', + }, + params={ + 'should_succeed': False, + 'expected_error_code': 48, + 'expected_error_params': ['file_path', 'time_regex'], + }, + ) + test_cases.append(test_case_file_with_regex) + return test_cases @@ -312,6 +386,30 @@ def prepare_negative_test_data(): shutil.copy2(sample_file, os.path.join(multi_no_regex_dir, 'raster2.tif')) target_dirs.append(multi_no_regex_dir) + unsupported_file_dir = os.path.join(layers_dir, 'unsupported_file') + if os.path.exists(unsupported_file_dir): + shutil.rmtree(unsupported_file_dir) + os.makedirs(unsupported_file_dir, exist_ok=True) + with open(os.path.join(unsupported_file_dir, 'raster.png'), 'wb') as unsupported_file: + unsupported_file.write(b'not-a-geotiff') + target_dirs.append(unsupported_file_dir) + + vector_file_dir = os.path.join(layers_dir, 'vector_file') + if os.path.exists(vector_file_dir): + shutil.rmtree(vector_file_dir) + os.makedirs(vector_file_dir, exist_ok=True) + source_shp = os.path.join(DIRECTORY, 'layers', TEST_UUID_MOSAIC, f'{TEST_UUID_MOSAIC}.shp') + assert os.path.exists(source_shp), f"Sample vector file not found: {source_shp}" + shutil.copy2(source_shp, 
os.path.join(vector_file_dir, 'sample.shp')) + target_dirs.append(vector_file_dir) + + single_file_for_regex_conflict_dir = os.path.join(layers_dir, TEST_UUID_SINGLE) + if os.path.exists(single_file_for_regex_conflict_dir): + shutil.rmtree(single_file_for_regex_conflict_dir) + os.makedirs(single_file_for_regex_conflict_dir, exist_ok=True) + shutil.copy2(sample_file, os.path.join(single_file_for_regex_conflict_dir, 'raster.tif')) + target_dirs.append(single_file_for_regex_conflict_dir) + yield for target_dir in target_dirs: