From 77dd923f2eff2aa671e3c458a465102c0c6c391a Mon Sep 17 00:00:00 2001 From: Carsten Ehbrecht Date: Fri, 5 Dec 2014 16:42:22 +0100 Subject: [PATCH 01/19] enabled temporal search --- .gitignore | 1 + pyesgf/search/connection.py | 10 ++++------ pyesgf/search/context.py | 9 +++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index a564df9..00821bd 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ generated/ /eclipse /backup ./do_jenkins.sh +test/url_cache diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py index c7a239f..c58995b 100644 --- a/pyesgf/search/connection.py +++ b/pyesgf/search/connection.py @@ -126,7 +126,7 @@ def send_wget(self, query_dict, shards=None): def _send_query(self, endpoint, full_query): """ Generally not to be called directly by the user but via SearchContext - instances. + instances. :param full_query: dictionary of query string parameers to send. :return: the urllib2 response object from the query. 
@@ -229,8 +229,7 @@ def get_shard_list(self): def new_context(self, context_class=None, latest=None, facets=None, fields=None, - #!TODO: add once implemented - #from_timestamp=None, to_timestamp=None, + from_timestamp=None, to_timestamp=None, replica=None, shards=None, **constraints): """ @@ -246,9 +245,8 @@ def new_context(self, context_class=None, return context_class(self, constraints, latest=latest, facets=facets, fields=fields, - #!TODO: add once implemented - #from_timestamp=from_timestamp, - #to_timestamp=to_timestamp, + from_timestamp=from_timestamp, + to_timestamp=to_timestamp, replica=replica, shards=shards, ) diff --git a/pyesgf/search/context.py b/pyesgf/search/context.py index b88261b..531379d 100644 --- a/pyesgf/search/context.py +++ b/pyesgf/search/context.py @@ -80,7 +80,7 @@ def __init__(self, connection, constraints, search_type=None, # Constraints self.freetext_constraint = None self.facet_constraints = MultiDict() - self.temporal_constraint = (None, None) + self.temporal_constraint = (from_timestamp, to_timestamp) self.geosplatial_constraint = None self._update_constraints(constraints) @@ -221,7 +221,8 @@ def _update_constraints(self, constraints): self._constrain_freetext(new_freetext) #!TODO: implement temporal and geospatial constraints - #self._constrain_temporal() + #print constraints_split['temporal'] + self._constrain_temporal(start=self.temporal_constraint[0], end=self.temporal_constraint[1]) #self._constrain_geospatial() # reset cached values @@ -300,8 +301,8 @@ def _build_query(self): query_dict.extend(self.facet_constraints) #!TODO: encode datetime - #start, end = self.temporal_constraint - #query_dict.update(start=start, end=end) + start, end = self.temporal_constraint + query_dict.update(start=start, end=end) return query_dict From c0ef521fc0b142adeecfdc58c4b0b5ab96fdf52a Mon Sep 17 00:00:00 2001 From: Carsten Ehbrecht Date: Tue, 14 Apr 2015 10:20:40 +0200 Subject: [PATCH 02/19] fixed comment --- pyesgf/search/connection.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py index c58995b..bdb18e9 100644 --- a/pyesgf/search/connection.py +++ b/pyesgf/search/connection.py @@ -126,7 +126,7 @@ def send_wget(self, query_dict, shards=None): def _send_query(self, endpoint, full_query): """ Generally not to be called directly by the user but via SearchContext - instances. + instances. :param full_query: dictionary of query string parameers to send. :return: the urllib2 response object from the query. From f30da296f4ede56d2d09531bf1867c750d11d1ce Mon Sep 17 00:00:00 2001 From: root Date: Mon, 15 Jun 2015 16:30:02 +0100 Subject: [PATCH 03/19] Added a test for temporal search parameters on Datasets. Tests "to_timestamp" and "from_timestamp" work to select Datasets. --- test/test_temporal_search.py | 48 ++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 test/test_temporal_search.py diff --git a/test/test_temporal_search.py b/test/test_temporal_search.py new file mode 100644 index 0000000..5fb271f --- /dev/null +++ b/test/test_temporal_search.py @@ -0,0 +1,48 @@ +""" +test_temporal_search.py +======================= + +Uses CMIP5 to find HadGEM2-ES dataset ids that include data between 1960 and 1962. + +Tests whether the "from_timestamp" and "to_timestamp" options are working in esgf-pyclient. 
+ +""" + +import os +os.environ["http_proxy"] = "http://wwwcache.rl.ac.uk:8080" +os.environ["https_proxy"] = "https://wwwcache.rl.ac.uk:8080" + +from pyesgf.search import SearchConnection + +conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search', + distrib=False) + +ctx = conn.new_context(project = "CMIP5", model = "HadGEM2-ES") +print ctx.hit_count +ctx = ctx.constrain(time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True) +#print ctx.facet_counts +print ctx.hit_count + +if 0: + ctx = ctx.constrain(from_timestamp = "2100-12-30T23:23:59Z") +else: + print "THINGS TO FIX:" + + print """ 1. 400 Error - grab errors from Tomcat response that explain error. + 2. Allow DEBUG to be switched on/off. + 3. Explain about timestamp searching in the docs. + 4. Explain that timestamp search is only for Datasets, not Files. + 5. Fix so context.constrain(**args) can take "to_timestamp" and "from_timestamp". +""" + ctx = conn.new_context(project = "CMIP5", model = "HadGEM2-ES", + time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True, + from_timestamp = "2100-12-30T23:23:59Z", to_timestamp = "2200-01-01T00:00:00Z") + +print ctx.hit_count + +for (i, d) in enumerate(ctx.search()): + print "Dataset ID:", d.dataset_id + #if i > 10: break + + for f in d.file_context().search(): + print f.filename, f.size, f.checksum From b86abe07f96c08a03f31c99ba9a92240ee91c65c Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 10:00:40 +0100 Subject: [PATCH 04/19] Updated documentation to include temporal search. 
--- docs/concepts.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/concepts.rst b/docs/concepts.rst index 8f06c9f..d733704 100644 --- a/docs/concepts.rst +++ b/docs/concepts.rst @@ -27,18 +27,22 @@ facets SearchContext Set in constructor fields SearchContext Set in constructor replica SearchContext Set in constructor type SearchContext Create contexts with the right type using :meth:`ResultSet.file_context`, etc. -from SearchContext Not implemented yet. Placeholder name "from_timestamp" -to SearchContext Not implemented yet. Placeholder name "to_timestamp" +from SearchContext Set in constructor. Use "from_timestamp" in the context API. +to SearchContext Set in constructor. Use "to_timestamp" in the context API. fields n/a Managed internally format n/a Managed internally id n/a Managed internally =========== ================ =================================================================================================== +Temporal keywords +''''''''''''''''' -Temporal / Spatial keywords -''''''''''''''''''''''''''' +Temporal keywords are supported for Dataset search. The terms "from_timestamp" and "to_timestamp" should be used with values following the format "YYYY-MM-DDThh:mm:ssZ". -Temporal and spatial keywords are not yet supported by :mod:`pyesgf.search` however the API does have placeholders for these keywords anticipating future implementation: +Spatial keywords +'''''''''''''''' + +Spatial keywords are not yet supported by :mod:`pyesgf.search` however the API does have placeholders for these keywords anticipating future implementation: Facet keywords '''''''''''''' From 8dd99f59a5715fa1bd1e5c83df78d8965df1d16b Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 14:46:24 +0100 Subject: [PATCH 05/19] Cleaned up temporal test.
--- test/test_temporal_search.py | 48 ++++++++++++++---------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/test/test_temporal_search.py b/test/test_temporal_search.py index 5fb271f..2c697ce 100644 --- a/test/test_temporal_search.py +++ b/test/test_temporal_search.py @@ -8,41 +8,31 @@ """ -import os -os.environ["http_proxy"] = "http://wwwcache.rl.ac.uk:8080" -os.environ["https_proxy"] = "https://wwwcache.rl.ac.uk:8080" - from pyesgf.search import SearchConnection -conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search', + +def test_temporal_search_CMIP5(): + conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search', distrib=False) -ctx = conn.new_context(project = "CMIP5", model = "HadGEM2-ES") -print ctx.hit_count -ctx = ctx.constrain(time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True) -#print ctx.facet_counts -print ctx.hit_count - -if 0: - ctx = ctx.constrain(from_timestamp = "2100-12-30T23:23:59Z") -else: - print "THINGS TO FIX:" - - print """ 1. 400 Error - grab errors from Tomcat response that explain error. - 2. Allow DEBUG to be switched on/off. - 3. Explain about timestamp searching in the docs. - 4. Explain that timestamp search is only for Datasets, not Files. - 5. Fix so context.constrain(**args) can take "to_timestamp" and "from_timestamp". 
-""" - ctx = conn.new_context(project = "CMIP5", model = "HadGEM2-ES", + ctx1 = conn.new_context(project = "CMIP5", model = "HadGEM2-ES", + time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True) + + ctx2 = conn.new_context(project = "CMIP5", model = "HadGEM2-ES", time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True, from_timestamp = "2100-12-30T23:23:59Z", to_timestamp = "2200-01-01T00:00:00Z") -print ctx.hit_count + assert ctx2.hit_count < ctx1.hit_count + +def test_temporal_search_CORDEX(): + conn = SearchConnection("http://esgf-data.dkrz.de/esg-search", distrib=True) + + ctx1 = conn.new_context(project='CORDEX', + from_timestamp="1990-01-01T12:00:00Z", + to_timestamp="2100-12-31T12:00:00Z") -for (i, d) in enumerate(ctx.search()): - print "Dataset ID:", d.dataset_id - #if i > 10: break + ctx2 = conn.new_context(project='CORDEX', + from_timestamp="2011-01-01T12:00:00Z", + to_timestamp="2100-12-31T12:00:00Z") - for f in d.file_context().search(): - print f.filename, f.size, f.checksum + assert ctx2.hit_count < ctx1.hit_count From 9cd6c0fd44bd7442475dca11c741dba82269a547 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 15:25:33 +0100 Subject: [PATCH 06/19] Updated tests based on modern results and CEDA index node as first contact. 
--- test/config.py | 2 +- test/test_connection.py | 4 ++-- test/test_context.py | 27 +++++++++++++++++++-------- test/test_results.py | 5 ++--- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/test/config.py b/test/config.py index 7ae4674..d14bd5d 100644 --- a/test/config.py +++ b/test/config.py @@ -3,6 +3,6 @@ """ -TEST_SERVICE='http://esgf-node.ipsl.fr/esg-search' +TEST_SERVICE='http://esgf-index1.ceda.ac.uk/esg-search' CACHE_DIR = 'url_cache' diff --git a/test/test_connection.py b/test/test_connection.py index b21c3ba..5d9d404 100644 --- a/test/test_connection.py +++ b/test/test_connection.py @@ -28,9 +28,9 @@ def test_get_shard_list(): shards = conn.get_shard_list() #!NOTE: the exact shard list will change depending on the shard replication configuration # on the test server - assert 'esgf-node.ipsl.fr' in shards + assert 'esgf-index2.ceda.ac.uk' in shards # IPSL now replicates all non-local shards. Just check it has a few shards - assert len(shards['esgf-node.ipsl.fr']) > 4 + assert len(shards['esgf-index2.ceda.ac.uk']) > 3 def test_url_fixing(): diff --git a/test/test_context.py b/test/test_context.py index f9e375b..fe03cb5 100644 --- a/test/test_context.py +++ b/test/test_context.py @@ -63,14 +63,10 @@ def test_context_facet_options(): conn = SearchConnection(TEST_SERVICE) context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR', ensemble='r1i1p1', experiment='rcp60', - realm='seaIce' - ) + realm='seaIce') - assert context.get_facet_options().keys() == [ - 'product', 'cf_standard_name', 'variable_long_name', 'cmor_table', - 'time_frequency', 'variable' - ] - + assert context.get_facet_options().keys() == ['data_node', 'cf_standard_name', 'variable_long_name', + 'cmor_table', 'time_frequency', 'variable'] def test_context_facets3(): @@ -153,7 +149,6 @@ def test_negative_facet(): assert hits1 == hits2 + hits3 - def test_replica(): # Test that we can exclude replicas # This tests assumes the test dataset is replicated @@ -169,3 +164,19 @@ 
def test_replica(): replica=False) assert context.hit_count == 1 + +def test_response_from_bad_parameter(): + # Test that a bad parameter name raises a useful exception + # NOTE::: !!! This fails because urllib2 HTTP query is overrided with + # !!! cache handler instead of usual response. + # !!! Fix needs to make sure cached URL request has response exceptions matching urllib2 exception + conn = SearchConnection(TEST_SERVICE) + context = conn.new_context(project='CMIP5', rubbish='nonsense') + context.hit_count + + try: + context.hit_count + except Exception, err: + assert str(err).strip() == "Invalid query parameter(s): rubbish" + + diff --git a/test/test_results.py b/test/test_results.py index 3b37f47..c052a0b 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -17,7 +17,7 @@ def test_result1(): results = ctx.search() r1 = results[0] - assert re.match(r'cmip5\.output1\.IPSL\..\|vesg.ipsl.fr', r1.dataset_id) + assert re.match(r'cmip5\.output1\.MOHC\..+\|esgf-data2.ceda.ac.uk', r1.dataset_id) def test_file_context(): conn = SearchConnection(TEST_SERVICE, distrib=False) @@ -60,8 +60,7 @@ def test_file_list2(): file_results = f_ctx.search() for file_result in file_results: - print file_result.download_url - assert re.match(r'http://vesg.ipsl.fr/thredds/.*\.nc', file_result.download_url) + assert re.search(r'ds/.*\.nc', file_result.download_url) def test_aggregations(): conn = SearchConnection(TEST_SERVICE, distrib=False) From 5ffd072802b3295110dfba9f208995cd992806c2 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 15:26:10 +0100 Subject: [PATCH 07/19] Added temporal search and improved exception response when bad search parameter provided. Added temporal search using: from_timestamp - start time to_timestamp - end time These arguments only apply to dataset searches. When an unrecognised search parameter is provided the code now attempts to intercept that error (code = 400) and grab the responses from the Tomcat server.
--- pyesgf/search/connection.py | 15 +++++++++++++-- pyesgf/search/context.py | 27 ++++++++++----------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py index c58995b..c8f8fa2 100644 --- a/pyesgf/search/connection.py +++ b/pyesgf/search/connection.py @@ -27,7 +27,9 @@ import warnings import logging +logging.basicConfig() log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) from .context import DatasetSearchContext from .consts import RESPONSE_FORMAT, SHARD_REXP @@ -138,7 +140,16 @@ def _send_query(self, endpoint, full_query): query_url = '%s/%s?%s' % (self.url, endpoint, urlencode(full_query)) log.debug('Query request is %s' % query_url) - response = urllib2.urlopen(query_url) + try: + response = urllib2.urlopen(query_url) + except urllib2.HTTPError, err: + log.warn("HTTP request received error code: %s" % err.code) + if err.code == 400: + errors = set(re.findall("Invalid HTTP query parameter=(\w+)", err.fp.read())) + content = "; ".join([e for e in list(errors)]) + raise Exception("Invalid query parameter(s): %s" % content) + else: + raise Exception("Error returned from URL: %s" % query_url) return response @@ -264,7 +275,7 @@ def query_keyword_type(keyword): if keyword == 'query': return 'freetext' - elif keyword in ['start', 'end']: + elif keyword in ['start', 'end', 'from_timestamp', 'to_timestamp']: return 'temporal' elif keyword in ['lat', 'lon', 'bbox', 'location', 'radius', 'polygon']: return 'geospatial' diff --git a/pyesgf/search/context.py b/pyesgf/search/context.py index 531379d..eb061c1 100644 --- a/pyesgf/search/context.py +++ b/pyesgf/search/context.py @@ -65,8 +65,10 @@ def __init__(self, connection, constraints, search_type=None, or only non-latest versions, or None to return both. :param shards: list of shards to restrict searches to. 
Should be from the list self.connection.get_shard_list() - :param from_timestamp: NotImplemented - :param to_timestamp: NotImplemented + :param from_timestamp: Date-time string to specify start of search range + (e.g. "2000-01-01T00:00:00Z"). + :param to_timestamp: Date-time string to specify end of search range + (e.g. "2100-12-31T23:59:59Z"). """ @@ -80,7 +82,7 @@ def __init__(self, connection, constraints, search_type=None, # Constraints self.freetext_constraint = None self.facet_constraints = MultiDict() - self.temporal_constraint = (from_timestamp, to_timestamp) + self.temporal_constraint = [from_timestamp, to_timestamp] self.geosplatial_constraint = None self._update_constraints(constraints) @@ -221,8 +223,10 @@ def _update_constraints(self, constraints): self._constrain_freetext(new_freetext) #!TODO: implement temporal and geospatial constraints - #print constraints_split['temporal'] - self._constrain_temporal(start=self.temporal_constraint[0], end=self.temporal_constraint[1]) + if 'from_timestamp' in constraints_split['temporal']: + self.temporal_constraint[0] = constraints_split['temporal']['from_timestamp'] + if 'to_timestamp' in constraints_split['temporal']: + self.temporal_constraint[1] = constraints_split['temporal']['to_timestamp'] #self._constrain_geospatial() # reset cached values @@ -243,18 +247,6 @@ def _constrain_facets(self, facet_constraints): def _constrain_freetext(self, query): self.freetext_constraint = query - def _constrain_temporal(self, start, end): - """ - :param start: a datetime instance specifying the start of the temporal - constraint. - :param end: a datetime instance specifying the end of the temporal - constraint. 
- - """ - #!TODO: support solr date keywords like "NOW" and "NOW-1DAY" - # we will probably need a separate TemporalConstraint object - self.temporal_constraint = (start, end) - def _constrain_geospatial(self, lat=None, lon=None, bbox=None, location=None, radius=None, polygon=None): self.geospatial_constraint = GeospatialConstraint(lat, lon, bbox, location, radius, polygon) @@ -278,6 +270,7 @@ def _split_constraints(self, constraints): from .connection import query_keyword_type constraints_split = dict((kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES) + for kw, val in constraints.items(): constraint_type = query_keyword_type(kw) constraints_split[constraint_type][kw] = val From e298fe19814c5dacf8ae7611c850d1c928f5f886 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 15:34:15 +0100 Subject: [PATCH 08/19] Updated version and setup info. --- pyesgf/__init__.py | 2 +- setup.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyesgf/__init__.py b/pyesgf/__init__.py index 9edfdbc..c597065 100644 --- a/pyesgf/__init__.py +++ b/pyesgf/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = '0.1.2' +__version__ = '0.1.3' #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None #!TODO: pipe results to new process. Command-line interface. 
diff --git a/setup.py b/setup.py index 255439a..7267343 100644 --- a/setup.py +++ b/setup.py @@ -25,10 +25,10 @@ 'Programming Language :: Python :: 2.6', ], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers keywords='', - author='Stephen Pascoe', - author_email='Stephen.Pascoe@stfc.ac.uk', + author='Ag Stephens', + author_email='Ag.Stephens@stfc.ac.uk', url='http://esgf-pyclient.readthedocs.org', - download_url='http://github.org/stephenpascoe/esgf-pyclient', + download_url='http://github.com/ESGF/esgf-pyclient', license='BSD', packages=find_packages(exclude=['ez_setup', 'examples', 'test']), include_package_data=True, From 59edd598f1847436c77d31b41c9a9d331b308635 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 15:39:18 +0100 Subject: [PATCH 09/19] Incremented version to 0.1.4 --- pyesgf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyesgf/__init__.py b/pyesgf/__init__.py index c597065..023b885 100644 --- a/pyesgf/__init__.py +++ b/pyesgf/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = '0.1.3' +__version__ = '0.1.4' #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None #!TODO: pipe results to new process. Command-line interface. From ee489ce162712927ca53e5636036b4a4b1c41ce7 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 15:53:22 +0100 Subject: [PATCH 10/19] Switched off default logging to DEBUG and updated examples in docs. 
--- docs/examples.rst | 20 ++++++++++++++++++++ pyesgf/search/connection.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/examples.rst b/docs/examples.rst index 307d5f1..2316c95 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -52,6 +52,26 @@ Find download URLs for all files in a dataset http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/tro3/mon/grid/NASA-JPL/TES/v20110608/tro3_TES_L3_tbd_200507-200912.nc http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/tro3Stderr/mon/grid/NASA-JPL/TES/v20110608/tro3Stderr_TES_L3_tbd_200507-200912.nc +Define a search for datasets that includes a temporal range: + + >>> conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search', + distrib=False) + >>> ctx = conn.new_context(project = "CMIP5", model = "HadGEM2-ES", + time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True, + from_timestamp = "2100-12-30T23:23:59Z", to_timestamp = "2200-01-01T00:00:00Z") + >>> ctx.hit_count + 3 + +Or do the same thing by searching without temporal constraints and then applying the constraint: + + >>> ctx = conn.new_context(project = "CMIP5", model = "HadGEM2-ES", + time_frequency = "mon", realm = "atmos", ensemble = "r1i1p1", latest = True) + >>> ctx.hit_count + 21 + >>> ctx = ctx.constrain(from_timestamp = "2100-12-30T23:23:59Z", to_timestamp = "2200-01-01T00:00:00Z") + >>> ctx.hit_count + 3 + Obtain MyProxy credentials to allow downloading files or using secured OPeNDAP >>> from pyesgf.logon import LogonManager diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py index c8f8fa2..f53b525 100644 --- a/pyesgf/search/connection.py +++ b/pyesgf/search/connection.py @@ -29,7 +29,7 @@ logging.basicConfig() log = logging.getLogger(__name__) -log.setLevel(logging.DEBUG) +log.setLevel(logging.INFO) from .context import DatasetSearchContext from .consts import RESPONSE_FORMAT, SHARD_REXP From 
56fd9c6661ea4fb4c81925d673341c3fdf0d29a8 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 16 Jun 2015 15:56:52 +0100 Subject: [PATCH 11/19] Upgraded version. --- pyesgf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyesgf/__init__.py b/pyesgf/__init__.py index 023b885..3acc7dd 100644 --- a/pyesgf/__init__.py +++ b/pyesgf/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = '0.1.4' +__version__ = '0.1.4b' #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None #!TODO: pipe results to new process. Command-line interface. From a48767faa8562170b7a969213070171de0cc3d01 Mon Sep 17 00:00:00 2001 From: Katharina Berger Date: Tue, 27 Oct 2015 17:46:52 +0100 Subject: [PATCH 12/19] enable search facet search_type to search for Files and Aggregations --- pyesgf/search/connection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py index f53b525..83ed5a7 100644 --- a/pyesgf/search/connection.py +++ b/pyesgf/search/connection.py @@ -241,7 +241,7 @@ def get_shard_list(self): def new_context(self, context_class=None, latest=None, facets=None, fields=None, from_timestamp=None, to_timestamp=None, - replica=None, shards=None, + replica=None, shards=None, search_type=None, **constraints): """ Returns a :class:`pyesgf.search.context.SearchContext` class for @@ -259,6 +259,7 @@ def new_context(self, context_class=None, from_timestamp=from_timestamp, to_timestamp=to_timestamp, replica=replica, shards=shards, + search_type=search_type, ) From 1eb146c0628c3261ba3743d5cdb0cc6b98e85a7c Mon Sep 17 00:00:00 2001 From: Matthew Harris Date: Mon, 4 Jan 2016 10:11:46 -0800 Subject: [PATCH 13/19] MBH: updating .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 00821bd..30d83dc 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,5 @@ generated/ /backup ./do_jenkins.sh test/url_cache +.esg/ +env/ From 
3791f3e3e9311e534ee238926df5c4277fa4ed50 Mon Sep 17 00:00:00 2001 From: agstephens Date: Thu, 7 Jan 2016 00:15:01 +0000 Subject: [PATCH 14/19] Updated version and documentation for logon module (python >= 2.7.9 is required for this to work). --- docs/logon.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/logon.rst b/docs/logon.rst index 30642d1..e08bcff 100644 --- a/docs/logon.rst +++ b/docs/logon.rst @@ -4,5 +4,7 @@ ESGF Security API :mod:`pyesgf` provides a simplified interface to obtaining ESGF credentials. +.. warning:: This interface only works with ***Python versions 2.7.9 or greater*** (due to an SSL update). + .. automodule:: pyesgf.logon :members: From 537ceb7d94f9a72715d79bb7d05fb73743d5e264 Mon Sep 17 00:00:00 2001 From: agstephens Date: Fri, 13 May 2016 12:01:47 +0100 Subject: [PATCH 15/19] Fixed error with SHARD_REXP not matching all cases. Updated the SHARD regex so that all cases, with or without the port, will match and be used in the available shards dictionary. 
--- pyesgf/search/connection.py | 7 ++++++- pyesgf/search/consts.py | 3 ++- test/test_shard_regex.py | 40 +++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 test/test_shard_regex.py diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py index 83ed5a7..5026c67 100644 --- a/pyesgf/search/connection.py +++ b/pyesgf/search/connection.py @@ -166,7 +166,12 @@ def _build_query(self, query_dict, limit=None, offset=None, shards=None): else: for port, suffix in self._available_shards[shard]: # suffix should be ommited when querying - shard_specs.append('%s:%s/solr' % (shard, port)) + if not port: + port_string = "" + else: + port_string = ":%s" % port + + shard_specs.append('%s%s/solr' % (shard, port_string)) shard_str = ','.join(shard_specs) else: diff --git a/pyesgf/search/consts.py b/pyesgf/search/consts.py index ec82a5f..0950962 100644 --- a/pyesgf/search/consts.py +++ b/pyesgf/search/consts.py @@ -10,4 +10,5 @@ OPERATOR_NEQ = 'not_equal' -SHARD_REXP = r'(?P.*?):(?P\d*)/solr(?P.*)' +SHARD_REXP = r'^(?Phttps?://)?(?P.+?):?(?P\d+)?/(?P.*)$' + diff --git a/test/test_shard_regex.py b/test/test_shard_regex.py new file mode 100644 index 0000000..aa92b98 --- /dev/null +++ b/test/test_shard_regex.py @@ -0,0 +1,40 @@ +""" +Test regular expression for matching shard end points. 
+""" + +from pyesgf.search.consts import SHARD_REXP +import re + +tests = [ +"https://esgf-test.a.b.c/solr", +"http://esgf.a.c/solr/data", +"http://esgs.a.d:80/data/solr", +"esgf.a.c:80/solr", +"esgf.a.c/solr" +] + +expected = [ +("https://", "esgf-test.a.b.c", None, "solr"), +("http://", "esgf.a.c", None, "solr/data"), +("http://", "esgs.a.d", "80", "data/solr"), +(None, "esgf.a.c", "80", "solr"), +(None, "esgf.a.c", None, "solr") +] + +keys = ("prefix", "host", "port", "suffix") + +R = re.compile("^(?Phttps?://)?(?P.+?):?(?P\d+)?/(?P.+)$") + +def test_regex(): + for i, test in enumerate(tests): + + match = R.match(test) + d = match.groupdict() + values = tuple([d[key] for key in keys]) + + assert values == expected[i] + + +if __name__ == "__main__": + test_regex() + From 532e1590f78209cc50311d6a39bab0a80a04323a Mon Sep 17 00:00:00 2001 From: agstephens Date: Mon, 16 May 2016 10:41:58 +0100 Subject: [PATCH 16/19] Upgraded version. --- pyesgf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyesgf/__init__.py b/pyesgf/__init__.py index 3acc7dd..9edfdbc 100644 --- a/pyesgf/__init__.py +++ b/pyesgf/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = '0.1.4b' +__version__ = '0.1.2' #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None #!TODO: pipe results to new process. Command-line interface. From 601050426b222a018c31fa3693b5c4b042d31a90 Mon Sep 17 00:00:00 2001 From: agstephens Date: Mon, 16 May 2016 10:46:10 +0100 Subject: [PATCH 17/19] Updated version to 0.1.3. --- pyesgf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyesgf/__init__.py b/pyesgf/__init__.py index 9edfdbc..c597065 100644 --- a/pyesgf/__init__.py +++ b/pyesgf/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = '0.1.2' +__version__ = '0.1.3' #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None #!TODO: pipe results to new process. Command-line interface. 
From d8be2a718e84207bb3acf29e1113e5fde412dfd9 Mon Sep 17 00:00:00 2001 From: agstephens Date: Mon, 16 May 2016 10:48:22 +0100 Subject: [PATCH 18/19] Upgraded version to 0.1.6. --- pyesgf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyesgf/__init__.py b/pyesgf/__init__.py index c597065..c454589 100644 --- a/pyesgf/__init__.py +++ b/pyesgf/__init__.py @@ -3,7 +3,7 @@ """ -__version__ = '0.1.3' +__version__ = '0.1.6' #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None #!TODO: pipe results to new process. Command-line interface. From 77240591f322de264505acf46fb76706bc87ad8f Mon Sep 17 00:00:00 2001 From: agstephens Date: Tue, 17 May 2016 15:06:27 +0100 Subject: [PATCH 19/19] Fixed unit tests that were failing due to changes in node configurations. Some node locations had changed so update those. GeoMIP product set to "output" instead of "output1" which is required. --- test/test_results.py | 3 ++- test/test_util.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/test/test_results.py b/test/test_results.py index c052a0b..0461851 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -17,7 +17,7 @@ def test_result1(): results = ctx.search() r1 = results[0] - assert re.match(r'cmip5\.output1\.MOHC\..+\|esgf-data2.ceda.ac.uk', r1.dataset_id) + assert re.match(r'cmip5\.output1\.MOHC\..+\|esgf-data1.ceda.ac.uk', r1.dataset_id) def test_file_context(): conn = SearchConnection(TEST_SERVICE, distrib=False) @@ -122,6 +122,7 @@ def test_shards_constrain(): full_query = f_ctx.connection._build_query(query_dict, shards=f_ctx.shards) #!TODO: Force fail to see whether shards is passed through. + # NOTE: 'shards' is NOT even a key in this dictionary. Needs rewrite!!! 
q_shard = full_query['shards'] # Check it isn't a ',' separated list assert ',' not in q_shard diff --git a/test/test_util.py b/test/test_util.py index c323435..4b6d0c3 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -14,18 +14,18 @@ def test_get_manifest(): conn = SearchConnection(CEDA_SERVICE, distrib=False) - manifest = get_manifest('GeoMIP.output1.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1', + manifest = get_manifest('GeoMIP.output.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1', 20120223, conn) filename = 'psl_day_HadGEM2-ES_G1_r1i1p1_19291201-19291230.nc' - assert manifest[filename]['checksum'] == 'd20bbba8e05d6689f44cf3f8eebb9e7b' + assert manifest[filename]['checksum'] == '5c459a61cfb904ca235ad1f796227114df095d9162a2a3f044bc01f881b532ce' #!TODO: this test belongs somewhere else def test_opendap_url(): conn = SearchConnection(CEDA_SERVICE, distrib=False) ctx = conn.new_context() - results = ctx.search(drs_id='GeoMIP.output1.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1') + results = ctx.search(drs_id='GeoMIP.output.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1') assert len(results) == 1 agg_ctx = results[0].aggregation_context() @@ -45,7 +45,7 @@ def test_download_url(): conn = SearchConnection(CEDA_SERVICE, distrib=False) ctx = conn.new_context() - results = ctx.search(drs_id='GeoMIP.output1.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1') + results = ctx.search(drs_id='GeoMIP.output.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1') files = results[0].file_context().search() download_url = files[0].download_url @@ -57,7 +57,7 @@ def test_opendap_fail(): ctx = conn.new_context() results = ctx.search(project='CMIP5', experiment='rcp45', time_frequency='mon', - realm='atmos', ensemble='r1i1p1') + realm='fx', ensemble='r1i1p1') files_ctx = results[0].file_context() hit = files_ctx.search()[0]