diff --git a/fbi_core/annotate.py b/fbi_core/annotate.py index fc0329f..4136a0f 100644 --- a/fbi_core/annotate.py +++ b/fbi_core/annotate.py @@ -6,14 +6,14 @@ import elasticsearch import requests -from ceda_es_client import CEDAElasticsearchClient +from elasticsearch import Elasticsearch -from .conf import APIKEY +from .conf import APIKEY, ES_HOSTS if APIKEY: - es = CEDAElasticsearchClient(headers={"x-api-key": APIKEY}) + es = elasticsearch.Elasticsearch(hosts=ES_HOSTS, headers={"x-api-key": APIKEY}) else: - es = CEDAElasticsearchClient() + es = elasticsearch.Elasticsearch(hosts=ES_HOSTS) indexname = "fbi-annotations" @@ -67,7 +67,7 @@ def get_moles_records(): coll_url += "/?fields=ob_id,uuid,title,publicationState" coll_url += "&limit=10000" r = requests.get(coll_url, timeout=200) - + collection_records_by_obid = {} for collection_rec in r.json()["results"]: collection_records_by_obid[collection_rec["ob_id"]] = collection_rec diff --git a/fbi_core/conf.py b/fbi_core/conf.py index d1ef3f5..269404f 100644 --- a/fbi_core/conf.py +++ b/fbi_core/conf.py @@ -1,11 +1,13 @@ -import yaml import os +import yaml conf_file = os.path.join(os.environ["HOME"], ".fbi.yml") + if os.path.exists(conf_file): conf = yaml.load(open(conf_file), Loader=yaml.Loader) APIKEY = conf["ES"]["api_key"] + ES_HOSTS = conf["ES"].get("hosts", ["https://elasticsearch.ceda.ac.uk:443"]) else: APIKEY = None - + ES_HOSTS = ["https://elasticsearch.ceda.ac.uk:443"] diff --git a/fbi_core/fbi_tools.py b/fbi_core/fbi_tools.py index e9ad6bc..aba79e1 100644 --- a/fbi_core/fbi_tools.py +++ b/fbi_core/fbi_tools.py @@ -5,15 +5,14 @@ from datetime import datetime import elasticsearch -from ceda_es_client import CEDAElasticsearchClient from elasticsearch.helpers import scan -from .conf import APIKEY +from .conf import APIKEY, ES_HOSTS if APIKEY: - es = CEDAElasticsearchClient(headers={"x-api-key": APIKEY}) + es = elasticsearch.Elasticsearch(hosts=ES_HOSTS, headers={"x-api-key": APIKEY}) else: - es = CEDAElasticsearchClient() + es = elasticsearch.Elasticsearch(hosts=ES_HOSTS) indexname = "fbi-2022" @@ -32,9 +31,9 @@ def fbi_records( :param str after: paths after this are iterated over. Defaults to "/" :param str stop: iteration stops when the path is greater than or equal to this. Defaults to "~" :param int fetch_size: The number of records to request from elasticsearch at a time. - :param bool exclude_phenomena: remove the bulky phenomena attribute from + :param bool exclude_phenomena: remove the bulky phenomena attribute from the record. Default is False. - :param str item_type: Item type for the records. Either "file", "dir" or "link". + :param str item_type: Item type for the records. Either "file", "dir" or "link". Defaults to all types. :return iterator[dict]: Yeilds FBI records as dictionaries. @@ -211,7 +210,6 @@ def all_under_query( if blank: must.append({"term": {blank: {"value": ""}}}) - if exclude_readmes: must_not.append({"prefix": {"name.keyword": {"value": "00README"}}}) must_not.append({"prefix": {"name.keyword": {"value": "README"}}}) @@ -406,11 +404,11 @@ def archive_summary( def _split(splitlist, batch_size, **kwargs): """ - Divide a list of directories into by adding subdirectories if there are too many + Divide a list of directories into by adding subdirectories if there are too many items in a directory. :param list splitlist: A list of tuples containing a directory name and an item count. - e.g. [("/x/y", 100)] may expand to [("/x/y/a", 50), + e.g. [("/x/y", 100)] may expand to [("/x/y/a", 50), ("/x/y/b", 10), ("/x/y/c", 40),] """ new_splits = []