Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions fbi_core/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

import elasticsearch
import requests
from ceda_es_client import CEDAElasticsearchClient
from elasticsearch import Elasticsearch

from .conf import APIKEY
from .conf import APIKEY, ES_HOSTS

if APIKEY:
es = CEDAElasticsearchClient(headers={"x-api-key": APIKEY})
es = elasticsearch.Elasticsearch(hosts=ES_HOSTS, headers={"x-api-key": APIKEY})
else:
es = CEDAElasticsearchClient()
es = elasticsearch.Elasticsearch(hosts=ES_HOSTS)

indexname = "fbi-annotations"

Expand Down Expand Up @@ -67,7 +67,7 @@ def get_moles_records():
coll_url += "/?fields=ob_id,uuid,title,publicationState"
coll_url += "&limit=10000"
r = requests.get(coll_url, timeout=200)

collection_records_by_obid = {}
for collection_rec in r.json()["results"]:
collection_records_by_obid[collection_rec["ob_id"]] = collection_rec
Expand Down
6 changes: 4 additions & 2 deletions fbi_core/conf.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import yaml
import os

import yaml

conf_file = os.path.join(os.environ["HOME"], ".fbi.yml")

if os.path.exists(conf_file):
conf = yaml.load(open(conf_file), Loader=yaml.Loader)
APIKEY = conf["ES"]["api_key"]
ES_HOSTS = conf["ES"].get("hosts", ["https://elasticsearch.ceda.ac.uk:443"])
else:
APIKEY = None

ES_HOSTS = ["https://elasticsearch.ceda.ac.uk:443"]
16 changes: 7 additions & 9 deletions fbi_core/fbi_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
from datetime import datetime

import elasticsearch
from ceda_es_client import CEDAElasticsearchClient
from elasticsearch.helpers import scan

from .conf import APIKEY
from .conf import APIKEY, ES_HOSTS

if APIKEY:
es = CEDAElasticsearchClient(headers={"x-api-key": APIKEY})
es = elasticsearch.Elasticsearch(hosts=ES_HOSTS, headers={"x-api-key": APIKEY})
else:
es = CEDAElasticsearchClient()
es = elasticsearch.Elasticsearch(hosts=ES_HOSTS)

indexname = "fbi-2022"

Expand All @@ -32,9 +31,9 @@ def fbi_records(
:param str after: paths after this are iterated over. Defaults to "/"
:param str stop: iteration stops when the path is greater than or equal to this. Defaults to "~"
:param int fetch_size: The number of records to request from elasticsearch at a time.
:param bool exclude_phenomena: remove the bulky phenomena attribute from
:param bool exclude_phenomena: remove the bulky phenomena attribute from
the record. Default is False.
:param str item_type: Item type for the records. Either "file", "dir" or "link".
:param str item_type: Item type for the records. Either "file", "dir" or "link".
Defaults to all types.

:return iterator[dict]: Yields FBI records as dictionaries.
Expand Down Expand Up @@ -211,7 +210,6 @@ def all_under_query(
if blank:
must.append({"term": {blank: {"value": ""}}})


if exclude_readmes:
must_not.append({"prefix": {"name.keyword": {"value": "00README"}}})
must_not.append({"prefix": {"name.keyword": {"value": "README"}}})
Expand Down Expand Up @@ -406,11 +404,11 @@ def archive_summary(

def _split(splitlist, batch_size, **kwargs):
"""
Divide a list of directories by adding subdirectories if there are too many
Divide a list of directories by adding subdirectories if there are too many
items in a directory.

:param list splitlist: A list of tuples containing a directory name and an item count.
e.g. [("/x/y", 100)] may expand to [("/x/y/a", 50),
e.g. [("/x/y", 100)] may expand to [("/x/y/a", 50),
("/x/y/b", 10), ("/x/y/c", 40),]
"""
new_splits = []
Expand Down