Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ eamena/eamena/elasticsearch
eamena/eamena/logs
eamena/eamena/uploadedfiles
eamena/eamena/bulk_upload
eamena/eamena/additional_resource_graphs
virtualenv/ENV
tests/elasticsearch
tests/logs
Expand Down
9 changes: 9 additions & 0 deletions arches/app/search/elasticsearch_dsl_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ def search(self, index='', doc_type='', **kwargs):
#print self
return self.se.search(index=index, doc_type=doc_type, body=self.dsl)

def scan(self, index='', doc_type='', **kwargs):
    """
    Run this query through the search engine's scan method.

    'fields', 'start' and 'limit' may be passed as keyword arguments to
    override the values currently stored on this object; any that are
    omitted keep their current value. The dsl is (re)built with prepare()
    before it is handed over as the 'query' argument.

    Returns whatever self.se.scan returns.

    """

    # Overrides are applied in the same order as the attributes are listed.
    for attr_name in ('fields', 'start', 'limit'):
        current_value = getattr(self, attr_name)
        setattr(self, attr_name, kwargs.pop(attr_name, current_value))

    self.prepare()

    scan_args = {'index': index, 'doc_type': doc_type, 'query': self.dsl}
    return self.se.scan(**scan_args)

def delete(self, index=''):
    """
    Ask the search engine to delete from 'index' using the current dsl as the request body.

    Returns whatever self.se.delete returns.

    """

    delete_args = {'index': index, 'body': self.dsl}
    return self.se.delete(**delete_args)

Expand Down
32 changes: 32 additions & 0 deletions arches/app/search/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,38 @@ def search(self, **kwargs):

return ret

def scan(self, **kwargs):
    """
    Search the index using the scan helper.
    Pass an index and an id (or a list of ids) to get specific documents
    Pass an index and a query dsl to scan over the matching documents

    Raises NotImplementedError if no 'index' keyword argument is given.

    Returns the response of es.get / es.mget when an id is supplied,
    otherwise the result of helpers.scan, or None if calling the helper
    raised (the failure is logged as a warning).

    """

    query = kwargs.get('query', None)
    index = kwargs.get('index', None)
    doc_id = kwargs.get('id', None)  # local name avoids shadowing the builtin id()

    if index is None:
        raise NotImplementedError("You must specify an 'index' in your call to scan")

    if doc_id:
        if isinstance(doc_id, list):
            # A multi-get receives its id list in the request body; the ids
            # were previously stored under 'query', which mget does not accept.
            kwargs.setdefault('body', {'ids': kwargs.pop('id')})
            return self.es.mget(**kwargs)
        return self.es.get(**kwargs)

    # NOTE(review): helpers.scan appears to be a lazy generator, so request
    # errors would surface while the caller iterates rather than here - confirm.
    try:
        return helpers.scan(self.es, **kwargs)
    except Exception as detail:
        self.logger.warning(
            '%s: WARNING: search failed for query: %s \nException detail: %s\n' % (datetime.now(), query, detail))
        return None

def index_term(self, term, id, context='', options={}):
"""
If the term is already indexed, then simply increment the count and add the id of the term to the existing index.
Expand Down
20 changes: 1 addition & 19 deletions arches/app/utils/data_management/resources/formats/archesjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,8 @@ def write_resources(self, resources, resource_export_configs):
iso_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
json_file_name = os.path.join('{0}_{1}.{2}'.format('EAMENA', iso_date, 'json'))
f = StringIO()


for count, resource in enumerate(resources, 1):
if count % 1000 == 0:
print "%s Resources exported" % count
errors = []

try:
a_resource = Resource().get(resource['_id'])

a_resource.form_groups = None
json_resources.append(a_resource)
except Exception as e:
if e not in errors:
errors.append(e)
if len(errors) > 0:
print errors[0], ':', len(errors)


f.write((JSONSerializer().serialize({'resources':json_resources}, indent = 4, separators=(',',':'))))
f.write((JSONSerializer().serialize({'resources': resources}, indent = 4, separators=(',',':'))))
json_resources_for_export.append({'name': json_file_name, 'outputfile': f})
return json_resources_for_export

Expand Down
7 changes: 5 additions & 2 deletions eamena/eamena/views/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,15 @@ def export_results(request):
dsl.add_filter(ids_filter)


search_results = dsl.search(index='entity', doc_type='')
search_results = dsl.scan(index='entity', doc_type='')
allres = []
for res in search_results:
allres.append(res)

response = None
format = request.GET.get('export', 'csv')
exporter = ResourceExporter(format)
results = exporter.export(search_results['hits']['hits'])
results = exporter.export(allres)

related_resources = [{'id1':rr.entityid1, 'id2':rr.entityid2, 'type':rr.relationshiptype} for rr in models.RelatedResource.objects.all()]
csv_name = 'resource_relationships.csv'
Expand Down