azerbini · TeriForey · Jul 3, 2018 · Sep 24, 2018 · Oct 1, 2018
diff --git a/.gitignore b/.gitignore
@@ -9,6 +9,7 @@ eamena/eamena/elasticsearch
 eamena/eamena/logs
 eamena/eamena/uploadedfiles
 eamena/eamena/bulk_upload
+eamena/eamena/additional_resource_graphs
 virtualenv/ENV
 tests/elasticsearch
 tests/logs

diff --git a/arches/app/search/elasticsearch_dsl_builder.py b/arches/app/search/elasticsearch_dsl_builder.py
@@ -112,6 +112,15 @@ def search(self, index='', doc_type='', **kwargs):
         #print self
         return self.se.search(index=index, doc_type=doc_type, body=self.dsl)
 
+    def scan(self, index='', doc_type='', **kwargs):
+        """Prepare this query and hand its dsl to self.se.scan, returning that result."""
+        # a keyword override replaces the stored value; otherwise it is kept as-is
+        for option in ('fields', 'start', 'limit'):
+            setattr(self, option, kwargs.pop(option, getattr(self, option)))
+
+        self.prepare()
+        return self.se.scan(index=index, doc_type=doc_type, query=self.dsl)
+
     def delete(self, index=''):
         return self.se.delete(index=index, body=self.dsl)
 

diff --git a/arches/app/search/search.py b/arches/app/search/search.py
@@ -93,6 +93,38 @@ def search(self, **kwargs):
 
         return ret
 
+    def scan(self, **kwargs):
+        """
+        Iterate over every hit of a query via the elasticsearch scan helper.
+        Pass an index and a single id to get one document, or a list of
+        ids to multi-get them; otherwise pass a query dsl and iterate the
+        returned generator. Raises NotImplementedError without an 'index'.
+        """
+
+        query = kwargs.get('query', None)
+        doc_id = kwargs.get('id', None)
+
+        if kwargs.get('index', None) is None:
+            raise NotImplementedError("You must specify an 'index' in your call to scan")
+
+        if doc_id:
+            if isinstance(doc_id, list):
+                # mget takes the id list as its request 'body', not as 'query'
+                kwargs.setdefault('body', {'ids': kwargs.pop('id')})
+                return self.es.mget(**kwargs)
+            return self.es.get(**kwargs)
+
+        # NOTE: helpers.scan is lazy, so a failing query raises while the
+        # caller iterates the result; only call-time errors are caught here.
+        ret = None
+        try:
+            ret = helpers.scan(self.es, **kwargs)
+        except Exception as detail:
+            self.logger.warning(
+                '%s: WARNING: search failed for query: %s \nException detail: %s\n' % (datetime.now(), query, detail))
+
+        return ret
+
     def index_term(self, term, id, context='', options={}):
         """
         If the term is already indexed, then simply increment the count and add the id of the term to the existing index.

diff --git a/arches/app/utils/data_management/resources/formats/archesjson.py b/arches/app/utils/data_management/resources/formats/archesjson.py
@@ -30,26 +30,8 @@ def write_resources(self, resources, resource_export_configs):
         iso_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
         json_file_name = os.path.join('{0}_{1}.{2}'.format('EAMENA', iso_date, 'json'))
         f = StringIO()
-
-
-        for count, resource in enumerate(resources, 1):
-            if count % 1000 == 0:
-                print "%s Resources exported" % count            
-            errors = []
-
-            try:
-                a_resource = Resource().get(resource['_id'])
-
-                a_resource.form_groups = None
-                json_resources.append(a_resource)
-            except Exception as e:
-                if e not in errors:
-                    errors.append(e)
-        if len(errors) > 0:
-            print errors[0], ':', len(errors)
-
 
-        f.write((JSONSerializer().serialize({'resources':json_resources}, indent = 4, separators=(',',':'))))
+        f.write((JSONSerializer().serialize({'resources': resources}, indent = 4, separators=(',',':'))))
         json_resources_for_export.append({'name': json_file_name, 'outputfile': f})
         return json_resources_for_export
 

diff --git a/eamena/eamena/views/search.py b/eamena/eamena/views/search.py
@@ -143,12 +143,15 @@ def export_results(request):
         dsl.add_filter(ids_filter)
 
 
-    search_results = dsl.search(index='entity', doc_type='')
+    search_results = dsl.scan(index='entity', doc_type='')
+    allres = []
+    for res in search_results:
+        allres.append(res)
 
     response = None
     format = request.GET.get('export', 'csv')
     exporter = ResourceExporter(format)
-    results = exporter.export(search_results['hits']['hits'])
+    results = exporter.export(allres)
 
     related_resources = [{'id1':rr.entityid1, 'id2':rr.entityid2, 'type':rr.relationshiptype} for rr in models.RelatedResource.objects.all()] 
     csv_name = 'resource_relationships.csv'