OUT_SUBFOLDER = 'contrib_data'
AUTHOR_DATA = 'author_data.json'


def verify_local_repo_location(repo):
    """Check that *repo* is a directory on the local filesystem.

    Parameters
    ----------
    repo : str
        Path expected to contain a checked-out repository.

    Raises
    ------
    IOError
        When the path does not exist or is not a directory.
    """
    if not os.path.isdir(repo):
        raise IOError('could not locate repository {}'.format(repo))


def build_out_path(repo_name, parent_path=None):
    """Return the output-data folder path for *repo_name*.

    *parent_path* defaults to the current working directory; the
    OUT_SUBFOLDER leaf is appended so every tool agrees on the layout.
    """
    if parent_path is None:
        parent_path = os.path.abspath(os.curdir)
    out_path = os.path.join(parent_path, repo_name, OUT_SUBFOLDER)
    return out_path


def make_output_folder(path_, overwrite):
    """Create *path_*, clobbering an existing folder when *overwrite*.

    NOTE(review): one line of this function falls between diff hunks and
    is not visible; the ``elif overwrite:`` guard is reconstructed from
    the surrounding context (rmtree then mkdir) -- confirm against the
    full file.
    """
    if not os.path.exists(path_):
        os.mkdir(path_)
    elif overwrite:
        rmtree(path_)
        os.mkdir(path_)
existing data?, default True') +@click.option('--verbose/--no-verbose', default=True) def main(repo, out_dir, clobber_output, verbose): """ """ import logging @@ -55,10 +61,18 @@ def main(repo, out_dir, clobber_output, verbose): repo_name = os.path.basename(repo) make_output_folder(out_dir, overwrite=clobber_output) contributor_data = author_minded(repo) - citation_data = pmc_data('SPSS') - logging.info("output path: %s" % os.path.join(out_dir,'contributor_data.json')) - contributor_data.to_json(os.path.join(out_dir,'contributor_data.json'), date_format='iso') - citation_data['citations'].to_json(os.path.join(out_dir,'citation_data.json')) + citation_data = pubmed_data('SPSS') + depends_data = get_dependencies(repo_name, repo) + logging.info('got dependency data of type {}'.format(type(depends_data))) + logging.info('got dependency data:\n {}'.format(depends_data)) + logging.info("output path: %s" % os.path.join(out_dir, + 'contributor_data.json')) + contributor_data.to_json(os.path.join(out_dir, + 'contributor_data.json'), + date_format='iso') + citation_data['citations'].to_json(os.path.join(out_dir, + 'citation_data.json')) + depends_data.to_json(os.path.join(out_dir, 'dependencies_data.json')) if __name__ == '__main__': main() diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py new file mode 100644 index 0000000..a44e64d --- /dev/null +++ b/commit_opener/depsy.py @@ -0,0 +1,133 @@ +import re +import pickle +import ast +import os.path +import errno +import requests + +# """Functions from depsy""" + + +def parse_requirements_txt(contents): + # see here for spec used in parsing the file: + # https://pip.readthedocs.org/en/1.1/requirements.html#the-requirements-file-format + # it doesn't mention the '#' comment but found it often in examples. + # not using this test str in the function, just a handy place to keep it. 
+ test_str = """# my comment +file://blahblah +foo==10.2 +baz>=3.6 +# other comment +foo.bar>=3.33 +foo-bar==2.2 +foo_bar==1.1 +foo == 5.5 +.for some reason there is a dot sometimes +--index-url blahblah +-e http://blah + foo_with_space_in_front = 1.1""" + + reqs = re.findall( + '^(?!file:|-|\.)\s*([\w\.-]+)', + contents, + re.MULTILINE | re.IGNORECASE + ) + return sorted(reqs) + + +def parse_setup_py(contents): + parsed = ast.parse(contents) + ret = [] + # see ast docs: https://greentreesnakes.readthedocs.org/en/latest/index.html + for node in ast.walk(parsed): + try: + if node.func.id == "setup": + for keyword in node.keywords: + if keyword.arg == "install_requires": + print("found requirements in setup.py 'install_requires' arg") + for elt in keyword.value.elts: + ret.append(_clean_setup_req(elt.s)) + + if keyword.arg == "requires": + print("found requirements in setup.py 'requires' arg") + for elt in keyword.value.elts: + ret.append(_clean_setup_req(elt.s)) + + if keyword.arg == "extras_require": + print("found requirements in setup.py 'extras_require' arg") + for my_list in keyword.value.values: + for elt in my_list.elts: + ret.append(_clean_setup_req(elt.s)) + + except AttributeError: + continue + + return sorted(ret) + + +class PythonStandardLibs(): + + def __init__(self): + self.url = "https://docs.python.org/2.7/py-modindex.html" + self.data_dir = os.path.join(os.path.dirname(__file__), + "../../data") + + self.pickle_path = os.path.join(self.data_dir, + "python_standard_libs.pickle") + self.libs = None + + def _mkdir(self): + try: + os.makedirs(self.data_dir) + except OSError as exp: + if exp.errno != errno.EEXIST: + raise + self.pickle_path = os.path.join(self.data_dir, + "python_standard_libs.pickle") + + def retrieve_from_web(self): + # only needs to be used once ever, here for tidiness + # checked the result into source control as python_standard_libs.pickle + html = requests.get(self.url).text + exp = r'class="xref">([^<]+)' + matches = 
class PythonStandardLibs():
    """Fetch and cache the names of the Python 2.7 standard-library modules.

    The list is scraped once from the docs module index and pickled
    under ``data/`` so later runs work offline.
    """

    def __init__(self):
        # module index page listing every stdlib module
        self.url = "https://docs.python.org/2.7/py-modindex.html"
        # NOTE(review): path is relative to this source file and assumes
        # a ../../data layout -- confirm against the project tree
        self.data_dir = os.path.join(os.path.dirname(__file__),
                                     "../../data")
        self.pickle_path = os.path.join(self.data_dir,
                                        "python_standard_libs.pickle")
        self.libs = None

    def _mkdir(self):
        """Create the data dir; tolerate only 'already exists' errors."""
        try:
            os.makedirs(self.data_dir)
        except OSError as exp:
            if exp.errno != errno.EEXIST:
                raise
        # (the redundant pickle_path reassignment that used to live here
        # duplicated __init__ and has been removed)

    def retrieve_from_web(self):
        """Scrape the module names from self.url into self.libs.

        Only needs to be used once ever; the result is pickled so the
        scrape does not have to be repeated.
        """
        # lazy import: 'requests' is only needed for the one-off scrape,
        # so the module stays importable without it
        import requests
        html = requests.get(self.url).text
        exp = r'class="xref">([^<]+)'
        matches = re.findall(exp, html)
        # keep top-level modules only; drop dotted submodules
        self.libs = [m for m in matches if '.' not in m]

    def pickle_libs(self):
        """Persist self.libs to the pickle file, scraping first if needed."""
        if self.libs is None:
            self.retrieve_from_web()

        self._mkdir()
        with open(self.pickle_path, "wb") as f:
            pickle.dump(self.libs, f)

        print("saved these to file: {}".format(self.libs))

    def get(self):
        """Populate self.libs from the pickle, falling back to the web."""
        if self.libs is None:
            try:
                with open(self.pickle_path, "rb") as f:
                    print("Loading list of Standard Python Libraries from pickle file")
                    self.libs = pickle.load(f)
            # bug fix: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit
            except (IOError, OSError, pickle.PickleError):
                self.retrieve_from_web()
                self.pickle_libs()

    def clean(self):
        """Delete the cached pickle; a missing file is not an error."""
        try:
            os.remove(self.pickle_path)
        except OSError:  # bug fix: was a bare `except:`
            pass


def save_python_standard_libs(clean=False):
    """Refresh (from scratch when *clean*) the pickled stdlib-name list."""
    pystdlibs = PythonStandardLibs()
    if clean:
        pystdlibs.clean()
    pystdlibs.get()

    # to show the thing works
    new_libs_obj = PythonStandardLibs()
    new_libs_obj.get()
    print("got these from pickled file: {}".format(new_libs_obj.libs))
def catfile(filename):
    """Return the text contents of *filename*."""
    with open(filename, 'r') as fhandle:
        print("Opening file {} and reading contents".format(filename))
        text = fhandle.read()
    return text


def get_dependencies(name, url):
    """
    Get the dependencies for a git repository or any local python package.

    Prefers requirements.txt, then setup.py, finally falling back to
    scanning the sources for import statements.  Standard-library
    modules are filtered out; returns a sorted pandas.Series of names.
    """
    # Let's instantiate the repo object, so we can parse through it.
    myrepo = repo.Repo(name, url)
    print("Created a repository instance for {}".format(url))

    # Extract a local copy
    myrepo.extract_local_copy()
    print("Local copy now available here: {}".format(myrepo.tmpdir))
    myrepo._get_filelist()

    # Note: the file has to be opened and read before passing to depsy
    # functions.
    if myrepo.has("requirements.txt"):
        print("Repository has a requirements.txt file")
        reqs = depsy.parse_requirements_txt(catfile(myrepo.has("requirements.txt")))
    elif myrepo.has("setup.py"):
        print("Repository has a setup.py file")
        reqs = depsy.parse_setup_py(catfile(myrepo.has("setup.py")))
        if len(reqs) < 1:
            # bug fix: the concatenated message was missing a space
            # ("file,so")
            print("No reqs in setup file, "
                  "so determining dependencies ourselves.")
            reqs = search_files_for_imports(myrepo)
    else:
        # No standard descriptions of the dependencies so let's try to
        # work them out for ourselves.
        print("No req or setup file, so determining dependencies ourselves.")
        reqs = search_files_for_imports(myrepo)

    # De-duplicate the requirements.
    reqs = set(reqs)
    print("Found the following imports: {}".format("\n".join(reqs)))

    # Get the list of standard packages so that these can be removed.
    stdlibs = depsy.PythonStandardLibs()
    stdlibs.get()
    set_std_libs = set(stdlibs.libs)

    data = pandas.Series(list(reqs - set_std_libs))
    data.sort_values(inplace=True)
    return data


def search_files_for_imports(repo_instance):
    """
    Walk all the python files in the repository and extract the import info.
    """
    dep_list = []
    for fname in repo_instance.file_list:
        # bug fix: `".py" in f` also matched .pyc/.pyo byte-code files
        # and names like `foo.py.orig`; only real sources are scanned
        if fname.endswith(".py"):
            print("Looking in {} for imports".format(os.path.basename(fname)))
            dep_list.extend(find_imports(catfile(fname)))

    return dep_list
def find_imports(text):
    """Find top-level module names imported by python source *text*.

    Names from plain ``import foo`` lines are returned first, then names
    from ``from foo import ...`` lines, each group in line order.
    Commented-out imports are ignored; indented imports are included.

    Bug fixes over the original character-class hack
    (``^[\\si]+mport`` / ``^[\\sf]+rom``): that pattern also matched
    garbage such as ``iimport x``, and its trailing ``[\\s\\.]`` required
    a character after the name, so an ``import os`` on the final,
    unterminated line of a file was silently missed.  The pointless
    try/except AttributeError around findall is gone too.
    """
    patterns = [
        re.compile(r'^\s*import\s+(\w+)', re.MULTILINE),
        re.compile(r'^\s*from\s+(\w+)', re.MULTILINE),
    ]
    import_list = []
    for pattern in patterns:
        import_list.extend(pattern.findall(text))
    return import_list
def extract_local_copy(self):
    """Make the repository contents available locally.

    A filesystem path is used in place; a remote ("http...") url is
    cloned into a temp dir that is registered for later cleanup().
    Sets self.tmpdir and self.extracted.

    Raises
    ------
    IOError
        If a local path does not exist, or a remote clone fails.
    NotImplementedError
        For repository types other than git.
    """
    if "http" not in self.url:
        if os.path.exists(self.url):
            print("Repository exists locally")
            self.tmpdir = self.url
            self.extracted = True
            return
        else:
            raise IOError("Path to repository doesn't exist")
    else:
        print("Extracting local copy of repository")
        self.tmpdir = tempfile.mkdtemp()
        # remember the temp dir so cleanup() can remove it later
        self.local_resources.append(self.tmpdir)
        print("Created temporary directory")
        # bug fix: `self.rtype is "git"` compared identity, not
        # equality -- it only worked by CPython string interning
        if self.rtype == "git":
            extract_cmd = "git clone {url} {odir}".format(url=self.url,
                                                          odir=self.tmpdir)
        else:
            # We could implement SVN here, a quick svn export would do.
            # bug fix: `raise NotImplemented` raises TypeError because
            # NotImplemented is not an exception class
            raise NotImplementedError(
                "unsupported repository type: {}".format(self.rtype))

        try:
            subprocess.check_call(extract_cmd.split())
        except subprocess.CalledProcessError:
            raise IOError("Unable to extract a local copy of repository")
        else:
            self.extracted = True
- + """ if not self.extracted: self.extract_local_copy() - + if not self.file_list: self._get_filelist() - + for f in self.file_list: if filename in f: return f - + return False - + def _get_filelist(self): """Just get a list of the files in the repo.""" - + if not self.extracted: - self.extract_local_copy() - + self.extract_local_copy() + for root, dirs, files in os.walk(self.tmpdir, topdown=True): for name in files: + print(os.path.join(root, name)) self.file_list.append(os.path.join(root, name)) - - + def cleanup(self): """Remove any local resources""" - + for resource in self.local_resources: try: shutil.rmtree(resource) except: - print "Unable to remove: {}".format(resource) - - - + print("Unable to remove: {}".format(resource)) diff --git a/grab_dependencies.py b/grab_dependencies.py deleted file mode 100644 index af5cc0f..0000000 --- a/grab_dependencies.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Extract the dependencies from the repository - -Issue: -work out dependencies #3 -https://github.com/lbillingham/commit_opener/issues/3 - -""" - -def catfile(filename): - """Get text contents of a file.""" - - with open(filename, 'r') as fhandle: - return "\n".join(fhandle.read()) - - -def get_dependencies(name, url): - - # Let's instantiate the repo object, so we can parse through it. - myrepo = repo.Repo(name, url) - - # Extract a local copy - myrepo.extract_local_copy() - - # Note: the file has to be opened and read before passing to depsy - # functions. - if myrepo.has("requirements.txt"): - filetext = catfile(myrepo.has("requirements.txt")) - reqs = depsy.models.python(filetext) - elif myrepo.has("setup.py"): - filetext = catfile(myrepo.has("setup.py")) - reqs = depsy.models.parse_setup_py(filetext) - else: - # No standard descriptions of the dependencies so let's try to work - # them out for ourselves. 
def test_import_search():
    """Plain imports are reported first, then `from` imports, in order."""
    source = (
        "\n"
        "import os\n"
        "import scipy\n"
        "import pandas\n"
        "from numpy import something\n"
        "import matplotlib.pyplot as plt\n"
    )
    # Ordering matters here. Normal imports done first, then froms.
    assert co_grab.find_imports(source) == [
        'os', 'scipy', 'pandas', 'matplotlib', 'numpy']


def test_commented():
    """A commented-out import must not be reported."""
    source = "\nimport os\n#import scipy\n"
    assert co_grab.find_imports(source) == ['os']


def test_indented():
    """Indented imports (e.g. inside a function) are still found."""
    source = "\nimport os\n    import scipy\n"
    assert co_grab.find_imports(source) == ['os', 'scipy']