From 9234e88060a93082324b942f3400d37742fe48ea Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 11:59:21 +0000 Subject: [PATCH 01/51] Lots of changes including tests of the import regexs and print statements. --- AUTHORS.rst | 2 +- commit_opener/grab_dependencies.py | 68 +++++++++++++++++++++++++++++- repo.py => commit_opener/repo.py | 41 +++++++++++------- grab_dependencies.py | 38 ----------------- tests/test_grab_dependencies.py | 40 ++++++++++++++++++ 5 files changed, 134 insertions(+), 55 deletions(-) rename repo.py => commit_opener/repo.py (64%) delete mode 100644 grab_dependencies.py create mode 100644 tests/test_grab_dependencies.py diff --git a/AUTHORS.rst b/AUTHORS.rst index 0f98385..abd11dd 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -9,7 +9,7 @@ Software Sustainability Institute Hackday 2016 Team * Laurence Billingham * Martin Hammitzsch * Steve Harris -* Craig MacLachlan +* Craig MacLachlan Contributors ------------ diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 5729bc8..f9fa698 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -5,4 +5,70 @@ work out dependencies #3 https://github.com/lbillingham/commit_opener/issues/3 -""" \ No newline at end of file +""" +import re + +import depsy.models +import commit_opener.repo + +def catfile(filename): + """Get text contents of a file.""" + + with open(filename, 'r') as fhandle: + print("Opening file {} and reading contents".format(filename)) + return "\n".join(fhandle.read()) + + +def get_dependencies(name, url): + + # Let's instantiate the repo object, so we can parse through it. + myrepo = commit_opener.repo.Repo(name, url) + print("Created a repository instance for {}".format(url)) + + # Extract a local copy + myrepo.extract_local_copy() + print("Local copy now available here: {}".format(myrepo.tmpdir)) + + # Note: the file has to be opened and read before passing to depsy + # functions. 
+ if myrepo.has("requirements.txt"): + print("Repository has a requirements.txt file") + filetext = catfile(myrepo.has("requirements.txt")) + reqs = depsy.models.python(filetext) + elif myrepo.has("setup.py"): + print("Repository has a setup.py file") + filetext = catfile(myrepo.has("setup.py")) + reqs = depsy.models.parse_setup_py(filetext) + else: + # No standard descriptions of the dependencies so let's try to work + # them out for ourselves. + print("No req or setup file, so determining dependencies ourselves.") + reqs = search_files_for_imports(myrepo) + + print("Found the following imports: {}".format("\n".join(reqs))) + +def search_files_for_imports(repo_instance): + """ + Walk all the python files in the repository and extract the import info. + + """ + dep_list = [] + for f in repo_instance.file_list: + if ".py" in f: + print("Looking in {} for imports".format(os.basename(f))) + filetext = catfile(f) + dep_list.extend(find_imports(filetext)) + + return dep_list + + +def find_imports(text): + """Apply regular expression searching to a file""" + # list of regexes + reexps = [re.compile(r'^import\s+(\w+)[\s\.\w]+'), + re.compile(r'^from\s+(\w+)[\s\.\w]+import') + ] + import_list = [] + for myregex in reexps: + import_list.extend(re.findall(myregex, text)) + return import_list diff --git a/repo.py b/commit_opener/repo.py similarity index 64% rename from repo.py rename to commit_opener/repo.py index 96d9972..f92e095 100644 --- a/repo.py +++ b/commit_opener/repo.py @@ -36,22 +36,33 @@ def __init__(self, name, url, rtype="git"): def extract_local_copy(self): """Extract a local copy of the repository""" - - self.tmpdir = tempfile.mkdtemp() - self.local_resources.append(self.tmpdir) - if self.rtype is "git": - extract_cmd = "git clone {url} {odir}".format(url=self.url, - odir=self.tmpdir) - else: - # We could implement SVN here, a quick svn export would do. 
- raise NotImplemented - - try: - subprocess.check_call(extract_cmd.split()) - except subprocess.CalledProcessError: - raise IOError("Unable to extract a local copy of repository") + if "http" not in self.url: + if os.path.exists(self.url): + print("Repository exists locally") + self.tmpdir = self.url + self.extracted = True + return + else: + raise IOError("Path to repository doesn't exist") + else: - self.extracted = True + print("Extracting local copy of repository") + self.tmpdir = tempfile.mkdtemp() + self.local_resources.append(self.tmpdir) + print("Created temporary directory") + if self.rtype is "git": + extract_cmd = "git clone {url} {odir}".format(url=self.url, + odir=self.tmpdir) + else: + # We could implement SVN here, a quick svn export would do. + raise NotImplemented + + try: + subprocess.check_call(extract_cmd.split()) + except subprocess.CalledProcessError: + raise IOError("Unable to extract a local copy of repository") + else: + self.extracted = True def has(self, filename): """ diff --git a/grab_dependencies.py b/grab_dependencies.py deleted file mode 100644 index af5cc0f..0000000 --- a/grab_dependencies.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Extract the dependencies from the repository - -Issue: -work out dependencies #3 -https://github.com/lbillingham/commit_opener/issues/3 - -""" - -def catfile(filename): - """Get text contents of a file.""" - - with open(filename, 'r') as fhandle: - return "\n".join(fhandle.read()) - - -def get_dependencies(name, url): - - # Let's instantiate the repo object, so we can parse through it. - myrepo = repo.Repo(name, url) - - # Extract a local copy - myrepo.extract_local_copy() - - # Note: the file has to be opened and read before passing to depsy - # functions. 
- if myrepo.has("requirements.txt"): - filetext = catfile(myrepo.has("requirements.txt")) - reqs = depsy.models.python(filetext) - elif myrepo.has("setup.py"): - filetext = catfile(myrepo.has("setup.py")) - reqs = depsy.models.parse_setup_py(filetext) - else: - # No standard descriptions of the dependencies so let's try to work - # them out for ourselves. - pass - - diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py new file mode 100644 index 0000000..0fca150 --- /dev/null +++ b/tests/test_grab_dependencies.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +test_commit_opener +---------------------------------- + +Tests for `commit_opener` module. +""" + +import commit_opener.grab_dependencies as co_grab + + +def test_import_search(self): + text = """ +import os +import scipy +import pandas +from numpy import something +import matplotlib.pyplot as plt +""" + expected = ['os', 'scipy', 'numpy', 'matplotlib'] + assert expected == co_grab.find_imports(text) + +def test_commented(self): + text = """ +import os +#import scipy +""" + expected = ['os'] + assert expected == co_grab.find_imports(text) + +def test_indented(self): + text = """ +import os +import scipy +""" + expected = ['os'] + assert expected == find_imports(text) + \ No newline at end of file From 99331a8ad462cdfff7f0b9d50ed6e75dc37d61b0 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 12:46:45 +0000 Subject: [PATCH 02/51] Fixed the depsy module call. It's not a proper module so can't be imported normally. 
--- commit_opener/grab_dependencies.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index f9fa698..4c7072e 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -5,10 +5,14 @@ work out dependencies #3 https://github.com/lbillingham/commit_opener/issues/3 +The key function is get_dependencies(). + """ import re -import depsy.models +# THis is an import depsy, but it's not a proper package. +import models as depsymodels + import commit_opener.repo def catfile(filename): @@ -20,7 +24,10 @@ def catfile(filename): def get_dependencies(name, url): + """ + Get the dependecies for a git repository or any local python package. + """ # Let's instantiate the repo object, so we can parse through it. myrepo = commit_opener.repo.Repo(name, url) print("Created a repository instance for {}".format(url)) @@ -34,11 +41,11 @@ def get_dependencies(name, url): if myrepo.has("requirements.txt"): print("Repository has a requirements.txt file") filetext = catfile(myrepo.has("requirements.txt")) - reqs = depsy.models.python(filetext) + reqs = depsymodels.python(filetext) elif myrepo.has("setup.py"): print("Repository has a setup.py file") filetext = catfile(myrepo.has("setup.py")) - reqs = depsy.models.parse_setup_py(filetext) + reqs = depsymodels.parse_setup_py(filetext) else: # No standard descriptions of the dependencies so let's try to work # them out for ourselves. From 23f9a9750443012dd89261b792f406431e3a1ccf Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:15:20 +0000 Subject: [PATCH 03/51] Testing something. 
--- commit_opener/grab_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 4c7072e..b1e9fd7 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -13,7 +13,8 @@ # THis is an import depsy, but it's not a proper package. import models as depsymodels -import commit_opener.repo +#import commit_opener.repo +import repo def catfile(filename): """Get text contents of a file.""" From 37a08e4062cf11913c55d7f325153b673db426c1 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:19:05 +0000 Subject: [PATCH 04/51] Removed redundant selfs. --- tests/test_grab_dependencies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index 0fca150..00ff501 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -11,7 +11,7 @@ import commit_opener.grab_dependencies as co_grab -def test_import_search(self): +def test_import_search(): text = """ import os import scipy @@ -22,7 +22,7 @@ def test_import_search(self): expected = ['os', 'scipy', 'numpy', 'matplotlib'] assert expected == co_grab.find_imports(text) -def test_commented(self): +def test_commented(): text = """ import os #import scipy @@ -30,7 +30,7 @@ def test_commented(self): expected = ['os'] assert expected == co_grab.find_imports(text) -def test_indented(self): +def test_indented(): text = """ import os import scipy From eeddbe8b7faf7d1fd0cdd26d14dcc4e9c39262c7 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:33:57 +0000 Subject: [PATCH 05/51] Fixed regex searching. 
--- commit_opener/grab_dependencies.py | 4 ++-- tests/test_grab_dependencies.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index b1e9fd7..eb444b0 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -73,8 +73,8 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes - reexps = [re.compile(r'^import\s+(\w+)[\s\.\w]+'), - re.compile(r'^from\s+(\w+)[\s\.\w]+import') + reexps = [re.compile(r'^import\s+(\w+)[\s\.\w]+', re.MULTILINE), + re.compile(r'^from\s+(\w+)[\s\.\w]+import', re.MULTILINE) ] import_list = [] for myregex in reexps: diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index 00ff501..678dfd7 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -20,6 +20,7 @@ def test_import_search(): import matplotlib.pyplot as plt """ expected = ['os', 'scipy', 'numpy', 'matplotlib'] + print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) def test_commented(): @@ -28,6 +29,7 @@ def test_commented(): #import scipy """ expected = ['os'] + print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) def test_indented(): @@ -36,5 +38,6 @@ def test_indented(): import scipy """ expected = ['os'] + print co_grab.find_imports(text) assert expected == find_imports(text) \ No newline at end of file From e286e0e9a43f40da76121f4e386a29b5d1ca303d Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:45:25 +0000 Subject: [PATCH 06/51] Fixed test results. They were wrong, not the code. 
--- tests/test_grab_dependencies.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index 678dfd7..e919b3e 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -19,7 +19,7 @@ def test_import_search(): from numpy import something import matplotlib.pyplot as plt """ - expected = ['os', 'scipy', 'numpy', 'matplotlib'] + expected = ['os', 'scipy', 'pandas', 'numpy', 'matplotlib'] print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) @@ -40,4 +40,9 @@ def test_indented(): expected = ['os'] print co_grab.find_imports(text) assert expected == find_imports(text) + +test_import_search() +test_commented() +test_indented() + \ No newline at end of file From ff2d31c784915f7dbfbb0b5e1e8856cb26751a20 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:46:24 +0000 Subject: [PATCH 07/51] Fixed regex code. --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index eb444b0..b4fb916 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -74,7 +74,7 @@ def find_imports(text): """Apply regular expression searching to a file""" # list of regexes reexps = [re.compile(r'^import\s+(\w+)[\s\.\w]+', re.MULTILINE), - re.compile(r'^from\s+(\w+)[\s\.\w]+import', re.MULTILINE) + re.compile(r'^from\s+(\w+)', re.MULTILINE) ] import_list = [] for myregex in reexps: From 92e6285f538f5be6f9161fe442d2d79c550e15c5 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:55:51 +0000 Subject: [PATCH 08/51] Another regex fix. 
--- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index b4fb916..6ad320e 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -73,7 +73,7 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes - reexps = [re.compile(r'^import\s+(\w+)[\s\.\w]+', re.MULTILINE), + reexps = [re.compile(r'^import\s+(\w+)', re.MULTILINE), re.compile(r'^from\s+(\w+)', re.MULTILINE) ] import_list = [] From c45627525fc32c6e079649324e87bf38a0ac253b Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:57:34 +0000 Subject: [PATCH 09/51] Fix expected test results. --- tests/test_grab_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index e919b3e..fb10cde 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -19,7 +19,8 @@ def test_import_search(): from numpy import something import matplotlib.pyplot as plt """ - expected = ['os', 'scipy', 'pandas', 'numpy', 'matplotlib'] + # Ordering matters here. Normal imports done first, then froms. 
+ expected = ['os', 'scipy', 'pandas', 'matplotlib', 'numpy'] print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) From 6479f4d276ff802605e7d9b5ba56dd0177199fb0 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:58:13 +0000 Subject: [PATCH 10/51] More fixing --- tests/test_grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index fb10cde..35cd0c9 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -40,7 +40,7 @@ def test_indented(): """ expected = ['os'] print co_grab.find_imports(text) - assert expected == find_imports(text) + assert expected == co_grab.find_imports(text) test_import_search() test_commented() From 1a6efbd18ac4ca83298c55760bf5b079838bf906 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:58:56 +0000 Subject: [PATCH 11/51] Fixy fixy --- tests/test_grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index 35cd0c9..53970ef 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -36,7 +36,7 @@ def test_commented(): def test_indented(): text = """ import os -import scipy + import scipy """ expected = ['os'] print co_grab.find_imports(text) From 2026cebef7579bbf7831c7cb58655e03f7a7cb7a Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 13:59:54 +0000 Subject: [PATCH 12/51] Removed testing bits. --- tests/test_grab_dependencies.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index 53970ef..a861110 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -21,7 +21,6 @@ def test_import_search(): """ # Ordering matters here. Normal imports done first, then froms. 
expected = ['os', 'scipy', 'pandas', 'matplotlib', 'numpy'] - print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) def test_commented(): @@ -30,7 +29,6 @@ def test_commented(): #import scipy """ expected = ['os'] - print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) def test_indented(): @@ -39,11 +37,5 @@ def test_indented(): import scipy """ expected = ['os'] - print co_grab.find_imports(text) assert expected == co_grab.find_imports(text) -test_import_search() -test_commented() -test_indented() - - \ No newline at end of file From 979645cc958339868791715f87c096724082ae97 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:03:19 +0000 Subject: [PATCH 13/51] Fixed module referencing. --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 6ad320e..49484f5 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -30,7 +30,7 @@ def get_dependencies(name, url): """ # Let's instantiate the repo object, so we can parse through it. - myrepo = commit_opener.repo.Repo(name, url) + myrepo = repo.Repo(name, url) print("Created a repository instance for {}".format(url)) # Extract a local copy From ce261be23470fbdd3ea74c8a4985435a3525c210 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:10:46 +0000 Subject: [PATCH 14/51] Brought Depsy functions into package. 
--- commit_opener/depsy.py | 100 +++++++++++++++++++++++++++++ commit_opener/grab_dependencies.py | 7 +- 2 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 commit_opener/depsy.py diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py new file mode 100644 index 0000000..e67b5d5 --- /dev/null +++ b/commit_opener/depsy.py @@ -0,0 +1,100 @@ +import re +import requests +import pickle +import ast +from pathlib import Path + + + +"""Functions from depsy""" +def parse_requirements_txt(contents): + # see here for spec used in parsing the file: + # https://pip.readthedocs.org/en/1.1/requirements.html#the-requirements-file-format + # it doesn't mention the '#' comment but found it often in examples. + # not using this test str in the function, just a handy place to keep it. + test_str = """# my comment +file://blahblah +foo==10.2 +baz>=3.6 +# other comment +foo.bar>=3.33 +foo-bar==2.2 +foo_bar==1.1 +foo == 5.5 +.for some reason there is a dot sometimes +--index-url blahblah +-e http://blah + foo_with_space_in_front = 1.1""" + + reqs = re.findall( + r'^(?!file:|-|\.)\s*([\w\.-]+)', + contents, + re.MULTILINE | re.IGNORECASE + ) + return sorted(reqs) + + +def parse_setup_py(contents): + parsed = ast.parse(contents) + ret = [] + # see ast docs: https://greentreesnakes.readthedocs.org/en/latest/index.html + for node in ast.walk(parsed): + try: + if node.func.id == "setup": + for keyword in node.keywords: + if keyword.arg=="install_requires": + print "found requirements in setup.py 'install_requires' arg" + for elt in keyword.value.elts: + ret.append(_clean_setup_req(elt.s)) + + if keyword.arg=="requires": + print "found requirements in setup.py 'requires' arg" + for elt in keyword.value.elts: + ret.append(_clean_setup_req(elt.s)) + + if keyword.arg == "extras_require": + print "found requirements in setup.py 'extras_require' arg" + for my_list in keyword.value.values: + for elt in my_list.elts: + ret.append(_clean_setup_req(elt.s)) + + except 
AttributeError: + continue + +return sorted(ret) + + + +class PythonStandardLibs(): + """ NOT using yet but will be useful to remove standard libs""" + url = "https://docs.python.org/2.7/py-modindex.html" + data_dir = Path(__file__, "../../data").resolve() + pickle_path = Path(data_dir, "python_standard_libs.pickle") + + @classmethod + def save_from_web(cls): + # only needs to be used once ever, here for tidiness + # checked the result into source control as python_standard_libs.pickle + html = requests.get(cls.url).text + exp = r'([^<]+)' + matches = re.findall(exp, html) + libs = [m for m in matches if '.' not in m] + + with open(str(cls.pickle_path), "w") as f: + pickle.dump(libs, f) + + print "saved these to file: {}".format(libs) + + @classmethod + def get(cls): + with open(str(cls.pickle_path), "r") as f: + return pickle.load(f) + + +def save_python_standard_libs(): + PythonStandardLibs.save_from_web() + + # to show the thing works + print "got these from pickled file: {}".format(PythonStandardLibs.get()) + + diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 49484f5..5b28185 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -10,8 +10,7 @@ """ import re -# THis is an import depsy, but it's not a proper package. -import models as depsymodels +import depsy #import commit_opener.repo import repo @@ -42,11 +41,11 @@ def get_dependencies(name, url): if myrepo.has("requirements.txt"): print("Repository has a requirements.txt file") filetext = catfile(myrepo.has("requirements.txt")) - reqs = depsymodels.python(filetext) + reqs = depsy.parse_requirements_txt(filetext) elif myrepo.has("setup.py"): print("Repository has a setup.py file") filetext = catfile(myrepo.has("setup.py")) - reqs = depsymodels.parse_setup_py(filetext) + reqs = depsy.parse_setup_py(filetext) else: # No standard descriptions of the dependencies so let's try to work # them out for ourselves. 
From 5534d6ca7a375ad8cbd6c576e7c9077c7036eed7 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:11:50 +0000 Subject: [PATCH 15/51] Fix indent. --- commit_opener/depsy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py index e67b5d5..4f67ed6 100644 --- a/commit_opener/depsy.py +++ b/commit_opener/depsy.py @@ -61,7 +61,7 @@ def parse_setup_py(contents): except AttributeError: continue -return sorted(ret) + return sorted(ret) From 7ca2f84fc9d96e6a3362631741732a7addffe3d5 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:13:19 +0000 Subject: [PATCH 16/51] Removed missing import. --- commit_opener/depsy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py index 4f67ed6..5d16ed2 100644 --- a/commit_opener/depsy.py +++ b/commit_opener/depsy.py @@ -1,5 +1,4 @@ import re -import requests import pickle import ast from pathlib import Path From c1ca333d78f89ecadde9cb67a9fa02dbd1ec1be2 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:14:29 +0000 Subject: [PATCH 17/51] Removed extraneous code. 
--- commit_opener/depsy.py | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py index 5d16ed2..fddee3e 100644 --- a/commit_opener/depsy.py +++ b/commit_opener/depsy.py @@ -1,7 +1,7 @@ import re import pickle import ast -from pathlib import Path + @@ -64,36 +64,3 @@ def parse_setup_py(contents): -class PythonStandardLibs(): - """ NOT using yet but will be useful to remove standard libs""" - url = "https://docs.python.org/2.7/py-modindex.html" - data_dir = Path(__file__, "../../data").resolve() - pickle_path = Path(data_dir, "python_standard_libs.pickle") - - @classmethod - def save_from_web(cls): - # only needs to be used once ever, here for tidiness - # checked the result into source control as python_standard_libs.pickle - html = requests.get(cls.url).text - exp = r'([^<]+)' - matches = re.findall(exp, html) - libs = [m for m in matches if '.' not in m] - - with open(str(cls.pickle_path), "w") as f: - pickle.dump(libs, f) - - print "saved these to file: {}".format(libs) - - @classmethod - def get(cls): - with open(str(cls.pickle_path), "r") as f: - return pickle.load(f) - - -def save_python_standard_libs(): - PythonStandardLibs.save_from_web() - - # to show the thing works - print "got these from pickled file: {}".format(PythonStandardLibs.get()) - - From cbb5adfff8785bf45c16dcb182d035dce220c206 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:21:23 +0000 Subject: [PATCH 18/51] Scraping requirements doesn't work on our repo. 
--- commit_opener/grab_dependencies.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 5b28185..ae02ff0 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -38,11 +38,12 @@ def get_dependencies(name, url): # Note: the file has to be opened and read before passing to depsy # functions. - if myrepo.has("requirements.txt"): - print("Repository has a requirements.txt file") - filetext = catfile(myrepo.has("requirements.txt")) - reqs = depsy.parse_requirements_txt(filetext) - elif myrepo.has("setup.py"): +# if myrepo.has("requirements.txt"): +# print("Repository has a requirements.txt file") +# filetext = catfile(myrepo.has("requirements.txt")) +# reqs = depsy.parse_requirements_txt(filetext) +# elif myrepo.has("setup.py"): + if myrepo.has("setup.py"): print("Repository has a setup.py file") filetext = catfile(myrepo.has("setup.py")) reqs = depsy.parse_setup_py(filetext) From 9a2ce789f760c0c8561db8ac7a1189544046b7ec Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:22:41 +0000 Subject: [PATCH 19/51] Setup.py doesn't work either. 
--- commit_opener/grab_dependencies.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index ae02ff0..34cebbe 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -43,15 +43,14 @@ def get_dependencies(name, url): # filetext = catfile(myrepo.has("requirements.txt")) # reqs = depsy.parse_requirements_txt(filetext) # elif myrepo.has("setup.py"): - if myrepo.has("setup.py"): - print("Repository has a setup.py file") - filetext = catfile(myrepo.has("setup.py")) - reqs = depsy.parse_setup_py(filetext) - else: +# print("Repository has a setup.py file") +# filetext = catfile(myrepo.has("setup.py")) +# reqs = depsy.parse_setup_py(filetext) +# else: # No standard descriptions of the dependencies so let's try to work # them out for ourselves. - print("No req or setup file, so determining dependencies ourselves.") - reqs = search_files_for_imports(myrepo) + print("No req or setup file, so determining dependencies ourselves.") + reqs = search_files_for_imports(myrepo) print("Found the following imports: {}".format("\n".join(reqs))) From 7df78d80f91ac5d53122041370b0dabce884dd16 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:26:42 +0000 Subject: [PATCH 20/51] testing --- commit_opener/grab_dependencies.py | 1 + commit_opener/repo.py | 1 + 2 files changed, 2 insertions(+) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 34cebbe..1d29e63 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -35,6 +35,7 @@ def get_dependencies(name, url): # Extract a local copy myrepo.extract_local_copy() print("Local copy now available here: {}".format(myrepo.tmpdir)) + print myrepo.file_list # Note: the file has to be opened and read before passing to depsy # functions. 
diff --git a/commit_opener/repo.py b/commit_opener/repo.py index f92e095..00a2722 100644 --- a/commit_opener/repo.py +++ b/commit_opener/repo.py @@ -90,6 +90,7 @@ def _get_filelist(self): for root, dirs, files in os.walk(self.tmpdir, topdown=True): for name in files: + print os.path.join(root, name) self.file_list.append(os.path.join(root, name)) From a9e7e3274b6bc7a112b5188f3b5cec3c21459d20 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:28:20 +0000 Subject: [PATCH 21/51] get the filelist manually. --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 1d29e63..b0d18a2 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -35,7 +35,7 @@ def get_dependencies(name, url): # Extract a local copy myrepo.extract_local_copy() print("Local copy now available here: {}".format(myrepo.tmpdir)) - print myrepo.file_list + myrepo._get_filelist() # Note: the file has to be opened and read before passing to depsy # functions. From 9ea94c193a19599bbd32570931efff2d5a35bd5f Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:29:43 +0000 Subject: [PATCH 22/51] corrected import. 
--- commit_opener/grab_dependencies.py | 2 +- commit_opener/repo.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index b0d18a2..4dd0e07 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -9,7 +9,7 @@ """ import re - +import os import depsy #import commit_opener.repo diff --git a/commit_opener/repo.py b/commit_opener/repo.py index 00a2722..9cf25cd 100644 --- a/commit_opener/repo.py +++ b/commit_opener/repo.py @@ -7,6 +7,7 @@ import shutil import subprocess import os +import os.path class Repo(object): """ From 24a13a6ddfabebd66fd1efe22e80c0c15bbcb150 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:30:32 +0000 Subject: [PATCH 23/51] fixed basename command. --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 4dd0e07..dc9f46d 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -63,7 +63,7 @@ def search_files_for_imports(repo_instance): dep_list = [] for f in repo_instance.file_list: if ".py" in f: - print("Looking in {} for imports".format(os.basename(f))) + print("Looking in {} for imports".format(os.path.basename(f))) filetext = catfile(f) dep_list.extend(find_imports(filetext)) From 11e6f6a4fa82c23bbc9b392cc1c9b06bf06d4b24 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:31:58 +0000 Subject: [PATCH 24/51] Need to allow leading whitespace. 
--- commit_opener/grab_dependencies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index dc9f46d..781cc47 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -73,8 +73,8 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes - reexps = [re.compile(r'^import\s+(\w+)', re.MULTILINE), - re.compile(r'^from\s+(\w+)', re.MULTILINE) + reexps = [re.compile(r'^\w+import\s+(\w+)', re.MULTILINE), + re.compile(r'^\w+from\s+(\w+)', re.MULTILINE) ] import_list = [] for myregex in reexps: From ef3072c7e12d3f1e187f375becba85ddcdf99744 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:34:19 +0000 Subject: [PATCH 25/51] Need the right symbol for whitespace. --- commit_opener/grab_dependencies.py | 4 ++-- tests/test_grab_dependencies.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 781cc47..d11b6c1 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -73,8 +73,8 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes - reexps = [re.compile(r'^\w+import\s+(\w+)', re.MULTILINE), - re.compile(r'^\w+from\s+(\w+)', re.MULTILINE) + reexps = [re.compile(r'^\s+import\s+(\w+)', re.MULTILINE), + re.compile(r'^\s+from\s+(\w+)', re.MULTILINE) ] import_list = [] for myregex in reexps: diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index a861110..795854d 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -36,6 +36,6 @@ def test_indented(): import os import scipy """ - expected = ['os'] + expected = ['os', 'scipy'] assert expected == co_grab.find_imports(text) From
5e484f9d5dc9b535a0ae80dad18ed6aff0bbc505 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:39:40 +0000 Subject: [PATCH 26/51] Removed start of line symbol from regex. --- commit_opener/grab_dependencies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index d11b6c1..a628ecb 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -72,9 +72,9 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" - # list of regexes - reexps = [re.compile(r'^\s+import\s+(\w+)', re.MULTILINE), - re.compile(r'^\s+from\s+(\w+)', re.MULTILINE) + # list of regexes. Strat of line "^" doesn't work for some reason. + reexps = [re.compile(r'\s+import\s+(\w+)', re.MULTILINE), + re.compile(r'\s+from\s+(\w+)', re.MULTILINE) ] import_list = [] for myregex in reexps: From 88e8bdb2683677bc5eb66360c014441de43e677d Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:40:51 +0000 Subject: [PATCH 27/51] remove whitespace. --- commit_opener/grab_dependencies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index a628ecb..4e6edb8 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -73,8 +73,8 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes. Strat of line "^" doesn't work for some reason. 
- reexps = [re.compile(r'\s+import\s+(\w+)', re.MULTILINE), - re.compile(r'\s+from\s+(\w+)', re.MULTILINE) + reexps = [re.compile(r'import\s+(\w+)', re.MULTILINE), + re.compile(r'from\s+(\w+)', re.MULTILINE) ] import_list = [] for myregex in reexps: From d10513c53488a44830e54ed163b010ace029b41e Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:46:47 +0000 Subject: [PATCH 28/51] Fixed regex again! --- commit_opener/grab_dependencies.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 4e6edb8..5d7c762 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -73,10 +73,11 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes. Strat of line "^" doesn't work for some reason. - reexps = [re.compile(r'import\s+(\w+)', re.MULTILINE), - re.compile(r'from\s+(\w+)', re.MULTILINE) + reexps = [re.compile(r'^[\si]mport\s+(\w+)'), + re.compile(r'^[\sf]+rom\s+(\w+)') ] import_list = [] for myregex in reexps: - import_list.extend(re.findall(myregex, text)) + for line in text.split('\n'): + import_list.append(re.match(myregex, text).group(1)) return import_list From e45e42b826f88726f014ee27aa103bac4de0dcbb Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:48:08 +0000 Subject: [PATCH 29/51] protect lines that don't import. 
--- commit_opener/grab_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 5d7c762..f41e004 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -79,5 +79,6 @@ def find_imports(text): import_list = [] for myregex in reexps: for line in text.split('\n'): - import_list.append(re.match(myregex, text).group(1)) + if 'import' in line: + import_list.append(re.match(myregex, text).group(1)) return import_list From eec68819c00fff94afc71a22941a26e7d509a049 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:49:13 +0000 Subject: [PATCH 30/51] add print --- commit_opener/grab_dependencies.py | 1 + 1 file changed, 1 insertion(+) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index f41e004..6f734fe 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -80,5 +80,6 @@ def find_imports(text): for myregex in reexps: for line in text.split('\n'): if 'import' in line: + print line import_list.append(re.match(myregex, text).group(1)) return import_list From 0030bd8e6fef66459a2bfd222d4bca7270e1e223 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:50:08 +0000 Subject: [PATCH 31/51] more diag --- commit_opener/grab_dependencies.py | 1 + 1 file changed, 1 insertion(+) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 6f734fe..4c10482 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -79,6 +79,7 @@ def find_imports(text): import_list = [] for myregex in reexps: for line in text.split('\n'): + print line if 'import' in line: print line import_list.append(re.match(myregex, text).group(1)) From f7b522e655baa80570feed2abf8937cbca812c0d Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:51:54 +0000 Subject: [PATCH 32/51] 
change spliting --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 4c10482..967fa91 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -78,7 +78,7 @@ def find_imports(text): ] import_list = [] for myregex in reexps: - for line in text.split('\n'): + for line in text.split(): print line if 'import' in line: print line From 53f83c0244ee3ae46d68f8cbb7ddb0404a90ab15 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:53:47 +0000 Subject: [PATCH 33/51] remove split? --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 967fa91..92347e0 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -78,7 +78,7 @@ def find_imports(text): ] import_list = [] for myregex in reexps: - for line in text.split(): + for line in text: print line if 'import' in line: print line From be74c8635adae3a7201f358d8869d0c4e85d1c2b Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:54:48 +0000 Subject: [PATCH 34/51] print the text --- commit_opener/grab_dependencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 92347e0..dc0f067 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -78,8 +78,9 @@ def find_imports(text): ] import_list = [] for myregex in reexps: + print text for line in text: - print line + if 'import' in line: print line import_list.append(re.match(myregex, text).group(1)) From 572cf1219b0a69fc83e2b37b2f980368e4b9966c Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:56:48 +0000 Subject: [PATCH 35/51] change to cat file func --- 
commit_opener/grab_dependencies.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index dc0f067..6f4cfd6 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -20,7 +20,7 @@ def catfile(filename): with open(filename, 'r') as fhandle: print("Opening file {} and reading contents".format(filename)) - return "\n".join(fhandle.read()) + return fhandle.read() def get_dependencies(name, url): @@ -80,7 +80,6 @@ def find_imports(text): for myregex in reexps: print text for line in text: - if 'import' in line: print line import_list.append(re.match(myregex, text).group(1)) From 2e683c0833cc1bbf92cd21bf8f53c2185a41f317 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 14:59:15 +0000 Subject: [PATCH 36/51] arrrgggh --- commit_opener/grab_dependencies.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 6f4cfd6..2c28412 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -20,7 +20,8 @@ def catfile(filename): with open(filename, 'r') as fhandle: print("Opening file {} and reading contents".format(filename)) - return fhandle.read() + text = fhandle.readlines() + return text def get_dependencies(name, url): @@ -81,6 +82,6 @@ def find_imports(text): print text for line in text: if 'import' in line: - print line + print "blah", line import_list.append(re.match(myregex, text).group(1)) return import_list From 6a7c41ac7836e784f315d982d2bd219083edcaaf Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:01:19 +0000 Subject: [PATCH 37/51] commit --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 2c28412..903ee50 100644 --- 
a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -83,5 +83,5 @@ def find_imports(text): for line in text: if 'import' in line: print "blah", line - import_list.append(re.match(myregex, text).group(1)) + import_list.append(re.match(myregex, line).group(1)) return import_list From c2714ad420832e90deaaa77f81d3cc3d1a9aeddc Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:02:29 +0000 Subject: [PATCH 38/51] simplify the regex --- commit_opener/grab_dependencies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 903ee50..0bc30d9 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -74,8 +74,8 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes. Strat of line "^" doesn't work for some reason. - reexps = [re.compile(r'^[\si]mport\s+(\w+)'), - re.compile(r'^[\sf]+rom\s+(\w+)') + reexps = [re.compile(r'import\s+(\w+)'), + re.compile(r'from\s+(\w+)') ] import_list = [] for myregex in reexps: From c6016e9a08eb9371ac0ba4f51a9c0b6c0624b311 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:03:45 +0000 Subject: [PATCH 39/51] some exception handling. 
--- commit_opener/grab_dependencies.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 0bc30d9..8d7b614 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -83,5 +83,8 @@ def find_imports(text): for line in text: if 'import' in line: print "blah", line - import_list.append(re.match(myregex, line).group(1)) + try: + import_list.append(re.match(myregex, line).group(1)) + except AttributeError: + pass return import_list From d66a42946121e10cec12d2fdf946a5b31aa30fb6 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:04:32 +0000 Subject: [PATCH 40/51] Remove excessive prints --- commit_opener/grab_dependencies.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 8d7b614..8a3a6b5 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -79,10 +79,8 @@ def find_imports(text): ] import_list = [] for myregex in reexps: - print text for line in text: if 'import' in line: - print "blah", line try: import_list.append(re.match(myregex, line).group(1)) except AttributeError: From 11c48c4408c28ebd3545b629a0bf8aec0c9a8d15 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:10:32 +0000 Subject: [PATCH 41/51] arrrgh2 --- commit_opener/grab_dependencies.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 8a3a6b5..7429899 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -80,9 +80,8 @@ def find_imports(text): import_list = [] for myregex in reexps: for line in text: - if 'import' in line: - try: - import_list.append(re.match(myregex, line).group(1)) - except AttributeError: - pass + try: + import_list.append(re.match(myregex, line).group(1)) + 
except AttributeError: + pass return import_list From 823fc36b5b8a1d4c593154856d7a560bafc4f1f2 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:12:42 +0000 Subject: [PATCH 42/51] stupid regex i can think of --- commit_opener/grab_dependencies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 7429899..e01cd41 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -74,8 +74,8 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" # list of regexes. Strat of line "^" doesn't work for some reason. - reexps = [re.compile(r'import\s+(\w+)'), - re.compile(r'from\s+(\w+)') + reexps = [re.compile(r'import (\w+)'), + re.compile(r'from (\w+) import') ] import_list = [] for myregex in reexps: From b8abebb852b8de0f0b12c004162dad55a34c4859 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:14:31 +0000 Subject: [PATCH 43/51] changed match to search. --- commit_opener/grab_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index e01cd41..749dc91 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -81,7 +81,7 @@ def find_imports(text): for myregex in reexps: for line in text: try: - import_list.append(re.match(myregex, line).group(1)) + import_list.append(re.search(myregex, line).group(1)) except AttributeError: pass return import_list From b91c2159303582a2666f61c0ce5d497551b06988 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 23 Mar 2016 15:22:05 +0000 Subject: [PATCH 44/51] OK working now. IN the most horrible way. 
--- commit_opener/grab_dependencies.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 749dc91..34e0a66 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -11,6 +11,7 @@ import re import os import depsy +import pandas #import commit_opener.repo import repo @@ -55,6 +56,14 @@ def get_dependencies(name, url): reqs = search_files_for_imports(myrepo) print("Found the following imports: {}".format("\n".join(reqs))) + + + data = pd.Series(reqs) + data = data.unique() + data.sort_values(inplace=True) + return data + + def search_files_for_imports(repo_instance): """ From 8a26d4f3f0aaab19b714eedf3dedbbda77d01ac1 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 30 Mar 2016 15:53:05 +0100 Subject: [PATCH 45/51] Fixed the regular expression searching and added the filtering of the standard python packages. --- commit_opener/depsy.py | 72 ++++++++++++++++++++++++++++-- commit_opener/grab_dependencies.py | 27 ++++++----- tests/test_grab_dependencies.py | 12 ++--- 3 files changed, 90 insertions(+), 21 deletions(-) diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py index fddee3e..179393a 100644 --- a/commit_opener/depsy.py +++ b/commit_opener/depsy.py @@ -1,9 +1,9 @@ import re import pickle import ast - - - +import os.path +import errno +import requests """Functions from depsy""" def parse_requirements_txt(contents): @@ -26,7 +26,7 @@ def parse_requirements_txt(contents): foo_with_space_in_front = 1.1""" reqs = re.findall( - r'^(?!file:|-|\.)\s*([\w\.-]+)', + '^(?!file:|-|\.)\s*([\w\.-]+)', contents, re.MULTILINE | re.IGNORECASE ) @@ -62,5 +62,69 @@ def parse_setup_py(contents): return sorted(ret) +class PythonStandardLibs(): + + def __init__(self): + self.url = "https://docs.python.org/2.7/py-modindex.html" + self.data_dir = os.path.join(os.path.dirname(__file__), + "../../data") + + self.pickle_path = os.path.join(self.data_dir, + 
"python_standard_libs.pickle") + self.libs = None + + def _mkdir(self): + try: + os.makedirs(self.data_dir) + except OSError as exp: + if exp.errno != errno.EEXIST: + raise + self.pickle_path = os.path.join(self.data_dir, + "python_standard_libs.pickle") + + def retrieve_from_web(self): + # only needs to be used once ever, here for tidiness + # checked the result into source control as python_standard_libs.pickle + html = requests.get(self.url).text + exp = r'class="xref">([^<]+)' + matches = re.findall(exp, html) + self.libs = [m for m in matches if '.' not in m] + + def pickle_libs(self): + + if self.libs is None: + self.retrieve_from_web() + + self._mkdir() + with open(self.pickle_path, "w") as f: + pickle.dump(self.libs, f) + + print "saved these to file: {}".format(self.libs) + + def get(self): + if self.libs is None: + try: + with open(self.pickle_path, "r") as f: + print "Loading list of Stdandard Python Libraries from pickle file" + self.libs = pickle.load(f) + except: + self.retrieve_from_web() + self.pickle_libs() + + def clean(self): + try: + os.remove(self.pickle_path) + except: + pass + +def save_python_standard_libs(clean=False): + pystdlibs = PythonStandardLibs() + if clean: + pystdlibs.clean() + pystdlibs.get() + # to show the thing works + new_libs_obj = PythonStandardLibs() + new_libs_obj.get() + print "got these from pickled file: {}".format(new_libs_obj.libs) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 34e0a66..d285441 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -21,7 +21,7 @@ def catfile(filename): with open(filename, 'r') as fhandle: print("Opening file {} and reading contents".format(filename)) - text = fhandle.readlines() + text = fhandle.read() return text @@ -55,11 +55,17 @@ def get_dependencies(name, url): print("No req or setup file, so determining dependencies ourselves.") reqs = search_files_for_imports(myrepo) + # Convert the list of 
requirements to a set. + reqs = set(reqs) print("Found the following imports: {}".format("\n".join(reqs))) + # Get the list of standard packages so that these can be removed. + stdlibs = depsy.PythonStandardLibs() + stdlibs.get() + set_std_libs = set(stdlibs.libs) - data = pd.Series(reqs) - data = data.unique() + + data = pandas.Series(list(reqs-set_std_libs)) data.sort_values(inplace=True) return data @@ -82,15 +88,14 @@ def search_files_for_imports(repo_instance): def find_imports(text): """Apply regular expression searching to a file""" - # list of regexes. Strat of line "^" doesn't work for some reason. - reexps = [re.compile(r'import (\w+)'), - re.compile(r'from (\w+) import') + # list of regexes. + reexps = [re.compile('^[\si]+mport\s+(\w+)[\s\.]', re.MULTILINE), + re.compile('^[\sf]+rom\s+(\w+)[\s\.]+', re.MULTILINE) ] import_list = [] for myregex in reexps: - for line in text: - try: - import_list.append(re.search(myregex, line).group(1)) - except AttributeError: - pass + try: + import_list.extend(re.findall(myregex, text)) + except AttributeError: + pass return import_list diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index 795854d..f710e86 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -12,30 +12,30 @@ def test_import_search(): - text = """ + text = (""" import os import scipy import pandas from numpy import something import matplotlib.pyplot as plt -""" +""") # Ordering matters here. Normal imports done first, then froms. 
expected = ['os', 'scipy', 'pandas', 'matplotlib', 'numpy'] assert expected == co_grab.find_imports(text) def test_commented(): - text = """ + text = (""" import os #import scipy -""" +""") expected = ['os'] assert expected == co_grab.find_imports(text) def test_indented(): - text = """ + text = (""" import os import scipy -""" +""") expected = ['os', 'scipy'] assert expected == co_grab.find_imports(text) From 651c0aa907845a7c5045fea3dd5389d9b3368257 Mon Sep 17 00:00:00 2001 From: Craig MacLachlan Date: Wed, 30 Mar 2016 16:00:54 +0100 Subject: [PATCH 46/51] Uncommeted logic block that allows the use of requirements.txt and setup.py files to get dependencies. --- commit_opener/grab_dependencies.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index d285441..3f506f9 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -41,19 +41,19 @@ def get_dependencies(name, url): # Note: the file has to be opened and read before passing to depsy # functions. -# if myrepo.has("requirements.txt"): -# print("Repository has a requirements.txt file") -# filetext = catfile(myrepo.has("requirements.txt")) -# reqs = depsy.parse_requirements_txt(filetext) -# elif myrepo.has("setup.py"): -# print("Repository has a setup.py file") -# filetext = catfile(myrepo.has("setup.py")) -# reqs = depsy.parse_setup_py(filetext) -# else: + if myrepo.has("requirements.txt"): + print("Repository has a requirements.txt file") + filetext = catfile(myrepo.has("requirements.txt")) + reqs = depsy.parse_requirements_txt(filetext) + elif myrepo.has("setup.py"): + print("Repository has a setup.py file") + filetext = catfile(myrepo.has("setup.py")) + reqs = depsy.parse_setup_py(filetext) + else: # No standard descriptions of the dependencies so let's try to work # them out for ourselves. 
- print("No req or setup file, so determining dependencies ourselves.") - reqs = search_files_for_imports(myrepo) + print("No req or setup file, so determining dependencies ourselves.") + reqs = search_files_for_imports(myrepo) # Convert the list of requirements to a set. reqs = set(reqs) From 8f402ad960ab912a7f198ad3c39c449fe1240f22 Mon Sep 17 00:00:00 2001 From: Billingham Date: Mon, 11 Apr 2016 21:18:49 +0100 Subject: [PATCH 47/51] tests now run in python 3python3 prints and fixed tabs/vs spaces --- commit_opener/commit_opener.py | 32 ++++++++++++++------- commit_opener/depsy.py | 43 +++++++++++++++------------- commit_opener/repo.py | 50 +++++++++++++++------------------ tests/test_grab_dependencies.py | 7 +++-- 4 files changed, 72 insertions(+), 60 deletions(-) diff --git a/commit_opener/commit_opener.py b/commit_opener/commit_opener.py index 9404d58..de74c3f 100644 --- a/commit_opener/commit_opener.py +++ b/commit_opener/commit_opener.py @@ -4,22 +4,27 @@ import pandas as pd from shutil import rmtree + +from . grab_dependencies import get_dependencies from . tree_scrape import author_minded -from . query_pmc import pmc_data +from . 
query_pmc import pmc_data as pubmed_data OUT_SUBFOLDER = 'contrib_data' AUTHOR_DATA = 'author_data.json' + def verify_local_repo_location(repo): if not os.path.isdir(repo): raise IOError('could not locate repository {}'.format(repo)) + def build_out_path(repo_name, parent_path=None): if parent_path is None: parent_path = os.path.abspath(os.curdir) out_path = os.path.join(parent_path, repo_name, OUT_SUBFOLDER) return out_path + def make_output_folder(path_, overwrite): if not os.path.exists(path_): os.mkdir(path_) @@ -27,14 +32,15 @@ def make_output_folder(path_, overwrite): rmtree(path_) os.mkdir(path_) + @click.command() -@click.option('--repo', prompt='git repository location', help='path to folder containing .git repository or url') +@click.option('--repo', prompt='git repository location', + help='path to folder containing .git repository or url') @click.option('--out_dir', default=None, - help='parent dir for output data, default same as .git folder scraped') + help='parent dir for output data, default same as .git folder scraped') @click.option('--clobber_output', default=True, - help='should we overwrite existing data?, default True') -@click.option('--verbose/--no-verbose', default=False) - + help='should we overwrite existing data?, default True') +@click.option('--verbose/--no-verbose', default=True) def main(repo, out_dir, clobber_output, verbose): """ """ import logging @@ -55,10 +61,16 @@ def main(repo, out_dir, clobber_output, verbose): repo_name = os.path.basename(repo) make_output_folder(out_dir, overwrite=clobber_output) contributor_data = author_minded(repo) - citation_data = pmc_data('SPSS') - logging.info("output path: %s" % os.path.join(out_dir,'contributor_data.json')) - contributor_data.to_json(os.path.join(out_dir,'contributor_data.json'), date_format='iso') - citation_data['citations'].to_json(os.path.join(out_dir,'citation_data.json')) + citation_data = pubmed_data('SPSS') + depends_data = get_dependencies(repo_name, repo) + 
logging.info("output path: %s" % os.path.join(out_dir, + 'contributor_data.json')) + contributor_data.to_json(os.path.join(out_dir, + 'contributor_data.json'), + date_format='iso') + citation_data['citations'].to_json(os.path.join(out_dir, + 'citation_data.json')) + depends_data.to_json(os.path.join(out_dir, 'dependencies_data.json')) if __name__ == '__main__': main() diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py index 179393a..eaaa02e 100644 --- a/commit_opener/depsy.py +++ b/commit_opener/depsy.py @@ -5,7 +5,9 @@ import errno import requests -"""Functions from depsy""" +# """Functions from depsy""" + + def parse_requirements_txt(contents): # see here for spec used in parsing the file: # https://pip.readthedocs.org/en/1.1/requirements.html#the-requirements-file-format @@ -41,45 +43,46 @@ def parse_setup_py(contents): try: if node.func.id == "setup": for keyword in node.keywords: - if keyword.arg=="install_requires": - print "found requirements in setup.py 'install_requires' arg" + if keyword.arg == "install_requires": + print("found requirements in setup.py 'install_requires' arg") for elt in keyword.value.elts: ret.append(_clean_setup_req(elt.s)) - - if keyword.arg=="requires": - print "found requirements in setup.py 'requires' arg" + + if keyword.arg == "requires": + print("found requirements in setup.py 'requires' arg") for elt in keyword.value.elts: ret.append(_clean_setup_req(elt.s)) - + if keyword.arg == "extras_require": - print "found requirements in setup.py 'extras_require' arg" + print("found requirements in setup.py 'extras_require' arg") for my_list in keyword.value.values: for elt in my_list.elts: ret.append(_clean_setup_req(elt.s)) - + except AttributeError: continue return sorted(ret) + class PythonStandardLibs(): def __init__(self): - self.url = "https://docs.python.org/2.7/py-modindex.html" - self.data_dir = os.path.join(os.path.dirname(__file__), - "../../data") + self.url = "https://docs.python.org/2.7/py-modindex.html" + 
self.data_dir = os.path.join(os.path.dirname(__file__), + "../../data") self.pickle_path = os.path.join(self.data_dir, "python_standard_libs.pickle") self.libs = None def _mkdir(self): - try: + try: os.makedirs(self.data_dir) - except OSError as exp: + except OSError as exp: if exp.errno != errno.EEXIST: - raise - self.pickle_path = os.path.join(self.data_dir, + raise + self.pickle_path = os.path.join(self.data_dir, "python_standard_libs.pickle") def retrieve_from_web(self): @@ -99,13 +102,13 @@ def pickle_libs(self): with open(self.pickle_path, "w") as f: pickle.dump(self.libs, f) - print "saved these to file: {}".format(self.libs) + print("saved these to file: {}".format(self.libs)) def get(self): if self.libs is None: try: with open(self.pickle_path, "r") as f: - print "Loading list of Stdandard Python Libraries from pickle file" + print("Loading list of Stdandard Python Libraries from pickle file") self.libs = pickle.load(f) except: self.retrieve_from_web() @@ -117,6 +120,7 @@ def clean(self): except: pass + def save_python_standard_libs(clean=False): pystdlibs = PythonStandardLibs() if clean: @@ -126,5 +130,4 @@ def save_python_standard_libs(clean=False): # to show the thing works new_libs_obj = PythonStandardLibs() new_libs_obj.get() - print "got these from pickled file: {}".format(new_libs_obj.libs) - + print("got these from pickled file: {}".format(new_libs_obj.libs)) diff --git a/commit_opener/repo.py b/commit_opener/repo.py index 9cf25cd..4c0ff8f 100644 --- a/commit_opener/repo.py +++ b/commit_opener/repo.py @@ -1,5 +1,5 @@ """ -This module contains a class that allows us to interact with the +This module contains a class that allows us to interact with the repository of interest. """ @@ -9,32 +9,32 @@ import os import os.path + class Repo(object): """ Interact with a repository: attributes, extract a copy, cleanup. - + This could maybe be re-written as a context manger so the cleanup happens automatically. 
- + """ - + def __init__(self, name, url, rtype="git"): """ name - Name of the project url - url of the project rtype - type of the repository. This will mostly be git. - + """ self.name = name self.url = url self.rtype = rtype - + self.local_resources = [] self.extracted = False self.file_list = [] self.tmpdir = None - - + def extract_local_copy(self): """Extract a local copy of the repository""" if "http" not in self.url: @@ -45,7 +45,7 @@ def extract_local_copy(self): return else: raise IOError("Path to repository doesn't exist") - + else: print("Extracting local copy of repository") self.tmpdir = tempfile.mkdtemp() @@ -57,52 +57,48 @@ def extract_local_copy(self): else: # We could implement SVN here, a quick svn export would do. raise NotImplemented - + try: subprocess.check_call(extract_cmd.split()) except subprocess.CalledProcessError: raise IOError("Unable to extract a local copy of repository") else: self.extracted = True - + def has(self, filename): """ Does the repository have a file matching a particular name? If it does then return the filename, otherwise return False. 
- + """ if not self.extracted: self.extract_local_copy() - + if not self.file_list: self._get_filelist() - + for f in self.file_list: if filename in f: return f - + return False - + def _get_filelist(self): """Just get a list of the files in the repo.""" - + if not self.extracted: - self.extract_local_copy() - + self.extract_local_copy() + for root, dirs, files in os.walk(self.tmpdir, topdown=True): for name in files: - print os.path.join(root, name) + print(os.path.join(root, name)) self.file_list.append(os.path.join(root, name)) - - + def cleanup(self): """Remove any local resources""" - + for resource in self.local_resources: try: shutil.rmtree(resource) except: - print "Unable to remove: {}".format(resource) - - - + print("Unable to remove: {}".format(resource)) diff --git a/tests/test_grab_dependencies.py b/tests/test_grab_dependencies.py index f710e86..757985d 100644 --- a/tests/test_grab_dependencies.py +++ b/tests/test_grab_dependencies.py @@ -22,7 +22,8 @@ def test_import_search(): # Ordering matters here. Normal imports done first, then froms. 
expected = ['os', 'scipy', 'pandas', 'matplotlib', 'numpy'] assert expected == co_grab.find_imports(text) - + + def test_commented(): text = (""" import os @@ -30,7 +31,8 @@ def test_commented(): """) expected = ['os'] assert expected == co_grab.find_imports(text) - + + def test_indented(): text = (""" import os @@ -38,4 +40,3 @@ def test_indented(): """) expected = ['os', 'scipy'] assert expected == co_grab.find_imports(text) - From 1e02d785e050712c752ed25d1c094735d78b767b Mon Sep 17 00:00:00 2001 From: Billingham Date: Tue, 12 Apr 2016 21:23:30 +0100 Subject: [PATCH 48/51] integrated `get_dependencies` MAY NOT WORK YET can't check if it works properly or not as require internet connection and I'm currently web-less --- commit_opener/commit_opener.py | 6 ++-- commit_opener/grab_dependencies.py | 47 +++++++++++++++--------------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/commit_opener/commit_opener.py b/commit_opener/commit_opener.py index de74c3f..0c7b0c1 100644 --- a/commit_opener/commit_opener.py +++ b/commit_opener/commit_opener.py @@ -5,9 +5,9 @@ from shutil import rmtree -from . grab_dependencies import get_dependencies -from . tree_scrape import author_minded -from . 
query_pmc import pmc_data as pubmed_data +from commit_opener.grab_dependencies import get_dependencies +from commit_opener.tree_scrape import author_minded +from commit_opener.query_pmc import pmc_data as pubmed_data OUT_SUBFOLDER = 'contrib_data' AUTHOR_DATA = 'author_data.json' diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 3f506f9..91d392a 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -1,5 +1,5 @@ """ -Extract the dependencies from the repository +Extract the dependencies from the repository Issue: work out dependencies #3 @@ -10,47 +10,46 @@ """ import re import os -import depsy import pandas -#import commit_opener.repo -import repo +from commit_opener import depsy +from commit_opener import repo def catfile(filename): """Get text contents of a file.""" - + with open(filename, 'r') as fhandle: print("Opening file {} and reading contents".format(filename)) text = fhandle.read() return text - + def get_dependencies(name, url): """ Get the dependecies for a git repository or any local python package. - + """ # Let's instantiate the repo object, so we can parse through it. myrepo = repo.Repo(name, url) - print("Created a repository instance for {}".format(url)) - + print("Created a repository instance for {}".format(url)) + # Extract a local copy myrepo.extract_local_copy() print("Local copy now available here: {}".format(myrepo.tmpdir)) myrepo._get_filelist() - # Note: the file has to be opened and read before passing to depsy + # Note: the file has to be opened and read before passing to depsy # functions. 
if myrepo.has("requirements.txt"): print("Repository has a requirements.txt file") - filetext = catfile(myrepo.has("requirements.txt")) + filetext = catfile(myrepo.has("requirements.txt")) reqs = depsy.parse_requirements_txt(filetext) elif myrepo.has("setup.py"): print("Repository has a setup.py file") - filetext = catfile(myrepo.has("setup.py")) + filetext = catfile(myrepo.has("setup.py")) reqs = depsy.parse_setup_py(filetext) else: - # No standard descriptions of the dependencies so let's try to work + # No standard descriptions of the dependencies so let's try to work # them out for ourselves. print("No req or setup file, so determining dependencies ourselves.") reqs = search_files_for_imports(myrepo) @@ -58,41 +57,41 @@ def get_dependencies(name, url): # Convert the list of requirements to a set. reqs = set(reqs) print("Found the following imports: {}".format("\n".join(reqs))) - + # Get the list of standard packages so that these can be removed. stdlibs = depsy.PythonStandardLibs() - stdlibs.get() - set_std_libs = set(stdlibs.libs) + stdlibs.get() + set_std_libs = set(stdlibs.libs) data = pandas.Series(list(reqs-set_std_libs)) data.sort_values(inplace=True) return data - - + + def search_files_for_imports(repo_instance): """ Walk all the python files in the repository and extract the import info. - + """ dep_list = [] for f in repo_instance.file_list: if ".py" in f: - print("Looking in {} for imports".format(os.path.basename(f))) + print("Looking in {} for imports".format(os.path.basename(f))) filetext = catfile(f) dep_list.extend(find_imports(filetext)) return dep_list - - + + def find_imports(text): """Apply regular expression searching to a file""" - # list of regexes. + # list of regexes. 
reexps = [re.compile('^[\si]+mport\s+(\w+)[\s\.]', re.MULTILINE), re.compile('^[\sf]+rom\s+(\w+)[\s\.]+', re.MULTILINE) ] - import_list = [] + import_list = [] for myregex in reexps: try: import_list.extend(re.findall(myregex, text)) From 988ebe30d9da0e35755b2d91c07f7795c896be99 Mon Sep 17 00:00:00 2001 From: Billingham Date: Thu, 14 Apr 2016 10:34:39 +0100 Subject: [PATCH 49/51] fixed file mode py3k --- commit_opener/depsy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit_opener/depsy.py b/commit_opener/depsy.py index eaaa02e..a44e64d 100644 --- a/commit_opener/depsy.py +++ b/commit_opener/depsy.py @@ -99,7 +99,7 @@ def pickle_libs(self): self.retrieve_from_web() self._mkdir() - with open(self.pickle_path, "w") as f: + with open(self.pickle_path, "wb") as f: pickle.dump(self.libs, f) print("saved these to file: {}".format(self.libs)) @@ -107,7 +107,7 @@ def pickle_libs(self): def get(self): if self.libs is None: try: - with open(self.pickle_path, "r") as f: + with open(self.pickle_path, "rb") as f: print("Loading list of Stdandard Python Libraries from pickle file") self.libs = pickle.load(f) except: From 3749d28e3b5cff02e6908a2f8890d832050fc6ab Mon Sep 17 00:00:00 2001 From: Billingham Date: Thu, 14 Apr 2016 10:41:39 +0100 Subject: [PATCH 50/51] debug statements --- commit_opener/commit_opener.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/commit_opener/commit_opener.py b/commit_opener/commit_opener.py index 0c7b0c1..9c3fdc8 100644 --- a/commit_opener/commit_opener.py +++ b/commit_opener/commit_opener.py @@ -63,6 +63,8 @@ def main(repo, out_dir, clobber_output, verbose): contributor_data = author_minded(repo) citation_data = pubmed_data('SPSS') depends_data = get_dependencies(repo_name, repo) + logging.info('got dependency data of type {}'.format(type(depends_data))) + logging.info('got dependency data:\n {}'.format(depends_data)) logging.info("output path: %s" % os.path.join(out_dir, 'contributor_data.json')) 
contributor_data.to_json(os.path.join(out_dir, From 5ab8547f57c66126e69db005a11b73c217d9cf64 Mon Sep 17 00:00:00 2001 From: Billingham Date: Thu, 14 Apr 2016 10:42:27 +0100 Subject: [PATCH 51/51] can also have a setup.py with no dependencies in it --- commit_opener/grab_dependencies.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/commit_opener/grab_dependencies.py b/commit_opener/grab_dependencies.py index 91d392a..38d8b87 100644 --- a/commit_opener/grab_dependencies.py +++ b/commit_opener/grab_dependencies.py @@ -48,6 +48,10 @@ def get_dependencies(name, url): print("Repository has a setup.py file") filetext = catfile(myrepo.has("setup.py")) reqs = depsy.parse_setup_py(filetext) + if len(reqs) < 1: + print("No reqs in setup file," + "so determining dependencies ourselves.") + reqs = search_files_for_imports(myrepo) else: # No standard descriptions of the dependencies so let's try to work # them out for ourselves.