From 329c3d7a319583e81b55c7ffa5fc41187571de44 Mon Sep 17 00:00:00 2001
From: Adam Ross <14985050+R055A@users.noreply.github.com>
Date: Sat, 8 Mar 2025 20:53:24 +0100
Subject: [PATCH 1/3] Add optional setting: include/exclude owners
---
.github/workflows/auto_update_stat_images.yml | 9 +-
.../non_auto_generate_stat_images.yml | 9 +-
README.md | 48 +++-
src/env_vars.py | 42 +++-
src/github_api_queries.py | 38 ++-
src/github_repo_stats.py | 232 ++++++++++--------
test/git_stats_test.py | 24 +-
7 files changed, 266 insertions(+), 136 deletions(-)
diff --git a/.github/workflows/auto_update_stat_images.yml b/.github/workflows/auto_update_stat_images.yml
index c84d369..b33a108 100644
--- a/.github/workflows/auto_update_stat_images.yml
+++ b/.github/workflows/auto_update_stat_images.yml
@@ -57,7 +57,8 @@ jobs:
env:
ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- EXCLUDED: ${{ secrets.EXCLUDED }}
+ EXCLUDED_REPOS: ${{ secrets.EXCLUDED_REPOS }}
+ EXCLUDED_OWNERS: ${{ secrets.EXCLUDED_OWNERS }}
EXCLUDED_LANGS: ${{ secrets.EXCLUDED_LANGS }}
EXCLUDED_REPO_LANGS: ${{ secrets.EXCLUDED_REPO_LANGS }}
IS_INCLUDE_FORKED_REPOS: ${{ secrets.IS_INCLUDE_FORKED_REPOS }}
@@ -71,10 +72,14 @@ jobs:
IS_STORE_REPO_VIEWS: ${{ secrets.IS_STORE_REPO_VIEWS }}
MORE_COLLABS: ${{ secrets.MORE_COLLABS }}
MORE_REPOS: ${{ secrets.MORE_REPOS }}
- ONLY_INCLUDED: ${{ secrets.ONLY_INCLUDED }}
+ ONLY_INCLUDED_REPOS: ${{ secrets.ONLY_INCLUDED_REPOS }}
+ ONLY_INCLUDED_OWNERS: ${{ secrets.ONLY_INCLUDED_OWNERS }}
ONLY_INCLUDED_COLLAB_REPOS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPOS }}
+ ONLY_INCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPO_OWNERS }}
EXCLUDED_COLLAB_REPOS: ${{ secrets.EXCLUDED_COLLAB_REPOS }}
+ EXCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.EXCLUDED_COLLAB_REPO_OWNERS }}
MORE_COLLAB_REPOS: ${{ secrets.MORE_COLLAB_REPOS }}
+ MORE_COLLAB_REPO_OWNERS: ${{ secrets.MORE_COLLAB_REPO_OWNERS }}
# Commits all changed files to the repository
- name: Commit to the repo
diff --git a/.github/workflows/non_auto_generate_stat_images.yml b/.github/workflows/non_auto_generate_stat_images.yml
index 4d7dee6..6b3586d 100644
--- a/.github/workflows/non_auto_generate_stat_images.yml
+++ b/.github/workflows/non_auto_generate_stat_images.yml
@@ -60,7 +60,8 @@ jobs:
env:
ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- EXCLUDED: ${{ secrets.EXCLUDED }}
+ EXCLUDED_REPOS: ${{ secrets.EXCLUDED_REPOS }}
+ EXCLUDED_OWNERS: ${{ secrets.EXCLUDED_OWNERS }}
EXCLUDED_LANGS: ${{ secrets.EXCLUDED_LANGS }}
EXCLUDED_REPO_LANGS: ${{ secrets.EXCLUDED_REPO_LANGS }}
IS_INCLUDE_FORKED_REPOS: ${{ secrets.IS_INCLUDE_FORKED_REPOS }}
@@ -74,10 +75,14 @@ jobs:
IS_STORE_REPO_VIEWS: ${{ secrets.IS_STORE_REPO_VIEWS }}
MORE_COLLABS: ${{ secrets.MORE_COLLABS }}
MORE_REPOS: ${{ secrets.MORE_REPOS }}
- ONLY_INCLUDED: ${{ secrets.ONLY_INCLUDED }}
+ ONLY_INCLUDED_REPOS: ${{ secrets.ONLY_INCLUDED_REPOS }}
+ ONLY_INCLUDED_OWNERS: ${{ secrets.ONLY_INCLUDED_OWNERS }}
ONLY_INCLUDED_COLLAB_REPOS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPOS }}
+ ONLY_INCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPO_OWNERS }}
EXCLUDED_COLLAB_REPOS: ${{ secrets.EXCLUDED_COLLAB_REPOS }}
+ EXCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.EXCLUDED_COLLAB_REPO_OWNERS }}
MORE_COLLAB_REPOS: ${{ secrets.MORE_COLLAB_REPOS }}
+ MORE_COLLAB_REPO_OWNERS: ${{ secrets.MORE_COLLAB_REPO_OWNERS }}
# Commits all changed files to the repository
- name: Commit to the repo
diff --git a/README.md b/README.md
index 7a716e4..814f2cd 100644
--- a/README.md
+++ b/README.md
@@ -101,15 +101,23 @@ Generate regularly updated visualizations of user and repository statistics from
Click drop-down to view optional repository Secrets for customizing GitHub statistic visualizations
-* ### Optional Secret *Name*: `EXCLUDED`
- For excluding repositories from being included entirely in the generated statistic visualizations.
+* ### Optional Secret *Name*: `EXCLUDED_REPOS`
+ For excluding repositories from the generated statistic visualizations.
**Instructions**:
* enter *Value* in the following format (separated by commas):
* `[owner/repo],[owner/repo],...,[owner/repo]`
* example:
* `jstrieb/github-stats,rahul-jha98/github-stats-transparent,idiotWu/stats`
-* ### Optional Secret *Name*: `ONLY_INCLUDED`
+* ### Optional Secret *Name*: `EXCLUDED_OWNERS`
+ For excluding all repositories belonging to the listed owners (users or organisations) from the generated statistic visualizations.
+
+ **Instructions**:
+ * enter *Value* in the following format (separated by commas):
+ * `[owner],[owner],...,[owner]`
+ * example:
+ * `R055A,University-Project-Repos`
+* ### Optional Secret *Name*: `ONLY_INCLUDED_REPOS`
For **ONLY** including repositories in the generated statistic visualizations
- such as when there are fewer repositories to include than to exclude
@@ -118,6 +126,15 @@ Generate regularly updated visualizations of user and repository statistics from
* `[owner/repo],[owner/repo],...,[owner/repo]`
* example:
* `R055A/GitStats,R055A/R055A`
+* ### Optional Secret *Name*: `ONLY_INCLUDED_OWNERS`
+ For **ONLY** including repositories belonging to the listed owners (users or organisations) in the generated statistic visualizations
+ - such as when there are fewer owners to include than to exclude
+
+ **Instructions**:
+ * enter *Value* in the following format (separated by commas):
+ * `[owner],[owner],...,[owner]`
+ * example:
+ * `R055A,University-Project-Repos`
* ### Optional Secret *Name*: `EXCLUDED_LANGS`
For excluding undesired languages from being included in the generated statistic visualizations
@@ -208,6 +225,15 @@ Generate regularly updated visualizations of user and repository statistics from
* `[owner/repo],[owner/repo],...,[owner/repo]`
* example:
* `R055A/UniversityProject-A,R055A/UniversityProject-B`
+* ### Optional Secret *Name*: `ONLY_INCLUDED_COLLAB_REPO_OWNERS`
+ For **ONLY** including collaborative repositories belonging to the listed owner(s) in the average contribution statistics calculations
+ - such as when there are fewer collaborative repository owners to include than to exclude
+
+ **Instructions**:
+ * enter *Value* in the following format (separated by commas):
+ * `[owner],[owner],...,[owner]`
+ * example:
+ * `R055A,University-Project-Repos`
* ### Optional Secret *Name*: `EXCLUDED_COLLAB_REPOS`
For excluding collaborative repositories from being included in the average contribution statistics calculations
- for example, such as for when
@@ -221,6 +247,14 @@ Generate regularly updated visualizations of user and repository statistics from
* `[owner/repo],[owner/repo],...,[owner/repo]`
* example:
* `tera_open_source/bit_typo_fix,peer_repo/missing_or_no_git_co_author_credit,dude_collab/email_not_reg_on_github,dog_ate/my_repo,mars/attacks`
+* ### Optional Secret *Name*: `EXCLUDED_COLLAB_REPO_OWNERS`
+ For excluding all collaborative repositories belonging to the listed owner(s) from the average contribution statistics calculations
+
+ **Instructions**:
+ * enter *Value* in the following format (separated by commas):
+ * `[owner],[owner],...,[owner]`
+ * example:
+ * `R055A,University-Project-Repos`
* ### Optional Secret *Name*: `MORE_COLLAB_REPOS`
For including collaborative repositories that are otherwise not included in the average contribution statistics calculations
- for example, such as when
@@ -232,6 +266,14 @@ Generate regularly updated visualizations of user and repository statistics from
* `[owner/repo],[owner/repo],...,[owner/repo]`
* example:
* `imported_ghosted/large_A+_collab_project,slave_trade/larger_A++_project`
+* ### Optional Secret *Name*: `MORE_COLLAB_REPO_OWNERS`
+ For including all collaborative repositories belonging to the listed owner(s) that would otherwise not be included in the average contribution statistics calculations
+
+ **Instructions**:
+ * enter *Value* in the following format (separated by commas):
+ * `[owner],[owner],...,[owner]`
+ * example:
+ * `R055A,University-Project-Repos`
* ### Optional Secret *Name*: `IS_STORE_REPO_VIEWS`
Boolean for storing generated repository view statistic visualization data beyond the 14 day-limit GitHub API allows
- `true` by default
diff --git a/src/env_vars.py b/src/env_vars.py
index 67563be..a5bf4b8 100644
--- a/src/env_vars.py
+++ b/src/env_vars.py
@@ -18,7 +18,8 @@ def __init__(
self,
username: str,
access_token: str,
- exclude_repos: Optional[str] = getenv("EXCLUDED"),
+ exclude_repos: Optional[str] = getenv("EXCLUDED_REPOS"),
+ exclude_owners: Optional[str] = getenv("EXCLUDED_OWNERS"),
exclude_langs: Optional[str] = getenv("EXCLUDED_LANGS"),
exclude_repo_langs: Optional[str] = getenv("EXCLUDED_REPO_LANGS"),
is_include_forked_repos: str = getenv("IS_INCLUDE_FORKED_REPOS"),
@@ -32,12 +33,18 @@ def __init__(
is_store_repo_view_count: str = getenv("IS_STORE_REPO_VIEWS"),
more_collaborators: Optional[str] = getenv("MORE_COLLABS"),
manually_added_repos: Optional[str] = getenv("MORE_REPOS"),
- only_included_repos: Optional[str] = getenv("ONLY_INCLUDED"),
+ only_included_repos: Optional[str] = getenv("ONLY_INCLUDED_REPOS"),
+ only_included_owners: Optional[str] = getenv("ONLY_INCLUDED_OWNERS"),
only_included_collab_repos: Optional[str] = getenv(
"ONLY_INCLUDED_COLLAB_REPOS"
),
+ only_included_collab_repo_owners: Optional[str] = getenv(
+ "ONLY_INCLUDED_COLLAB_REPO_OWNERS"
+ ),
exclude_collab_repos: Optional[str] = getenv("EXCLUDED_COLLAB_REPOS"),
+ exclude_collab_repo_owners: Optional[str] = getenv("EXCLUDED_COLLAB_REPO_OWNERS"),
more_collab_repos: Optional[str] = getenv("MORE_COLLAB_REPOS"),
+ more_collab_repo_owners: Optional[str] = getenv("MORE_COLLAB_REPO_OWNERS")
) -> None:
self.__db: GitRepoStatsDB = GitRepoStatsDB()
@@ -49,6 +56,11 @@ def __init__(
else:
self.exclude_repos = {x.strip() for x in exclude_repos.split(",")}
+ if exclude_owners is None:
+ self.exclude_owners: set[str] = set()
+ else:
+ self.exclude_owners = {x.strip() for x in exclude_owners.split(",")}
+
if exclude_langs is None:
self.exclude_langs: set[str] = set()
else:
@@ -154,6 +166,13 @@ def __init__(
x.strip() for x in only_included_repos.split(",")
}
+ if only_included_owners is None or only_included_owners == "":
+ self.only_included_owners: set[str] = set()
+ else:
+ self.only_included_owners = {
+ x.strip() for x in only_included_owners.split(",")
+ }
+
if only_included_collab_repos is None or only_included_collab_repos == "":
self.only_included_collab_repos: set[str] = set()
else:
@@ -161,6 +180,13 @@ def __init__(
x.strip() for x in only_included_collab_repos.split(",")
}
+ if only_included_collab_repo_owners is None or only_included_collab_repo_owners == "":
+ self.only_included_collab_repo_owners: set[str] = set()
+ else:
+ self.only_included_collab_repo_owners = {
+ x.strip() for x in only_included_collab_repo_owners.split(",")
+ }
+
if exclude_collab_repos is None:
self.exclude_collab_repos: set[str] = set()
else:
@@ -168,11 +194,23 @@ def __init__(
x.strip() for x in exclude_collab_repos.split(",")
}
+ if exclude_collab_repo_owners is None:
+ self.exclude_collab_repo_owners: set[str] = set()
+ else:
+ self.exclude_collab_repo_owners = {
+ x.strip() for x in exclude_collab_repo_owners.split(",")
+ }
+
if more_collab_repos is None:
self.more_collab_repos: set[str] = set()
else:
self.more_collab_repos = {x.strip() for x in more_collab_repos.split(",")}
+ if more_collab_repo_owners is None:
+ self.more_collab_repo_owners: set[str] = set()
+ else:
+ self.more_collab_repo_owners = {x.strip() for x in more_collab_repo_owners.split(",")}
+
self.pull_requests_count: int = self.__db.pull_requests
self.issues_count: int = self.__db.issues
diff --git a/src/github_api_queries.py b/src/github_api_queries.py
index 1ebe058..b22a274 100644
--- a/src/github_api_queries.py
+++ b/src/github_api_queries.py
@@ -19,8 +19,8 @@ class GitHubApiQueries(object):
API. Also includes functions to dynamically generate GraphQL queries.
"""
- __GITHUB_API_URL: str = "https://api.github.com/"
- __GRAPHQL_PATH: str = "graphql"
+ __GITHUB_API_URL: str = 'https://api.github.com/'
+ __GRAPHQL_PATH: str = 'graphql'
__REST_QUERY_LIMIT: int = 60
__ASYNCIO_SLEEP_TIME: int = 2
__DEFAULT_MAX_CONNECTIONS: int = 10
@@ -37,7 +37,7 @@ def __init__(
self.session: ClientSession = session
self.semaphore: Semaphore = Semaphore(max_connections)
self.headers: dict[str, str] = {
- "Authorization": f"Bearer {self.access_token}",
+ 'Authorization': f'Bearer {self.access_token}',
}
async def query(self, generated_query: str) -> dict[str, dict]:
@@ -52,21 +52,21 @@ async def query(self, generated_query: str) -> dict[str, dict]:
r_async = await self.session.post(
url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH,
headers=self.headers,
- json={"query": generated_query},
+ json={'query': generated_query},
)
result: dict[str, dict] = await r_async.json()
if result is not None:
return result
except ConnectionError:
- print("aiohttp failed for GraphQL query")
+ print('aiohttp failed for GraphQL query')
# Fall back on non-async requests
async with self.semaphore:
r_requests = post(
url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH,
headers=self.headers,
- json={"query": generated_query},
+ json={'query': generated_query},
)
result = r_requests.json()
@@ -86,7 +86,7 @@ async def query_rest(
for i in range(self.__REST_QUERY_LIMIT):
if params is None:
params = dict()
- if path.startswith("/"):
+ if path.startswith('/'):
path = path[1:]
try:
@@ -98,7 +98,7 @@ async def query_rest(
)
if r_async.status == HTTPStatus.ACCEPTED.value:
- print(f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying...")
+ print(f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...')
await sleep(self.__ASYNCIO_SLEEP_TIME)
continue
@@ -107,7 +107,7 @@ async def query_rest(
if result is not None:
return result
except ConnectionError:
- print("aiohttp failed for REST query attempt #" + str(i + 1))
+ print('aiohttp failed for REST query attempt #' + str(i + 1))
# Fall back on non-async requests
async with self.semaphore:
@@ -119,7 +119,7 @@ async def query_rest(
if r_requests.status_code == HTTPStatus.ACCEPTED.value:
print(
- f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying..."
+ f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...'
)
await sleep(self.__ASYNCIO_SLEEP_TIME)
continue
@@ -127,10 +127,23 @@ async def query_rest(
return r_requests.json()
print(
- f"Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete."
+ f'Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete.'
)
return dict()
+ @staticmethod
+ def get_user() -> str:
+ """
+ :return: GraphQL query with user login and name
+ """
+ return f"""
+ {{
+ viewer {{
+ login
+ name
+ }}
+ }}"""
+
@staticmethod
def repos_overview(
contrib_cursor: Optional[str] = None, owned_cursor: Optional[str] = None
@@ -142,7 +155,6 @@ def repos_overview(
{{
viewer {{
login,
- name,
repositories(
first: 100,
orderBy: {{
@@ -275,6 +287,6 @@ def all_contributions(cls, years: list[str]) -> str:
@staticmethod
def get_language_colors() -> dict[str, dict[str, str]]:
url: models.Response = get(
- "https://raw.githubusercontent.com/ozh/github-colors/master/colors.json"
+ 'https://raw.githubusercontent.com/ozh/github-colors/master/colors.json'
)
return loads(url.text)
diff --git a/src/github_repo_stats.py b/src/github_repo_stats.py
index 4d7cf53..1a69a16 100644
--- a/src/github_repo_stats.py
+++ b/src/github_repo_stats.py
@@ -17,11 +17,11 @@ class GitHubRepoStats(object):
Retrieve and store statistics about GitHub usage.
"""
- _DATE_FORMAT: str = "%Y-%m-%d"
+ _DATE_FORMAT: str = '%Y-%m-%d'
_EXCLUDED_USER_NAMES: list[str] = [
- "dependabot[bot]"
+ 'dependabot[bot]'
] # exclude bot data from being included in statistical calculations
- _NO_NAME: str = "No Name"
+ _NO_NAME: str = 'Unknown'
def __init__(
self, environment_vars: EnvironmentVariables, session: ClientSession
@@ -62,8 +62,8 @@ async def to_str(self) -> str:
:return: summary of all available statistics
"""
languages: dict[str, float] = await self.languages_proportional
- formatted_languages: str = "\n\t\t\t- ".join(
- [f"{k}: {v:0.4f}%" for k, v in languages.items()]
+ formatted_languages: str = '\n\t\t\t- '.join(
+ [f'{k}: {v:0.4f}%' for k, v in languages.items()]
)
users_lines_changed: tuple[int, int] = await self.lines_changed
@@ -100,10 +100,13 @@ async def is_repo_name_invalid(self, repo_name: str) -> bool:
:return: True if repo is not to be included in self._repos
"""
return (
- repo_name in self._repos
- or len(self.environment_vars.only_included_repos) > 0
- and repo_name not in self.environment_vars.only_included_repos
+ # repo_name in self._repos
+ (len(self.environment_vars.only_included_owners) > 0
+ and repo_name.split('/')[0] not in self.environment_vars.only_included_owners)
+ or (len(self.environment_vars.only_included_repos) > 0
+ and repo_name not in self.environment_vars.only_included_repos)
or repo_name in self.environment_vars.exclude_repos
+ or repo_name.split('/')[0] in self.environment_vars.exclude_owners
)
async def is_repo_type_excluded(
@@ -120,13 +123,13 @@ async def is_repo_type_excluded(
"""
return (
not self.environment_vars.is_include_forked_repos
- and (repo_data.get("isFork") or repo_data.get("fork"))
+ and (repo_data.get('isFork') or repo_data.get('fork'))
or self.environment_vars.is_exclude_archive_repos
- and (repo_data.get("isArchived") or repo_data.get("archived"))
+ and (repo_data.get('isArchived') or repo_data.get('archived'))
or self.environment_vars.is_exclude_private_repos
- and (repo_data.get("isPrivate") or repo_data.get("private"))
+ and (repo_data.get('isPrivate') or repo_data.get('private'))
or self.environment_vars.is_exclude_public_repos
- and (not repo_data.get("isPrivate") or not repo_data.get("private"))
+ and (not repo_data.get('isPrivate') or not repo_data.get('private'))
)
async def get_stats(self) -> None:
@@ -144,39 +147,44 @@ async def get_stats(self) -> None:
next_owned: str | None = None
next_contrib: str | None = None
+ user_raw_result: dict[str, dict] = await self.queries.query(
+ generated_query=GitHubApiQueries.get_user()
+ )
+ user_raw_result = user_raw_result if user_raw_result else {}
+ if (
+ user_raw_result.get('data', {}) is not None
+ and user_raw_result.get('data', {}).get('viewer', {}) is not None
+ and (
+ user_raw_result.get('data', {}).get('viewer', {}).get('name', None) is not None or
+ user_raw_result.get('data', {}).get('viewer', {}).get('user', None) is not None
+ )
+ ):
+ self._name = user_raw_result.get('data', {}).get('viewer', {}).get('name', self._NO_NAME)
+ elif user_raw_result.get('message', '').lower() == 'bad credentials':
+ raise ConnectionRefusedError(f'Unauthorized Error: Invalid Access Token')
+
while True:
- raw_results: dict[str, dict] = await self.queries.query(
+ repo_overview_raw_results: dict[str, dict] = await self.queries.query(
generated_query=GitHubApiQueries.repos_overview(
owned_cursor=next_owned, contrib_cursor=next_contrib
)
)
- raw_results = raw_results if raw_results else {}
+ repo_overview_raw_results = repo_overview_raw_results if repo_overview_raw_results else {}
if (
- raw_results.get("data", {}) is not None
- and raw_results.get("data", {}).get("viewer", {}) is not None
+ repo_overview_raw_results.get('data', {}) is not None
+ and repo_overview_raw_results.get('data', {}).get('viewer', {}) is not None
):
- if not self._name:
- self._name = (
- raw_results.get("data", {}).get("viewer", {}).get("name", None)
- )
- if self._name is None:
- self._name = (
- raw_results.get("data", {})
- .get("viewer", {})
- .get("login", self._NO_NAME)
- )
-
owned_repos: dict[str, dict | list[dict]] = (
- raw_results.get("data", {})
- .get("viewer", {})
- .get("repositories", {})
+ repo_overview_raw_results.get('data', {})
+ .get('viewer', {})
+ .get('repositories', {})
)
- repos: list[dict] = owned_repos.get("nodes", [])
+ repos: list[dict] = owned_repos.get('nodes', [])
contrib_repos: dict[str, dict | list] = (
- raw_results.get("data", {})
- .get("viewer", {})
- .get("repositoriesContributedTo", {})
+ repo_overview_raw_results.get('data', {})
+ .get('viewer', {})
+ .get('repositoriesContributedTo', {})
)
else:
owned_repos = {}
@@ -184,23 +192,23 @@ async def get_stats(self) -> None:
contrib_repos = {}
if not self.environment_vars.is_exclude_contrib_repos:
- repos += contrib_repos.get("nodes", [])
+ repos += contrib_repos.get('nodes', [])
await self.repo_stats(repos=repos)
- is_cur_owned: bool = owned_repos.get("pageInfo", {}).get(
- "hasNextPage", False
+ is_cur_owned: bool = owned_repos.get('pageInfo', {}).get(
+ 'hasNextPage', False
)
- is_cur_contrib: bool = contrib_repos.get("pageInfo", {}).get(
- "hasNextPage", False
+ is_cur_contrib: bool = contrib_repos.get('pageInfo', {}).get(
+ 'hasNextPage', False
)
if is_cur_owned or is_cur_contrib:
- next_owned = owned_repos.get("pageInfo", {}).get(
- "endCursor", next_owned
+ next_owned = owned_repos.get('pageInfo', {}).get(
+ 'endCursor', next_owned
)
- next_contrib = contrib_repos.get("pageInfo", {}).get(
- "endCursor", next_contrib
+ next_contrib = contrib_repos.get('pageInfo', {}).get(
+ 'endCursor', next_contrib
)
else:
break
@@ -215,9 +223,9 @@ async def get_stats(self) -> None:
self._excluded_languages.add(lang_name)
# TODO: Improve languages to scale by number of contributions to specific filetypes
- langs_total: int = sum([v.get("size", 0) for v in self._languages.values()])
+ langs_total: int = sum([v.get('size', 0) for v in self._languages.values()])
for k, v in self._languages.items():
- v["prop"]: float = 100 * (v.get("size", 0) / langs_total)
+ v['prop']: float = 100 * (v.get('size', 0) / langs_total)
def __exclude_repo_langs(
self,
@@ -244,20 +252,20 @@ async def repo_stats(self, repos: list[dict]) -> None:
if not repo or await self.is_repo_type_excluded(repo_data=repo):
continue
- repo_name: str = repo.get("nameWithOwner")
- if await self.is_repo_name_invalid(repo_name):
+ repo_name: str = repo.get('nameWithOwner')
+ if await self.is_repo_name_invalid(repo_name=repo_name):
continue
self._repos.add(repo_name)
- self._stargazers += repo.get("stargazers").get("totalCount", 0)
- self._forks += repo.get("forkCount", 0)
+ self._stargazers += repo.get('stargazers').get('totalCount', 0)
+ self._forks += repo.get('forkCount', 0)
- if repo.get("isEmpty"):
+ if repo.get('isEmpty'):
self._empty_repos.add(repo_name)
continue
- for lang in repo.get("languages", {}).get("edges", []):
- lang_name: str = lang.get("node", {}).get("name", "Other")
+ for lang in repo.get('languages', {}).get('edges', []):
+ lang_name: str = lang.get('node', {}).get('name', 'Other')
languages: dict[str, dict[str, float | str]] = await self.languages
if self.__exclude_repo_langs(
@@ -270,13 +278,13 @@ async def repo_stats(self, repos: list[dict]) -> None:
continue
if lang_name in languages:
- languages[lang_name]["size"] += lang.get("size", 0)
- languages[lang_name]["occurrences"] += 1
+ languages[lang_name]['size'] += lang.get('size', 0)
+ languages[lang_name]['occurrences'] += 1
else:
languages[lang_name] = {
- "size": lang.get("size", 0),
- "occurrences": 1,
- "color": lang.get("node", {}).get("color"),
+ 'size': lang.get('size', 0),
+ 'occurrences': 1,
+ 'color': lang.get('node', {}).get('color'),
}
async def manually_added_repo_stats(self) -> None:
@@ -291,21 +299,21 @@ async def manually_added_repo_stats(self) -> None:
self._repos.add(repo_name)
repo_stats: dict[str, str | int | dict] = await self.queries.query_rest(
- path=f"/repos/{repo_name}"
+ path=f'/repos/{repo_name}'
)
if await self.is_repo_type_excluded(repo_data=repo_stats):
continue
- self._stargazers += repo_stats.get("stargazers_count", 0)
- self._forks += repo_stats.get("forks", 0)
+ self._stargazers += repo_stats.get('stargazers_count', 0)
+ self._forks += repo_stats.get('forks', 0)
- if repo_stats.get("size") == 0:
+ if repo_stats.get('size') == 0:
self._empty_repos.add(repo_name)
continue
- if repo_stats.get("language"):
+ if repo_stats.get('language'):
langs: dict[str, int] = await self.queries.query_rest(
- path=f"/repos/{repo_name}/languages"
+ path=f'/repos/{repo_name}/languages'
)
for lang_name, size in langs.items():
@@ -321,13 +329,13 @@ async def manually_added_repo_stats(self) -> None:
continue
if lang_name in languages:
- languages[lang_name]["size"] += size
- languages[lang_name]["occurrences"] += 1
+ languages[lang_name]['size'] += size
+ languages[lang_name]['occurrences'] += 1
else:
languages[lang_name] = {
- "size": size,
- "occurrences": 1,
- "color": lang_cols.get(lang_name).get("color"),
+ 'size': size,
+ 'occurrences': 1,
+ 'color': lang_cols.get(lang_name).get('color'),
}
@property
@@ -393,7 +401,7 @@ async def languages_proportional(self) -> dict[str, float]:
if self._languages is None:
await self.get_stats()
assert self._languages is not None
- return {k: v.get("prop", 0) for (k, v) in self._languages.items()}
+ return {k: v.get('prop', 0) for (k, v) in self._languages.items()}
@property
async def repos(self) -> set[str]:
@@ -419,7 +427,7 @@ async def owned_repos(self) -> set[str]:
[
i
for i in self._repos
- if self.environment_vars.username == i.split("/")[0]
+ if self.environment_vars.username == i.split('/')[0]
]
)
return self._owned_repos
@@ -450,10 +458,10 @@ async def total_contributions(self) -> int:
generated_query=GitHubApiQueries.contributions_all_years()
)
)
- .get("data", {})
- .get("viewer", {})
- .get("contributionsCollection", {})
- .get("contributionYears", [])
+ .get('data', {})
+ .get('viewer', {})
+ .get('contributionsCollection', {})
+ .get('contributionYears', [])
)
by_year: list[dict[str, dict[str, int]]] = list(
@@ -462,14 +470,14 @@ async def total_contributions(self) -> int:
generated_query=GitHubApiQueries.all_contributions(years=years)
)
)
- .get("data", {})
- .get("viewer", {})
+ .get('data', {})
+ .get('viewer', {})
.values()
)
for year in by_year:
- self._total_contributions += year.get("contributionCalendar", {}).get(
- "totalContributions", 0
+ self._total_contributions += year.get('contributionCalendar', {}).get(
+ 'totalContributions', 0
)
return cast(typ=int, val=self._total_contributions)
@@ -486,9 +494,13 @@ async def lines_changed(self) -> tuple[int, int]:
return self._users_lines_changed
_, collab_repos = await self.raw_collaborators()
slave_status_repos: set[str] = self.environment_vars.more_collab_repos
+ slave_status_repo_owners: set[str] = self.environment_vars.more_collab_repo_owners
exclusive_collab_repos: set[str] = (
self.environment_vars.only_included_collab_repos
)
+ exclusive_collab_repo_owners: set[str] = (
+ self.environment_vars.only_included_collab_repo_owners
+ )
contributor_set: set[str] = set()
repo_total_changes_arr: list[int] = []
@@ -511,16 +523,16 @@ async def lines_changed(self) -> tuple[int, int]:
author_deletions: int = 0
r: list[dict[str, any]] = await self.queries.query_rest(
- path=f"/repos/{repo}/stats/contributors"
+ path=f'/repos/{repo}/stats/contributors'
)
for author_obj in r:
# Handle malformed response from API by skipping this repo
if not isinstance(author_obj, dict) or not isinstance(
- author_obj.get("author", {}), dict
+ author_obj.get('author', {}), dict
):
continue
- author: str = author_obj.get("author", {}).get("login", "")
+ author: str = author_obj.get('author', {}).get('login', '')
contributor_set.add(
author
) # for count number of total other contributors
@@ -529,14 +541,14 @@ async def lines_changed(self) -> tuple[int, int]:
author != self.environment_vars.username
and author not in self._EXCLUDED_USER_NAMES
):
- for week in author_obj.get("weeks", []):
- other_authors_total_changes += week.get("a", 0)
- other_authors_total_changes += week.get("d", 0)
+ for week in author_obj.get('weeks', []):
+ other_authors_total_changes += week.get('a', 0)
+ other_authors_total_changes += week.get('d', 0)
repo_contributors.add(author)
else:
- for week in author_obj.get("weeks", []):
- author_additions += week.get("a", 0)
- author_deletions += week.get("d", 0)
+ for week in author_obj.get('weeks', []):
+ author_additions += week.get('a', 0)
+ author_deletions += week.get('d', 0)
author_total_additions += author_additions
author_total_deletions += author_deletions
@@ -547,10 +559,13 @@ async def lines_changed(self) -> tuple[int, int]:
# calculate average author's contributions to each repository with at least one other collaborator
if (
repo not in self.environment_vars.exclude_collab_repos
+ and repo.split('/')[0] not in self.environment_vars.exclude_collab_repo_owners
and (
- not exclusive_collab_repos
+ not (exclusive_collab_repos or exclusive_collab_repo_owners)
or repo in exclusive_collab_repos
+ or repo.split('/')[0] in exclusive_collab_repo_owners
or repo in slave_status_repos
+ or repo.split('/')[0] in slave_status_repo_owners
)
and (author_additions + author_deletions) > 0
and (
@@ -559,6 +574,7 @@ async def lines_changed(self) -> tuple[int, int]:
in collab_repos.union(
slave_status_repos
) # either collaborators are ghosting or no show in repo
+ or repo.split('/')[0] in slave_status_repo_owners
)
):
repo_total_changes: int = (
@@ -582,13 +598,13 @@ async def lines_changed(self) -> tuple[int, int]:
if sum(author_contribution_percentages) > 0:
self._avg_percent: str = (
- f"{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%"
+ f'{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%'
)
self._avg_percent_weighted: str = (
- f"{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%"
+ f'{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%'
)
else:
- self._avg_percent_weighted = self._avg_percent = "N/A"
+ self._avg_percent_weighted = self._avg_percent = 'N/A'
self._contributors: set[str] = contributor_set
@@ -640,23 +656,23 @@ async def views(self) -> int:
today_view_count: int = 0
for repo in await self.repos:
r: dict[str, str | list[dict[str, str]]] = await self.queries.query_rest(
- path=f"/repos/{repo}/traffic/views"
+ path=f'/repos/{repo}/traffic/views'
)
- for view in r.get("views", []):
- if view.get("timestamp")[:10] == today:
- today_view_count += view.get("count", 0)
- elif view.get("timestamp")[:10] > last_viewed:
- self.environment_vars.set_views(views=view.get("count", 0))
- dates.add(view.get("timestamp")[:10])
+ for view in r.get('views', []):
+ if view.get('timestamp')[:10] == today:
+ today_view_count += view.get('count', 0)
+ elif view.get('timestamp')[:10] > last_viewed:
+ self.environment_vars.set_views(views=view.get('count', 0))
+ dates.add(view.get('timestamp')[:10])
- if last_viewed == "0000-00-00":
+ if last_viewed == '0000-00-00':
dates.remove(last_viewed)
if self.environment_vars.is_store_repo_view_count:
self.environment_vars.set_last_viewed(new_last_viewed_date=yesterday)
- if self.environment_vars.repo_first_viewed == "0000-00-00":
+ if self.environment_vars.repo_first_viewed == '0000-00-00':
self.environment_vars.repo_first_viewed = min(dates)
self.environment_vars.set_first_viewed(
new_first_viewed_date=self.environment_vars.repo_first_viewed
@@ -688,14 +704,14 @@ async def raw_collaborators(self) -> tuple[set[str], set[str]]:
for repo in await self.repos:
r: list[dict[str, any]] = await self.queries.query_rest(
- path=f"/repos/{repo}/collaborators"
+ path=f'/repos/{repo}/collaborators'
)
collab_count: int = 0
for obj in r:
if isinstance(obj, dict):
collab_count += 1
- self._collaborator_set.add(obj.get("login"))
+ self._collaborator_set.add(obj.get('login'))
if collab_count > 1:
self._collab_repos.add(repo)
@@ -743,14 +759,14 @@ async def pull_requests(self) -> int:
if not self._is_fetch_rate_limit_exceeded:
for repo in await self.repos:
end_point: str = (
- f"/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}"
+ f'/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}'
)
for pr_data in await self.queries.query_rest(path=end_point):
try:
(
- pull_requests.add(pr_data["url"])
- if "url" in pr_data.keys()
+ pull_requests.add(pr_data['url'])
+ if 'url' in pr_data.keys()
else None
)
except AttributeError:
@@ -781,14 +797,14 @@ async def issues(self) -> int:
if not self._is_fetch_rate_limit_exceeded:
for repo in await self.repos:
end_point: str = (
- f"/repos/{repo}/issues?state=all&involved={self.environment_vars.username}"
+ f'/repos/{repo}/issues?state=all&involved={self.environment_vars.username}'
)
for issue_data in await self.queries.query_rest(path=end_point):
try:
(
- issues.add(issue_data["url"])
- if "url" in issue_data.keys()
+ issues.add(issue_data['url'])
+ if 'url' in issue_data.keys()
else None
)
except AttributeError:
diff --git a/test/git_stats_test.py b/test/git_stats_test.py
index 7afbcca..5a5cc8a 100644
--- a/test/git_stats_test.py
+++ b/test/git_stats_test.py
@@ -16,7 +16,8 @@
GITHUB_ACTOR: str = getenv("GITHUB_ACTOR") # or manually enter ''
# OPTIONAL
-EXCLUDED_REPOS: str = getenv("EXCLUDED") # or enter: '[owner/repo],...,[owner/repo]'
+EXCLUDED_REPOS: str = getenv("EXCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]'
+EXCLUDED_OWNERS: str = getenv("EXCLUDED_OWNERS") # or enter: '[owner],...,[owner]'
EXCLUDED_LANGS: str = getenv("EXCLUDED_LANGS") # or enter: '[lang],...,[lang]'
EXCLUDED_REPO_LANGS: str = getenv(
"EXCLUDED_REPO_LANGS"
@@ -32,14 +33,21 @@
IS_MAINTAIN_REPO_VIEWS: str = getenv("IS_STORE_REPO_VIEWS") # or enter: ''
MORE_COLLABS: str = getenv("MORE_COLLABS") # or enter: ''
MORE_REPOS: str = getenv("MORE_REPOS") # or enter: '[owner/repo],...,[owner/repo]'
-ONLY_INCLUDED: str = getenv("ONLY_INCLUDED") # or enter: '[owner/repo],...'
+ONLY_INCLUDED_REPOS: str = getenv("ONLY_INCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]'
+ONLY_INCLUDED_OWNERS: str = "University-Project-Repos" # getenv("ONLY_INCLUDED_OWNERS") # or enter: [owner],...,[owner]
ONLY_INCLUDED_COLLAB_REPOS: str = getenv(
"ONLY_INCLUDED_COLLAB_REPOS"
-) # or enter: '[owner/repo],...'
+) # or enter: [owner/repo],...,[owner/repo]
+ONLY_INCLUDED_COLLAB_REPO_OWNERS: str = getenv(
+ "ONLY_INCLUDED_COLLAB_REPO_OWNERS"
+) # or enter: [owner],...,[owner]
EXCLUDED_COLLAB_REPOS: str = getenv(
"EXCLUDED_COLLAB_REPOS"
-) # or enter: '[owner/repo],...'
-MORE_COLLAB_REPOS: str = getenv("MORE_COLLAB_REPOS") # or enter: '[owner/repo],...'
+) # or enter: [owner/repo],...,[owner/repo]
+EXCLUDED_COLLAB_REPO_OWNERS: str = getenv(
+ "EXCLUDED_COLLAB_REPO_OWNERS"
+) # or enter: [owner],...,[owner]
+MORE_COLLAB_REPOS: str = getenv("MORE_COLLAB_REPOS") # or enter: [owner/repo],...,[owner/repo]
async def main() -> None:
@@ -57,6 +65,7 @@ async def main() -> None:
username=GITHUB_ACTOR,
access_token=ACCESS_TOKEN,
exclude_repos=EXCLUDED_REPOS,
+ exclude_owners=EXCLUDED_OWNERS,
exclude_langs=EXCLUDED_LANGS,
exclude_repo_langs=EXCLUDED_REPO_LANGS,
is_include_forked_repos=IS_INCLUDE_FORKED_REPOS,
@@ -70,9 +79,12 @@ async def main() -> None:
is_store_repo_view_count=IS_MAINTAIN_REPO_VIEWS,
more_collaborators=MORE_COLLABS,
manually_added_repos=MORE_REPOS,
- only_included_repos=ONLY_INCLUDED,
+ only_included_repos=ONLY_INCLUDED_REPOS,
+ only_included_owners=ONLY_INCLUDED_OWNERS,
only_included_collab_repos=ONLY_INCLUDED_COLLAB_REPOS,
+ only_included_collab_repo_owners=ONLY_INCLUDED_COLLAB_REPO_OWNERS,
exclude_collab_repos=EXCLUDED_COLLAB_REPOS,
+ exclude_collab_repo_owners=EXCLUDED_COLLAB_REPO_OWNERS,
more_collab_repos=MORE_COLLAB_REPOS,
),
session=session,
From d1d5414861dc9a6f18f533f5521dc4338f0b67da Mon Sep 17 00:00:00 2001
From: Adam Ross <14985050+R055A@users.noreply.github.com>
Date: Sat, 8 Mar 2025 20:56:20 +0100
Subject: [PATCH 2/3] Fix code-style convention (double quotes, line wrapping)
---
src/env_vars.py | 15 ++-
src/github_api_queries.py | 26 ++---
src/github_repo_stats.py | 237 ++++++++++++++++++++------------------
test/git_stats_test.py | 16 ++-
4 files changed, 163 insertions(+), 131 deletions(-)
diff --git a/src/env_vars.py b/src/env_vars.py
index a5bf4b8..70cbb86 100644
--- a/src/env_vars.py
+++ b/src/env_vars.py
@@ -42,9 +42,11 @@ def __init__(
"ONLY_INCLUDED_COLLAB_REPO_OWNERS"
),
exclude_collab_repos: Optional[str] = getenv("EXCLUDED_COLLAB_REPOS"),
- exclude_collab_repo_owners: Optional[str] = getenv("EXCLUDED_COLLAB_REPO_OWNERS"),
+ exclude_collab_repo_owners: Optional[str] = getenv(
+ "EXCLUDED_COLLAB_REPO_OWNERS"
+ ),
more_collab_repos: Optional[str] = getenv("MORE_COLLAB_REPOS"),
- more_collab_repo_owners: Optional[str] = getenv("MORE_COLLAB_REPO_OWNERS")
+ more_collab_repo_owners: Optional[str] = getenv("MORE_COLLAB_REPO_OWNERS"),
) -> None:
self.__db: GitRepoStatsDB = GitRepoStatsDB()
@@ -180,7 +182,10 @@ def __init__(
x.strip() for x in only_included_collab_repos.split(",")
}
- if only_included_collab_repo_owners is None or only_included_collab_repo_owners == "":
+ if (
+ only_included_collab_repo_owners is None
+ or only_included_collab_repo_owners == ""
+ ):
self.only_included_collab_repo_owners: set[str] = set()
else:
self.only_included_collab_repo_owners = {
@@ -209,7 +214,9 @@ def __init__(
if more_collab_repo_owners is None:
self.more_collab_repo_owners: set[str] = set()
else:
- self.more_collab_repo_owners = {x.strip() for x in more_collab_repo_owners.split(",")}
+ self.more_collab_repo_owners = {
+ x.strip() for x in more_collab_repo_owners.split(",")
+ }
self.pull_requests_count: int = self.__db.pull_requests
self.issues_count: int = self.__db.issues
diff --git a/src/github_api_queries.py b/src/github_api_queries.py
index b22a274..0901552 100644
--- a/src/github_api_queries.py
+++ b/src/github_api_queries.py
@@ -19,8 +19,8 @@ class GitHubApiQueries(object):
API. Also includes functions to dynamically generate GraphQL queries.
"""
- __GITHUB_API_URL: str = 'https://api.github.com/'
- __GRAPHQL_PATH: str = 'graphql'
+ __GITHUB_API_URL: str = "https://api.github.com/"
+ __GRAPHQL_PATH: str = "graphql"
__REST_QUERY_LIMIT: int = 60
__ASYNCIO_SLEEP_TIME: int = 2
__DEFAULT_MAX_CONNECTIONS: int = 10
@@ -37,7 +37,7 @@ def __init__(
self.session: ClientSession = session
self.semaphore: Semaphore = Semaphore(max_connections)
self.headers: dict[str, str] = {
- 'Authorization': f'Bearer {self.access_token}',
+ "Authorization": f"Bearer {self.access_token}",
}
async def query(self, generated_query: str) -> dict[str, dict]:
@@ -52,21 +52,21 @@ async def query(self, generated_query: str) -> dict[str, dict]:
r_async = await self.session.post(
url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH,
headers=self.headers,
- json={'query': generated_query},
+ json={"query": generated_query},
)
result: dict[str, dict] = await r_async.json()
if result is not None:
return result
except ConnectionError:
- print('aiohttp failed for GraphQL query')
+ print("aiohttp failed for GraphQL query")
# Fall back on non-async requests
async with self.semaphore:
r_requests = post(
url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH,
headers=self.headers,
- json={'query': generated_query},
+ json={"query": generated_query},
)
result = r_requests.json()
@@ -86,7 +86,7 @@ async def query_rest(
for i in range(self.__REST_QUERY_LIMIT):
if params is None:
params = dict()
- if path.startswith('/'):
+ if path.startswith("/"):
path = path[1:]
try:
@@ -98,7 +98,7 @@ async def query_rest(
)
if r_async.status == HTTPStatus.ACCEPTED.value:
- print(f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...')
+ print(f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying...")
await sleep(self.__ASYNCIO_SLEEP_TIME)
continue
@@ -107,7 +107,7 @@ async def query_rest(
if result is not None:
return result
except ConnectionError:
- print('aiohttp failed for REST query attempt #' + str(i + 1))
+ print("aiohttp failed for REST query attempt #" + str(i + 1))
# Fall back on non-async requests
async with self.semaphore:
@@ -119,7 +119,7 @@ async def query_rest(
if r_requests.status_code == HTTPStatus.ACCEPTED.value:
print(
- f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...'
+ f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying..."
)
await sleep(self.__ASYNCIO_SLEEP_TIME)
continue
@@ -127,7 +127,7 @@ async def query_rest(
return r_requests.json()
print(
- f'Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete.'
+ f"Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete."
)
return dict()
@@ -136,7 +136,7 @@ def get_user() -> str:
"""
:return: GraphQL query with user login and name
"""
- return f"""
+ return """
{{
viewer {{
login
@@ -287,6 +287,6 @@ def all_contributions(cls, years: list[str]) -> str:
@staticmethod
def get_language_colors() -> dict[str, dict[str, str]]:
url: models.Response = get(
- 'https://raw.githubusercontent.com/ozh/github-colors/master/colors.json'
+ "https://raw.githubusercontent.com/ozh/github-colors/master/colors.json"
)
return loads(url.text)
diff --git a/src/github_repo_stats.py b/src/github_repo_stats.py
index 1a69a16..f84dde5 100644
--- a/src/github_repo_stats.py
+++ b/src/github_repo_stats.py
@@ -17,11 +17,11 @@ class GitHubRepoStats(object):
Retrieve and store statistics about GitHub usage.
"""
- _DATE_FORMAT: str = '%Y-%m-%d'
+ _DATE_FORMAT: str = "%Y-%m-%d"
_EXCLUDED_USER_NAMES: list[str] = [
- 'dependabot[bot]'
+ "dependabot[bot]"
] # exclude bot data from being included in statistical calculations
- _NO_NAME: str = 'Unknown'
+ _NO_NAME: str = "Unknown"
def __init__(
self, environment_vars: EnvironmentVariables, session: ClientSession
@@ -62,8 +62,8 @@ async def to_str(self) -> str:
:return: summary of all available statistics
"""
languages: dict[str, float] = await self.languages_proportional
- formatted_languages: str = '\n\t\t\t- '.join(
- [f'{k}: {v:0.4f}%' for k, v in languages.items()]
+ formatted_languages: str = "\n\t\t\t- ".join(
+ [f"{k}: {v:0.4f}%" for k, v in languages.items()]
)
users_lines_changed: tuple[int, int] = await self.lines_changed
@@ -101,12 +101,17 @@ async def is_repo_name_invalid(self, repo_name: str) -> bool:
"""
return (
# repo_name in self._repos
- (len(self.environment_vars.only_included_owners) > 0
- and repo_name.split('/')[0] not in self.environment_vars.only_included_owners)
- or (len(self.environment_vars.only_included_repos) > 0
- and repo_name not in self.environment_vars.only_included_repos)
+ (
+ len(self.environment_vars.only_included_owners) > 0
+ and repo_name.split("/")[0]
+ not in self.environment_vars.only_included_owners
+ )
+ or (
+ len(self.environment_vars.only_included_repos) > 0
+ and repo_name not in self.environment_vars.only_included_repos
+ )
or repo_name in self.environment_vars.exclude_repos
- or repo_name.split('/')[0] in self.environment_vars.exclude_owners
+ or repo_name.split("/")[0] in self.environment_vars.exclude_owners
)
async def is_repo_type_excluded(
@@ -123,13 +128,13 @@ async def is_repo_type_excluded(
"""
return (
not self.environment_vars.is_include_forked_repos
- and (repo_data.get('isFork') or repo_data.get('fork'))
+ and (repo_data.get("isFork") or repo_data.get("fork"))
or self.environment_vars.is_exclude_archive_repos
- and (repo_data.get('isArchived') or repo_data.get('archived'))
+ and (repo_data.get("isArchived") or repo_data.get("archived"))
or self.environment_vars.is_exclude_private_repos
- and (repo_data.get('isPrivate') or repo_data.get('private'))
+ and (repo_data.get("isPrivate") or repo_data.get("private"))
or self.environment_vars.is_exclude_public_repos
- and (not repo_data.get('isPrivate') or not repo_data.get('private'))
+ and (not repo_data.get("isPrivate") or not repo_data.get("private"))
)
async def get_stats(self) -> None:
@@ -152,16 +157,22 @@ async def get_stats(self) -> None:
)
user_raw_result = user_raw_result if user_raw_result else {}
if (
- user_raw_result.get('data', {}) is not None
- and user_raw_result.get('data', {}).get('viewer', {}) is not None
- and (
- user_raw_result.get('data', {}).get('viewer', {}).get('name', None) is not None or
- user_raw_result.get('data', {}).get('viewer', {}).get('user', None) is not None
- )
+ user_raw_result.get("data", {}) is not None
+ and user_raw_result.get("data", {}).get("viewer", {}) is not None
+ and (
+ user_raw_result.get("data", {}).get("viewer", {}).get("name", None)
+ is not None
+ or user_raw_result.get("data", {}).get("viewer", {}).get("user", None)
+ is not None
+ )
):
- self._name = user_raw_result.get('data', {}).get('viewer', {}).get('name', self._NO_NAME)
- elif user_raw_result.get('message', '').lower() == 'bad credentials':
- raise ConnectionRefusedError(f'Unauthorized Error: Invalid Access Token')
+ self._name = (
+ user_raw_result.get("data", {})
+ .get("viewer", {})
+ .get("name", self._NO_NAME)
+ )
+ elif user_raw_result.get("message", "").lower() == "bad credentials":
+ raise ConnectionRefusedError("Unauthorized Error: Invalid Access Token")
while True:
repo_overview_raw_results: dict[str, dict] = await self.queries.query(
@@ -169,22 +180,25 @@ async def get_stats(self) -> None:
owned_cursor=next_owned, contrib_cursor=next_contrib
)
)
- repo_overview_raw_results = repo_overview_raw_results if repo_overview_raw_results else {}
+ repo_overview_raw_results = (
+ repo_overview_raw_results if repo_overview_raw_results else {}
+ )
if (
- repo_overview_raw_results.get('data', {}) is not None
- and repo_overview_raw_results.get('data', {}).get('viewer', {}) is not None
+ repo_overview_raw_results.get("data", {}) is not None
+ and repo_overview_raw_results.get("data", {}).get("viewer", {})
+ is not None
):
owned_repos: dict[str, dict | list[dict]] = (
- repo_overview_raw_results.get('data', {})
- .get('viewer', {})
- .get('repositories', {})
+ repo_overview_raw_results.get("data", {})
+ .get("viewer", {})
+ .get("repositories", {})
)
- repos: list[dict] = owned_repos.get('nodes', [])
+ repos: list[dict] = owned_repos.get("nodes", [])
contrib_repos: dict[str, dict | list] = (
- repo_overview_raw_results.get('data', {})
- .get('viewer', {})
- .get('repositoriesContributedTo', {})
+ repo_overview_raw_results.get("data", {})
+ .get("viewer", {})
+ .get("repositoriesContributedTo", {})
)
else:
owned_repos = {}
@@ -192,23 +206,23 @@ async def get_stats(self) -> None:
contrib_repos = {}
if not self.environment_vars.is_exclude_contrib_repos:
- repos += contrib_repos.get('nodes', [])
+ repos += contrib_repos.get("nodes", [])
await self.repo_stats(repos=repos)
- is_cur_owned: bool = owned_repos.get('pageInfo', {}).get(
- 'hasNextPage', False
+ is_cur_owned: bool = owned_repos.get("pageInfo", {}).get(
+ "hasNextPage", False
)
- is_cur_contrib: bool = contrib_repos.get('pageInfo', {}).get(
- 'hasNextPage', False
+ is_cur_contrib: bool = contrib_repos.get("pageInfo", {}).get(
+ "hasNextPage", False
)
if is_cur_owned or is_cur_contrib:
- next_owned = owned_repos.get('pageInfo', {}).get(
- 'endCursor', next_owned
+ next_owned = owned_repos.get("pageInfo", {}).get(
+ "endCursor", next_owned
)
- next_contrib = contrib_repos.get('pageInfo', {}).get(
- 'endCursor', next_contrib
+ next_contrib = contrib_repos.get("pageInfo", {}).get(
+ "endCursor", next_contrib
)
else:
break
@@ -223,9 +237,9 @@ async def get_stats(self) -> None:
self._excluded_languages.add(lang_name)
# TODO: Improve languages to scale by number of contributions to specific filetypes
- langs_total: int = sum([v.get('size', 0) for v in self._languages.values()])
+ langs_total: int = sum([v.get("size", 0) for v in self._languages.values()])
for k, v in self._languages.items():
- v['prop']: float = 100 * (v.get('size', 0) / langs_total)
+ v["prop"]: float = 100 * (v.get("size", 0) / langs_total)
def __exclude_repo_langs(
self,
@@ -252,20 +266,20 @@ async def repo_stats(self, repos: list[dict]) -> None:
if not repo or await self.is_repo_type_excluded(repo_data=repo):
continue
- repo_name: str = repo.get('nameWithOwner')
+ repo_name: str = repo.get("nameWithOwner")
if await self.is_repo_name_invalid(repo_name=repo_name):
continue
self._repos.add(repo_name)
- self._stargazers += repo.get('stargazers').get('totalCount', 0)
- self._forks += repo.get('forkCount', 0)
+ self._stargazers += repo.get("stargazers").get("totalCount", 0)
+ self._forks += repo.get("forkCount", 0)
- if repo.get('isEmpty'):
+ if repo.get("isEmpty"):
self._empty_repos.add(repo_name)
continue
- for lang in repo.get('languages', {}).get('edges', []):
- lang_name: str = lang.get('node', {}).get('name', 'Other')
+ for lang in repo.get("languages", {}).get("edges", []):
+ lang_name: str = lang.get("node", {}).get("name", "Other")
languages: dict[str, dict[str, float | str]] = await self.languages
if self.__exclude_repo_langs(
@@ -278,13 +292,13 @@ async def repo_stats(self, repos: list[dict]) -> None:
continue
if lang_name in languages:
- languages[lang_name]['size'] += lang.get('size', 0)
- languages[lang_name]['occurrences'] += 1
+ languages[lang_name]["size"] += lang.get("size", 0)
+ languages[lang_name]["occurrences"] += 1
else:
languages[lang_name] = {
- 'size': lang.get('size', 0),
- 'occurrences': 1,
- 'color': lang.get('node', {}).get('color'),
+ "size": lang.get("size", 0),
+ "occurrences": 1,
+ "color": lang.get("node", {}).get("color"),
}
async def manually_added_repo_stats(self) -> None:
@@ -299,21 +313,21 @@ async def manually_added_repo_stats(self) -> None:
self._repos.add(repo_name)
repo_stats: dict[str, str | int | dict] = await self.queries.query_rest(
- path=f'/repos/{repo_name}'
+ path=f"/repos/{repo_name}"
)
if await self.is_repo_type_excluded(repo_data=repo_stats):
continue
- self._stargazers += repo_stats.get('stargazers_count', 0)
- self._forks += repo_stats.get('forks', 0)
+ self._stargazers += repo_stats.get("stargazers_count", 0)
+ self._forks += repo_stats.get("forks", 0)
- if repo_stats.get('size') == 0:
+ if repo_stats.get("size") == 0:
self._empty_repos.add(repo_name)
continue
- if repo_stats.get('language'):
+ if repo_stats.get("language"):
langs: dict[str, int] = await self.queries.query_rest(
- path=f'/repos/{repo_name}/languages'
+ path=f"/repos/{repo_name}/languages"
)
for lang_name, size in langs.items():
@@ -329,13 +343,13 @@ async def manually_added_repo_stats(self) -> None:
continue
if lang_name in languages:
- languages[lang_name]['size'] += size
- languages[lang_name]['occurrences'] += 1
+ languages[lang_name]["size"] += size
+ languages[lang_name]["occurrences"] += 1
else:
languages[lang_name] = {
- 'size': size,
- 'occurrences': 1,
- 'color': lang_cols.get(lang_name).get('color'),
+ "size": size,
+ "occurrences": 1,
+ "color": lang_cols.get(lang_name).get("color"),
}
@property
@@ -401,7 +415,7 @@ async def languages_proportional(self) -> dict[str, float]:
if self._languages is None:
await self.get_stats()
assert self._languages is not None
- return {k: v.get('prop', 0) for (k, v) in self._languages.items()}
+ return {k: v.get("prop", 0) for (k, v) in self._languages.items()}
@property
async def repos(self) -> set[str]:
@@ -427,7 +441,7 @@ async def owned_repos(self) -> set[str]:
[
i
for i in self._repos
- if self.environment_vars.username == i.split('/')[0]
+ if self.environment_vars.username == i.split("/")[0]
]
)
return self._owned_repos
@@ -458,10 +472,10 @@ async def total_contributions(self) -> int:
generated_query=GitHubApiQueries.contributions_all_years()
)
)
- .get('data', {})
- .get('viewer', {})
- .get('contributionsCollection', {})
- .get('contributionYears', [])
+ .get("data", {})
+ .get("viewer", {})
+ .get("contributionsCollection", {})
+ .get("contributionYears", [])
)
by_year: list[dict[str, dict[str, int]]] = list(
@@ -470,14 +484,14 @@ async def total_contributions(self) -> int:
generated_query=GitHubApiQueries.all_contributions(years=years)
)
)
- .get('data', {})
- .get('viewer', {})
+ .get("data", {})
+ .get("viewer", {})
.values()
)
for year in by_year:
- self._total_contributions += year.get('contributionCalendar', {}).get(
- 'totalContributions', 0
+ self._total_contributions += year.get("contributionCalendar", {}).get(
+ "totalContributions", 0
)
return cast(typ=int, val=self._total_contributions)
@@ -494,7 +508,9 @@ async def lines_changed(self) -> tuple[int, int]:
return self._users_lines_changed
_, collab_repos = await self.raw_collaborators()
slave_status_repos: set[str] = self.environment_vars.more_collab_repos
- slave_status_repo_owners: set[str] = self.environment_vars.more_collab_repo_owners
+ slave_status_repo_owners: set[str] = (
+ self.environment_vars.more_collab_repo_owners
+ )
exclusive_collab_repos: set[str] = (
self.environment_vars.only_included_collab_repos
)
@@ -523,16 +539,16 @@ async def lines_changed(self) -> tuple[int, int]:
author_deletions: int = 0
r: list[dict[str, any]] = await self.queries.query_rest(
- path=f'/repos/{repo}/stats/contributors'
+ path=f"/repos/{repo}/stats/contributors"
)
for author_obj in r:
# Handle malformed response from API by skipping this repo
if not isinstance(author_obj, dict) or not isinstance(
- author_obj.get('author', {}), dict
+ author_obj.get("author", {}), dict
):
continue
- author: str = author_obj.get('author', {}).get('login', '')
+ author: str = author_obj.get("author", {}).get("login", "")
contributor_set.add(
author
) # for count number of total other contributors
@@ -541,14 +557,14 @@ async def lines_changed(self) -> tuple[int, int]:
author != self.environment_vars.username
and author not in self._EXCLUDED_USER_NAMES
):
- for week in author_obj.get('weeks', []):
- other_authors_total_changes += week.get('a', 0)
- other_authors_total_changes += week.get('d', 0)
+ for week in author_obj.get("weeks", []):
+ other_authors_total_changes += week.get("a", 0)
+ other_authors_total_changes += week.get("d", 0)
repo_contributors.add(author)
else:
- for week in author_obj.get('weeks', []):
- author_additions += week.get('a', 0)
- author_deletions += week.get('d', 0)
+ for week in author_obj.get("weeks", []):
+ author_additions += week.get("a", 0)
+ author_deletions += week.get("d", 0)
author_total_additions += author_additions
author_total_deletions += author_deletions
@@ -559,13 +575,14 @@ async def lines_changed(self) -> tuple[int, int]:
# calculate average author's contributions to each repository with at least one other collaborator
if (
repo not in self.environment_vars.exclude_collab_repos
- and repo.split('/')[0] not in self.environment_vars.exclude_collab_repo_owners
+ and repo.split("/")[0]
+ not in self.environment_vars.exclude_collab_repo_owners
and (
not (exclusive_collab_repos or exclusive_collab_repo_owners)
or repo in exclusive_collab_repos
- or repo.split('/')[0] in exclusive_collab_repo_owners
+ or repo.split("/")[0] in exclusive_collab_repo_owners
or repo in slave_status_repos
- or repo.split('/')[0] in slave_status_repo_owners
+ or repo.split("/")[0] in slave_status_repo_owners
)
and (author_additions + author_deletions) > 0
and (
@@ -574,7 +591,7 @@ async def lines_changed(self) -> tuple[int, int]:
in collab_repos.union(
slave_status_repos
) # either collaborators are ghosting or no show in repo
- or repo.split('/')[0] in slave_status_repo_owners
+ or repo.split("/")[0] in slave_status_repo_owners
)
):
repo_total_changes: int = (
@@ -598,13 +615,13 @@ async def lines_changed(self) -> tuple[int, int]:
if sum(author_contribution_percentages) > 0:
self._avg_percent: str = (
- f'{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%'
+ f"{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%"
)
self._avg_percent_weighted: str = (
- f'{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%'
+ f"{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%"
)
else:
- self._avg_percent_weighted = self._avg_percent = 'N/A'
+ self._avg_percent_weighted = self._avg_percent = "N/A"
self._contributors: set[str] = contributor_set
@@ -656,23 +673,23 @@ async def views(self) -> int:
today_view_count: int = 0
for repo in await self.repos:
r: dict[str, str | list[dict[str, str]]] = await self.queries.query_rest(
- path=f'/repos/{repo}/traffic/views'
+ path=f"/repos/{repo}/traffic/views"
)
- for view in r.get('views', []):
- if view.get('timestamp')[:10] == today:
- today_view_count += view.get('count', 0)
- elif view.get('timestamp')[:10] > last_viewed:
- self.environment_vars.set_views(views=view.get('count', 0))
- dates.add(view.get('timestamp')[:10])
+ for view in r.get("views", []):
+ if view.get("timestamp")[:10] == today:
+ today_view_count += view.get("count", 0)
+ elif view.get("timestamp")[:10] > last_viewed:
+ self.environment_vars.set_views(views=view.get("count", 0))
+ dates.add(view.get("timestamp")[:10])
- if last_viewed == '0000-00-00':
+ if last_viewed == "0000-00-00":
dates.remove(last_viewed)
if self.environment_vars.is_store_repo_view_count:
self.environment_vars.set_last_viewed(new_last_viewed_date=yesterday)
- if self.environment_vars.repo_first_viewed == '0000-00-00':
+ if self.environment_vars.repo_first_viewed == "0000-00-00":
self.environment_vars.repo_first_viewed = min(dates)
self.environment_vars.set_first_viewed(
new_first_viewed_date=self.environment_vars.repo_first_viewed
@@ -704,14 +721,14 @@ async def raw_collaborators(self) -> tuple[set[str], set[str]]:
for repo in await self.repos:
r: list[dict[str, any]] = await self.queries.query_rest(
- path=f'/repos/{repo}/collaborators'
+ path=f"/repos/{repo}/collaborators"
)
collab_count: int = 0
for obj in r:
if isinstance(obj, dict):
collab_count += 1
- self._collaborator_set.add(obj.get('login'))
+ self._collaborator_set.add(obj.get("login"))
if collab_count > 1:
self._collab_repos.add(repo)
@@ -759,14 +776,14 @@ async def pull_requests(self) -> int:
if not self._is_fetch_rate_limit_exceeded:
for repo in await self.repos:
end_point: str = (
- f'/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}'
+ f"/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}"
)
for pr_data in await self.queries.query_rest(path=end_point):
try:
(
- pull_requests.add(pr_data['url'])
- if 'url' in pr_data.keys()
+ pull_requests.add(pr_data["url"])
+ if "url" in pr_data.keys()
else None
)
except AttributeError:
@@ -797,14 +814,14 @@ async def issues(self) -> int:
if not self._is_fetch_rate_limit_exceeded:
for repo in await self.repos:
end_point: str = (
- f'/repos/{repo}/issues?state=all&involved={self.environment_vars.username}'
+ f"/repos/{repo}/issues?state=all&involved={self.environment_vars.username}"
)
for issue_data in await self.queries.query_rest(path=end_point):
try:
(
- issues.add(issue_data['url'])
- if 'url' in issue_data.keys()
+ issues.add(issue_data["url"])
+ if "url" in issue_data.keys()
else None
)
except AttributeError:
diff --git a/test/git_stats_test.py b/test/git_stats_test.py
index 5a5cc8a..42b3ba5 100644
--- a/test/git_stats_test.py
+++ b/test/git_stats_test.py
@@ -16,7 +16,9 @@
GITHUB_ACTOR: str = getenv("GITHUB_ACTOR") # or manually enter ''
# OPTIONAL
-EXCLUDED_REPOS: str = getenv("EXCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]'
+EXCLUDED_REPOS: str = getenv(
+ "EXCLUDED_REPOS"
+) # or enter: '[owner/repo],...,[owner/repo]'
EXCLUDED_OWNERS: str = getenv("EXCLUDED_OWNERS") # or enter: '[owner],...,[owner]'
EXCLUDED_LANGS: str = getenv("EXCLUDED_LANGS") # or enter: '[lang],...,[lang]'
EXCLUDED_REPO_LANGS: str = getenv(
@@ -33,8 +35,12 @@
IS_MAINTAIN_REPO_VIEWS: str = getenv("IS_STORE_REPO_VIEWS") # or enter: ''
MORE_COLLABS: str = getenv("MORE_COLLABS") # or enter: ''
MORE_REPOS: str = getenv("MORE_REPOS") # or enter: '[owner/repo],...,[owner/repo]'
-ONLY_INCLUDED_REPOS: str = getenv("ONLY_INCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]'
-ONLY_INCLUDED_OWNERS: str = "University-Project-Repos" # getenv("ONLY_INCLUDED_OWNERS") # or enter: [owner],...,[owner]
+ONLY_INCLUDED_REPOS: str = getenv(
+ "ONLY_INCLUDED_REPOS"
+) # or enter: '[owner/repo],...,[owner/repo]'
+ONLY_INCLUDED_OWNERS: str = (
+ "University-Project-Repos" # getenv("ONLY_INCLUDED_OWNERS") # or enter: [owner],...,[owner]
+)
ONLY_INCLUDED_COLLAB_REPOS: str = getenv(
"ONLY_INCLUDED_COLLAB_REPOS"
) # or enter: [owner/repo],...,[owner/repo]
@@ -47,7 +53,9 @@
EXCLUDED_COLLAB_REPO_OWNERS: str = getenv(
"EXCLUDED_COLLAB_REPO_OWNERS"
) # or enter: [owner],...,[owner]
-MORE_COLLAB_REPOS: str = getenv("MORE_COLLAB_REPOS") # or enter: [owner/repo],...,[owner/repo]
+MORE_COLLAB_REPOS: str = getenv(
+ "MORE_COLLAB_REPOS"
+) # or enter: [owner/repo],...,[owner/repo]
async def main() -> None:
From c82de026a95ef2df2c906499117bdc3877b90461 Mon Sep 17 00:00:00 2001
From: Adam Ross <14985050+R055A@users.noreply.github.com>
Date: Sat, 8 Mar 2025 21:29:30 +0100
Subject: [PATCH 3/3] Fix get_user query braces (API fetch of name/user), division by zero in repos percentage
---
src/generate_images.py | 2 +-
src/github_api_queries.py | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/generate_images.py b/src/generate_images.py
index b945355..3b29a41 100644
--- a/src/generate_images.py
+++ b/src/generate_images.py
@@ -192,7 +192,7 @@ async def generate_overview(self) -> None:
else add_unit(num_repos)
)
repos_str: str = (
- f"{repos:,} [{'%g' % round(num_collab_repos / num_repos * 100, 2)}%]"
+ f"{repos:,} [{'%g' % round(num_collab_repos / num_repos * 100, 2) if num_collab_repos > 0 and num_repos > 0 else 0}%]"
)
output = sub(pattern="{{ repos_str }}", repl=repos_str, string=output)
diff --git a/src/github_api_queries.py b/src/github_api_queries.py
index 0901552..d6ae671 100644
--- a/src/github_api_queries.py
+++ b/src/github_api_queries.py
@@ -137,12 +137,12 @@ def get_user() -> str:
:return: GraphQL query with user login and name
"""
return """
- {{
- viewer {{
+ {
+ viewer {
login
name
- }}
- }}"""
+ }
+ }"""
@staticmethod
def repos_overview(