From 329c3d7a319583e81b55c7ffa5fc41187571de44 Mon Sep 17 00:00:00 2001 From: Adam Ross <14985050+R055A@users.noreply.github.com> Date: Sat, 8 Mar 2025 20:53:24 +0100 Subject: [PATCH 1/3] Add optional setting: include/exclude owners --- .github/workflows/auto_update_stat_images.yml | 9 +- .../non_auto_generate_stat_images.yml | 9 +- README.md | 48 +++- src/env_vars.py | 42 +++- src/github_api_queries.py | 38 ++- src/github_repo_stats.py | 232 ++++++++++-------- test/git_stats_test.py | 24 +- 7 files changed, 266 insertions(+), 136 deletions(-) diff --git a/.github/workflows/auto_update_stat_images.yml b/.github/workflows/auto_update_stat_images.yml index c84d369..b33a108 100644 --- a/.github/workflows/auto_update_stat_images.yml +++ b/.github/workflows/auto_update_stat_images.yml @@ -57,7 +57,8 @@ jobs: env: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - EXCLUDED: ${{ secrets.EXCLUDED }} + EXCLUDED_REPOS: ${{ secrets.EXCLUDED_REPOS }} + EXCLUDED_OWNERS: ${{ secrets.EXCLUDED_OWNERS }} EXCLUDED_LANGS: ${{ secrets.EXCLUDED_LANGS }} EXCLUDED_REPO_LANGS: ${{ secrets.EXCLUDED_REPO_LANGS }} IS_INCLUDE_FORKED_REPOS: ${{ secrets.IS_INCLUDE_FORKED_REPOS }} @@ -71,10 +72,14 @@ jobs: IS_STORE_REPO_VIEWS: ${{ secrets.IS_STORE_REPO_VIEWS }} MORE_COLLABS: ${{ secrets.MORE_COLLABS }} MORE_REPOS: ${{ secrets.MORE_REPOS }} - ONLY_INCLUDED: ${{ secrets.ONLY_INCLUDED }} + ONLY_INCLUDED_REPOS: ${{ secrets.ONLY_INCLUDED_REPOS }} + ONLY_INCLUDED_OWNERS: ${{ secrets.ONLY_INCLUDED_OWNERS }} ONLY_INCLUDED_COLLAB_REPOS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPOS }} + ONLY_INCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPO_OWNERS }} EXCLUDED_COLLAB_REPOS: ${{ secrets.EXCLUDED_COLLAB_REPOS }} + EXCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.EXCLUDED_COLLAB_REPO_OWNERS }} MORE_COLLAB_REPOS: ${{ secrets.MORE_COLLAB_REPOS }} + MORE_COLLAB_REPO_OWNERS: ${{ secrets.MORE_COLLAB_REPO_OWNERS }} # Commits all changed files to the repository - name: Commit to the repo diff --git a/.github/workflows/non_auto_generate_stat_images.yml b/.github/workflows/non_auto_generate_stat_images.yml index 4d7dee6..6b3586d 100644 --- a/.github/workflows/non_auto_generate_stat_images.yml +++ b/.github/workflows/non_auto_generate_stat_images.yml @@ -60,7 +60,8 @@ jobs: env: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - EXCLUDED: ${{ secrets.EXCLUDED }} + EXCLUDED_REPOS: ${{ secrets.EXCLUDED_REPOS }} + EXCLUDED_OWNERS: ${{ secrets.EXCLUDED_OWNERS }} EXCLUDED_LANGS: ${{ secrets.EXCLUDED_LANGS }} EXCLUDED_REPO_LANGS: ${{ secrets.EXCLUDED_REPO_LANGS }} IS_INCLUDE_FORKED_REPOS: ${{ secrets.IS_INCLUDE_FORKED_REPOS }} @@ -74,10 +75,14 @@ jobs: IS_STORE_REPO_VIEWS: ${{ secrets.IS_STORE_REPO_VIEWS }} MORE_COLLABS: ${{ secrets.MORE_COLLABS }} MORE_REPOS: ${{ secrets.MORE_REPOS }} - ONLY_INCLUDED: ${{ secrets.ONLY_INCLUDED }} + ONLY_INCLUDED_REPOS: ${{ secrets.ONLY_INCLUDED_REPOS }} + ONLY_INCLUDED_OWNERS: ${{ secrets.ONLY_INCLUDED_OWNERS }} ONLY_INCLUDED_COLLAB_REPOS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPOS }} + ONLY_INCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.ONLY_INCLUDED_COLLAB_REPO_OWNERS }} EXCLUDED_COLLAB_REPOS: ${{ secrets.EXCLUDED_COLLAB_REPOS }} + EXCLUDED_COLLAB_REPO_OWNERS: ${{ secrets.EXCLUDED_COLLAB_REPO_OWNERS }} MORE_COLLAB_REPOS: ${{ secrets.MORE_COLLAB_REPOS }} + MORE_COLLAB_REPO_OWNERS: ${{ secrets.MORE_COLLAB_REPO_OWNERS }} # Commits all changed files to the repository - name: Commit to the repo diff --git a/README.md b/README.md index 7a716e4..814f2cd 100644 --- a/README.md +++ b/README.md @@ -101,15 +101,23 @@ Generate regularly updated visualizations of user and repository statistics from Click drop-down to view optional repository Secrets for customizing GitHub statistic visualizations -* ### Optional Secret *Name*: `EXCLUDED` - For excluding repositories from being included entirely in the generated statistic visualizations. +* ### Optional Secret *Name*: `EXCLUDED_REPOS` + For excluding repositories from the generated statistic visualizations. **Instructions**: * enter *Value* in the following format (separated by commas): * `[owner/repo],[owner/repo],...,[owner/repo]` * example: * `jstrieb/github-stats,rahul-jha98/github-stats-transparent,idiotWu/stats` -* ### Optional Secret *Name*: `ONLY_INCLUDED` +* ### Optional Secret *Name*: `EXCLUDED_OWNERS` + For excluding repositories associated with (user/organisation) owners from the generated statistic visualizations. + + **Instructions**: + * enter *Value* in the following format (separated by commas): + * `[owner],[owner],...,[owner]` + * example: + * `R055A,University-Project-Repos` +* ### Optional Secret *Name*: `ONLY_INCLUDED_REPOS` For **ONLY** including repositories in the generated statistic visualizations - such as when there are fewer repositories to include than to exclude @@ -118,6 +126,15 @@ Generate regularly updated visualizations of user and repository statistics from * `[owner/repo],[owner/repo],...,[owner/repo]` * example: * `R055A/GitStats,R055A/R055A` +* ### Optional Secret *Name*: `ONLY_INCLUDED_OWNERS` + For **ONLY** including repositories associated with (user/organisation) owners in the generated statistic visualizations + - such as when there are fewer owners to include than to exclude + + **Instructions**: + * enter *Value* in the following format (separated by commas): + * `[owner],[owner],...,[owner]` + * example: + * `R055A,University-Project-Repos` * ### Optional Secret *Name*: `EXCLUDED_LANGS` For excluding undesired languages from being included in the generated statistic visualizations @@ -208,6 +225,15 @@ Generate regularly updated visualizations of user and repository statistics from * `[owner/repo],[owner/repo],...,[owner/repo]` * example: * `R055A/UniversityProject-A,R055A/UniversityProject-B` +* ### Optional Secret *Name*: `ONLY_INCLUDED_COLLAB_REPO_OWNERS` + For **ONLY** including collaborative repositories associated with owner(s) in the generated average contribution statistics calculations + - such as when there are fewer collaborative repository owners to include than to exclude + + **Instructions**: + * enter *Value* in the following format (separated by commas): + * `[owner],[owner],...,[owner]` + * example: + * `R055A,University-Project-Repos` * ### Optional Secret *Name*: `EXCLUDED_COLLAB_REPOS` For excluding collaborative repositories from being included in the average contribution statistics calculations - for example, such as for when @@ -221,6 +247,14 @@ Generate regularly updated visualizations of user and repository statistics from * `[owner/repo],[owner/repo],...,[owner/repo]` * example: * `tera_open_source/bit_typo_fix,peer_repo/missing_or_no_git_co_author_credit,dude_collab/email_not_reg_on_github,dog_ate/my_repo,mars/attacks` +* ### Optional Secret *Name*: `EXCLUDED_COLLAB_REPO_OWNERS` + For excluding collaborative repositories associated with owner(s) from being included in the average contribution statistics calculations + + **Instructions**: + * enter *Value* in the following format (separated by commas): + * `[owner],[owner],...,[owner]` + * example: + * `R055A,University-Project-Repos` * ### Optional Secret *Name*: `MORE_COLLAB_REPOS` For including collaborative repositories that are otherwise not included in the average contribution statistics calculations - for example, such as when @@ -232,6 +266,14 @@ Generate regularly updated visualizations of user and repository statistics from * `[owner/repo],[owner/repo],...,[owner/repo]` * example: * `imported_ghosted/large_A+_collab_project,slave_trade/larger_A++_project` +* ### Optional Secret *Name*: `MORE_COLLAB_REPO_OWNERS` + For including collaborative repositories associated with owner(s) that are otherwise not included in the average contribution statistics calculations + + **Instructions**: + * enter *Value* in the following format (separated by commas): + * `[owner],[owner],...,[owner]` + * example: + * `R055A,University-Project-Repos` * ### Optional Secret *Name*: `IS_STORE_REPO_VIEWS` Boolean for storing generated repository view statistic visualization data beyond the 14 day-limit GitHub API allows - `true` by default diff --git a/src/env_vars.py b/src/env_vars.py index 67563be..a5bf4b8 100644 --- a/src/env_vars.py +++ b/src/env_vars.py @@ -18,7 +18,8 @@ def __init__( self, username: str, access_token: str, - exclude_repos: Optional[str] = getenv("EXCLUDED"), + exclude_repos: Optional[str] = getenv("EXCLUDED_REPOS"), + exclude_owners: Optional[str] = getenv("EXCLUDED_OWNERS"), exclude_langs: Optional[str] = getenv("EXCLUDED_LANGS"), exclude_repo_langs: Optional[str] = getenv("EXCLUDED_REPO_LANGS"), is_include_forked_repos: str = getenv("IS_INCLUDE_FORKED_REPOS"), @@ -32,12 +33,18 @@ def __init__( is_store_repo_view_count: str = getenv("IS_STORE_REPO_VIEWS"), more_collaborators: Optional[str] = getenv("MORE_COLLABS"), manually_added_repos: Optional[str] = getenv("MORE_REPOS"), - only_included_repos: Optional[str] = getenv("ONLY_INCLUDED"), + only_included_repos: Optional[str] = getenv("ONLY_INCLUDED_REPOS"), + only_included_owners: Optional[str] = getenv("ONLY_INCLUDED_OWNERS"), only_included_collab_repos: Optional[str] = getenv( "ONLY_INCLUDED_COLLAB_REPOS" ), + only_included_collab_repo_owners: Optional[str] = getenv( + "ONLY_INCLUDED_COLLAB_REPO_OWNERS" + ), exclude_collab_repos: Optional[str] = getenv("EXCLUDED_COLLAB_REPOS"), + exclude_collab_repo_owners: Optional[str] = getenv("EXCLUDED_COLLAB_REPO_OWNERS"), more_collab_repos: Optional[str] = getenv("MORE_COLLAB_REPOS"), + more_collab_repo_owners: Optional[str] = getenv("MORE_COLLAB_REPO_OWNERS") ) -> None: self.__db: GitRepoStatsDB = GitRepoStatsDB() @@ -49,6 +56,11 @@ def __init__( else: self.exclude_repos = {x.strip() for x in exclude_repos.split(",")} + if exclude_owners is None: + self.exclude_owners: set[str] = set() + else: + self.exclude_owners = {x.strip() for x in exclude_owners.split(",")} + if exclude_langs is None: self.exclude_langs: set[str] = set() else: @@ -154,6 +166,13 @@ def __init__( x.strip() for x in only_included_repos.split(",") } + if only_included_owners is None or only_included_owners == "": + self.only_included_owners: set[str] = set() + else: + self.only_included_owners = { + x.strip() for x in only_included_owners.split(",") + } + if only_included_collab_repos is None or only_included_collab_repos == "": self.only_included_collab_repos: set[str] = set() else: @@ -161,6 +180,13 @@ def __init__( x.strip() for x in only_included_collab_repos.split(",") } + if only_included_collab_repo_owners is None or only_included_collab_repo_owners == "": + self.only_included_collab_repo_owners: set[str] = set() + else: + self.only_included_collab_repo_owners = { + x.strip() for x in only_included_collab_repo_owners.split(",") + } + if exclude_collab_repos is None: self.exclude_collab_repos: set[str] = set() else: @@ -168,11 +194,23 @@ def __init__( x.strip() for x in exclude_collab_repos.split(",") } + if exclude_collab_repo_owners is None: + self.exclude_collab_repo_owners: set[str] = set() + else: + self.exclude_collab_repo_owners = { + x.strip() for x in exclude_collab_repo_owners.split(",") + } + if more_collab_repos is None: self.more_collab_repos: set[str] = set() else: self.more_collab_repos = {x.strip() for x in more_collab_repos.split(",")} + if more_collab_repo_owners is None: + self.more_collab_repo_owners: set[str] = set() + else: + self.more_collab_repo_owners = {x.strip() for x in more_collab_repo_owners.split(",")} + self.pull_requests_count: int = self.__db.pull_requests self.issues_count: int = self.__db.issues diff --git a/src/github_api_queries.py b/src/github_api_queries.py index 1ebe058..b22a274 100644 --- a/src/github_api_queries.py +++ b/src/github_api_queries.py @@ -19,8 +19,8 @@ class GitHubApiQueries(object): API. Also includes functions to dynamically generate GraphQL queries. """ - __GITHUB_API_URL: str = "https://api.github.com/" - __GRAPHQL_PATH: str = "graphql" + __GITHUB_API_URL: str = 'https://api.github.com/' + __GRAPHQL_PATH: str = 'graphql' __REST_QUERY_LIMIT: int = 60 __ASYNCIO_SLEEP_TIME: int = 2 __DEFAULT_MAX_CONNECTIONS: int = 10 @@ -37,7 +37,7 @@ def __init__( self.session: ClientSession = session self.semaphore: Semaphore = Semaphore(max_connections) self.headers: dict[str, str] = { - "Authorization": f"Bearer {self.access_token}", + 'Authorization': f'Bearer {self.access_token}', } async def query(self, generated_query: str) -> dict[str, dict]: @@ -52,21 +52,21 @@ async def query(self, generated_query: str) -> dict[str, dict]: r_async = await self.session.post( url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH, headers=self.headers, - json={"query": generated_query}, + json={'query': generated_query}, ) result: dict[str, dict] = await r_async.json() if result is not None: return result except ConnectionError: - print("aiohttp failed for GraphQL query") + print('aiohttp failed for GraphQL query') # Fall back on non-async requests async with self.semaphore: r_requests = post( url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH, headers=self.headers, - json={"query": generated_query}, + json={'query': generated_query}, ) result = r_requests.json() @@ -86,7 +86,7 @@ async def query_rest( for i in range(self.__REST_QUERY_LIMIT): if params is None: params = dict() - if path.startswith("/"): + if path.startswith('/'): path = path[1:] try: @@ -98,7 +98,7 @@ async def query_rest( ) if r_async.status == HTTPStatus.ACCEPTED.value: - print(f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying...") + print(f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...') await sleep(self.__ASYNCIO_SLEEP_TIME) continue @@ -107,7 +107,7 @@ async def query_rest( if result is not None: return result except ConnectionError: - print("aiohttp failed for REST query attempt #" + str(i + 1)) + print('aiohttp failed for REST query attempt #' + str(i + 1)) # Fall back on non-async requests async with self.semaphore: @@ -119,7 +119,7 @@ async def query_rest( if r_requests.status_code == HTTPStatus.ACCEPTED.value: print( - f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying..." + f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...' ) await sleep(self.__ASYNCIO_SLEEP_TIME) continue @@ -127,10 +127,23 @@ async def query_rest( return r_requests.json() print( - f"Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete." + f'Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete.' ) return dict() + @staticmethod + def get_user() -> str: + """ + :return: GraphQL query with user login and name + """ + return f""" + {{ + viewer {{ + login + name + }} + }}""" + @staticmethod def repos_overview( contrib_cursor: Optional[str] = None, owned_cursor: Optional[str] = None @@ -142,7 +155,6 @@ def repos_overview( {{ viewer {{ login, - name, repositories( first: 100, orderBy: {{ @@ -275,6 +287,6 @@ def all_contributions(cls, years: list[str]) -> str: @staticmethod def get_language_colors() -> dict[str, dict[str, str]]: url: models.Response = get( - "https://raw.githubusercontent.com/ozh/github-colors/master/colors.json" + 'https://raw.githubusercontent.com/ozh/github-colors/master/colors.json' ) return loads(url.text) diff --git a/src/github_repo_stats.py b/src/github_repo_stats.py index 4d7cf53..1a69a16 100644 --- a/src/github_repo_stats.py +++ b/src/github_repo_stats.py @@ -17,11 +17,11 @@ class GitHubRepoStats(object): Retrieve and store statistics about GitHub usage. """ - _DATE_FORMAT: str = "%Y-%m-%d" + _DATE_FORMAT: str = '%Y-%m-%d' _EXCLUDED_USER_NAMES: list[str] = [ - "dependabot[bot]" + 'dependabot[bot]' ] # exclude bot data from being included in statistical calculations - _NO_NAME: str = "No Name" + _NO_NAME: str = 'Unknown' def __init__( self, environment_vars: EnvironmentVariables, session: ClientSession @@ -62,8 +62,8 @@ async def to_str(self) -> str: :return: summary of all available statistics """ languages: dict[str, float] = await self.languages_proportional - formatted_languages: str = "\n\t\t\t- ".join( - [f"{k}: {v:0.4f}%" for k, v in languages.items()] + formatted_languages: str = '\n\t\t\t- '.join( + [f'{k}: {v:0.4f}%' for k, v in languages.items()] ) users_lines_changed: tuple[int, int] = await self.lines_changed @@ -100,10 +100,13 @@ async def is_repo_name_invalid(self, repo_name: str) -> bool: :return: True if repo is not to be included in self._repos """ return ( - repo_name in self._repos - or len(self.environment_vars.only_included_repos) > 0 - and repo_name not in self.environment_vars.only_included_repos + # repo_name in self._repos + (len(self.environment_vars.only_included_owners) > 0 + and repo_name.split('/')[0] not in self.environment_vars.only_included_owners) + or (len(self.environment_vars.only_included_repos) > 0 + and repo_name not in self.environment_vars.only_included_repos) or repo_name in self.environment_vars.exclude_repos + or repo_name.split('/')[0] in self.environment_vars.exclude_owners ) async def is_repo_type_excluded( @@ -120,13 +123,13 @@ async def is_repo_type_excluded( """ return ( not self.environment_vars.is_include_forked_repos - and (repo_data.get("isFork") or repo_data.get("fork")) + and (repo_data.get('isFork') or repo_data.get('fork')) or self.environment_vars.is_exclude_archive_repos - and (repo_data.get("isArchived") or repo_data.get("archived")) + and (repo_data.get('isArchived') or repo_data.get('archived')) or self.environment_vars.is_exclude_private_repos - and (repo_data.get("isPrivate") or repo_data.get("private")) + and (repo_data.get('isPrivate') or repo_data.get('private')) or self.environment_vars.is_exclude_public_repos - and (not repo_data.get("isPrivate") or not repo_data.get("private")) + and (not repo_data.get('isPrivate') or not repo_data.get('private')) ) async def get_stats(self) -> None: @@ -144,39 +147,44 @@ async def get_stats(self) -> None: next_owned: str | None = None next_contrib: str | None = None + user_raw_result: dict[str, dict] = await self.queries.query( + generated_query=GitHubApiQueries.get_user() + ) + user_raw_result = user_raw_result if user_raw_result else {} + if ( + user_raw_result.get('data', {}) is not None + and user_raw_result.get('data', {}).get('viewer', {}) is not None + and ( + user_raw_result.get('data', {}).get('viewer', {}).get('name', None) is not None or + user_raw_result.get('data', {}).get('viewer', {}).get('user', None) is not None + ) + ): + self._name = user_raw_result.get('data', {}).get('viewer', {}).get('name', self._NO_NAME) + elif user_raw_result.get('message', '').lower() == 'bad credentials': + raise ConnectionRefusedError(f'Unauthorized Error: Invalid Access Token') + while True: - raw_results: dict[str, dict] = await self.queries.query( + repo_overview_raw_results: dict[str, dict] = await self.queries.query( generated_query=GitHubApiQueries.repos_overview( owned_cursor=next_owned, contrib_cursor=next_contrib ) ) - raw_results = raw_results if raw_results else {} + repo_overview_raw_results = repo_overview_raw_results if repo_overview_raw_results else {} if ( - raw_results.get("data", {}) is not None - and raw_results.get("data", {}).get("viewer", {}) is not None + repo_overview_raw_results.get('data', {}) is not None + and repo_overview_raw_results.get('data', {}).get('viewer', {}) is not None ): - if not self._name: - self._name = ( - raw_results.get("data", {}).get("viewer", {}).get("name", None) - ) - if self._name is None: - self._name = ( - raw_results.get("data", {}) - .get("viewer", {}) - .get("login", self._NO_NAME) - ) - owned_repos: dict[str, dict | list[dict]] = ( - raw_results.get("data", {}) - .get("viewer", {}) - .get("repositories", {}) + repo_overview_raw_results.get('data', {}) + .get('viewer', {}) + .get('repositories', {}) ) - repos: list[dict] = owned_repos.get("nodes", []) + repos: list[dict] = owned_repos.get('nodes', []) contrib_repos: dict[str, dict | list] = ( - raw_results.get("data", {}) - .get("viewer", {}) - .get("repositoriesContributedTo", {}) + repo_overview_raw_results.get('data', {}) + .get('viewer', {}) + .get('repositoriesContributedTo', {}) ) else: owned_repos = {} @@ -184,23 +192,23 @@ async def get_stats(self) -> None: contrib_repos = {} if not self.environment_vars.is_exclude_contrib_repos: - repos += contrib_repos.get("nodes", []) + repos += contrib_repos.get('nodes', []) await self.repo_stats(repos=repos) - is_cur_owned: bool = owned_repos.get("pageInfo", {}).get( - "hasNextPage", False + is_cur_owned: bool = owned_repos.get('pageInfo', {}).get( + 'hasNextPage', False ) - is_cur_contrib: bool = contrib_repos.get("pageInfo", {}).get( - "hasNextPage", False + is_cur_contrib: bool = contrib_repos.get('pageInfo', {}).get( + 'hasNextPage', False ) if is_cur_owned or is_cur_contrib: - next_owned = owned_repos.get("pageInfo", {}).get( - "endCursor", next_owned + next_owned = owned_repos.get('pageInfo', {}).get( + 'endCursor', next_owned ) - next_contrib = contrib_repos.get("pageInfo", {}).get( - "endCursor", next_contrib + next_contrib = contrib_repos.get('pageInfo', {}).get( + 'endCursor', next_contrib ) else: break @@ -215,9 +223,9 @@ async def get_stats(self) -> None: self._excluded_languages.add(lang_name) # TODO: Improve languages to scale by number of contributions to specific filetypes - langs_total: int = sum([v.get("size", 0) for v in self._languages.values()]) + langs_total: int = sum([v.get('size', 0) for v in self._languages.values()]) for k, v in self._languages.items(): - v["prop"]: float = 100 * (v.get("size", 0) / langs_total) + v['prop']: float = 100 * (v.get('size', 0) / langs_total) def __exclude_repo_langs( self, @@ -244,20 +252,20 @@ async def repo_stats(self, repos: list[dict]) -> None: if not repo or await self.is_repo_type_excluded(repo_data=repo): continue - repo_name: str = repo.get("nameWithOwner") - if await self.is_repo_name_invalid(repo_name): + repo_name: str = repo.get('nameWithOwner') + if await self.is_repo_name_invalid(repo_name=repo_name): continue self._repos.add(repo_name) - self._stargazers += repo.get("stargazers").get("totalCount", 0) - self._forks += repo.get("forkCount", 0) + self._stargazers += repo.get('stargazers').get('totalCount', 0) + self._forks += repo.get('forkCount', 0) - if repo.get("isEmpty"): + if repo.get('isEmpty'): self._empty_repos.add(repo_name) continue - for lang in repo.get("languages", {}).get("edges", []): - lang_name: str = lang.get("node", {}).get("name", "Other") + for lang in repo.get('languages', {}).get('edges', []): + lang_name: str = lang.get('node', {}).get('name', 'Other') languages: dict[str, dict[str, float | str]] = await self.languages if self.__exclude_repo_langs( @@ -270,13 +278,13 @@ async def repo_stats(self, repos: list[dict]) -> None: continue if lang_name in languages: - languages[lang_name]["size"] += lang.get("size", 0) - languages[lang_name]["occurrences"] += 1 + languages[lang_name]['size'] += lang.get('size', 0) + languages[lang_name]['occurrences'] += 1 else: languages[lang_name] = { - "size": lang.get("size", 0), - "occurrences": 1, - "color": lang.get("node", {}).get("color"), + 'size': lang.get('size', 0), + 'occurrences': 1, + 'color': lang.get('node', {}).get('color'), } async def manually_added_repo_stats(self) -> None: @@ -291,21 +299,21 @@ async def manually_added_repo_stats(self) -> None: self._repos.add(repo_name) repo_stats: dict[str, str | int | dict] = await self.queries.query_rest( - path=f"/repos/{repo_name}" + path=f'/repos/{repo_name}' ) if await self.is_repo_type_excluded(repo_data=repo_stats): continue - self._stargazers += repo_stats.get("stargazers_count", 0) - self._forks += repo_stats.get("forks", 0) + self._stargazers += repo_stats.get('stargazers_count', 0) + self._forks += repo_stats.get('forks', 0) - if repo_stats.get("size") == 0: + if repo_stats.get('size') == 0: self._empty_repos.add(repo_name) continue - if repo_stats.get("language"): + if repo_stats.get('language'): langs: dict[str, int] = await self.queries.query_rest( - path=f"/repos/{repo_name}/languages" + path=f'/repos/{repo_name}/languages' ) for lang_name, size in langs.items(): @@ -321,13 +329,13 @@ async def manually_added_repo_stats(self) -> None: continue if lang_name in languages: - languages[lang_name]["size"] += size - languages[lang_name]["occurrences"] += 1 + languages[lang_name]['size'] += size + languages[lang_name]['occurrences'] += 1 else: languages[lang_name] = { - "size": size, - "occurrences": 1, - "color": lang_cols.get(lang_name).get("color"), + 'size': size, + 'occurrences': 1, + 'color': lang_cols.get(lang_name).get('color'), } @property @@ -393,7 +401,7 @@ async def languages_proportional(self) -> dict[str, float]: if self._languages is None: await self.get_stats() assert self._languages is not None - return {k: v.get("prop", 0) for (k, v) in self._languages.items()} + return {k: v.get('prop', 0) for (k, v) in self._languages.items()} @property async def repos(self) -> set[str]: @@ -419,7 +427,7 @@ async def owned_repos(self) -> set[str]: [ i for i in self._repos - if self.environment_vars.username == i.split("/")[0] + if self.environment_vars.username == i.split('/')[0] ] ) return self._owned_repos @@ -450,10 +458,10 @@ async def total_contributions(self) -> int: generated_query=GitHubApiQueries.contributions_all_years() ) ) - .get("data", {}) - .get("viewer", {}) - .get("contributionsCollection", {}) - .get("contributionYears", []) + .get('data', {}) + .get('viewer', {}) + .get('contributionsCollection', {}) + .get('contributionYears', []) ) by_year: list[dict[str, dict[str, int]]] = list( @@ -462,14 +470,14 @@ async def total_contributions(self) -> int: generated_query=GitHubApiQueries.all_contributions(years=years) ) ) - .get("data", {}) - .get("viewer", {}) + .get('data', {}) + .get('viewer', {}) .values() ) for year in by_year: - self._total_contributions += year.get("contributionCalendar", {}).get( - "totalContributions", 0 + self._total_contributions += year.get('contributionCalendar', {}).get( + 'totalContributions', 0 ) return cast(typ=int, val=self._total_contributions) @@ -486,9 +494,13 @@ async def lines_changed(self) -> tuple[int, int]: return self._users_lines_changed _, collab_repos = await self.raw_collaborators() slave_status_repos: set[str] = self.environment_vars.more_collab_repos + slave_status_repo_owners: set[str] = self.environment_vars.more_collab_repo_owners exclusive_collab_repos: set[str] = ( self.environment_vars.only_included_collab_repos ) + exclusive_collab_repo_owners: set[str] = ( + self.environment_vars.only_included_collab_repo_owners + ) contributor_set: set[str] = set() repo_total_changes_arr: list[int] = [] @@ -511,16 +523,16 @@ async def lines_changed(self) -> tuple[int, int]: author_deletions: int = 0 r: list[dict[str, any]] = await self.queries.query_rest( - path=f"/repos/{repo}/stats/contributors" + path=f'/repos/{repo}/stats/contributors' ) for author_obj in r: # Handle malformed response from API by skipping this repo if not isinstance(author_obj, dict) or not isinstance( - author_obj.get("author", {}), dict + author_obj.get('author', {}), dict ): continue - author: str = author_obj.get("author", {}).get("login", "") + author: str = author_obj.get('author', {}).get('login', '') contributor_set.add( author ) # for count number of total other contributors @@ -529,14 +541,14 @@ async def lines_changed(self) -> tuple[int, int]: author != self.environment_vars.username and author not in self._EXCLUDED_USER_NAMES ): - for week in author_obj.get("weeks", []): - other_authors_total_changes += week.get("a", 0) - other_authors_total_changes += week.get("d", 0) + for week in author_obj.get('weeks', []): + other_authors_total_changes += week.get('a', 0) + other_authors_total_changes += week.get('d', 0) repo_contributors.add(author) else: - for week in author_obj.get("weeks", []): - author_additions += week.get("a", 0) - author_deletions += week.get("d", 0) + for week in author_obj.get('weeks', []): + author_additions += week.get('a', 0) + author_deletions += week.get('d', 0) author_total_additions += author_additions author_total_deletions += author_deletions @@ -547,10 +559,13 @@ async def lines_changed(self) -> tuple[int, int]: # calculate average author's contributions to each repository with at least one other collaborator if ( repo not in self.environment_vars.exclude_collab_repos + and repo.split('/')[0] not in self.environment_vars.exclude_collab_repo_owners and ( - not exclusive_collab_repos + not (exclusive_collab_repos or exclusive_collab_repo_owners) or repo in exclusive_collab_repos + or repo.split('/')[0] in exclusive_collab_repo_owners or repo in slave_status_repos + or repo.split('/')[0] in slave_status_repo_owners ) and (author_additions + author_deletions) > 0 and ( @@ -559,6 +574,7 @@ async def lines_changed(self) -> tuple[int, int]: in collab_repos.union( slave_status_repos ) # either collaborators are ghosting or no show in repo + or repo.split('/')[0] in slave_status_repo_owners ) ): repo_total_changes: int = ( @@ -582,13 +598,13 @@ async def lines_changed(self) -> tuple[int, int]: if sum(author_contribution_percentages) > 0: self._avg_percent: str = ( - f"{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%" + f'{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%' ) self._avg_percent_weighted: str = ( - f"{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%" + f'{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%' ) else: - self._avg_percent_weighted = self._avg_percent = "N/A" + self._avg_percent_weighted = self._avg_percent = 'N/A' self._contributors: set[str] = contributor_set @@ -640,23 +656,23 @@ async def views(self) -> int: today_view_count: int = 0 for repo in await self.repos: r: dict[str, str | list[dict[str, str]]] = await self.queries.query_rest( - path=f"/repos/{repo}/traffic/views" + path=f'/repos/{repo}/traffic/views' ) - for view in r.get("views", []): - if view.get("timestamp")[:10] == today: - today_view_count += view.get("count", 0) - elif view.get("timestamp")[:10] > last_viewed: - self.environment_vars.set_views(views=view.get("count", 0)) - dates.add(view.get("timestamp")[:10]) + for view in r.get('views', []): + if view.get('timestamp')[:10] == today: + today_view_count += view.get('count', 0) + elif view.get('timestamp')[:10] > last_viewed: + self.environment_vars.set_views(views=view.get('count', 0)) + dates.add(view.get('timestamp')[:10]) - if last_viewed == "0000-00-00": + if last_viewed == '0000-00-00': dates.remove(last_viewed) if self.environment_vars.is_store_repo_view_count: self.environment_vars.set_last_viewed(new_last_viewed_date=yesterday) - if self.environment_vars.repo_first_viewed == "0000-00-00": + if self.environment_vars.repo_first_viewed == '0000-00-00': self.environment_vars.repo_first_viewed = min(dates) self.environment_vars.set_first_viewed( new_first_viewed_date=self.environment_vars.repo_first_viewed @@ -688,14 +704,14 @@ async def raw_collaborators(self) -> tuple[set[str], set[str]]: for repo in await self.repos: r: list[dict[str, any]] = await self.queries.query_rest( - path=f"/repos/{repo}/collaborators" + path=f'/repos/{repo}/collaborators' ) collab_count: int = 0 for obj in r: if isinstance(obj, dict): collab_count += 1 - self._collaborator_set.add(obj.get("login")) + self._collaborator_set.add(obj.get('login')) if collab_count > 1: self._collab_repos.add(repo) @@ -743,14 +759,14 @@ async def pull_requests(self) -> int: if not self._is_fetch_rate_limit_exceeded: for repo in await self.repos: end_point: str = ( - f"/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}" + f'/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}' ) for pr_data in await self.queries.query_rest(path=end_point): try: ( - pull_requests.add(pr_data["url"]) - if "url" in pr_data.keys() + pull_requests.add(pr_data['url']) + if 'url' in pr_data.keys() else None ) except AttributeError: @@ -781,14 +797,14 @@ async def issues(self) -> int: if not self._is_fetch_rate_limit_exceeded: for repo in await self.repos: end_point: str = ( - f"/repos/{repo}/issues?state=all&involved={self.environment_vars.username}" + f'/repos/{repo}/issues?state=all&involved={self.environment_vars.username}' ) for issue_data in await self.queries.query_rest(path=end_point): try: ( - issues.add(issue_data["url"]) - if "url" in issue_data.keys() + issues.add(issue_data['url']) + if 'url' in issue_data.keys() else None ) except AttributeError: diff --git a/test/git_stats_test.py b/test/git_stats_test.py index 7afbcca..5a5cc8a 100644 --- a/test/git_stats_test.py +++ b/test/git_stats_test.py @@ -16,7 +16,8 @@ GITHUB_ACTOR: str = getenv("GITHUB_ACTOR") # or manually enter '' # OPTIONAL -EXCLUDED_REPOS: str = getenv("EXCLUDED") # or enter: '[owner/repo],...,[owner/repo]' +EXCLUDED_REPOS: str = getenv("EXCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]' +EXCLUDED_OWNERS: str = getenv("EXCLUDED_OWNERS") # or enter: '[owner],...,[owner]' EXCLUDED_LANGS: str = getenv("EXCLUDED_LANGS") # or enter: '[lang],...,[lang]' EXCLUDED_REPO_LANGS: str = getenv( "EXCLUDED_REPO_LANGS" @@ -32,14 +33,21 @@ IS_MAINTAIN_REPO_VIEWS: str = getenv("IS_STORE_REPO_VIEWS") # or enter: '' MORE_COLLABS: str = getenv("MORE_COLLABS") # or enter: '' MORE_REPOS: str = getenv("MORE_REPOS") # or enter: '[owner/repo],...,[owner/repo]' -ONLY_INCLUDED: str = getenv("ONLY_INCLUDED") # or enter: '[owner/repo],...' +ONLY_INCLUDED_REPOS: str = getenv("ONLY_INCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]' +ONLY_INCLUDED_OWNERS: str = "University-Project-Repos" # getenv("ONLY_INCLUDED_OWNERS") # or enter: [owner],...,[owner] ONLY_INCLUDED_COLLAB_REPOS: str = getenv( "ONLY_INCLUDED_COLLAB_REPOS" -) # or enter: '[owner/repo],...' +) # or enter: [owner/repo],...,[owner/repo] +ONLY_INCLUDED_COLLAB_REPO_OWNERS: str = getenv( + "ONLY_INCLUDED_COLLAB_REPO_OWNERS" +) # or enter: [owner],...,[owner] EXCLUDED_COLLAB_REPOS: str = getenv( "EXCLUDED_COLLAB_REPOS" -) # or enter: '[owner/repo],...' -MORE_COLLAB_REPOS: str = getenv("MORE_COLLAB_REPOS") # or enter: '[owner/repo],...' +) # or enter: [owner/repo],...,[owner/repo] +EXCLUDED_COLLAB_REPO_OWNERS: str = getenv( + "EXCLUDED_COLLAB_REPO_OWNERS" +) # or enter: [owner],...,[owner] +MORE_COLLAB_REPOS: str = getenv("MORE_COLLAB_REPOS") # or enter: [owner/repo],...,[owner/repo] async def main() -> None: @@ -57,6 +65,7 @@ async def main() -> None: username=GITHUB_ACTOR, access_token=ACCESS_TOKEN, exclude_repos=EXCLUDED_REPOS, + exclude_owners=EXCLUDED_OWNERS, exclude_langs=EXCLUDED_LANGS, exclude_repo_langs=EXCLUDED_REPO_LANGS, is_include_forked_repos=IS_INCLUDE_FORKED_REPOS, @@ -70,9 +79,12 @@ async def main() -> None: is_store_repo_view_count=IS_MAINTAIN_REPO_VIEWS, more_collaborators=MORE_COLLABS, manually_added_repos=MORE_REPOS, - only_included_repos=ONLY_INCLUDED, + only_included_repos=ONLY_INCLUDED_REPOS, + only_included_owners=ONLY_INCLUDED_OWNERS, only_included_collab_repos=ONLY_INCLUDED_COLLAB_REPOS, + only_included_collab_repo_owners=ONLY_INCLUDED_COLLAB_REPO_OWNERS, exclude_collab_repos=EXCLUDED_COLLAB_REPOS, + exclude_collab_repo_owners=EXCLUDED_COLLAB_REPO_OWNERS, more_collab_repos=MORE_COLLAB_REPOS, ), session=session, From d1d5414861dc9a6f18f533f5521dc4338f0b67da Mon Sep 17 00:00:00 2001 From: Adam Ross <14985050+R055A@users.noreply.github.com> Date: Sat, 8 Mar 2025 20:56:20 +0100 Subject: [PATCH 2/3] Fix convention --- src/env_vars.py | 15 ++- src/github_api_queries.py | 26 ++--- src/github_repo_stats.py | 237 ++++++++++++++++++++------------------ test/git_stats_test.py | 16 ++- 4 files changed, 163 insertions(+), 131 deletions(-) diff --git a/src/env_vars.py b/src/env_vars.py index a5bf4b8..70cbb86 100644 --- a/src/env_vars.py +++ b/src/env_vars.py @@ -42,9 +42,11 @@ def __init__( "ONLY_INCLUDED_COLLAB_REPO_OWNERS" ), exclude_collab_repos: Optional[str] = getenv("EXCLUDED_COLLAB_REPOS"), - exclude_collab_repo_owners: Optional[str] = getenv("EXCLUDED_COLLAB_REPO_OWNERS"), + exclude_collab_repo_owners: Optional[str] = getenv( + "EXCLUDED_COLLAB_REPO_OWNERS" + ), more_collab_repos: Optional[str] = getenv("MORE_COLLAB_REPOS"), - more_collab_repo_owners: Optional[str] = getenv("MORE_COLLAB_REPO_OWNERS") + more_collab_repo_owners: Optional[str] = getenv("MORE_COLLAB_REPO_OWNERS"), ) -> None: self.__db: GitRepoStatsDB = GitRepoStatsDB() @@ -180,7 +182,10 @@ def __init__( x.strip() for x in only_included_collab_repos.split(",") } - if only_included_collab_repo_owners is None or only_included_collab_repo_owners == "": + if ( + only_included_collab_repo_owners is None + or only_included_collab_repo_owners == "" + ): self.only_included_collab_repo_owners: set[str] = set() else: self.only_included_collab_repo_owners = { @@ -209,7 +214,9 @@ def __init__( if more_collab_repo_owners is None: self.more_collab_repo_owners: set[str] = set() else: - self.more_collab_repo_owners = {x.strip() for x in more_collab_repo_owners.split(",")} + self.more_collab_repo_owners = { + x.strip() for x in more_collab_repo_owners.split(",") + } self.pull_requests_count: int = self.__db.pull_requests self.issues_count: int = self.__db.issues diff --git a/src/github_api_queries.py b/src/github_api_queries.py index b22a274..0901552 100644 --- a/src/github_api_queries.py +++ b/src/github_api_queries.py @@ -19,8 +19,8 @@ class GitHubApiQueries(object): API. Also includes functions to dynamically generate GraphQL queries. """ - __GITHUB_API_URL: str = 'https://api.github.com/' - __GRAPHQL_PATH: str = 'graphql' + __GITHUB_API_URL: str = "https://api.github.com/" + __GRAPHQL_PATH: str = "graphql" __REST_QUERY_LIMIT: int = 60 __ASYNCIO_SLEEP_TIME: int = 2 __DEFAULT_MAX_CONNECTIONS: int = 10 @@ -37,7 +37,7 @@ def __init__( self.session: ClientSession = session self.semaphore: Semaphore = Semaphore(max_connections) self.headers: dict[str, str] = { - 'Authorization': f'Bearer {self.access_token}', + "Authorization": f"Bearer {self.access_token}", } async def query(self, generated_query: str) -> dict[str, dict]: @@ -52,21 +52,21 @@ async def query(self, generated_query: str) -> dict[str, dict]: r_async = await self.session.post( url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH, headers=self.headers, - json={'query': generated_query}, + json={"query": generated_query}, ) result: dict[str, dict] = await r_async.json() if result is not None: return result except ConnectionError: - print('aiohttp failed for GraphQL query') + print("aiohttp failed for GraphQL query") # Fall back on non-async requests async with self.semaphore: r_requests = post( url=self.__GITHUB_API_URL + self.__GRAPHQL_PATH, headers=self.headers, - json={'query': generated_query}, + json={"query": generated_query}, ) result = r_requests.json() @@ -86,7 +86,7 @@ async def query_rest( for i in range(self.__REST_QUERY_LIMIT): if params is None: params = dict() - if path.startswith('/'): + if path.startswith("/"): path = path[1:] try: @@ -98,7 +98,7 @@ async def query_rest( ) if r_async.status == HTTPStatus.ACCEPTED.value: - print(f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...') + print(f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying...") await sleep(self.__ASYNCIO_SLEEP_TIME) continue @@ -107,7 +107,7 @@ async def query_rest( if result is not None: return result except ConnectionError: - print('aiohttp failed for REST query attempt #' + str(i + 1)) + print("aiohttp failed for REST query attempt #" + str(i + 1)) # Fall back on non-async requests async with self.semaphore: @@ -119,7 +119,7 @@ async def query_rest( if r_requests.status_code == HTTPStatus.ACCEPTED.value: print( - f'A path returned {HTTPStatus.ACCEPTED.value}. Retrying...' + f"A path returned {HTTPStatus.ACCEPTED.value}. Retrying..." ) await sleep(self.__ASYNCIO_SLEEP_TIME) continue @@ -127,7 +127,7 @@ async def query_rest( return r_requests.json() print( - f'Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete.' + f"Too many {HTTPStatus.ACCEPTED.value}s. Data for this repository will be incomplete." ) return dict() @@ -136,7 +136,7 @@ def get_user() -> str: """ :return: GraphQL query with user login and name """ - return f""" + return """ {{ viewer {{ login @@ -287,6 +287,6 @@ def all_contributions(cls, years: list[str]) -> str: @staticmethod def get_language_colors() -> dict[str, dict[str, str]]: url: models.Response = get( - 'https://raw.githubusercontent.com/ozh/github-colors/master/colors.json' + "https://raw.githubusercontent.com/ozh/github-colors/master/colors.json" ) return loads(url.text) diff --git a/src/github_repo_stats.py b/src/github_repo_stats.py index 1a69a16..f84dde5 100644 --- a/src/github_repo_stats.py +++ b/src/github_repo_stats.py @@ -17,11 +17,11 @@ class GitHubRepoStats(object): Retrieve and store statistics about GitHub usage. """ - _DATE_FORMAT: str = '%Y-%m-%d' + _DATE_FORMAT: str = "%Y-%m-%d" _EXCLUDED_USER_NAMES: list[str] = [ - 'dependabot[bot]' + "dependabot[bot]" ] # exclude bot data from being included in statistical calculations - _NO_NAME: str = 'Unknown' + _NO_NAME: str = "Unknown" def __init__( self, environment_vars: EnvironmentVariables, session: ClientSession @@ -62,8 +62,8 @@ async def to_str(self) -> str: :return: summary of all available statistics """ languages: dict[str, float] = await self.languages_proportional - formatted_languages: str = '\n\t\t\t- '.join( - [f'{k}: {v:0.4f}%' for k, v in languages.items()] + formatted_languages: str = "\n\t\t\t- ".join( + [f"{k}: {v:0.4f}%" for k, v in languages.items()] ) users_lines_changed: tuple[int, int] = await self.lines_changed @@ -101,12 +101,17 @@ async def is_repo_name_invalid(self, repo_name: str) -> bool: """ return ( # repo_name in self._repos - (len(self.environment_vars.only_included_owners) > 0 - and repo_name.split('/')[0] not in self.environment_vars.only_included_owners) - or (len(self.environment_vars.only_included_repos) > 0 - and repo_name not in self.environment_vars.only_included_repos) + ( + len(self.environment_vars.only_included_owners) > 0 + and repo_name.split("/")[0] + not in self.environment_vars.only_included_owners + ) + or ( + len(self.environment_vars.only_included_repos) > 0 + and repo_name not in self.environment_vars.only_included_repos + ) or repo_name in self.environment_vars.exclude_repos - or repo_name.split('/')[0] in self.environment_vars.exclude_owners + or repo_name.split("/")[0] in self.environment_vars.exclude_owners ) async def is_repo_type_excluded( @@ -123,13 +128,13 @@ async def is_repo_type_excluded( """ return ( not self.environment_vars.is_include_forked_repos - and (repo_data.get('isFork') or repo_data.get('fork')) + and (repo_data.get("isFork") or repo_data.get("fork")) or self.environment_vars.is_exclude_archive_repos - and (repo_data.get('isArchived') or repo_data.get('archived')) + and (repo_data.get("isArchived") or repo_data.get("archived")) or self.environment_vars.is_exclude_private_repos - and (repo_data.get('isPrivate') or repo_data.get('private')) + and (repo_data.get("isPrivate") or repo_data.get("private")) or self.environment_vars.is_exclude_public_repos - and (not repo_data.get('isPrivate') or not repo_data.get('private')) + and (not repo_data.get("isPrivate") or not repo_data.get("private")) ) async def get_stats(self) -> None: @@ -152,16 +157,22 @@ async def get_stats(self) -> None: ) user_raw_result = user_raw_result if user_raw_result else {} if ( - user_raw_result.get('data', {}) is not None - and user_raw_result.get('data', {}).get('viewer', {}) is not None - and ( - user_raw_result.get('data', {}).get('viewer', {}).get('name', None) is not None or - user_raw_result.get('data', {}).get('viewer', {}).get('user', None) is not None - ) + user_raw_result.get("data", {}) is not None + and user_raw_result.get("data", {}).get("viewer", {}) is not None + and ( + user_raw_result.get("data", {}).get("viewer", {}).get("name", None) + is not None + or user_raw_result.get("data", {}).get("viewer", {}).get("user", None) + is not None + ) ): - self._name = user_raw_result.get('data', {}).get('viewer', {}).get('name', self._NO_NAME) - elif user_raw_result.get('message', '').lower() == 'bad credentials': - raise ConnectionRefusedError(f'Unauthorized Error: Invalid Access Token') + self._name = ( + user_raw_result.get("data", {}) + .get("viewer", {}) + .get("name", self._NO_NAME) + ) + elif user_raw_result.get("message", "").lower() == "bad credentials": + raise ConnectionRefusedError("Unauthorized Error: Invalid Access Token") while True: repo_overview_raw_results: dict[str, dict] = await self.queries.query( @@ -169,22 +180,25 @@ async def get_stats(self) -> None: owned_cursor=next_owned, contrib_cursor=next_contrib ) ) - repo_overview_raw_results = repo_overview_raw_results if repo_overview_raw_results else {} + repo_overview_raw_results = ( + repo_overview_raw_results if repo_overview_raw_results else {} + ) if ( - repo_overview_raw_results.get('data', {}) is not None - and repo_overview_raw_results.get('data', {}).get('viewer', {}) is not None + repo_overview_raw_results.get("data", {}) is not None + and repo_overview_raw_results.get("data", {}).get("viewer", {}) + is not None ): owned_repos: dict[str, dict | list[dict]] = ( - repo_overview_raw_results.get('data', {}) - .get('viewer', {}) - .get('repositories', {}) + repo_overview_raw_results.get("data", {}) + .get("viewer", {}) + .get("repositories", {}) ) - repos: list[dict] = owned_repos.get('nodes', []) + repos: list[dict] = owned_repos.get("nodes", []) contrib_repos: dict[str, dict | list] = ( - repo_overview_raw_results.get('data', {}) - .get('viewer', {}) - .get('repositoriesContributedTo', {}) + repo_overview_raw_results.get("data", {}) + .get("viewer", {}) + .get("repositoriesContributedTo", {}) ) else: owned_repos = {} @@ -192,23 +206,23 @@ async def get_stats(self) -> None: contrib_repos = {} if not self.environment_vars.is_exclude_contrib_repos: - repos += contrib_repos.get('nodes', []) + repos += contrib_repos.get("nodes", []) await self.repo_stats(repos=repos) - is_cur_owned: bool = owned_repos.get('pageInfo', {}).get( - 'hasNextPage', False + is_cur_owned: bool = owned_repos.get("pageInfo", {}).get( + "hasNextPage", False ) - is_cur_contrib: bool = contrib_repos.get('pageInfo', {}).get( - 'hasNextPage', False + is_cur_contrib: bool = contrib_repos.get("pageInfo", {}).get( + "hasNextPage", False ) if is_cur_owned or is_cur_contrib: - next_owned = owned_repos.get('pageInfo', {}).get( - 'endCursor', next_owned + next_owned = owned_repos.get("pageInfo", {}).get( + "endCursor", next_owned ) - next_contrib = contrib_repos.get('pageInfo', {}).get( - 'endCursor', next_contrib + next_contrib = contrib_repos.get("pageInfo", {}).get( + "endCursor", next_contrib ) else: break @@ -223,9 +237,9 @@ async def get_stats(self) -> None: self._excluded_languages.add(lang_name) # TODO: Improve languages to scale by number of contributions to specific filetypes - langs_total: int = sum([v.get('size', 0) for v in self._languages.values()]) + langs_total: int = sum([v.get("size", 0) for v in self._languages.values()]) for k, v in self._languages.items(): - v['prop']: float = 100 * (v.get('size', 0) / langs_total) + v["prop"]: float = 100 * (v.get("size", 0) / langs_total) def __exclude_repo_langs( self, @@ -252,20 +266,20 @@ async def repo_stats(self, repos: list[dict]) -> None: if not repo or await self.is_repo_type_excluded(repo_data=repo): continue - repo_name: str = repo.get('nameWithOwner') + repo_name: str = repo.get("nameWithOwner") if await self.is_repo_name_invalid(repo_name=repo_name): continue self._repos.add(repo_name) - self._stargazers += repo.get('stargazers').get('totalCount', 0) - self._forks += repo.get('forkCount', 0) + self._stargazers += repo.get("stargazers").get("totalCount", 0) + self._forks += repo.get("forkCount", 0) - if repo.get('isEmpty'): + if repo.get("isEmpty"): self._empty_repos.add(repo_name) continue - for lang in repo.get('languages', {}).get('edges', []): - lang_name: str = lang.get('node', {}).get('name', 'Other') + for lang in repo.get("languages", {}).get("edges", []): + lang_name: str = lang.get("node", {}).get("name", "Other") languages: dict[str, dict[str, float | str]] = await self.languages if self.__exclude_repo_langs( @@ -278,13 +292,13 @@ async def repo_stats(self, repos: list[dict]) -> None: continue if lang_name in languages: - languages[lang_name]['size'] += lang.get('size', 0) - languages[lang_name]['occurrences'] += 1 + languages[lang_name]["size"] += lang.get("size", 0) + languages[lang_name]["occurrences"] += 1 else: languages[lang_name] = { - 'size': lang.get('size', 0), - 'occurrences': 1, - 'color': lang.get('node', {}).get('color'), + "size": lang.get("size", 0), + "occurrences": 1, + "color": lang.get("node", {}).get("color"), } async def manually_added_repo_stats(self) -> None: @@ -299,21 +313,21 @@ async def manually_added_repo_stats(self) -> None: self._repos.add(repo_name) repo_stats: dict[str, str | int | dict] = await self.queries.query_rest( - path=f'/repos/{repo_name}' + path=f"/repos/{repo_name}" ) if await self.is_repo_type_excluded(repo_data=repo_stats): continue - self._stargazers += repo_stats.get('stargazers_count', 0) - self._forks += repo_stats.get('forks', 0) + self._stargazers += repo_stats.get("stargazers_count", 0) + self._forks += repo_stats.get("forks", 0) - if repo_stats.get('size') == 0: + if repo_stats.get("size") == 0: self._empty_repos.add(repo_name) continue - if repo_stats.get('language'): + if repo_stats.get("language"): langs: dict[str, int] = await self.queries.query_rest( - path=f'/repos/{repo_name}/languages' + path=f"/repos/{repo_name}/languages" ) for lang_name, size in langs.items(): @@ -329,13 +343,13 @@ async def manually_added_repo_stats(self) -> None: continue if lang_name in languages: - languages[lang_name]['size'] += size - languages[lang_name]['occurrences'] += 1 + languages[lang_name]["size"] += size + languages[lang_name]["occurrences"] += 1 else: languages[lang_name] = { - 'size': size, - 'occurrences': 1, - 'color': lang_cols.get(lang_name).get('color'), + "size": size, + "occurrences": 1, + "color": lang_cols.get(lang_name).get("color"), } @property @@ -401,7 +415,7 @@ async def languages_proportional(self) -> dict[str, float]: if self._languages is None: await self.get_stats() assert self._languages is not None - return {k: v.get('prop', 0) for (k, v) in self._languages.items()} + return {k: v.get("prop", 0) for (k, v) in self._languages.items()} @property async def repos(self) -> set[str]: @@ -427,7 +441,7 @@ async def owned_repos(self) -> set[str]: [ i for i in self._repos - if self.environment_vars.username == i.split('/')[0] + if self.environment_vars.username == i.split("/")[0] ] ) return self._owned_repos @@ -458,10 +472,10 @@ async def total_contributions(self) -> int: generated_query=GitHubApiQueries.contributions_all_years() ) ) - .get('data', {}) - .get('viewer', {}) - .get('contributionsCollection', {}) - .get('contributionYears', []) + .get("data", {}) + .get("viewer", {}) + .get("contributionsCollection", {}) + .get("contributionYears", []) ) by_year: list[dict[str, dict[str, int]]] = list( @@ -470,14 +484,14 @@ async def total_contributions(self) -> int: generated_query=GitHubApiQueries.all_contributions(years=years) ) ) - .get('data', {}) - .get('viewer', {}) + .get("data", {}) + .get("viewer", {}) .values() ) for year in by_year: - self._total_contributions += year.get('contributionCalendar', {}).get( - 'totalContributions', 0 + self._total_contributions += year.get("contributionCalendar", {}).get( + "totalContributions", 0 ) return cast(typ=int, val=self._total_contributions) @@ -494,7 +508,9 @@ async def lines_changed(self) -> tuple[int, int]: return self._users_lines_changed _, collab_repos = await self.raw_collaborators() slave_status_repos: set[str] = self.environment_vars.more_collab_repos - slave_status_repo_owners: set[str] = self.environment_vars.more_collab_repo_owners + slave_status_repo_owners: set[str] = ( + self.environment_vars.more_collab_repo_owners + ) exclusive_collab_repos: set[str] = ( self.environment_vars.only_included_collab_repos ) @@ -523,16 +539,16 @@ async def lines_changed(self) -> tuple[int, int]: author_deletions: int = 0 r: list[dict[str, any]] = await self.queries.query_rest( - path=f'/repos/{repo}/stats/contributors' + path=f"/repos/{repo}/stats/contributors" ) for author_obj in r: # Handle malformed response from API by skipping this repo if not isinstance(author_obj, dict) or not isinstance( - author_obj.get('author', {}), dict + author_obj.get("author", {}), dict ): continue - author: str = author_obj.get('author', {}).get('login', '') + author: str = author_obj.get("author", {}).get("login", "") contributor_set.add( author ) # for count number of total other contributors @@ -541,14 +557,14 @@ async def lines_changed(self) -> tuple[int, int]: author != self.environment_vars.username and author not in self._EXCLUDED_USER_NAMES ): - for week in author_obj.get('weeks', []): - other_authors_total_changes += week.get('a', 0) - other_authors_total_changes += week.get('d', 0) + for week in author_obj.get("weeks", []): + other_authors_total_changes += week.get("a", 0) + other_authors_total_changes += week.get("d", 0) repo_contributors.add(author) else: - for week in author_obj.get('weeks', []): - author_additions += week.get('a', 0) - author_deletions += week.get('d', 0) + for week in author_obj.get("weeks", []): + author_additions += week.get("a", 0) + author_deletions += week.get("d", 0) author_total_additions += author_additions author_total_deletions += author_deletions @@ -559,13 +575,14 @@ async def lines_changed(self) -> tuple[int, int]: # calculate average author's contributions to each repository with at least one other collaborator if ( repo not in self.environment_vars.exclude_collab_repos - and repo.split('/')[0] not in self.environment_vars.exclude_collab_repo_owners + and repo.split("/")[0] + not in self.environment_vars.exclude_collab_repo_owners and ( not (exclusive_collab_repos or exclusive_collab_repo_owners) or repo in exclusive_collab_repos - or repo.split('/')[0] in exclusive_collab_repo_owners + or repo.split("/")[0] in exclusive_collab_repo_owners or repo in slave_status_repos - or repo.split('/')[0] in slave_status_repo_owners + or repo.split("/")[0] in slave_status_repo_owners ) and (author_additions + author_deletions) > 0 and ( @@ -574,7 +591,7 @@ async def lines_changed(self) -> tuple[int, int]: in collab_repos.union( slave_status_repos ) # either collaborators are ghosting or no show in repo - or repo.split('/')[0] in slave_status_repo_owners + or repo.split("/")[0] in slave_status_repo_owners ) ): repo_total_changes: int = ( @@ -598,13 +615,13 @@ async def lines_changed(self) -> tuple[int, int]: if sum(author_contribution_percentages) > 0: self._avg_percent: str = ( - f'{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%' + f"{(sum(author_contribution_percentages) / len(repo_total_changes_arr) * 100):0.2f}%" ) self._avg_percent_weighted: str = ( - f'{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%' + f"{(sum(author_contribution_percentages_weighted) / len(repo_total_changes_arr) * 100):0.2f}%" ) else: - self._avg_percent_weighted = self._avg_percent = 'N/A' + self._avg_percent_weighted = self._avg_percent = "N/A" self._contributors: set[str] = contributor_set @@ -656,23 +673,23 @@ async def views(self) -> int: today_view_count: int = 0 for repo in await self.repos: r: dict[str, str | list[dict[str, str]]] = await self.queries.query_rest( - path=f'/repos/{repo}/traffic/views' + path=f"/repos/{repo}/traffic/views" ) - for view in r.get('views', []): - if view.get('timestamp')[:10] == today: - today_view_count += view.get('count', 0) - elif view.get('timestamp')[:10] > last_viewed: - self.environment_vars.set_views(views=view.get('count', 0)) - dates.add(view.get('timestamp')[:10]) + for view in r.get("views", []): + if view.get("timestamp")[:10] == today: + today_view_count += view.get("count", 0) + elif view.get("timestamp")[:10] > last_viewed: + self.environment_vars.set_views(views=view.get("count", 0)) + dates.add(view.get("timestamp")[:10]) - if last_viewed == '0000-00-00': + if last_viewed == "0000-00-00": dates.remove(last_viewed) if self.environment_vars.is_store_repo_view_count: self.environment_vars.set_last_viewed(new_last_viewed_date=yesterday) - if self.environment_vars.repo_first_viewed == '0000-00-00': + if self.environment_vars.repo_first_viewed == "0000-00-00": self.environment_vars.repo_first_viewed = min(dates) self.environment_vars.set_first_viewed( new_first_viewed_date=self.environment_vars.repo_first_viewed @@ -704,14 +721,14 @@ async def raw_collaborators(self) -> tuple[set[str], set[str]]: for repo in await self.repos: r: list[dict[str, any]] = await self.queries.query_rest( - path=f'/repos/{repo}/collaborators' + path=f"/repos/{repo}/collaborators" ) collab_count: int = 0 for obj in r: if isinstance(obj, dict): collab_count += 1 - self._collaborator_set.add(obj.get('login')) + self._collaborator_set.add(obj.get("login")) if collab_count > 1: self._collab_repos.add(repo) @@ -759,14 +776,14 @@ async def pull_requests(self) -> int: if not self._is_fetch_rate_limit_exceeded: for repo in await self.repos: end_point: str = ( - f'/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}' + f"/repos/{repo}/pulls?state=all&involved={self.environment_vars.username}" ) for pr_data in await self.queries.query_rest(path=end_point): try: ( - pull_requests.add(pr_data['url']) - if 'url' in pr_data.keys() + pull_requests.add(pr_data["url"]) + if "url" in pr_data.keys() else None ) except AttributeError: @@ -797,14 +814,14 @@ async def issues(self) -> int: if not self._is_fetch_rate_limit_exceeded: for repo in await self.repos: end_point: str = ( - f'/repos/{repo}/issues?state=all&involved={self.environment_vars.username}' + f"/repos/{repo}/issues?state=all&involved={self.environment_vars.username}" ) for issue_data in await self.queries.query_rest(path=end_point): try: ( - issues.add(issue_data['url']) - if 'url' in issue_data.keys() + issues.add(issue_data["url"]) + if "url" in issue_data.keys() else None ) except AttributeError: diff --git a/test/git_stats_test.py b/test/git_stats_test.py index 5a5cc8a..42b3ba5 100644 --- a/test/git_stats_test.py +++ b/test/git_stats_test.py @@ -16,7 +16,9 @@ GITHUB_ACTOR: str = getenv("GITHUB_ACTOR") # or manually enter '' # OPTIONAL -EXCLUDED_REPOS: str = getenv("EXCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]' +EXCLUDED_REPOS: str = getenv( + "EXCLUDED_REPOS" +) # or enter: '[owner/repo],...,[owner/repo]' EXCLUDED_OWNERS: str = getenv("EXCLUDED_OWNERS") # or enter: '[owner],...,[owner]' EXCLUDED_LANGS: str = getenv("EXCLUDED_LANGS") # or enter: '[lang],...,[lang]' EXCLUDED_REPO_LANGS: str = getenv( @@ -33,8 +35,12 @@ IS_MAINTAIN_REPO_VIEWS: str = getenv("IS_STORE_REPO_VIEWS") # or enter: '' MORE_COLLABS: str = getenv("MORE_COLLABS") # or enter: '' MORE_REPOS: str = getenv("MORE_REPOS") # or enter: '[owner/repo],...,[owner/repo]' -ONLY_INCLUDED_REPOS: str = getenv("ONLY_INCLUDED_REPOS") # or enter: '[owner/repo],...,[owner/repo]' -ONLY_INCLUDED_OWNERS: str = "University-Project-Repos" # getenv("ONLY_INCLUDED_OWNERS") # or enter: [owner],...,[owner] +ONLY_INCLUDED_REPOS: str = getenv( + "ONLY_INCLUDED_REPOS" +) # or enter: '[owner/repo],...,[owner/repo]' +ONLY_INCLUDED_OWNERS: str = ( + "University-Project-Repos" # getenv("ONLY_INCLUDED_OWNERS") # or enter: [owner],...,[owner] +) ONLY_INCLUDED_COLLAB_REPOS: str = getenv( "ONLY_INCLUDED_COLLAB_REPOS" ) # or enter: [owner/repo],...,[owner/repo] @@ -47,7 +53,9 @@ EXCLUDED_COLLAB_REPO_OWNERS: str = getenv( "EXCLUDED_COLLAB_REPO_OWNERS" ) # or enter: [owner],...,[owner] -MORE_COLLAB_REPOS: str = getenv("MORE_COLLAB_REPOS") # or enter: [owner/repo],...,[owner/repo] +MORE_COLLAB_REPOS: str = getenv( + "MORE_COLLAB_REPOS" +) # or enter: [owner/repo],...,[owner/repo] async def main() -> None: From c82de026a95ef2df2c906499117bdc3877b90461 Mon Sep 17 00:00:00 2001 From: Adam Ross <14985050+R055A@users.noreply.github.com> Date: Sat, 8 Mar 2025 21:29:30 +0100 Subject: [PATCH 3/3] Fix API-fetch name/user, division by zero --- src/generate_images.py | 2 +- src/github_api_queries.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/generate_images.py b/src/generate_images.py index b945355..3b29a41 100644 --- a/src/generate_images.py +++ b/src/generate_images.py @@ -192,7 +192,7 @@ async def generate_overview(self) -> None: else add_unit(num_repos) ) repos_str: str = ( - f"{repos:,} [{'%g' % round(num_collab_repos / num_repos * 100, 2)}%]" + f"{repos:,} [{'%g' % round(num_collab_repos / num_repos * 100, 2) if num_collab_repos > 0 and num_repos > 0 else 0}%]" ) output = sub(pattern="{{ repos_str }}", repl=repos_str, string=output) diff --git a/src/github_api_queries.py b/src/github_api_queries.py index 0901552..d6ae671 100644 --- a/src/github_api_queries.py +++ b/src/github_api_queries.py @@ -137,12 +137,12 @@ def get_user() -> str: :return: GraphQL query with user login and name """ return """ - {{ - viewer {{ + { + viewer { login name - }} - }}""" + } + }""" @staticmethod def repos_overview(