diff --git a/src/ForgejoRepoAPI.py b/src/ForgejoRepoAPI.py index 3dc6ea1..d76fb93 100644 --- a/src/ForgejoRepoAPI.py +++ b/src/ForgejoRepoAPI.py @@ -25,7 +25,7 @@ class ForgejoRepoAPI(IRepositoryAPI): - def __init__(self, client): + def __init__(self, client: PyforgejoApi): self.client = client def get_user_data(self, user) -> User: @@ -141,6 +141,8 @@ def get_pull_requests(self, repo: Repository) -> list[PullRequest]: issue_url=None, # TODO если возможно - пока не нашел labels=[label.name for label in p.labels] if p.labels else [], milestone=p.milestone.title if p.milestone else None, + comments=p.comments, + review_comments=p.review_comments ) for p in pulls ] @@ -241,13 +243,21 @@ def get_comments(self, repo, obj) -> list[Comment]: ] elif isinstance(obj, PullRequest): - comments = self.client.repository.repo_get_pull_review_comments( - repo.owner.login, repo.name, obj._id, 100000 - ) - # нет id комментария - сейчас не работает, т.к. требуется ID ревью - нужно сначала получить \ - # список ревью /repos/{owner}/{repo}/pulls/{index}/reviews - # TODO: нужны отдельные запросы для получения комментариев и комментариев ревью #163, \ - # в основной сущности PullRequest есть поля "comments": int, "review_comments": int + pull_request = obj + comments = [] + pr_data = (repo.owner.login, repo.name, pull_request._id) + if pull_request.comments: + # simple comments in PR + comments.extend(self.client.issue.get_comments(*pr_data)) + if pull_request.review_comments: + # get all PR reviews + pr_reviews = self.get_all_data_from_pages(self.client.repository.repo_list_pull_reviews, *pr_data) + for review in pr_reviews: + comments.extend( + self.client.repository.repo_get_pull_review_comments( + *pr_data, review.id + ) + ) result = [ Comment( body=c.body, diff --git a/src/commits_parser.py b/src/commits_parser.py index f700850..74a2d8c 100644 --- a/src/commits_parser.py +++ b/src/commits_parser.py @@ -1,93 +1,93 @@ -from dataclasses import asdict, dataclass -from datetime 
datetime -from time import sleep -from typing import Generator - -import pytz - -from src.constants import EMPTY_FIELD, GOOGLE_MAX_CELL_LEN, TIMEDELTA, TIMEZONE -from src.interface_wrapper import IRepositoryAPI, Repository -from src.utils import logger - - -@dataclass(kw_only=True, frozen=True) -class CommitData: - repository_name: str = '' - author_name: str = '' - author_login: str = '' - author_email: str = '' - date_and_time: str = '' - changed_files: str = '' - commit_id: str = '' - branch: str = '' - additions: str = '' - deletions: str = '' - - -def log_repository_commits( - client: IRepositoryAPI, repository: Repository, csv_name, start, finish, branch -): - branches = [] - match branch: - case 'all': - for branch in client.get_branches(repository): - branches.append(branch.name) - case None: - branches.append(repository.default_branch.name) - case _: - branches.append(branch) - - for branch in branches: - print(f'Processing branch {branch}') - commits = client.get_commits(repository) - for commit in commits: - if ( - commit.date.astimezone(pytz.timezone(TIMEZONE)) < start - or commit.date.astimezone(pytz.timezone(TIMEZONE)) > finish - ): - continue - - changed_files = '; '.join([file for file in commit.files]) - changed_files = changed_files[:GOOGLE_MAX_CELL_LEN] - commit_data = CommitData( - repository_name=repository.name, - author_name=commit.author.username if commit.author else EMPTY_FIELD, - author_login=commit.author.login if commit.author else EMPTY_FIELD, - author_email=commit.author.email if commit.author else EMPTY_FIELD, - date_and_time=commit.date.astimezone(pytz.timezone(TIMEZONE)).isoformat(), - changed_files=changed_files, - commit_id=commit._id, - branch=branch, - additions=commit.additions, - deletions=commit.deletions, - ) - info = asdict(commit_data) - - logger.log_to_csv(csv_name, list(info.keys()), info) - logger.log_to_stdout(info) - - sleep(TIMEDELTA) - - -def log_commits( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, 
str], None, None], - csv_name: str, - start: datetime, - finish: datetime, - branch: str, - fork_flag: bool, -): - info = asdict(CommitData()) - logger.log_to_csv(csv_name, list(info.keys())) - - for client, repo, token in binded_repos: - logger.log_title(repo.name) - log_repository_commits(client, repo, csv_name, start, finish, branch) - if fork_flag: - for forked_repo in client.get_forks(repo): - logger.log_title(f"FORKED: {forked_repo.name}") - log_repository_commits( - client, forked_repo, csv_name, start, finish, branch - ) - sleep(TIMEDELTA) - sleep(TIMEDELTA) +from dataclasses import asdict, dataclass +from datetime import datetime +from time import sleep +from typing import Generator + +import pytz + +from src.constants import EMPTY_FIELD, GOOGLE_MAX_CELL_LEN, TIMEDELTA, TIMEZONE +from src.interface_wrapper import IRepositoryAPI, Repository +from src.utils import logger + + +@dataclass(kw_only=True, frozen=True) +class CommitData: + repository_name: str = '' + author_name: str = '' + author_login: str = '' + author_email: str = '' + date_and_time: str = '' + changed_files: str = '' + commit_id: str = '' + branch: str = '' + additions: str = '' + deletions: str = '' + + +def log_repository_commits( + client: IRepositoryAPI, repository: Repository, csv_name, start, finish, branch +): + branches = [] + match branch: + case 'all': + for branch in client.get_branches(repository): + branches.append(branch.name) + case None: + branches.append(repository.default_branch.name) + case _: + branches.append(branch) + + for branch in branches: + print(f'Processing branch {branch}') + commits = client.get_commits(repository) + for commit in commits: + if ( + commit.date.astimezone(pytz.timezone(TIMEZONE)) < start + or commit.date.astimezone(pytz.timezone(TIMEZONE)) > finish + ): + continue + + changed_files = '; '.join([file for file in commit.files]) + changed_files = changed_files[:GOOGLE_MAX_CELL_LEN] + commit_data = CommitData( + repository_name=repository.name, + 
author_name=commit.author.username if commit.author else EMPTY_FIELD, + author_login=commit.author.login if commit.author else EMPTY_FIELD, + author_email=commit.author.email if commit.author else EMPTY_FIELD, + date_and_time=commit.date.astimezone(pytz.timezone(TIMEZONE)).isoformat(), + changed_files=changed_files, + commit_id=commit._id, + branch=branch, + additions=commit.additions, + deletions=commit.deletions, + ) + info = asdict(commit_data) + + logger.log_to_csv(csv_name, list(info.keys()), info) + logger.log_to_stdout(info) + + sleep(TIMEDELTA) + + +def log_commits( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, + start: datetime, + finish: datetime, + branch: str, + fork_flag: bool = False, +): + info = asdict(CommitData()) + logger.log_to_csv(csv_name, list(info.keys())) + + for client, repo, token in binded_repos: + logger.log_title(repo.name) + log_repository_commits(client, repo, csv_name, start, finish, branch) + if fork_flag: + for forked_repo in client.get_forks(repo): + logger.log_title(f"FORKED: {forked_repo.name}") + log_repository_commits( + client, forked_repo, csv_name, start, finish, branch + ) + sleep(TIMEDELTA) + sleep(TIMEDELTA) diff --git a/src/interface_wrapper.py b/src/interface_wrapper.py index dc6e62b..2419686 100644 --- a/src/interface_wrapper.py +++ b/src/interface_wrapper.py @@ -89,7 +89,9 @@ class PullRequest: files: list[str] issue_url: str labels: list[str] - milestone: str + milestone: str + comments: int = 0 + review_comments: int = 0 @dataclass diff --git a/src/pull_requests_parser.py b/src/pull_requests_parser.py index 3e54062..ac177d8 100644 --- a/src/pull_requests_parser.py +++ b/src/pull_requests_parser.py @@ -1,197 +1,197 @@ -import json -from dataclasses import asdict -from datetime import datetime -from time import sleep -from typing import Generator - -import pytz -import requests - -from src.constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE -from src.git_logger import 
get_assignee_story -from src.interface_wrapper import IRepositoryAPI, Repository -from src.utils import logger -from src.repo_dataclasses import PullRequestData - - -def get_related_issues(pull_request_number, repo_owner, repo_name, token): - # TODO как-то заменить - return - access_token = token - repo_owner = repo_owner.login - - # Формирование запроса GraphQL - query = """ - { - repository(owner: "%s", name: "%s") { - pullRequest(number: %d) { - id - closingIssuesReferences(first: 50) { - edges { - node { - id - body - number - title - url - } - } - } - } - } - } - """ % ( - repo_owner, - repo_name, - pull_request_number, - ) - - # Формирование заголовков запроса - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/json", - } - - # Отправка запроса GraphQL - response = requests.post( - "https://api.github.com/graphql", - headers=headers, - data=json.dumps({"query": query}), - ) - response_data = response.json() - # Обработка полученных данных - pull_request_data = response_data["data"]["repository"]["pullRequest"] - issues_data = pull_request_data["closingIssuesReferences"]["edges"] - list_issues_url = [] - # сохранение информации об issues - for issue in issues_data: - issue_node = issue["node"] - list_issues_url.append(issue_node["url"]) - return ';'.join(list_issues_url) - - -def nvl(val): - return val or EMPTY_FIELD - - -def get_info(obj, attr): - return EMPTY_FIELD if obj is None else getattr(obj, attr) - - -# -----------GithubAPI block-------------- - - -def log_repositories_pr( - client: IRepositoryAPI, - repository: Repository, - csv_name, - token, - start, - finish, - log_comments=False, -): - def nvl(val): - return val or EMPTY_FIELD - - def get_info(obj, attr): - return EMPTY_FIELD if obj is None else getattr(obj, attr) - - pulls = client.get_pull_requests(repository) - for pull in pulls: - if ( - pull.created_at.astimezone(pytz.timezone(TIMEZONE)) < start - or pull.created_at.astimezone(pytz.timezone(TIMEZONE)) > 
finish - ): - continue - - pr_data = PullRequestData( - repository_name=repository.name, - title=pull.title, - id=pull._id, - state=pull.state, - commit_into=pull.base_label, - commit_from=pull.head_label, - created_at=str(pull.created_at), - creator_name=nvl(pull.author.username), - creator_login=pull.author.login, - creator_email=pull.author.email, - changed_files='; '.join(pull.files), - merger_name=pull.merged_by.username if pull.merged_by else None, - merger_login=pull.merged_by.login if pull.merged_by else None, - merger_email=pull.merged_by.email if pull.merged_by else None, - merged=pull.merged, - source_branch=pull.head_ref, - target_branch=pull.base_ref, - assignee_story=get_assignee_story(pull, client, token, repository), - related_issues=( - get_related_issues(pull._id, repository.owner, repository.name, token) - if pull.issue_url is not None - else EMPTY_FIELD - ), - labels=';'.join(pull.labels) if pull.labels else EMPTY_FIELD, - milestone=pull.milestone, - ) - - if log_comments: - comments = client.get_comments(repository, pull) - if comments: - for comment in comments: - comment_data = PullRequestData( - **( - asdict(pr_data) | - dict( - comment_body=comment.body, - comment_created_at=str(comment.created_at), - comment_author_name=comment.author.name, - comment_author_login=comment.author.login, - comment_author_email=nvl(comment.author.email), - ) - ) - ) - comment_data = asdict(comment_data) - - logger.log_to_csv(csv_name, list(comment_data.keys()), comment_data) - logger.log_to_stdout(comment_data) - else: - base_pr_info = asdict(pr_data) - logger.log_to_csv(csv_name, list(base_pr_info.keys()), base_pr_info) - logger.log_to_stdout(base_pr_info) - else: - base_pr_info = asdict(pr_data) - logger.log_to_csv(csv_name, list(base_pr_info.keys()), base_pr_info) - logger.log_to_stdout(base_pr_info) - - sleep(TIMEDELTA) - - -def log_pull_requests( - binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], - csv_name: str, - start: 
datetime, - finish: datetime, - fork_flag: bool, - log_comments=False, -): - info = asdict(PullRequestData()) - logger.log_to_csv(csv_name, list(info.keys())) - - for client, repo, token in binded_repos: - logger.log_title(repo.name) - log_repositories_pr( - client, repo, csv_name, token, start, finish, log_comments - ) - if fork_flag: - forked_repos = client.get_repo(repo._id).get_forks() - for forked_repo in forked_repos: - logger.log_title(f"FORKED: {forked_repo.name}") - log_repositories_pr( - client, - forked_repo, - csv_name, - token, - start, - finish, - log_comments, - ) - sleep(TIMEDELTA) - sleep(TIMEDELTA) +import json +from dataclasses import asdict +from datetime import datetime +from time import sleep +from typing import Generator + +import pytz +import requests + +from src.constants import EMPTY_FIELD, TIMEDELTA, TIMEZONE +from src.git_logger import get_assignee_story +from src.interface_wrapper import IRepositoryAPI, Repository +from src.utils import logger +from src.repo_dataclasses import PullRequestData + + +def get_related_issues(pull_request_number, repo_owner, repo_name, token): + # TODO как-то заменить + return + access_token = token + repo_owner = repo_owner.login + + # Формирование запроса GraphQL + query = """ + { + repository(owner: "%s", name: "%s") { + pullRequest(number: %d) { + id + closingIssuesReferences(first: 50) { + edges { + node { + id + body + number + title + url + } + } + } + } + } + } + """ % ( + repo_owner, + repo_name, + pull_request_number, + ) + + # Формирование заголовков запроса + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json", + } + + # Отправка запроса GraphQL + response = requests.post( + "https://api.github.com/graphql", + headers=headers, + data=json.dumps({"query": query}), + ) + response_data = response.json() + # Обработка полученных данных + pull_request_data = response_data["data"]["repository"]["pullRequest"] + issues_data = 
pull_request_data["closingIssuesReferences"]["edges"] + list_issues_url = [] + # сохранение информации об issues + for issue in issues_data: + issue_node = issue["node"] + list_issues_url.append(issue_node["url"]) + return ';'.join(list_issues_url) + + +def nvl(val): + return val or EMPTY_FIELD + + +def get_info(obj, attr): + return EMPTY_FIELD if obj is None else getattr(obj, attr) + + +# -----------GithubAPI block-------------- + + +def log_repositories_pr( + client: IRepositoryAPI, + repository: Repository, + csv_name, + token, + start, + finish, + log_comments=False, +): + def nvl(val): + return val or EMPTY_FIELD + + def get_info(obj, attr): + return EMPTY_FIELD if obj is None else getattr(obj, attr) + + pulls = client.get_pull_requests(repository) + for pull in pulls: + if ( + pull.created_at.astimezone(pytz.timezone(TIMEZONE)) < start + or pull.created_at.astimezone(pytz.timezone(TIMEZONE)) > finish + ): + continue + + pr_data = PullRequestData( + repository_name=repository.name, + title=pull.title, + id=pull._id, + state=pull.state, + commit_into=pull.base_label, + commit_from=pull.head_label, + created_at=str(pull.created_at), + creator_name=nvl(pull.author.username), + creator_login=pull.author.login, + creator_email=pull.author.email, + changed_files='; '.join(pull.files), + merger_name=pull.merged_by.username if pull.merged_by else None, + merger_login=pull.merged_by.login if pull.merged_by else None, + merger_email=pull.merged_by.email if pull.merged_by else None, + merged=pull.merged, + source_branch=pull.head_ref, + target_branch=pull.base_ref, + assignee_story=get_assignee_story(pull, client, token, repository), + related_issues=( + get_related_issues(pull._id, repository.owner, repository.name, token) + if pull.issue_url is not None + else EMPTY_FIELD + ), + labels=';'.join(pull.labels) if pull.labels else EMPTY_FIELD, + milestone=pull.milestone, + ) + + if log_comments: + comments = client.get_comments(repository, pull) + if comments: + for 
comment in comments: + comment_data = PullRequestData( + **( + asdict(pr_data) | + dict( + comment_body=comment.body, + comment_created_at=str(comment.created_at), + comment_author_name=comment.author.username, + comment_author_login=comment.author.login, + comment_author_email=nvl(comment.author.email), + ) + ) + ) + comment_data = asdict(comment_data) + + logger.log_to_csv(csv_name, list(comment_data.keys()), comment_data) + logger.log_to_stdout(comment_data) + else: + base_pr_info = asdict(pr_data) + logger.log_to_csv(csv_name, list(base_pr_info.keys()), base_pr_info) + logger.log_to_stdout(base_pr_info) + else: + base_pr_info = asdict(pr_data) + logger.log_to_csv(csv_name, list(base_pr_info.keys()), base_pr_info) + logger.log_to_stdout(base_pr_info) + + sleep(TIMEDELTA) + + +def log_pull_requests( + binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None], + csv_name: str, + start: datetime, + finish: datetime, + fork_flag: bool, + log_comments=False, +): + info = asdict(PullRequestData()) + logger.log_to_csv(csv_name, list(info.keys())) + + for client, repo, token in binded_repos: + logger.log_title(repo.name) + log_repositories_pr( + client, repo, csv_name, token, start, finish, log_comments + ) + if fork_flag: + forked_repos = client.get_repo(repo._id).get_forks() + for forked_repo in forked_repos: + logger.log_title(f"FORKED: {forked_repo.name}") + log_repositories_pr( + client, + forked_repo, + csv_name, + token, + start, + finish, + log_comments, + ) + sleep(TIMEDELTA) + sleep(TIMEDELTA)