From b0f21b431f132f0ccd296fc0d2bbd1fe75be438f Mon Sep 17 00:00:00 2001 From: Robert-Furth Date: Mon, 28 Nov 2022 12:20:20 -0800 Subject: [PATCH 1/2] copy updated bugswarm/common dir from bugswarm/bugswarm repo --- .../common/artifact_processing/runners.py | 2 +- bugswarm/common/artifact_processing/utils.py | 2 +- bugswarm/common/credentials.py | 8 +- bugswarm/common/decorators/classproperty.py | 2 + bugswarm/common/filter_reasons.py | 19 ++ bugswarm/common/github_wrapper.py | 5 +- bugswarm/common/log_downloader.py | 19 +- bugswarm/common/outdated.py | 2 +- bugswarm/common/rest_api/database_api.py | 224 ++++++++++++++++-- bugswarm/common/shell_wrapper.py | 10 +- bugswarm/common/travis_wrapper.py | 54 ++++- 11 files changed, 292 insertions(+), 55 deletions(-) create mode 100644 bugswarm/common/filter_reasons.py diff --git a/bugswarm/common/artifact_processing/runners.py b/bugswarm/common/artifact_processing/runners.py index 2512bd1..3e96cc5 100644 --- a/bugswarm/common/artifact_processing/runners.py +++ b/bugswarm/common/artifact_processing/runners.py @@ -6,7 +6,7 @@ from typing import List from . import utils as procutils -from .. import log +from bugswarm.common import log class ParallelArtifactRunner(object): diff --git a/bugswarm/common/artifact_processing/utils.py b/bugswarm/common/artifact_processing/utils.py index 4ef1076..d8e3d63 100644 --- a/bugswarm/common/artifact_processing/utils.py +++ b/bugswarm/common/artifact_processing/utils.py @@ -5,7 +5,7 @@ import shutil from ..shell_wrapper import ShellWrapper -from bugswarm.common.rest_api.database_api import DatabaseAPI +from ..rest_api.database_api import DatabaseAPI REPOS_DIR = '/home/travis/build' _SANDBOX = 'bugswarm-sandbox' diff --git a/bugswarm/common/credentials.py b/bugswarm/common/credentials.py index c65d644..53d289b 100644 --- a/bugswarm/common/credentials.py +++ b/bugswarm/common/credentials.py @@ -1,3 +1,9 @@ # DockerHub DOCKER_HUB_USERNAME = 'bugswarm' -DOCKER_HUB_REPO = '{}/{}'.format(DOCKER_HUB_USERNAME, 'images') +DOCKER_HUB_REPO = 'bugswarm/images' +DOCKER_HUB_CACHED_REPO = 'bugswarm/cached-images' + +COMMON_HOSTNAME = 'www.api.bugswarm.org' + +# Travis +TRAVIS_TOKENS = [] diff --git a/bugswarm/common/decorators/classproperty.py b/bugswarm/common/decorators/classproperty.py index 1ec6289..343cb80 100644 --- a/bugswarm/common/decorators/classproperty.py +++ b/bugswarm/common/decorators/classproperty.py @@ -4,11 +4,13 @@ These decorators are adapted from answers to the Stack Overflow question at https://stackoverflow.com/q/3203286. """ + class classproperty: """ Same as property(), but passes obj.__class__ instead of obj to fget/fset/fdel. Original code for property emulation at https://docs.python.org/3.5/howto/descriptor.html#properties. """ + def __init__(self, fget=None, fset=None, fdel=None, doc=None): self.fget = fget self.fset = fset diff --git a/bugswarm/common/filter_reasons.py b/bugswarm/common/filter_reasons.py new file mode 100644 index 0000000..1f85a6b --- /dev/null +++ b/bugswarm/common/filter_reasons.py @@ -0,0 +1,19 @@ +""" +String constants for each reason that Pair Filter filters a pair. Also used in the evaluation script. + +Think carefully before changing these constants! Any changes will create inconsistency in the 'filtered_reason' +attribute in the metadata database. In turn, changes will break the evaluation script. In short, if these constants +change, then to remove inconsistencies between previously created artifacts and newly created artifacts, either every +artifact will need to be recreated or the metadata database will need to be updated so that previously created artifacts +use the new constants in their metadata. +""" + +NO_HEAD_SHA = 'no head sha' +NO_ORIGINAL_LOG = 'do not have original log' +ERROR_READING_ORIGINAL_LOG = 'error when reading original log' +NO_IMAGE_PROVISION_TIMESTAMP = 'original log does not have provisioned datetime string' +INACCESSIBLE_IMAGE = 'do not have image' +NOT_RESETTABLE = 'not resettable' # Deprecated. +NOT_ACQUIRABLE = 'not acquirable' # Deprecated. +NOT_AVAILABLE = 'not available' +SAME_COMMIT_PAIR = 'failed build has same sha with passed build' diff --git a/bugswarm/common/github_wrapper.py b/bugswarm/common/github_wrapper.py index 45d7c0c..b7c01b2 100644 --- a/bugswarm/common/github_wrapper.py +++ b/bugswarm/common/github_wrapper.py @@ -10,7 +10,7 @@ import requests import copy -from . import log +from bugswarm.common import log class GitHubWrapper(object): @@ -71,6 +71,9 @@ def get(self, url: str): elif response.status_code == 451: # Repository access blocked. log.error('Repository access blocked:', url) return None, None + elif response.status_code == 401: # Not authorized. + log.error('Invalid GitHub API token: ', self._session.headers['Authorization']) + return None, None elif response.status_code == 422: return None, None else: diff --git a/bugswarm/common/log_downloader.py b/bugswarm/common/log_downloader.py index f522825..ed116cc 100644 --- a/bugswarm/common/log_downloader.py +++ b/bugswarm/common/log_downloader.py @@ -8,7 +8,7 @@ from typing import Union from urllib.error import URLError -from . import log +from bugswarm.common import log _DEFAULT_RETRIES = 3 @@ -40,14 +40,15 @@ def download_log(job_id: Union[str, int], raise FileExistsError job_id = str(job_id) - - aws_log_link = 'https://s3.amazonaws.com/archive.travis-ci.org/jobs/{}/log.txt'.format(job_id) travis_log_link = 'https://api.travis-ci.org/jobs/{}/log.txt'.format(job_id) - - content = _get_log_from_url(aws_log_link, retries) or _get_log_from_url(travis_log_link, retries) + content = _get_log_from_url(travis_log_link, retries) if not content: - return False + travis_log_link = 'https://api.travis-ci.com/v3/job/{}/log.txt'.format(job_id) + content = _get_log_from_url(travis_log_link, retries) + # If this endpoint fails, the log is not on either endpoint and does not exist + if not content: + return False with open(destination, 'wb') as f: f.write(content) @@ -111,12 +112,12 @@ def _get_log_from_url(log_url: str, max_retries: int, retry_count: int = 0): result = url.read() log.info('Downloaded log from {}.'.format(log_url)) return result - except URLError: - log.info('Could not download log from {}.'.format(log_url)) + except URLError as e: + log.error('Could not download log from {}.'.format(log_url, e.reason)) return None except ConnectionResetError: if retry_count == max_retries: - log.info('Could not download log from', log_url, 'after retrying', max_retries, 'times.') + log.warning('Could not download log from', log_url, 'after retrying', max_retries, 'times.') return None log.warning('The server reset the connection. Retrying after', sleep_duration, 'seconds.') time.sleep(sleep_duration) diff --git a/bugswarm/common/outdated.py b/bugswarm/common/outdated.py index 7877509..cf63ec8 100644 --- a/bugswarm/common/outdated.py +++ b/bugswarm/common/outdated.py @@ -6,7 +6,7 @@ import requests -from . import log +from bugswarm.common import log from .shell_wrapper import ShellWrapper diff --git a/bugswarm/common/rest_api/database_api.py b/bugswarm/common/rest_api/database_api.py index a402b74..620e25d 100644 --- a/bugswarm/common/rest_api/database_api.py +++ b/bugswarm/common/rest_api/database_api.py @@ -1,21 +1,23 @@ +import os import datetime import json import pprint -from datetime import date +import sys +import secrets +import requests +import requests.auth +from datetime import date from typing import Dict from typing import Generator from typing import List from urllib.parse import urljoin - -import requests -import requests.auth - from requests import Response -from bugswarm.common.decorators.classproperty import classproperty -from bugswarm.common.decorators.classproperty import classproperty_support -from bugswarm.common import log +from ..decorators.classproperty import classproperty +from ..decorators.classproperty import classproperty_support +from .. import log +from bugswarm.common.credentials import COMMON_HOSTNAME __all__ = ['Endpoint', 'DatabaseAPI'] @@ -30,7 +32,11 @@ class DatabaseAPI(object): # The base URL must include 'www'. Otherwise, the redirect performed by the backend will cause the requests library # to strip authorization headers, which are needed for authentication. # See https://github.com/requests/requests/issues/2949 for more information. - _HOSTNAME = 'www.api.bugswarm.org' + _HOSTNAME = COMMON_HOSTNAME + if not _HOSTNAME: + print('[ERROR]: COMMON_HOSTNAME has not been found. Please set the environmental variable and reinstall ' + 'common package') + sys.exit(1) _API_VERSION = 'v1' _BASE_URL = 'http://{}/{}'.format(_HOSTNAME, _API_VERSION) _ARTIFACTS_RESOURCE = 'artifacts' @@ -38,6 +44,7 @@ class DatabaseAPI(object): _MINED_PROJECTS_RESOURCE = 'minedProjects' _EMAIL_SUBSCRIBERS_RESOURCE = 'emailSubscribers' _ACCOUNTS_RESOURCE = 'accounts' + _LOGS_RESOURCE = 'logs' def __init__(self, token: str): """ @@ -91,15 +98,15 @@ def find_artifact(self, image_tag: str, error_if_not_found: bool = True) -> Resp :param error_if_not_found: return err if the image tag not found. default True. :return: The response object. e.g. find_artifact("Abjad-abjad-289716771") - """ + """ log.debug('Trying to find artifact with image_tag {}.'.format(image_tag)) return self._get(DatabaseAPI._artifact_image_tag_endpoint(image_tag), error_if_not_found) def list_artifacts(self) -> List: """ - Return a List of java and python artifacts that has at least one reproduce_successes. - """ - api_filter = '{"reproduce_successes":{"$gt":0},"lang":{"$in":["Java","Python"]}}' + Return a List of artifacts that has at least one reproduce_successes. + """ + api_filter = '{"reproduce_successes":{"$gt":0}}' return self.filter_artifacts(api_filter) def filter_artifacts(self, api_filter: str) -> List: @@ -186,8 +193,8 @@ def set_artifact_classification_category(self, image_tag: str, category_type: st """ Add a classification to an existing artifact. - If the metric with name `category_type` already exists, then its value will be overridden with `category_confidence`. - Otherwise, the metric will be created with value `category_confidence`. + If the metric with name `category_type` already exists, then its value will be overridden with + `category_confidence`. Otherwise, the metric will be created with value `category_confidence`. :param image_tag: The image tag identifying the artifact to update. :param category_type: The name of the category type to update. @@ -291,6 +298,62 @@ def set_artifact_current_status(self, image_tag: str, status: str, date: str) -> updates = {'current_status': {'status': status, 'time_stamp': date}} return self._patch(DatabaseAPI._artifact_image_tag_endpoint(image_tag), updates) + def update_artifact_repo_name(self, image_tag: str, new_repo: str) -> Response: + """ + Update the repository name of an artifact. + :param image_tag: The image tag identifying the artifact to update. + :param new_repo: The updated repo name we are setting. + :return: The response object. + """ + if not isinstance(image_tag, str): + raise TypeError + if not isinstance(new_repo, str): + raise TypeError + updates = {'repo': new_repo} + return self._patch(DatabaseAPI._artifact_image_tag_endpoint(image_tag), updates) + + ################################### + # Logs REST methods + ################################### + def set_build_log(self, job_id: str, build_log_fp: str) -> Response: + """ + Add the failed or passed build log for the artifact to the logs collection. + :param job_id: The job id corresponding to the passed or failed build log. + :param build_log_fp: The path for the build log to be loaded into the database. + :return: The response object. + """ + if not isinstance(job_id, str): + raise TypeError + if not job_id: + raise ValueError + if not isinstance(build_log_fp, str): + raise TypeError + if not build_log_fp: + raise ValueError + if not os.path.exists(build_log_fp): + raise IOError + with open(build_log_fp, 'r') as build_log: + build_log_text = build_log.read() + log_entry = { + 'job_id': job_id, + 'build_log': build_log_text, + } + return self._insert(DatabaseAPI._logs_endpoint(), log_entry, 'log') + + def get_build_log(self, job_id: str, error_if_not_found: bool = True) -> Response: + """ + Get artifact failed or passed build log based on job id. + :param job_id: The job id corresponding to the passed or failed build log. + :param error_if_not_found: return err if the image tag not found. default True. + :return: The build_log. + """ + if not isinstance(job_id, str): + raise TypeError + if not job_id: + raise ValueError + log_object = self._get(DatabaseAPI._logs_job_id_endpoint(job_id), error_if_not_found).json() + return log_object['build_log'] + ################################### # Mined Build Pair REST methods ################################### @@ -349,6 +412,18 @@ def replace_mined_build_pairs_for_repo(self, repo: str, new_build_pairs: List[Di return False return True + def update_mined_build_pairs_repo_name(self, obj_id: str, new_repo: str) -> Response: + """ + Update the repository name of an artifact in the minedBuildPairs db. + :param obj_id: The unique id of an object. + :param new_repo: The updated repo name we are setting. + :return: The response object. + """ + if not isinstance(new_repo, str): + raise TypeError + updates = {'repo': new_repo} + return self._patch(DatabaseAPI._mined_build_pair_object_id_endpoint(obj_id), updates) + ################################### # Mined Project REST methods ################################### @@ -356,9 +431,9 @@ def replace_mined_build_pairs_for_repo(self, repo: str, new_build_pairs: List[Di def insert_mined_project(self, mined_project) -> Response: return self._insert(DatabaseAPI._mined_projects_endpoint(), mined_project, 'mined project') - def find_mined_project(self, repo: str, error_if_not_found: bool = True) -> Response: - log.debug('Trying to find mined project with repo {}.'.format(repo)) - return self._get(DatabaseAPI._mined_project_repo_endpoint(repo), error_if_not_found) + def find_mined_project(self, repo: str, ci_service: str, error_if_not_found: bool = True) -> Response: + log.debug('Trying to find mined project with repo {} and CI service {}.'.format(repo, ci_service)) + return self._get(DatabaseAPI._mined_project_repo_endpoint(repo, ci_service), error_if_not_found) def list_mined_projects(self) -> List: return self._list(DatabaseAPI._mined_projects_endpoint()) @@ -366,6 +441,7 @@ def list_mined_projects(self) -> List: def filter_mined_projects(self, api_filter: str) -> List: return self._filter(DatabaseAPI._mined_projects_endpoint(), api_filter) + # TODO should this count repos, or repo-CI pairs? def count_mined_projects(self) -> int: return self._count(DatabaseAPI._mined_projects_endpoint()) @@ -374,10 +450,41 @@ def upsert_mined_project(self, mined_project) -> Response: Upsert a mined project. Can be used for initial mining or re-mining of a project. """ repo = mined_project.get('repo') + ci_service = mined_project.get('ci_service') assert repo - return self._upsert(DatabaseAPI._mined_project_repo_endpoint(repo), mined_project, 'mined project') + assert ci_service + return self._upsert(DatabaseAPI._mined_project_repo_endpoint(repo, ci_service), mined_project, 'mined project') + + def set_latest_build_info_metric(self, repo: str, ci_service: str, build_number: int, build_id: int): + """ + Set the build information regarding the build number and build id of the latest build we've mined. + + :param repo: The repository slug for identifying the mined project to update. + :param ci_service: The CI service for identifying the project to update. + :param build_number: The latest build number associated to the last build we've mined + :param build_id: The latest build id associated to the last build we've mined + :return: The response object. + """ + if not isinstance(repo, str): + raise TypeError + if not repo: + raise ValueError + if not isinstance(build_number, int): + raise TypeError + if not build_number: + raise ValueError + if not isinstance(build_id, int): + raise TypeError + if not build_id: + raise ValueError + updates = { + 'last_build_mined.build_number': build_number, + 'last_build_mined.build_id': build_id + } + return self._patch(DatabaseAPI._mined_project_repo_endpoint(repo, ci_service), updates) - def set_mined_project_progression_metric(self, repo: str, metric_name: str, metric_value) -> Response: + def set_mined_project_progression_metric(self, repo: str, ci_service: str, + metric_name: str, metric_value) -> Response: """ Add a mining progression metric to an existing mined project. The value of the metric can be any valid database type. @@ -399,7 +506,22 @@ def set_mined_project_progression_metric(self, repo: str, metric_name: str, metr if not metric_name: raise ValueError updates = {'progression_metrics.{}'.format(metric_name): metric_value} - return self._patch(DatabaseAPI._mined_project_repo_endpoint(repo), updates) + return self._patch(DatabaseAPI._mined_project_repo_endpoint(repo, ci_service), updates) + + def update_mined_project_repo_name(self, repo: str, ci_service: str, new_repo: str): + if not isinstance(repo, str): + raise TypeError + if not isinstance(repo, str): + raise TypeError + updates = {'repo': new_repo} + return self._patch(DatabaseAPI._mined_project_repo_endpoint(repo, ci_service), updates) + + def soft_delete_mined_project(self, repo: str, ci_service: str) -> Response: + if not isinstance(repo, str): + raise TypeError + if not repo: + raise ValueError + return self._delete(DatabaseAPI._mined_project_repo_endpoint(repo, ci_service)) ################################### # Email Subscriber REST methods @@ -433,8 +555,23 @@ def unsubscribe_email_subscriber(self, email: str) -> Response: # Account REST methods ################################### - def insert_account(self, account) -> Response: - return self._insert(DatabaseAPI._accounts_endpoint(), account, 'account') + def create_account(self, email: str, role: list = None) -> Response: + """ + Create an account with given email and role. Return token in Response if succeed. + :param email: email address. + :param role: default to ['user']. + :return: The response object. + """ + token = secrets.token_urlsafe() + while self.filter_account_for_token(token): + token = secrets.token_urlsafe() + if role is None: + role = ['user'] + account = {'email': email, 'roles': role, 'token': token, 'password': ''} + response = self._insert(DatabaseAPI._accounts_endpoint(), account, 'account') + if response.ok: + response._content = json.dumps(account).encode('utf-8') + return response def find_account(self, email: str, error_if_not_found: bool = True) -> Response: log.debug('Trying to find account with email {}.'.format(email)) @@ -573,11 +710,32 @@ def _bulk_insert(self, log.debug('Trying to bulk insert {} {}.'.format(len(entities), plural_entity_name)) # Insert the entities in chunks to avoid a 413 Request Entity Too Large error. for chunk in DatabaseAPI._chunks(entities, 100): - resp = self._post(endpoint, chunk) + list_for_insertion = [] + list_of_build_ids = [] + for data in chunk: + failed_build_id = data['failed_build']['build_id'] + list_of_build_ids.append(failed_build_id) + + api_filter = '{"failed_build.build_id":{"$in":' + '{}'.format(list_of_build_ids) + '}}' + list_of_existing_build_pairs = self.filter_mined_build_pairs(api_filter) + for data in chunk: + bp_exists = False + for bp in list_of_existing_build_pairs: + if data['failed_build']['build_id'] == bp['failed_build']['build_id']: + log.info('Failed Build ID: {} already exists in the database. Skipping insertion.' + .format(data['failed_build']['build_id'])) + bp_exists = True + break + if bp_exists: + continue + list_for_insertion.append(data) + resp = self._post(endpoint, list_for_insertion) if resp.status_code == 422: log.error('The', plural_entity_name, 'were not inserted because they failed validation.') log.error(pprint.pformat(chunk)) log.error(resp.content) + elif resp.status_code == 400: + log.error('Buildpairs were not inserted because the list is either empty, or all pairs already exist.') yield resp def _upsert(self, endpoint: Endpoint, entity, singular_entity_name: str = 'entity') -> Response: @@ -721,12 +879,14 @@ def _mined_projects_endpoint() -> Endpoint: return DatabaseAPI._endpoint(DatabaseAPI._MINED_PROJECTS_RESOURCE) @staticmethod - def _mined_project_repo_endpoint(repo: str) -> Endpoint: + def _mined_project_repo_endpoint(repo: str, ci_service) -> Endpoint: if not isinstance(repo, str): raise TypeError if not repo: raise ValueError - return '/'.join([DatabaseAPI._mined_projects_endpoint(), repo]) + if ci_service not in ['travis', 'github']: + raise ValueError + return '/'.join([DatabaseAPI._mined_projects_endpoint(), ci_service, repo]) @staticmethod def _email_subscribers_endpoint() -> Endpoint: @@ -751,3 +911,15 @@ def _account_email_endpoint(email: str) -> Endpoint: if not email: raise ValueError return '/'.join([DatabaseAPI._accounts_endpoint(), email]) + + @staticmethod + def _logs_endpoint() -> Endpoint: + return DatabaseAPI._endpoint(DatabaseAPI._LOGS_RESOURCE) + + @staticmethod + def _logs_job_id_endpoint(job_id: str) -> Endpoint: + if not isinstance(job_id, str): + raise TypeError + if not job_id: + raise ValueError + return '/'.join([DatabaseAPI._logs_endpoint(), job_id]) diff --git a/bugswarm/common/shell_wrapper.py b/bugswarm/common/shell_wrapper.py index 2a6495a..83473a6 100644 --- a/bugswarm/common/shell_wrapper.py +++ b/bugswarm/common/shell_wrapper.py @@ -1,16 +1,13 @@ import subprocess from typing import Optional +from typing import Tuple class ShellWrapper(object): - Command = str - ReturnCode = int - StreamOr = Optional[str] - @staticmethod - def run_commands(*commands: Command, **kwargs) -> (StreamOr, StreamOr, ReturnCode): + def run_commands(*commands: str, **kwargs) -> Tuple[Optional[str], Optional[str], int]: """ Run a list of commands sequentially in the same shell environment and wait for the commands to complete. All keyword arguments are passed to the subprocess.run function. @@ -21,7 +18,8 @@ def run_commands(*commands: Command, **kwargs) -> (StreamOr, StreamOr, ReturnCod can be None depending on the passed values of `stdout` and `stderr`. """ command = ' ; '.join(commands) - process = subprocess.run(command, **kwargs) # Indirectly waits for a return code. + # Indirectly waits for a return code. + process = subprocess.run(command, **kwargs) stdout = process.stdout stderr = process.stderr # Decode stdout and stderr to strings if needed. diff --git a/bugswarm/common/travis_wrapper.py b/bugswarm/common/travis_wrapper.py index e82ada3..ab19ee0 100644 --- a/bugswarm/common/travis_wrapper.py +++ b/bugswarm/common/travis_wrapper.py @@ -1,19 +1,31 @@ import time +from collections import deque import cachecontrol import requests -from . import log +from bugswarm.common import log +from bugswarm.common import credentials -_BASE_URL = 'https://api.travis-ci.org' +_BASE_URL = 'https://api.travis-ci.com' # Number of seconds to sleep before retrying. Five seconds has been long enough to obey the Travis API rate limit. _SLEEP_SECONDS = 5 -_MAX_SLEEP_SECONDS = 60 * 5 # 5 minutes. +_TOKENS = deque(credentials.TRAVIS_TOKENS) class TravisWrapper(object): def __init__(self): self._session = cachecontrol.CacheControl(requests.Session()) + if credentials.TRAVIS_TOKENS: + if not isinstance(credentials.TRAVIS_TOKENS, list): + raise TypeError('TRAVIS_TOKENS must be a list.') + if not all(isinstance(t, str) for t in credentials.TRAVIS_TOKENS): + raise ValueError('All Travis CI Tokens must be given as strings.') + + # Start with the first token in TRAVIS_TOKENS list and pop() element from right and append to front + # In the case where we are using 2 or more threads, each thread will grab the next token in the list + self._session.headers['Authorization'] = 'token {}'.format(_TOKENS[0]) + _TOKENS.appendleft(_TOKENS.pop()) def __enter__(self): return self @@ -27,6 +39,7 @@ def _close(self): # Potentially raises requests.exceptions.Timeout or requests.exceptions.RequestException. def _get(self, address, **kwargs): sleep_seconds = _SLEEP_SECONDS + attempts = 0 while True: response = self._session.get(address, params=kwargs) code = response.status_code @@ -35,21 +48,41 @@ def _get(self, address, **kwargs): elif code == 404: log.error('Get request for {} returned 404 Not Found.'.format(address)) response.raise_for_status() + elif code == 403: + # Token did not successfully authorize, try next one in list + # deque.pop() removes element from the right so we appendleft() + self._session.headers['Authorization'] = 'token {}'.format(_TOKENS[0]) + _TOKENS.appendleft(_TOKENS.pop()) elif code == 429: - log.warning( - 'The Travis API returned status code 429 Too Many Requests. ' - 'Retrying after sleeping for {} seconds.'.format(sleep_seconds)) - time.sleep(sleep_seconds) - sleep_seconds = min(sleep_seconds * 2, _MAX_SLEEP_SECONDS) + if attempts < 1 or not _TOKENS: + log.warning( + 'The Travis API returned status code 429 Too Many Requests. ' + 'Retrying after sleeping for {} seconds.'.format(sleep_seconds)) + time.sleep(sleep_seconds) + attempts += 1 + else: + # Use another token if # of attempts for GET Requests >= 1, will use next token in list + self._session.headers['Authorization'] = 'token {}'.format(_TOKENS[0]) + _TOKENS.appendleft(_TOKENS.pop()) else: log.error('Get request for {} returned {}.'.format(address, code)) raise requests.exceptions.ConnectionError('{} download failed. Error code is {}.'.format(address, code)) def _get_iterate(self, address, **kwargs): after_number = None + build_number_exists = False + if 'last_build_number' in kwargs: + build_number = kwargs['last_build_number'] + build_number_exists = True result = self._get(address, **kwargs) + latest_result_build_number = result[0]['number'] + if build_number_exists: + if int(latest_result_build_number) == build_number: + return while True: if after_number: + if build_number_exists and int(after_number) < build_number: + return result = self._get(address, after_number=after_number) if not result: return @@ -65,7 +98,10 @@ def _endpoint(path): def search(self, term): return self._get_iterate(TravisWrapper._endpoint('search/repositories'), query=term) - def get_builds_for_repo(self, repo): + def get_builds_for_repo(self, repo, build_number=None): + if build_number: + return self._get_iterate(TravisWrapper._endpoint('repositories/{}/builds'.format(repo)), + last_build_number=build_number) return self._get_iterate(TravisWrapper._endpoint('repositories/{}/builds'.format(repo))) def get_build_info(self, build_id): From c0ff4157fb173d49b89dfa3814d58feb46a19f29 Mon Sep 17 00:00:00 2001 From: Robert-Furth Date: Tue, 29 Nov 2022 09:42:39 -0800 Subject: [PATCH 2/2] version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b97c30f..c4c82f7 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='bugswarm-common', - version='0.1.15', + version='0.2022.11.29', url='https://github.com/BugSwarm/common', author='BugSwarm', author_email='dev.bugswarm@gmail.com',