Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 105 additions & 61 deletions mozregression/fetch_build_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import os
import re
from dataclasses import dataclass
from datetime import datetime
from threading import Lock, Thread

Expand All @@ -19,7 +20,7 @@
from taskcluster.exceptions import TaskclusterFailure

from mozregression.build_info import IntegrationBuildInfo, NightlyBuildInfo
from mozregression.errors import BuildInfoNotFound, MozRegressionError
from mozregression.errors import BuildInfoNotFound, EmptyPushlogError, MozRegressionError
from mozregression.json_pushes import JsonPushes, Push
from mozregression.network import retry_get, url_links

Expand All @@ -32,37 +33,7 @@ def __init__(self, fetch_config):
self.build_regex = re.compile(fetch_config.build_regex())
self.build_info_regex = re.compile(fetch_config.build_info_regex())

def _update_build_info_from_txt(self, build_info):
    """
    Merge repository/changeset data parsed from the build txt file
    into *build_info* (a dict), in place.

    Does nothing when the dict carries no "build_txt_url" entry.
    """
    LOG.debug("Update build info from {}".format(build_info))
    if "build_txt_url" not in build_info:
        return
    txt_info = self._fetch_txt_info(build_info["build_txt_url"])
    build_info.update(txt_info)

def _fetch_txt_info(self, url):
    """
    Retrieve information from a build information txt file.

    Returns a dict with keys repository and changeset if information
    is found.
    """
    LOG.debug("Fetching txt info from {}".format(url))
    response = retry_get(url)
    # Preferred format: a line containing "<repository>/rev/<changeset>".
    rev_lines = [line for line in response.text.splitlines() if "/rev/" in line]
    if rev_lines:
        repository, changeset = rev_lines[0].split("/rev/")
        return {"repository": repository, "changeset": changeset}
    # the txt file could be in an old format:
    # DATE CHANGESET
    # we can try to extract that to get the changeset at least.
    matched = re.match(r"^\d+ (\w+)$", response.text.strip())
    if matched:
        return {"changeset": matched.group(1)}
    return {}

def find_build_info(self, changeset_or_date, fetch_txt_info=True):
def find_build_info(self, changeset_or_date):
"""
Abstract method to retrieve build information over the internet for
one build.
Expand Down Expand Up @@ -172,9 +143,80 @@ def find_build_info(self, push):
)


@dataclass
class ChangesetInfo:
    """
    Result of resolving changeset info for a nightly build.

    These are generated during the fetch process and then turned into a
    `BuildInfo` before being consumed elsewhere in the system. Some sample
    providers are included, but the fetch config's `get_nightly_changeset`
    can provide other behaviours.
    """

    # Both fields may legitimately be None when no changeset information
    # could be resolved (quoted annotations avoid a typing import).
    changeset: "str | None" = None
    repo_url: "str | None" = None

    @staticmethod
    def from_nightly_txt(archive_urls):
        """
        Lookup changeset info from .txt file on archive server if exists.

        :param archive_urls: object exposing a ``build_txt_url`` attribute
            (may be None when no txt file was found).
        :return: a ChangesetInfo; empty (all-None) when nothing was parsed.
        """
        if not archive_urls.build_txt_url:
            return ChangesetInfo()

        LOG.debug("Fetching txt info from {}".format(archive_urls.build_txt_url))

        response = retry_get(archive_urls.build_txt_url)
        for line in response.text.splitlines():
            if "/rev/" in line:
                # maxsplit=1 guards against a pathological extra "/rev/"
                # later in the line, which would make unpacking fail.
                repo_url, changeset = line.split("/rev/", 1)
                return ChangesetInfo(changeset, repo_url)

        # the txt file could be in an old format:
        # DATE CHANGESET
        # we can try to extract that to get the changeset at least.
        matched = re.match(r"^\d+ (\w+)$", response.text.strip())
        if matched:
            return ChangesetInfo(matched.group(1))

        return ChangesetInfo()

    @staticmethod
    def from_pushlog(archive_urls, fetch_config):
        """
        Use the json-pushes API of the source control server to lookup the
        changeset using the build timestamp.

        :param archive_urls: object exposing a ``build_url`` attribute that
            encodes the build timestamp.
        :param fetch_config: fetch config providing the timestamp parser and
            the nightly repo name for that date.
        :return: a ChangesetInfo; empty when the repo does not support
            json-pushes or no push exists for the timestamp.
        """
        build_dt = fetch_config.get_nightly_timestamp_from_url(archive_urls.build_url)
        branch = fetch_config.get_nightly_repo(build_dt.date())

        try:
            jpushes = JsonPushes(branch=branch)
        except MozRegressionError:
            LOG.info(f"Repo {branch} does not support json-pushes queries")
            return ChangesetInfo()

        try:
            # take the most recent push matching the build timestamp
            push = jpushes.push_by_timestamp(build_dt)[-1]
        except EmptyPushlogError:
            LOG.info(f"Unable to fetch push by timestamp for {build_dt}")
            return ChangesetInfo()

        return ChangesetInfo(push.changeset, jpushes.repo_url)


@dataclass
class ArchiveBuildUrls:
    """Result of scraping build folder on archive server."""

    # Direct URL of the build archive file found in the folder.
    build_url: str
    # URL of the accompanying build info txt file; None when the folder
    # contains no file matching the build-info regex.
    build_txt_url: "str | None"


class NightlyInfoFetcher(InfoFetcher):
    # Fetches build information for nightly builds scraped from the
    # archive server folders.
    def __init__(self, fetch_config):
        InfoFetcher.__init__(self, fetch_config)

        # Cache of month index pages already fetched; accessed under _lock.
        self._cache_months = {}
        self._lock = Lock()
        # Serializes appends to the shared result list filled by the
        # worker threads in _fetch_build_info_from_url.
        self._fetch_lock = Lock()
Expand All @@ -183,34 +225,38 @@ def _fetch_build_info_from_url(self, url, index, lst):
"""
Retrieve information from a build folder url.

Stores in a list the url index and a dict instance with keys
Stores in a list the url index and a ArchiveBuildUrls instance with
build_url and build_txt_url if respectively a build file and a
build info file are found for the url.
"""
LOG.debug("Fetching build info from {}".format(url))
data = {}

if not url.endswith("/"):
url += "/"
links = url_links(url)
if not self.fetch_config.has_build_info:
links += url_links(self.fetch_config.get_nightly_info_url(url))
for link in links:
name = os.path.basename(link)
if "build_url" not in data and self.build_regex.match(name):
data["build_url"] = link
elif "build_txt_url" not in data and self.build_info_regex.match(name):
data["build_txt_url"] = link
if data:
# Check that we found all required data. The URL in build_url is
# required. build_txt_url is optional.
if "build_url" not in data:
raise BuildInfoNotFound(
"Failed to find a build file in directory {} that "
"matches regex '{}'".format(url, self.build_regex.pattern)
)
info_url = self.fetch_config.get_nightly_info_url(url)

build_urls = []
build_txt_urls = []
for folder in {url, info_url}:
for link in url_links(folder):
name = os.path.basename(link)
if self.build_regex.match(name):
build_urls.append(link)
if self.build_info_regex.match(name):
build_txt_urls.append(link)

if build_urls:
build = build_urls[0]
build_txt = build_txt_urls[0] if build_txt_urls else None

with self._fetch_lock:
lst.append((index, data))
lst.append((index, ArchiveBuildUrls(build, build_txt)))
elif build_txt_urls:
raise BuildInfoNotFound(
"Failed to find a build file in directory {} that matches regex '{}'".format(
url, self.build_regex.pattern
)
)

def _get_month_links(self, url):
with self._lock:
Expand All @@ -223,7 +269,7 @@ def _get_urls(self, date):
Get the url list of the build folder for a given date.

This methods needs to be thread-safe as it is used in
:meth:`NightlyBuildData.get_build_url`.
:meth:`find_build_info`.
"""
LOG.debug("Get URLs for {}".format(date))
url = self.fetch_config.get_nightly_base_url(date)
Expand All @@ -240,7 +286,7 @@ def _get_urls(self, date):
matches.reverse()
return matches

def find_build_info(self, date, fetch_txt_info=True, max_workers=2):
def find_build_info(self, date, max_workers=2):
"""
Find build info for a nightly build, given a date.

Expand Down Expand Up @@ -274,16 +320,14 @@ def find_build_info(self, date, fetch_txt_info=True, max_workers=2):
thread.join(0.1)
LOG.debug("got valid_builds %s" % valid_builds)
if valid_builds:
infos = sorted(valid_builds, key=lambda b: b[0])[0][1]
if fetch_txt_info:
self._update_build_info_from_txt(infos)

selection = sorted(valid_builds, key=lambda b: b[0])[0][1]
changeset = self.fetch_config.get_nightly_changeset(selection)
build_info = NightlyBuildInfo(
self.fetch_config,
build_url=infos["build_url"],
build_url=selection.build_url,
build_date=date,
changeset=infos.get("changeset"),
repo_url=infos.get("repository"),
changeset=changeset.changeset,
repo_url=changeset.repo_url,
)
break
build_urls = build_urls[max_workers:]
Expand Down
Loading