From 38208c8858e517b8d9926f125fdeb9c362915093 Mon Sep 17 00:00:00 2001
From: Daniil Anfimov
Date: Fri, 22 May 2026 12:11:48 +0200
Subject: [PATCH] Add the ability to use multiple QA repos

---
 alts/shared/utils/git_utils.py | 45 ++++++++++++++++++++++++
 alts/worker/runners/base.py    |  8 ++---
 tests/shared/test_git_utils.py | 64 ++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 tests/shared/test_git_utils.py

diff --git a/alts/shared/utils/git_utils.py b/alts/shared/utils/git_utils.py
index 6cda8b67..bc70fc35 100644
--- a/alts/shared/utils/git_utils.py
+++ b/alts/shared/utils/git_utils.py
@@ -1,3 +1,6 @@
+import os
+import re
+import urllib.parse
 from logging import Logger
 from pathlib import Path
 from random import randint
@@ -9,6 +12,48 @@
 from alts.shared.utils.path_utils import get_abspath
 
 
+_SCP_URL_RE = re.compile(r'^(?P<user>[^@]+@)?(?P<host>[^:/]+):(?P<path>.+)$')
+
+
+def repo_reference_subpath(repo_url: str) -> str:
+    """
+    Derive a host-aware on-disk subpath for a git repository's reference
+    mirror. Two repositories that share a basename but live on different
+    hosts (e.g. a Gerrit "QA" and a GitLab "QA") must map to different
+    subpaths to avoid clobbering each other's bare mirrors.
+
+    Examples
+    --------
+    >>> repo_reference_subpath('ssh://gerrit.cloudlinux.com:29418/QA')
+    'gerrit.cloudlinux.com/QA.git'
+    >>> repo_reference_subpath('https://gitlab.cloudlinux.com/qa/QA.git')
+    'gitlab.cloudlinux.com/qa/QA.git'
+    >>> repo_reference_subpath('git@gitlab.com:qa/QA.git')
+    'gitlab.com/qa/QA.git'
+    """
+    host = None
+    path = None
+    parsed = urllib.parse.urlparse(repo_url)
+    if parsed.hostname:
+        host = parsed.hostname
+        path = parsed.path
+    else:
+        scp_match = _SCP_URL_RE.match(repo_url)
+        if scp_match:
+            host = scp_match.group('host')
+            path = scp_match.group('path')
+    if not host or not path:
+        # Unparseable URL — fall back to basename so behaviour stays
+        # predictable; collisions across hosts remain the caller's problem
+        # in that degenerate case.
+        basename = os.path.basename(repo_url) or 'repo'
+        return basename if basename.endswith('.git') else f'{basename}.git'
+    path = path.strip('/')
+    if not path.endswith('.git'):
+        path = f'{path}.git'
+    return f'{host.lower()}/{path}'
+
+
 def checkout(
     git_ref: str,
     git_repo_path: Path,
diff --git a/alts/worker/runners/base.py b/alts/worker/runners/base.py
index 76b1e8cc..e45ddeda 100644
--- a/alts/worker/runners/base.py
+++ b/alts/worker/runners/base.py
@@ -52,6 +52,7 @@
     clone_git_repo,
     git_reset_hard,
     prepare_gerrit_command,
+    repo_reference_subpath,
 )
 from alts.shared.utils.log_utils import (
     get_temp_log_files,
@@ -1150,13 +1151,12 @@ def clone_third_party_repo(
             self._logger.debug('An unknown repository format, skipping')
             return git_repo_path
         self._logger.info('Cloning %s to %s', repo_url, self._work_dir)
-        repo_name = os.path.basename(repo_url)
-        if not repo_name.endswith('.git'):
-            repo_name += '.git'
         repo_reference_dir = None
         if CONFIG.git_reference_directory:
             repo_reference_dir = os.path.join(
-                CONFIG.git_reference_directory, repo_name)
+                CONFIG.git_reference_directory,
+                repo_reference_subpath(repo_url),
+            )
         repo_path = None
         for attempt in range(1, 6):
             try:
diff --git a/tests/shared/test_git_utils.py b/tests/shared/test_git_utils.py
new file mode 100644
index 00000000..2c8b5d07
--- /dev/null
+++ b/tests/shared/test_git_utils.py
@@ -0,0 +1,64 @@
+"""Tests for alts.shared.utils.git_utils.repo_reference_subpath."""
+import pytest
+
+from alts.shared.utils.git_utils import repo_reference_subpath
+
+
+class TestRepoReferenceSubpath:
+    @pytest.mark.parametrize(
+        'url,expected',
+        [
+            (
+                'ssh://gerrit.cloudlinux.com:29418/QA',
+                'gerrit.cloudlinux.com/QA.git',
+            ),
+            (
+                'https://gitlab.cloudlinux.com/qa-team/QA.git',
+                'gitlab.cloudlinux.com/qa-team/QA.git',
+            ),
+            (
+                'https://github.com/foo/bar.git',
+                'github.com/foo/bar.git',
+            ),
+            (
+                'git@gitlab.com:qa/QA.git',
+                'gitlab.com/qa/QA.git',
+            ),
+            (
+                'git@gitlab.com:qa/QA',
+                'gitlab.com/qa/QA.git',
+            ),
+            # SCP-style URL without a user prefix.
+            (
+                'gitlab.com:qa/QA.git',
+                'gitlab.com/qa/QA.git',
+            ),
+            (
+                'gerrit.cloudlinux.com:QA',
+                'gerrit.cloudlinux.com/QA.git',
+            ),
+        ],
+    )
+    def test_known_urls_map_to_host_aware_subpaths(self, url, expected):
+        assert repo_reference_subpath(url) == expected
+
+    def test_same_basename_different_hosts_diverge(self):
+        gerrit = repo_reference_subpath(
+            'ssh://gerrit.cloudlinux.com:29418/QA',
+        )
+        gitlab = repo_reference_subpath(
+            'https://gitlab.cloudlinux.com/qa-team/QA.git',
+        )
+        assert gerrit != gitlab
+        # Both keys preserve enough host information to be distinguishable.
+        assert 'gerrit.cloudlinux.com' in gerrit
+        assert 'gitlab.cloudlinux.com' in gitlab
+
+    def test_host_is_lowercased(self):
+        assert repo_reference_subpath(
+            'https://GITLAB.COM/foo/Bar.git',
+        ) == 'gitlab.com/foo/Bar.git'
+
+    def test_unparseable_url_falls_back_to_basename(self):
+        # No scheme, no SCP-style colon — degenerate input.
+        assert repo_reference_subpath('just-a-name') == 'just-a-name.git'