Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions alts/shared/utils/git_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import re
import urllib.parse
from logging import Logger
from pathlib import Path
from random import randint
Expand All @@ -9,6 +12,48 @@
from alts.shared.utils.path_utils import get_abspath


# Matches scp-like git remotes of the form ``[user@]host:path``, which
# ``urllib.parse`` does not recognise as having a hostname.
_SCP_URL_RE = re.compile(r'^(?P<user>[^@]+@)?(?P<host>[^:/]+):(?P<path>.+)$')


def repo_reference_subpath(repo_url: str) -> str:
    """
    Derive a host-aware on-disk subpath for a git repository's reference
    mirror. Two repositories that share a basename but live on different
    hosts (e.g. a Gerrit "QA" and a GitLab "QA") must map to different
    subpaths to avoid clobbering each other's bare mirrors.

    The returned value is always a relative path of the form
    ``<lowercased host>/<repo path>.git`` (or ``<basename>.git`` when the
    URL cannot be split into host and path). Empty, ``.`` and ``..`` path
    segments are dropped, so the result never climbs out of the directory
    it is joined onto and equivalent spellings of the same path
    (``a//b``, ``a/./b``) share one mirror.

    Parameters
    ----------
    repo_url : str
        Repository location, either URL-style (``scheme://host[:port]/path``)
        or scp-like (``[user@]host:path``).

    Returns
    -------
    str
        Relative subpath ending in ``.git``. The port, user name, query
        and fragment of ``repo_url`` are not part of the result.

    Examples
    --------
    >>> repo_reference_subpath('ssh://gerrit.cloudlinux.com:29418/QA')
    'gerrit.cloudlinux.com/QA.git'
    >>> repo_reference_subpath('https://gitlab.cloudlinux.com/qa/QA.git')
    'gitlab.cloudlinux.com/qa/QA.git'
    >>> repo_reference_subpath('git@gitlab.com:qa/QA.git')
    'gitlab.com/qa/QA.git'
    >>> repo_reference_subpath('https://example.com/../../etc/QA')
    'example.com/etc/QA.git'
    """
    host = None
    path = None
    try:
        parsed = urllib.parse.urlparse(repo_url)
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6
        # brackets). Treat that as unparseable instead of crashing.
        parsed = None
    if parsed is not None:
        if parsed.hostname:
            host = parsed.hostname
            path = parsed.path
        else:
            scp_match = _SCP_URL_RE.match(repo_url)
            if scp_match:
                host = scp_match.group('host')
                path = scp_match.group('path')
    # Keep only real path components: no empty segments (leading, trailing
    # or doubled slashes) and no '.'/'..' that could redirect the mirror
    # outside of the reference directory.
    segments = [
        segment
        for segment in (path or '').split('/')
        if segment not in ('', '.', '..')
    ]
    # A host made up solely of dots ('.', '..') would itself act as a
    # relative path component, so it is not accepted as a host.
    if not host or not host.strip('.') or not segments:
        # Unparseable URL — fall back to basename so behaviour stays
        # predictable; collisions across hosts remain the caller's problem
        # in that degenerate case.
        basename = os.path.basename(repo_url) or 'repo'
        return basename if basename.endswith('.git') else f'{basename}.git'
    path = '/'.join(segments)
    if not path.endswith('.git'):
        path = f'{path}.git'
    return f'{host.lower()}/{path}'


def checkout(
git_ref: str,
git_repo_path: Path,
Expand Down
8 changes: 4 additions & 4 deletions alts/worker/runners/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
clone_git_repo,
git_reset_hard,
prepare_gerrit_command,
repo_reference_subpath,
)
from alts.shared.utils.log_utils import (
get_temp_log_files,
Expand Down Expand Up @@ -1150,13 +1151,12 @@ def clone_third_party_repo(
self._logger.debug('An unknown repository format, skipping')
return git_repo_path
self._logger.info('Cloning %s to %s', repo_url, self._work_dir)
repo_name = os.path.basename(repo_url)
if not repo_name.endswith('.git'):
repo_name += '.git'
repo_reference_dir = None
if CONFIG.git_reference_directory:
repo_reference_dir = os.path.join(
CONFIG.git_reference_directory, repo_name)
CONFIG.git_reference_directory,
repo_reference_subpath(repo_url),
)
repo_path = None
for attempt in range(1, 6):
try:
Expand Down
64 changes: 64 additions & 0 deletions tests/shared/test_git_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Tests for alts.shared.utils.git_utils.repo_reference_subpath."""
import pytest

from alts.shared.utils.git_utils import repo_reference_subpath


class TestRepoReferenceSubpath:
    """Checks for the subpath derived from a repository URL."""

    @pytest.mark.parametrize(
        ('url', 'expected'),
        [
            # URL-style remotes: scheme://host[:port]/path
            (
                'ssh://gerrit.cloudlinux.com:29418/QA',
                'gerrit.cloudlinux.com/QA.git',
            ),
            (
                'https://gitlab.cloudlinux.com/qa-team/QA.git',
                'gitlab.cloudlinux.com/qa-team/QA.git',
            ),
            (
                'https://github.com/foo/bar.git',
                'github.com/foo/bar.git',
            ),
            # SCP-style remotes with a user prefix, with and without the
            # '.git' suffix.
            (
                'git@gitlab.com:qa/QA.git',
                'gitlab.com/qa/QA.git',
            ),
            (
                'git@gitlab.com:qa/QA',
                'gitlab.com/qa/QA.git',
            ),
            # SCP-style URL without a user prefix.
            (
                'gitlab.com:qa/QA.git',
                'gitlab.com/qa/QA.git',
            ),
            (
                'gerrit.cloudlinux.com:QA',
                'gerrit.cloudlinux.com/QA.git',
            ),
        ],
    )
    def test_known_urls_map_to_host_aware_subpaths(self, url, expected):
        """Each supported URL flavour yields '<host>/<path>.git'."""
        actual = repo_reference_subpath(url)
        assert actual == expected

    def test_same_basename_different_hosts_diverge(self):
        """Two 'QA' repositories on different hosts get different keys."""
        gerrit_key, gitlab_key = (
            repo_reference_subpath(remote)
            for remote in (
                'ssh://gerrit.cloudlinux.com:29418/QA',
                'https://gitlab.cloudlinux.com/qa-team/QA.git',
            )
        )
        assert gerrit_key != gitlab_key
        # Both keys preserve enough host information to be distinguishable.
        assert 'gerrit.cloudlinux.com' in gerrit_key
        assert 'gitlab.cloudlinux.com' in gitlab_key

    def test_host_is_lowercased(self):
        """Only the host is case-folded; the repository path keeps its case."""
        subpath = repo_reference_subpath('https://GITLAB.COM/foo/Bar.git')
        assert subpath == 'gitlab.com/foo/Bar.git'

    def test_unparseable_url_falls_back_to_basename(self):
        """Input with neither a scheme nor an SCP-style colon uses basename."""
        # No scheme, no SCP-style colon — degenerate input.
        subpath = repo_reference_subpath('just-a-name')
        assert subpath == 'just-a-name.git'
Loading