diff --git a/.gitignore b/.gitignore index e633e7c..be4b330 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /dist/ /state/ +/*-source/ diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go index 9015a37..07091fd 100644 --- a/internal/strategy/git/backend.go +++ b/internal/strategy/git/backend.go @@ -2,6 +2,7 @@ package git import ( "bufio" + "bytes" "context" "log/slog" "net/http" @@ -40,17 +41,37 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c host := r.PathValue("host") pathValue := r.PathValue("path") - // git http-backend expects the path as-is: /host/repo.git/info/refs - backendPath := "/" + host + "/" + pathValue + // For regular clones, we need to insert /.git before the git protocol paths + // Find where the git operation starts (e.g., /info/refs, /git-upload-pack) + var gitOperation string + var repoPathWithSuffix string + + for _, op := range []string{"/info/refs", "/git-upload-pack", "/git-receive-pack"} { + if idx := strings.Index(pathValue, op); idx != -1 { + repoPathWithSuffix = pathValue[:idx] + gitOperation = pathValue[idx:] + break + } + } + + // Remove .git suffix from repo path for the filesystem path + repoPath := strings.TrimSuffix(repoPathWithSuffix, ".git") + + // Construct backend path with .git directory: /host/repo/.git/info/refs + backendPath := "/" + host + "/" + repoPath + "/.git" + gitOperation logger.DebugContext(r.Context(), "Serving with git http-backend", slog.String("original_path", r.URL.Path), slog.String("backend_path", backendPath), slog.String("clone_path", c.path)) + // Capture stderr from git http-backend to log errors + var stderrBuf bytes.Buffer + handler := &cgi.Handler{ - Path: gitPath, - Args: []string{"http-backend"}, + Path: gitPath, + Args: []string{"http-backend"}, + Stderr: &stderrBuf, Env: []string{ "GIT_PROJECT_ROOT=" + absRoot, "GIT_HTTP_EXPORT_ALL=1", @@ -63,9 +84,16 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c r2.URL.Path = backendPath handler.ServeHTTP(w, r2) + + // Log stderr if there was any output (indicates an error) + if stderrBuf.Len() > 0 { + logger.ErrorContext(r.Context(), "git http-backend error", + slog.String("stderr", stderrBuf.String()), + slog.String("path", backendPath)) + } } -// executeClone performs a git clone --bare operation. +// executeClone performs a git clone operation. func (s *Strategy) executeClone(ctx context.Context, c *clone) error { logger := logging.FromContext(ctx) @@ -75,7 +103,7 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error { // #nosec G204 - c.upstreamURL and c.path are controlled by us // Configure git for large repositories to avoid network buffer issues - args := []string{"clone", "--bare"} + args := []string{"clone"} if s.config.CloneDepth > 0 { args = append(args, "--depth", strconv.Itoa(s.config.CloneDepth)) } @@ -96,11 +124,40 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error { return errors.Wrap(err, "git clone") } - logger.DebugContext(ctx, "git clone succeeded", slog.String("output", string(output))) + // Configure remote to fetch all branches, not just the default branch + // git clone sets fetch = +refs/heads/master:refs/remotes/origin/master by default + // We need to change it to fetch all branches + // #nosec G204 - c.path is controlled by us + cmd = exec.CommandContext(ctx, "git", "-C", c.path, "config", "remote.origin.fetch", "+refs/heads/*:refs/remotes/origin/*") + output, err = cmd.CombinedOutput() + if err != nil { + logger.ErrorContext(ctx, "git config failed", + slog.String("error", err.Error()), + slog.String("output", string(output))) + return errors.Wrap(err, "configure fetch refspec") + } + + // Fetch all branches now that the refspec is configured + cmd, err = gitCommand(ctx, c.upstreamURL, "-C", c.path, + "-c", "http.postBuffer=524288000", + "-c", "http.lowSpeedLimit=1000", + "-c", "http.lowSpeedTime=600", + "fetch", "--all") + if err != nil { + return errors.Wrap(err, "create git command for fetch") + } + output, err = cmd.CombinedOutput() + if err != nil { + logger.ErrorContext(ctx, "git fetch --all failed", + slog.String("error", err.Error()), + slog.String("output", string(output))) + return errors.Wrap(err, "fetch all branches") + } + return nil } -// executeFetch performs a git fetch --all operation. +// executeFetch performs a git remote update operation. func (s *Strategy) executeFetch(ctx context.Context, c *clone) error { logger := logging.FromContext(ctx) @@ -193,10 +250,18 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error { if strings.HasSuffix(ref, "^{}") { continue } - localSHA, exists := localRefs[ref] + // Only check refs/heads/* from upstream since those are what we fetch + // (GitHub exposes refs/pull/* and other refs we don't fetch) + if !strings.HasPrefix(ref, "refs/heads/") { + continue + } + // Convert refs/heads/X to refs/remotes/origin/X for local lookup + localRef := "refs/remotes/origin/" + strings.TrimPrefix(ref, "refs/heads/") + localSHA, exists := localRefs[localRef] if !exists || localSHA != upstreamSHA { logger.DebugContext(ctx, "Upstream ref differs from local", - slog.String("ref", ref), + slog.String("upstream_ref", ref), + slog.String("local_ref", localRef), slog.String("upstream_sha", upstreamSHA), slog.String("local_sha", localSHA)) needsFetch = true @@ -225,14 +290,12 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error { // getLocalRefs returns a map of ref names to SHAs for the local clone. func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]string, error) { // #nosec G204 - c.path is controlled by us - // Use --head to include HEAD symbolic ref - cmd, err := gitCommand(ctx, "", "-C", c.path, "show-ref", "--head") - if err != nil { - return nil, errors.Wrap(err, "create git command") - } + // Use for-each-ref to get all refs including remote refs + // No need for insteadOf protection since this is purely local + cmd := exec.CommandContext(ctx, "git", "-C", c.path, "for-each-ref", "--format=%(objectname) %(refname)") output, err := cmd.CombinedOutput() if err != nil { - return nil, errors.Wrap(err, "git show-ref") + return nil, errors.Wrap(err, "git for-each-ref") } return ParseGitRefs(output), nil diff --git a/internal/strategy/git/bundle.go b/internal/strategy/git/bundle.go index 8f1e2e0..c99cb69 100644 --- a/internal/strategy/git/bundle.go +++ b/internal/strategy/git/bundle.go @@ -6,6 +6,7 @@ import ( "log/slog" "net/textproto" "os" + "strings" "time" "github.com/alecthomas/errors" @@ -90,8 +91,9 @@ func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) { // Stream bundle directly to cache // #nosec G204 - c.path is controlled by us - cmd, err := gitCommand(ctx, "", "-C", c.path, - "bundle", "create", "-", "--branches") + // Use --branches --remotes to include all branches but exclude tags (which can be massive) + args := []string{"-C", c.path, "bundle", "create", "-", "--branches", "--remotes"} + cmd, err := gitCommand(ctx, "", args...) if err != nil { logger.ErrorContext(ctx, "Failed to create git command", slog.String("upstream", c.upstreamURL), @@ -109,6 +111,10 @@ func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) { return } + logger.DebugContext(ctx, "Starting bundle generation", + slog.String("upstream", c.upstreamURL), + slog.String("command", "git "+strings.Join(args, " "))) + if err := cmd.Start(); err != nil { logger.ErrorContext(ctx, "Failed to start bundle generation", slog.String("upstream", c.upstreamURL), @@ -126,6 +132,12 @@ func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) { return } + if len(stderr) > 0 { + logger.DebugContext(ctx, "Bundle generation stderr", + slog.String("upstream", c.upstreamURL), + slog.String("stderr", string(stderr))) + } + logger.InfoContext(ctx, "Bundle uploaded successfully", slog.String("upstream", c.upstreamURL)) } diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 5b38b38..bf1f90d 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -27,14 +27,14 @@ func init() { // Config for the Git strategy. type Config struct { - MirrorRoot string `hcl:"mirror-root" help:"Directory to store git mirrors." required:""` + MirrorRoot string `hcl:"mirror-root" help:"Directory to store git clones." required:""` FetchInterval time.Duration `hcl:"fetch-interval,optional" help:"How often to fetch from upstream in minutes." default:"15m"` RefCheckInterval time.Duration `hcl:"ref-check-interval,optional" help:"How long to cache ref checks." default:"10s"` BundleInterval time.Duration `hcl:"bundle-interval,optional" help:"How often to generate bundles. 0 disables bundling." default:"0"` CloneDepth int `hcl:"clone-depth,optional" help:"Depth for shallow clones. 0 means full clone." default:"0"` } -// cloneState represents the current state of a bare clone. +// cloneState represents the current state of a clone. type cloneState int const ( @@ -43,7 +43,7 @@ const ( stateReady // Clone is ready to serve ) -// clone represents a bare clone of an upstream repository. +// clone represents a checked out clone of an upstream repository. type clone struct { mu sync.RWMutex state cloneState @@ -292,7 +292,9 @@ func (s *Strategy) getOrCreateClone(ctx context.Context, upstreamURL string) *cl } // Check if clone already exists on disk (from previous run) - if _, err := os.Stat(clonePath); err == nil { + // Verify it has a .git directory to ensure it's a valid clone + gitDir := filepath.Join(clonePath, ".git") + if _, err := os.Stat(gitDir); err == nil { c.state = stateReady logging.FromContext(ctx).DebugContext(ctx, "Found existing clone on disk", slog.String("path", clonePath)) @@ -315,12 +317,12 @@ func (s *Strategy) clonePathForURL(upstreamURL string) string { parsed, err := url.Parse(upstreamURL) if err != nil { // Fallback to simple hash if URL parsing fails - return filepath.Join(s.config.MirrorRoot, "unknown.git") + return filepath.Join(s.config.MirrorRoot, "unknown") } - // Create path: {mirror_root}/{host}/{path}.git + // Create path: {mirror_root}/{host}/{path} repoPath := strings.TrimSuffix(parsed.Path, ".git") - return filepath.Join(s.config.MirrorRoot, parsed.Host, repoPath+".git") + return filepath.Join(s.config.MirrorRoot, parsed.Host, repoPath) } // discoverExistingClones scans the mirror root for existing clones and starts bundle loops. @@ -338,10 +340,19 @@ func (s *Strategy) discoverExistingClones(ctx context.Context) error { return nil } - // Check if this directory is a bare git repository by looking for HEAD file - headPath := filepath.Join(path, "HEAD") + // Check if this directory is a git repository by looking for .git directory or HEAD file + gitDir := filepath.Join(path, ".git") + headPath := filepath.Join(path, ".git", "HEAD") + if _, statErr := os.Stat(gitDir); statErr != nil { + // Skip if .git doesn't exist (not a git repo) + if errors.Is(statErr, os.ErrNotExist) { + return nil + } + // Return other errors + return errors.Wrap(statErr, "stat .git directory") + } if _, statErr := os.Stat(headPath); statErr != nil { - // Skip if HEAD doesn't exist (not a git repo) + // Skip if HEAD doesn't exist (not a valid git repo) if errors.Is(statErr, os.ErrNotExist) { return nil } @@ -365,7 +376,7 @@ func (s *Strategy) discoverExistingClones(ctx context.Context) error { } host := parts[0] - repoPath := strings.TrimSuffix(strings.Join(parts[1:], "/"), ".git") + repoPath := strings.Join(parts[1:], "/") upstreamURL := "https://" + host + "/" + repoPath // Create clone entry diff --git a/internal/strategy/git/git_test.go b/internal/strategy/git/git_test.go index 84d50ae..9942a3b 100644 --- a/internal/strategy/git/git_test.go +++ b/internal/strategy/git/git_test.go @@ -132,8 +132,15 @@ func TestNewWithExistingCloneOnDisk(t *testing.T) { tmpDir := t.TempDir() // Create a fake clone directory on disk before initializing strategy - clonePath := filepath.Join(tmpDir, "github.com", "org", "repo.git") - err := os.MkdirAll(clonePath, 0o750) + // For regular clones, we need a .git subdirectory with HEAD file + clonePath := filepath.Join(tmpDir, "github.com", "org", "repo") + gitDir := filepath.Join(clonePath, ".git") + err := os.MkdirAll(gitDir, 0o750) + assert.NoError(t, err) + + // Create HEAD file to make it look like a valid git repo + headPath := filepath.Join(gitDir, "HEAD") + err = os.WriteFile(headPath, []byte("ref: refs/heads/main\n"), 0o640) assert.NoError(t, err) mux := newTestMux() diff --git a/internal/strategy/git/integration_test.go b/internal/strategy/git/integration_test.go index fb5cf4d..a33bc80 100644 --- a/internal/strategy/git/integration_test.go +++ b/internal/strategy/git/integration_test.go @@ -99,11 +99,17 @@ func TestIntegrationGitCloneViaProxy(t *testing.T) { _, err = os.Stat(readmePath2) assert.NoError(t, err) - // Verify the bare clone was created - bareClonePath := filepath.Join(clonesDir, "github.com", "octocat", "Hello-World.git") - info, err := os.Stat(bareClonePath) + // Verify the clone was created + clonePath := filepath.Join(clonesDir, "github.com", "octocat", "Hello-World") + info, err := os.Stat(clonePath) assert.NoError(t, err) assert.True(t, info.IsDir()) + + // Verify it has a .git directory (regular clone) + gitDir := filepath.Join(clonePath, ".git") + gitInfo, err := os.Stat(gitDir) + assert.NoError(t, err) + assert.True(t, gitInfo.IsDir()) } // TestIntegrationGitFetchViaProxy tests fetching updates through the proxy.