From b307863617654ccabdec8abb4d4dc856ffd7e402 Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Fri, 16 Jan 2026 21:12:03 +1100 Subject: [PATCH] feat: create bundle snapshots periodically Interestingly this doesn't seem to result in significant gains. It's also quite costly in terms of storage, so it's disabled by default. That said, it likely does reduce CPU utilisation on the cache server, as it can just stream the bundle back and not have to compute it on the fly. I was planning to automatically inject bundle-uri commands in the prototocol response, but I think given the minimal improvements I'll defer that. ``` ~/dev/cachew $ rm -rf linux-source ; time git clone http://127.0.0.1:8080/git/github.com/torvalds/linux linux-source Cloning into 'linux-source'... remote: Enumerating objects: 11276289, done. remote: Counting objects: 100% (98/98), done. remote: Compressing objects: 100% (56/56), done. remote: Total 11276289 (delta 68), reused 49 (delta 42), pack-reused 11276191 (from 1) Receiving objects: 100% (11276289/11276289), 5.65 GiB | 7.55 MiB/s, done. Resolving deltas: 100% (9153747/9153747), done. Updating files: 100% (92194/92194), done. git clone http://127.0.0.1:8080/git/github.com/torvalds/linux linux-source 486.00s user 112.07s system 67% cpu 14:50.60 total ~/dev/cachew $ rm -rf linux-source ; time git clone http://127.0.0.1:8080/git/github.com/torvalds/linux linux-source Cloning into 'linux-source'... remote: Enumerating objects: 11084152, done. remote: Counting objects: 100% (11084152/11084152), done. remote: Compressing objects: 100% (2034375/2034375), done. remote: Total 11084152 (delta 8997999), reused 11084152 (delta 8997999), pack-reused 0 (from 0) Receiving objects: 100% (11084152/11084152), 5.58 GiB | 101.10 MiB/s, done. Resolving deltas: 100% (8997999/8997999), done. Updating files: 100% (92194/92194), done. git clone http://127.0.0.1:8080/git/github.com/torvalds/linux linux-source 410.10s user 86.08s system 176% cpu 4:40.95 total ~/dev/cachew $ rm -rf linux-source ; time git clone --bundle-uri http://127.0.0.1:8080/git/github.com/torvalds/linux.git/bundle http://127.0.0.1:8080/git/github.com/torvalds/linux.git linux-source Cloning into 'linux-source'... remote: Enumerating objects: 908, done. remote: Counting objects: 100% (908/908), done. remote: Compressing objects: 100% (908/908), done. remote: Total 908 (delta 0), reused 908 (delta 0), pack-reused 0 (from 0) Receiving objects: 100% (908/908), 414.26 KiB | 17.26 MiB/s, done. Checking connectivity: 908, done. Updating files: 100% (92194/92194), done. git clone --bundle-uri linux-source 358.40s user 81.89s system 197% cpu 3:42.47 total ``` --- cachew.hcl | 1 + cmd/cachewd/main.go | 6 +- internal/strategy/git/backend.go | 34 ++++-- internal/strategy/git/bundle.go | 131 +++++++++++++++++++++ internal/strategy/git/bundle_test.go | 111 ++++++++++++++++++ internal/strategy/git/command.go | 74 ++++++++++++ internal/strategy/git/command_test.go | 73 ++++++++++++ internal/strategy/git/git.go | 159 +++++++++++++++++++++++++- 8 files changed, 578 insertions(+), 11 deletions(-) create mode 100644 internal/strategy/git/bundle.go create mode 100644 internal/strategy/git/bundle_test.go create mode 100644 internal/strategy/git/command.go create mode 100644 internal/strategy/git/command_test.go diff --git a/cachew.hcl b/cachew.hcl index 01a2052..622b158 100644 --- a/cachew.hcl +++ b/cachew.hcl @@ -8,6 +8,7 @@ git { mirror-root = "./state/git-mirrors" + clone-depth = 1000 } host "https://w3.org" {} diff --git a/cmd/cachewd/main.go b/cmd/cachewd/main.go index 74e3b60..89bab72 100644 --- a/cmd/cachewd/main.go +++ b/cmd/cachewd/main.go @@ -38,9 +38,9 @@ func main() { server := &http.Server{ Addr: cli.Bind, Handler: httputil.LoggingMiddleware(mux), - ReadTimeout: 30 * time.Second, - WriteTimeout: 30 * time.Second, - ReadHeaderTimeout: 10 * time.Second, + ReadTimeout: 30 * time.Minute, + WriteTimeout: 30 * time.Minute, + ReadHeaderTimeout: 30 * time.Second, BaseContext: func(net.Listener) context.Context { return ctx }, diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go index 74904c5..9015a37 100644 --- a/internal/strategy/git/backend.go +++ b/internal/strategy/git/backend.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "time" @@ -64,7 +65,7 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c handler.ServeHTTP(w, r2) } -// executeClone performs a git clone --bare --mirror operation. +// executeClone performs a git clone --bare operation. func (s *Strategy) executeClone(ctx context.Context, c *clone) error { logger := logging.FromContext(ctx) @@ -74,12 +75,19 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error { // #nosec G204 - c.upstreamURL and c.path are controlled by us // Configure git for large repositories to avoid network buffer issues - cmd := exec.CommandContext(ctx, "git", "clone", - "--bare", "--mirror", + args := []string{"clone", "--bare"} + if s.config.CloneDepth > 0 { + args = append(args, "--depth", strconv.Itoa(s.config.CloneDepth)) + } + args = append(args, "-c", "http.postBuffer=524288000", // 500MB buffer "-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed "-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed c.upstreamURL, c.path) + cmd, err := gitCommand(ctx, c.upstreamURL, args...) + if err != nil { + return errors.Wrap(err, "create git command") + } output, err := cmd.CombinedOutput() if err != nil { logger.ErrorContext(ctx, "git clone failed", @@ -121,12 +129,18 @@ func (s *Strategy) executeFetch(ctx context.Context, c *clone) error { // #nosec G204 - c.path is controlled by us // Configure git for large repositories to avoid network buffer issues - // Use 'remote update' for mirror clones to properly handle ref updates and pruning - cmd := exec.CommandContext(ctx, "git", "-C", c.path, + // Use 'remote update' to properly handle ref updates and pruning + cmd, err := gitCommand(ctx, c.upstreamURL, "-C", c.path, "-c", "http.postBuffer=524288000", // 500MB buffer "-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed "-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed "remote", "update", "--prune") + if err != nil { + logger.ErrorContext(ctx, "Failed to create git command", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error())) + return errors.Wrap(err, "create git command") + } output, err := cmd.CombinedOutput() if err != nil { logger.ErrorContext(ctx, "git remote update failed", @@ -212,7 +226,10 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error { func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]string, error) { // #nosec G204 - c.path is controlled by us // Use --head to include HEAD symbolic ref - cmd := exec.CommandContext(ctx, "git", "-C", c.path, "show-ref", "--head") + cmd, err := gitCommand(ctx, "", "-C", c.path, "show-ref", "--head") + if err != nil { + return nil, errors.Wrap(err, "create git command") + } output, err := cmd.CombinedOutput() if err != nil { return nil, errors.Wrap(err, "git show-ref") @@ -224,7 +241,10 @@ func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]strin // getUpstreamRefs returns a map of ref names to SHAs for the upstream repository. func (s *Strategy) getUpstreamRefs(ctx context.Context, c *clone) (map[string]string, error) { // #nosec G204 - c.upstreamURL is controlled by us - cmd := exec.CommandContext(ctx, "git", "ls-remote", c.upstreamURL) + cmd, err := gitCommand(ctx, c.upstreamURL, "ls-remote", c.upstreamURL) + if err != nil { + return nil, errors.Wrap(err, "create git command") + } output, err := cmd.CombinedOutput() if err != nil { return nil, errors.Wrap(err, "git ls-remote") diff --git a/internal/strategy/git/bundle.go b/internal/strategy/git/bundle.go new file mode 100644 index 0000000..8f1e2e0 --- /dev/null +++ b/internal/strategy/git/bundle.go @@ -0,0 +1,131 @@ +package git + +import ( + "context" + "io" + "log/slog" + "net/textproto" + "os" + "time" + + "github.com/alecthomas/errors" + + "github.com/block/cachew/internal/cache" + "github.com/block/cachew/internal/logging" +) + +// cloneBundleLoop generates bundles periodically for a single clone. +func (s *Strategy) cloneBundleLoop(ctx context.Context, c *clone) { + logger := logging.FromContext(ctx) + + // Generate bundle immediately on start if one doesn't exist + s.generateAndUploadBundleIfMissing(ctx, c) + + ticker := time.NewTicker(s.config.BundleInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + logger.DebugContext(ctx, "Bundle generator shutting down", + slog.String("upstream", c.upstreamURL)) + return + + case <-ticker.C: + s.generateAndUploadBundle(ctx, c) + } + } +} + +// generateAndUploadBundleIfMissing generates a bundle only if one doesn't exist in cache. +func (s *Strategy) generateAndUploadBundleIfMissing(ctx context.Context, c *clone) { + logger := logging.FromContext(ctx) + + // Check if bundle already exists in cache + cacheKey := cache.NewKey(c.upstreamURL + ".bundle") + + reader, _, err := s.cache.Open(ctx, cacheKey) + if err == nil { + // Bundle exists, close and skip generation + _ = reader.Close() + logger.DebugContext(ctx, "Bundle already exists in cache, skipping generation", + slog.String("upstream", c.upstreamURL)) + return + } + + // Only generate if the error is that the bundle doesn't exist + if !errors.Is(err, os.ErrNotExist) { + logger.ErrorContext(ctx, "Failed to check for existing bundle", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error())) + return + } + + // Bundle doesn't exist, generate it + s.generateAndUploadBundle(ctx, c) +} + +// generateAndUploadBundle generates a bundle and streams it directly to cache. +func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) { + logger := logging.FromContext(ctx) + + logger.InfoContext(ctx, "Generating bundle", + slog.String("upstream", c.upstreamURL)) + + cacheKey := cache.NewKey(c.upstreamURL + ".bundle") + + // Create cache writer + headers := textproto.MIMEHeader{ + "Content-Type": []string{"application/x-git-bundle"}, + } + ttl := 7 * 24 * time.Hour + w, err := s.cache.Create(ctx, cacheKey, headers, ttl) + if err != nil { + logger.ErrorContext(ctx, "Failed to create cache entry", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error())) + return + } + defer w.Close() + + // Stream bundle directly to cache + // #nosec G204 - c.path is controlled by us + cmd, err := gitCommand(ctx, "", "-C", c.path, + "bundle", "create", "-", "--branches") + if err != nil { + logger.ErrorContext(ctx, "Failed to create git command", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error())) + return + } + cmd.Stdout = w + + // Capture stderr for error reporting + stderrPipe, err := cmd.StderrPipe() + if err != nil { + logger.ErrorContext(ctx, "Failed to create stderr pipe", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error())) + return + } + + if err := cmd.Start(); err != nil { + logger.ErrorContext(ctx, "Failed to start bundle generation", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error())) + return + } + + stderr, _ := io.ReadAll(stderrPipe) //nolint:errcheck // Only used for logging + + if err := cmd.Wait(); err != nil { + logger.ErrorContext(ctx, "Failed to generate bundle", + slog.String("upstream", c.upstreamURL), + slog.String("error", err.Error()), + slog.String("stderr", string(stderr))) + return + } + + logger.InfoContext(ctx, "Bundle uploaded successfully", + slog.String("upstream", c.upstreamURL)) +} diff --git a/internal/strategy/git/bundle_test.go b/internal/strategy/git/bundle_test.go new file mode 100644 index 0000000..6a1160a --- /dev/null +++ b/internal/strategy/git/bundle_test.go @@ -0,0 +1,111 @@ +package git_test + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/alecthomas/assert/v2" + + "github.com/block/cachew/internal/cache" + "github.com/block/cachew/internal/logging" + "github.com/block/cachew/internal/strategy/git" +) + +func TestBundleHTTPEndpoint(t *testing.T) { + _, ctx := logging.Configure(context.Background(), logging.Config{}) + tmpDir := t.TempDir() + + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{}) + assert.NoError(t, err) + mux := newTestMux() + + _, err = git.New(ctx, git.Config{ + MirrorRoot: tmpDir, + BundleInterval: 24 * time.Hour, + }, memCache, mux) + assert.NoError(t, err) + + // Create a fake bundle in the cache + upstreamURL := "https://github.com/org/repo" + cacheKey := cache.NewKey(upstreamURL + ".bundle") + bundleData := []byte("fake bundle data") + + headers := make(map[string][]string) + headers["Content-Type"] = []string{"application/x-git-bundle"} + writer, err := memCache.Create(ctx, cacheKey, headers, 24*time.Hour) + assert.NoError(t, err) + _, err = writer.Write(bundleData) + assert.NoError(t, err) + err = writer.Close() + assert.NoError(t, err) + + // Test bundle endpoint exists + handler := mux.handlers["GET /git/{host}/{path...}"] + assert.NotZero(t, handler) + + // Test successful bundle request + req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/bundle", nil) + req = req.WithContext(ctx) + req.SetPathValue("host", "github.com") + req.SetPathValue("path", "org/repo/bundle") + w := httptest.NewRecorder() + + handler.ServeHTTP(w, req) + + assert.Equal(t, 200, w.Code) + assert.Equal(t, "application/x-git-bundle", w.Header().Get("Content-Type")) + assert.Equal(t, bundleData, w.Body.Bytes()) + + // Test bundle not found + req = httptest.NewRequest(http.MethodGet, "/git/github.com/org/nonexistent/bundle", nil) + req = req.WithContext(ctx) + req.SetPathValue("host", "github.com") + req.SetPathValue("path", "org/nonexistent/bundle") + w = httptest.NewRecorder() + + handler.ServeHTTP(w, req) + + assert.Equal(t, 404, w.Code) +} + +func TestBundleInterval(t *testing.T) { + _, ctx := logging.Configure(context.Background(), logging.Config{}) + tmpDir := t.TempDir() + + tests := []struct { + name string + bundleInterval time.Duration + expectDefault bool + }{ + { + name: "CustomInterval", + bundleInterval: 1 * time.Hour, + expectDefault: false, + }, + { + name: "DefaultInterval", + bundleInterval: 0, + expectDefault: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{}) + assert.NoError(t, err) + mux := newTestMux() + + s, err := git.New(ctx, git.Config{ + MirrorRoot: tmpDir, + BundleInterval: tt.bundleInterval, + }, memCache, mux) + assert.NoError(t, err) + assert.NotZero(t, s) + + // Strategy should be created successfully regardless of bundle interval + }) + } +} diff --git a/internal/strategy/git/command.go b/internal/strategy/git/command.go new file mode 100644 index 0000000..f172a54 --- /dev/null +++ b/internal/strategy/git/command.go @@ -0,0 +1,74 @@ +package git + +import ( + "bufio" + "context" + "os/exec" + "strings" + + "github.com/alecthomas/errors" +) + +// gitCommand creates a git command with insteadOf URL rewriting disabled for the given URL. +// This prevents git config rules like "url.X.insteadOf=Y" from rewriting the specific URL +// to point back through the proxy, which would cause infinite loops. +// Other insteadOf rules and all auth configuration are preserved. +func gitCommand(ctx context.Context, url string, args ...string) (*exec.Cmd, error) { + // Query for insteadOf rules that would affect this URL and build -c flags to disable them + configArgs, err := getInsteadOfDisableArgsForURL(ctx, url) + if err != nil { + return nil, errors.Wrap(err, "get insteadOf disable args") + } + + // Prepend disable args to the git command arguments + var allArgs []string + if len(configArgs) > 0 { + allArgs = append(allArgs, configArgs...) + } + allArgs = append(allArgs, args...) + + cmd := exec.CommandContext(ctx, "git", allArgs...) + return cmd, nil +} + +// getInsteadOfDisableArgsForURL queries git config for insteadOf rules that would affect +// the given URL and returns arguments to disable only those specific rules. +func getInsteadOfDisableArgsForURL(ctx context.Context, targetURL string) ([]string, error) { + if targetURL == "" { + return nil, nil + } + + // Query git config for all url.*.insteadOf and url.*.pushInsteadOf settings + cmd := exec.CommandContext(ctx, "git", "config", "--get-regexp", "^url\\..*\\.(insteadof|pushinsteadof)$") + output, err := cmd.CombinedOutput() + if err != nil { + // No insteadOf rules found (exit code 1) is expected and not an error + // Return empty args to continue without disabling any rules + return []string{}, nil //nolint:nilerr // Exit code 1 is expected when no rules exist + } + + // Parse output and check which rules would match our URL + // Output format: url..insteadof or url..pushinsteadof + var args []string + scanner := bufio.NewScanner(strings.NewReader(string(output))) + for scanner.Scan() { + line := scanner.Text() + // Split into config key and value + parts := strings.Fields(line) + if len(parts) >= 2 { + configKey := parts[0] + pattern := parts[1] + + // Check if our target URL would match this insteadOf pattern + if strings.HasPrefix(targetURL, pattern) { + // This rule would affect our URL, so disable it + args = append(args, "-c", configKey+"=") + } + } + } + if err := scanner.Err(); err != nil { + return nil, errors.Wrap(err, "scan insteadOf output") + } + + return args, nil +} diff --git a/internal/strategy/git/command_test.go b/internal/strategy/git/command_test.go new file mode 100644 index 0000000..cf64b13 --- /dev/null +++ b/internal/strategy/git/command_test.go @@ -0,0 +1,73 @@ +package git //nolint:testpackage // Internal functions need to be tested + +import ( + "context" + "testing" + + "github.com/alecthomas/assert/v2" +) + +func TestGetInsteadOfDisableArgsForURL(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + targetURL string + // We can't easily test the actual git config reading in a unit test, + // but we can test the logic would work correctly + skipTest bool + }{ + { + name: "EmptyURL", + targetURL: "", + skipTest: false, + }, + { + name: "GitHubURL", + targetURL: "https://github.com/user/repo", + skipTest: true, // Skip actual git config test + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.skipTest { + t.Skip("Requires git config setup") + } + + args, err := getInsteadOfDisableArgsForURL(ctx, tt.targetURL) + assert.NoError(t, err) + if tt.targetURL == "" { + assert.Equal(t, 0, len(args)) + } + }) + } +} + +func TestGitCommand(t *testing.T) { + ctx := context.Background() + + // Test that gitCommand creates a valid command + cmd, err := gitCommand(ctx, "https://github.com/user/repo", "version") + assert.NoError(t, err) + + assert.NotZero(t, cmd) + // Should have at least "git" and "version" in args + assert.True(t, len(cmd.Args) >= 2) + // First arg should be git binary path + assert.Equal(t, "git", cmd.Args[0]) + // Last arg should be "version" + assert.Equal(t, "version", cmd.Args[len(cmd.Args)-1]) +} + +func TestGitCommandWithEmptyURL(t *testing.T) { + ctx := context.Background() + + // Test with empty URL (for commands that don't need URL filtering) + cmd, err := gitCommand(ctx, "", "version") + assert.NoError(t, err) + + assert.NotZero(t, cmd) + assert.Equal(t, "git", cmd.Args[0]) + assert.Equal(t, "version", cmd.Args[len(cmd.Args)-1]) +} diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 3f3e9f4..5b38b38 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -3,6 +3,7 @@ package git import ( "context" + "io" "log/slog" "net/http" "net/http/httputil" @@ -29,6 +30,8 @@ type Config struct { MirrorRoot string `hcl:"mirror-root" help:"Directory to store git mirrors." required:""` FetchInterval time.Duration `hcl:"fetch-interval,optional" help:"How often to fetch from upstream in minutes." default:"15m"` RefCheckInterval time.Duration `hcl:"ref-check-interval,optional" help:"How long to cache ref checks." default:"10s"` + BundleInterval time.Duration `hcl:"bundle-interval,optional" help:"How often to generate bundles. 0 disables bundling." default:"0"` + CloneDepth int `hcl:"clone-depth,optional" help:"Depth for shallow clones. 0 means full clone." default:"0"` } // cloneState represents the current state of a bare clone. @@ -60,6 +63,7 @@ type Strategy struct { clonesMu sync.RWMutex httpClient *http.Client proxy *httputil.ReverseProxy + ctx context.Context // Strategy lifecycle context } // New creates a new Git caching strategy. @@ -87,6 +91,13 @@ func New(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux cache: cache, clones: make(map[string]*clone), httpClient: http.DefaultClient, + ctx: ctx, + } + + // Scan for existing clones on disk and start bundle loops for them + if err := s.discoverExistingClones(ctx); err != nil { + logger.WarnContext(ctx, "Failed to discover existing clones", + slog.String("error", err.Error())) } s.proxy = &httputil.ReverseProxy{ @@ -109,7 +120,8 @@ func New(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux logger.InfoContext(ctx, "Git strategy initialized", "mirror_root", config.MirrorRoot, "fetch_interval", config.FetchInterval, - "ref_check_interval", config.RefCheckInterval) + "ref_check_interval", config.RefCheckInterval, + "bundle_interval", config.BundleInterval) return s, nil } @@ -131,6 +143,12 @@ func (s *Strategy) handleRequest(w http.ResponseWriter, r *http.Request) { slog.String("host", host), slog.String("path", pathValue)) + // Check if this is a bundle request + if strings.HasSuffix(pathValue, "/bundle") { + s.handleBundleRequest(w, r, host, pathValue) + return + } + // Determine the service type from query param or path service := r.URL.Query().Get("service") isReceivePack := service == "git-receive-pack" || strings.HasSuffix(pathValue, "/git-receive-pack") @@ -193,6 +211,58 @@ func ExtractRepoPath(pathValue string) string { return repoPath } +// handleBundleRequest serves a git bundle from the cache. +func (s *Strategy) handleBundleRequest(w http.ResponseWriter, r *http.Request, host, pathValue string) { + ctx := r.Context() + logger := logging.FromContext(ctx) + + logger.DebugContext(ctx, "Bundle request", + slog.String("host", host), + slog.String("path", pathValue)) + + // Remove /bundle suffix to get repo path + pathValue = strings.TrimSuffix(pathValue, "/bundle") + + // Extract repo path and construct upstream URL + repoPath := ExtractRepoPath(pathValue) + upstreamURL := "https://" + host + "/" + repoPath + + // Generate cache key + cacheKey := cache.NewKey(upstreamURL + ".bundle") + + // Open bundle from cache + reader, headers, err := s.cache.Open(ctx, cacheKey) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + logger.DebugContext(ctx, "Bundle not found in cache", + slog.String("upstream", upstreamURL)) + http.NotFound(w, r) + return + } + logger.ErrorContext(ctx, "Failed to open bundle from cache", + slog.String("upstream", upstreamURL), + slog.String("error", err.Error())) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + defer reader.Close() + + // Set headers + for key, values := range headers { + for _, value := range values { + w.Header().Add(key, value) + } + } + + // Stream bundle to client + _, err = io.Copy(w, reader) + if err != nil { + logger.ErrorContext(ctx, "Failed to stream bundle", + slog.String("upstream", upstreamURL), + slog.String("error", err.Error())) + } +} + // getOrCreateClone returns an existing clone or creates a new one in empty state. func (s *Strategy) getOrCreateClone(ctx context.Context, upstreamURL string) *clone { s.clonesMu.RLock() @@ -226,6 +296,11 @@ func (s *Strategy) getOrCreateClone(ctx context.Context, upstreamURL string) *cl c.state = stateReady logging.FromContext(ctx).DebugContext(ctx, "Found existing clone on disk", slog.String("path", clonePath)) + + // Start bundle generation loop for existing clone + if s.config.BundleInterval > 0 { + go s.cloneBundleLoop(s.ctx, c) + } } // Initialize semaphore as available @@ -248,6 +323,83 @@ func (s *Strategy) clonePathForURL(upstreamURL string) string { return filepath.Join(s.config.MirrorRoot, parsed.Host, repoPath+".git") } +// discoverExistingClones scans the mirror root for existing clones and starts bundle loops. +func (s *Strategy) discoverExistingClones(ctx context.Context) error { + logger := logging.FromContext(ctx) + + // Walk the mirror root directory + err := filepath.Walk(s.config.MirrorRoot, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Skip non-directories + if !info.IsDir() { + return nil + } + + // Check if this directory is a bare git repository by looking for HEAD file + headPath := filepath.Join(path, "HEAD") + if _, statErr := os.Stat(headPath); statErr != nil { + // Skip if HEAD doesn't exist (not a git repo) + if errors.Is(statErr, os.ErrNotExist) { + return nil + } + // Return other errors + return errors.Wrap(statErr, "stat HEAD file") + } + + // Extract upstream URL from path + relPath, err := filepath.Rel(s.config.MirrorRoot, path) + if err != nil { + logger.WarnContext(ctx, "Failed to get relative path", + slog.String("path", path), + slog.String("error", err.Error())) + return nil + } + + // Convert path to upstream URL: {host}/{path}.git -> https://{host}/{path} + parts := strings.Split(filepath.ToSlash(relPath), "/") + if len(parts) < 2 { + return nil + } + + host := parts[0] + repoPath := strings.TrimSuffix(strings.Join(parts[1:], "/"), ".git") + upstreamURL := "https://" + host + "/" + repoPath + + // Create clone entry + c := &clone{ + state: stateReady, + path: path, + upstreamURL: upstreamURL, + fetchSem: make(chan struct{}, 1), + } + c.fetchSem <- struct{}{} + + s.clonesMu.Lock() + s.clones[upstreamURL] = c + s.clonesMu.Unlock() + + logger.DebugContext(ctx, "Discovered existing clone", + slog.String("path", path), + slog.String("upstream", upstreamURL)) + + // Start bundle generation loop + if s.config.BundleInterval > 0 { + go s.cloneBundleLoop(s.ctx, c) + } + + return nil + }) + + if err != nil { + return errors.Wrap(err, "walk mirror root") + } + + return nil +} + // startClone initiates a git clone operation. func (s *Strategy) startClone(ctx context.Context, c *clone) { logger := logging.FromContext(ctx) @@ -282,6 +434,11 @@ func (s *Strategy) startClone(ctx context.Context, c *clone) { logger.InfoContext(ctx, "Clone completed", slog.String("upstream", c.upstreamURL), slog.String("path", c.path)) + + // Start bundle generation loop for new clone + if s.config.BundleInterval > 0 { + go s.cloneBundleLoop(context.WithoutCancel(ctx), c) + } } // maybeBackgroundFetch triggers a background fetch if enough time has passed.