diff --git a/cachew.hcl b/cachew.hcl
index 01a2052..622b158 100644
--- a/cachew.hcl
+++ b/cachew.hcl
@@ -8,6 +8,7 @@
git {
mirror-root = "./state/git-mirrors"
+ clone-depth = 1000
}
host "https://w3.org" {}
diff --git a/cmd/cachewd/main.go b/cmd/cachewd/main.go
index 74e3b60..89bab72 100644
--- a/cmd/cachewd/main.go
+++ b/cmd/cachewd/main.go
@@ -38,9 +38,9 @@ func main() {
server := &http.Server{
Addr: cli.Bind,
Handler: httputil.LoggingMiddleware(mux),
- ReadTimeout: 30 * time.Second,
- WriteTimeout: 30 * time.Second,
- ReadHeaderTimeout: 10 * time.Second,
+ ReadTimeout: 30 * time.Minute,
+ WriteTimeout: 30 * time.Minute,
+ ReadHeaderTimeout: 30 * time.Second,
BaseContext: func(net.Listener) context.Context {
return ctx
},
diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go
index 74904c5..9015a37 100644
--- a/internal/strategy/git/backend.go
+++ b/internal/strategy/git/backend.go
@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "strconv"
"strings"
"time"
@@ -64,7 +65,7 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c
handler.ServeHTTP(w, r2)
}
-// executeClone performs a git clone --bare --mirror operation.
+// executeClone performs a git clone --bare operation.
func (s *Strategy) executeClone(ctx context.Context, c *clone) error {
logger := logging.FromContext(ctx)
@@ -74,12 +75,19 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error {
// #nosec G204 - c.upstreamURL and c.path are controlled by us
// Configure git for large repositories to avoid network buffer issues
- cmd := exec.CommandContext(ctx, "git", "clone",
- "--bare", "--mirror",
+ args := []string{"clone", "--bare"}
+ if s.config.CloneDepth > 0 {
+ args = append(args, "--depth", strconv.Itoa(s.config.CloneDepth))
+ }
+ args = append(args,
"-c", "http.postBuffer=524288000", // 500MB buffer
"-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed
"-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed
c.upstreamURL, c.path)
+ cmd, err := gitCommand(ctx, c.upstreamURL, args...)
+ if err != nil {
+ return errors.Wrap(err, "create git command")
+ }
output, err := cmd.CombinedOutput()
if err != nil {
logger.ErrorContext(ctx, "git clone failed",
@@ -121,12 +129,18 @@ func (s *Strategy) executeFetch(ctx context.Context, c *clone) error {
// #nosec G204 - c.path is controlled by us
// Configure git for large repositories to avoid network buffer issues
- // Use 'remote update' for mirror clones to properly handle ref updates and pruning
- cmd := exec.CommandContext(ctx, "git", "-C", c.path,
+ // Use 'remote update' to properly handle ref updates and pruning
+ cmd, err := gitCommand(ctx, c.upstreamURL, "-C", c.path,
"-c", "http.postBuffer=524288000", // 500MB buffer
"-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed
"-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed
"remote", "update", "--prune")
+ if err != nil {
+ logger.ErrorContext(ctx, "Failed to create git command",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()))
+ return errors.Wrap(err, "create git command")
+ }
output, err := cmd.CombinedOutput()
if err != nil {
logger.ErrorContext(ctx, "git remote update failed",
@@ -212,7 +226,10 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error {
func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]string, error) {
// #nosec G204 - c.path is controlled by us
// Use --head to include HEAD symbolic ref
- cmd := exec.CommandContext(ctx, "git", "-C", c.path, "show-ref", "--head")
+ cmd, err := gitCommand(ctx, "", "-C", c.path, "show-ref", "--head")
+ if err != nil {
+ return nil, errors.Wrap(err, "create git command")
+ }
output, err := cmd.CombinedOutput()
if err != nil {
return nil, errors.Wrap(err, "git show-ref")
@@ -224,7 +241,10 @@ func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]strin
// getUpstreamRefs returns a map of ref names to SHAs for the upstream repository.
func (s *Strategy) getUpstreamRefs(ctx context.Context, c *clone) (map[string]string, error) {
// #nosec G204 - c.upstreamURL is controlled by us
- cmd := exec.CommandContext(ctx, "git", "ls-remote", c.upstreamURL)
+ cmd, err := gitCommand(ctx, c.upstreamURL, "ls-remote", c.upstreamURL)
+ if err != nil {
+ return nil, errors.Wrap(err, "create git command")
+ }
output, err := cmd.CombinedOutput()
if err != nil {
return nil, errors.Wrap(err, "git ls-remote")
diff --git a/internal/strategy/git/bundle.go b/internal/strategy/git/bundle.go
new file mode 100644
index 0000000..8f1e2e0
--- /dev/null
+++ b/internal/strategy/git/bundle.go
@@ -0,0 +1,131 @@
+package git
+
+import (
+ "context"
+ "io"
+ "log/slog"
+ "net/textproto"
+ "os"
+ "time"
+
+ "github.com/alecthomas/errors"
+
+ "github.com/block/cachew/internal/cache"
+ "github.com/block/cachew/internal/logging"
+)
+
+// cloneBundleLoop generates bundles periodically for a single clone.
+func (s *Strategy) cloneBundleLoop(ctx context.Context, c *clone) {
+ logger := logging.FromContext(ctx)
+
+ // Generate bundle immediately on start if one doesn't exist
+ s.generateAndUploadBundleIfMissing(ctx, c)
+
+ ticker := time.NewTicker(s.config.BundleInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-ctx.Done():
+ logger.DebugContext(ctx, "Bundle generator shutting down",
+ slog.String("upstream", c.upstreamURL))
+ return
+
+ case <-ticker.C:
+ s.generateAndUploadBundle(ctx, c)
+ }
+ }
+}
+
+// generateAndUploadBundleIfMissing generates a bundle only if one doesn't exist in cache.
+func (s *Strategy) generateAndUploadBundleIfMissing(ctx context.Context, c *clone) {
+ logger := logging.FromContext(ctx)
+
+ // Check if bundle already exists in cache
+ cacheKey := cache.NewKey(c.upstreamURL + ".bundle")
+
+ reader, _, err := s.cache.Open(ctx, cacheKey)
+ if err == nil {
+ // Bundle exists, close and skip generation
+ _ = reader.Close()
+ logger.DebugContext(ctx, "Bundle already exists in cache, skipping generation",
+ slog.String("upstream", c.upstreamURL))
+ return
+ }
+
+ // Only generate if the error is that the bundle doesn't exist
+ if !errors.Is(err, os.ErrNotExist) {
+ logger.ErrorContext(ctx, "Failed to check for existing bundle",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()))
+ return
+ }
+
+ // Bundle doesn't exist, generate it
+ s.generateAndUploadBundle(ctx, c)
+}
+
+// generateAndUploadBundle generates a bundle and streams it directly to cache.
+func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) {
+ logger := logging.FromContext(ctx)
+
+ logger.InfoContext(ctx, "Generating bundle",
+ slog.String("upstream", c.upstreamURL))
+
+ cacheKey := cache.NewKey(c.upstreamURL + ".bundle")
+
+ // Create cache writer
+ headers := textproto.MIMEHeader{
+ "Content-Type": []string{"application/x-git-bundle"},
+ }
+ ttl := 7 * 24 * time.Hour
+ w, err := s.cache.Create(ctx, cacheKey, headers, ttl)
+ if err != nil {
+ logger.ErrorContext(ctx, "Failed to create cache entry",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()))
+ return
+ }
+ defer w.Close()
+
+ // Stream bundle directly to cache
+ // #nosec G204 - c.path is controlled by us
+ cmd, err := gitCommand(ctx, "", "-C", c.path,
+ "bundle", "create", "-", "--branches")
+ if err != nil {
+ logger.ErrorContext(ctx, "Failed to create git command",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()))
+ return
+ }
+ cmd.Stdout = w
+
+ // Capture stderr for error reporting
+ stderrPipe, err := cmd.StderrPipe()
+ if err != nil {
+ logger.ErrorContext(ctx, "Failed to create stderr pipe",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()))
+ return
+ }
+
+ if err := cmd.Start(); err != nil {
+ logger.ErrorContext(ctx, "Failed to start bundle generation",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()))
+ return
+ }
+
+ stderr, _ := io.ReadAll(stderrPipe) //nolint:errcheck // Only used for logging
+
+ if err := cmd.Wait(); err != nil {
+ logger.ErrorContext(ctx, "Failed to generate bundle",
+ slog.String("upstream", c.upstreamURL),
+ slog.String("error", err.Error()),
+ slog.String("stderr", string(stderr)))
+ return
+ }
+
+ logger.InfoContext(ctx, "Bundle uploaded successfully",
+ slog.String("upstream", c.upstreamURL))
+}
diff --git a/internal/strategy/git/bundle_test.go b/internal/strategy/git/bundle_test.go
new file mode 100644
index 0000000..6a1160a
--- /dev/null
+++ b/internal/strategy/git/bundle_test.go
@@ -0,0 +1,111 @@
+package git_test
+
+import (
+ "context"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+
+ "github.com/alecthomas/assert/v2"
+
+ "github.com/block/cachew/internal/cache"
+ "github.com/block/cachew/internal/logging"
+ "github.com/block/cachew/internal/strategy/git"
+)
+
+func TestBundleHTTPEndpoint(t *testing.T) {
+ _, ctx := logging.Configure(context.Background(), logging.Config{})
+ tmpDir := t.TempDir()
+
+ memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
+ assert.NoError(t, err)
+ mux := newTestMux()
+
+ _, err = git.New(ctx, git.Config{
+ MirrorRoot: tmpDir,
+ BundleInterval: 24 * time.Hour,
+ }, memCache, mux)
+ assert.NoError(t, err)
+
+ // Create a fake bundle in the cache
+ upstreamURL := "https://github.com/org/repo"
+ cacheKey := cache.NewKey(upstreamURL + ".bundle")
+ bundleData := []byte("fake bundle data")
+
+ headers := make(map[string][]string)
+ headers["Content-Type"] = []string{"application/x-git-bundle"}
+ writer, err := memCache.Create(ctx, cacheKey, headers, 24*time.Hour)
+ assert.NoError(t, err)
+ _, err = writer.Write(bundleData)
+ assert.NoError(t, err)
+ err = writer.Close()
+ assert.NoError(t, err)
+
+ // Test bundle endpoint exists
+ handler := mux.handlers["GET /git/{host}/{path...}"]
+ assert.NotZero(t, handler)
+
+ // Test successful bundle request
+ req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/bundle", nil)
+ req = req.WithContext(ctx)
+ req.SetPathValue("host", "github.com")
+ req.SetPathValue("path", "org/repo/bundle")
+ w := httptest.NewRecorder()
+
+ handler.ServeHTTP(w, req)
+
+ assert.Equal(t, 200, w.Code)
+ assert.Equal(t, "application/x-git-bundle", w.Header().Get("Content-Type"))
+ assert.Equal(t, bundleData, w.Body.Bytes())
+
+ // Test bundle not found
+ req = httptest.NewRequest(http.MethodGet, "/git/github.com/org/nonexistent/bundle", nil)
+ req = req.WithContext(ctx)
+ req.SetPathValue("host", "github.com")
+ req.SetPathValue("path", "org/nonexistent/bundle")
+ w = httptest.NewRecorder()
+
+ handler.ServeHTTP(w, req)
+
+ assert.Equal(t, 404, w.Code)
+}
+
+func TestBundleInterval(t *testing.T) {
+ _, ctx := logging.Configure(context.Background(), logging.Config{})
+ tmpDir := t.TempDir()
+
+ tests := []struct {
+ name string
+ bundleInterval time.Duration
+ expectDefault bool
+ }{
+ {
+ name: "CustomInterval",
+ bundleInterval: 1 * time.Hour,
+ expectDefault: false,
+ },
+ {
+ name: "DefaultInterval",
+ bundleInterval: 0,
+ expectDefault: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
+ assert.NoError(t, err)
+ mux := newTestMux()
+
+ s, err := git.New(ctx, git.Config{
+ MirrorRoot: tmpDir,
+ BundleInterval: tt.bundleInterval,
+ }, memCache, mux)
+ assert.NoError(t, err)
+ assert.NotZero(t, s)
+
+ // Strategy should be created successfully regardless of bundle interval
+ })
+ }
+}
diff --git a/internal/strategy/git/command.go b/internal/strategy/git/command.go
new file mode 100644
index 0000000..f172a54
--- /dev/null
+++ b/internal/strategy/git/command.go
@@ -0,0 +1,74 @@
+package git
+
+import (
+ "bufio"
+ "context"
+ "os/exec"
+ "strings"
+
+ "github.com/alecthomas/errors"
+)
+
+// gitCommand creates a git command with insteadOf URL rewriting disabled for the given URL.
+// This prevents git config rules like "url.X.insteadOf=Y" from rewriting the specific URL
+// to point back through the proxy, which would cause infinite loops.
+// Other insteadOf rules and all auth configuration are preserved.
+func gitCommand(ctx context.Context, url string, args ...string) (*exec.Cmd, error) {
+ // Query for insteadOf rules that would affect this URL and build -c flags to disable them
+ configArgs, err := getInsteadOfDisableArgsForURL(ctx, url)
+ if err != nil {
+ return nil, errors.Wrap(err, "get insteadOf disable args")
+ }
+
+ // Prepend disable args to the git command arguments
+ var allArgs []string
+ if len(configArgs) > 0 {
+ allArgs = append(allArgs, configArgs...)
+ }
+ allArgs = append(allArgs, args...)
+
+ cmd := exec.CommandContext(ctx, "git", allArgs...)
+ return cmd, nil
+}
+
+// getInsteadOfDisableArgsForURL queries git config for insteadOf rules that would affect
+// the given URL and returns arguments to disable only those specific rules.
+func getInsteadOfDisableArgsForURL(ctx context.Context, targetURL string) ([]string, error) {
+ if targetURL == "" {
+ return nil, nil
+ }
+
+ // Query git config for all url.*.insteadOf and url.*.pushInsteadOf settings
+ cmd := exec.CommandContext(ctx, "git", "config", "--get-regexp", "^url\\..*\\.(insteadof|pushinsteadof)$")
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ // No insteadOf rules found (exit code 1) is expected and not an error
+ // Return empty args to continue without disabling any rules
+ return []string{}, nil //nolint:nilerr // Exit code 1 is expected when no rules exist
+ }
+
+ // Parse output and check which rules would match our URL
+ // Output format: url..insteadof or url..pushinsteadof
+ var args []string
+ scanner := bufio.NewScanner(strings.NewReader(string(output)))
+ for scanner.Scan() {
+ line := scanner.Text()
+ // Split into config key and value
+ parts := strings.Fields(line)
+ if len(parts) >= 2 {
+ configKey := parts[0]
+ pattern := parts[1]
+
+ // Check if our target URL would match this insteadOf pattern
+ if strings.HasPrefix(targetURL, pattern) {
+ // This rule would affect our URL, so disable it
+ args = append(args, "-c", configKey+"=")
+ }
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, errors.Wrap(err, "scan insteadOf output")
+ }
+
+ return args, nil
+}
diff --git a/internal/strategy/git/command_test.go b/internal/strategy/git/command_test.go
new file mode 100644
index 0000000..cf64b13
--- /dev/null
+++ b/internal/strategy/git/command_test.go
@@ -0,0 +1,73 @@
+package git //nolint:testpackage // Internal functions need to be tested
+
+import (
+ "context"
+ "testing"
+
+ "github.com/alecthomas/assert/v2"
+)
+
+func TestGetInsteadOfDisableArgsForURL(t *testing.T) {
+ ctx := context.Background()
+
+ tests := []struct {
+ name string
+ targetURL string
+ // We can't easily test the actual git config reading in a unit test,
+ // but we can test the logic would work correctly
+ skipTest bool
+ }{
+ {
+ name: "EmptyURL",
+ targetURL: "",
+ skipTest: false,
+ },
+ {
+ name: "GitHubURL",
+ targetURL: "https://github.com/user/repo",
+ skipTest: true, // Skip actual git config test
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if tt.skipTest {
+ t.Skip("Requires git config setup")
+ }
+
+ args, err := getInsteadOfDisableArgsForURL(ctx, tt.targetURL)
+ assert.NoError(t, err)
+ if tt.targetURL == "" {
+ assert.Equal(t, 0, len(args))
+ }
+ })
+ }
+}
+
+func TestGitCommand(t *testing.T) {
+ ctx := context.Background()
+
+ // Test that gitCommand creates a valid command
+ cmd, err := gitCommand(ctx, "https://github.com/user/repo", "version")
+ assert.NoError(t, err)
+
+ assert.NotZero(t, cmd)
+ // Should have at least "git" and "version" in args
+ assert.True(t, len(cmd.Args) >= 2)
+ // First arg should be git binary path
+ assert.Equal(t, "git", cmd.Args[0])
+ // Last arg should be "version"
+ assert.Equal(t, "version", cmd.Args[len(cmd.Args)-1])
+}
+
+func TestGitCommandWithEmptyURL(t *testing.T) {
+ ctx := context.Background()
+
+ // Test with empty URL (for commands that don't need URL filtering)
+ cmd, err := gitCommand(ctx, "", "version")
+ assert.NoError(t, err)
+
+ assert.NotZero(t, cmd)
+ assert.Equal(t, "git", cmd.Args[0])
+ assert.Equal(t, "version", cmd.Args[len(cmd.Args)-1])
+}
diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go
index 3f3e9f4..5b38b38 100644
--- a/internal/strategy/git/git.go
+++ b/internal/strategy/git/git.go
@@ -3,6 +3,7 @@ package git
import (
"context"
+ "io"
"log/slog"
"net/http"
"net/http/httputil"
@@ -29,6 +30,8 @@ type Config struct {
MirrorRoot string `hcl:"mirror-root" help:"Directory to store git mirrors." required:""`
FetchInterval time.Duration `hcl:"fetch-interval,optional" help:"How often to fetch from upstream in minutes." default:"15m"`
RefCheckInterval time.Duration `hcl:"ref-check-interval,optional" help:"How long to cache ref checks." default:"10s"`
+ BundleInterval time.Duration `hcl:"bundle-interval,optional" help:"How often to generate bundles. 0 disables bundling." default:"0"`
+ CloneDepth int `hcl:"clone-depth,optional" help:"Depth for shallow clones. 0 means full clone." default:"0"`
}
// cloneState represents the current state of a bare clone.
@@ -60,6 +63,7 @@ type Strategy struct {
clonesMu sync.RWMutex
httpClient *http.Client
proxy *httputil.ReverseProxy
+ ctx context.Context // Strategy lifecycle context
}
// New creates a new Git caching strategy.
@@ -87,6 +91,13 @@ func New(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux
cache: cache,
clones: make(map[string]*clone),
httpClient: http.DefaultClient,
+ ctx: ctx,
+ }
+
+ // Scan for existing clones on disk and start bundle loops for them
+ if err := s.discoverExistingClones(ctx); err != nil {
+ logger.WarnContext(ctx, "Failed to discover existing clones",
+ slog.String("error", err.Error()))
}
s.proxy = &httputil.ReverseProxy{
@@ -109,7 +120,8 @@ func New(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux
logger.InfoContext(ctx, "Git strategy initialized",
"mirror_root", config.MirrorRoot,
"fetch_interval", config.FetchInterval,
- "ref_check_interval", config.RefCheckInterval)
+ "ref_check_interval", config.RefCheckInterval,
+ "bundle_interval", config.BundleInterval)
return s, nil
}
@@ -131,6 +143,12 @@ func (s *Strategy) handleRequest(w http.ResponseWriter, r *http.Request) {
slog.String("host", host),
slog.String("path", pathValue))
+ // Check if this is a bundle request
+ if strings.HasSuffix(pathValue, "/bundle") {
+ s.handleBundleRequest(w, r, host, pathValue)
+ return
+ }
+
// Determine the service type from query param or path
service := r.URL.Query().Get("service")
isReceivePack := service == "git-receive-pack" || strings.HasSuffix(pathValue, "/git-receive-pack")
@@ -193,6 +211,58 @@ func ExtractRepoPath(pathValue string) string {
return repoPath
}
+// handleBundleRequest serves a git bundle from the cache.
+func (s *Strategy) handleBundleRequest(w http.ResponseWriter, r *http.Request, host, pathValue string) {
+ ctx := r.Context()
+ logger := logging.FromContext(ctx)
+
+ logger.DebugContext(ctx, "Bundle request",
+ slog.String("host", host),
+ slog.String("path", pathValue))
+
+ // Remove /bundle suffix to get repo path
+ pathValue = strings.TrimSuffix(pathValue, "/bundle")
+
+ // Extract repo path and construct upstream URL
+ repoPath := ExtractRepoPath(pathValue)
+ upstreamURL := "https://" + host + "/" + repoPath
+
+ // Generate cache key
+ cacheKey := cache.NewKey(upstreamURL + ".bundle")
+
+ // Open bundle from cache
+ reader, headers, err := s.cache.Open(ctx, cacheKey)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ logger.DebugContext(ctx, "Bundle not found in cache",
+ slog.String("upstream", upstreamURL))
+ http.NotFound(w, r)
+ return
+ }
+ logger.ErrorContext(ctx, "Failed to open bundle from cache",
+ slog.String("upstream", upstreamURL),
+ slog.String("error", err.Error()))
+ http.Error(w, "Internal server error", http.StatusInternalServerError)
+ return
+ }
+ defer reader.Close()
+
+ // Set headers
+ for key, values := range headers {
+ for _, value := range values {
+ w.Header().Add(key, value)
+ }
+ }
+
+ // Stream bundle to client
+ _, err = io.Copy(w, reader)
+ if err != nil {
+ logger.ErrorContext(ctx, "Failed to stream bundle",
+ slog.String("upstream", upstreamURL),
+ slog.String("error", err.Error()))
+ }
+}
+
// getOrCreateClone returns an existing clone or creates a new one in empty state.
func (s *Strategy) getOrCreateClone(ctx context.Context, upstreamURL string) *clone {
s.clonesMu.RLock()
@@ -226,6 +296,11 @@ func (s *Strategy) getOrCreateClone(ctx context.Context, upstreamURL string) *cl
c.state = stateReady
logging.FromContext(ctx).DebugContext(ctx, "Found existing clone on disk",
slog.String("path", clonePath))
+
+ // Start bundle generation loop for existing clone
+ if s.config.BundleInterval > 0 {
+ go s.cloneBundleLoop(s.ctx, c)
+ }
}
// Initialize semaphore as available
@@ -248,6 +323,83 @@ func (s *Strategy) clonePathForURL(upstreamURL string) string {
return filepath.Join(s.config.MirrorRoot, parsed.Host, repoPath+".git")
}
+// discoverExistingClones scans the mirror root for existing clones and starts bundle loops.
+func (s *Strategy) discoverExistingClones(ctx context.Context) error {
+ logger := logging.FromContext(ctx)
+
+ // Walk the mirror root directory
+ err := filepath.Walk(s.config.MirrorRoot, func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+
+ // Skip non-directories
+ if !info.IsDir() {
+ return nil
+ }
+
+ // Check if this directory is a bare git repository by looking for HEAD file
+ headPath := filepath.Join(path, "HEAD")
+ if _, statErr := os.Stat(headPath); statErr != nil {
+ // Skip if HEAD doesn't exist (not a git repo)
+ if errors.Is(statErr, os.ErrNotExist) {
+ return nil
+ }
+ // Return other errors
+ return errors.Wrap(statErr, "stat HEAD file")
+ }
+
+ // Extract upstream URL from path
+ relPath, err := filepath.Rel(s.config.MirrorRoot, path)
+ if err != nil {
+ logger.WarnContext(ctx, "Failed to get relative path",
+ slog.String("path", path),
+ slog.String("error", err.Error()))
+ return nil
+ }
+
+ // Convert path to upstream URL: {host}/{path}.git -> https://{host}/{path}
+ parts := strings.Split(filepath.ToSlash(relPath), "/")
+ if len(parts) < 2 {
+ return nil
+ }
+
+ host := parts[0]
+ repoPath := strings.TrimSuffix(strings.Join(parts[1:], "/"), ".git")
+ upstreamURL := "https://" + host + "/" + repoPath
+
+ // Create clone entry
+ c := &clone{
+ state: stateReady,
+ path: path,
+ upstreamURL: upstreamURL,
+ fetchSem: make(chan struct{}, 1),
+ }
+ c.fetchSem <- struct{}{}
+
+ s.clonesMu.Lock()
+ s.clones[upstreamURL] = c
+ s.clonesMu.Unlock()
+
+ logger.DebugContext(ctx, "Discovered existing clone",
+ slog.String("path", path),
+ slog.String("upstream", upstreamURL))
+
+ // Start bundle generation loop
+ if s.config.BundleInterval > 0 {
+ go s.cloneBundleLoop(s.ctx, c)
+ }
+
+ return nil
+ })
+
+ if err != nil {
+ return errors.Wrap(err, "walk mirror root")
+ }
+
+ return nil
+}
+
// startClone initiates a git clone operation.
func (s *Strategy) startClone(ctx context.Context, c *clone) {
logger := logging.FromContext(ctx)
@@ -282,6 +434,11 @@ func (s *Strategy) startClone(ctx context.Context, c *clone) {
logger.InfoContext(ctx, "Clone completed",
slog.String("upstream", c.upstreamURL),
slog.String("path", c.path))
+
+ // Start bundle generation loop for new clone
+ if s.config.BundleInterval > 0 {
+ go s.cloneBundleLoop(context.WithoutCancel(ctx), c)
+ }
}
// maybeBackgroundFetch triggers a background fetch if enough time has passed.