Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cachew.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

git {
mirror-root = "./state/git-mirrors"
clone-depth = 1000
}

host "https://w3.org" {}
Expand Down
6 changes: 3 additions & 3 deletions cmd/cachewd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ func main() {
server := &http.Server{
Addr: cli.Bind,
Handler: httputil.LoggingMiddleware(mux),
ReadTimeout: 30 * time.Second,
WriteTimeout: 30 * time.Second,
ReadHeaderTimeout: 10 * time.Second,
ReadTimeout: 30 * time.Minute,
WriteTimeout: 30 * time.Minute,
ReadHeaderTimeout: 30 * time.Second,
BaseContext: func(net.Listener) context.Context {
return ctx
},
Expand Down
34 changes: 27 additions & 7 deletions internal/strategy/git/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -64,7 +65,7 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c
handler.ServeHTTP(w, r2)
}

// executeClone performs a git clone --bare --mirror operation.
// executeClone performs a git clone --bare operation.
func (s *Strategy) executeClone(ctx context.Context, c *clone) error {
logger := logging.FromContext(ctx)

Expand All @@ -74,12 +75,19 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error {

// #nosec G204 - c.upstreamURL and c.path are controlled by us
// Configure git for large repositories to avoid network buffer issues
cmd := exec.CommandContext(ctx, "git", "clone",
"--bare", "--mirror",
args := []string{"clone", "--bare"}
if s.config.CloneDepth > 0 {
args = append(args, "--depth", strconv.Itoa(s.config.CloneDepth))
}
args = append(args,
"-c", "http.postBuffer=524288000", // 500MB buffer
"-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed
"-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed
c.upstreamURL, c.path)
cmd, err := gitCommand(ctx, c.upstreamURL, args...)
if err != nil {
return errors.Wrap(err, "create git command")
}
output, err := cmd.CombinedOutput()
if err != nil {
logger.ErrorContext(ctx, "git clone failed",
Expand Down Expand Up @@ -121,12 +129,18 @@ func (s *Strategy) executeFetch(ctx context.Context, c *clone) error {

// #nosec G204 - c.path is controlled by us
// Configure git for large repositories to avoid network buffer issues
// Use 'remote update' for mirror clones to properly handle ref updates and pruning
cmd := exec.CommandContext(ctx, "git", "-C", c.path,
// Use 'remote update' to properly handle ref updates and pruning
cmd, err := gitCommand(ctx, c.upstreamURL, "-C", c.path,
"-c", "http.postBuffer=524288000", // 500MB buffer
"-c", "http.lowSpeedLimit=1000", // 1KB/s minimum speed
"-c", "http.lowSpeedTime=600", // 10 minute timeout at low speed
"remote", "update", "--prune")
if err != nil {
logger.ErrorContext(ctx, "Failed to create git command",
slog.String("upstream", c.upstreamURL),
slog.String("error", err.Error()))
return errors.Wrap(err, "create git command")
}
output, err := cmd.CombinedOutput()
if err != nil {
logger.ErrorContext(ctx, "git remote update failed",
Expand Down Expand Up @@ -212,7 +226,10 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error {
func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]string, error) {
// #nosec G204 - c.path is controlled by us
// Use --head to include HEAD symbolic ref
cmd := exec.CommandContext(ctx, "git", "-C", c.path, "show-ref", "--head")
cmd, err := gitCommand(ctx, "", "-C", c.path, "show-ref", "--head")
if err != nil {
return nil, errors.Wrap(err, "create git command")
}
output, err := cmd.CombinedOutput()
if err != nil {
return nil, errors.Wrap(err, "git show-ref")
Expand All @@ -224,7 +241,10 @@ func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]strin
// getUpstreamRefs returns a map of ref names to SHAs for the upstream repository.
func (s *Strategy) getUpstreamRefs(ctx context.Context, c *clone) (map[string]string, error) {
// #nosec G204 - c.upstreamURL is controlled by us
cmd := exec.CommandContext(ctx, "git", "ls-remote", c.upstreamURL)
cmd, err := gitCommand(ctx, c.upstreamURL, "ls-remote", c.upstreamURL)
if err != nil {
return nil, errors.Wrap(err, "create git command")
}
output, err := cmd.CombinedOutput()
if err != nil {
return nil, errors.Wrap(err, "git ls-remote")
Expand Down
131 changes: 131 additions & 0 deletions internal/strategy/git/bundle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package git

import (
"context"
"io"
"log/slog"
"net/textproto"
"os"
"time"

"github.com/alecthomas/errors"

"github.com/block/cachew/internal/cache"
"github.com/block/cachew/internal/logging"
)

// cloneBundleLoop keeps the cached bundle for a single clone fresh.
//
// It first ensures a bundle exists (generating one only if the cache has
// none), then regenerates the bundle every s.config.BundleInterval until ctx
// is cancelled.
func (s *Strategy) cloneBundleLoop(ctx context.Context, c *clone) {
	log := logging.FromContext(ctx)

	// Seed the cache up front so consumers do not wait a full interval.
	s.generateAndUploadBundleIfMissing(ctx, c)

	tick := time.NewTicker(s.config.BundleInterval)
	defer tick.Stop()

	done := ctx.Done()
	for {
		select {
		case <-tick.C:
			s.generateAndUploadBundle(ctx, c)

		case <-done:
			log.DebugContext(ctx, "Bundle generator shutting down",
				slog.String("upstream", c.upstreamURL))
			return
		}
	}
}

// generateAndUploadBundleIfMissing generates a bundle for c only when the
// cache has no entry for it yet.
//
// The cache is probed by opening the bundle key. A successful open means a
// bundle is present and nothing is done; an os.ErrNotExist error triggers
// generation; any other error is logged and generation is skipped.
func (s *Strategy) generateAndUploadBundleIfMissing(ctx context.Context, c *clone) {
	logger := logging.FromContext(ctx)

	key := cache.NewKey(c.upstreamURL + ".bundle")
	existing, _, err := s.cache.Open(ctx, key)

	switch {
	case err == nil:
		// Already cached: release the reader, there is nothing to do.
		_ = existing.Close()
		logger.DebugContext(ctx, "Bundle already exists in cache, skipping generation",
			slog.String("upstream", c.upstreamURL))

	case errors.Is(err, os.ErrNotExist):
		// No bundle yet, so build one now.
		s.generateAndUploadBundle(ctx, c)

	default:
		// The lookup itself failed; do not generate on an unknown cache state.
		logger.ErrorContext(ctx, "Failed to check for existing bundle",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()))
	}
}

// generateAndUploadBundle runs `git bundle create - --branches` inside the
// clone's directory and streams git's stdout directly into a cache entry
// keyed by c.upstreamURL + ".bundle" (Content-Type application/x-git-bundle,
// TTL of seven days).
//
// Errors are logged rather than returned. The git command is constructed
// before the cache entry is created so that a command-construction failure
// does not leave an empty entry behind, and the cache writer's Close error is
// checked before success is reported.
func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) {
	logger := logging.FromContext(ctx)

	logger.InfoContext(ctx, "Generating bundle",
		slog.String("upstream", c.upstreamURL))

	// Build the command first: if this fails there is no reason to have
	// touched the cache at all.
	// #nosec G204 - c.path is controlled by us
	cmd, err := gitCommand(ctx, "", "-C", c.path,
		"bundle", "create", "-", "--branches")
	if err != nil {
		logger.ErrorContext(ctx, "Failed to create git command",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()))
		return
	}

	cacheKey := cache.NewKey(c.upstreamURL + ".bundle")

	// Create cache writer
	headers := textproto.MIMEHeader{
		"Content-Type": []string{"application/x-git-bundle"},
	}
	ttl := 7 * 24 * time.Hour
	w, err := s.cache.Create(ctx, cacheKey, headers, ttl)
	if err != nil {
		logger.ErrorContext(ctx, "Failed to create cache entry",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()))
		return
	}

	// Close the writer exactly once. Failure paths close it here with the
	// error ignored; the success path closes it explicitly below so the
	// error can be inspected.
	// NOTE(review): whether closing after a failed run commits a partial
	// entry depends on the cache implementation - confirm.
	closed := false
	defer func() {
		if !closed {
			_ = w.Close()
		}
	}()

	// Stream bundle directly to cache
	cmd.Stdout = w

	// Capture stderr for error reporting
	stderrPipe, err := cmd.StderrPipe()
	if err != nil {
		logger.ErrorContext(ctx, "Failed to create stderr pipe",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()))
		return
	}

	if err := cmd.Start(); err != nil {
		logger.ErrorContext(ctx, "Failed to start bundle generation",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()))
		return
	}

	// Drain stderr before Wait: Wait closes the pipe, so all reads must have
	// completed by then.
	stderr, _ := io.ReadAll(stderrPipe) //nolint:errcheck // Only used for logging

	if err := cmd.Wait(); err != nil {
		logger.ErrorContext(ctx, "Failed to generate bundle",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()),
			slog.String("stderr", string(stderr)))
		return
	}

	// Finalize the cache entry and only report success if that worked.
	closed = true
	if err := w.Close(); err != nil {
		logger.ErrorContext(ctx, "Failed to finalize bundle cache entry",
			slog.String("upstream", c.upstreamURL),
			slog.String("error", err.Error()))
		return
	}

	logger.InfoContext(ctx, "Bundle uploaded successfully",
		slog.String("upstream", c.upstreamURL))
}
111 changes: 111 additions & 0 deletions internal/strategy/git/bundle_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package git_test

import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/alecthomas/assert/v2"

"github.com/block/cachew/internal/cache"
"github.com/block/cachew/internal/logging"
"github.com/block/cachew/internal/strategy/git"
)

// TestBundleHTTPEndpoint verifies that the git strategy's GET handler serves
// a bundle previously stored in the cache, and responds 404 for a repository
// that has no cached bundle.
func TestBundleHTTPEndpoint(t *testing.T) {
	_, ctx := logging.Configure(context.Background(), logging.Config{})
	mirrorRoot := t.TempDir()

	memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
	assert.NoError(t, err)
	mux := newTestMux()

	_, err = git.New(ctx, git.Config{
		MirrorRoot:     mirrorRoot,
		BundleInterval: 24 * time.Hour,
	}, memCache, mux)
	assert.NoError(t, err)

	// Seed the cache with a fake bundle under the key the strategy uses.
	const upstreamURL = "https://github.com/org/repo"
	bundleData := []byte("fake bundle data")

	writer, err := memCache.Create(ctx,
		cache.NewKey(upstreamURL+".bundle"),
		map[string][]string{"Content-Type": {"application/x-git-bundle"}},
		24*time.Hour)
	assert.NoError(t, err)
	_, err = writer.Write(bundleData)
	assert.NoError(t, err)
	err = writer.Close()
	assert.NoError(t, err)

	// The strategy must have registered the catch-all git GET route.
	handler := mux.handlers["GET /git/{host}/{path...}"]
	assert.NotZero(t, handler)

	// get issues a GET for /git/github.com/<repoPath> against the handler,
	// populating the path values the route pattern would normally supply.
	get := func(repoPath string) *httptest.ResponseRecorder {
		req := httptest.NewRequest(http.MethodGet, "/git/github.com/"+repoPath, nil)
		req = req.WithContext(ctx)
		req.SetPathValue("host", "github.com")
		req.SetPathValue("path", repoPath)
		rec := httptest.NewRecorder()
		handler.ServeHTTP(rec, req)
		return rec
	}

	// Cached bundle is returned verbatim with its stored content type.
	found := get("org/repo/bundle")
	assert.Equal(t, http.StatusOK, found.Code)
	assert.Equal(t, "application/x-git-bundle", found.Header().Get("Content-Type"))
	assert.Equal(t, bundleData, found.Body.Bytes())

	// Unknown repository has no bundle.
	missing := get("org/nonexistent/bundle")
	assert.Equal(t, http.StatusNotFound, missing.Code)
}

// TestBundleInterval checks that git.New succeeds both with an explicit
// BundleInterval and with a zero one.
//
// NOTE(review): expectDefault is recorded per case but never asserted; the
// test only verifies that construction succeeds.
func TestBundleInterval(t *testing.T) {
	_, ctx := logging.Configure(context.Background(), logging.Config{})
	mirrorRoot := t.TempDir()

	type intervalCase struct {
		name           string
		bundleInterval time.Duration
		expectDefault  bool
	}

	cases := []intervalCase{
		{name: "CustomInterval", bundleInterval: 1 * time.Hour, expectDefault: false},
		{name: "DefaultInterval", bundleInterval: 0, expectDefault: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
			assert.NoError(t, err)

			strategy, err := git.New(ctx, git.Config{
				MirrorRoot:     mirrorRoot,
				BundleInterval: tc.bundleInterval,
			}, memCache, newTestMux())

			// Strategy should be created successfully regardless of bundle interval
			assert.NoError(t, err)
			assert.NotZero(t, strategy)
		})
	}
}
Loading