diff --git a/go.mod b/go.mod index 81dc1a9..7c018fc 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/projectdiscovery/roundrobin v0.0.6 github.com/projectdiscovery/tinydns v0.0.77 github.com/projectdiscovery/utils v0.4.13 + github.com/stretchr/testify v1.9.0 github.com/things-go/go-socks5 v0.0.5 golang.org/x/net v0.33.0 gopkg.in/yaml.v3 v3.0.1 @@ -37,6 +38,7 @@ require ( github.com/klauspost/pgzip v1.2.6 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mholt/archiver/v3 v3.5.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 // indirect github.com/shirou/gopsutil/v3 v3.23.7 // indirect diff --git a/pkg/logger/har/har.go b/pkg/logger/har/har.go new file mode 100644 index 0000000..bbe5c3d --- /dev/null +++ b/pkg/logger/har/har.go @@ -0,0 +1,257 @@ +package har + +import ( + "encoding/json" + "io" + "net/http" + "os" + "sync" + "time" + + "github.com/projectdiscovery/gologger" + "github.com/projectdiscovery/martian/v3/har" +) + +const FlushInterval = 10 * time.Second + +type Writer struct { + f *os.File + mutex *sync.Mutex +} + +type Logger struct { + martianHarLogger *har.Logger + writer *Writer + done chan struct{} + wg sync.WaitGroup +} + +func NewLogger(filePath string, flushInterval time.Duration) (*Logger, error) { + martianHarLogger := har.NewLogger() + writer, err := newWriter(filePath) + if err != nil { + return nil, err + } + + logger := &Logger{ + martianHarLogger: martianHarLogger, + writer: writer, + done: make(chan struct{}), + } + + logger.wg.Add(1) + go func(logger *Logger) { + defer logger.wg.Done() + ticker := time.NewTicker(flushInterval) + defer ticker.Stop() + for { + select { + case <-ticker.C: + if err := writer.append(martianHarLogger.ExportAndReset()); err != nil { + gologger.Error().Msgf("Could not write HAR log: %s\n", err) + } + case <-logger.done: + return + } + } + }(logger) + + return logger, nil +} + +func newWriter(filePath string) (*Writer, error) { + file, err := os.Create(filePath) + if err != nil { + return nil, err + } + return &Writer{f: file, mutex: &sync.Mutex{}}, nil +} + +func (l *Logger) ModifyRequest(req *http.Request) error { + return l.martianHarLogger.ModifyRequest(req) +} + +func (l *Logger) ModifyResponse(resp *http.Response) error { + return l.martianHarLogger.ModifyResponse(resp) +} + +func (l *Logger) Flush() error { + return l.writer.append(l.martianHarLogger.ExportAndReset()) +} + +func (w *Writer) append(harObj *har.HAR) error { + if harObj == nil || harObj.Log == nil || len(harObj.Log.Entries) == 0 { + return nil + } + + w.mutex.Lock() + defer w.mutex.Unlock() + + // Check if file is empty (new file) + fileInfo, err := w.f.Stat() + if err != nil { + return err + } + + if fileInfo.Size() == 0 { + // Write complete HAR structure for new file + encoder := json.NewEncoder(w.f) + encoder.SetIndent("", " ") + return encoder.Encode(harObj) + } + + // For existing file, append entries efficiently + return w.appendEntries(harObj.Log.Entries) +} + +func (w *Writer) appendEntries(entries []*har.Entry) error { + // Get current file size + fileInfo, err := w.f.Stat() + if err != nil { + return err + } + + // Read the last few bytes to find where to insert new entries + // We need to find the position before the closing "]" of the entries array + readSize := int64(200) // Read last 200 bytes, should be enough to find closing brackets + if fileInfo.Size() < readSize { + readSize = fileInfo.Size() + } + + // Seek to position to read from + seekPos := fileInfo.Size() - readSize + if _, err := w.f.Seek(seekPos, io.SeekStart); err != nil { + return err + } + + // Read the last part of the file + lastBytes := make([]byte, readSize) + n, err := w.f.Read(lastBytes) + if err != nil && err != io.EOF { + return err + } + lastBytes = lastBytes[:n] + + // Find the position of the last "]" before the final "}" + // This is where we need to insert new entries + content := string(lastBytes) + lastEntryEnd := -1 + + // Look for the pattern "]\n}" which indicates end of entries array + for i := len(content) - 3; i >= 0; i-- { + if i+2 < len(content) && content[i:i+3] == "]\n}" { + lastEntryEnd = i + break + } + } + + // If we can't find the pattern, fall back to rewriting the whole file + if lastEntryEnd == -1 { + return w.rewriteFile(entries) + } + + // Calculate the actual position in the file where we need to truncate + truncatePos := seekPos + int64(lastEntryEnd) + + // Truncate the file at the position before the closing "]\n}" + if err := w.f.Truncate(truncatePos); err != nil { + return err + } + + // Seek to the truncation point + if _, err := w.f.Seek(truncatePos, io.SeekStart); err != nil { + return err + } + + // Write comma and newline if there were existing entries + if truncatePos > 0 { + if _, err := w.f.WriteString(",\n"); err != nil { + return err + } + } + + // Write each new entry with proper formatting + for i, entry := range entries { + if i > 0 { + if _, err := w.f.WriteString(",\n"); err != nil { + return err + } + } + + // Marshal the entry with proper indentation + entryBytes, err := json.MarshalIndent(entry, " ", " ") + if err != nil { + return err + } + + // Adjust indentation to match HAR file format + entryStr := string(entryBytes) + // Replace the first 4 spaces with 2 spaces to match the entries array indentation + if len(entryStr) > 4 && entryStr[:4] == " " { + entryStr = " " + entryStr[4:] + } + + if _, err := w.f.WriteString(entryStr); err != nil { + return err + } + } + + // Write the closing brackets + if _, err := w.f.WriteString("\n ]\n}"); err != nil { + return err + } + + return nil +} + +func (w *Writer) rewriteFile(entries []*har.Entry) error { + // Fallback method: read existing file and rewrite + // This is the old inefficient method, but kept as fallback + if _, err := w.f.Seek(0, 0); err != nil { + return err + } + + decoder := json.NewDecoder(w.f) + var existingHar har.HAR + if err := decoder.Decode(&existingHar); err != nil && err != io.EOF { + return err + } + + // Merge entries + if existingHar.Log != nil { + existingHar.Log.Entries = append(existingHar.Log.Entries, entries...) + } else { + // This shouldn't happen in normal flow, but handle it + existingHar = har.HAR{ + Log: &har.Log{ + Version: "1.2", + Creator: &har.Creator{ + Name: "proxify", + Version: "1.0", + }, + Entries: entries, + }, + } + } + + // Truncate and rewrite + if err := w.f.Truncate(0); err != nil { + return err + } + if _, err := w.f.Seek(0, 0); err != nil { + return err + } + + encoder := json.NewEncoder(w.f) + encoder.SetIndent("", " ") + return encoder.Encode(existingHar) +} + +func (l *Logger) Close() error { + close(l.done) + l.wg.Wait() + if err := l.Flush(); err != nil { + gologger.Error().Msgf("Could not flush HAR log on close: %s\n", err) + } + return l.writer.f.Close() +} diff --git a/pkg/logger/har/har_test.go b/pkg/logger/har/har_test.go new file mode 100644 index 0000000..a1068b7 --- /dev/null +++ b/pkg/logger/har/har_test.go @@ -0,0 +1,162 @@ +package har + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + "time" + + "github.com/projectdiscovery/martian/v3/har" + "github.com/stretchr/testify/require" +) + +func TestNewLogger(t *testing.T) { + tempDir := t.TempDir() + tempFilePath := filepath.Join(tempDir, "test.har") + + logger, err := NewLogger(tempFilePath, FlushInterval) + require.NoError(t, err) + require.NotNil(t, logger) + defer func() { + require.NoError(t, logger.Close()) + }() + + // Check that the logger and its components are initialized + require.NotNil(t, logger.martianHarLogger) + require.NotNil(t, logger.writer) + require.NotNil(t, logger.done) + + // Check that the file was created + _, err = os.Stat(tempFilePath) + require.NoError(t, err) +} + +func TestAppend(t *testing.T) { + tempDir := t.TempDir() + tempFilePath := filepath.Join(tempDir, "test.har") + + writer, err := newWriter(tempFilePath) + require.NoError(t, err) + require.NotNil(t, writer) + defer func() { + require.NoError(t, writer.f.Close()) + }() + + // First append + har1 := &har.HAR{ + Log: &har.Log{ + Entries: []*har.Entry{ + {Request: &har.Request{URL: "https://example.com/1"}}, + }, + }, + } + err = writer.append(har1) + require.NoError(t, err) + + // Second append + har2 := &har.HAR{ + Log: &har.Log{ + Entries: []*har.Entry{ + {Request: &har.Request{URL: "https://example.com/2"}}, + }, + }, + } + err = writer.append(har2) + require.NoError(t, err) + + // Read the file and verify its content + fileContent, err := os.ReadFile(tempFilePath) + require.NoError(t, err) + + var resultHar har.HAR + err = json.Unmarshal(fileContent, &resultHar) + require.NoError(t, err) + + // Verify the entries + require.Len(t, resultHar.Log.Entries, 2) + require.Equal(t, "https://example.com/1", resultHar.Log.Entries[0].Request.URL) + require.Equal(t, "https://example.com/2", resultHar.Log.Entries[1].Request.URL) +} + +func TestLoggerLifecycle(t *testing.T) { + tempDir := t.TempDir() + tempFilePath := filepath.Join(tempDir, "test.har") + + // Use a shorter flush period for testing + flushInterval := 200 * time.Millisecond + + logger, err := NewLogger(tempFilePath, flushInterval) + require.NoError(t, err) + require.NotNil(t, logger) + + // Create a test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + var harResult har.HAR + + // Simulate some requests and responses by calling the logger directly + // This is not ideal, but it's the simplest way to test the flushing + // without setting up a full martian proxy. + request1 := mustNewRequest(t, "GET", server.URL+"/req1") + response1 := &http.Response{Request: request1} + request2 := mustNewRequest(t, "POST", server.URL+"/req2") + response2 := &http.Response{Request: request2} + require.NoError(t, logger.martianHarLogger.RecordRequest("1", request1)) + require.NoError(t, logger.martianHarLogger.RecordResponse("1", response1)) + require.NoError(t, logger.martianHarLogger.RecordRequest("2", request2)) + require.NoError(t, logger.martianHarLogger.RecordResponse("2", response2)) + + // Wait for the background goroutine to flush + require.Eventually(t, func() bool { + fileContent, err := os.ReadFile(tempFilePath) + if err != nil { + return false + } + var harResult har.HAR + if err := json.Unmarshal(fileContent, &harResult); err != nil { + return false + } + return len(harResult.Log.Entries) == 2 + }, time.Second, 50*time.Millisecond) + + // Check the content after the periodic flush + fileContent, err := os.ReadFile(tempFilePath) + require.NoError(t, err) + + err = json.Unmarshal(fileContent, &harResult) + require.NoError(t, err) + require.Len(t, harResult.Log.Entries, 2) + require.Equal(t, server.URL+"/req1", harResult.Log.Entries[0].Request.URL) + require.Equal(t, server.URL+"/req2", harResult.Log.Entries[1].Request.URL) + + // Simulate more requests and then close the logger + request3 := mustNewRequest(t, "PUT", server.URL+"/req3") + response3 := &http.Response{Request: request3} + require.NoError(t, logger.martianHarLogger.RecordRequest("3", request3)) + require.NoError(t, logger.martianHarLogger.RecordResponse("3", response3)) + + require.NoError(t, logger.Close()) + + // Check the final content after Close calls Flush + fileContent, err = os.ReadFile(tempFilePath) + require.NoError(t, err) + + err = json.Unmarshal(fileContent, &harResult) + require.NoError(t, err) + require.Len(t, harResult.Log.Entries, 3) + require.Equal(t, server.URL+"/req1", harResult.Log.Entries[0].Request.URL) + require.Equal(t, server.URL+"/req2", harResult.Log.Entries[1].Request.URL) + require.Equal(t, server.URL+"/req3", harResult.Log.Entries[2].Request.URL) +} + +func mustNewRequest(t *testing.T, method, url string) *http.Request { + req, err := http.NewRequest(method, url, nil) + require.NoError(t, err) + return req +} diff --git a/pkg/logger/logger.go b/pkg/logger/logger.go index e29c803..96131b4 100644 --- a/pkg/logger/logger.go +++ b/pkg/logger/logger.go @@ -1,20 +1,18 @@ package logger import ( - "encoding/json" "fmt" "io" "net/http" "net/http/httputil" - "os" "strings" "time" "github.com/asaskevich/govalidator" "github.com/projectdiscovery/gologger" - "github.com/projectdiscovery/martian/v3/har" "github.com/projectdiscovery/proxify/pkg/logger/elastic" "github.com/projectdiscovery/proxify/pkg/logger/file" + "github.com/projectdiscovery/proxify/pkg/logger/har" "github.com/projectdiscovery/proxify/pkg/logger/kafka" "github.com/projectdiscovery/utils/conversion" pdhttpUtils "github.com/projectdiscovery/utils/http" @@ -31,14 +29,13 @@ const ( type OptionsLogger struct { Verbosity types.Verbosity - OutputFolder string // when output is written to multiple files - OutputFile string // when output is written to single file - OutputFormat string // jsonl or yaml - HarLogger *har.Logger // martian har logger instance - OutputHar string // when output is written to a HAR file - DumpRequest bool // dump request to file - DumpResponse bool // dump response to file - MaxSize int // max size of the output + OutputFolder string // when output is written to multiple files + OutputFile string // when output is written to single file + OutputFormat string // jsonl or yaml + OutputHar string // when output is written to a HAR file + DumpRequest bool // dump request to file + DumpResponse bool // dump response to file + MaxSize int // max size of the output Elastic *elastic.Options Kafka *kafka.Options } @@ -52,6 +49,7 @@ type Logger struct { asyncqueue chan types.HTTPTransaction Store []Store sWriter OutputFileWriter // sWriter is the structured writer + harLogger *har.Logger } // NewLogger instance @@ -101,33 +99,18 @@ func NewLogger(options *OptionsLogger) *Logger { } if options.OutputHar != "" { - options.HarLogger = har.NewLogger() + harLogger, err := har.NewLogger(options.OutputHar, har.FlushInterval) + if err != nil { + gologger.Error().Msgf("Could not create HAR logger: %s", err) + } else { + logger.harLogger = harLogger + } } go logger.AsyncWrite() return logger } -func (l *Logger) exportHar() error { - if l.options.HarLogger == nil || l.options.OutputHar == "" { - return nil - } - - harLog := l.options.HarLogger.Export() - - harFile, err := os.Create(l.options.OutputHar) - if err != nil { - return err - } - defer func() { - _ = harFile.Close() - }() - - encoder := json.NewEncoder(harFile) - encoder.SetIndent("", " ") - return encoder.Encode(harLog) -} - // LogRequest and user data func (l *Logger) LogRequest(req *http.Request, userdata types.UserData) error { if req == nil { @@ -140,8 +123,8 @@ func (l *Logger) LogRequest(req *http.Request, userdata types.UserData) error { Request: req, } - if l.options.HarLogger != nil { - if err := l.options.HarLogger.ModifyRequest(req); err != nil { + if l.harLogger != nil { + if err := l.harLogger.ModifyRequest(req); err != nil { gologger.Error().Msgf("Could not modify HAR request: %s\n", err) } } @@ -162,8 +145,8 @@ func (l *Logger) LogResponse(resp *http.Response, userdata types.UserData) error Request: resp.Request, } - if l.options.HarLogger != nil { - if err := l.options.HarLogger.ModifyResponse(resp); err != nil { + if l.harLogger != nil { + if err := l.harLogger.ModifyResponse(resp); err != nil { gologger.Error().Msgf("Could not modify HAR response: %s\n", err) } } @@ -265,8 +248,10 @@ func (l *Logger) AsyncWrite() { // Close logger instance func (l *Logger) Close() { - if err := l.exportHar(); err != nil { - gologger.Error().Msgf("Could not export HAR log: %s\\n", err) + if l.harLogger != nil { + if err := l.harLogger.Close(); err != nil { + gologger.Error().Msgf("Could not close HAR logger: %s\n", err) + } } if l.sWriter != nil { if err := l.sWriter.Close(); err != nil {