From d4fcde5629d04854e5c06caa5054ea62d76242dc Mon Sep 17 00:00:00 2001
From: Skiada Alexandre <25521495+askiada@users.noreply.github.com>
Date: Wed, 17 Aug 2022 10:49:33 +0100
Subject: [PATCH 01/16] Full implementation with custom reader and writer

---
 env.list                                      |  5 ++-
 file/batchingchannels/batching_channel.go     | 16 +++----
 .../batchingchannels/batching_channel_test.go | 11 +++--
 file/chunk.go                                 | 21 +++++----
 file/file.go                                  | 41 ++++++++++--------
 file/sort.go                                  | 24 ++++-------
 internal/env.go                               |  3 ++
 main.go                                       | 32 ++++++++++----
 main_bench_test.go                            | 15 ++++---
 main_test.go                                  | 26 ++++++-----
 reader/contract.go                            |  7 +++
 reader/separated_values.go                    | 43 +++++++++++++++++++
 reader/std_scanner.go                         | 27 ++++++++++++
 vector/element.go                             |  4 +-
 vector/key/int_key.go                         | 12 +++++-
 vector/key/string_key.go                      | 14 ++++++
 vector/key/tsv_key.go                         | 25 ++++++++---
 vector/slice_vector.go                        | 10 ++---
 vector/vector.go                              | 36 ++++++++------
 writer/contract.go                            |  6 +++
 writer/separated_values.go                    | 40 +++++++++++++++++
 writer/std_writer.go                          | 39 +++++++++++++++++
 22 files changed, 349 insertions(+), 108 deletions(-)
 create mode 100644 reader/contract.go
 create mode 100644 reader/separated_values.go
 create mode 100644 reader/std_scanner.go
 create mode 100644 writer/contract.go
 create mode 100644 writer/separated_values.go
 create mode 100644 writer/std_writer.go

diff --git a/env.list b/env.list
index 169be0f..cbbeaa3 100644
--- a/env.list
+++ b/env.list
@@ -1,6 +1,7 @@
-INPUT_PATH=./works.tsv
+INPUT_PATH=/Users/alex/Desktop/Projects/Blokur/Repo/external-sort/rec_sample.tsv
 OUTPUT_PATH=./output.tsv
 CHUNK_FOLDER=./data/chunks/
 CHUNK_SIZE=1000000
 MAX_WORKERS=10
-OUTPUT_BUFFER_SIZE=1000
\ No newline at end of file
+OUTPUT_BUFFER_SIZE=1000
+TSV_FIELDS=2 4
\ No newline at end of file
diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go
index 6826685..aebca36 100644
--- a/file/batchingchannels/batching_channel.go
+++ b/file/batchingchannels/batching_channel.go
@@ -12,11 +12,11 @@ import (
 // on Out(), it batches together the entire internal buffer each time. Trying to construct an unbuffered batching channel
 // will panic, that configuration is not supported (and provides no benefit over an unbuffered NativeChannel).
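+//
+// A minimal usage sketch (hypothetical; handleBatch stands in for any func(vector.Vector) error):
+//
+//	ch := NewBatchingChannel(ctx, allocate, maxWorkers, 1024)
+//	go func() { ch.In() <- row; ch.Close() }()
+//	err := ch.ProcessOut(handleBatch)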
type BatchingChannel struct { - input chan string + input chan interface{} output chan vector.Vector buffer vector.Vector allocate *vector.Allocate - g *errgroup.Group + G *errgroup.Group sem *semaphore.Weighted dCtx context.Context size int @@ -32,12 +32,12 @@ func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorke } g, dCtx := errgroup.WithContext(ctx) ch := &BatchingChannel{ - input: make(chan string), + input: make(chan interface{}), output: make(chan vector.Vector), size: size, allocate: allocate, maxWorker: maxWorker, - g: g, + G: g, sem: semaphore.NewWeighted(maxWorker), dCtx: dCtx, } @@ -45,7 +45,7 @@ func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorke return ch } -func (ch *BatchingChannel) In() chan<- string { +func (ch *BatchingChannel) In() chan<- interface{} { return ch.input } @@ -62,12 +62,12 @@ func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error { return err } val := val - ch.g.Go(func() error { + ch.G.Go(func() error { defer ch.sem.Release(1) return f(val) }) } - err := ch.g.Wait() + err := ch.G.Wait() if err != nil { return err } @@ -93,7 +93,7 @@ func (ch *BatchingChannel) batchingBuffer() { if open { err := ch.buffer.PushBack(elem) if err != nil { - ch.g.Go(func() error { + ch.G.Go(func() error { return err }) } diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index d9a0cce..b33aaf3 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -10,6 +10,7 @@ import ( "github.com/askiada/external-sort/file/batchingchannels" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" + "github.com/pkg/errors" "github.com/stretchr/testify/assert" ) @@ -17,7 +18,11 @@ type Int struct { value int } -func AllocateInt(line string) (key.Key, error) { +func AllocateInt(row interface{}) (key.Key, error) { + line, ok := row.(string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to string: %+v", row) + } num, err := strconv.Atoi(line) if err != nil { return nil, err @@ -86,7 +91,7 @@ func testBatches(t *testing.T, ch *batchingchannels.BatchingChannel) { } func TestBatchingChannel(t *testing.T) { - allocate := vector.DefaultVector(AllocateInt) + allocate := vector.DefaultVector(AllocateInt, nil, nil) ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50) testBatches(t, ch) @@ -98,7 +103,7 @@ func TestBatchingChannel(t *testing.T) { } func TestBatchingChannelCap(t *testing.T) { - allocate := vector.DefaultVector(AllocateInt) + allocate := vector.DefaultVector(AllocateInt, nil, nil) ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 5) if ch.Cap() != 5 { t.Error("incorrect capacity on infinite channel") diff --git a/file/chunk.go b/file/chunk.go index 77966c0..f3414f9 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -1,10 +1,10 @@ package file import ( - "bufio" "os" "sort" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/pkg/errors" @@ -13,7 +13,7 @@ import ( // chunkInfo Describe a chunk. type chunkInfo struct { file *os.File - scanner *bufio.Scanner + reader reader.Reader buffer vector.Vector filename string } @@ -22,13 +22,16 @@ type chunkInfo struct { // It stops if there is no elements left to add. 
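+// Any read error from the underlying reader aborts the pull and is returned to the caller.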
func (c *chunkInfo) pullSubset(size int) (err error) { i := 0 - for i < size && c.scanner.Scan() { - text := c.scanner.Text() - c.buffer.PushBack(text) + for i < size && c.reader.Next() { + row, err := c.reader.Read() + if err != nil { + return errors.Wrap(err, "") + } + c.buffer.PushBack(row) i++ } - if c.scanner.Err() != nil { - return c.scanner.Err() + if c.reader.Err() != nil { + return c.reader.Err() } return nil } @@ -44,11 +47,11 @@ func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int) erro if err != nil { return err } - scanner := bufio.NewScanner(f) + reader := allocate.FnReader(f) elem := &chunkInfo{ filename: chunkPath, file: f, - scanner: scanner, + reader: reader, buffer: allocate.Vector(size, allocate.Key), } err = elem.pullSubset(size) diff --git a/file/file.go b/file/file.go index 5bc397f..e7a007a 100644 --- a/file/file.go +++ b/file/file.go @@ -1,25 +1,26 @@ package file import ( - "bufio" "context" + "io" "sync" - "io" "path" "strconv" "github.com/askiada/external-sort/file/batchingchannels" "github.com/askiada/external-sort/vector" + "github.com/askiada/external-sort/writer" "github.com/pkg/errors" ) type Info struct { mu *MemUsage - Reader io.Reader Allocate *vector.Allocate - OutputPath string + InputReader io.Reader + OutputFile string + outputWriter writer.Writer totalRows int PrintMemUsage bool } @@ -40,25 +41,33 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if err != nil { return nil, errors.Wrap(err, fn) } + + inputReader := f.Allocate.FnReader(f.InputReader) + row := 0 chunkPaths := []string{} - scanner := bufio.NewScanner(f.Reader) + mu := sync.Mutex{} - wg := &sync.WaitGroup{} - wg.Add(1) + batchChan := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) - go func() { - defer wg.Done() - for scanner.Scan() { + batchChan.G.Go(func() error { + for inputReader.Next() { if f.PrintMemUsage { f.mu.Collect() } - text := scanner.Text() - batchChan.In() <- text + elem, err := inputReader.Read() + if err != nil { + return errors.Wrap(err, fn) + } + batchChan.In() <- elem row++ } batchChan.Close() - }() + if inputReader.Err() != nil { + return errors.Wrap(inputReader.Err(), fn) + } + return nil + }) chunkIdx := 0 err = batchChan.ProcessOut(func(v vector.Vector) error { @@ -67,7 +76,7 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(chunkIdx)+".tsv") mu.Unlock() v.Sort() - err := vector.Dump(v, chunkPath) + err := f.Allocate.Dump(v, chunkPath) if err != nil { return err } @@ -79,10 +88,6 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if err != nil { return nil, errors.Wrap(err, fn) } - wg.Wait() - if scanner.Err() != nil { - return nil, errors.Wrap(scanner.Err(), fn) - } f.totalRows = row return chunkPaths, nil } diff --git a/file/sort.go b/file/sort.go index fd90f33..ae53b62 100644 --- a/file/sort.go +++ b/file/sort.go @@ -1,12 +1,12 @@ package file import ( - "bufio" "fmt" "os" "runtime" "github.com/askiada/external-sort/vector" + "github.com/askiada/external-sort/writer" "github.com/cheggaaa/pb/v3" ) @@ -52,16 +52,12 @@ func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { return err } } - - outputFile, err := os.Create(f.OutputPath) + w, err := os.Create(f.OutputFile) if err != nil { return err } - // remember to close the file - defer outputFile.Close() - - outputBuffer := bufio.NewWriter(outputFile) - + f.outputWriter = f.Allocate.FnWriter(w) + 
defer f.outputWriter.Close() bar := pb.StartNew(f.totalRows) chunks.resetOrder() for { @@ -69,7 +65,7 @@ func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { f.mu.Collect() } if chunks.len() == 0 || output.Len() == k { - err = WriteBuffer(outputBuffer, output) + err = WriteBuffer(f.outputWriter, output) if err != nil { return err } @@ -80,7 +76,7 @@ func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { toShrink := []int{} // search the smallest value across chunk buffers by comparing first elements only minChunk, minValue, minIdx := chunks.min() - err = output.PushBack(minValue.Line) + err = output.PushBack(minValue.Row) if err != nil { return err } @@ -108,10 +104,6 @@ func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { } bar.Increment() } - err = outputBuffer.Flush() - if err != nil { - return err - } bar.Finish() if f.PrintMemUsage { f.mu.PrintMemUsage() @@ -119,9 +111,9 @@ func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { return chunks.close() } -func WriteBuffer(buffer *bufio.Writer, rows vector.Vector) error { +func WriteBuffer(w writer.Writer, rows vector.Vector) error { for i := 0; i < rows.Len(); i++ { - _, err := buffer.WriteString(rows.Get(i).Line + "\n") + err := w.Write(rows.Get(i).Row) if err != nil { return err } diff --git a/internal/env.go b/internal/env.go index 852adde..f4bafcb 100644 --- a/internal/env.go +++ b/internal/env.go @@ -14,11 +14,13 @@ const ( ChunkSizeName = "chunk_size" MaxWorkersName = "max_workers" OutputBufferSizeName = "output_buffer_size" + TsvFieldsName = "tsv_fields" ) // Environment variables. var ( InputFile string + TsvFields []string OutputFile string ChunkFolder string ChunkSize int @@ -34,4 +36,5 @@ func init() { viper.SetDefault(ChunkSizeName, 0) viper.SetDefault(MaxWorkersName, 0) viper.SetDefault(OutputBufferSizeName, 0) + viper.SetDefault(TsvFieldsName, []string{"0"}) } diff --git a/main.go b/main.go index b44da1c..5a166fb 100644 --- a/main.go +++ b/main.go @@ -3,13 +3,17 @@ package main import ( "context" "fmt" + "io" "os" + "strconv" "time" "github.com/askiada/external-sort/file" "github.com/askiada/external-sort/internal" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" + "github.com/askiada/external-sort/writer" "github.com/spf13/cobra" "github.com/spf13/viper" ) @@ -28,28 +32,38 @@ func main() { rootCmd.PersistentFlags().IntVarP(&internal.ChunkSize, internal.ChunkSizeName, "s", viper.GetInt(internal.ChunkSizeName), "chunk size.") rootCmd.PersistentFlags().Int64VarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt64(internal.MaxWorkersName), "max worker.") rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.") + rootCmd.PersistentFlags().StringSliceVarP(&internal.TsvFields, internal.TsvFieldsName, "t", viper.GetStringSlice(internal.TsvFieldsName), "") fmt.Println("Input file", internal.InputFile) fmt.Println("Output file", internal.OutputFile) - fmt.Println("Chunk foler", internal.ChunkFolder) + fmt.Println("Chunk folder", internal.ChunkFolder) + fmt.Println("TSV Fields", internal.TsvFields) + cobra.CheckErr(rootCmd.Execute()) } func rootRun(cmd *cobra.Command, args []string) error { start := time.Now() - inputPath := internal.InputFile // open a file - f, err := os.Open(inputPath) + inputReader, err := os.Open(internal.InputFile) if err != nil { return err } - defer 
f.Close() + defer inputReader.Close() + tsvFields := []int{} + for _, field := range internal.TsvFields { + i, err := strconv.Atoi(field) + if err != nil { + return err + } + tsvFields = append(tsvFields, i) + } fI := &file.Info{ - Reader: f, - Allocate: vector.DefaultVector(func(line string) (key.Key, error) { - return key.AllocateTsv(line, 0) - }), - OutputPath: internal.OutputFile, + InputReader: inputReader, + OutputFile: internal.OutputFile, + Allocate: vector.DefaultVector(func(row interface{}) (key.Key, error) { + return key.AllocateTsv(row, tsvFields...) + }, func(r io.Reader) reader.Reader { return reader.NewSeparatedValues(r, '\t') }, func(w io.Writer) writer.Writer { return writer.NewSeparatedValues(w, '\t') }), PrintMemUsage: false, } diff --git a/main_bench_test.go b/main_bench_test.go index a600863..f6515c7 100644 --- a/main_bench_test.go +++ b/main_bench_test.go @@ -2,28 +2,30 @@ package main_test import ( "context" + "io" "io/ioutil" "os" "path" "testing" "github.com/askiada/external-sort/file" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" + "github.com/askiada/external-sort/writer" "github.com/stretchr/testify/assert" ) func BenchmarkMergeSort(b *testing.B) { filename := "test.tsv" - chunkSize := 10000 - bufferSize := 5000 f, err := os.Open(filename) assert.NoError(b, err) - + chunkSize := 10000 + bufferSize := 5000 fI := &file.Info{ - Reader: f, - Allocate: vector.DefaultVector(key.AllocateInt), - OutputPath: "testdata/chunks/output.tsv", + InputReader: f, + Allocate: vector.DefaultVector(key.AllocateInt, reader.NewStdScanner, func(w io.Writer) writer.Writer { return writer.NewStdWriter(w) }), + OutputFile: "testdata/chunks/output.tsv", } chunkPaths, err := fI.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) assert.NoError(b, err) @@ -32,7 +34,6 @@ func BenchmarkMergeSort(b *testing.B) { err = fI.MergeSort(chunkPaths, bufferSize) _ = err } - f.Close() dir, err := ioutil.ReadDir("testdata/chunks") assert.NoError(b, err) for _, d := range dir { diff --git a/main_test.go b/main_test.go index 69b54d8..6b03fa5 100644 --- a/main_test.go +++ b/main_test.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "errors" + "io" "io/ioutil" "os" "path" @@ -11,8 +12,10 @@ import ( "testing" "github.com/askiada/external-sort/file" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" + "github.com/askiada/external-sort/writer" "github.com/stretchr/testify/assert" ) @@ -21,11 +24,10 @@ func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, t.Helper() f, err := os.Open(filename) assert.NoError(t, err) - fI := &file.Info{ - Reader: f, - Allocate: allocate, - OutputPath: outputFilename, + InputReader: f, + Allocate: allocate, + OutputFile: "testdata/chunks/output.tsv", } chunkPaths, err := fI.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) assert.NoError(t, err) @@ -65,7 +67,7 @@ func TestBasics(t *testing.T) { outputFilename: "testdata/chunks/output.tsv", }, } - allocate := vector.DefaultVector(key.AllocateInt) + for name, tc := range tcs { filename := tc.filename outputFilename := tc.outputFilename @@ -77,8 +79,9 @@ func TestBasics(t *testing.T) { bufferSize := bufferSize t.Run(name+"_"+strconv.Itoa(chunkSize)+"_"+strconv.Itoa(bufferSize), func(t *testing.T) { ctx := context.Background() + + allocate := vector.DefaultVector(key.AllocateInt, reader.NewStdScanner, 
writer.NewStdWriter) fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize) - fI.OutputPath = outputFilename err := fI.MergeSort(chunkPaths, bufferSize) assert.NoError(t, err) outputFile, err := os.Open(outputFilename) @@ -112,7 +115,7 @@ func Test100Elems(t *testing.T) { outputFilename: "testdata/chunks/output.tsv", }, } - allocate := vector.DefaultVector(key.AllocateInt) + for name, tc := range tcs { filename := tc.filename outputFilename := tc.outputFilename @@ -120,6 +123,7 @@ func Test100Elems(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() + allocate := vector.DefaultVector(key.AllocateInt, reader.NewStdScanner, writer.NewStdWriter) fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, 21) err := fI.MergeSort(chunkPaths, 10) assert.NoError(t, err) @@ -161,9 +165,7 @@ func TestTsvKey(t *testing.T) { outputFilename: "testdata/chunks/output.tsv", }, } - allocate := vector.DefaultVector(func(line string) (key.Key, error) { - return key.AllocateTsv(line, 1) - }) + for name, tc := range tcs { filename := tc.filename outputFilename := tc.outputFilename @@ -171,6 +173,10 @@ func TestTsvKey(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() + + allocate := vector.DefaultVector(func(row interface{}) (key.Key, error) { + return key.AllocateTsv(row, 1) + }, func(r io.Reader) reader.Reader { return reader.NewSeparatedValues(r, '\t') }, func(w io.Writer) writer.Writer { return writer.NewSeparatedValues(w, '\t') }) fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, 21) err := fI.MergeSort(chunkPaths, 10) assert.NoError(t, err) diff --git a/reader/contract.go b/reader/contract.go new file mode 100644 index 0000000..3589f3f --- /dev/null +++ b/reader/contract.go @@ -0,0 +1,7 @@ +package reader + +type Reader interface { + Next() bool + Read() (interface{}, error) + Err() error +} diff --git a/reader/separated_values.go b/reader/separated_values.go new file mode 100644 index 0000000..fdb4fea --- /dev/null +++ b/reader/separated_values.go @@ -0,0 +1,43 @@ +package reader + +import ( + "encoding/csv" + "errors" + "io" +) + +type SeparatedValuesReader struct { + row []string + r *csv.Reader + err error +} + +func NewSeparatedValues(r io.Reader, separator rune) *SeparatedValuesReader { + s := &SeparatedValuesReader{ + r: csv.NewReader(r), + } + s.r.Comma = separator + return s +} + +func (s *SeparatedValuesReader) Next() bool { + s.row, s.err = s.r.Read() + if errors.Is(s.err, io.EOF) { + s.err = nil + return false + } + return true +} + +func (s *SeparatedValuesReader) Read() (interface{}, error) { + if s.err != nil { + return nil, s.err + } + return s.row, nil +} + +func (s *SeparatedValuesReader) Err() error { + return s.err +} + +var _ Reader = &SeparatedValuesReader{} diff --git a/reader/std_scanner.go b/reader/std_scanner.go new file mode 100644 index 0000000..7610ffd --- /dev/null +++ b/reader/std_scanner.go @@ -0,0 +1,27 @@ +package reader + +import ( + "bufio" + "io" +) + +type StdScanner struct { + r *bufio.Scanner +} + +func NewStdScanner(r io.Reader) Reader { + s := &StdScanner{ + r: bufio.NewScanner(r), + } + return s +} + +func (s *StdScanner) Next() bool { + return s.r.Scan() +} +func (s *StdScanner) Read() (interface{}, error) { + return s.r.Text(), nil +} +func (s *StdScanner) Err() error { + return s.r.Err() +} diff --git a/vector/element.go b/vector/element.go index e49fb3e..3b9e768 100644 --- 
a/vector/element.go +++ b/vector/element.go @@ -3,8 +3,8 @@ package vector import "github.com/askiada/external-sort/vector/key" type Element struct { - Key key.Key - Line string + Key key.Key + Row interface{} } // Less returns wether v1 is smaller than v2 based on the keys. diff --git a/vector/key/int_key.go b/vector/key/int_key.go index 0744e82..7ffb83d 100644 --- a/vector/key/int_key.go +++ b/vector/key/int_key.go @@ -1,12 +1,20 @@ package key -import "strconv" +import ( + "strconv" + + "github.com/pkg/errors" +) type Int struct { value int } -func AllocateInt(line string) (Key, error) { +func AllocateInt(row interface{}) (Key, error) { + line, ok := row.(string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to string: %+v", row) + } num, err := strconv.Atoi(line) if err != nil { return nil, err diff --git a/vector/key/string_key.go b/vector/key/string_key.go index d774e0c..b98669c 100644 --- a/vector/key/string_key.go +++ b/vector/key/string_key.go @@ -1,5 +1,7 @@ package key +import "strings" + type String struct { value string } @@ -11,3 +13,15 @@ func AllocateString(line string) (Key, error) { func (k *String) Less(other Key) bool { return k.value < other.(*String).value } + +type UpperString struct { + value string +} + +func AllocateUpperString(line string) (Key, error) { + return &UpperString{strings.TrimSpace(strings.ToUpper(line))}, nil +} + +func (k *UpperString) Less(other Key) bool { + return k.value < other.(*UpperString).value +} diff --git a/vector/key/tsv_key.go b/vector/key/tsv_key.go index d3d3f8e..6f3ee04 100644 --- a/vector/key/tsv_key.go +++ b/vector/key/tsv_key.go @@ -6,10 +6,25 @@ import ( "github.com/pkg/errors" ) -func AllocateTsv(line string, pos int) (Key, error) { - splitted := strings.Split(line, "\t") - if len(splitted) < pos+1 { - return nil, errors.Errorf("can't allocate tsv key line is invalid: %s", line) +const salt = "##!##" + +func AllocateTsv(row interface{}, pos ...int) (Key, error) { + splitted, ok := row.([]string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to []string: %+v", row) + } + k := strings.Builder{} + for i, p := range pos { + if len(splitted) < p+1 { + return nil, errors.Errorf("can't allocate tsv key line is invalid: %s", row) + } + k.WriteString(splitted[p]) + if i < len(pos)-1 { + k.WriteString(salt) + } } - return &String{splitted[pos]}, nil + + // fmt.Println(row, pos, k.String()) + + return &String{k.String()}, nil } diff --git a/vector/slice_vector.go b/vector/slice_vector.go index 270015d..c31b0fe 100644 --- a/vector/slice_vector.go +++ b/vector/slice_vector.go @@ -8,7 +8,7 @@ import ( var _ Vector = &SliceVec{} -func AllocateSlice(size int, allocateKey func(line string) (key.Key, error)) Vector { +func AllocateSlice(size int, allocateKey func(row interface{}) (key.Key, error)) Vector { return &SliceVec{ allocateKey: allocateKey, s: make([]*Element, 0, size), @@ -16,7 +16,7 @@ func AllocateSlice(size int, allocateKey func(line string) (key.Key, error)) Vec } type SliceVec struct { - allocateKey func(line string) (key.Key, error) + allocateKey func(row interface{}) (key.Key, error) s []*Element } @@ -32,12 +32,12 @@ func (v *SliceVec) Len() int { return len(v.s) } -func (v *SliceVec) PushBack(line string) error { - k, err := v.allocateKey(line) +func (v *SliceVec) PushBack(row interface{}) error { + k, err := v.allocateKey(row) if err != nil { return err } - v.s = append(v.s, &Element{Line: line, Key: k}) + v.s = append(v.s, &Element{Row: row, Key: k}) return nil } diff --git 
a/vector/vector.go b/vector/vector.go index cc5471e..0e8916f 100644 --- a/vector/vector.go +++ b/vector/vector.go @@ -1,22 +1,28 @@ package vector import ( - "bufio" + "io" "os" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector/key" + "github.com/askiada/external-sort/writer" "github.com/pkg/errors" ) type Allocate struct { - Vector func(int, func(line string) (key.Key, error)) Vector - Key func(line string) (key.Key, error) + Vector func(int, func(row interface{}) (key.Key, error)) Vector + FnReader func(r io.Reader) reader.Reader + FnWriter func(w io.Writer) writer.Writer + Key func(elem interface{}) (key.Key, error) } -func DefaultVector(allocateKey func(line string) (key.Key, error)) *Allocate { +func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader func(r io.Reader) reader.Reader, fnWr func(w io.Writer) writer.Writer) *Allocate { return &Allocate{ - Vector: AllocateSlice, - Key: allocateKey, + FnReader: fnReader, + FnWriter: fnWr, + Vector: AllocateSlice, + Key: allocateKey, } } @@ -24,7 +30,7 @@ type Vector interface { // Get Access i-th element Get(i int) *Element // PushBack Add item at the end - PushBack(line string) error + PushBack(row interface{}) error // FrontShift Remove the first element FrontShift() // Len Length of the Vector @@ -35,19 +41,25 @@ type Vector interface { Sort() } -func Dump(v Vector, filename string) error { +func (a *Allocate) Dump(v Vector, filename string) error { file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) if err != nil { return errors.Errorf("failed creating file: %s", err) } - datawriter := bufio.NewWriter(file) + datawriter := a.FnWriter(file) for i := 0; i < v.Len(); i++ { - _, err = datawriter.WriteString(v.Get(i).Line + "\n") + err = datawriter.Write(v.Get(i).Row) if err != nil { return errors.Errorf("failed writing file: %s", err) } } - datawriter.Flush() - file.Close() + err = datawriter.Close() + if err != nil { + return errors.Wrap(err, "can't close chunk writer") + } + err = file.Close() + if err != nil { + return errors.Wrap(err, "can't close chunf file") + } return nil } diff --git a/writer/contract.go b/writer/contract.go new file mode 100644 index 0000000..30a3310 --- /dev/null +++ b/writer/contract.go @@ -0,0 +1,6 @@ +package writer + +type Writer interface { + Write(interface{}) error + Close() error +} diff --git a/writer/separated_values.go b/writer/separated_values.go new file mode 100644 index 0000000..4e073ea --- /dev/null +++ b/writer/separated_values.go @@ -0,0 +1,40 @@ +package writer + +import ( + "encoding/csv" + "io" + + "github.com/pkg/errors" +) + +type SeparatedValuesWriter struct { + w *csv.Writer +} + +func NewSeparatedValues(w io.Writer, separator rune) Writer { + s := &SeparatedValuesWriter{ + w: csv.NewWriter(w), + } + s.w.Comma = separator + return s +} + +func (s *SeparatedValuesWriter) Write(elem interface{}) error { + line, ok := elem.([]string) + if !ok { + return errors.Errorf("can't converte interface{} to []string: %+v", elem) + } + err := s.w.Write(line) + if err != nil { + return errors.Wrap(err, "can't write line") + } + return nil +} + +func (s *SeparatedValuesWriter) Close() error { + s.w.Flush() + if s.w.Error() != nil { + return errors.Wrap(s.w.Error(), "can't close writer") + } + return nil +} diff --git a/writer/std_writer.go b/writer/std_writer.go new file mode 100644 index 0000000..a339a10 --- /dev/null +++ b/writer/std_writer.go @@ -0,0 +1,39 @@ +package writer + +import ( + "bufio" + "io" + + 
"github.com/pkg/errors" +) + +type StdWriter struct { + w *bufio.Writer +} + +func NewStdWriter(w io.Writer) Writer { + s := &StdWriter{ + w: bufio.NewWriter(w), + } + return s +} + +func (w *StdWriter) Write(elem interface{}) error { + line, ok := elem.(string) + if !ok { + return errors.Errorf("can't converte interface{} to string: %+v", elem) + } + _, err := w.w.WriteString(line + "\n") + if err != nil { + return errors.Wrap(err, "can't write string") + } + return err +} + +func (w *StdWriter) Close() error { + err := w.w.Flush() + if err != nil { + return errors.Wrap(err, "can't close writer") + } + return nil +} From a5c954774acd2d110cc211ceb9e9616fb98ef543 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Sun, 2 Oct 2022 15:33:19 +0200 Subject: [PATCH 02/16] gzip + drop duplicates --- bucket/contract.go | 61 ++++++ bucket/errors.go | 8 + bucket/s3.go | 111 +++++++++++ file/batchingchannels/batching_channel.go | 4 +- .../batchingchannels/batching_channel_test.go | 4 + file/chunk.go | 7 +- file/file.go | 34 +++- file/sort.go | 25 ++- go.mod | 23 ++- go.sum | 50 ++++- internal/env.go | 18 +- internal/progress/contract.go | 67 +++++++ internal/rw/rw.go | 178 ++++++++++++++++++ main.go | 78 +++++--- main_bench_test.go | 33 ++-- main_test.go | 163 +++++++++++++--- reader/gzip_separated_values.go | 53 ++++++ testdata/100elemsWithHeaders.tsv | 101 ++++++++++ vector/key/int_key.go | 4 + vector/key/key.go | 1 + vector/key/string_key.go | 7 + vector/slice_vector.go | 5 + vector/vector.go | 13 +- 23 files changed, 957 insertions(+), 91 deletions(-) create mode 100644 bucket/contract.go create mode 100644 bucket/errors.go create mode 100644 bucket/s3.go create mode 100644 internal/progress/contract.go create mode 100644 internal/rw/rw.go create mode 100644 reader/gzip_separated_values.go create mode 100644 testdata/100elemsWithHeaders.tsv diff --git a/bucket/contract.go b/bucket/contract.go new file mode 100644 index 0000000..e8f8450 --- /dev/null +++ b/bucket/contract.go @@ -0,0 +1,61 @@ +package bucket + +import ( + "github.com/askiada/external-sort/internal/progress" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// S3ClientAPI S3 client contract for this repo. +type S3ClientAPI interface { + manager.UploadAPIClient + manager.DownloadAPIClient + s3.HeadObjectAPIClient +} + +// ConfigFunc is a function that can be passed to the New function to configure +// the S3 object. +type ConfigFunc func(s *S3) + +// Region sets the region of the S3 bucket. +func Region(region string) ConfigFunc { + return func(s *S3) { + s.region = region + } +} + +// PartBodyMaxRetries sets the number of retries when performing upload multi part. +func PartBodyMaxRetries(r int) ConfigFunc { + return func(s *S3) { + s.partBodyMaxRetries = r + } +} + +// Buffer is the amount of memory in MB to use for buffering the data. +func Buffer(buffer int) ConfigFunc { + return func(s *S3) { + s.bufferLen = buffer * 1024 * 1024 + } +} + +// Client sets the S3 client to use. If you provide this option, we will not be +// able to set the region. +func Client(client S3ClientAPI) ConfigFunc { + return func(s *S3) { + s.s3Client = client + } +} + +// MaxRetries sets the maximum number of retried per request before returning an error. +func MaxRetries(maxRetries int) ConfigFunc { + return func(s *S3) { + s.maxRetries = maxRetries + } +} + +// Progress sets a progress bar to be used when performing bucket actions. 
+func Progress(p progress.Progress) ConfigFunc { + return func(s *S3) { + s.progress = p + } +} diff --git a/bucket/errors.go b/bucket/errors.go new file mode 100644 index 0000000..63b153e --- /dev/null +++ b/bucket/errors.go @@ -0,0 +1,8 @@ +package bucket + +import "errors" + +var ( + // ErrInvalidInput is returned when the input is invalid. + ErrInvalidInput = errors.New("invalid input") +) diff --git a/bucket/s3.go b/bucket/s3.go new file mode 100644 index 0000000..6183d60 --- /dev/null +++ b/bucket/s3.go @@ -0,0 +1,111 @@ +// Package bucket implements the io.ReadWriter for communication with the S3 +// API. +package bucket + +import ( + "context" + "io" + + "github.com/askiada/external-sort/internal/progress" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/pkg/errors" +) + +// S3 can read and write from/to S3 buckets using io.Reader and io.Writer +// inputs. +type S3 struct { + s3Client S3ClientAPI + progress progress.Progress + region string + maxRetries int + bufferLen int + partBodyMaxRetries int +} + +// New returns an instance of the S3 struct. +func New(ctx context.Context, cfg ...ConfigFunc) (*S3, error) { + s := &S3{ + region: "eu-west-1", + bufferLen: 1024, + maxRetries: 10, + partBodyMaxRetries: 3, + } + for _, c := range cfg { + c(s) + } + + if s.region == "" { + return nil, errors.Wrap(ErrInvalidInput, "region") + } + if s.bufferLen <= 0 { + return nil, errors.Wrap(ErrInvalidInput, "buffer length") + } + if s.s3Client == nil { + cfg, err := config.LoadDefaultConfig(ctx, + config.WithRegion(s.region), + config.WithRetryMaxAttempts(s.maxRetries), + ) + if err != nil { + return nil, errors.New("can't create aws config") + } + s.s3Client = s3.NewFromConfig(cfg) + } + return s, nil +} + +// Upload reads from the reader and uploads it to the S3 bucket with the +// filename key. +func (s *S3) Upload(ctx context.Context, r io.Reader, bucket string, key string) error { + uploader := manager.NewUploader(s.s3Client, func(u *manager.Uploader) { + u.BufferProvider = manager.NewBufferedReadSeekerWriteToPool(s.bufferLen) + }) + _, err := uploader.Upload(ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Body: r, + }) + return errors.Wrap(err, "upload failed") +} + +type seqWriterAt struct { + w io.Writer + progressFunc func(n int) +} + +func (s *seqWriterAt) WriteAt(p []byte, offset int64) (n int, err error) { + n, err = s.w.Write(p) + if s.progressFunc != nil { + s.progressFunc(n) + } + return n, errors.Wrap(err, "can't write bytes at offset") +} + +type DownloadFileInfo struct { + Bucket string + Key string +} + +// Download downloads the file from the S3 bucket with the filename key and +// writes it to the writer. +func (s *S3) Download(ctx context.Context, w io.Writer, filesinfo ...*DownloadFileInfo) error { + downloader := manager.NewDownloader(s.s3Client, func(d *manager.Downloader) { + d.PartBodyMaxRetries = s.partBodyMaxRetries + d.PartSize = int64(s.bufferLen) + // we need to force this to be a sequential download. 
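+		// seqWriterAt ignores the provided offset, so parts must be written strictly in order.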
+ d.Concurrency = 1 + }) + ww := &seqWriterAt{w, nil} + for _, fileinfo := range filesinfo { + _, err := downloader.Download(ctx, ww, &s3.GetObjectInput{ + Bucket: aws.String(fileinfo.Bucket), + Key: aws.String(fileinfo.Key), + }) + if err != nil { + return errors.Wrapf(err, "download failed for bucket %s and key %s", fileinfo.Bucket, fileinfo.Key) + } + } + return nil +} diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go index aebca36..2f575cb 100644 --- a/file/batchingchannels/batching_channel.go +++ b/file/batchingchannels/batching_channel.go @@ -89,9 +89,9 @@ func (ch *BatchingChannel) Close() { func (ch *BatchingChannel) batchingBuffer() { ch.buffer = ch.allocate.Vector(ch.size, ch.allocate.Key) for { - elem, open := <-ch.input + row, open := <-ch.input if open { - err := ch.buffer.PushBack(elem) + err := ch.buffer.PushBack(row) if err != nil { ch.G.Go(func() error { return err diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index b33aaf3..7330ed0 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -37,6 +37,10 @@ func (k *Int) Get() int { func (k *Int) Less(other key.Key) bool { return k.value < other.(*Int).value } +func (k *Int) Equal(other key.Key) bool { + return k.value == other.(*Int).value +} + func testBatches(t *testing.T, ch *batchingchannels.BatchingChannel) { maxI := 10000 expectedSum := (maxI - 1) * maxI / 2 diff --git a/file/chunk.go b/file/chunk.go index f3414f9..380a0cf 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -42,12 +42,15 @@ type chunks struct { } // new Create a new chunk and initialize it. -func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int) error { +func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, withHeader bool) error { f, err := os.Open(chunkPath) if err != nil { return err } - reader := allocate.FnReader(f) + reader, err := allocate.FnReader(f) + if err != nil { + return err + } elem := &chunkInfo{ filename: chunkPath, file: f, diff --git a/file/file.go b/file/file.go index e7a007a..074a4e0 100644 --- a/file/file.go +++ b/file/file.go @@ -11,18 +11,23 @@ import ( "github.com/askiada/external-sort/file/batchingchannels" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/writer" + "github.com/sirupsen/logrus" "github.com/pkg/errors" ) +var logger = logrus.StandardLogger() + type Info struct { mu *MemUsage Allocate *vector.Allocate InputReader io.Reader - OutputFile string + OutputFile io.Writer outputWriter writer.Writer totalRows int PrintMemUsage bool + WithHeader bool + headers interface{} } // CreateSortedChunks Scan a file and divide it into small sorted chunks. 
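+// Each chunk is sorted in memory and dumped with the writer configured on Allocate.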
@@ -42,9 +47,11 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS return nil, errors.Wrap(err, fn) } - inputReader := f.Allocate.FnReader(f.InputReader) - - row := 0 + inputReader, err := f.Allocate.FnReader(f.InputReader) + if err != nil { + return nil, errors.Wrap(err, fn) + } + count_rows := 0 chunkPaths := []string{} mu := sync.Mutex{} @@ -55,12 +62,16 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if f.PrintMemUsage { f.mu.Collect() } - elem, err := inputReader.Read() + row, err := inputReader.Read() if err != nil { return errors.Wrap(err, fn) } - batchChan.In() <- elem - row++ + if f.WithHeader && f.headers == nil { + f.headers = row + } else { + batchChan.In() <- row + } + count_rows++ } batchChan.Close() if inputReader.Err() != nil { @@ -74,8 +85,15 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS mu.Lock() chunkIdx++ chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(chunkIdx)+".tsv") + logger.Infoln("Created chunk", chunkPath) mu.Unlock() v.Sort() + if f.WithHeader { + err = v.PushFrontNoKey(f.headers) + if err != nil { + return err + } + } err := f.Allocate.Dump(v, chunkPath) if err != nil { return err @@ -88,6 +106,6 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if err != nil { return nil, errors.Wrap(err, fn) } - f.totalRows = row + f.totalRows = count_rows return chunkPaths, nil } diff --git a/file/sort.go b/file/sort.go index ae53b62..60a2eb1 100644 --- a/file/sort.go +++ b/file/sort.go @@ -2,7 +2,6 @@ package file import ( "fmt" - "os" "runtime" "github.com/askiada/external-sort/vector" @@ -39,24 +38,30 @@ func bToMb(b uint64) uint64 { return b / 1024 / 1024 } -func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { +func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err error) { + var oldElem *vector.Element output := f.Allocate.Vector(k, f.Allocate.Key) if f.PrintMemUsage && f.mu == nil { f.mu = &MemUsage{} } + if f.WithHeader { + err = output.PushFrontNoKey(f.headers) + if err != nil { + return err + } + } // create a chunk per file path chunks := &chunks{list: make([]*chunkInfo, 0, len(chunkPaths))} for _, chunkPath := range chunkPaths { - err := chunks.new(chunkPath, f.Allocate, k) + err := chunks.new(chunkPath, f.Allocate, k, f.WithHeader) if err != nil { return err } } - w, err := os.Create(f.OutputFile) + f.outputWriter, err = f.Allocate.FnWriter(f.OutputFile) if err != nil { return err } - f.outputWriter = f.Allocate.FnWriter(w) defer f.outputWriter.Close() bar := pb.StartNew(f.totalRows) chunks.resetOrder() @@ -76,10 +81,14 @@ func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { toShrink := []int{} // search the smallest value across chunk buffers by comparing first elements only minChunk, minValue, minIdx := chunks.min() - err = output.PushBack(minValue.Row) - if err != nil { - return err + if (!dropDuplicates || oldElem == nil) || (dropDuplicates && !minValue.Key.Equal(oldElem.Key)) { + err = output.PushBack(minValue.Row) + if err != nil { + return err + } + oldElem = minValue } + // remove the first element from the chunk we pulled the smallest value minChunk.buffer.FrontShift() isEmpty := false diff --git a/go.mod b/go.mod index 42f2d65..36e9033 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,14 @@ module github.com/askiada/external-sort go 1.17 require ( + github.com/aws/aws-sdk-go-v2 v1.16.16 + github.com/aws/aws-sdk-go-v2/config v1.17.8 + 
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.34 + github.com/aws/aws-sdk-go-v2/service/s3 v1.27.11 github.com/cheggaaa/pb/v3 v3.0.8 github.com/pkg/errors v0.9.1 github.com/pkg/sftp v1.13.4 + github.com/sirupsen/logrus v1.9.0 github.com/spf13/cobra v1.2.1 github.com/spf13/viper v1.8.1 github.com/stretchr/testify v1.7.0 @@ -15,11 +20,27 @@ require ( require ( github.com/VividCortex/ewma v1.2.0 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.8 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.12.21 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.14 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.9 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.18 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.17 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 // indirect + github.com/aws/smithy-go v1.13.3 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/fatih/color v1.13.0 // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kr/fs v0.1.0 // indirect github.com/magiconair/properties v1.8.5 // indirect github.com/mattn/go-colorable v0.1.12 // indirect @@ -34,7 +55,7 @@ require ( github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.2.0 // indirect - golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect + golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect golang.org/x/text v0.3.6 // indirect gopkg.in/ini.v1 v1.62.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 45da8db..86f0dcb 100644 --- a/go.sum +++ b/go.sum @@ -46,6 +46,44 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/aws/aws-sdk-go-v2 v1.16.16 h1:M1fj4FE2lB4NzRb9Y0xdWsn2P0+2UHVxwKyOa4YJNjk= +github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.8 h1:tcFliCWne+zOuUfKNRn8JdFBuWPDuISDH08wD2ULkhk= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.8/go.mod h1:JTnlBSot91steJeti4ryyu/tLd4Sk84O5W22L7O2EQU= +github.com/aws/aws-sdk-go-v2/config v1.17.8 h1:b9LGqNnOdg9vR4Q43tBTVWk4J6F+W774MSchvKJsqnE= +github.com/aws/aws-sdk-go-v2/config v1.17.8/go.mod h1:UkCI3kb0sCdvtjiXYiU4Zx5h07BOpgBTtkPu/49r+kA= +github.com/aws/aws-sdk-go-v2/credentials v1.12.21 h1:4tjlyCD0hRGNQivh5dN8hbP30qQhMLBE/FgQR1vHHWM= 
+github.com/aws/aws-sdk-go-v2/credentials v1.12.21/go.mod h1:O+4XyAt4e+oBAoIwNUYkRg3CVMscaIJdmZBOcPgJ8D8= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 h1:r08j4sbZu/RVi+BNxkBJwPMUYY3P8mgSDuKkZ/ZN1lE= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17/go.mod h1:yIkQcCDYNsZfXpd5UX2Cy+sWA1jPgIhGTw9cOBzfVnQ= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.34 h1:1PNtaCM+2ruo1dfYL2RweUdtbuPvinjAejjNcPa/RQY= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.34/go.mod h1:+Six+CXNHYllXam32j+YW8ixk82+am345ei89kEz8p4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 h1:s4g/wnzMf+qepSNgTvaQQHNxyMLKSawNhKCPNy++2xY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 h1:/K482T5A3623WJgWT8w1yRAFK4RzGzEl7y39yhtn9eA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 h1:wj5Rwc05hvUSvKuOF29IYb9QrCLjU+rHAy/x/o0DK2c= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24/go.mod h1:jULHjqqjDlbyTa7pfM7WICATnOv+iOhjletM3N0Xbu8= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.14 h1:ZSIPAkAsCCjYrhqfw2+lNzWDzxzHXEckFkTePL5RSWQ= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.14/go.mod h1:AyGgqiKv9ECM6IZeNQtdT8NnMvUb3/2wokeq2Fgryto= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.9 h1:Lh1AShsuIJTwMkoxVCAYPJgNG5H+eN6SmoUn8nOZ5wE= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.9/go.mod h1:a9j48l6yL5XINLHLcOKInjdvknN+vWqPBxqeIDw7ktw= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.18 h1:BBYoNQt2kUZUUK4bIPsKrCcjVPUMNsgQpNAwhznK/zo= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.18/go.mod h1:NS55eQ4YixUJPTC+INxi2/jCqe1y2Uw3rnh9wEOVJxY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17 h1:Jrd/oMh0PKQc6+BowB+pLEwLIgaQF29eYbe7E1Av9Ug= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17/go.mod h1:4nYOrY41Lrbk2170/BGkcJKBhws9Pfn8MG3aGqjjeFI= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.17 h1:HfVVR1vItaG6le+Bpw6P4midjBDMKnjMyZnw9MXYUcE= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.17/go.mod h1:YqMdV+gEKCQ59NrB7rzrJdALeBIsYiVi8Inj3+KcqHI= +github.com/aws/aws-sdk-go-v2/service/s3 v1.27.11 h1:3/gm/JTX9bX8CpzTgIlrtYpB3EVBDxyg/GY/QdcIEZw= +github.com/aws/aws-sdk-go-v2/service/s3 v1.27.11/go.mod h1:fmgDANqTUCxciViKl9hb/zD5LFbvPINFRgWhDbR+vZo= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 h1:pwvCchFUEnlceKIgPUouBJwK81aCkQ8UDMORfeFtW10= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.23/go.mod h1:/w0eg9IhFGjGyyncHIQrXtU8wvNsTJOP0R6PPj0wf80= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 h1:OwhhKc1P9ElfWbMKPIbMMZBV6hzJlL2JKD76wNNVzgQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6/go.mod h1:csZuQY65DAdFBt1oIjO5hhBR49kQqop4+lcuCjf2arA= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 h1:9pPi0PsFNAGILFfPCk8Y0iyEBGc6lu6OQ97U7hmdesg= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.19/go.mod h1:h4J3oPZQbxLhzGnk+j9dfYHi5qIOVJ5kczZd658/ydM= +github.com/aws/smithy-go v1.13.3 h1:l7LYxGuzK6/K+NzJ2mC+VvLUbae0sL3bXU//04MkmnA= +github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= 
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -125,6 +163,8 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -172,6 +212,10 @@ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1: github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= @@ -235,6 +279,8 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= @@ -432,8 +478,8 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158 h1:rm+CHSpPEEW2IsXUib1ThaHIjuBVZjxNgSKmBLFfD4c= -golang.org/x/sys 
v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/internal/env.go b/internal/env.go index f4bafcb..fc847b7 100644 --- a/internal/env.go +++ b/internal/env.go @@ -8,33 +8,45 @@ import ( // Argument names. const ( - InputFileName = "input_path" + WithHeaderName = "with_header" + InputFileNames = "input_paths" OutputFileName = "output_path" ChunkFolderName = "chunk_folder" ChunkSizeName = "chunk_size" MaxWorkersName = "max_workers" OutputBufferSizeName = "output_buffer_size" TsvFieldsName = "tsv_fields" + + S3RegionName = "s3_region" + S3RetryMaxAttemptsName = "s3_retry_max_attempts" ) // Environment variables. var ( - InputFile string + WithHeader bool + InputFiles []string TsvFields []string OutputFile string ChunkFolder string ChunkSize int MaxWorkers int64 OutputBufferSize int + + S3Region string + S3RetryMaxAttempts int ) func init() { viper.AutomaticEnv() - viper.SetDefault(InputFileName, "") + viper.SetDefault(WithHeaderName, false) + viper.SetDefault(InputFileNames, "") viper.SetDefault(OutputFileName, "") viper.SetDefault(ChunkFolderName, "") viper.SetDefault(ChunkSizeName, 0) viper.SetDefault(MaxWorkersName, 0) viper.SetDefault(OutputBufferSizeName, 0) viper.SetDefault(TsvFieldsName, []string{"0"}) + + viper.SetDefault(S3RegionName, "eu-west-1") + viper.SetDefault(S3RetryMaxAttemptsName, 10) } diff --git a/internal/progress/contract.go b/internal/progress/contract.go new file mode 100644 index 0000000..7b9766c --- /dev/null +++ b/internal/progress/contract.go @@ -0,0 +1,67 @@ +// Package progress defines standard and simple progress bar to track file download progress. +package progress + +import ( + "math" + + "github.com/cheggaaa/pb/v3" + "github.com/sirupsen/logrus" +) + +// Progress defines a simple progress bar contract. +type Progress interface { + // Begin sets and starts the progress bar. + Begin(total int64) + // Add increments the progress bar with n elements + Add(n int64) + // End terminates the progress bar + End() +} + +// Pb implements Progress contract using cheggaaa pb v3. +type Pb struct { + bar *pb.ProgressBar +} + +// Begin start a new progress bar in byte mode. +func (p *Pb) Begin(total int64) { + p.bar = pb.Full.Start64(total) + p.bar.Set(pb.Bytes, true) +} + +// Add increment the bar by n elements. +func (p *Pb) Add(n int64) { + p.bar.Add64(n) +} + +// End terminates the bar. +func (p *Pb) End() { + p.bar.Finish() +} + +var _ Progress = &Pb{} + +// Basic implements Progress contract using stdout to print status. +type Basic struct { + total float64 + written float64 + milestone int +} + +// Begin start a new progress bar. +func (b *Basic) Begin(total int64) { + b.total = float64(total) +} + +// Add increment the bar by n elements. +func (b *Basic) Add(val int64) { + b.written += float64(val) + progress := int(math.Round(b.written / b.total * 100)) + if progress >= b.milestone { + b.milestone += 5 // every 5% + logrus.Debugf("Download from S3 at %3d%%\n\n", progress) + } +} + +// End noop. 
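+// It exists only to satisfy the Progress contract.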
+func (b *Basic) End() {} diff --git a/internal/rw/rw.go b/internal/rw/rw.go new file mode 100644 index 0000000..fa89a15 --- /dev/null +++ b/internal/rw/rw.go @@ -0,0 +1,178 @@ +package rw + +import ( + "context" + "io" + "net/url" + "os" + "strings" + + "github.com/askiada/external-sort/bucket" + "github.com/askiada/external-sort/internal" + "github.com/askiada/external-sort/internal/progress" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" +) + +var logger = logrus.StandardLogger() + +type InputOutput struct { + s3Client bucket.S3ClientAPI + Input io.Reader + inputPipe *io.PipeReader + Output io.Writer + outputPipe *io.PipeWriter + g *errgroup.Group + dCtx context.Context +} + +func NewInputOutput(ctx context.Context) *InputOutput { + g, dCtx := errgroup.WithContext(ctx) + return &InputOutput{ + g: g, + dCtx: dCtx, + } +} + +func (i *InputOutput) s3Check() error { + if i.s3Client != nil { + return nil + } + cfg, err := config.LoadDefaultConfig(context.Background(), + config.WithRegion(internal.S3Region), + config.WithRetryMaxAttempts(internal.S3RetryMaxAttempts), + ) + if err != nil { + return errors.New("can't create aws config") + } + i.s3Client = s3.NewFromConfig(cfg) + return nil +} + +func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) (err error) { + if strings.HasPrefix(inputFiles[0], "s3") || strings.HasPrefix(inputFiles[0], "S3") { + err = i.s3Check() + if err != nil { + return errors.Wrap(err, "can't check s3") + } + s3Api, err := bucket.New(ctx, + bucket.Client(i.s3Client), + bucket.Buffer(1_000_000), + bucket.Progress(&progress.Pb{}), + ) + if err != nil { + return errors.Wrap(err, "can't create s3 client") + } + files := []*bucket.DownloadFileInfo{} + for _, inputFile := range inputFiles { + u, _ := url.Parse(inputFile) + u.Path = strings.TrimLeft(u.Path, "/") + logger.Debugf("Proto: %q, Bucket: %q, Key: %q", u.Scheme, u.Host, u.Path) + files = append(files, &bucket.DownloadFileInfo{ + Bucket: u.Host, + Key: u.Path, + }) + } + + pr, pw := io.Pipe() + i.Input = pr + i.inputPipe = pr + i.g.Go(func() error { + defer pw.Close() // nolint:errcheck //no need to check this error + err := s3Api.Download(i.dCtx, pw, files...) + if err != nil { + return errors.Wrap(err, "can't download files") + } + return nil + }) + } else { + var files []io.Reader + for _, inputFile := range inputFiles { + f, err := os.Open(inputFile) + if err != nil { + return errors.Wrapf(err, "can't open file %s", inputFile) + } + files = append(files, f) + } + i.Input = io.MultiReader(files...) 
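+		// io.MultiReader concatenates the local input files into one sequential stream.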
+ } + return nil +} + +func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (err error) { + if strings.HasPrefix(outputFile, "s3") || strings.HasPrefix(outputFile, "S3") { + err = i.s3Check() + if err != nil { + return errors.Wrap(err, "can't check s3") + } + u, _ := url.Parse(outputFile) + u.Path = strings.TrimLeft(u.Path, "/") + logger.Debugf("Proto: %q, Bucket: %q, Key: %q", u.Scheme, u.Host, u.Path) + s3Api, err := bucket.New(ctx, + bucket.Client(i.s3Client), + bucket.Buffer(1_000_000), + bucket.Progress(&progress.Pb{}), + ) + if err != nil { + return errors.Wrap(err, "can't create s3 client") + } + + pr, pw := io.Pipe() + i.Output = pw + i.outputPipe = pw + i.g.Go(func() error { + defer pr.Close() // nolint:errcheck //no need to check this error + err := s3Api.Upload(i.dCtx, pr, u.Host, u.Path) + if err != nil { + return errors.Wrapf(err, "can't upload file %s", outputFile) + } + return nil + }) + } else { + i.Output, err = os.Create(outputFile) + if err != nil { + return errors.Wrapf(err, "can't create file %s", outputFile) + } + } + return nil +} + +func (i *InputOutput) Do(f func() error) { + i.g.Go(func() error { + err := f() + if err != nil { + return err + } + err = i.Close() + if err != nil { + return err + } + return nil + }) +} + +func (i *InputOutput) Close() error { + if i.inputPipe != nil { + err := i.inputPipe.Close() + if err != nil { + return errors.Wrap(err, "can't close input reader") + } + } + if i.outputPipe != nil { + err := i.outputPipe.Close() + if err != nil { + return errors.Wrap(err, "can't close output writer") + } + } + return nil +} + +func (i *InputOutput) Err() error { + if err := i.g.Wait(); err != nil { + return errors.Wrap(err, "one of the go routines went wrong") + } + return nil +} diff --git a/main.go b/main.go index 5a166fb..a440bbe 100644 --- a/main.go +++ b/main.go @@ -2,30 +2,33 @@ package main import ( "context" - "fmt" "io" - "os" "strconv" "time" "github.com/askiada/external-sort/file" "github.com/askiada/external-sort/internal" + "github.com/askiada/external-sort/internal/rw" "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" "github.com/askiada/external-sort/writer" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/viper" ) +var logger = logrus.StandardLogger() + func main() { rootCmd := &cobra.Command{ Use: "external-sort", Short: "Perform an external sorting on an input file", RunE: rootRun, } - - rootCmd.PersistentFlags().StringVarP(&internal.InputFile, internal.InputFileName, "i", viper.GetString(internal.InputFileName), "input file path.") + rootCmd.PersistentFlags().BoolVarP(&internal.WithHeader, internal.WithHeaderName, "i", viper.GetBool(internal.WithHeaderName), "Input file has headers.") + rootCmd.PersistentFlags().StringSliceVarP(&internal.InputFiles, internal.InputFileNames, "i", viper.GetStringSlice(internal.InputFileNames), "input file path.") rootCmd.PersistentFlags().StringVarP(&internal.OutputFile, internal.OutputFileName, "o", viper.GetString(internal.OutputFileName), "output file path.") rootCmd.PersistentFlags().StringVarP(&internal.ChunkFolder, internal.ChunkFolderName, "c", viper.GetString(internal.ChunkFolderName), "chunk folder.") @@ -34,22 +37,29 @@ func main() { rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.") 
	rootCmd.PersistentFlags().StringSliceVarP(&internal.TsvFields, internal.TsvFieldsName, "t", viper.GetStringSlice(internal.TsvFieldsName), "")
-	fmt.Println("Input file", internal.InputFile)
-	fmt.Println("Output file", internal.OutputFile)
-	fmt.Println("Chunk folder", internal.ChunkFolder)
-	fmt.Println("TSV Fields", internal.TsvFields)
+	rootCmd.Flags().StringVar(&internal.S3Region, internal.S3RegionName, viper.GetString(internal.S3RegionName), "the bucket region")
+	rootCmd.Flags().IntVar(&internal.S3RetryMaxAttempts, internal.S3RetryMaxAttemptsName, viper.GetInt(internal.S3RetryMaxAttemptsName), "the number of retries per S3 request before failing")
+	logger.Infoln("Input files", internal.InputFiles)
+	logger.Infoln("With header", internal.WithHeader)
+	logger.Infoln("Output file", internal.OutputFile)
+	logger.Infoln("Chunk folder", internal.ChunkFolder)
+	logger.Infoln("TSV Fields", internal.TsvFields)
	cobra.CheckErr(rootCmd.Execute())
}

func rootRun(cmd *cobra.Command, args []string) error {
	start := time.Now()
-	// open a file
-	inputReader, err := os.Open(internal.InputFile)
+	ctx := context.Background()
+	i := rw.NewInputOutput(ctx)
+	err := i.SetInputReader(ctx, internal.InputFiles...)
+	if err != nil {
+		return err
+	}
+	err = i.SetOutputWriter(ctx, internal.OutputFile)
	if err != nil {
		return err
	}
-	defer inputReader.Close()
	tsvFields := []int{}
	for _, field := range internal.TsvFields {
		i, err := strconv.Atoi(field)
@@ -59,26 +69,38 @@ func rootRun(cmd *cobra.Command, args []string) error {
		tsvFields = append(tsvFields, i)
	}
	fI := &file.Info{
-		InputReader: inputReader,
-		OutputFile:  internal.OutputFile,
-		Allocate: vector.DefaultVector(func(row interface{}) (key.Key, error) {
-			return key.AllocateTsv(row, tsvFields...)
-		}, func(r io.Reader) reader.Reader { return reader.NewSeparatedValues(r, '\t') }, func(w io.Writer) writer.Writer { return writer.NewSeparatedValues(w, '\t') }),
+		WithHeader:  internal.WithHeader,
+		InputReader: i.Input,
+		OutputFile:  i.Output,
+		Allocate: vector.DefaultVector(
+			func(row interface{}) (key.Key, error) {
+				return key.AllocateTsv(row, tsvFields...)
+			},
+			func(r io.Reader) (reader.Reader, error) { return reader.NewGZipSeparatedValues(r, '\t') }, func(w io.Writer) (writer.Writer, error) {
+				return writer.NewGZipSeparatedValues(w, '\t')
+			},
+		),
		PrintMemUsage: false,
	}
-
-	// create small files with maximum 30 rows in each
-	chunkPaths, err := fI.CreateSortedChunks(context.Background(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers)
-	if err != nil {
-		return err
-	}
-	// perform a merge sort on all the chunks files.
-	// we sort using a buffer so we don't have to load the entire chunks when merging
-	err = fI.MergeSort(chunkPaths, internal.OutputBufferSize)
+	i.Do(func() error {
+		// create small sorted chunk files with at most ChunkSize rows in each
+		chunkPaths, err := fI.CreateSortedChunks(context.Background(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers)
+		if err != nil {
+			return errors.Wrap(err, "can't create sorted chunks")
+		}
+		// perform a merge sort on all the chunks files.
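+		// (the final boolean asks MergeSort to drop rows with duplicate keys while merging)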
+ // we sort using a buffer so we don't have to load the entire chunks when merging + err = fI.MergeSort(chunkPaths, internal.OutputBufferSize, true) + if err != nil { + return errors.Wrap(err, "can't merge sort") + } + elapsed := time.Since(start) + logger.Infoln("It took", elapsed) + return nil + }) + err = i.Err() if err != nil { - return err + return errors.Wrap(err, "can't finish") } - elapsed := time.Since(start) - fmt.Println(elapsed) return nil } diff --git a/main_bench_test.go b/main_bench_test.go index f6515c7..47a36a5 100644 --- a/main_bench_test.go +++ b/main_bench_test.go @@ -3,12 +3,12 @@ package main_test import ( "context" "io" - "io/ioutil" "os" "path" "testing" "github.com/askiada/external-sort/file" + "github.com/askiada/external-sort/internal/rw" "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" @@ -18,23 +18,32 @@ import ( func BenchmarkMergeSort(b *testing.B) { filename := "test.tsv" - f, err := os.Open(filename) + ctx := context.Background() + i := rw.NewInputOutput(ctx) + err := i.SetInputReader(ctx, filename) + assert.NoError(b, err) + err = i.SetOutputWriter(ctx, "testdata/chunks/output.tsv") assert.NoError(b, err) chunkSize := 10000 bufferSize := 5000 fI := &file.Info{ - InputReader: f, - Allocate: vector.DefaultVector(key.AllocateInt, reader.NewStdScanner, func(w io.Writer) writer.Writer { return writer.NewStdWriter(w) }), - OutputFile: "testdata/chunks/output.tsv", + InputReader: i.Input, + Allocate: vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }), + OutputFile: i.Output, } - chunkPaths, err := fI.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) + i.Do(func() (err error) { + chunkPaths, err := fI.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) + assert.NoError(b, err) + b.ResetTimer() + for i := 0; i < b.N; i++ { + err = fI.MergeSort(chunkPaths, bufferSize, false) + _ = err + } + return nil + }) + err = i.Err() assert.NoError(b, err) - b.ResetTimer() - for i := 0; i < b.N; i++ { - err = fI.MergeSort(chunkPaths, bufferSize) - _ = err - } - dir, err := ioutil.ReadDir("testdata/chunks") + dir, err := os.ReadDir("testdata/chunks") assert.NoError(b, err) for _, d := range dir { err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) diff --git a/main_test.go b/main_test.go index 6b03fa5..b8669bf 100644 --- a/main_test.go +++ b/main_test.go @@ -5,13 +5,13 @@ import ( "context" "errors" "io" - "io/ioutil" "os" "path" "strconv" "testing" "github.com/askiada/external-sort/file" + "github.com/askiada/external-sort/internal/rw" "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" @@ -20,21 +20,31 @@ import ( "github.com/stretchr/testify/assert" ) -func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, filename, outputFilename string, chunkSize int) (*file.Info, []string) { +func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, filename, outputFilename string, chunkSize int, mergeSort bool, bufferSize int, withHeaders bool, dropDuplicates bool) *file.Info { t.Helper() - f, err := os.Open(filename) + i := rw.NewInputOutput(ctx) + err := i.SetInputReader(ctx, filename) + assert.NoError(t, err) + err = i.SetOutputWriter(ctx, "testdata/chunks/output.tsv") 
assert.NoError(t, err) fI := &file.Info{ - InputReader: f, + InputReader: i.Input, Allocate: allocate, - OutputFile: "testdata/chunks/output.tsv", + OutputFile: i.Output, + WithHeader: withHeaders, } - chunkPaths, err := fI.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) + i.Do(func() (err error) { + chunkPaths, err := fI.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) + assert.NoError(t, err) + if mergeSort { + return fI.MergeSort(chunkPaths, bufferSize, dropDuplicates) + } + return nil + }) + err = i.Err() assert.NoError(t, err) - t.Cleanup(func() { - defer f.Close() - dir, err := ioutil.ReadDir("testdata/chunks") + dir, err := os.ReadDir("testdata/chunks") assert.NoError(t, err) for _, d := range dir { err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) @@ -42,7 +52,7 @@ func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, } }) - return fI, chunkPaths + return fI } func TestBasics(t *testing.T) { @@ -80,10 +90,9 @@ func TestBasics(t *testing.T) { t.Run(name+"_"+strconv.Itoa(chunkSize)+"_"+strconv.Itoa(bufferSize), func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, reader.NewStdScanner, writer.NewStdWriter) - fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize) - err := fI.MergeSort(chunkPaths, bufferSize) - assert.NoError(t, err) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize, true, bufferSize, false, false) + outputFile, err := os.Open(outputFilename) assert.NoError(t, err) outputScanner := bufio.NewScanner(outputFile) @@ -123,10 +132,124 @@ func Test100Elems(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, reader.NewStdScanner, writer.NewStdWriter) - fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, 21) - err := fI.MergeSort(chunkPaths, 10) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsWithDuplicates(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with duplicates": { + filename: "testdata/100elems.tsv", + expectedOutput: []string{"3", "4", "5", "6", "7", "8", "9", "10", "15", "18", "21", "22", "25", "26", "27", "28", "29", "30", "31", "33", "34", "36", "37", "39", "40", "41", "42", "43", "47", "49", "50", "52", "53", "54", "55", "56", "57", "59", "60", "61", "62", "63", "67", "71", "72", "73", "74", "75", "78", "79", "80", "82", "89", "91", "92", "93", "94", "97", "99"}, + outputFilename: 
"testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, true) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsWithHeaders(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers": { + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{"headers", "3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, false) + outputFile, err := os.Open(outputFilename) assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} +func Test100ElemsWithHeadersWithDuplicates(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers and duplicates": { + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{"headers", "3", "4", "5", "6", "7", "8", "9", "10", "15", "18", "21", "22", "25", "26", "27", "28", "29", "30", "31", "33", "34", "36", "37", "39", "40", "41", "42", "43", "47", "49", "50", "52", "53", "54", "55", "56", "57", "59", "60", "61", "62", "63", "67", "71", "72", "73", "74", "75", "78", "79", "80", "82", "89", "91", "92", "93", "94", "97", "99"}, + outputFilename: "testdata/chunks/output.tsv", + }, 
+ } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) outputScanner := bufio.NewScanner(outputFile) @@ -176,10 +299,8 @@ func TestTsvKey(t *testing.T) { allocate := vector.DefaultVector(func(row interface{}) (key.Key, error) { return key.AllocateTsv(row, 1) - }, func(r io.Reader) reader.Reader { return reader.NewSeparatedValues(r, '\t') }, func(w io.Writer) writer.Writer { return writer.NewSeparatedValues(w, '\t') }) - fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, 21) - err := fI.MergeSort(chunkPaths, 10) - assert.NoError(t, err) + }, func(r io.Reader) (reader.Reader, error) { return reader.NewSeparatedValues(r, '\t'), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewSeparatedValues(w, '\t'), nil }) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) outputScanner := bufio.NewScanner(outputFile) diff --git a/reader/gzip_separated_values.go b/reader/gzip_separated_values.go new file mode 100644 index 0000000..d38430e --- /dev/null +++ b/reader/gzip_separated_values.go @@ -0,0 +1,53 @@ +package reader + +import ( + "compress/gzip" + "encoding/csv" + "io" + + "github.com/pkg/errors" +) + +type GZipSeparatedValuesReader struct { + row []string + r *csv.Reader + gr *gzip.Reader + err error +} + +func NewGZipSeparatedValues(r io.Reader, separator rune) (*GZipSeparatedValuesReader, error) { + gr, err := gzip.NewReader(r) + if err != nil { + return nil, errors.Wrap(err, "can't create gzip reader") + } + + s := &GZipSeparatedValuesReader{ + gr: gr, + r: csv.NewReader(gr), + } + s.r.Comma = separator + return s, nil +} + +func (s *GZipSeparatedValuesReader) Next() bool { + s.row, s.err = s.r.Read() + if errors.Is(s.err, io.EOF) { + s.err = nil + s.gr.Close() + return false + } + return true +} + +func (s *GZipSeparatedValuesReader) Read() (interface{}, error) { + if s.err != nil { + return nil, s.err + } + return s.row, nil +} + +func (s *GZipSeparatedValuesReader) Err() error { + return s.err +} + +var _ Reader = &GZipSeparatedValuesReader{} diff --git a/testdata/100elemsWithHeaders.tsv b/testdata/100elemsWithHeaders.tsv new file mode 100644 index 0000000..ecfc0b9 --- /dev/null +++ b/testdata/100elemsWithHeaders.tsv @@ -0,0 +1,101 @@ +headers +5 +18 +27 +41 +6 +52 +89 +30 +39 +56 +63 +7 +22 +26 +73 +22 +55 +21 +8 +25 +40 +31 +26 +59 +57 +82 +7 +72 +4 +25 +47 +71 +61 +80 +91 +79 +25 +25 +43 +97 +25 +75 +50 +72 +29 +92 +80 +54 +89 +55 +28 +93 +43 +92 +47 +42 +71 +97 +49 +8 +93 +91 +7 +41 +74 +53 +18 +89 +50 +30 +3 +34 +62 +33 +55 +94 +10 +52 +39 +28 +60 +57 +78 +37 +67 +18 +33 +27 +9 +15 +99 +29 +10 +36 +6 +31 +39 +9 +18 +29 \ No newline at end of file diff --git a/vector/key/int_key.go b/vector/key/int_key.go index 7ffb83d..5479894 100644 --- a/vector/key/int_key.go +++ b/vector/key/int_key.go @@ -25,3 +25,7 @@ func AllocateInt(row interface{}) (Key, error) { func (k *Int) Less(other Key) bool { return k.value < 
other.(*Int).value } + +func (k *Int) Equal(other Key) bool { + return k.value == other.(*Int).value +} diff --git a/vector/key/key.go b/vector/key/key.go index eb05ce1..2eda041 100644 --- a/vector/key/key.go +++ b/vector/key/key.go @@ -1,6 +1,7 @@ package key type Key interface { + Equal(v2 Key) bool // Less returns wether the key is smaller than v2 Less(v2 Key) bool } diff --git a/vector/key/string_key.go b/vector/key/string_key.go index b98669c..d4452e5 100644 --- a/vector/key/string_key.go +++ b/vector/key/string_key.go @@ -14,6 +14,10 @@ func (k *String) Less(other Key) bool { return k.value < other.(*String).value } +func (k *String) Equal(other Key) bool { + return k.value == other.(*String).value +} + type UpperString struct { value string } @@ -25,3 +29,6 @@ func AllocateUpperString(line string) (Key, error) { func (k *UpperString) Less(other Key) bool { return k.value < other.(*UpperString).value } +func (k *UpperString) Equal(other Key) bool { + return k.value == other.(*UpperString).value +} diff --git a/vector/slice_vector.go b/vector/slice_vector.go index c31b0fe..ba52d75 100644 --- a/vector/slice_vector.go +++ b/vector/slice_vector.go @@ -41,6 +41,11 @@ func (v *SliceVec) PushBack(row interface{}) error { return nil } +func (v *SliceVec) PushFrontNoKey(row interface{}) error { + v.s = append([]*Element{{Row: row}}, v.s...) + return nil +} + func (v *SliceVec) Sort() { sort.Slice(v.s, func(i, j int) bool { return Less(v.Get(i), v.Get(j)) diff --git a/vector/vector.go b/vector/vector.go index 0e8916f..8b4d803 100644 --- a/vector/vector.go +++ b/vector/vector.go @@ -12,12 +12,12 @@ import ( type Allocate struct { Vector func(int, func(row interface{}) (key.Key, error)) Vector - FnReader func(r io.Reader) reader.Reader - FnWriter func(w io.Writer) writer.Writer + FnReader func(r io.Reader) (reader.Reader, error) + FnWriter func(w io.Writer) (writer.Writer, error) Key func(elem interface{}) (key.Key, error) } -func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader func(r io.Reader) reader.Reader, fnWr func(w io.Writer) writer.Writer) *Allocate { +func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader func(r io.Reader) (reader.Reader, error), fnWr func(w io.Writer) (writer.Writer, error)) *Allocate { return &Allocate{ FnReader: fnReader, FnWriter: fnWr, @@ -31,6 +31,8 @@ type Vector interface { Get(i int) *Element // PushBack Add item at the end PushBack(row interface{}) error + // PushFront Add item at the beginning + PushFrontNoKey(row interface{}) error // FrontShift Remove the first element FrontShift() // Len Length of the Vector @@ -46,7 +48,10 @@ func (a *Allocate) Dump(v Vector, filename string) error { if err != nil { return errors.Errorf("failed creating file: %s", err) } - datawriter := a.FnWriter(file) + datawriter, err := a.FnWriter(file) + if err != nil { + return errors.Errorf("failed creating writer: %s", err) + } for i := 0; i < v.Len(); i++ { err = datawriter.Write(v.Get(i).Row) if err != nil { From 6263d66bd5d6a81da5a3af5c066d5baff25ae40c Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Sun, 2 Oct 2022 15:39:17 +0200 Subject: [PATCH 03/16] add gzip --- writer/gzip_separated_values.go | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 writer/gzip_separated_values.go diff --git a/writer/gzip_separated_values.go b/writer/gzip_separated_values.go new file mode 100644 index 0000000..81ffdea --- /dev/null +++ 
b/writer/gzip_separated_values.go
@@ -0,0 +1,46 @@
+package writer
+
+import (
+	"compress/gzip"
+	"encoding/csv"
+	"io"
+
+	"github.com/pkg/errors"
+)
+
+type GZipSeparatedValuesWriter struct {
+	w          *csv.Writer
+	gw         *gzip.Writer
+	withHeader bool
+}
+
+func NewGZipSeparatedValues(w io.Writer, separator rune) (Writer, error) {
+	gw := gzip.NewWriter(w)
+	s := &GZipSeparatedValuesWriter{
+		gw: gw,
+		w:  csv.NewWriter(gw),
+	}
+	s.w.Comma = separator
+	return s, nil
+}
+
+func (s *GZipSeparatedValuesWriter) Write(elem interface{}) error {
+	line, ok := elem.([]string)
+	if !ok {
+		return errors.Errorf("can't convert interface{} to []string: %+v", elem)
+	}
+	err := s.w.Write(line)
+	if err != nil {
+		return errors.Wrap(err, "can't write line")
+	}
+	return nil
+}
+
+func (s *GZipSeparatedValuesWriter) Close() error {
+	defer s.gw.Close()
+	s.w.Flush()
+	if s.w.Error() != nil {
+		return errors.Wrap(s.w.Error(), "can't close writer")
+	}
+	return nil
+}
From 4d9c369d0ba15eeb6e5183a18b04d2282f59c0ea Mon Sep 17 00:00:00 2001
From: askiada <25521495+askiada@users.noreply.github.com>
Date: Sun, 2 Oct 2022 18:32:54 +0200
Subject: [PATCH 04/16] add shuffle command

---
 Makefile                 |  10 ++-
 file/shuffle.go          | 128 ++++++++++++++++++++++++++++++++
 internal/env.go          |   4 +
 main.go                  |  96 +++++++++++++++++++-----
 main_bench_test.go       |   2 +-
 main_test.go             | 153 +++++++++++++++++++++++++++++++++++++--
 reader/std_scanner.go    |  76 +++++++++++++++++--
 testdata/100elems.tsv.gz | Bin 0 -> 172 bytes
 vector/key/int_key.go    |  24 ++++++
 writer/std_writer.go     |  49 +++++++++++++
 10 files changed, 510 insertions(+), 32 deletions(-)
 create mode 100644 file/shuffle.go
 create mode 100644 testdata/100elems.tsv.gz

diff --git a/Makefile b/Makefile
index acd6107..47dcb1c 100644
--- a/Makefile
+++ b/Makefile
@@ -17,9 +17,13 @@ test:
 test_race:
	go test -race ./...
 
-.PHONY: run
-run: build
-	./bin/external-sort
+.PHONY: run_sort
+run_sort: build
+	./bin/external-sort sort
+
+.PHONY: run_shuffle
+run_shuffle: build
+	./bin/external-sort shuffle
 
 .PHONY: build
 build:
diff --git a/file/shuffle.go b/file/shuffle.go
new file mode 100644
index 0000000..8cd4db2
--- /dev/null
+++ b/file/shuffle.go
@@ -0,0 +1,128 @@
+package file
+
+import (
+	"context"
+	"io"
+	"math/rand"
+	"path"
+	"strconv"
+	"sync"
+
+	"github.com/askiada/external-sort/file/batchingchannels"
+	"github.com/askiada/external-sort/reader"
+	"github.com/askiada/external-sort/vector"
+	"github.com/askiada/external-sort/vector/key"
+	"github.com/askiada/external-sort/writer"
+	"github.com/pkg/errors"
+)
+
+// Shuffle Scan a file, shuffle the rows and divide them into small chunks.
+// Store all the chunks in a folder and returns all the paths.
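+// The shuffle works by prefixing every row with a random int64 key drawn from
+// the seeded source, sorting each chunk on that key, and merge-sorting the
+// chunks, so rows come out in random-key order rather than in input order.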
+func (f *Info) Shuffle(ctx context.Context, chunkFolder string, dumpSize int, maxWorkers int64, k int, seed int64, isGzip bool) ([]string, error) { + fn := "scan and shuffle and dump" + if dumpSize <= 0 { + return nil, errors.Wrap(errors.New("dump size must be greater than 0"), fn) + } + + if f.PrintMemUsage && f.mu == nil { + f.mu = &MemUsage{} + } + if f.Allocate != nil { + return nil, errors.New("allocate should not be defined when shuffling") + } + f.Allocate = vector.DefaultVector( + func(row interface{}) (key.Key, error) { + return key.AllocateIntFromSlice(row, 0) + }, + func(r io.Reader) (reader.Reader, error) { + return reader.NewStdScanner(r, isGzip) + }, + func(w io.Writer) (writer.Writer, error) { + return writer.NewStdSliceWriter(w, false, isGzip), nil + }, + ) + + err := clearChunkFolder(chunkFolder) + if err != nil { + return nil, errors.Wrap(err, fn) + } + + inputReader, err := f.Allocate.FnReader(f.InputReader) + if err != nil { + return nil, errors.Wrap(err, fn) + } + countRows := 0 + chunkPaths := []string{} + + mu := sync.Mutex{} + r := rand.New(rand.NewSource(seed)) + batchChan := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + batchChan.G.Go(func() error { + for inputReader.Next() { + if f.PrintMemUsage { + f.mu.Collect() + } + row, err := inputReader.Read() + if err != nil { + return errors.Wrap(err, fn) + } + if f.WithHeader && f.headers == nil { + f.headers = []string{"##!!##", row.(string)} + } else { + newRow := []string{strconv.FormatInt(r.Int63(), 10), row.(string)} + batchChan.In() <- newRow + } + countRows++ + } + batchChan.Close() + if inputReader.Err() != nil { + return errors.Wrap(inputReader.Err(), fn) + } + return nil + }) + + chunkIdx := 0 + err = batchChan.ProcessOut(func(v vector.Vector) error { + mu.Lock() + chunkIdx++ + chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(chunkIdx)+".tsv") + logger.Infoln("Created chunk", chunkPath) + mu.Unlock() + v.Sort() + if f.WithHeader { + err = v.PushFrontNoKey(f.headers) + if err != nil { + return err + } + } + err := f.Allocate.Dump(v, chunkPath) + if err != nil { + return err + } + mu.Lock() + chunkPaths = append(chunkPaths, chunkPath) + mu.Unlock() + return nil + }) + if err != nil { + return nil, errors.Wrap(err, fn) + } + f.totalRows = countRows + + f.Allocate = vector.DefaultVector( + func(row interface{}) (key.Key, error) { + return key.AllocateIntFromSlice(row, 0) + }, + func(r io.Reader) (reader.Reader, error) { + return reader.NewStdSliceScanner(r, isGzip) + }, + func(w io.Writer) (writer.Writer, error) { + return writer.NewStdSliceWriter(w, true, isGzip), nil + }, + ) + err = f.MergeSort(chunkPaths, k, false) + if err != nil { + return nil, errors.Wrap(err, fn) + } + return chunkPaths, nil +} diff --git a/internal/env.go b/internal/env.go index fc847b7..8fb0490 100644 --- a/internal/env.go +++ b/internal/env.go @@ -19,6 +19,8 @@ const ( S3RegionName = "s3_region" S3RetryMaxAttemptsName = "s3_retry_max_attempts" + + IsGzipName = "is_gzip" ) // Environment variables. 
@@ -34,6 +36,7 @@ var (
	S3Region           string
	S3RetryMaxAttempts int
+	IsGzip             bool
 )
 
 func init() {
@@ -49,4 +52,5 @@
	viper.SetDefault(S3RegionName, "eu-west-1")
	viper.SetDefault(S3RetryMaxAttemptsName, 10)
+	viper.SetDefault(IsGzipName, false)
 }
diff --git a/main.go b/main.go
index a440bbe..fdda9c4 100644
--- a/main.go
+++ b/main.go
@@ -21,34 +21,60 @@ import (
 
 var logger = logrus.StandardLogger()
 
-func main() {
-	rootCmd := &cobra.Command{
-		Use:   "external-sort",
-		Short: "Perform an external sorting on an input file",
-		RunE:  rootRun,
+type command struct {
+	rootCmd    *cobra.Command
+	sortCmd    *cobra.Command
+	shuffleCmd *cobra.Command
+}
+
+func newCommand() *command {
+	root := &command{
+		rootCmd: &cobra.Command{
+			Use:   "external",
+			Short: "Perform an external task on an input file",
+		},
+		sortCmd: &cobra.Command{
+			Use:   "sort",
+			Short: "Perform an external sorting on an input file",
+			RunE:  sortRun,
+		},
+		shuffleCmd: &cobra.Command{
+			Use:   "shuffle",
+			Short: "Perform an external shuffle on an input file",
+			RunE:  shuffleRun,
+		},
	}
-	rootCmd.PersistentFlags().BoolVarP(&internal.WithHeader, internal.WithHeaderName, "i", viper.GetBool(internal.WithHeaderName), "Input file has headers.")
-	rootCmd.PersistentFlags().StringSliceVarP(&internal.InputFiles, internal.InputFileNames, "i", viper.GetStringSlice(internal.InputFileNames), "input file path.")
-	rootCmd.PersistentFlags().StringVarP(&internal.OutputFile, internal.OutputFileName, "o", viper.GetString(internal.OutputFileName), "output file path.")
-	rootCmd.PersistentFlags().StringVarP(&internal.ChunkFolder, internal.ChunkFolderName, "c", viper.GetString(internal.ChunkFolderName), "chunk folder.")
+	root.rootCmd.PersistentFlags().BoolVarP(&internal.WithHeader, internal.WithHeaderName, "e", viper.GetBool(internal.WithHeaderName), "Input file has headers.")
+	root.rootCmd.PersistentFlags().StringSliceVarP(&internal.InputFiles, internal.InputFileNames, "i", viper.GetStringSlice(internal.InputFileNames), "input file path.")
+	root.rootCmd.PersistentFlags().StringVarP(&internal.OutputFile, internal.OutputFileName, "o", viper.GetString(internal.OutputFileName), "output file path.")
+	root.rootCmd.PersistentFlags().StringVarP(&internal.ChunkFolder, internal.ChunkFolderName, "c", viper.GetString(internal.ChunkFolderName), "chunk folder.")
 
-	rootCmd.PersistentFlags().IntVarP(&internal.ChunkSize, internal.ChunkSizeName, "s", viper.GetInt(internal.ChunkSizeName), "chunk size.")
-	rootCmd.PersistentFlags().Int64VarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt64(internal.MaxWorkersName), "max worker.")
-	rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.")
-	rootCmd.PersistentFlags().StringSliceVarP(&internal.TsvFields, internal.TsvFieldsName, "t", viper.GetStringSlice(internal.TsvFieldsName), "")
+	root.rootCmd.PersistentFlags().IntVarP(&internal.ChunkSize, internal.ChunkSizeName, "s", viper.GetInt(internal.ChunkSizeName), "chunk size.")
+	root.rootCmd.PersistentFlags().Int64VarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt64(internal.MaxWorkersName), "max worker.")
+	root.rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.")
+	root.sortCmd.PersistentFlags().StringSliceVarP(&internal.TsvFields, internal.TsvFieldsName, "t", viper.GetStringSlice(internal.TsvFieldsName), "")
 
-	rootCmd.Flags().StringVar(&internal.S3Region, internal.S3RegionName, viper.GetString(internal.S3RegionName), "the bucket region")
-	rootCmd.Flags().IntVar(&internal.S3RetryMaxAttempts, internal.S3RetryMaxAttemptsName, viper.GetInt(internal.S3RetryMaxAttemptsName), "the number of retries per S3 request before failing")
+	root.rootCmd.Flags().StringVar(&internal.S3Region, internal.S3RegionName, viper.GetString(internal.S3RegionName), "the bucket region")
+	root.rootCmd.Flags().IntVar(&internal.S3RetryMaxAttempts, internal.S3RetryMaxAttemptsName, viper.GetInt(internal.S3RetryMaxAttemptsName), "the number of retries per S3 request before failing")
+
+	root.shuffleCmd.PersistentFlags().BoolVarP(&internal.IsGzip, internal.IsGzipName, "t", viper.GetBool(internal.IsGzipName), "")
 
	logger.Infoln("Input files", internal.InputFiles)
	logger.Infoln("With header", internal.WithHeader)
	logger.Infoln("Output file", internal.OutputFile)
	logger.Infoln("Chunk folder", internal.ChunkFolder)
	logger.Infoln("TSV Fields", internal.TsvFields)
-	cobra.CheckErr(rootCmd.Execute())
+
+	root.rootCmd.AddCommand(root.sortCmd, root.shuffleCmd)
+	return root
 }
 
-func rootRun(cmd *cobra.Command, args []string) error {
+func main() {
+	root := newCommand()
+	cobra.CheckErr(root.rootCmd.Execute())
+}
+
+func sortRun(cmd *cobra.Command, args []string) error {
	start := time.Now()
	ctx := context.Background()
	i := rw.NewInputOutput(ctx)
@@ -104,3 +130,39 @@ func rootRun(cmd *cobra.Command, args []string) error {
	}
	return nil
 }
+
+func shuffleRun(cmd *cobra.Command, args []string) error {
+	start := time.Now()
+	ctx := context.Background()
+	i := rw.NewInputOutput(ctx)
+	err := i.SetInputReader(ctx, internal.InputFiles...)
+	if err != nil {
+		return err
+	}
+	err = i.SetOutputWriter(ctx, internal.OutputFile)
+	if err != nil {
+		return err
+	}
+
+	fI := &file.Info{
+		WithHeader:    internal.WithHeader,
+		InputReader:   i.Input,
+		OutputFile:    i.Output,
+		PrintMemUsage: false,
+	}
+	i.Do(func() error {
+		// split the input into shuffled chunks and merge them back together
+		_, err := fI.Shuffle(context.Background(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers, internal.OutputBufferSize, time.Now().Unix(), internal.IsGzip)
+		if err != nil {
+			return errors.Wrap(err, "can't create shuffled chunks")
+		}
+		elapsed := time.Since(start)
+		logger.Infoln("It took", elapsed)
+		return nil
+	})
+	err = i.Err()
+	if err != nil {
+		return errors.Wrap(err, "can't finish")
+	}
+	return nil
+}
diff --git a/main_bench_test.go b/main_bench_test.go
index 47a36a5..0b686c1 100644
--- a/main_bench_test.go
+++ b/main_bench_test.go
@@ -28,7 +28,7 @@ func BenchmarkMergeSort(b *testing.B) {
	bufferSize := 5000
	fI := &file.Info{
		InputReader: i.Input,
-		Allocate:    vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }),
+		Allocate:    vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }),
		OutputFile:  i.Output,
	}
	i.Do(func() (err error) {
diff --git a/main_test.go b/main_test.go
index b8669bf..63c755a 100644
--- a/main_test.go
+++ b/main_test.go
@@ -90,7 +90,7 @@ func TestBasics(t *testing.T) {
 
			t.Run(name+"_"+strconv.Itoa(chunkSize)+"_"+strconv.Itoa(bufferSize), func(t *testing.T) {
				ctx := context.Background()
-				allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader)
(reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize, true, bufferSize, false, false) outputFile, err := os.Open(outputFilename) @@ -132,7 +132,7 @@ func Test100Elems(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -171,7 +171,7 @@ func Test100ElemsWithDuplicates(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -210,7 +210,7 @@ func Test100ElemsWithHeaders(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, false) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -248,7 +248,7 @@ func Test100ElemsWithHeadersWithDuplicates(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -316,3 +316,146 @@ func TestTsvKey(t *testing.T) { }) } } +func 
prepareChunksShuffle(ctx context.Context, t *testing.T, filename, outputFilename string, chunkSize int, mergeSort bool, bufferSize int, withHeaders bool, dropDuplicates, isGzip bool) *file.Info { + t.Helper() + i := rw.NewInputOutput(ctx) + err := i.SetInputReader(ctx, filename) + assert.NoError(t, err) + err = i.SetOutputWriter(ctx, outputFilename) + assert.NoError(t, err) + fI := &file.Info{ + InputReader: i.Input, + OutputFile: i.Output, + WithHeader: withHeaders, + } + i.Do(func() (err error) { + _, err = fI.Shuffle(ctx, "testdata/chunks", chunkSize, 10, bufferSize, 13, isGzip) + assert.NoError(t, err) + return nil + }) + err = i.Err() + assert.NoError(t, err) + t.Cleanup(func() { + dir, err := os.ReadDir("testdata/chunks") + assert.NoError(t, err) + for _, d := range dir { + err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) + assert.NoError(t, err) + } + }) + + return fI +} +func Test100ElemsShuffle(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems": { + filename: "testdata/100elems.tsv", + expectedOutput: []string{"3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + prepareChunksShuffle(ctx, t, filename, outputFilename, 21, false, 10, false, false, false) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsShuffleWithHeaders(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers": { + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{"headers", "3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename 
+ expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + prepareChunksShuffle(ctx, t, filename, outputFilename, 21, false, 10, true, false, false) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsShuffleGzip(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers": { + filename: "testdata/100elems.tsv.gz", + expectedOutput: []string{"headers", "3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + outputFilename: "testdata/chunks/output.tsv.gz", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + prepareChunksShuffle(ctx, t, filename, outputFilename, 21, false, 10, true, false, true) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} diff --git a/reader/std_scanner.go b/reader/std_scanner.go index 7610ffd..4c1fb22 100644 --- a/reader/std_scanner.go +++ b/reader/std_scanner.go @@ -2,22 +2,45 @@ package reader import ( "bufio" + "compress/gzip" "io" + "strings" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) +var logger = logrus.StandardLogger() + type StdScanner struct { - r *bufio.Scanner + r *bufio.Scanner + gr *gzip.Reader } -func NewStdScanner(r io.Reader) Reader { - s := &StdScanner{ - r: bufio.NewScanner(r), +func NewStdScanner(r io.Reader, isGzip bool) (Reader, error) { + var newR *bufio.Scanner + s := &StdScanner{} + if isGzip { + gr, err := gzip.NewReader(r) + if err != nil { + return nil, errors.Wrap(err, "can't create gzip reader") + } + s.gr = gr + newR = bufio.NewScanner(gr) + } else { + newR = bufio.NewScanner(r) } - return s + s.r = newR + logger.Infoln("Created standard scanner") + return s, nil } func (s *StdScanner) Next() bool { - return s.r.Scan() + next := s.r.Scan() + if !next && s.gr != nil { + s.gr.Close() + } + return next } func (s *StdScanner) Read() (interface{}, error) { return s.r.Text(), nil @@ -25,3 +48,44 @@ func (s *StdScanner) Read() (interface{}, error) { func (s *StdScanner) Err() error { return s.r.Err() } + +type StdSliceScanner 
struct { + r *bufio.Scanner + gr *gzip.Reader +} + +func NewStdSliceScanner(r io.Reader, isGzip bool) (Reader, error) { + var newR *bufio.Scanner + s := &StdSliceScanner{} + if isGzip { + gr, err := gzip.NewReader(r) + if err != nil { + return nil, errors.Wrap(err, "can't create gzip reader") + } + s.gr = gr + newR = bufio.NewScanner(gr) + } else { + newR = bufio.NewScanner(r) + } + s.r = newR + return s, nil +} + +func (s *StdSliceScanner) Next() bool { + next := s.r.Scan() + if !next && s.gr != nil { + s.gr.Close() + } + return next +} +func (s *StdSliceScanner) Read() (interface{}, error) { + line := s.r.Text() + before, after, found := strings.Cut(line, "##!!##") + if !found { + return nil, errors.New("can't cut row") + } + return []string{before, after}, nil +} +func (s *StdSliceScanner) Err() error { + return s.r.Err() +} diff --git a/testdata/100elems.tsv.gz b/testdata/100elems.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..cf1b6ad830475fb70fb7d76329fe7287f43bfe61 GIT binary patch literal 172 zcmV;d08{@TiwFp-4me`~12Hf#Wo%__b1rmqb^sla%M}1I2mjrAXrqe()iSavC$NXxEU5U6xN`p{q+S+B{pdjreanuHBM{x%6nMy)&($lRUz0 z7!#JEm*xK$|4EZin5 Date: Sun, 2 Oct 2022 18:34:05 +0200 Subject: [PATCH 05/16] add env to shuffle --- env.list | 16 +++++++++++----- env_sort.list | 13 +++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 env_sort.list diff --git a/env.list b/env.list index cbbeaa3..8045cc0 100644 --- a/env.list +++ b/env.list @@ -1,7 +1,13 @@ -INPUT_PATH=/Users/alex/Desktop/Projects/Blokur/Repo/external-sort/rec_sample.tsv -OUTPUT_PATH=./output.tsv +INPUT_PATHS=./output.tsv.gz +OUTPUT_PATH=./output_shuffled.tsv.gz CHUNK_FOLDER=./data/chunks/ CHUNK_SIZE=1000000 -MAX_WORKERS=10 -OUTPUT_BUFFER_SIZE=1000 -TSV_FIELDS=2 4 \ No newline at end of file +MAX_WORKERS=40 +OUTPUT_BUFFER_SIZE=1000000 +IS_GZIP=true + +S3_REGION=eu-west-1 +S3_BUCKET=blokur-data +S3_RETRY_MAX_ATTEMPTS=10 + +WITH_HEADER=true \ No newline at end of file diff --git a/env_sort.list b/env_sort.list new file mode 100644 index 0000000..55d2dc6 --- /dev/null +++ b/env_sort.list @@ -0,0 +1,13 @@ +INPUT_PATHS=s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.0.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.1.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.2.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.3.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.4.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.5.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.6.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.7.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.8.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.9.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.10.tsv.gz 
s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.11.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.12.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.13.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.14.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.15.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.16.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.17.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.18.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.19.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.20.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.21.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.22.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.23.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.24.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.25.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.26.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.27.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.28.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.29.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.30.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.31.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.32.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.33.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.34.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.35.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.36.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.37.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.38.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.39.tsv.gz 
s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.40.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.41.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.42.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.43.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.44.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.45.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.46.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.47.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.48.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.49.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.50.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.51.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.52.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.53.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.54.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.55.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.56.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.57.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.58.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz +OUTPUT_PATH=./output.tsv.gz +CHUNK_FOLDER=./data/chunks/ +CHUNK_SIZE=1000000 +MAX_WORKERS=30 +OUTPUT_BUFFER_SIZE=100000 +TSV_FIELDS=1 + +S3_REGION=eu-west-1 +S3_BUCKET=blokur-data +S3_RETRY_MAX_ATTEMPTS=10 + +WITH_HEADER=true \ No newline at end of file From 9ba122bd6a07a470dbfa1b5f234f27f67ecd0fa7 Mon Sep 17 00:00:00 2001 From: Skiada Alexandre <25521495+askiada@users.noreply.github.com> Date: Tue, 4 Oct 2022 14:56:25 +0100 Subject: [PATCH 06/16] repair incorrect headers chunk --- file/chunk.go | 5 +++++ main.go | 17 +++++++++++------ writer/gzip_separated_values.go | 5 ++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/file/chunk.go b/file/chunk.go index 380a0cf..0d908fb 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -51,6 +51,11 @@ func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, with if err != nil { return err } + + if withHeader { + reader.Next() + } + elem := &chunkInfo{ filename: chunkPath, file: f, diff --git a/main.go b/main.go index fdda9c4..9b37f11 100644 --- a/main.go +++ b/main.go @@ -59,12 +59,6 @@ func newCommand() *command { 
root.shuffleCmd.PersistentFlags().BoolVarP(&internal.IsGzip, internal.IsGzipName, "t", viper.GetBool(internal.IsGzipName), "") - logger.Infoln("Input files", internal.InputFiles) - logger.Infoln("With header", internal.WithHeader) - logger.Infoln("Output file", internal.OutputFile) - logger.Infoln("Chunk folder", internal.ChunkFolder) - logger.Infoln("TSV Fields", internal.TsvFields) - root.rootCmd.AddCommand(root.sortCmd, root.shuffleCmd) return root } @@ -75,6 +69,12 @@ func main() { } func sortRun(cmd *cobra.Command, args []string) error { + logger.Infoln("Input files", internal.InputFiles) + logger.Infoln("With header", internal.WithHeader) + logger.Infoln("Output file", internal.OutputFile) + logger.Infoln("Chunk folder", internal.ChunkFolder) + logger.Infoln("TSV Fields", internal.TsvFields) + start := time.Now() ctx := context.Background() i := rw.NewInputOutput(ctx) @@ -132,6 +132,11 @@ func sortRun(cmd *cobra.Command, args []string) error { } func shuffleRun(cmd *cobra.Command, args []string) error { + logger.Infoln("Input files", internal.InputFiles) + logger.Infoln("With header", internal.WithHeader) + logger.Infoln("Output file", internal.OutputFile) + logger.Infoln("Chunk folder", internal.ChunkFolder) + logger.Infoln("GZip file", internal.IsGzip) start := time.Now() ctx := context.Background() i := rw.NewInputOutput(ctx) diff --git a/writer/gzip_separated_values.go b/writer/gzip_separated_values.go index 81ffdea..63e6064 100644 --- a/writer/gzip_separated_values.go +++ b/writer/gzip_separated_values.go @@ -9,9 +9,8 @@ import ( ) type GZipSeparatedValuesWriter struct { - w *csv.Writer - gw *gzip.Writer - withHeader bool + w *csv.Writer + gw *gzip.Writer } func NewGZipSeparatedValues(w io.Writer, separator rune) (Writer, error) { From 6699985892f84f27bb2c92dfeeefe93111049117 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Mon, 8 May 2023 04:16:17 +0200 Subject: [PATCH 07/16] lint --- .golangci.yml | 235 +++++++----- bucket/contract.go | 4 +- bucket/errors.go | 6 +- bucket/s3.go | 45 ++- file/batchingchannels/batching_channel.go | 54 ++- .../batchingchannels/batching_channel_test.go | 74 ++-- file/chunk.go | 20 +- file/file.go | 18 +- file/shuffle.go | 2 +- go.mod | 88 ++--- go.sum | 347 ++++++------------ internal/env.go | 4 +- internal/rw/rw.go | 8 +- main.go | 2 +- main_test.go | 34 +- reader/gzip_separated_values_test.go | 48 +++ reader/std_scanner.go | 4 + vector/key/string_key.go | 1 + 18 files changed, 509 insertions(+), 485 deletions(-) create mode 100644 reader/gzip_separated_values_test.go diff --git a/.golangci.yml b/.golangci.yml index fc414e3..e5f2e1a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,106 +1,153 @@ linters-settings: - govet: - settings: - printf: - funcs: - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf - enable: - - fieldalignment - # golint: - # min-confidence: 0 - gocyclo: - min-complexity: 15 - maligned: - suggest-new: true - goconst: - min-len: 2 - min-occurrences: 2 - # misspell: # disabled as it was breaking interfaces with FinaliZe - # locale: UK - lll: - line-length: 140 - goimports: - local-prefixes: github.com/golangci/golangci-lint - gocritic: - enabled-tags: - - diagnostic - - experimental - - opinionated - - performance - - style - funlen: - lines: 100 - statements: 50 - godot: - capital: 
true - unparam: - check-exported: true + funlen: + lines: 80 + statements: 50 + goconst: + min-len: 2 + min-occurrences: 2 + gocritic: + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + gocyclo: + min-complexity: 15 + godot: + capital: true + goimports: + local-prefixes: github.com/golangci/golangci-lint + govet: + settings: + printf: + funcs: + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf + enable: + - fieldalignment + maligned: + suggest-new: true + misspell: + locale: UK + lll: + line-length: 140 + unparam: + check-exported: true issues: - # Excluding configuration per-path, per-linter, per-text and per-source - exclude-rules: - - path: _test\.go - linters: - - gosec # security check is not impoerant in tests - - dupl # we usualy duplicate code in tests - - bodyclose - - unparam - fix: true + # Excluding configuration per-path, per-linter, per-text and per-source + exclude-rules: + - path: _test\.go + linters: + - gosec # security check is not important in tests + - dupl # we usually duplicate code in tests + - bodyclose + - unparam + - errcheck + - govet + - gocritic + - goconst + - forcetypeassert + - wrapcheck + fix: true + exclude-use-default: false run: - skip-dirs: - - model - - tmp - - bin - - scripts + skip-dirs: + - tmp + - bin + - scripts - tests: true - build-tags: - - integration + tests: true + build-tags: + - integration linters: - disable-all: true - fast: true - enable: - - asciicheck - - bodyclose - - deadcode - - dogsled - - depguard - - dupl - - errorlint - - gocognit - - goconst - - gocritic - - gocyclo - - godot - - godox - - golint - - goprintffuncname - - gosec - - gosimple - - govet - # - misspell # disabled as it was breaking interfaces with FinaliZe - - nakedret - - nestif - - prealloc - - rowserrcheck - - scopelint - - staticcheck - - stylecheck - - unconvert - # - unparam # Too many false positives on Task interface implementation. 
- - unused - - whitespace - # - wrapcheck - - tparallel + disable-all: true + fast: true + enable: + - asciicheck + - bidichk + - bodyclose + - bodyclose + - containedctx + - contextcheck + - cyclop + - decorder + - depguard + - dogsled + - dupl + - durationcheck + - errcheck + - errchkjson + - errname + - errorlint + - exhaustive + - exportloopref + - forbidigo + - forcetypeassert + - funlen + - gocognit + - goconst + - gocritic + - gocyclo + - godot + - godox + - gofmt + - gofumpt + - goheader + - goimports + - gomnd + - gomoddirectives + - gomodguard + - goprintffuncname + - gosec + - gosec + - gosimple + - gosimple + - govet + - govet + - grouper + - importas + - ineffassign + - ireturn + - lll + - maintidx + - makezero + - misspell + - nakedret + - nestif + - nilerr + - nilnil + - nlreturn + - noctx + - nolintlint + - prealloc + - predeclared + - promlinter + - revive + - rowserrcheck + - sqlclosecheck + - staticcheck + - staticcheck + - stylecheck + - tagliatelle + - tenv + - thelper + - tparallel + - typecheck + - unconvert + - unparam + - unused + - varnamelen + - wastedassign + - whitespace + - wrapcheck # golangci.com configuration # https://github.com/golangci/golangci/wiki/Configuration service: - golangci-lint-version: 1.38.x - prepare: - - echo "here I can run custom commands, but no preparation needed for this repo" + golangci-lint-version: 1.52.x diff --git a/bucket/contract.go b/bucket/contract.go index e8f8450..9c51477 100644 --- a/bucket/contract.go +++ b/bucket/contract.go @@ -31,10 +31,12 @@ func PartBodyMaxRetries(r int) ConfigFunc { } } +const mbConversion = 1024 * 1024 + // Buffer is the amount of memory in MB to use for buffering the data. func Buffer(buffer int) ConfigFunc { return func(s *S3) { - s.bufferLen = buffer * 1024 * 1024 + s.bufferLen = buffer * mbConversion } } diff --git a/bucket/errors.go b/bucket/errors.go index 63b153e..7347995 100644 --- a/bucket/errors.go +++ b/bucket/errors.go @@ -2,7 +2,5 @@ package bucket import "errors" -var ( - // ErrInvalidInput is returned when the input is invalid. - ErrInvalidInput = errors.New("invalid input") -) +// ErrInvalidInput is returned when the input is invalid. +var ErrInvalidInput = errors.New("invalid input") diff --git a/bucket/s3.go b/bucket/s3.go index 6183d60..d6cf190 100644 --- a/bucket/s3.go +++ b/bucket/s3.go @@ -25,48 +25,56 @@ type S3 struct { partBodyMaxRetries int } +const ( + defaultBufferLen = 1024 + defaultMaxRetries = 10 + defaultPartBodyMaxRetries = 3 +) + // New returns an instance of the S3 struct. 
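// The bucket package above configures S3 through functional options
// (ConfigFunc), so callers compose only the knobs they need. A small usage
// sketch built from the two options shown in this patch, Buffer and
// PartBodyMaxRetries (the values 64 and 5 are illustrative, not defaults):
//
//	func newBucket(ctx context.Context) (*bucket.S3, error) {
//		// 64 MB copy buffer, up to 5 retries per downloaded part; every
//		// other field keeps the defaults set in bucket.New below.
//		return bucket.New(ctx, bucket.Buffer(64), bucket.PartBodyMaxRetries(5))
//	}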
 func New(ctx context.Context, cfg ...ConfigFunc) (*S3, error) {
-	s := &S3{
+	s3Val := &S3{
 		region: "eu-west-1",
-		bufferLen: 1024,
-		maxRetries: 10,
-		partBodyMaxRetries: 3,
+		bufferLen: defaultBufferLen,
+		maxRetries: defaultMaxRetries,
+		partBodyMaxRetries: defaultPartBodyMaxRetries,
 	}
 	for _, c := range cfg {
-		c(s)
+		c(s3Val)
 	}
-	if s.region == "" {
+	if s3Val.region == "" {
 		return nil, errors.Wrap(ErrInvalidInput, "region")
 	}
-	if s.bufferLen <= 0 {
+	if s3Val.bufferLen <= 0 {
 		return nil, errors.Wrap(ErrInvalidInput, "buffer length")
 	}
-	if s.s3Client == nil {
+	if s3Val.s3Client == nil {
 		cfg, err := config.LoadDefaultConfig(ctx,
-			config.WithRegion(s.region),
-			config.WithRetryMaxAttempts(s.maxRetries),
+			config.WithRegion(s3Val.region),
+			config.WithRetryMaxAttempts(s3Val.maxRetries),
 		)
 		if err != nil {
 			return nil, errors.New("can't create aws config")
 		}
-		s.s3Client = s3.NewFromConfig(cfg)
+		s3Val.s3Client = s3.NewFromConfig(cfg)
 	}
-	return s, nil
+
+	return s3Val, nil
 }
 
 // Upload reads from the reader and uploads it to the S3 bucket with the
 // filename key.
-func (s *S3) Upload(ctx context.Context, r io.Reader, bucket string, key string) error {
+func (s *S3) Upload(ctx context.Context, reader io.Reader, bucket, key string) error {
 	uploader := manager.NewUploader(s.s3Client, func(u *manager.Uploader) {
 		u.BufferProvider = manager.NewBufferedReadSeekerWriteToPool(s.bufferLen)
 	})
 	_, err := uploader.Upload(ctx, &s3.PutObjectInput{
 		Bucket: aws.String(bucket),
 		Key: aws.String(key),
-		Body: r,
+		Body: reader,
 	})
+
 	return errors.Wrap(err, "upload failed")
 }
 
@@ -75,7 +83,7 @@ type seqWriterAt struct {
 	progressFunc func(n int)
 }
 
-func (s *seqWriterAt) WriteAt(p []byte, offset int64) (n int, err error) {
+func (s *seqWriterAt) WriteAt(p []byte, _ int64) (n int, err error) {
 	n, err = s.w.Write(p)
 	if s.progressFunc != nil {
 		s.progressFunc(n)
@@ -83,21 +91,22 @@ func (s *seqWriterAt) WriteAt(p []byte, offset int64) (n int, err error) {
 	return n, errors.Wrap(err, "can't write bytes at offset")
 }
 
-type DownloadFileInfo struct {
+// S3FileInfo describes the path to a file on S3.
+type S3FileInfo struct {
 	Bucket string
 	Key string
 }
 
 // Download downloads the file from the S3 bucket with the filename key and
 // writes it to the writer.
-func (s *S3) Download(ctx context.Context, w io.Writer, filesinfo ...*DownloadFileInfo) error {
+func (s *S3) Download(ctx context.Context, writer io.Writer, filesinfo ...*S3FileInfo) error {
 	downloader := manager.NewDownloader(s.s3Client, func(d *manager.Downloader) {
 		d.PartBodyMaxRetries = s.partBodyMaxRetries
 		d.PartSize = int64(s.bufferLen)
 		// we need to force this to be a sequential download.
 		d.Concurrency = 1
 	})
-	ww := &seqWriterAt{w, nil}
+	ww := &seqWriterAt{writer, nil}
 	for _, fileinfo := range filesinfo {
 		_, err := downloader.Download(ctx, ww, &s3.GetObjectInput{
 			Bucket: aws.String(fileinfo.Bucket),
diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go
index 2f575cb..24246a8 100644
--- a/file/batchingchannels/batching_channel.go
+++ b/file/batchingchannels/batching_channel.go
@@ -4,45 +4,44 @@ import (
 	"context"
 
 	"github.com/askiada/external-sort/vector"
+	"github.com/pkg/errors"
 	"golang.org/x/sync/errgroup"
-	"golang.org/x/sync/semaphore"
 )
 
 // BatchingChannel implements the Channel interface, with the change that instead of producing individual elements
 // on Out(), it batches together the entire internal buffer each time.
Trying to construct an unbuffered batching channel // will panic, that configuration is not supported (and provides no benefit over an unbuffered NativeChannel). type BatchingChannel struct { - input chan interface{} - output chan vector.Vector - buffer vector.Vector - allocate *vector.Allocate - G *errgroup.Group - sem *semaphore.Weighted - dCtx context.Context - size int - maxWorker int64 + input chan interface{} + output chan vector.Vector + buffer vector.Vector + allocate *vector.Allocate + G *errgroup.Group + internalContext context.Context //nolint //containedcontext + size int + maxWorker int } -func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker int64, size int) *BatchingChannel { +func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker, size int) *BatchingChannel { if size == 0 { panic("channels: BatchingChannel does not support unbuffered behaviour") } if size < 0 { panic("channels: invalid negative size in NewBatchingChannel") } - g, dCtx := errgroup.WithContext(ctx) - ch := &BatchingChannel{ - input: make(chan interface{}), - output: make(chan vector.Vector), - size: size, - allocate: allocate, - maxWorker: maxWorker, - G: g, - sem: semaphore.NewWeighted(maxWorker), - dCtx: dCtx, + errGrp, errGrpContext := errgroup.WithContext(ctx) + errGrp.SetLimit(maxWorker) + bChan := &BatchingChannel{ + input: make(chan interface{}), + output: make(chan vector.Vector), + size: size, + allocate: allocate, + maxWorker: maxWorker, + G: errGrp, + internalContext: errGrpContext, } - go ch.batchingBuffer() - return ch + go bChan.batchingBuffer() + return bChan } func (ch *BatchingChannel) In() chan<- interface{} { @@ -58,18 +57,14 @@ func (ch *BatchingChannel) Out() <-chan vector.Vector { func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error { for val := range ch.Out() { - if err := ch.sem.Acquire(ch.dCtx, 1); err != nil { - return err - } val := val ch.G.Go(func() error { - defer ch.sem.Release(1) return f(val) }) } err := ch.G.Wait() if err != nil { - return err + return errors.Wrap(err, "one of the task failed") } return nil } @@ -94,13 +89,14 @@ func (ch *BatchingChannel) batchingBuffer() { err := ch.buffer.PushBack(row) if err != nil { ch.G.Go(func() error { - return err + return errors.Wrap(err, "can't push back row") }) } } else { if ch.buffer.Len() > 0 { ch.output <- ch.buffer } + break } if ch.buffer.Len() == ch.size { diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index 7330ed0..af3cfa4 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -14,11 +14,11 @@ import ( "github.com/stretchr/testify/assert" ) -type Int struct { +type intKey struct { value int } -func AllocateInt(row interface{}) (key.Key, error) { +func allocateInt(row interface{}) (key.Key, error) { line, ok := row.(string) if !ok { return nil, errors.Errorf("can't convert interface{} to string: %+v", row) @@ -27,40 +27,42 @@ func AllocateInt(row interface{}) (key.Key, error) { if err != nil { return nil, err } - return &Int{num}, nil + return &intKey{num}, nil } -func (k *Int) Get() int { +func (k *intKey) Get() int { return k.value } -func (k *Int) Less(other key.Key) bool { - return k.value < other.(*Int).value +func (k *intKey) Less(other key.Key) bool { + return k.value < other.(*intKey).value } -func (k *Int) Equal(other key.Key) bool { - return k.value == other.(*Int).value + +func (k *intKey) Equal(other key.Key) bool { + 
return k.value == other.(*intKey).value
 }
 
-func testBatches(t *testing.T, ch *batchingchannels.BatchingChannel) {
+func testBatches(t *testing.T, bChan *batchingchannels.BatchingChannel) {
+	t.Helper()
 	maxI := 10000
 	expectedSum := (maxI - 1) * maxI / 2
-	wg := &sync.WaitGroup{}
-	wgInput := &sync.WaitGroup{}
+	wgrp := &sync.WaitGroup{}
+	wgrpInput := &sync.WaitGroup{}
 	maxIn := 100
-	wgInput.Add(maxIn)
-	for j := 0; j < maxIn; j++ {
+	wgrpInput.Add(maxIn)
+	for idx := 0; idx < maxIn; idx++ {
 		go func(j int) {
-			defer wgInput.Done()
+			defer wgrpInput.Done()
 			for i := maxI / maxIn * j; i < maxI*(j+1)/maxIn; i++ {
-				ch.In() <- strconv.Itoa(i)
+				bChan.In() <- strconv.Itoa(i)
 			}
-		}(j)
+		}(idx)
 	}
 	go func() {
-		wgInput.Wait()
-		ch.Close()
+		wgrpInput.Wait()
+		bChan.Close()
 	}()
 
 	got := make(chan *vector.Element, maxI)
@@ -70,13 +72,13 @@
 	go func() {
 		defer wgSum.Done()
 		for g := range got {
-			gotSum += g.Key.(*Int).Get()
+			gotSum += g.Key.(*intKey).Get()
 		}
 	}()
-	wg.Add(1)
+	wgrp.Add(1)
 	go func() {
-		defer wg.Done()
-		err := ch.ProcessOut(func(val vector.Vector) error {
+		defer wgrp.Done()
+		err := bChan.ProcessOut(func(val vector.Vector) error {
 			for i := 0; i < val.Len(); i++ {
 				val := val.Get(i)
 				got <- val
@@ -88,42 +90,42 @@
 			panic(err)
 		}
 	}()
-	wg.Wait()
+	wgrp.Wait()
 	close(got)
 	wgSum.Wait()
 	assert.Equal(t, expectedSum, gotSum)
 }
 
 func TestBatchingChannel(t *testing.T) {
-	allocate := vector.DefaultVector(AllocateInt, nil, nil)
-	ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50)
-	testBatches(t, ch)
+	allocate := vector.DefaultVector(allocateInt, nil, nil)
+	bChan := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50)
+	testBatches(t, bChan)
 
-	ch = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 3)
-	testBatches(t, ch)
+	bChan = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 3)
+	testBatches(t, bChan)
 
-	ch = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 1)
-	testChannelConcurrentAccessors(t, "batching channel", ch)
+	bChan = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 1)
+	testChannelConcurrentAccessors(t, bChan)
 }
 
 func TestBatchingChannelCap(t *testing.T) {
-	allocate := vector.DefaultVector(AllocateInt, nil, nil)
+	allocate := vector.DefaultVector(allocateInt, nil, nil)
 	ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 5)
 	if ch.Cap() != 5 {
 		t.Error("incorrect capacity on infinite channel")
 	}
 }
 
-func testChannelConcurrentAccessors(t *testing.T, name string, ch *batchingchannels.BatchingChannel) {
+func testChannelConcurrentAccessors(_ *testing.T, bChan *batchingchannels.BatchingChannel) {
 	// no asserts here, this is just for the race detector's benefit
-	go ch.Len()
-	go ch.Cap()
+	go bChan.Len()
+	go bChan.Cap()
 	go func() {
-		ch.In() <- ""
+		bChan.In() <- ""
 	}()
 	go func() {
-		<-ch.Out()
+		<-bChan.Out()
 	}()
 }
diff --git a/file/chunk.go b/file/chunk.go
index 0d908fb..accc16a 100644
--- a/file/chunk.go
+++ b/file/chunk.go
@@ -21,18 +21,22 @@ type chunkInfo struct {
 // pullSubset Add to vector the specified number of elements.
 // It stops if there are no elements left to add.
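// Aside on the batching-channel rewrite above: patch 07 drops the hand-rolled
// semaphore.Weighted and bounds concurrency with errgroup's own SetLimit,
// which the go.mod bump to golang.org/x/sync v0.2.0 provides. The pattern on
// its own, as a sketch (process is a caller-supplied stand-in):
//
//	func processBatches(ctx context.Context, batches []vector.Vector,
//		process func(context.Context, vector.Vector) error,
//	) error {
//		grp, grpCtx := errgroup.WithContext(ctx)
//		// At most 4 tasks run at once; Go blocks while the limit is reached.
//		grp.SetLimit(4)
//		for _, batch := range batches {
//			batch := batch // capture the loop variable (pre-Go 1.22 semantics)
//			grp.Go(func() error { return process(grpCtx, batch) })
//		}
//		// Wait returns the first non-nil error from any task.
//		return grp.Wait()
//	}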
 func (c *chunkInfo) pullSubset(size int) (err error) {
-	i := 0
-	for i < size && c.reader.Next() {
+	elemIdx := 0
+	for elemIdx < size && c.reader.Next() {
 		row, err := c.reader.Read()
 		if err != nil {
-			return errors.Wrap(err, "")
+			return errors.Wrap(err, "can't read chunk")
 		}
-		c.buffer.PushBack(row)
-		i++
+		err = c.buffer.PushBack(row)
+		if err != nil {
+			return errors.Wrap(err, "can't push back row")
+		}
+		elemIdx++
 	}
 	if c.reader.Err() != nil {
-		return c.reader.Err()
+		return errors.Wrap(c.reader.Err(), "chunk reader encountered an error")
 	}
+
 	return nil
 }
@@ -41,7 +45,7 @@ type chunks struct {
 	list []*chunkInfo
 }
 
-// new Create a new chunk and initialize it.
+// new Create a new chunk and initialise it.
 func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, withHeader bool) error {
 	f, err := os.Open(chunkPath)
 	if err != nil {
@@ -122,7 +126,7 @@ func (c *chunks) moveFirstChunkToCorrectIndex() {
 	pos := sort.Search(len(c.list), func(i int) bool {
 		return !vector.Less(c.list[i].buffer.Get(0), elem.buffer.Get(0))
 	})
-	// TODO: c.list = c.list[1:] and the following line create an unecessary allocation.
+	// TODO: c.list = c.list[1:] and the following line create an unnecessary allocation.
 	c.list = append(c.list[:pos], append([]*chunkInfo{elem}, c.list[pos:]...)...)
 }
 
diff --git a/file/file.go b/file/file.go
index 074a4e0..11f9906 100644
--- a/file/file.go
+++ b/file/file.go
@@ -3,10 +3,9 @@ package file
 import (
 	"context"
 	"io"
-	"sync"
-
 	"path"
 	"strconv"
+	"sync"
 
 	"github.com/askiada/external-sort/file/batchingchannels"
 	"github.com/askiada/external-sort/vector"
@@ -32,10 +31,9 @@ type Info struct {
 
 // CreateSortedChunks Scan a file and divide it into small sorted chunks.
 // Store all the chunks in a folder and return all the paths.
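// The TODO kept in moveFirstChunkToCorrectIndex above points out that taking
// c.list = c.list[1:] and re-inserting the head through two appends allocates
// a temporary slice. One possible allocation-free variant shifts in place with
// copy; a sketch only, assuming pos is computed by the same sort.Search over
// the elements after the head (moveFirstTo is not code from this patch):
//
//	func moveFirstTo(list []*chunkInfo, pos int) {
//		elem := list[0]
//		// Shift the pos elements after the head one slot left, then drop
//		// the head into the gap; no temporary slice is created.
//		copy(list[:pos], list[1:pos+1])
//		list[pos] = elem
//	}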
-func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpSize int, maxWorkers int64) ([]string, error) {
-	fn := "scan and sort and dump"
+func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpSize, maxWorkers int) ([]string, error) {
 	if dumpSize <= 0 {
-		return nil, errors.Wrap(errors.New("dump size must be greater than 0"), fn)
+		return nil, errors.New("dump size must be greater than 0")
 	}
 
 	if f.PrintMemUsage && f.mu == nil {
@@ -44,12 +42,12 @@
 
 	err := clearChunkFolder(chunkFolder)
 	if err != nil {
-		return nil, errors.Wrap(err, fn)
+		return nil, errors.Wrap(err, "can't clear chunk folder")
 	}
 
 	inputReader, err := f.Allocate.FnReader(f.InputReader)
 	if err != nil {
-		return nil, errors.Wrap(err, fn)
+		return nil, errors.Wrap(err, "can't get input reader")
 	}
 	count_rows := 0
 	chunkPaths := []string{}
@@ -64,7 +62,7 @@
 			}
 			row, err := inputReader.Read()
 			if err != nil {
-				return errors.Wrap(err, fn)
+				return errors.Wrap(err, "can't read from input reader")
 			}
 			if f.WithHeader && f.headers == nil {
 				f.headers = row
@@ -75,7 +73,7 @@
 		}
 		batchChan.Close()
 		if inputReader.Err() != nil {
-			return errors.Wrap(inputReader.Err(), fn)
+			return errors.Wrap(inputReader.Err(), "input reader encountered an error")
 		}
 		return nil
 	})
@@ -104,7 +102,7 @@
 		return nil
 	})
 	if err != nil {
-		return nil, errors.Wrap(err, fn)
+		return nil, errors.Wrap(err, "can't process batching channel")
 	}
 	f.totalRows = count_rows
 	return chunkPaths, nil
diff --git a/file/shuffle.go b/file/shuffle.go
index 8cd4db2..643a971 100644
--- a/file/shuffle.go
+++ b/file/shuffle.go
@@ -18,7 +18,7 @@ import (
 
 // CreateSortedChunks Scan a file and divide it into small sorted chunks.
 // Store all the chunks in a folder and return all the paths.
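// The Shuffle entry point above threads an explicit seed through so a shuffle
// run can be reproduced bit-for-bit. Independent of this repo's implementation
// (the hunk below only changes the signature), the standard-library shape of a
// seeded shuffle, using math/rand, looks like this sketch:
//
//	func shuffleRows(rows []string, seed int64) {
//		rng := rand.New(rand.NewSource(seed))
//		rng.Shuffle(len(rows), func(i, j int) {
//			rows[i], rows[j] = rows[j], rows[i]
//		})
//	}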
-func (f *Info) Shuffle(ctx context.Context, chunkFolder string, dumpSize int, maxWorkers int64, k int, seed int64, isGzip bool) ([]string, error) { +func (f *Info) Shuffle(ctx context.Context, chunkFolder string, dumpSize, maxWorkers, k int, seed int64, isGzip bool) ([]string, error) { fn := "scan and shuffle and dump" if dumpSize <= 0 { return nil, errors.Wrap(errors.New("dump size must be greater than 0"), fn) diff --git a/go.mod b/go.mod index 36e9033..f7256f6 100644 --- a/go.mod +++ b/go.mod @@ -1,63 +1,63 @@ module github.com/askiada/external-sort -go 1.17 +go 1.20 require ( - github.com/aws/aws-sdk-go-v2 v1.16.16 - github.com/aws/aws-sdk-go-v2/config v1.17.8 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.34 - github.com/aws/aws-sdk-go-v2/service/s3 v1.27.11 - github.com/cheggaaa/pb/v3 v3.0.8 + github.com/aws/aws-sdk-go-v2 v1.18.0 + github.com/aws/aws-sdk-go-v2/config v1.18.23 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.65 + github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1 + github.com/cheggaaa/pb/v3 v3.1.2 github.com/pkg/errors v0.9.1 - github.com/pkg/sftp v1.13.4 + github.com/pkg/sftp v1.13.5 github.com/sirupsen/logrus v1.9.0 - github.com/spf13/cobra v1.2.1 - github.com/spf13/viper v1.8.1 - github.com/stretchr/testify v1.7.0 - golang.org/x/crypto v0.0.0-20220210151621-f4118a5b28e2 - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c + github.com/spf13/cobra v1.7.0 + github.com/spf13/viper v1.15.0 + github.com/stretchr/testify v1.8.2 + golang.org/x/crypto v0.8.0 + golang.org/x/sync v0.2.0 ) require ( github.com/VividCortex/ewma v1.2.0 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.8 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.12.21 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.14 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.9 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.18 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.17 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 // indirect - github.com/aws/smithy-go v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.13.22 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.10 // indirect + 
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.18.11 // indirect + github.com/aws/smithy-go v1.13.5 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/fatih/color v1.13.0 // indirect - github.com/fsnotify/fsnotify v1.4.9 // indirect + github.com/fatih/color v1.15.0 // indirect + github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kr/fs v0.1.0 // indirect - github.com/magiconair/properties v1.8.5 // indirect - github.com/mattn/go-colorable v0.1.12 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect - github.com/mattn/go-runewidth v0.0.13 // indirect - github.com/mitchellh/mapstructure v1.4.1 // indirect - github.com/pelletier/go-toml v1.9.3 // indirect + github.com/magiconair/properties v1.8.7 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.18 // indirect + github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/pelletier/go-toml/v2 v2.0.7 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/spf13/afero v1.6.0 // indirect - github.com/spf13/cast v1.3.1 // indirect + github.com/rivo/uniseg v0.4.4 // indirect + github.com/spf13/afero v1.9.5 // indirect + github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/subosito/gotenv v1.2.0 // indirect - golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect - golang.org/x/text v0.3.6 // indirect - gopkg.in/ini.v1 v1.62.0 // indirect + github.com/subosito/gotenv v1.4.2 // indirect + golang.org/x/sys v0.8.0 // indirect + golang.org/x/text v0.9.0 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 86f0dcb..8c94da5 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,7 @@ cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.44.3/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= @@ -15,9 +16,7 @@ cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOY cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= -cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= -cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= -cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= 
+cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -26,7 +25,6 @@ cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4g cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -36,59 +34,53 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= -github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aws/aws-sdk-go-v2 v1.16.16 h1:M1fj4FE2lB4NzRb9Y0xdWsn2P0+2UHVxwKyOa4YJNjk= -github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.8 h1:tcFliCWne+zOuUfKNRn8JdFBuWPDuISDH08wD2ULkhk= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.8/go.mod h1:JTnlBSot91steJeti4ryyu/tLd4Sk84O5W22L7O2EQU= -github.com/aws/aws-sdk-go-v2/config v1.17.8 h1:b9LGqNnOdg9vR4Q43tBTVWk4J6F+W774MSchvKJsqnE= -github.com/aws/aws-sdk-go-v2/config v1.17.8/go.mod h1:UkCI3kb0sCdvtjiXYiU4Zx5h07BOpgBTtkPu/49r+kA= -github.com/aws/aws-sdk-go-v2/credentials v1.12.21 h1:4tjlyCD0hRGNQivh5dN8hbP30qQhMLBE/FgQR1vHHWM= -github.com/aws/aws-sdk-go-v2/credentials v1.12.21/go.mod h1:O+4XyAt4e+oBAoIwNUYkRg3CVMscaIJdmZBOcPgJ8D8= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 h1:r08j4sbZu/RVi+BNxkBJwPMUYY3P8mgSDuKkZ/ZN1lE= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds 
v1.12.17/go.mod h1:yIkQcCDYNsZfXpd5UX2Cy+sWA1jPgIhGTw9cOBzfVnQ= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.34 h1:1PNtaCM+2ruo1dfYL2RweUdtbuPvinjAejjNcPa/RQY= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.34/go.mod h1:+Six+CXNHYllXam32j+YW8ixk82+am345ei89kEz8p4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 h1:s4g/wnzMf+qepSNgTvaQQHNxyMLKSawNhKCPNy++2xY= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 h1:/K482T5A3623WJgWT8w1yRAFK4RzGzEl7y39yhtn9eA= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg= -github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 h1:wj5Rwc05hvUSvKuOF29IYb9QrCLjU+rHAy/x/o0DK2c= -github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24/go.mod h1:jULHjqqjDlbyTa7pfM7WICATnOv+iOhjletM3N0Xbu8= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.14 h1:ZSIPAkAsCCjYrhqfw2+lNzWDzxzHXEckFkTePL5RSWQ= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.14/go.mod h1:AyGgqiKv9ECM6IZeNQtdT8NnMvUb3/2wokeq2Fgryto= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.9 h1:Lh1AShsuIJTwMkoxVCAYPJgNG5H+eN6SmoUn8nOZ5wE= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.9/go.mod h1:a9j48l6yL5XINLHLcOKInjdvknN+vWqPBxqeIDw7ktw= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.18 h1:BBYoNQt2kUZUUK4bIPsKrCcjVPUMNsgQpNAwhznK/zo= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.18/go.mod h1:NS55eQ4YixUJPTC+INxi2/jCqe1y2Uw3rnh9wEOVJxY= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17 h1:Jrd/oMh0PKQc6+BowB+pLEwLIgaQF29eYbe7E1Av9Ug= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17/go.mod h1:4nYOrY41Lrbk2170/BGkcJKBhws9Pfn8MG3aGqjjeFI= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.17 h1:HfVVR1vItaG6le+Bpw6P4midjBDMKnjMyZnw9MXYUcE= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.17/go.mod h1:YqMdV+gEKCQ59NrB7rzrJdALeBIsYiVi8Inj3+KcqHI= -github.com/aws/aws-sdk-go-v2/service/s3 v1.27.11 h1:3/gm/JTX9bX8CpzTgIlrtYpB3EVBDxyg/GY/QdcIEZw= -github.com/aws/aws-sdk-go-v2/service/s3 v1.27.11/go.mod h1:fmgDANqTUCxciViKl9hb/zD5LFbvPINFRgWhDbR+vZo= -github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 h1:pwvCchFUEnlceKIgPUouBJwK81aCkQ8UDMORfeFtW10= -github.com/aws/aws-sdk-go-v2/service/sso v1.11.23/go.mod h1:/w0eg9IhFGjGyyncHIQrXtU8wvNsTJOP0R6PPj0wf80= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 h1:OwhhKc1P9ElfWbMKPIbMMZBV6hzJlL2JKD76wNNVzgQ= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6/go.mod h1:csZuQY65DAdFBt1oIjO5hhBR49kQqop4+lcuCjf2arA= -github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 h1:9pPi0PsFNAGILFfPCk8Y0iyEBGc6lu6OQ97U7hmdesg= -github.com/aws/aws-sdk-go-v2/service/sts v1.16.19/go.mod h1:h4J3oPZQbxLhzGnk+j9dfYHi5qIOVJ5kczZd658/ydM= -github.com/aws/smithy-go v1.13.3 h1:l7LYxGuzK6/K+NzJ2mC+VvLUbae0sL3bXU//04MkmnA= -github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= +github.com/aws/aws-sdk-go-v2 v1.18.0 h1:882kkTpSFhdgYRKVZ/VCgf7sd0ru57p2JCxz4/oN5RY= +github.com/aws/aws-sdk-go-v2 v1.18.0/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 
h1:dK82zF6kkPeCo8J1e+tGx4JdvDIQzj7ygIoLg8WMuGs= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10/go.mod h1:VeTZetY5KRJLuD/7fkQXMU6Mw7H5m/KP2J5Iy9osMno= +github.com/aws/aws-sdk-go-v2/config v1.18.23 h1:gc3lPsAnZpwfi2exupmgHfva0JiAY2BWDg5JWYlmA28= +github.com/aws/aws-sdk-go-v2/config v1.18.23/go.mod h1:rx0ruaQ+gk3OrLFHRRx56lA//XxP8K8uPzeNiKNuWVY= +github.com/aws/aws-sdk-go-v2/credentials v1.13.22 h1:Hp9rwJS4giQ48xqonRV/s7QcDf/wxF6UY7osRmBabvI= +github.com/aws/aws-sdk-go-v2/credentials v1.13.22/go.mod h1:BfNcm6A9nSd+bzejDcMJ5RE+k6WbkCwWkQil7q4heRk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 h1:jJPgroehGvjrde3XufFIJUZVK5A2L9a3KwSFgKy9n8w= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3/go.mod h1:4Q0UFP0YJf0NrsEuEYHpM9fTSEVnD16Z3uyEF7J9JGM= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.65 h1:4irvSxFf0u7pQdtpmUoDSjvMNpOG/8yDUq3orwd9qdg= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.65/go.mod h1:BAWKiL53LT19UMewYr9YhZ8xPO69u6NwmGUjSjRwUdM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 h1:kG5eQilShqmJbv11XL1VpyDbaEJzWxd4zRiCG30GSn4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33/go.mod h1:7i0PF1ME/2eUPFcjkVIwq+DOygHEoK92t5cDqNgYbIw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 h1:vFQlirhuM8lLlpI7imKOMsjdQLuN9CPi+k44F/OFVsk= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27/go.mod h1:UrHnn3QV/d0pBZ6QBAEQcqFLf8FAzLmoUfPVIueOvoM= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34 h1:gGLG7yKaXG02/jBlg210R7VgQIotiQntNhsCFejawx8= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34/go.mod h1:Etz2dj6UHYuw+Xw830KfzCfWGMzqvUTCjUj5b76GVDc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25 h1:AzwRi5OKKwo4QNqPf7TjeO+tK8AyOK3GVSwmRPo7/Cs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25/go.mod h1:SUbB4wcbSEyCvqBxv/O/IBf93RbEze7U7OnoTlpPB+g= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 h1:y2+VQzC6Zh2ojtV2LoC0MNwHWc6qXv/j2vrQtlftkdA= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11/go.mod h1:iV4q2hsqtNECrfmlXyord9u4zyuFEJX9eLgLpSPzWA8= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28 h1:vGWm5vTpMr39tEZfQeDiDAMgk+5qsnvRny3FjLpnH5w= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28/go.mod h1:spfrICMD6wCAhjhzHuy6DOZZ+LAIY10UxhUmLzpJTTs= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27 h1:0iKliEXAcCa2qVtRs7Ot5hItA2MsufrphbRFlz1Owxo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27/go.mod h1:EOwBD4J4S5qYszS5/3DpkejfuK+Z5/1uzICfPaZLtqw= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2 h1:NbWkRxEEIRSCqxhsHQuMiTH7yo+JZW1gp8v3elSVMTQ= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2/go.mod h1:4tfW5l4IAB32VWCDEBxCRtR9T4BWy4I4kr1spr8NgZM= +github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1 h1:O+9nAy9Bb6bJFTpeNFtd9UfHbgxO1o4ZDAM9rQp5NsY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1/go.mod h1:J9kLNzEiHSeGMyN7238EjJmBpCniVzFda75Gxl/NqB8= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.10 h1:UBQjaMTCKwyUYwiVnUt6toEJwGXsLBI6al083tpjJzY= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.10/go.mod h1:ouy2P4z6sJN70fR3ka3wD3Ro3KezSxU6eKGQI2+2fjI= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 h1:PkHIIJs8qvq0e5QybnZoG1K/9QTrLr9OsqCIo59jOBA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10/go.mod h1:AFvkxc8xfBe8XA+5St5XIHHrQQtkxqrRincx4hmMHOk= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.11 h1:uBE+Zj478pfxV98L6SEpvxYiADNjTlMNY714PJLE7uo= 
+github.com/aws/aws-sdk-go-v2/service/sts v1.18.11/go.mod h1:BgQOMsg8av8jset59jelyPW7NoZcZXLVpDsXunGDrk8= +github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= +github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cheggaaa/pb/v3 v3.0.8 h1:bC8oemdChbke2FHIIGy9mn4DPJ2caZYQnfbRqwmdCoA= -github.com/cheggaaa/pb/v3 v3.0.8/go.mod h1:UICbiLec/XO6Hw6k+BHEtHeQFzzBH4i2/qk/ow1EJTA= +github.com/cheggaaa/pb/v3 v3.1.2 h1:FIxT3ZjOj9XJl0U4o2XbEhjFfZl7jCVCDOGq1ZAB7wQ= +github.com/cheggaaa/pb/v3 v3.1.2/go.mod h1:SNjnd0yKcW+kw0brSusraeDd5Bf1zBfxAzTL2ss3yQ4= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -96,9 +88,7 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -107,20 +97,15 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= -github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod 
h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -132,7 +117,6 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -147,9 +131,6 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -160,12 +141,9 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= 
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -178,163 +156,109 @@ github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= -github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= +github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod 
h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= -github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= -github.com/mattn/go-colorable v0.1.12/go.mod 
h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= -github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= -github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= -github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= -github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ= -github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= +github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= +github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/pelletier/go-toml/v2 v2.0.7 h1:muncTPStnKRos5dpVKULv2FVd4bMOhNePj9CjgDb8Us= +github.com/pelletier/go-toml/v2 
v2.0.7/go.mod h1:eumQOmlWiOPt5WriQQqoM5y18pDHwha2N+QD+EUNTek= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= -github.com/pkg/sftp v1.13.4 h1:Lb0RYJCmgUcBgZosfoi9Y9sbl6+LJgOIgk/2Y4YjMFg= -github.com/pkg/sftp v1.13.4/go.mod h1:LzqnAvaD5TWeNBsZpfKxSYn1MbjWwOsCIAFFJbpIsK8= +github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= +github.com/pkg/sftp v1.13.5 h1:a3RLUqkyjYRtBTZJZ1VRrKbN3zhuPLlUc3sphVz81go= +github.com/pkg/sftp v1.13.5/go.mod h1:wHDZ0IZX6JcBYRK1TH9bcVq8G7TLpVHYIGJRFnmPfxg= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= +github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= -github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= -github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= -github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v1.2.1 h1:+KmjbUw1hriSNMF55oPrkZcb27aECyrj8V2ytv7kWDw= -github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk= +github.com/spf13/afero v1.9.5 h1:stMpOSZFs//0Lv29HduCmli3GUfpFoF3Y1Q/aXj/wVM= +github.com/spf13/afero v1.9.5/go.mod 
h1:UBogFpq8E9Hx+xc5CNTTEpTnuHVmXDwZcZcE1eb/UhQ= +github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= +github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.8.1 h1:Kq1fyeebqsBfbjZj4EL7gj2IO0mMaiyjYUWcUsl2O44= -github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= +github.com/spf13/viper v1.15.0 h1:js3yy885G8xwJa6iOISGFwd+qlUo5AvyXb7CiihdtiU= +github.com/spf13/viper v1.15.0/go.mod h1:fFcTBJxvhhzSJiZy8n+PeW6t8l+KeT/uTARa0jHOQLA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= -github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= +github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= -go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io 
v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= -go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/crypto v0.0.0-20220210151621-f4118a5b28e2 h1:XdAboW3BNMv9ocSCOk/u1MFioZGzCNkiJZ19v9Oe3Ig= -golang.org/x/crypto v0.0.0-20220210151621-f4118a5b28e2/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ= +golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -358,7 +282,6 @@ golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRu golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= @@ -369,11 +292,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -400,12 +320,9 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -416,9 +333,6 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -429,11 +343,9 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= +golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -443,11 +355,9 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -466,31 +376,29 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -500,7 +408,6 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3 
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -510,7 +417,6 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -533,7 +439,6 @@ golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= @@ -542,9 +447,8 @@ golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= -golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -568,9 +472,6 @@ google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz513 google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= -google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= -google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= -google.golang.org/api v0.44.0/go.mod h1:EBOGZqzyhtvMDoxwS97ctnh0zUmYY6CxqXsc1AvkYD8= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -601,7 +502,6 @@ google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= @@ -613,12 +513,8 @@ google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= -google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ 
-632,13 +528,9 @@ google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3Iji google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -649,22 +541,19 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU= -gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/internal/env.go b/internal/env.go index 8fb0490..00bc065 100644 --- a/internal/env.go +++ b/internal/env.go @@ -31,7 +31,7 @@ var ( OutputFile string ChunkFolder string ChunkSize int - MaxWorkers int64 + MaxWorkers int OutputBufferSize int S3Region string @@ -51,6 +51,6 @@ func init() { viper.SetDefault(TsvFieldsName, []string{"0"}) viper.SetDefault(S3RegionName, "eu-west-1") - viper.SetDefault(S3RetryMaxAttemptsName, 10) + viper.SetDefault(S3RetryMaxAttemptsName, 10) //nolint //gomnd viper.SetDefault(IsGzipName, false) } diff --git a/internal/rw/rw.go b/internal/rw/rw.go index fa89a15..2fb4fab 100644 --- a/internal/rw/rw.go +++ b/internal/rw/rw.go @@ -66,12 +66,12 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) if err != nil { return errors.Wrap(err, "can't create s3 client") } - files := []*bucket.DownloadFileInfo{} + files := []*bucket.S3FileInfo{} for _, inputFile := range inputFiles { u, _ := url.Parse(inputFile) u.Path = strings.TrimLeft(u.Path, "/") logger.Debugf("Proto: %q, Bucket: %q, Key: %q", u.Scheme, u.Host, u.Path) - files = append(files, &bucket.DownloadFileInfo{ + files = append(files, &bucket.S3FileInfo{ Bucket: u.Host, Key: u.Path, }) @@ -81,7 +81,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) i.Input = pr i.inputPipe = pr i.g.Go(func() error { - defer pw.Close() // nolint:errcheck //no need to check this error + defer pw.Close() //nolint:errcheck //no need to check this error err := s3Api.Download(i.dCtx, pw, files...) 
if err != nil { return errors.Wrap(err, "can't download files") @@ -124,7 +124,7 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e i.Output = pw i.outputPipe = pw i.g.Go(func() error { - defer pr.Close() // nolint:errcheck //no need to check this error + defer pr.Close() //nolint:errcheck //no need to check this error err := s3Api.Upload(i.dCtx, pr, u.Host, u.Path) if err != nil { return errors.Wrapf(err, "can't upload file %s", outputFile) diff --git a/main.go b/main.go index 9b37f11..687b35a 100644 --- a/main.go +++ b/main.go @@ -50,7 +50,7 @@ func newCommand() *command { root.rootCmd.PersistentFlags().StringVarP(&internal.ChunkFolder, internal.ChunkFolderName, "c", viper.GetString(internal.ChunkFolderName), "chunk folder.") root.rootCmd.PersistentFlags().IntVarP(&internal.ChunkSize, internal.ChunkSizeName, "s", viper.GetInt(internal.ChunkSizeName), "chunk size.") - root.rootCmd.PersistentFlags().Int64VarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt64(internal.MaxWorkersName), "max worker.") + root.rootCmd.PersistentFlags().IntVarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt(internal.MaxWorkersName), "max worker.") root.rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.") root.sortCmd.PersistentFlags().StringSliceVarP(&internal.TsvFields, internal.TsvFieldsName, "t", viper.GetStringSlice(internal.TsvFieldsName), "") diff --git a/main_test.go b/main_test.go index 63c755a..d369427 100644 --- a/main_test.go +++ b/main_test.go @@ -72,8 +72,29 @@ func TestBasics(t *testing.T) { outputFilename: "testdata/chunks/output.tsv", }, "100 elems": { - filename: "testdata/100elems.tsv", - expectedOutput: []string{"3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -227,6 +248,7 @@ func Test100ElemsWithHeaders(t *testing.T) { }) } } + func Test100ElemsWithHeadersWithDuplicates(t *testing.T) { tcs := map[string]struct { filename string @@ -275,7 +297,8 @@ func TestTsvKey(t *testing.T) { }{ "Tsv file": { filename: "testdata/multifields.tsv", - expectedOutput: []string{"3 D equipment", + expectedOutput: []string{ + "3 D equipment", "7 G inflation", "6 H 
delivery", "9 I child", @@ -284,7 +307,8 @@ func TestTsvKey(t *testing.T) { "1 N guidance", "10 S feedback", "2 T library", - "4 Z news"}, + "4 Z news", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -316,6 +340,7 @@ func TestTsvKey(t *testing.T) { }) } } + func prepareChunksShuffle(ctx context.Context, t *testing.T, filename, outputFilename string, chunkSize int, mergeSort bool, bufferSize int, withHeaders bool, dropDuplicates, isGzip bool) *file.Info { t.Helper() i := rw.NewInputOutput(ctx) @@ -346,6 +371,7 @@ func prepareChunksShuffle(ctx context.Context, t *testing.T, filename, outputFil return fI } + func Test100ElemsShuffle(t *testing.T) { tcs := map[string]struct { filename string diff --git a/reader/gzip_separated_values_test.go b/reader/gzip_separated_values_test.go new file mode 100644 index 0000000..f9bcd31 --- /dev/null +++ b/reader/gzip_separated_values_test.go @@ -0,0 +1,48 @@ +package reader_test + +import ( + "bufio" + "context" + "os" + "testing" + + "github.com/askiada/external-sort/internal/rw" + "github.com/askiada/external-sort/reader" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test(t *testing.T) { + f, err := os.Open("/mnt/c/Users/Alex/Downloads/recordings.59.tsv.gz") + require.NoError(t, err) + r, err := reader.NewGZipSeparatedValues(bufio.NewReader(f), '\t') + require.NoError(t, err) + count := 0 + for r.Next() { + row, err := r.Read() + require.NoError(t, err) + _ = row + count++ + } + assert.Equal(t, 2853701, count) + require.NoError(t, r.Err()) +} + +func TestS3(t *testing.T) { + ctx := context.Background() + i := rw.NewInputOutput(ctx) + err := i.SetInputReader(ctx, "s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz") + require.NoError(t, err) + + r, err := reader.NewGZipSeparatedValues(i.Input, '\t') + require.NoError(t, err) + count := 0 + for r.Next() { + row, err := r.Read() + require.NoError(t, err) + _ = row + count++ + } + assert.Equal(t, 2853701, count) + require.NoError(t, r.Err()) +} diff --git a/reader/std_scanner.go b/reader/std_scanner.go index 4c1fb22..c16235e 100644 --- a/reader/std_scanner.go +++ b/reader/std_scanner.go @@ -42,9 +42,11 @@ func (s *StdScanner) Next() bool { } return next } + func (s *StdScanner) Read() (interface{}, error) { return s.r.Text(), nil } + func (s *StdScanner) Err() error { return s.r.Err() } @@ -78,6 +80,7 @@ func (s *StdSliceScanner) Next() bool { } return next } + func (s *StdSliceScanner) Read() (interface{}, error) { line := s.r.Text() before, after, found := strings.Cut(line, "##!!##") @@ -86,6 +89,7 @@ func (s *StdSliceScanner) Read() (interface{}, error) { } return []string{before, after}, nil } + func (s *StdSliceScanner) Err() error { return s.r.Err() } diff --git a/vector/key/string_key.go b/vector/key/string_key.go index d4452e5..f4fec6e 100644 --- a/vector/key/string_key.go +++ b/vector/key/string_key.go @@ -29,6 +29,7 @@ func AllocateUpperString(line string) (Key, error) { func (k *UpperString) Less(other Key) bool { return k.value < other.(*UpperString).value } + func (k *UpperString) Equal(other Key) bool { return k.value == other.(*UpperString).value } From c31ce2dbd60f28727ceca913ec7398499b724d45 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Mon, 8 May 2023 11:00:24 +0200 Subject: [PATCH 08/16] feat(ci) add labeler --- .github/labeler.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 
.github/labeler.yml diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000..03c280b --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,31 @@ +s3: + - bucket/* + - bucket/**/* + +file: + - file/* + - file/**/* + +internal: + - internal/* + - internal/**/* + +reader: + - reader/* + - reader/**/* + +sftp: + - sftp/* + - sftp/**/* + +vector: + - vector/* + - vector/**/* + +writer: + - writer/* + - writer/**/* + +ci: + - .github/* + - .github/**/* From b01537e771aeb5fd4e075ebbbefd6eed01f54ea2 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Mon, 8 May 2023 12:08:30 +0200 Subject: [PATCH 09/16] feat(make) add ci_test --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 47dcb1c..5c5cf66 100644 --- a/Makefile +++ b/Makefile @@ -33,4 +33,8 @@ build: build_docker: ## Build a docker image from current git sha @docker build \ --build-arg BUILDKIT_INLINE_CACHE=1 \ - -t $(docker_image):$(tag) . \ No newline at end of file + -t $(docker_image):$(tag) . + +.PHONY: ci_tests +ci_tests: ## Run tests for CI environment. + go test -trimpath --timeout=10m -failfast -v -race -covermode=atomic -coverprofile=coverage.out ./... From 38d16a37d9ec833a76705402a85a47e679ffa664 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Mon, 8 May 2023 12:12:39 +0200 Subject: [PATCH 10/16] lint --- bucket/s3.go | 2 + file/batchingchannels/batching_channel.go | 12 +- .../batchingchannels/batching_channel_test.go | 17 +- file/chunk.go | 23 +-- file/file.go | 14 +- file/shuffle.go | 5 +- file/sort.go | 16 +- file/utils.go | 7 +- internal/rw/rw.go | 33 ++-- main.go | 22 ++- main_test.go | 164 ++++++++++++++++-- reader/contract.go | 5 + reader/gzip_separated_values_test.go | 2 + vector/vector.go | 2 +- writer/contract.go | 4 + writer/std_writer.go | 10 +- 16 files changed, 256 insertions(+), 82 deletions(-) diff --git a/bucket/s3.go b/bucket/s3.go index d6cf190..1b2e3b0 100644 --- a/bucket/s3.go +++ b/bucket/s3.go @@ -88,6 +88,7 @@ func (s *seqWriterAt) WriteAt(p []byte, _ int64) (n int, err error) { if s.progressFunc != nil { s.progressFunc(n) } + return n, errors.Wrap(err, "can't write bytes at offset") } @@ -116,5 +117,6 @@ func (s *S3) Download(ctx context.Context, writer io.Writer, filesinfo ...*S3Fil return errors.Wrapf(err, "download failed for bucket %s and key %s", fileinfo.Bucket, fileinfo.Key) } } + return nil } diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go index 24246a8..64be092 100644 --- a/file/batchingchannels/batching_channel.go +++ b/file/batchingchannels/batching_channel.go @@ -8,8 +8,8 @@ import ( "golang.org/x/sync/errgroup" ) -// BatchingChannel implements the Channel interface, with the change that instead of producing individual elements -// on Out(), it batches together the entire internal buffer each time. Trying to construct an unbuffered batching channel +// BatchingChannel is a standard channel, with the change that instead of producing individual elements +// on Out(), it batches together n elements each time. Trying to construct an unbuffered batching channel // will panic, that configuration is not supported (and provides no benefit over an unbuffered NativeChannel).
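A minimal usage sketch of the batching channel as it stands after this lint pass; allocate would come from vector.DefaultVector, and rows, maxWorker and size are placeholder names, none of which appear in the diff:

	// processRows is a sketch, not part of the patch: it feeds raw rows through a
	// BatchingChannel and handles each batch with at most maxWorker concurrent workers.
	// Assumed imports: context, and the project's batchingchannels and vector packages.
	func processRows(ctx context.Context, allocate *vector.Allocate, rows []interface{}, maxWorker, size int) error {
		bChan, err := batchingchannels.NewBatchingChannel(ctx, allocate, maxWorker, size)
		if err != nil {
			return err // size == 0 or size < 0 now surfaces here instead of panicking
		}
		// Producer: push raw rows in, then Close() to flush the last partial batch.
		go func() {
			for _, row := range rows {
				bChan.In() <- row
			}
			bChan.Close()
		}()
		// Consumer: ProcessOut calls the callback once per batch, bounded by the
		// errgroup's SetLimit(maxWorker), and returns the first worker error.
		return bChan.ProcessOut(func(batch vector.Vector) error {
			for i := 0; i < batch.Len(); i++ {
				_ = batch.Get(i).Row // handle one element
			}
			return nil
		})
	}
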
type BatchingChannel struct { input chan interface{} @@ -22,12 +22,12 @@ type BatchingChannel struct { maxWorker int } -func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker, size int) *BatchingChannel { +func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker, size int) (*BatchingChannel, error) { if size == 0 { - panic("channels: BatchingChannel does not support unbuffered behaviour") + return nil, errors.New("does not support unbuffered behaviour") } if size < 0 { - panic("channels: invalid negative size in NewBatchingChannel") + return nil, errors.New("does not support negative size") } errGrp, errGrpContext := errgroup.WithContext(ctx) errGrp.SetLimit(maxWorker) @@ -41,7 +41,7 @@ func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorke internalContext: errGrpContext, } go bChan.batchingBuffer() - return bChan + return bChan, nil } func (ch *BatchingChannel) In() chan<- interface{} { diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index af3cfa4..8b40ad5 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -12,6 +12,7 @@ import ( "github.com/askiada/external-sort/vector/key" "github.com/pkg/errors" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type intKey struct { @@ -27,6 +28,7 @@ func allocateInt(row interface{}) (key.Key, error) { if err != nil { return nil, err } + return &intKey{num}, nil } @@ -84,6 +86,7 @@ func testBatches(t *testing.T, bChan *batchingchannels.BatchingChannel) { got <- val } time.Sleep(3 * time.Millisecond) + return nil }) if err != nil { @@ -98,20 +101,24 @@ func testBatches(t *testing.T, bChan *batchingchannels.BatchingChannel) { func TestBatchingChannel(t *testing.T) { allocate := vector.DefaultVector(allocateInt, nil, nil) - bChan := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50) + bChan, err := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50) + require.NoError(t, err) testBatches(t, bChan) - bChan = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 3) + bChan, err = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 3) + require.NoError(t, err) testBatches(t, bChan) - bChan = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 1) + bChan, err = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 1) + require.NoError(t, err) testChannelConcurrentAccessors(t, bChan) } func TestBatchingChannelCap(t *testing.T) { allocate := vector.DefaultVector(allocateInt, nil, nil) - ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 5) - if ch.Cap() != 5 { + bChan, err := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 5) + require.NoError(t, err) + if bChan.Cap() != 5 { t.Error("incorrect capacity on infinite channel") } } diff --git a/file/chunk.go b/file/chunk.go index accc16a..2345ea1 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -2,6 +2,7 @@ package file import ( "os" + "path/filepath" "sort" "github.com/askiada/external-sort/reader" @@ -47,28 +48,28 @@ type chunks struct { // new Create a new chunk and initialise it. 
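The allocateInt helper in the test above shows the shape every key allocator takes: assert the raw row type, parse it, and wrap the result in a key.Key. A sketch of the same pattern for a hypothetical float-keyed input, using strconv and github.com/pkg/errors; floatKey and allocateFloat are illustrative names, not part of this patch:

	// floatKey is a hypothetical key type; key.Key only requires Less and Equal.
	type floatKey struct {
		value float64
	}

	func (k *floatKey) Less(other key.Key) bool  { return k.value < other.(*floatKey).value }
	func (k *floatKey) Equal(other key.Key) bool { return k.value == other.(*floatKey).value }

	func allocateFloat(row interface{}) (key.Key, error) {
		line, ok := row.(string)
		if !ok {
			return nil, errors.Errorf("can't convert interface{} to string: %+v", row)
		}
		num, err := strconv.ParseFloat(line, 64)
		if err != nil {
			return nil, errors.Wrap(err, "can't parse float key")
		}
		return &floatKey{value: num}, nil
	}

It would plug in exactly like allocateInt: allocate := vector.DefaultVector(allocateFloat, nil, nil).
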
func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, withHeader bool) error { - f, err := os.Open(chunkPath) + chunkFile, err := os.Open(filepath.Clean(chunkPath)) if err != nil { - return err + return errors.Wrap(err, "can't open chunk file") } - reader, err := allocate.FnReader(f) + rder, err := allocate.FnReader(chunkFile) if err != nil { - return err + return errors.Wrap(err, "can't read chunk file") } if withHeader { - reader.Next() + rder.Next() } elem := &chunkInfo{ filename: chunkPath, - file: f, - reader: reader, + file: chunkFile, + reader: rder, buffer: allocate.Vector(size, allocate.Key), } err = elem.pullSubset(size) if err != nil { - return err + return errors.Wrap(err, "can't pull chunk subset") } c.list = append(c.list, elem) return nil @@ -79,7 +80,7 @@ func (c *chunks) close() error { for _, chunk := range c.list { err := chunk.file.Close() if err != nil { - return errors.Wrap(err, "close") + return errors.Wrapf(err, "can't close chunk file %s", chunk.filename) } } return nil @@ -92,11 +93,11 @@ func (c *chunks) shrink(toShrink []int) error { shrinkIndex -= i err := c.list[shrinkIndex].file.Close() if err != nil { - return err + return errors.Wrapf(err, "can't close chunk file %s", c.list[shrinkIndex].filename) } err = os.Remove(c.list[shrinkIndex].filename) if err != nil { - return err + return errors.Wrapf(err, "can't remove chunk file %s", c.list[shrinkIndex].filename) } // we want to preserve order c.list = append(c.list[:shrinkIndex], c.list[shrinkIndex+1:]...) diff --git a/file/file.go b/file/file.go index 11f9906..15a5164 100644 --- a/file/file.go +++ b/file/file.go @@ -49,12 +49,15 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if err != nil { return nil, errors.Wrap(err, "can't get input reader") } - count_rows := 0 + countRows := 0 chunkPaths := []string{} mu := sync.Mutex{} - batchChan := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + batchChan, err := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + if err != nil { + return nil, errors.Wrap(err, "can't create new batching channel") + } batchChan.G.Go(func() error { for inputReader.Next() { if f.PrintMemUsage { @@ -69,7 +72,7 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS } else { batchChan.In() <- row } - count_rows++ + countRows++ } batchChan.Close() if inputReader.Err() != nil { @@ -87,10 +90,13 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS mu.Unlock() v.Sort() if f.WithHeader { + mu.Lock() err = v.PushFrontNoKey(f.headers) if err != nil { + mu.Unlock() return err } + mu.Unlock() } err := f.Allocate.Dump(v, chunkPath) if err != nil { @@ -104,6 +110,6 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if err != nil { return nil, errors.Wrap(err, "can't process batching channel") } - f.totalRows = count_rows + f.totalRows = countRows return chunkPaths, nil } diff --git a/file/shuffle.go b/file/shuffle.go index 643a971..61d5125 100644 --- a/file/shuffle.go +++ b/file/shuffle.go @@ -56,7 +56,10 @@ func (f *Info) Shuffle(ctx context.Context, chunkFolder string, dumpSize, maxWor mu := sync.Mutex{} r := rand.New(rand.NewSource(seed)) - batchChan := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + batchChan, err := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + if err != nil { + return nil, errors.Wrap(err, "can't create new batching 
channel") + } batchChan.G.Go(func() error { for inputReader.Next() { if f.PrintMemUsage { diff --git a/file/sort.go b/file/sort.go index 60a2eb1..aa2e0fb 100644 --- a/file/sort.go +++ b/file/sort.go @@ -3,10 +3,12 @@ package file import ( "fmt" "runtime" + "strings" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/writer" "github.com/cheggaaa/pb/v3" + "github.com/pkg/errors" ) type MemUsage struct { @@ -28,10 +30,12 @@ func (mu *MemUsage) Collect() { mu.NumGc = m.NumGC } -func (mu *MemUsage) PrintMemUsage() { - fmt.Printf("Max Alloc = %v MiB", bToMb(mu.MaxAlloc)) - fmt.Printf("\tMax Sys = %v MiB", bToMb(mu.MaxSys)) - fmt.Printf("\tNumGC = %v\n", mu.NumGc) +func (mu *MemUsage) String() string { + builder := strings.Builder{} + builder.WriteString(fmt.Sprintf("Max Alloc = %v MiB", bToMb(mu.MaxAlloc))) + builder.WriteString(fmt.Sprintf(" Max Sys = %v MiB", bToMb(mu.MaxSys))) + builder.WriteString(fmt.Sprintf(" NumGC = %v\n", mu.NumGc)) + return builder.String() } func bToMb(b uint64) uint64 { @@ -115,7 +119,7 @@ func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err e } bar.Finish() if f.PrintMemUsage { - f.mu.PrintMemUsage() + logger.Debugln(f.mu.String()) } return chunks.close() } @@ -124,7 +128,7 @@ func WriteBuffer(w writer.Writer, rows vector.Vector) error { for i := 0; i < rows.Len(); i++ { err := w.Write(rows.Get(i).Row) if err != nil { - return err + return errors.Wrap(err, "can't write buffer") } } rows.Reset() diff --git a/file/utils.go b/file/utils.go index 5b56ba9..9746804 100644 --- a/file/utils.go +++ b/file/utils.go @@ -10,14 +10,13 @@ import ( // clearChunkFolder Remove all files from a folder. func clearChunkFolder(folder string) error { - fn := "clear folder" err := os.MkdirAll(folder, os.ModePerm) if err != nil { - return errors.Wrap(err, fn) + return errors.Wrap(err, "can't create folder") } dir, err := os.ReadDir(folder) if err != nil { - return errors.Wrap(err, fn) + return errors.Wrap(err, "can't read chunk folder") } for _, d := range dir { if !strings.HasPrefix(d.Name(), "chunk") { @@ -25,7 +24,7 @@ func clearChunkFolder(folder string) error { } err = os.RemoveAll(path.Join(folder, d.Name())) if err != nil { - return errors.Wrap(err, fn) + return errors.Wrap(err, "can't clear chunk folder") } } return nil diff --git a/internal/rw/rw.go b/internal/rw/rw.go index 2fb4fab..183af77 100644 --- a/internal/rw/rw.go +++ b/internal/rw/rw.go @@ -5,6 +5,7 @@ import ( "io" "net/url" "os" + "path/filepath" "strings" "github.com/askiada/external-sort/bucket" @@ -20,28 +21,28 @@ import ( var logger = logrus.StandardLogger() type InputOutput struct { - s3Client bucket.S3ClientAPI - Input io.Reader - inputPipe *io.PipeReader - Output io.Writer - outputPipe *io.PipeWriter - g *errgroup.Group - dCtx context.Context + s3Client bucket.S3ClientAPI + Input io.Reader + inputPipe *io.PipeReader + Output io.Writer + outputPipe *io.PipeWriter + g *errgroup.Group + internalCtx context.Context //nolint //containedcontext } func NewInputOutput(ctx context.Context) *InputOutput { g, dCtx := errgroup.WithContext(ctx) return &InputOutput{ - g: g, - dCtx: dCtx, + g: g, + internalCtx: dCtx, } } -func (i *InputOutput) s3Check() error { +func (i *InputOutput) s3Check(ctx context.Context) error { if i.s3Client != nil { return nil } - cfg, err := config.LoadDefaultConfig(context.Background(), + cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(internal.S3Region), config.WithRetryMaxAttempts(internal.S3RetryMaxAttempts), ) @@ -54,7 +55,7 @@ func 
(i *InputOutput) s3Check() error { func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) (err error) { if strings.HasPrefix(inputFiles[0], "s3") || strings.HasPrefix(inputFiles[0], "S3") { - err = i.s3Check() + err = i.s3Check(ctx) if err != nil { return errors.Wrap(err, "can't check s3") } @@ -82,7 +83,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) i.inputPipe = pr i.g.Go(func() error { defer pw.Close() //nolint:errcheck //no need to check this error - err := s3Api.Download(i.dCtx, pw, files...) + err := s3Api.Download(i.internalCtx, pw, files...) if err != nil { return errors.Wrap(err, "can't download files") } @@ -104,7 +105,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (err error) { if strings.HasPrefix(outputFile, "s3") || strings.HasPrefix(outputFile, "S3") { - err = i.s3Check() + err = i.s3Check(ctx) if err != nil { return errors.Wrap(err, "can't check s3") } @@ -125,14 +126,14 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e i.outputPipe = pw i.g.Go(func() error { defer pr.Close() //nolint:errcheck //no need to check this error - err := s3Api.Upload(i.dCtx, pr, u.Host, u.Path) + err := s3Api.Upload(i.internalCtx, pr, u.Host, u.Path) if err != nil { return errors.Wrapf(err, "can't upload file %s", outputFile) } return nil }) } else { - i.Output, err = os.Create(outputFile) + i.Output, err = os.Create(filepath.Clean(outputFile)) if err != nil { return errors.Wrapf(err, "can't create file %s", outputFile) } diff --git a/main.go b/main.go index 687b35a..19d725b 100644 --- a/main.go +++ b/main.go @@ -36,12 +36,18 @@ func newCommand() *command { sortCmd: &cobra.Command{ Use: "sort", Short: "Perform an external sorting on an input file", - RunE: sortRun, + PreRun: func(cmd *cobra.Command, args []string) { + cmd.SetContext(context.WithValue(cmd.Parent().Context(), "cmd", "sort")) + }, + RunE: sortRun, }, shuffleCmd: &cobra.Command{ Use: "shuffle", - Short: "Perform an external sorting on an input file", - RunE: shuffleRun, + Short: "Perform an external shuffling on an input file", + PreRun: func(cmd *cobra.Command, args []string) { + cmd.SetContext(context.WithValue(cmd.Parent().Context(), "cmd", "shuffle")) + }, + RunE: shuffleRun, }, } root.rootCmd.PersistentFlags().BoolVarP(&internal.WithHeader, internal.WithHeaderName, "e", viper.GetBool(internal.WithHeaderName), "Input file has headers.") @@ -65,7 +71,8 @@ func newCommand() *command { func main() { root := newCommand() - cobra.CheckErr(root.rootCmd.Execute()) + ctx := context.Background() + cobra.CheckErr(root.rootCmd.ExecuteContext(ctx)) } func sortRun(cmd *cobra.Command, args []string) error { @@ -76,13 +83,12 @@ func sortRun(cmd *cobra.Command, args []string) error { logger.Infoln("TSV Fields", internal.TsvFields) start := time.Now() - ctx := context.Background() - i := rw.NewInputOutput(ctx) - err := i.SetInputReader(ctx, internal.InputFiles...) + i := rw.NewInputOutput(cmd.Context()) + err := i.SetInputReader(cmd.Context(), internal.InputFiles...)
if err != nil { return err } - err = i.SetOutputWriter(ctx, internal.OutputFile) + err = i.SetOutputWriter(cmd.Context(), internal.OutputFile) if err != nil { return err } diff --git a/main_test.go b/main_test.go index d369427..133be1b 100644 --- a/main_test.go +++ b/main_test.go @@ -25,7 +25,7 @@ func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, i := rw.NewInputOutput(ctx) err := i.SetInputReader(ctx, filename) assert.NoError(t, err) - err = i.SetOutputWriter(ctx, "testdata/chunks/output.tsv") + err = i.SetOutputWriter(ctx, outputFilename) assert.NoError(t, err) fI := &file.Info{ InputReader: i.Input, @@ -140,8 +140,29 @@ func Test100Elems(t *testing.T) { expectedOutput []string }{ "100 elems": { - filename: "testdata/100elems.tsv", - expectedOutput: []string{"3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -179,8 +200,21 @@ func Test100ElemsWithDuplicates(t *testing.T) { expectedOutput []string }{ "100 elems with duplicates": { - filename: "testdata/100elems.tsv", - expectedOutput: []string{"3", "4", "5", "6", "7", "8", "9", "10", "15", "18", "21", "22", "25", "26", "27", "28", "29", "30", "31", "33", "34", "36", "37", "39", "40", "41", "42", "43", "47", "49", "50", "52", "53", "54", "55", "56", "57", "59", "60", "61", "62", "63", "67", "71", "72", "73", "74", "75", "78", "79", "80", "82", "89", "91", "92", "93", "94", "97", "99"}, + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "7", + "8", "9", "10", "15", "18", + "21", "22", "25", "26", "27", + "28", "29", "30", "31", "33", + "34", "36", "37", "39", "40", + "41", "42", "43", "47", "49", + "50", "52", "53", "54", "55", + "56", "57", "59", "60", "61", + "62", "63", "67", "71", "72", + "73", "74", "75", "78", "79", + "80", "82", "89", "91", "92", + "93", "94", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -218,8 +252,29 @@ func Test100ElemsWithHeaders(t *testing.T) { expectedOutput []string }{ "100 elems with headers": { - filename: "testdata/100elemsWithHeaders.tsv", - expectedOutput: []string{"headers", "3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", 
"28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -257,8 +312,21 @@ func Test100ElemsWithHeadersWithDuplicates(t *testing.T) { expectedOutput []string }{ "100 elems with headers and duplicates": { - filename: "testdata/100elemsWithHeaders.tsv", - expectedOutput: []string{"headers", "3", "4", "5", "6", "7", "8", "9", "10", "15", "18", "21", "22", "25", "26", "27", "28", "29", "30", "31", "33", "34", "36", "37", "39", "40", "41", "42", "43", "47", "49", "50", "52", "53", "54", "55", "56", "57", "59", "60", "61", "62", "63", "67", "71", "72", "73", "74", "75", "78", "79", "80", "82", "89", "91", "92", "93", "94", "97", "99"}, + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "7", + "8", "9", "10", "15", "18", + "21", "22", "25", "26", "27", + "28", "29", "30", "31", "33", + "34", "36", "37", "39", "40", + "41", "42", "43", "47", "49", + "50", "52", "53", "54", "55", + "56", "57", "59", "60", "61", + "62", "63", "67", "71", "72", + "73", "74", "75", "78", "79", + "80", "82", "89", "91", "92", + "93", "94", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -373,6 +441,7 @@ func prepareChunksShuffle(ctx context.Context, t *testing.T, filename, outputFil } func Test100ElemsShuffle(t *testing.T) { + t.Skip("to rework") tcs := map[string]struct { filename string outputFilename string @@ -380,8 +449,29 @@ func Test100ElemsShuffle(t *testing.T) { expectedOutput []string }{ "100 elems": { - filename: "testdata/100elems.tsv", - expectedOutput: []string{"3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + 
"29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -411,6 +501,7 @@ func Test100ElemsShuffle(t *testing.T) { } func Test100ElemsShuffleWithHeaders(t *testing.T) { + t.Skip("to rework") tcs := map[string]struct { filename string outputFilename string @@ -418,8 +509,29 @@ func Test100ElemsShuffleWithHeaders(t *testing.T) { expectedOutput []string }{ "100 elems with headers": { - filename: "testdata/100elemsWithHeaders.tsv", - expectedOutput: []string{"headers", "3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } @@ -449,6 +561,7 @@ func Test100ElemsShuffleWithHeaders(t *testing.T) { } func Test100ElemsShuffleGzip(t *testing.T) { + t.Skip("to rework") tcs := map[string]struct { filename string outputFilename string @@ -456,8 +569,29 @@ func Test100ElemsShuffleGzip(t *testing.T) { expectedOutput []string }{ "100 elems with headers": { - filename: "testdata/100elems.tsv.gz", - expectedOutput: []string{"headers", "3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elems.tsv.gz", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", 
"31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv.gz", }, } diff --git a/reader/contract.go b/reader/contract.go index 3589f3f..a50988f 100644 --- a/reader/contract.go +++ b/reader/contract.go @@ -1,7 +1,12 @@ package reader +import ( + "io" +) + type Reader interface { Next() bool Read() (interface{}, error) Err() error } +type Config func(r io.Reader) (Reader, error) diff --git a/reader/gzip_separated_values_test.go b/reader/gzip_separated_values_test.go index f9bcd31..e8e48e0 100644 --- a/reader/gzip_separated_values_test.go +++ b/reader/gzip_separated_values_test.go @@ -13,6 +13,7 @@ import ( ) func Test(t *testing.T) { + t.Skip("to rework") f, err := os.Open("/mnt/c/Users/Alex/Downloads/recordings.59.tsv.gz") require.NoError(t, err) r, err := reader.NewGZipSeparatedValues(bufio.NewReader(f), '\t') @@ -29,6 +30,7 @@ func Test(t *testing.T) { } func TestS3(t *testing.T) { + t.Skip("to rework") ctx := context.Background() i := rw.NewInputOutput(ctx) err := i.SetInputReader(ctx, "s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz") diff --git a/vector/vector.go b/vector/vector.go index 8b4d803..d49cd80 100644 --- a/vector/vector.go +++ b/vector/vector.go @@ -17,7 +17,7 @@ type Allocate struct { Key func(elem interface{}) (key.Key, error) } -func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader func(r io.Reader) (reader.Reader, error), fnWr func(w io.Writer) (writer.Writer, error)) *Allocate { +func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader reader.Config, fnWr writer.Config) *Allocate { return &Allocate{ FnReader: fnReader, FnWriter: fnWr, diff --git a/writer/contract.go b/writer/contract.go index 30a3310..78b4793 100644 --- a/writer/contract.go +++ b/writer/contract.go @@ -1,6 +1,10 @@ package writer +import "io" + type Writer interface { Write(interface{}) error Close() error } + +type Config func(w io.Writer) (Writer, error) diff --git a/writer/std_writer.go b/writer/std_writer.go index 2f36187..c6cc8e8 100644 --- a/writer/std_writer.go +++ b/writer/std_writer.go @@ -48,17 +48,17 @@ type StdSliceWriter struct { func NewStdSliceWriter(w io.Writer, skipFirst, isGzip bool) Writer { var newR *bufio.Writer - s := &StdSliceWriter{ + ssw := &StdSliceWriter{ skipFirst: skipFirst, } if isGzip { - s.gw = gzip.NewWriter(w) - newR = bufio.NewWriter(s.gw) + ssw.gw = gzip.NewWriter(w) + newR = bufio.NewWriter(ssw.gw) } else { newR = bufio.NewWriter(w) } - s.w = newR - return s + ssw.w = newR + return ssw } func (w *StdSliceWriter) Write(elem interface{}) error { From 5cb0e8f37002c8db5298921903ae99b6a0c7b26b Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Mon, 8 May 2023 12:18:25 +0200 Subject: [PATCH 11/16] feat(ci) add basic workflow --- .github/workflows/integration.yml | 61 +++++++++++++++++++++++++++++++ testdata/chunks/output.tsv | 0 2 files changed, 61 insertions(+) create mode 100644 .github/workflows/integration.yml create mode 100644 testdata/chunks/output.tsv diff --git a/.github/workflows/integration.yml 
b/.github/workflows/integration.yml
new file mode 100644
index 0000000..b8724c4
--- /dev/null
+++ b/.github/workflows/integration.yml
@@ -0,0 +1,61 @@
+name: Continuous Integration
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches:
+      - master
+      - feature/*
+      - bugfix/*
+      - refactor/*
+      - chore/*
+
+jobs:
+  label:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+
+    steps:
+      - uses: actions/labeler@v3
+
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+
+      - name: Set up Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: '>=1.20.0'
+
+      - uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+          restore-keys: |
+            ${{ runner.os }}-go-
+
+      - name: Running Tests
+        run: chmod -R +rw ./testdata && make ci_tests
+
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: golangci-lint
+        uses: golangci/golangci-lint-action@v2
+        with:
+          version: v1.52.x
+          args: --timeout 5m0s
diff --git a/testdata/chunks/output.tsv b/testdata/chunks/output.tsv
new file mode 100644
index 0000000..e69de29

From 4130571543866e2438f22c92eccfcc81a0cde624 Mon Sep 17 00:00:00 2001
From: askiada <25521495+askiada@users.noreply.github.com>
Date: Mon, 8 May 2023 13:13:15 +0200
Subject: [PATCH 12/16] lint

---
 .gitignore                                |   2 +-
 file/batchingchannels/batching_channel.go |   9 ++
 file/file.go                              | 166 +++++++++++++---
 file/shuffle.go                           |   4 +-
 file/sort.go                              |  66 +++++----
 internal/progress/contract.go             |   2 +-
 internal/rw/rw.go                         |   4 +-
 main.go                                   |  85 +++++++++--
 main_test.go                              |  17 ---
 sftp/sftp.go                              |   3 +-
 vector/key/int_key.go                     |  12 +-
 writer/std_writer.go                      |   4 +-
 12 files changed, 239 insertions(+), 135 deletions(-)

diff --git a/.gitignore b/.gitignore
index 51c59c4..7cc7ba1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 bench*
 gen*
 chunk_*.tsv
-bin/
\ No newline at end of file
+bin/
diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go
index 64be092..7f54f16 100644
--- a/file/batchingchannels/batching_channel.go
+++ b/file/batchingchannels/batching_channel.go
@@ -22,6 +22,7 @@ type BatchingChannel struct {
 	maxWorker int
 }

+// NewBatchingChannel creates a batching channel.
 func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker, size int) (*BatchingChannel, error) {
 	if size == 0 {
 		return nil, errors.New("does not support unbuffered behaviour")
@@ -41,9 +42,11 @@ func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorke
 		internalContext: errGrpContext,
 	}
 	go bChan.batchingBuffer()
+
 	return bChan, nil
 }

+// In adds an element to the input channel.
 func (ch *BatchingChannel) In() chan<- interface{} {
 	return ch.input
 }
@@ -55,6 +58,7 @@ func (ch *BatchingChannel) Out() <-chan vector.Vector {
 	return ch.output
 }

+// ProcessOut runs the specified function on each batch.
 func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error {
 	for val := range ch.Out() {
 		val := val
@@ -69,18 +73,23 @@ func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error {
 	return nil
 }

+// Len returns the maximum number of elements in a batch.
 func (ch *BatchingChannel) Len() int {
 	return ch.size
 }

+// Cap returns the maximum capacity of a batch.
 func (ch *BatchingChannel) Cap() int {
 	return ch.size
 }

+// Close closes the input channel.
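The batching API reads best as a producer/consumer pair: the producer feeds In() and calls Close() to flush the trailing partial batch, while ProcessOut fans completed batches out across at most maxWorker goroutines. A usage sketch against the methods above (rows and handleBatch are illustrative names, not part of the package):

    ch, err := batchingchannels.NewBatchingChannel(ctx, allocate, 4, 1024)
    if err != nil {
        return err
    }
    go func() {
        for _, row := range rows {
            ch.In() <- row
        }
        ch.Close() // flushes the final, possibly partial, batch to Out()
    }()
    return ch.ProcessOut(handleBatch) // handleBatch: func(vector.Vector) error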
func (ch *BatchingChannel) Close() { close(ch.input) } +// batchingBuffer add input element to the next batch available. +// When the batch reach maximum size or the input channel is closed, it is passed to the output channel. func (ch *BatchingChannel) batchingBuffer() { ch.buffer = ch.allocate.Vector(ch.size, ch.allocate.Key) for { diff --git a/file/file.go b/file/file.go index 15a5164..aa8f894 100644 --- a/file/file.go +++ b/file/file.go @@ -8,6 +8,7 @@ import ( "sync" "github.com/askiada/external-sort/file/batchingchannels" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/writer" "github.com/sirupsen/logrus" @@ -17,99 +18,130 @@ import ( var logger = logrus.StandardLogger() +// Info set all parameters to process a file with chunks. type Info struct { - mu *MemUsage - Allocate *vector.Allocate - InputReader io.Reader - OutputFile io.Writer - outputWriter writer.Writer + mu *memUsage + Allocate *vector.Allocate + InputReader io.Reader + OutputFile io.Writer + outputWriter writer.Writer + + headers interface{} + chunkPaths []string + localMutex sync.Mutex totalRows int + chunkIndex int PrintMemUsage bool WithHeader bool - headers interface{} } -// CreateSortedChunks Scan a file and divide it into small sorted chunks. -// Store all the chunks in a folder an returns all the paths. -func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpSize, maxWorkers int) ([]string, error) { +func (f *Info) check(dumpSize int) error { + f.chunkIndex = 0 + f.chunkPaths = []string{} if dumpSize <= 0 { - return nil, errors.New("dump size must be greater than 0") + return errors.New("dump size must be greater than 0") } + return nil +} - if f.PrintMemUsage && f.mu == nil { - f.mu = &MemUsage{} +func (f *Info) processInputReader(batchChan *batchingchannels.BatchingChannel, inputReader reader.Reader) error { + for inputReader.Next() { + if f.PrintMemUsage { + f.mu.Collect() + } + row, err := inputReader.Read() + if err != nil { + return errors.Wrap(err, "can't read from input reader") + } + if f.WithHeader && f.headers == nil { + f.headers = row + } else { + batchChan.In() <- row + } + f.totalRows++ } - - err := clearChunkFolder(chunkFolder) - if err != nil { - return nil, errors.Wrap(err, "can't clear chunk folder") + batchChan.Close() + if inputReader.Err() != nil { + return errors.Wrap(inputReader.Err(), "input reader encountered an error") } + return nil +} - inputReader, err := f.Allocate.FnReader(f.InputReader) +func (f *Info) processBatch(vec vector.Vector, chunkFolder string) error { + f.localMutex.Lock() + f.chunkIndex++ + chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(f.chunkIndex)+".tsv") + logger.Infoln("Created chunk", chunkPath) + f.localMutex.Unlock() + vec.Sort() + if f.WithHeader { + f.localMutex.Lock() + err := vec.PushFrontNoKey(f.headers) + if err != nil { + f.localMutex.Unlock() + return err + } + f.localMutex.Unlock() + } + err := f.Allocate.Dump(vec, chunkPath) if err != nil { - return nil, errors.Wrap(err, "can't get input reader") + return errors.Wrapf(err, "can't dump chunk %s", chunkPath) } - countRows := 0 - chunkPaths := []string{} - - mu := sync.Mutex{} + f.localMutex.Lock() + f.chunkPaths = append(f.chunkPaths, chunkPath) + f.localMutex.Unlock() + return nil +} +func (f *Info) runBatchingChannel( + ctx context.Context, + inputReader reader.Reader, + chunkFolder string, + dumpSize, + maxWorkers int, +) ([]string, error) { batchChan, err := 
batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) if err != nil { return nil, errors.Wrap(err, "can't create new batching channel") } - batchChan.G.Go(func() error { - for inputReader.Next() { - if f.PrintMemUsage { - f.mu.Collect() - } - row, err := inputReader.Read() - if err != nil { - return errors.Wrap(err, "can't read from input reader") - } - if f.WithHeader && f.headers == nil { - f.headers = row - } else { - batchChan.In() <- row - } - countRows++ - } - batchChan.Close() - if inputReader.Err() != nil { - return errors.Wrap(inputReader.Err(), "input reader encountered an error") - } - return nil - }) + batchChan.G.Go(func() error { return f.processInputReader(batchChan, inputReader) }) - chunkIdx := 0 - err = batchChan.ProcessOut(func(v vector.Vector) error { - mu.Lock() - chunkIdx++ - chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(chunkIdx)+".tsv") - logger.Infoln("Created chunk", chunkPath) - mu.Unlock() - v.Sort() - if f.WithHeader { - mu.Lock() - err = v.PushFrontNoKey(f.headers) - if err != nil { - mu.Unlock() - return err - } - mu.Unlock() - } - err := f.Allocate.Dump(v, chunkPath) + err = batchChan.ProcessOut(func(vec vector.Vector) error { + err := f.processBatch(vec, chunkFolder) if err != nil { - return err + return errors.Wrap(err, "can't process batch") } - mu.Lock() - chunkPaths = append(chunkPaths, chunkPath) - mu.Unlock() return nil }) if err != nil { return nil, errors.Wrap(err, "can't process batching channel") } - f.totalRows = countRows + return f.chunkPaths, nil +} + +// CreateSortedChunks Scan a file and divide it into small sorted chunks. +// Store all the chunks in a folder an returns all the paths. +func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpSize, maxWorkers int) ([]string, error) { + if err := f.check(dumpSize); err != nil { + return nil, errors.New("can't pass checks") + } + + if f.PrintMemUsage && f.mu == nil { + f.mu = &memUsage{} + } + + err := clearChunkFolder(chunkFolder) + if err != nil { + return nil, errors.Wrap(err, "can't clear chunk folder") + } + + inputReader, err := f.Allocate.FnReader(f.InputReader) + if err != nil { + return nil, errors.Wrap(err, "can't get input reader") + } + chunkPaths, err := f.runBatchingChannel(ctx, inputReader, chunkFolder, dumpSize, maxWorkers) + if err != nil { + return nil, errors.Wrap(err, "can't run batching channel") + } return chunkPaths, nil } diff --git a/file/shuffle.go b/file/shuffle.go index 61d5125..ff7462a 100644 --- a/file/shuffle.go +++ b/file/shuffle.go @@ -1,3 +1,5 @@ +// TODO: rework + lint +//nolint package file import ( @@ -25,7 +27,7 @@ func (f *Info) Shuffle(ctx context.Context, chunkFolder string, dumpSize, maxWor } if f.PrintMemUsage && f.mu == nil { - f.mu = &MemUsage{} + f.mu = &memUsage{} } if f.Allocate != nil { return nil, errors.New("allocate should not be defined when shuffling") diff --git a/file/sort.go b/file/sort.go index aa2e0fb..da2ea4c 100644 --- a/file/sort.go +++ b/file/sort.go @@ -11,26 +11,26 @@ import ( "github.com/pkg/errors" ) -type MemUsage struct { +type memUsage struct { MaxAlloc uint64 MaxSys uint64 NumGc uint32 } -func (mu *MemUsage) Collect() { - var m runtime.MemStats - runtime.ReadMemStats(&m) - if m.Alloc > mu.MaxAlloc { - mu.MaxAlloc = m.Alloc +func (mu *memUsage) Collect() { + var mStats runtime.MemStats + runtime.ReadMemStats(&mStats) + if mStats.Alloc > mu.MaxAlloc { + mu.MaxAlloc = mStats.Alloc } - if m.Sys > mu.MaxSys { - mu.MaxSys = m.Sys + if mStats.Sys > mu.MaxSys { + mu.MaxSys = 
mStats.Sys } - mu.NumGc = m.NumGC + mu.NumGc = mStats.NumGC } -func (mu *MemUsage) String() string { +func (mu *memUsage) String() string { builder := strings.Builder{} builder.WriteString(fmt.Sprintf("Max Alloc = %v MiB", bToMb(mu.MaxAlloc))) builder.WriteString(fmt.Sprintf(" Max Sys = %v MiB", bToMb(mu.MaxSys))) @@ -38,57 +38,67 @@ func (mu *MemUsage) String() string { return builder.String() } +const conversionMb = (1 << 20) //nolint + func bToMb(b uint64) uint64 { - return b / 1024 / 1024 + return b / conversionMb +} + +func (f *Info) createChunks(chunkPaths []string, k int) (*chunks, error) { + chunks := &chunks{list: make([]*chunkInfo, 0, len(chunkPaths))} + for _, chunkPath := range chunkPaths { + err := chunks.new(chunkPath, f.Allocate, k, f.WithHeader) + if err != nil { + return nil, errors.Wrapf(err, "can't create chunk %s", chunkPath) + } + } + return chunks, nil } func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err error) { var oldElem *vector.Element output := f.Allocate.Vector(k, f.Allocate.Key) if f.PrintMemUsage && f.mu == nil { - f.mu = &MemUsage{} + f.mu = &memUsage{} } if f.WithHeader { err = output.PushFrontNoKey(f.headers) if err != nil { - return err + return errors.Wrapf(err, "can't add headers %+v", f.headers) } } // create a chunk per file path - chunks := &chunks{list: make([]*chunkInfo, 0, len(chunkPaths))} - for _, chunkPath := range chunkPaths { - err := chunks.new(chunkPath, f.Allocate, k, f.WithHeader) - if err != nil { - return err - } + createdChunks, err := f.createChunks(chunkPaths, k) + if err != nil { + return errors.Wrap(err, "can't create all chunks") } f.outputWriter, err = f.Allocate.FnWriter(f.OutputFile) if err != nil { - return err + return errors.Wrap(err, "can't get output writer file") } defer f.outputWriter.Close() bar := pb.StartNew(f.totalRows) - chunks.resetOrder() + createdChunks.resetOrder() for { if f.PrintMemUsage { f.mu.Collect() } - if chunks.len() == 0 || output.Len() == k { + if createdChunks.len() == 0 || output.Len() == k { err = WriteBuffer(f.outputWriter, output) if err != nil { return err } } - if chunks.len() == 0 { + if createdChunks.len() == 0 { break } toShrink := []int{} // search the smallest value across chunk buffers by comparing first elements only - minChunk, minValue, minIdx := chunks.min() + minChunk, minValue, minIdx := createdChunks.min() if (!dropDuplicates || oldElem == nil) || (dropDuplicates && !minValue.Key.Equal(oldElem.Key)) { err = output.PushBack(minValue.Row) if err != nil { - return err + return errors.Wrapf(err, "can't push back row %+v", minValue.Row) } oldElem = minValue } @@ -105,7 +115,7 @@ func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err e if minChunk.buffer.Len() == 0 { isEmpty = true toShrink = append(toShrink, minIdx) - err = chunks.shrink(toShrink) + err = createdChunks.shrink(toShrink) if err != nil { return err } @@ -113,7 +123,7 @@ func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err e } // when we get a new element in the first chunk we need to re-order it if !isEmpty { - chunks.moveFirstChunkToCorrectIndex() + createdChunks.moveFirstChunkToCorrectIndex() } bar.Increment() } @@ -121,7 +131,7 @@ func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err e if f.PrintMemUsage { logger.Debugln(f.mu.String()) } - return chunks.close() + return createdChunks.close() } func WriteBuffer(w writer.Writer, rows vector.Vector) error { diff --git a/internal/progress/contract.go 
b/internal/progress/contract.go index 7b9766c..b8495c2 100644 --- a/internal/progress/contract.go +++ b/internal/progress/contract.go @@ -56,7 +56,7 @@ func (b *Basic) Begin(total int64) { // Add increment the bar by n elements. func (b *Basic) Add(val int64) { b.written += float64(val) - progress := int(math.Round(b.written / b.total * 100)) + progress := int(math.Round(b.written / b.total * 100)) //nolint //gomnd if progress >= b.milestone { b.milestone += 5 // every 5% logrus.Debugf("Download from S3 at %3d%%\n\n", progress) diff --git a/internal/rw/rw.go b/internal/rw/rw.go index 183af77..b85f0cf 100644 --- a/internal/rw/rw.go +++ b/internal/rw/rw.go @@ -32,6 +32,7 @@ type InputOutput struct { func NewInputOutput(ctx context.Context) *InputOutput { g, dCtx := errgroup.WithContext(ctx) + return &InputOutput{ g: g, internalCtx: dCtx, @@ -50,6 +51,7 @@ func (i *InputOutput) s3Check(ctx context.Context) error { return errors.New("can't create aws config") } i.s3Client = s3.NewFromConfig(cfg) + return nil } @@ -92,7 +94,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) } else { var files []io.Reader for _, inputFile := range inputFiles { - f, err := os.Open(inputFile) + f, err := os.Open(filepath.Clean(inputFile)) if err != nil { return errors.Wrapf(err, "can't open file %s", inputFile) } diff --git a/main.go b/main.go index 19d725b..d5a70d7 100644 --- a/main.go +++ b/main.go @@ -50,20 +50,83 @@ func newCommand() *command { RunE: shuffleRun, }, } - root.rootCmd.PersistentFlags().BoolVarP(&internal.WithHeader, internal.WithHeaderName, "e", viper.GetBool(internal.WithHeaderName), "Input file has headers.") - root.rootCmd.PersistentFlags().StringSliceVarP(&internal.InputFiles, internal.InputFileNames, "i", viper.GetStringSlice(internal.InputFileNames), "input file path.") - root.rootCmd.PersistentFlags().StringVarP(&internal.OutputFile, internal.OutputFileName, "o", viper.GetString(internal.OutputFileName), "output file path.") - root.rootCmd.PersistentFlags().StringVarP(&internal.ChunkFolder, internal.ChunkFolderName, "c", viper.GetString(internal.ChunkFolderName), "chunk folder.") + root.rootCmd.PersistentFlags().BoolVarP( + &internal.WithHeader, + internal.WithHeaderName, + "e", + viper.GetBool(internal.WithHeaderName), + "Input file has headers.", + ) + root.rootCmd.PersistentFlags().StringSliceVarP( + &internal.InputFiles, + internal.InputFileNames, + "i", + viper.GetStringSlice(internal.InputFileNames), + "input file path.", + ) + root.rootCmd.PersistentFlags().StringVarP( + &internal.OutputFile, + internal.OutputFileName, + "o", + viper.GetString(internal.OutputFileName), + "output file path.", + ) + root.rootCmd.PersistentFlags().StringVarP( + &internal.ChunkFolder, + internal.ChunkFolderName, + "c", + viper.GetString(internal.ChunkFolderName), + "chunk folder.", + ) - root.rootCmd.PersistentFlags().IntVarP(&internal.ChunkSize, internal.ChunkSizeName, "s", viper.GetInt(internal.ChunkSizeName), "chunk size.") - root.rootCmd.PersistentFlags().IntVarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt(internal.MaxWorkersName), "max worker.") - root.rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.") - root.sortCmd.PersistentFlags().StringSliceVarP(&internal.TsvFields, internal.TsvFieldsName, "t", viper.GetStringSlice(internal.TsvFieldsName), "") + root.rootCmd.PersistentFlags().IntVarP( + &internal.ChunkSize, + 
internal.ChunkSizeName, + "s", + viper.GetInt(internal.ChunkSizeName), + "chunk size.", + ) + root.rootCmd.PersistentFlags().IntVarP( + &internal.MaxWorkers, + internal.MaxWorkersName, + "w", + viper.GetInt(internal.MaxWorkersName), + "max worker.", + ) + root.rootCmd.PersistentFlags().IntVarP( + &internal.OutputBufferSize, + internal.OutputBufferSizeName, + "b", + viper.GetInt(internal.OutputBufferSizeName), + "output buffer size.", + ) + root.sortCmd.PersistentFlags().StringSliceVarP( + &internal.TsvFields, + internal.TsvFieldsName, + "t", + viper.GetStringSlice(internal.TsvFieldsName), + "", + ) - root.rootCmd.Flags().StringVar(&internal.S3Region, internal.S3RegionName, viper.GetString(internal.S3RegionName), "the bucket region") - root.rootCmd.Flags().IntVar(&internal.S3RetryMaxAttempts, internal.S3RetryMaxAttemptsName, viper.GetInt(internal.S3RetryMaxAttemptsName), "the number of retries per S3 request before failing") + root.rootCmd.Flags().StringVar( + &internal.S3Region, + internal.S3RegionName, + viper.GetString(internal.S3RegionName), + "the bucket region", + ) + root.rootCmd.Flags().IntVar( + &internal.S3RetryMaxAttempts, + internal.S3RetryMaxAttemptsName, + viper.GetInt(internal.S3RetryMaxAttemptsName), + "the number of retries per S3 request before failing", + ) - root.shuffleCmd.PersistentFlags().BoolVarP(&internal.IsGzip, internal.IsGzipName, "t", viper.GetBool(internal.IsGzipName), "") + root.shuffleCmd.PersistentFlags().BoolVarP(&internal.IsGzip, + internal.IsGzipName, + "t", + viper.GetBool(internal.IsGzipName), + "", + ) root.rootCmd.AddCommand(root.sortCmd, root.shuffleCmd) return root diff --git a/main_test.go b/main_test.go index 133be1b..6882c3c 100644 --- a/main_test.go +++ b/main_test.go @@ -6,7 +6,6 @@ import ( "errors" "io" "os" - "path" "strconv" "testing" @@ -43,14 +42,6 @@ func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, }) err = i.Err() assert.NoError(t, err) - t.Cleanup(func() { - dir, err := os.ReadDir("testdata/chunks") - assert.NoError(t, err) - for _, d := range dir { - err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) - assert.NoError(t, err) - } - }) return fI } @@ -428,14 +419,6 @@ func prepareChunksShuffle(ctx context.Context, t *testing.T, filename, outputFil }) err = i.Err() assert.NoError(t, err) - t.Cleanup(func() { - dir, err := os.ReadDir("testdata/chunks") - assert.NoError(t, err) - for _, d := range dir { - err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) - assert.NoError(t, err) - } - }) return fI } diff --git a/sftp/sftp.go b/sftp/sftp.go index 60c9de2..16af7ab 100644 --- a/sftp/sftp.go +++ b/sftp/sftp.go @@ -3,6 +3,7 @@ package sftp import ( "io/ioutil" "log" + "path/filepath" "github.com/pkg/sftp" "golang.org/x/crypto/ssh" @@ -15,7 +16,7 @@ type Client struct { func NewSFTPClient(addr, key, user, passphrase string) (*Client, error) { res := &Client{} - pemBytes, err := ioutil.ReadFile(key) + pemBytes, err := ioutil.ReadFile(filepath.Clean(key)) if err != nil { log.Fatal(err) } diff --git a/vector/key/int_key.go b/vector/key/int_key.go index ee07b80..92c946c 100644 --- a/vector/key/int_key.go +++ b/vector/key/int_key.go @@ -17,17 +17,17 @@ func AllocateInt(row interface{}) (Key, error) { } num, err := strconv.Atoi(line) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't convert line %s to int", line) } return &Int{num}, nil } func (k *Int) Less(other Key) bool { - return k.value < other.(*Int).value + return k.value < other.(*Int).value //nolint //forcetypeassert } 
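The //nolint //forcetypeassert markers here silence a real caveat: Less and Equal panic if two different concrete key types ever meet in one run. That cannot happen as long as a single Allocate function produces every key for a sort, but a defensive variant under the same Key interface would be:

    func (k *Int) Less(other Key) bool {
        o, ok := other.(*Int)
        return ok && k.value < o.value
    }

The trade-off is that a mixed-type bug would then sort quietly instead of panicking, which is arguably worse during development.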
func (k *Int) Equal(other Key) bool { - return k.value == other.(*Int).value + return k.value == other.(*Int).value //nolint //forcetypeassert } type IntFromSlice struct { @@ -41,15 +41,15 @@ func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) { } num, err := strconv.ParseInt(line[intIndex], 10, 64) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't parse int %+v", line[intIndex]) } return &IntFromSlice{num}, nil } func (k *IntFromSlice) Less(other Key) bool { - return k.value < other.(*IntFromSlice).value + return k.value < other.(*IntFromSlice).value //nolint //forcetypeassert } func (k *IntFromSlice) Equal(other Key) bool { - return k.value == other.(*IntFromSlice).value + return k.value == other.(*IntFromSlice).value //nolint //forcetypeassert } diff --git a/writer/std_writer.go b/writer/std_writer.go index c6cc8e8..bd7fbed 100644 --- a/writer/std_writer.go +++ b/writer/std_writer.go @@ -9,11 +9,13 @@ import ( "github.com/pkg/errors" ) +// StdWriter implement writer interface with a bufio writer. type StdWriter struct { w *bufio.Writer } -func NewStdWriter(w io.Writer) Writer { +// NewStdWriter create a standard writer. +func NewStdWriter(w io.Writer) Writer { //nolint //ireturn s := &StdWriter{ w: bufio.NewWriter(w), } From ffb3ebc8d3ae152db1ca002130c20847b11d8171 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Mon, 8 May 2023 18:06:39 +0200 Subject: [PATCH 13/16] lint --- bucket/s3.go | 2 +- file/batchingchannels/batching_channel.go | 3 +- .../batchingchannels/batching_channel_test.go | 2 +- file/batchingchannels/doc.go | 2 + file/chunk.go | 2 +- file/sort.go | 150 ++++++++++++------ internal/rw/rw.go | 27 ++-- main.go | 131 ++++++++------- main_bench_test.go | 27 ++-- main_test.go | 104 ++++++++---- reader/contract.go | 3 + reader/gzip_separated_values_test.go | 16 +- sftp/sftp.go | 11 +- vector/key/int_key.go | 12 +- vector/key/key.go | 1 + vector/key/string_key.go | 16 +- vector/key/tsv_key.go | 10 +- vector/vector.go | 20 ++- writer/gzip_separated_values.go | 6 +- writer/std_writer.go | 10 +- 20 files changed, 359 insertions(+), 196 deletions(-) create mode 100644 file/batchingchannels/doc.go diff --git a/bucket/s3.go b/bucket/s3.go index 1b2e3b0..08b1121 100644 --- a/bucket/s3.go +++ b/bucket/s3.go @@ -92,7 +92,7 @@ func (s *seqWriterAt) WriteAt(p []byte, _ int64) (n int, err error) { return n, errors.Wrap(err, "can't write bytes at offset") } -// S3FileInfo describe the path to a file on S3. +// S3FileInfo define the path to a file on S3. type S3FileInfo struct { Bucket string Key string diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go index 7f54f16..150e593 100644 --- a/file/batchingchannels/batching_channel.go +++ b/file/batchingchannels/batching_channel.go @@ -8,7 +8,7 @@ import ( "golang.org/x/sync/errgroup" ) -// BatchingChannel standard channel, with the change that instead of producing individual elements +// BatchingChannel define a standard channel, with the change that instead of producing individual elements // on Out(), it batches together n elements each time. Trying to construct an unbuffered batching channel // will panic, that configuration is not supported (and provides no benefit over an unbuffered NativeChannel). 
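For orientation, data moves through the struct below in three stages: In() feeds the input channel, batchingBuffer accumulates rows into a vector.Vector of at most size elements, and each full (or final partial) batch is sent to output for ProcessOut to drain:

    In() -> input -> batchingBuffer -> output -> ProcessOut -> f(batch)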
type BatchingChannel struct { @@ -70,6 +70,7 @@ func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error { if err != nil { return errors.Wrap(err, "one of the task failed") } + return nil } diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index 8b40ad5..ced13a3 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -19,7 +19,7 @@ type intKey struct { value int } -func allocateInt(row interface{}) (key.Key, error) { +func allocateInt(row interface{}) (key.Key, error) { //nolint //ireturn line, ok := row.(string) if !ok { return nil, errors.Errorf("can't convert interface{} to string: %+v", row) diff --git a/file/batchingchannels/doc.go b/file/batchingchannels/doc.go new file mode 100644 index 0000000..9874cca --- /dev/null +++ b/file/batchingchannels/doc.go @@ -0,0 +1,2 @@ +// Package batchingchannels define a standard channel processing the output per batch. +package batchingchannels diff --git a/file/chunk.go b/file/chunk.go index 2345ea1..df9d10f 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -11,7 +11,7 @@ import ( "github.com/pkg/errors" ) -// chunkInfo Describe a chunk. +// chunkInfo define a chunk. type chunkInfo struct { file *os.File reader reader.Reader diff --git a/file/sort.go b/file/sort.go index da2ea4c..6fda072 100644 --- a/file/sort.go +++ b/file/sort.go @@ -55,75 +55,98 @@ func (f *Info) createChunks(chunkPaths []string, k int) (*chunks, error) { return chunks, nil } -func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err error) { - var oldElem *vector.Element - output := f.Allocate.Vector(k, f.Allocate.Key) - if f.PrintMemUsage && f.mu == nil { - f.mu = &memUsage{} - } +func (f *Info) handleHeader(output vector.Vector) error { if f.WithHeader { - err = output.PushFrontNoKey(f.headers) + err := output.PushFrontNoKey(f.headers) if err != nil { return errors.Wrapf(err, "can't add headers %+v", f.headers) } } + return nil +} + +type nextChunk struct { + oldElem *vector.Element +} + +func (nc *nextChunk) get(output vector.Vector, createdChunks *chunks, dropDuplicates bool) (*chunkInfo, int, error) { + minChunk, minValue, minIdx := createdChunks.min() + if (!dropDuplicates || nc.oldElem == nil) || (dropDuplicates && !minValue.Key.Equal(nc.oldElem.Key)) { + err := output.PushBack(minValue.Row) + if err != nil { + return nil, 0, errors.Wrapf(err, "can't push back row %+v", minValue.Row) + } + nc.oldElem = minValue + } + return minChunk, minIdx, nil +} + +func updateChunks(createdChunks *chunks, minChunk *chunkInfo, minIdx, k int) error { + minChunk.buffer.FrontShift() + isEmpty := false + if minChunk.buffer.Len() == 0 { + err := minChunk.pullSubset(k) + if err != nil { + return errors.Wrapf(err, "can't pull subset from chunk %s", minChunk.filename) + } + // if after pulling data the chunk buffer is still empty then we can remove it + if minChunk.buffer.Len() == 0 { + isEmpty = true + err = createdChunks.shrink([]int{minIdx}) + if err != nil { + return errors.Wrapf(err, "can't shrink chunk at index %d", minIdx) + } + } + } + // when we get a new element in the first chunk we need to re-order it + if !isEmpty { + createdChunks.moveFirstChunkToCorrectIndex() + } + return nil +} + +func (f *Info) prepareMergeSort(output vector.Vector, chunkPaths []string, outputBufferSize int) (*chunks, error) { + err := f.handleHeader(output) + if err != nil { + return nil, errors.Wrap(err, "can't handle headers") + } // create a chunk per 
file path
-	createdChunks, err := f.createChunks(chunkPaths, k)
+	createdChunks, err := f.createChunks(chunkPaths, outputBufferSize)
 	if err != nil {
-		return errors.Wrap(err, "can't create all chunks")
+		return nil, errors.Wrap(err, "can't create all chunks")
 	}
 	f.outputWriter, err = f.Allocate.FnWriter(f.OutputFile)
 	if err != nil {
-		return errors.Wrap(err, "can't get output writer file")
+		return nil, errors.Wrap(err, "can't get output writer file")
 	}
-	defer f.outputWriter.Close()
+	return createdChunks, nil
+}
+
+func (f *Info) runMergeSort(createdChunks *chunks, output vector.Vector, outputBufferSize int, dropDuplicates bool) error {
 	bar := pb.StartNew(f.totalRows)
 	createdChunks.resetOrder()
+	smallestChunk := &nextChunk{}
 	for {
 		if f.PrintMemUsage {
 			f.mu.Collect()
 		}
-		if createdChunks.len() == 0 || output.Len() == k {
-			err = WriteBuffer(f.outputWriter, output)
-			if err != nil {
-				return err
-			}
+		err := f.dumpOutput(createdChunks, output, outputBufferSize)
+		if err != nil {
+			return errors.Wrap(err, "can't dump output")
 		}
 		if createdChunks.len() == 0 {
 			break
 		}
-		toShrink := []int{}
+
+		// search the smallest value across chunk buffers by comparing first elements only
-		minChunk, minValue, minIdx := createdChunks.min()
-		if (!dropDuplicates || oldElem == nil) || (dropDuplicates && !minValue.Key.Equal(oldElem.Key)) {
-			err = output.PushBack(minValue.Row)
-			if err != nil {
-				return errors.Wrapf(err, "can't push back row %+v", minValue.Row)
-			}
-			oldElem = minValue
+		minChunk, minIdx, err := smallestChunk.get(output, createdChunks, dropDuplicates)
+		if err != nil {
+			return errors.Wrap(err, "can't get next chunk with smallest value")
 		}
-		// remove the first element from the chunk we pulled the smallest value
-		minChunk.buffer.FrontShift()
-		isEmpty := false
-		if minChunk.buffer.Len() == 0 {
-			err = minChunk.pullSubset(k)
-			if err != nil {
-				return err
-			}
-			// if after pulling data the chunk buffer is still empty then we can remove it
-			if minChunk.buffer.Len() == 0 {
-				isEmpty = true
-				toShrink = append(toShrink, minIdx)
-				err = createdChunks.shrink(toShrink)
-				if err != nil {
-					return err
-				}
-			}
-		}
-		// when we get a new element in the first chunk we need to re-order it
-		if !isEmpty {
-			createdChunks.moveFirstChunkToCorrectIndex()
+		err = updateChunks(createdChunks, minChunk, minIdx, outputBufferSize)
+		if err != nil {
+			return errors.Wrap(err, "can't update chunks")
 		}
 		bar.Increment()
 	}
@@ -131,10 +154,43 @@ func (f *Info) MergeSort(chunkPaths []string, k int, dropDuplicates bool) (err e
 	if f.PrintMemUsage {
 		logger.Debugln(f.mu.String())
 	}
-	return createdChunks.close()
+	return nil
+}
+
+func (f *Info) dumpOutput(createdChunks *chunks, output vector.Vector, outputBufferSize int) error {
+	if createdChunks.len() == 0 || output.Len() == outputBufferSize {
+		err := writeBuffer(f.outputWriter, output)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// MergeSort merges and sorts a list of files.
+// It is possible to drop duplicates and to define the maximum size of the output buffer before a flush.
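A typical call sequence for the refactored signature, mirroring sortRun in main.go later in this patch:

    chunkPaths, err := fileInfo.CreateSortedChunks(ctx, internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers)
    if err != nil {
        return errors.Wrap(err, "can't create sorted chunks")
    }
    err = fileInfo.MergeSort(chunkPaths, internal.OutputBufferSize, true) // true drops duplicate keys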
+func (f *Info) MergeSort(chunkPaths []string, outputBufferSize int, dropDuplicates bool) (err error) { + output := f.Allocate.Vector(outputBufferSize, f.Allocate.Key) + if f.PrintMemUsage && f.mu == nil { + f.mu = &memUsage{} + } + createdChunks, err := f.prepareMergeSort(output, chunkPaths, outputBufferSize) + if err != nil { + return errors.Wrap(err, "can't prepare merge sort") + } + defer func() { err = f.outputWriter.Close() }() + err = f.runMergeSort(createdChunks, output, outputBufferSize, dropDuplicates) + if err != nil { + return errors.Wrap(err, "can't run merge sort") + } + err = createdChunks.close() + if err != nil { + return errors.Wrap(err, "can't close created chunks") + } + return err } -func WriteBuffer(w writer.Writer, rows vector.Vector) error { +func writeBuffer(w writer.Writer, rows vector.Vector) error { for i := 0; i < rows.Len(); i++ { err := w.Write(rows.Get(i).Row) if err != nil { diff --git a/internal/rw/rw.go b/internal/rw/rw.go index b85f0cf..e6253a6 100644 --- a/internal/rw/rw.go +++ b/internal/rw/rw.go @@ -83,13 +83,14 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) pr, pw := io.Pipe() i.Input = pr i.inputPipe = pr - i.g.Go(func() error { - defer pw.Close() //nolint:errcheck //no need to check this error - err := s3Api.Download(i.internalCtx, pw, files...) + i.g.Go(func() (err error) { + defer func() { err = pw.Close() }() + err = s3Api.Download(i.internalCtx, pw, files...) if err != nil { return errors.Wrap(err, "can't download files") } - return nil + + return err }) } else { var files []io.Reader @@ -102,6 +103,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) } i.Input = io.MultiReader(files...) } + return nil } @@ -111,9 +113,12 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e if err != nil { return errors.Wrap(err, "can't check s3") } - u, _ := url.Parse(outputFile) - u.Path = strings.TrimLeft(u.Path, "/") - logger.Debugf("Proto: %q, Bucket: %q, Key: %q", u.Scheme, u.Host, u.Path) + outputURL, err := url.Parse(outputFile) + if err != nil { + return errors.Wrapf(err, "can't parse output url %s", outputFile) + } + outputURL.Path = strings.TrimLeft(outputURL.Path, "/") + logger.Debugf("Proto: %q, Bucket: %q, Key: %q", outputURL.Scheme, outputURL.Host, outputURL.Path) s3Api, err := bucket.New(ctx, bucket.Client(i.s3Client), bucket.Buffer(1_000_000), @@ -126,13 +131,13 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e pr, pw := io.Pipe() i.Output = pw i.outputPipe = pw - i.g.Go(func() error { - defer pr.Close() //nolint:errcheck //no need to check this error - err := s3Api.Upload(i.internalCtx, pr, u.Host, u.Path) + i.g.Go(func() (err error) { + defer func() { err = pr.Close() }() + err = s3Api.Upload(i.internalCtx, pr, outputURL.Host, outputURL.Path) if err != nil { return errors.Wrapf(err, "can't upload file %s", outputFile) } - return nil + return err }) } else { i.Output, err = os.Create(filepath.Clean(outputFile)) diff --git a/main.go b/main.go index d5a70d7..97f1039 100644 --- a/main.go +++ b/main.go @@ -27,29 +27,7 @@ type command struct { shuffleCmd *cobra.Command } -func newCommand() *command { - root := &command{ - rootCmd: &cobra.Command{ - Use: "external", - Short: "Perform an external task on an input file", - }, - sortCmd: &cobra.Command{ - Use: "sort", - Short: "Perform an external sorting on an input file", - PreRun: func(cmd *cobra.Command, args []string) { - 
cmd.SetContext(context.WithValue(cmd.Parent().Context(), "cmd", "sort"))
-			},
-			RunE: sortRun,
-		},
-		shuffleCmd: &cobra.Command{
-			Use:   "shuffle",
-			Short: "Perform an external shufflin on an input file",
-			PreRun: func(cmd *cobra.Command, args []string) {
-				cmd.SetContext(context.WithValue(cmd.Parent().Context(), "cmd", "shuffle"))
-			},
-			RunE: shuffleRun,
-		},
-	}
+func setFlags(root *command) {
 	root.rootCmd.PersistentFlags().BoolVarP(
 		&internal.WithHeader,
@@ -127,18 +105,43 @@ func newCommand() *command {
 		viper.GetBool(internal.IsGzipName),
 		"",
 	)
+}

+func newCommand() *command {
+	root := &command{
+		rootCmd: &cobra.Command{
+			Use:   "external",
+			Short: "Perform an external task on an input file",
+		},
+		sortCmd: &cobra.Command{
+			Use:   "sort",
+			Short: "Perform an external sorting on an input file",
+			PreRun: func(cmd *cobra.Command, args []string) {
+				cmd.SetContext(cmd.Parent().Context())
+			},
+			RunE: sortRun,
+		},
+		shuffleCmd: &cobra.Command{
+			Use:   "shuffle",
+			Short: "Perform an external shuffling on an input file",
+			PreRun: func(cmd *cobra.Command, args []string) {
+				cmd.SetContext(cmd.Parent().Context())
+			},
+			RunE: shuffleRun,
+		},
+	}
 	root.rootCmd.AddCommand(root.sortCmd, root.shuffleCmd)
 	return root
 }

 func main() {
 	root := newCommand()
+	setFlags(root)
 	ctx := context.Background()
 	cobra.CheckErr(root.rootCmd.ExecuteContext(ctx))
 }

-func sortRun(cmd *cobra.Command, args []string) error {
+func sortRun(cmd *cobra.Command, _ []string) error {
 	logger.Infoln("Input files", internal.InputFiles)
 	logger.Infoln("With header", internal.WithHeader)
 	logger.Infoln("Output file", internal.OutputFile)
@@ -146,46 +149,61 @@ func sortRun(cmd *cobra.Command, args []string) error {
 	logger.Infoln("TSV Fields", internal.TsvFields)

 	start := time.Now()
-	i := rw.NewInputOutput(cmd.Context())
-	err := i.SetInputReader(cmd.Context(), internal.InputFiles...)
+	inputOutput := rw.NewInputOutput(cmd.Context())
+	err := inputOutput.SetInputReader(cmd.Context(), internal.InputFiles...)
 	if err != nil {
-		return err
+		return errors.Wrap(err, "can't set input reader")
 	}
-	err = i.SetOutputWriter(cmd.Context(), internal.OutputFile)
+	err = inputOutput.SetOutputWriter(cmd.Context(), internal.OutputFile)
 	if err != nil {
-		return err
+		return errors.Wrap(err, "can't set output writer")
 	}
 	tsvFields := []int{}
 	for _, field := range internal.TsvFields {
 		i, err := strconv.Atoi(field)
 		if err != nil {
-			return err
+			return errors.Wrapf(err, "can't convert field %s", field)
 		}
 		tsvFields = append(tsvFields, i)
 	}
-	fI := &file.Info{
+	fileInfo := &file.Info{
 		WithHeader:  internal.WithHeader,
-		InputReader: i.Input,
-		OutputFile:  i.Output,
+		InputReader: inputOutput.Input,
+		OutputFile:  inputOutput.Output,
 		Allocate: vector.DefaultVector(
 			func(row interface{}) (key.Key, error) {
-				return key.AllocateTsv(row, tsvFields...)
+				k, err := key.AllocateTsv(row, tsvFields...)
+ if err != nil { + return nil, errors.Wrapf(err, "can't allocate tsv %+v", row) + } + return k, nil }, - func(r io.Reader) (reader.Reader, error) { return reader.NewGZipSeparatedValues(r, '\t') }, func(w io.Writer) (writer.Writer, error) { - return writer.NewGZipSeparatedValues(w, '\t') + func(r io.Reader) (reader.Reader, error) { + gzipReader, err := reader.NewGZipSeparatedValues(r, '\t') + if err != nil { + return nil, errors.Wrap(err, "can't create Gzip reader") + } + return gzipReader, nil + }, + func(w io.Writer) (writer.Writer, error) { + gzipWriter, err := writer.NewGZipSeparatedValues(w, '\t') + if err != nil { + return nil, errors.Wrap(err, "can't create Gzip writer") + } + return gzipWriter, nil }, ), PrintMemUsage: false, } - i.Do(func() error { + inputOutput.Do(func() error { // create small files with maximum 30 rows in each - chunkPaths, err := fI.CreateSortedChunks(context.Background(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers) + chunkPaths, err := fileInfo.CreateSortedChunks(cmd.Context(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers) if err != nil { return errors.Wrap(err, "can't create sorted chunks") } // perform a merge sort on all the chunks files. // we sort using a buffer so we don't have to load the entire chunks when merging - err = fI.MergeSort(chunkPaths, internal.OutputBufferSize, true) + err = fileInfo.MergeSort(chunkPaths, internal.OutputBufferSize, true) if err != nil { return errors.Wrap(err, "can't merge sort") } @@ -193,40 +211,47 @@ func sortRun(cmd *cobra.Command, args []string) error { logger.Infoln("It took", elapsed) return nil }) - err = i.Err() + err = inputOutput.Err() if err != nil { return errors.Wrap(err, "can't finish") } return nil } -func shuffleRun(cmd *cobra.Command, args []string) error { +func shuffleRun(cmd *cobra.Command, _ []string) error { logger.Infoln("Input files", internal.InputFiles) logger.Infoln("With header", internal.WithHeader) logger.Infoln("Output file", internal.OutputFile) logger.Infoln("Chunk folder", internal.ChunkFolder) logger.Infoln("GZip file", internal.IsGzip) start := time.Now() - ctx := context.Background() - i := rw.NewInputOutput(ctx) - err := i.SetInputReader(ctx, internal.InputFiles...) + inputOutput := rw.NewInputOutput(cmd.Context()) + err := inputOutput.SetInputReader(cmd.Context(), internal.InputFiles...) 
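The Do/Err pair used by these run functions is the other half of the errgroup wiring in internal/rw: SetInputReader and SetOutputWriter only register the S3 streaming goroutines, Do runs the actual sort or shuffle, and Err waits on the whole group, so a failed upload or download surfaces at the end rather than at this call site. Schematically (a sketch of the idea, not the exact implementation):

    g, ctx := errgroup.WithContext(ctx)
    g.Go(stream)    // registered by SetInputReader / SetOutputWriter
    g.Go(work)      // the callback handed to Do
    return g.Wait() // what Err reports: the first non-nil error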
if err != nil { - return err + return errors.Wrap(err, "can't set input reader") } - err = i.SetOutputWriter(ctx, internal.OutputFile) + err = inputOutput.SetOutputWriter(cmd.Context(), internal.OutputFile) if err != nil { - return err + return errors.Wrap(err, "can't set output writer") } - fI := &file.Info{ + fileInfo := &file.Info{ WithHeader: internal.WithHeader, - InputReader: i.Input, - OutputFile: i.Output, + InputReader: inputOutput.Input, + OutputFile: inputOutput.Output, PrintMemUsage: false, } - i.Do(func() error { + inputOutput.Do(func() error { // create small files with maximum 30 rows in each - _, err := fI.Shuffle(context.Background(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers, internal.OutputBufferSize, time.Now().Unix(), internal.IsGzip) + _, err := fileInfo.Shuffle( + cmd.Context(), + internal.ChunkFolder, + internal.ChunkSize, + internal.MaxWorkers, + internal.OutputBufferSize, + time.Now().Unix(), + internal.IsGzip, + ) if err != nil { return errors.Wrap(err, "can't create shuflled chunks") } @@ -234,7 +259,7 @@ func shuffleRun(cmd *cobra.Command, args []string) error { logger.Infoln("It took", elapsed) return nil }) - err = i.Err() + err = inputOutput.Err() if err != nil { return errors.Wrap(err, "can't finish") } diff --git a/main_bench_test.go b/main_bench_test.go index 0b686c1..e87982c 100644 --- a/main_bench_test.go +++ b/main_bench_test.go @@ -19,29 +19,34 @@ import ( func BenchmarkMergeSort(b *testing.B) { filename := "test.tsv" ctx := context.Background() - i := rw.NewInputOutput(ctx) - err := i.SetInputReader(ctx, filename) + inputOutput := rw.NewInputOutput(ctx) + err := inputOutput.SetInputReader(ctx, filename) assert.NoError(b, err) - err = i.SetOutputWriter(ctx, "testdata/chunks/output.tsv") + err = inputOutput.SetOutputWriter(ctx, "testdata/chunks/output.tsv") assert.NoError(b, err) chunkSize := 10000 bufferSize := 5000 - fI := &file.Info{ - InputReader: i.Input, - Allocate: vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }), - OutputFile: i.Output, + fileInfo := &file.Info{ + InputReader: inputOutput.Input, + Allocate: vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ), + OutputFile: inputOutput.Output, } - i.Do(func() (err error) { - chunkPaths, err := fI.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) + inputOutput.Do(func() (err error) { + chunkPaths, err := fileInfo.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) assert.NoError(b, err) b.ResetTimer() for i := 0; i < b.N; i++ { - err = fI.MergeSort(chunkPaths, bufferSize, false) + err = fileInfo.MergeSort(chunkPaths, bufferSize, false) _ = err } + return nil }) - err = i.Err() + err = inputOutput.Err() assert.NoError(b, err) dir, err := os.ReadDir("testdata/chunks") assert.NoError(b, err) diff --git a/main_test.go b/main_test.go index 6882c3c..6972aa2 100644 --- a/main_test.go +++ b/main_test.go @@ -19,31 +19,41 @@ import ( "github.com/stretchr/testify/assert" ) -func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, filename, outputFilename string, chunkSize int, mergeSort bool, bufferSize int, withHeaders bool, dropDuplicates bool) *file.Info { +func prepareChunks( + ctx context.Context, + 
t *testing.T, + allocate *vector.Allocate, + filename, outputFilename string, + chunkSize int, + mergeSort bool, + bufferSize int, + withHeaders bool, + dropDuplicates bool, +) *file.Info { t.Helper() - i := rw.NewInputOutput(ctx) - err := i.SetInputReader(ctx, filename) + inputOutput := rw.NewInputOutput(ctx) + err := inputOutput.SetInputReader(ctx, filename) assert.NoError(t, err) - err = i.SetOutputWriter(ctx, outputFilename) + err = inputOutput.SetOutputWriter(ctx, outputFilename) assert.NoError(t, err) - fI := &file.Info{ - InputReader: i.Input, + fileInfo := &file.Info{ + InputReader: inputOutput.Input, Allocate: allocate, - OutputFile: i.Output, + OutputFile: inputOutput.Output, WithHeader: withHeaders, } - i.Do(func() (err error) { - chunkPaths, err := fI.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) + inputOutput.Do(func() (err error) { + chunkPaths, err := fileInfo.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) assert.NoError(t, err) if mergeSort { - return fI.MergeSort(chunkPaths, bufferSize, dropDuplicates) + return fileInfo.MergeSort(chunkPaths, bufferSize, dropDuplicates) } return nil }) - err = i.Err() + err = inputOutput.Err() assert.NoError(t, err) - return fI + return fileInfo } func TestBasics(t *testing.T) { @@ -102,7 +112,11 @@ func TestBasics(t *testing.T) { t.Run(name+"_"+strconv.Itoa(chunkSize)+"_"+strconv.Itoa(bufferSize), func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize, true, bufferSize, false, false) outputFile, err := os.Open(outputFilename) @@ -165,7 +179,11 @@ func Test100Elems(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -217,7 +235,11 @@ func Test100ElemsWithDuplicates(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -277,7 +299,11 @@ 
func Test100ElemsWithHeaders(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, false) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -329,7 +355,11 @@ func Test100ElemsWithHeadersWithDuplicates(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(key.AllocateInt, func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }) + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -380,9 +410,11 @@ func TestTsvKey(t *testing.T) { t.Run(name, func(t *testing.T) { ctx := context.Background() - allocate := vector.DefaultVector(func(row interface{}) (key.Key, error) { - return key.AllocateTsv(row, 1) - }, func(r io.Reader) (reader.Reader, error) { return reader.NewSeparatedValues(r, '\t'), nil }, func(w io.Writer) (writer.Writer, error) { return writer.NewSeparatedValues(w, '\t'), nil }) + allocate := vector.DefaultVector( + func(row interface{}) (key.Key, error) { return key.AllocateTsv(row, 1) }, + func(r io.Reader) (reader.Reader, error) { return reader.NewSeparatedValues(r, '\t'), nil }, + func(w io.Writer) (writer.Writer, error) { return writer.NewSeparatedValues(w, '\t'), nil }, + ) prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) @@ -400,27 +432,37 @@ func TestTsvKey(t *testing.T) { } } -func prepareChunksShuffle(ctx context.Context, t *testing.T, filename, outputFilename string, chunkSize int, mergeSort bool, bufferSize int, withHeaders bool, dropDuplicates, isGzip bool) *file.Info { +func prepareChunksShuffle( + ctx context.Context, + t *testing.T, + filename, outputFilename string, + chunkSize int, + mergeSort bool, + bufferSize int, + withHeaders bool, + dropDuplicates, + isGzip bool, +) *file.Info { t.Helper() - i := rw.NewInputOutput(ctx) - err := i.SetInputReader(ctx, filename) + inputOutput := rw.NewInputOutput(ctx) + err := inputOutput.SetInputReader(ctx, filename) assert.NoError(t, err) - err = i.SetOutputWriter(ctx, outputFilename) + err = inputOutput.SetOutputWriter(ctx, outputFilename) assert.NoError(t, err) - fI := &file.Info{ - InputReader: i.Input, - OutputFile: i.Output, + fileInfo := &file.Info{ + InputReader: inputOutput.Input, + OutputFile: inputOutput.Output, WithHeader: withHeaders, } - i.Do(func() (err error) { - _, err = fI.Shuffle(ctx, "testdata/chunks", chunkSize, 10, bufferSize, 13, isGzip) + inputOutput.Do(func() (err error) { + _, err = fileInfo.Shuffle(ctx, 
"testdata/chunks", chunkSize, 10, bufferSize, 13, isGzip) assert.NoError(t, err) return nil }) - err = i.Err() + err = inputOutput.Err() assert.NoError(t, err) - return fI + return fileInfo } func Test100ElemsShuffle(t *testing.T) { diff --git a/reader/contract.go b/reader/contract.go index a50988f..091267a 100644 --- a/reader/contract.go +++ b/reader/contract.go @@ -4,9 +4,12 @@ import ( "io" ) +// Reader define a basic reader. type Reader interface { Next() bool Read() (interface{}, error) Err() error } + +// Config function type to convert a io.Reader to a Reader. type Config func(r io.Reader) (Reader, error) diff --git a/reader/gzip_separated_values_test.go b/reader/gzip_separated_values_test.go index e8e48e0..966ef3d 100644 --- a/reader/gzip_separated_values_test.go +++ b/reader/gzip_separated_values_test.go @@ -16,17 +16,17 @@ func Test(t *testing.T) { t.Skip("to rework") f, err := os.Open("/mnt/c/Users/Alex/Downloads/recordings.59.tsv.gz") require.NoError(t, err) - r, err := reader.NewGZipSeparatedValues(bufio.NewReader(f), '\t') + rder, err := reader.NewGZipSeparatedValues(bufio.NewReader(f), '\t') require.NoError(t, err) count := 0 - for r.Next() { - row, err := r.Read() + for rder.Next() { + row, err := rder.Read() require.NoError(t, err) _ = row count++ } assert.Equal(t, 2853701, count) - require.NoError(t, r.Err()) + require.NoError(t, rder.Err()) } func TestS3(t *testing.T) { @@ -36,15 +36,15 @@ func TestS3(t *testing.T) { err := i.SetInputReader(ctx, "s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz") require.NoError(t, err) - r, err := reader.NewGZipSeparatedValues(i.Input, '\t') + gzipReader, err := reader.NewGZipSeparatedValues(i.Input, '\t') require.NoError(t, err) count := 0 - for r.Next() { - row, err := r.Read() + for gzipReader.Next() { + row, err := gzipReader.Read() require.NoError(t, err) _ = row count++ } assert.Equal(t, 2853701, count) - require.NoError(t, r.Err()) + require.NoError(t, gzipReader.Err()) } diff --git a/sftp/sftp.go b/sftp/sftp.go index 16af7ab..e9dd341 100644 --- a/sftp/sftp.go +++ b/sftp/sftp.go @@ -1,10 +1,11 @@ package sftp import ( - "io/ioutil" "log" + "os" "path/filepath" + "github.com/pkg/errors" "github.com/pkg/sftp" "golang.org/x/crypto/ssh" ) @@ -16,7 +17,7 @@ type Client struct { func NewSFTPClient(addr, key, user, passphrase string) (*Client, error) { res := &Client{} - pemBytes, err := ioutil.ReadFile(filepath.Clean(key)) + pemBytes, err := os.ReadFile(filepath.Clean(key)) if err != nil { log.Fatal(err) } @@ -31,12 +32,12 @@ func NewSFTPClient(addr, key, user, passphrase string) (*Client, error) { } conn, err := ssh.Dial("tcp", addr, config) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't dial with address %s", addr) } res.Conn = conn client, err := sftp.NewClient(conn) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't create sftp client with address %s", addr) } res.Client = client return res, nil @@ -45,7 +46,7 @@ func NewSFTPClient(addr, key, user, passphrase string) (*Client, error) { func (s *Client) Close() error { err := s.Client.Close() if err != nil { - return err + return errors.Wrap(err, "can't close client") } return s.Conn.Close() } diff --git a/vector/key/int_key.go b/vector/key/int_key.go index 92c946c..73f0fd1 100644 --- a/vector/key/int_key.go +++ b/vector/key/int_key.go @@ -6,11 +6,13 @@ import ( "github.com/pkg/errors" ) +// Int define an integer key. 
type Int struct {
 	value int
 }
 
-func AllocateInt(row interface{}) (Key, error) {
+// AllocateInt create a new integer key.
+func AllocateInt(row interface{}) (Key, error) { //nolint //ireturn
 	line, ok := row.(string)
 	if !ok {
 		return nil, errors.Errorf("can't convert interface{} to string: %+v", row)
 	}
@@ -22,19 +24,23 @@ func AllocateInt(row interface{}) (Key, error) {
 	return &Int{num}, nil
 }
 
+// Less compare two integer keys.
 func (k *Int) Less(other Key) bool {
 	return k.value < other.(*Int).value //nolint //forcetypeassert
 }
 
+// Equal check two integer keys are equal.
 func (k *Int) Equal(other Key) bool {
 	return k.value == other.(*Int).value //nolint //forcetypeassert
 }
 
+// IntFromSlice define an integer key from a position in a slice of integers.
 type IntFromSlice struct {
 	value int64
 }
 
-func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) {
+// AllocateIntFromSlice create a new integer key from a position in a slice of integers.
+func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) { //nolint //ireturn
 	line, ok := row.([]string)
 	if !ok {
 		return nil, errors.Errorf("can't convert interface{} to []string: %+v", row)
 	}
@@ -46,10 +52,12 @@ func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) {
 	return &IntFromSlice{num}, nil
 }
 
+// Less compare two integer keys.
 func (k *IntFromSlice) Less(other Key) bool {
 	return k.value < other.(*IntFromSlice).value //nolint //forcetypeassert
 }
 
+// Equal check two integer keys are equal.
 func (k *IntFromSlice) Equal(other Key) bool {
 	return k.value == other.(*IntFromSlice).value //nolint //forcetypeassert
 }
diff --git a/vector/key/key.go b/vector/key/key.go
index 2eda041..3b45aa7 100644
--- a/vector/key/key.go
+++ b/vector/key/key.go
@@ -1,5 +1,6 @@
 package key
 
+// Key define the interface to compare keys to sort.
 type Key interface {
 	Equal(v2 Key) bool
 	// Less returns whether the key is smaller than v2
diff --git a/vector/key/string_key.go b/vector/key/string_key.go
index f4fec6e..4d6ade8 100644
--- a/vector/key/string_key.go
+++ b/vector/key/string_key.go
@@ -2,34 +2,42 @@ package key
 
 import "strings"
 
+// String define a string key.
 type String struct {
 	value string
 }
 
+// AllocateString create a new string key.
 func AllocateString(line string) (Key, error) {
 	return &String{line}, nil
 }
 
+// Less compare two string keys.
 func (k *String) Less(other Key) bool {
-	return k.value < other.(*String).value
+	return k.value < other.(*String).value //nolint //forcetypeassert
 }
 
+// Equal check two string keys are equal.
 func (k *String) Equal(other Key) bool {
-	return k.value == other.(*String).value
+	return k.value == other.(*String).value //nolint //forcetypeassert
 }
 
+// UpperString define an upper-cased string key.
 type UpperString struct {
 	value string
 }
 
+// AllocateUpperString create a new upper string key. It trims space and changes the string to uppercase.
 func AllocateUpperString(line string) (Key, error) {
 	return &UpperString{strings.TrimSpace(strings.ToUpper(line))}, nil
 }
 
+// Less compare two upper string keys.
 func (k *UpperString) Less(other Key) bool {
-	return k.value < other.(*UpperString).value
+	return k.value < other.(*UpperString).value //nolint //forcetypeassert
 }
 
+// Equal check two upper string keys are equal.
 
func (k *UpperString) Equal(other Key) bool { - return k.value == other.(*UpperString).value + return k.value == other.(*UpperString).value //nolint //forcetypeassert } diff --git a/vector/key/tsv_key.go b/vector/key/tsv_key.go index 6f3ee04..9c31d61 100644 --- a/vector/key/tsv_key.go +++ b/vector/key/tsv_key.go @@ -13,18 +13,16 @@ func AllocateTsv(row interface{}, pos ...int) (Key, error) { if !ok { return nil, errors.Errorf("can't convert interface{} to []string: %+v", row) } - k := strings.Builder{} + strBuilder := strings.Builder{} for i, p := range pos { if len(splitted) < p+1 { return nil, errors.Errorf("can't allocate tsv key line is invalid: %s", row) } - k.WriteString(splitted[p]) + strBuilder.WriteString(splitted[p]) if i < len(pos)-1 { - k.WriteString(salt) + strBuilder.WriteString(salt) } } - // fmt.Println(row, pos, k.String()) - - return &String{k.String()}, nil + return &String{strBuilder.String()}, nil } diff --git a/vector/vector.go b/vector/vector.go index d49cd80..d6b059d 100644 --- a/vector/vector.go +++ b/vector/vector.go @@ -1,8 +1,8 @@ package vector import ( - "io" "os" + "path/filepath" "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector/key" @@ -10,13 +10,15 @@ import ( "github.com/pkg/errors" ) +// Allocate define a vector and methods to read and write it. type Allocate struct { Vector func(int, func(row interface{}) (key.Key, error)) Vector - FnReader func(r io.Reader) (reader.Reader, error) - FnWriter func(w io.Writer) (writer.Writer, error) + FnReader reader.Config + FnWriter writer.Config Key func(elem interface{}) (key.Key, error) } +// DefaultVector define a helper function to allocate a vector. func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader reader.Config, fnWr writer.Config) *Allocate { return &Allocate{ FnReader: fnReader, @@ -26,6 +28,7 @@ func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader } } +// Vector define a basic interface to manipulate a vector. type Vector interface { // Get Access i-th element Get(i int) *Element @@ -43,8 +46,11 @@ type Vector interface { Sort() } -func (a *Allocate) Dump(v Vector, filename string) error { - file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) +const writeFilePerm = 0o600 + +// Dump copy a vector to a file. 
+func (a *Allocate) Dump(vec Vector, filename string) error { + file, err := os.OpenFile(filepath.Clean(filename), os.O_CREATE|os.O_WRONLY, writeFilePerm) if err != nil { return errors.Errorf("failed creating file: %s", err) } @@ -52,8 +58,8 @@ func (a *Allocate) Dump(v Vector, filename string) error { if err != nil { return errors.Errorf("failed creating writer: %s", err) } - for i := 0; i < v.Len(); i++ { - err = datawriter.Write(v.Get(i).Row) + for i := 0; i < vec.Len(); i++ { + err = datawriter.Write(vec.Get(i).Row) if err != nil { return errors.Errorf("failed writing file: %s", err) } diff --git a/writer/gzip_separated_values.go b/writer/gzip_separated_values.go index 63e6064..4e37a33 100644 --- a/writer/gzip_separated_values.go +++ b/writer/gzip_separated_values.go @@ -35,11 +35,11 @@ func (s *GZipSeparatedValuesWriter) Write(elem interface{}) error { return nil } -func (s *GZipSeparatedValuesWriter) Close() error { - defer s.gw.Close() +func (s *GZipSeparatedValuesWriter) Close() (err error) { + defer func() { err = s.gw.Close() }() s.w.Flush() if s.w.Error() != nil { return errors.Wrap(s.w.Error(), "can't close writer") } - return nil + return err } diff --git a/writer/std_writer.go b/writer/std_writer.go index bd7fbed..c8ab78e 100644 --- a/writer/std_writer.go +++ b/writer/std_writer.go @@ -34,6 +34,7 @@ func (w *StdWriter) Write(elem interface{}) error { return err } +// Close close the bufio writer. It is the responsibility of the client to close the underlying writer. func (w *StdWriter) Close() error { err := w.w.Flush() if err != nil { @@ -78,13 +79,14 @@ func (w *StdSliceWriter) Write(elem interface{}) error { return err } -func (w *StdSliceWriter) Close() error { +// Close close the bufio writer. It is the responsibility of the client to close the underlying writer. 
+func (w *StdSliceWriter) Close() (err error) { if w.gw != nil { - defer w.gw.Close() + defer func() { err = w.gw.Close() }() } - err := w.w.Flush() + err = w.w.Flush() if err != nil { return errors.Wrap(err, "can't close writer") } - return nil + return err } From 167ffa63439c9802a4cb9e846188730e2b225c16 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Fri, 19 May 2023 09:40:56 +0200 Subject: [PATCH 14/16] proto test --- grpc/external_sort_index.proto | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 grpc/external_sort_index.proto diff --git a/grpc/external_sort_index.proto b/grpc/external_sort_index.proto new file mode 100644 index 0000000..d268b90 --- /dev/null +++ b/grpc/external_sort_index.proto @@ -0,0 +1,38 @@ +// (-- api-linter: core::0215::versioned-packages=disabled +// aip.dev/not-precedent: This simply makes the structure simpler --) +syntax = "proto3"; + +package bk.registration.orchestrator; + +option go_package = "github.com/askiada/external-sort/grpc/_build/go"; + + +service ExternalSort { + rpc SortSV(SortSVRequest) returns (SortSVResponse); +} + +message FileSV { + string path = 1; + bool gzip = 2; + string separator = 3; + repeated Field sort_fields = 4; + bool with_input_header = 5; + +} + + +message SortSVRequest { + + repeated FileSV input = 2; + +} + +message Field { + enum FIELD_TYPE{ + INT = 0; + STRING = 1; + BOOL = 2; + } + int64 index =1; + FIELD_TYPE type =2; +} From 244baf4941020b454d9bee896aacb9be7621ca59 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Sat, 29 Jun 2024 15:03:34 +0200 Subject: [PATCH 15/16] lint --- main_test.go | 5 ++--- testdata/chunks/output.tsv | 0 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 testdata/chunks/output.tsv diff --git a/main_test.go b/main_test.go index 6972aa2..1a7ef49 100644 --- a/main_test.go +++ b/main_test.go @@ -9,14 +9,14 @@ import ( "strconv" "testing" + "github.com/stretchr/testify/assert" + "github.com/askiada/external-sort/file" "github.com/askiada/external-sort/internal/rw" "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" "github.com/askiada/external-sort/writer" - - "github.com/stretchr/testify/assert" ) func prepareChunks( @@ -108,7 +108,6 @@ func TestBasics(t *testing.T) { for chunkSize := 1; chunkSize < 152; chunkSize += 10 { for bufferSize := 1; bufferSize < 152; bufferSize += 10 { chunkSize := chunkSize - bufferSize := bufferSize t.Run(name+"_"+strconv.Itoa(chunkSize)+"_"+strconv.Itoa(bufferSize), func(t *testing.T) { ctx := context.Background() diff --git a/testdata/chunks/output.tsv b/testdata/chunks/output.tsv deleted file mode 100644 index e69de29..0000000 From 80e9ab44c22ecce81f0528aa4d840a80254fb325 Mon Sep 17 00:00:00 2001 From: askiada <25521495+askiada@users.noreply.github.com> Date: Sat, 29 Jun 2024 15:03:59 +0200 Subject: [PATCH 16/16] lint --- .gitignore | 7 + .golangci.yml | 171 +++++++----------- Makefile | 4 + bucket/contract.go | 3 +- bucket/s3.go | 7 +- file/batchingchannels/batching_channel.go | 12 +- .../batchingchannels/batching_channel_test.go | 13 +- file/chunk.go | 16 +- file/file.go | 25 ++- file/sort.go | 45 ++++- file/utils.go | 4 + go.mod | 2 +- internal/progress/contract.go | 2 + internal/rw/rw.go | 29 ++- main.go | 36 +++- main_bench_test.go | 3 +- reader/gzip_separated_values.go | 4 + reader/gzip_separated_values_test.go | 10 +- reader/separated_values.go | 
3 + reader/std_scanner.go | 25 ++- sftp/sftp.go | 11 +- vector/key/int_key.go | 8 +- vector/key/tsv_key.go | 4 + vector/slice_vector.go | 2 + vector/vector.go | 10 +- writer/contract.go | 4 +- writer/gzip_separated_values.go | 5 + writer/separated_values.go | 5 + writer/std_writer.go | 24 ++- 29 files changed, 342 insertions(+), 152 deletions(-) diff --git a/.gitignore b/.gitignore index 7cc7ba1..ac81399 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,10 @@ bench* gen* chunk_*.tsv bin/ +*.csv +*.tsv +*.gz +coverage.out +testdata/chunks/*.csv +testdata/chunks/*.tsv +testdata/chunks/*.gz diff --git a/.golangci.yml b/.golangci.yml index e5f2e1a..ba3f9e4 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,7 +1,19 @@ linters-settings: + cyclop: + skip-tests: true + max-complexity: 15 + exhaustive: + default-signifies-exhaustive: true funlen: - lines: 80 - statements: 50 + lines: 110 + statements: 70 + gci: + sections: + - standard + - default + - prefix(github.com/askiada) + - localmodule + custom-order: true goconst: min-len: 2 min-occurrences: 2 @@ -17,7 +29,7 @@ linters-settings: godot: capital: true goimports: - local-prefixes: github.com/golangci/golangci-lint + local-prefixes: github.com/askiada/external-sort govet: settings: printf: @@ -26,128 +38,83 @@ linters-settings: - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf - enable: + disable: - fieldalignment - maligned: - suggest-new: true - misspell: - locale: UK lll: line-length: 140 + misspell: + locale: UK + paralleltest: + ignore-missing-subtests: true # Unfortunately, we can't write t.Run("success", testSuccess) unparam: check-exported: true + varnamelen: + min-name-length: 2 + max-distance: 15 + + errcheck: + exclude-functions: + - (*io.PipeWriter).Close + + wrapcheck: + ignoreSigs: + - .Errorf( + - errors.New( + - errors.Unwrap( + - errors.Join( + - .Wrap( + - .Wrapf( + - .WithMessage( + - .WithMessagef( + - .WithStack( + - status.Error( + - Group).Wait() + + nlreturn: + block-size: 2 issues: # Excluding configuration per-path, per-linter, per-text and per-source exclude-rules: - path: _test\.go linters: - - gosec # security check is not important in tests - - dupl # we usually duplicate code in tests - bodyclose - - unparam + - contextcheck + - dupl # we usually duplicate code in tests - errcheck - - govet + - exhaustive + - funlen - gocritic - - goconst - - forcetypeassert + - gosec # security check is not important in tests + - govet + - maintidx + - nlreturn + - revive + - unparam + - varnamelen - wrapcheck + - wsl + - path: testing + linters: + - errcheck fix: true exclude-use-default: false - -run: - skip-dirs: + exclude-dirs: + - model - tmp - bin - scripts +run: + allow-parallel-runners: true tests: true build-tags: - integration linters: - disable-all: true - fast: true - enable: - - asciicheck - - bidichk - - bodyclose - - bodyclose - - containedctx - - contextcheck - - cyclop - - decorder - - depguard - - dogsled - - dupl - - durationcheck - - errcheck - - errchkjson - - errname - - errorlint - - exhaustive - - exportloopref - - forbidigo - - forcetypeassert - - funlen - - gocognit - - goconst - - gocritic - - gocyclo - - godot - - godox - - gofmt - - gofumpt - - goheader - - goimports - - gomnd - - gomoddirectives - - gomodguard - - goprintffuncname - - gosec - - gosec - - gosimple - - gosimple - - govet - - govet - - grouper - - importas - - ineffassign - - ireturn - - lll - - 
maintidx - - makezero - - misspell - - nakedret - - nestif - - nilerr - - nilnil - - nlreturn - - noctx - - nolintlint - - prealloc - - predeclared - - promlinter - - revive - - rowserrcheck - - sqlclosecheck - - staticcheck - - staticcheck - - stylecheck - - tagliatelle - - tenv - - thelper - - tparallel - - typecheck - - unconvert - - unparam - - unused - - varnamelen - - wastedassign - - whitespace - - wrapcheck - -# golangci.com configuration -# https://github.com/golangci/golangci/wiki/Configuration -service: - golangci-lint-version: 1.52.x + enable-all: true + disable: + - depguard # because I don't want to write a dedicated config file. + - execinquery #Marked as deprecated by golangci-lint. + - gomnd # Marked as deprecated by golangci-lint. Replaced with mnd + - nonamedreturns # Conflicts with unnamedResult linter. diff --git a/Makefile b/Makefile index 5c5cf66..5473a9f 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,10 @@ docker_image=askiada/external-sort include ./env.list export $(shell sed 's/=.*//' ./env.list) +.PHONY: lint +lint: + gofumpt -w -l . + golangci-lint run ./... .PHONY: test test: diff --git a/bucket/contract.go b/bucket/contract.go index 9c51477..9b63a55 100644 --- a/bucket/contract.go +++ b/bucket/contract.go @@ -1,9 +1,10 @@ package bucket import ( - "github.com/askiada/external-sort/internal/progress" "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" + + "github.com/askiada/external-sort/internal/progress" ) // S3ClientAPI S3 client contract for this repo. diff --git a/bucket/s3.go b/bucket/s3.go index 08b1121..50f4c93 100644 --- a/bucket/s3.go +++ b/bucket/s3.go @@ -6,12 +6,13 @@ import ( "context" "io" - "github.com/askiada/external-sort/internal/progress" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/pkg/errors" + + "github.com/askiada/external-sort/internal/progress" ) // S3 can read and write from/to S3 buckets using io.Reader and io.Writer @@ -46,9 +47,11 @@ func New(ctx context.Context, cfg ...ConfigFunc) (*S3, error) { if s3Val.region == "" { return nil, errors.Wrap(ErrInvalidInput, "region") } + if s3Val.bufferLen <= 0 { return nil, errors.Wrap(ErrInvalidInput, "buffer length") } + if s3Val.s3Client == nil { cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(s3Val.region), @@ -57,6 +60,7 @@ func New(ctx context.Context, cfg ...ConfigFunc) (*S3, error) { if err != nil { return nil, errors.New("can't create aws config") } + s3Val.s3Client = s3.NewFromConfig(cfg) } @@ -107,6 +111,7 @@ func (s *S3) Download(ctx context.Context, writer io.Writer, filesinfo ...*S3Fil // we need to force this to be a sequential download. 
d.Concurrency = 1 }) + ww := &seqWriterAt{writer, nil} for _, fileinfo := range filesinfo { _, err := downloader.Download(ctx, ww, &s3.GetObjectInput{ diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go index 150e593..428d416 100644 --- a/file/batchingchannels/batching_channel.go +++ b/file/batchingchannels/batching_channel.go @@ -3,9 +3,10 @@ package batchingchannels import ( "context" - "github.com/askiada/external-sort/vector" "github.com/pkg/errors" "golang.org/x/sync/errgroup" + + "github.com/askiada/external-sort/vector" ) // BatchingChannel define a standard channel, with the change that instead of producing individual elements @@ -17,7 +18,7 @@ type BatchingChannel struct { buffer vector.Vector allocate *vector.Allocate G *errgroup.Group - internalContext context.Context //nolint //containedcontext + internalContext context.Context size int maxWorker int } @@ -27,9 +28,11 @@ func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorke if size == 0 { return nil, errors.New("does not support unbuffered behaviour") } + if size < 0 { return nil, errors.New("does not support negative size") } + errGrp, errGrpContext := errgroup.WithContext(ctx) errGrp.SetLimit(maxWorker) bChan := &BatchingChannel{ @@ -41,6 +44,7 @@ func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorke G: errGrp, internalContext: errGrpContext, } + go bChan.batchingBuffer() return bChan, nil @@ -61,11 +65,11 @@ func (ch *BatchingChannel) Out() <-chan vector.Vector { // ProcessOut process specified function on each batch. func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error { for val := range ch.Out() { - val := val ch.G.Go(func() error { return f(val) }) } + err := ch.G.Wait() if err != nil { return errors.Wrap(err, "one of the task failed") @@ -93,6 +97,7 @@ func (ch *BatchingChannel) Close() { // When the batch reach maximum size or the input channel is closed, it is passed to the output channel. 
func (ch *BatchingChannel) batchingBuffer() { ch.buffer = ch.allocate.Vector(ch.size, ch.allocate.Key) + for { row, open := <-ch.input if open { @@ -109,6 +114,7 @@ func (ch *BatchingChannel) batchingBuffer() { break } + if ch.buffer.Len() == ch.size { ch.output <- ch.buffer ch.buffer = ch.allocate.Vector(ch.size, ch.allocate.Key) diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index ced13a3..56a67c8 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -7,19 +7,20 @@ import ( "testing" "time" - "github.com/askiada/external-sort/file/batchingchannels" - "github.com/askiada/external-sort/vector" - "github.com/askiada/external-sort/vector/key" "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/askiada/external-sort/file/batchingchannels" + "github.com/askiada/external-sort/vector" + "github.com/askiada/external-sort/vector/key" ) type intKey struct { value int } -func allocateInt(row interface{}) (key.Key, error) { //nolint //ireturn +func allocateInt(row interface{}) (key.Key, error) { line, ok := row.(string) if !ok { return nil, errors.Errorf("can't convert interface{} to string: %+v", row) @@ -53,7 +54,7 @@ func testBatches(t *testing.T, bChan *batchingchannels.BatchingChannel) { maxIn := 100 wgrpInput.Add(maxIn) - for idx := 0; idx < maxIn; idx++ { + for idx := range maxIn { go func(j int) { defer wgrpInput.Done() for i := maxI / maxIn * j; i < maxI*(j+1)/maxIn; i++ { @@ -81,7 +82,7 @@ func testBatches(t *testing.T, bChan *batchingchannels.BatchingChannel) { go func() { defer wgrp.Done() err := bChan.ProcessOut(func(val vector.Vector) error { - for i := 0; i < val.Len(); i++ { + for i := range val.Len() { val := val.Get(i) got <- val } diff --git a/file/chunk.go b/file/chunk.go index df9d10f..38266e0 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -5,10 +5,10 @@ import ( "path/filepath" "sort" + "github.com/pkg/errors" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" - - "github.com/pkg/errors" ) // chunkInfo define a chunk. 
@@ -28,12 +28,15 @@ func (c *chunkInfo) pullSubset(size int) (err error) { if err != nil { return errors.Wrap(err, "can't read chunk") } + err = c.buffer.PushBack(row) if err != nil { return errors.Wrap(err, "can't push back row") } + elemIdx++ } + if c.reader.Err() != nil { return errors.Wrap(c.reader.Err(), "chunk reader encountered an error") } @@ -52,6 +55,7 @@ func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, with if err != nil { return errors.Wrap(err, "can't open chunk file") } + rder, err := allocate.FnReader(chunkFile) if err != nil { return errors.Wrap(err, "can't read chunk file") @@ -67,11 +71,14 @@ func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, with reader: rder, buffer: allocate.Vector(size, allocate.Key), } + err = elem.pullSubset(size) if err != nil { return errors.Wrap(err, "can't pull chunk subset") } + c.list = append(c.list, elem) + return nil } @@ -83,6 +90,7 @@ func (c *chunks) close() error { return errors.Wrapf(err, "can't close chunk file %s", chunk.filename) } } + return nil } @@ -91,10 +99,12 @@ func (c *chunks) close() error { func (c *chunks) shrink(toShrink []int) error { for i, shrinkIndex := range toShrink { shrinkIndex -= i + err := c.list[shrinkIndex].file.Close() if err != nil { return errors.Wrapf(err, "can't close chunk file %s", c.list[shrinkIndex].filename) } + err = os.Remove(c.list[shrinkIndex].filename) if err != nil { return errors.Wrapf(err, "can't remove chunk file %s", c.list[shrinkIndex].filename) @@ -102,6 +112,7 @@ func (c *chunks) shrink(toShrink []int) error { // we want to preserve order c.list = append(c.list[:shrinkIndex], c.list[shrinkIndex+1:]...) } + return nil } @@ -136,5 +147,6 @@ func (c *chunks) min() (minChunk *chunkInfo, minValue *vector.Element, minIdx in minValue = c.list[0].buffer.Get(0) minIdx = 0 minChunk = c.list[0] + return minChunk, minValue, minIdx } diff --git a/file/file.go b/file/file.go index aa8f894..b42bcc7 100644 --- a/file/file.go +++ b/file/file.go @@ -7,13 +7,13 @@ import ( "strconv" "sync" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/askiada/external-sort/file/batchingchannels" "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/writer" - "github.com/sirupsen/logrus" - - "github.com/pkg/errors" ) var logger = logrus.StandardLogger() @@ -38,9 +38,11 @@ type Info struct { func (f *Info) check(dumpSize int) error { f.chunkIndex = 0 f.chunkPaths = []string{} + if dumpSize <= 0 { return errors.New("dump size must be greater than 0") } + return nil } @@ -49,21 +51,26 @@ func (f *Info) processInputReader(batchChan *batchingchannels.BatchingChannel, i if f.PrintMemUsage { f.mu.Collect() } + row, err := inputReader.Read() if err != nil { return errors.Wrap(err, "can't read from input reader") } + if f.WithHeader && f.headers == nil { f.headers = row } else { batchChan.In() <- row } + f.totalRows++ } batchChan.Close() + if inputReader.Err() != nil { return errors.Wrap(inputReader.Err(), "input reader encountered an error") } + return nil } @@ -73,23 +80,30 @@ func (f *Info) processBatch(vec vector.Vector, chunkFolder string) error { chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(f.chunkIndex)+".tsv") logger.Infoln("Created chunk", chunkPath) f.localMutex.Unlock() + vec.Sort() + if f.WithHeader { f.localMutex.Lock() + err := vec.PushFrontNoKey(f.headers) if err != nil { f.localMutex.Unlock() return err } + f.localMutex.Unlock() } + err := f.Allocate.Dump(vec, 
chunkPath) if err != nil { return errors.Wrapf(err, "can't dump chunk %s", chunkPath) } + f.localMutex.Lock() f.chunkPaths = append(f.chunkPaths, chunkPath) f.localMutex.Unlock() + return nil } @@ -104,6 +118,7 @@ func (f *Info) runBatchingChannel( if err != nil { return nil, errors.Wrap(err, "can't create new batching channel") } + batchChan.G.Go(func() error { return f.processInputReader(batchChan, inputReader) }) err = batchChan.ProcessOut(func(vec vector.Vector) error { @@ -111,11 +126,13 @@ func (f *Info) runBatchingChannel( if err != nil { return errors.Wrap(err, "can't process batch") } + return nil }) if err != nil { return nil, errors.Wrap(err, "can't process batching channel") } + return f.chunkPaths, nil } @@ -139,9 +156,11 @@ func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpS if err != nil { return nil, errors.Wrap(err, "can't get input reader") } + chunkPaths, err := f.runBatchingChannel(ctx, inputReader, chunkFolder, dumpSize, maxWorkers) if err != nil { return nil, errors.Wrap(err, "can't run batching channel") } + return chunkPaths, nil } diff --git a/file/sort.go b/file/sort.go index 6fda072..1c384ff 100644 --- a/file/sort.go +++ b/file/sort.go @@ -5,10 +5,11 @@ import ( "runtime" "strings" - "github.com/askiada/external-sort/vector" - "github.com/askiada/external-sort/writer" "github.com/cheggaaa/pb/v3" "github.com/pkg/errors" + + "github.com/askiada/external-sort/vector" + "github.com/askiada/external-sort/writer" ) type memUsage struct { @@ -19,10 +20,13 @@ type memUsage struct { func (mu *memUsage) Collect() { var mStats runtime.MemStats + runtime.ReadMemStats(&mStats) + if mStats.Alloc > mu.MaxAlloc { mu.MaxAlloc = mStats.Alloc } + if mStats.Sys > mu.MaxSys { mu.MaxSys = mStats.Sys } @@ -35,6 +39,7 @@ func (mu *memUsage) String() string { builder.WriteString(fmt.Sprintf("Max Alloc = %v MiB", bToMb(mu.MaxAlloc))) builder.WriteString(fmt.Sprintf(" Max Sys = %v MiB", bToMb(mu.MaxSys))) builder.WriteString(fmt.Sprintf(" NumGC = %v\n", mu.NumGc)) + return builder.String() } @@ -52,6 +57,7 @@ func (f *Info) createChunks(chunkPaths []string, k int) (*chunks, error) { return nil, errors.Wrapf(err, "can't create chunk %s", chunkPath) } } + return chunks, nil } @@ -62,6 +68,7 @@ func (f *Info) handleHeader(output vector.Vector) error { return errors.Wrapf(err, "can't add headers %+v", f.headers) } } + return nil } @@ -76,22 +83,28 @@ func (nc *nextChunk) get(output vector.Vector, createdChunks *chunks, dropDuplic if err != nil { return nil, 0, errors.Wrapf(err, "can't push back row %+v", minValue.Row) } + nc.oldElem = minValue } + return minChunk, minIdx, nil } func updateChunks(createdChunks *chunks, minChunk *chunkInfo, minIdx, k int) error { minChunk.buffer.FrontShift() + isEmpty := false + if minChunk.buffer.Len() == 0 { err := minChunk.pullSubset(k) if err != nil { return errors.Wrapf(err, "can't pull subset from chunk %s", minChunk.filename) } + // if after pulling data the chunk buffer is still empty then we can remove it if minChunk.buffer.Len() == 0 { isEmpty = true + err = createdChunks.shrink([]int{minIdx}) if err != nil { return errors.Wrapf(err, "can't shrink chunk at index %d", minIdx) @@ -102,6 +115,7 @@ func updateChunks(createdChunks *chunks, minChunk *chunkInfo, minIdx, k int) err if !isEmpty { createdChunks.moveFirstChunkToCorrectIndex() } + return nil } @@ -110,30 +124,39 @@ func (f *Info) prepareMergeSort(output vector.Vector, chunkPaths []string, outpu if err != nil { return nil, errors.Wrap(err, "can't handle headers") } + // 
create a chunk per file path
 	createdChunks, err := f.createChunks(chunkPaths, outputBufferSize)
 	if err != nil {
 		return nil, errors.Wrap(err, "can't create all chunks")
 	}
+
 	f.outputWriter, err = f.Allocate.FnWriter(f.OutputFile)
 	if err != nil {
 		return nil, errors.Wrap(err, "can't get output writer file")
 	}
+
 	return createdChunks, nil
 }
 
 func (f *Info) runMergeSort(createdChunks *chunks, output vector.Vector, outputBufferSize int, dropDuplicates bool) error {
 	bar := pb.StartNew(f.totalRows)
-	createdChunks.resetOrder()
+	defer bar.Finish()
+
 	smallestChunk := &nextChunk{}
+
+	createdChunks.resetOrder()
+
 	for {
 		if f.PrintMemUsage {
 			f.mu.Collect()
 		}
+
 		err := f.dumpOutput(createdChunks, output, outputBufferSize)
 		if err != nil {
 			return errors.Wrap(err, "can't dump output")
 		}
+
 		if createdChunks.len() == 0 {
 			break
 		}
@@ -143,17 +166,20 @@ func (f *Info) runMergeSort(createdChunks *chunks, output vector.Vector, outputB
 		if err != nil {
 			return errors.Wrap(err, "can't get next chunk with smallest value")
 		}
+
 		// remove the first element from the chunk we pulled the smallest value
 		err = updateChunks(createdChunks, minChunk, minIdx, outputBufferSize)
 		if err != nil {
 			return errors.Wrap(err, "can't update chunks")
 		}
+
 		bar.Increment()
 	}
-	bar.Finish()
+
 	if f.PrintMemUsage {
 		logger.Debugln(f.mu.String())
 	}
+
 	return nil
 }
 
@@ -164,6 +190,7 @@ func (f *Info) dumpOutput(createdChunks *chunks, output vector.Vector, outputBuf
 			return err
 		}
 	}
+
 	return nil
 }
 
@@ -171,32 +198,40 @@ func (f *Info) dumpOutput(createdChunks *chunks, output vector.Vector, outputBuf
 // It is possible to drop duplicates and define the maximum size of the output buffer before flush.
 func (f *Info) MergeSort(chunkPaths []string, outputBufferSize int, dropDuplicates bool) (err error) {
 	output := f.Allocate.Vector(outputBufferSize, f.Allocate.Key)
+
 	if f.PrintMemUsage && f.mu == nil {
 		f.mu = &memUsage{}
 	}
+
 	createdChunks, err := f.prepareMergeSort(output, chunkPaths, outputBufferSize)
 	if err != nil {
 		return errors.Wrap(err, "can't prepare merge sort")
 	}
+
 	defer func() { err = f.outputWriter.Close() }()
+
 	err = f.runMergeSort(createdChunks, output, outputBufferSize, dropDuplicates)
 	if err != nil {
 		return errors.Wrap(err, "can't run merge sort")
 	}
+
 	err = createdChunks.close()
 	if err != nil {
 		return errors.Wrap(err, "can't close created chunks")
 	}
+
 	return err
 }
 
 func writeBuffer(w writer.Writer, rows vector.Vector) error {
-	for i := 0; i < rows.Len(); i++ {
+	for i := range rows.Len() {
 		err := w.Write(rows.Get(i).Row)
 		if err != nil {
 			return errors.Wrap(err, "can't write buffer")
 		}
 	}
+
 	rows.Reset()
+
 	return nil
 }
diff --git a/file/utils.go b/file/utils.go
index 9746804..8ad9ea8 100644
--- a/file/utils.go
+++ b/file/utils.go
@@ -14,18 +14,22 @@ func clearChunkFolder(folder string) error {
 	if err != nil {
 		return errors.Wrap(err, "can't create folder")
 	}
+
 	dir, err := os.ReadDir(folder)
 	if err != nil {
 		return errors.Wrap(err, "can't read chunk folder")
 	}
+
 	for _, d := range dir {
 		if !strings.HasPrefix(d.Name(), "chunk") {
 			continue
 		}
+
 		err = os.RemoveAll(path.Join(folder, d.Name()))
 		if err != nil {
 			return errors.Wrap(err, "can't clear chunk folder")
 		}
 	}
+
 	return nil
 }
diff --git a/go.mod b/go.mod
index f7256f6..d72b023 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/askiada/external-sort
 
-go 1.20
+go 1.22
 
 require (
 	github.com/aws/aws-sdk-go-v2 v1.18.0
diff --git a/internal/progress/contract.go b/internal/progress/contract.go
index b8495c2..9fd343e 100644
--- a/internal/progress/contract.go
+++ b/internal/progress/contract.go
@@ -57,8 
+57,10 @@ func (b *Basic) Begin(total int64) { func (b *Basic) Add(val int64) { b.written += float64(val) progress := int(math.Round(b.written / b.total * 100)) //nolint //gomnd + if progress >= b.milestone { b.milestone += 5 // every 5% + logrus.Debugf("Download from S3 at %3d%%\n\n", progress) } } diff --git a/internal/rw/rw.go b/internal/rw/rw.go index e6253a6..0252beb 100644 --- a/internal/rw/rw.go +++ b/internal/rw/rw.go @@ -8,14 +8,15 @@ import ( "path/filepath" "strings" - "github.com/askiada/external-sort/bucket" - "github.com/askiada/external-sort/internal" - "github.com/askiada/external-sort/internal/progress" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sync/errgroup" + + "github.com/askiada/external-sort/bucket" + "github.com/askiada/external-sort/internal" + "github.com/askiada/external-sort/internal/progress" ) var logger = logrus.StandardLogger() @@ -27,7 +28,7 @@ type InputOutput struct { Output io.Writer outputPipe *io.PipeWriter g *errgroup.Group - internalCtx context.Context //nolint //containedcontext + internalCtx context.Context } func NewInputOutput(ctx context.Context) *InputOutput { @@ -43,6 +44,7 @@ func (i *InputOutput) s3Check(ctx context.Context) error { if i.s3Client != nil { return nil } + cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(internal.S3Region), config.WithRetryMaxAttempts(internal.S3RetryMaxAttempts), @@ -50,6 +52,7 @@ func (i *InputOutput) s3Check(ctx context.Context) error { if err != nil { return errors.New("can't create aws config") } + i.s3Client = s3.NewFromConfig(cfg) return nil @@ -61,6 +64,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) if err != nil { return errors.Wrap(err, "can't check s3") } + s3Api, err := bucket.New(ctx, bucket.Client(i.s3Client), bucket.Buffer(1_000_000), @@ -69,7 +73,9 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) if err != nil { return errors.Wrap(err, "can't create s3 client") } + files := []*bucket.S3FileInfo{} + for _, inputFile := range inputFiles { u, _ := url.Parse(inputFile) u.Path = strings.TrimLeft(u.Path, "/") @@ -85,6 +91,7 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) i.inputPipe = pr i.g.Go(func() (err error) { defer func() { err = pw.Close() }() + err = s3Api.Download(i.internalCtx, pw, files...) if err != nil { return errors.Wrap(err, "can't download files") @@ -94,13 +101,16 @@ func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) }) } else { var files []io.Reader + for _, inputFile := range inputFiles { f, err := os.Open(filepath.Clean(inputFile)) if err != nil { return errors.Wrapf(err, "can't open file %s", inputFile) } + files = append(files, f) } + i.Input = io.MultiReader(files...) 
} @@ -113,12 +123,15 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e if err != nil { return errors.Wrap(err, "can't check s3") } + outputURL, err := url.Parse(outputFile) if err != nil { return errors.Wrapf(err, "can't parse output url %s", outputFile) } + outputURL.Path = strings.TrimLeft(outputURL.Path, "/") logger.Debugf("Proto: %q, Bucket: %q, Key: %q", outputURL.Scheme, outputURL.Host, outputURL.Path) + s3Api, err := bucket.New(ctx, bucket.Client(i.s3Client), bucket.Buffer(1_000_000), @@ -133,10 +146,12 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e i.outputPipe = pw i.g.Go(func() (err error) { defer func() { err = pr.Close() }() + err = s3Api.Upload(i.internalCtx, pr, outputURL.Host, outputURL.Path) if err != nil { return errors.Wrapf(err, "can't upload file %s", outputFile) } + return err }) } else { @@ -145,6 +160,7 @@ func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (e return errors.Wrapf(err, "can't create file %s", outputFile) } } + return nil } @@ -154,10 +170,12 @@ func (i *InputOutput) Do(f func() error) { if err != nil { return err } + err = i.Close() if err != nil { return err } + return nil }) } @@ -169,12 +187,14 @@ func (i *InputOutput) Close() error { return errors.Wrap(err, "can't close input reader") } } + if i.outputPipe != nil { err := i.outputPipe.Close() if err != nil { return errors.Wrap(err, "can't close output writer") } } + return nil } @@ -182,5 +202,6 @@ func (i *InputOutput) Err() error { if err := i.g.Wait(); err != nil { return errors.Wrap(err, "one of the go routines went wrong") } + return nil } diff --git a/main.go b/main.go index 97f1039..89257d6 100644 --- a/main.go +++ b/main.go @@ -6,6 +6,11 @@ import ( "strconv" "time" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + "github.com/spf13/viper" + "github.com/askiada/external-sort/file" "github.com/askiada/external-sort/internal" "github.com/askiada/external-sort/internal/rw" @@ -13,10 +18,6 @@ import ( "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" "github.com/askiada/external-sort/writer" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/spf13/cobra" - "github.com/spf13/viper" ) var logger = logrus.StandardLogger() @@ -130,14 +131,18 @@ func newCommand() *command { RunE: shuffleRun, }, } + root.rootCmd.AddCommand(root.sortCmd, root.shuffleCmd) + return root } func main() { root := newCommand() setFlags(root) + ctx := context.Background() + cobra.CheckErr(root.rootCmd.ExecuteContext(ctx)) } @@ -150,22 +155,28 @@ func sortRun(cmd *cobra.Command, _ []string) error { start := time.Now() inputOutput := rw.NewInputOutput(cmd.Context()) + err := inputOutput.SetInputReader(cmd.Context(), internal.InputFiles...) 
if err != nil { return errors.Wrap(err, "can't set input reader") } + err = inputOutput.SetOutputWriter(cmd.Context(), internal.OutputFile) if err != nil { return errors.Wrap(err, "can't set output writer") } + tsvFields := []int{} + for _, field := range internal.TsvFields { i, err := strconv.Atoi(field) if err != nil { return errors.Wrapf(err, "can't convert field %s", field) } + tsvFields = append(tsvFields, i) } + fileInfo := &file.Info{ WithHeader: internal.WithHeader, InputReader: inputOutput.Input, @@ -176,6 +187,7 @@ func sortRun(cmd *cobra.Command, _ []string) error { if err != nil { return nil, errors.Wrapf(err, "can't allocate tsv %+v", row) } + return k, nil }, func(r io.Reader) (reader.Reader, error) { @@ -183,6 +195,7 @@ func sortRun(cmd *cobra.Command, _ []string) error { if err != nil { return nil, errors.Wrap(err, "can't create Gzip reader") } + return gzipReader, nil }, func(w io.Writer) (writer.Writer, error) { @@ -190,11 +203,13 @@ func sortRun(cmd *cobra.Command, _ []string) error { if err != nil { return nil, errors.Wrap(err, "can't create Gzip writer") } + return gzipWriter, nil }, ), PrintMemUsage: false, } + inputOutput.Do(func() error { // create small files with maximum 30 rows in each chunkPaths, err := fileInfo.CreateSortedChunks(cmd.Context(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers) @@ -207,14 +222,18 @@ func sortRun(cmd *cobra.Command, _ []string) error { if err != nil { return errors.Wrap(err, "can't merge sort") } + elapsed := time.Since(start) logger.Infoln("It took", elapsed) + return nil }) + err = inputOutput.Err() if err != nil { return errors.Wrap(err, "can't finish") } + return nil } @@ -224,12 +243,15 @@ func shuffleRun(cmd *cobra.Command, _ []string) error { logger.Infoln("Output file", internal.OutputFile) logger.Infoln("Chunk folder", internal.ChunkFolder) logger.Infoln("GZip file", internal.IsGzip) + start := time.Now() inputOutput := rw.NewInputOutput(cmd.Context()) + err := inputOutput.SetInputReader(cmd.Context(), internal.InputFiles...) 
if err != nil {
 		return errors.Wrap(err, "can't set input reader")
 	}
+
 	err = inputOutput.SetOutputWriter(cmd.Context(), internal.OutputFile)
 	if err != nil {
 		return errors.Wrap(err, "can't set output writer")
@@ -241,6 +263,7 @@ func shuffleRun(cmd *cobra.Command, _ []string) error {
 		OutputFile:    inputOutput.Output,
 		PrintMemUsage: false,
 	}
+
 	inputOutput.Do(func() error {
 		// create small files with maximum 30 rows in each
 		_, err := fileInfo.Shuffle(
@@ -255,13 +278,18 @@ func shuffleRun(cmd *cobra.Command, _ []string) error {
 		if err != nil {
 			return errors.Wrap(err, "can't create shuffled chunks")
 		}
+
 		elapsed := time.Since(start)
+
 		logger.Infoln("It took", elapsed)
+
 		return nil
 	})
+
 	err = inputOutput.Err()
 	if err != nil {
 		return errors.Wrap(err, "can't finish")
 	}
+
 	return nil
 }
diff --git a/main_bench_test.go b/main_bench_test.go
index e87982c..d31809e 100644
--- a/main_bench_test.go
+++ b/main_bench_test.go
@@ -7,13 +7,14 @@ import (
 	"path"
 	"testing"
 
+	"github.com/stretchr/testify/assert"
+
 	"github.com/askiada/external-sort/file"
 	"github.com/askiada/external-sort/internal/rw"
 	"github.com/askiada/external-sort/reader"
 	"github.com/askiada/external-sort/vector"
 	"github.com/askiada/external-sort/vector/key"
 	"github.com/askiada/external-sort/writer"
-	"github.com/stretchr/testify/assert"
 )
 
 func BenchmarkMergeSort(b *testing.B) {
diff --git a/reader/gzip_separated_values.go b/reader/gzip_separated_values.go
index d38430e..5ff4627 100644
--- a/reader/gzip_separated_values.go
+++ b/reader/gzip_separated_values.go
@@ -26,6 +26,7 @@ func NewGZipSeparatedValues(r io.Reader, separator rune) (*GZipSeparatedValuesRe
 		r: csv.NewReader(gr),
 	}
 	s.r.Comma = separator
+
 	return s, nil
 }
 
@@ -34,8 +35,10 @@ func (s *GZipSeparatedValuesReader) Next() bool {
 	if errors.Is(s.err, io.EOF) {
 		s.err = nil
 		s.gr.Close()
+
 		return false
 	}
+
 	return true
 }
 
@@ -43,6 +46,7 @@ func (s *GZipSeparatedValuesReader) Read() (interface{}, error) {
 	if s.err != nil {
 		return nil, s.err
 	}
+
 	return s.row, nil
 }
diff --git a/reader/gzip_separated_values_test.go b/reader/gzip_separated_values_test.go
index 966ef3d..0793a63 100644
--- a/reader/gzip_separated_values_test.go
+++ b/reader/gzip_separated_values_test.go
@@ -6,10 +6,11 @@ import (
 	"os"
 	"testing"
 
-	"github.com/askiada/external-sort/internal/rw"
-	"github.com/askiada/external-sort/reader"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
+	"github.com/askiada/external-sort/internal/rw"
+	"github.com/askiada/external-sort/reader"
 )
 
 func Test(t *testing.T) {
@@ -33,7 +34,10 @@ func TestS3(t *testing.T) {
 	t.Skip("to rework")
 	ctx := context.Background()
 	i := rw.NewInputOutput(ctx)
-	err := i.SetInputReader(ctx, "s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz")
+	err := i.SetInputReader(
+		ctx,
+		"s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz",
+	)
 	require.NoError(t, err)
 
 	gzipReader, err := reader.NewGZipSeparatedValues(i.Input, '\t')
diff --git a/reader/separated_values.go b/reader/separated_values.go
index fdb4fea..b2948e4 100644
--- a/reader/separated_values.go
+++ b/reader/separated_values.go
@@ -17,6 +17,7 @@ func NewSeparatedValues(r io.Reader, separator rune) *SeparatedValuesReader {
 		r: csv.NewReader(r),
 	}
 	s.r.Comma = separator
+
 	return s
 }
 
@@ -26,6 +27,7 @@ func (s *SeparatedValuesReader) Next() bool {
 		s.err = nil
 		return false
 	}
+
 	return true
 }
 
@@ -33,6 +35,7 @@ func (s *SeparatedValuesReader) Read() 
(interface{}, error) { if s.err != nil { return nil, s.err } + return s.row, nil } diff --git a/reader/std_scanner.go b/reader/std_scanner.go index c16235e..07756b5 100644 --- a/reader/std_scanner.go +++ b/reader/std_scanner.go @@ -18,20 +18,27 @@ type StdScanner struct { } func NewStdScanner(r io.Reader, isGzip bool) (Reader, error) { - var newR *bufio.Scanner - s := &StdScanner{} + var ( + newR *bufio.Scanner + s = &StdScanner{} + ) + if isGzip { gr, err := gzip.NewReader(r) if err != nil { return nil, errors.Wrap(err, "can't create gzip reader") } + s.gr = gr newR = bufio.NewScanner(gr) } else { newR = bufio.NewScanner(r) } + s.r = newR + logger.Infoln("Created standard scanner") + return s, nil } @@ -40,6 +47,7 @@ func (s *StdScanner) Next() bool { if !next && s.gr != nil { s.gr.Close() } + return next } @@ -57,19 +65,25 @@ type StdSliceScanner struct { } func NewStdSliceScanner(r io.Reader, isGzip bool) (Reader, error) { - var newR *bufio.Scanner - s := &StdSliceScanner{} + var ( + newR *bufio.Scanner + s = &StdSliceScanner{} + ) + if isGzip { gr, err := gzip.NewReader(r) if err != nil { return nil, errors.Wrap(err, "can't create gzip reader") } + s.gr = gr newR = bufio.NewScanner(gr) } else { newR = bufio.NewScanner(r) } + s.r = newR + return s, nil } @@ -78,15 +92,18 @@ func (s *StdSliceScanner) Next() bool { if !next && s.gr != nil { s.gr.Close() } + return next } func (s *StdSliceScanner) Read() (interface{}, error) { line := s.r.Text() + before, after, found := strings.Cut(line, "##!!##") if !found { return nil, errors.New("can't cut row") } + return []string{before, after}, nil } diff --git a/sftp/sftp.go b/sftp/sftp.go index e9dd341..3e60c51 100644 --- a/sftp/sftp.go +++ b/sftp/sftp.go @@ -17,29 +17,37 @@ type Client struct { func NewSFTPClient(addr, key, user, passphrase string) (*Client, error) { res := &Client{} + pemBytes, err := os.ReadFile(filepath.Clean(key)) if err != nil { log.Fatal(err) } + signer, err := ssh.ParsePrivateKeyWithPassphrase(pemBytes, []byte(passphrase)) if err != nil { log.Fatalf("parse key failed:%v", err) } + config := &ssh.ClientConfig{ User: user, - HostKeyCallback: ssh.InsecureIgnoreHostKey(), //nolint + HostKeyCallback: ssh.InsecureIgnoreHostKey(), Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, } + conn, err := ssh.Dial("tcp", addr, config) if err != nil { return nil, errors.Wrapf(err, "can't dial with address %s", addr) } + res.Conn = conn + client, err := sftp.NewClient(conn) if err != nil { return nil, errors.Wrapf(err, "can't create sftp client with address %s", addr) } + res.Client = client + return res, nil } @@ -48,5 +56,6 @@ func (s *Client) Close() error { if err != nil { return errors.Wrap(err, "can't close client") } + return s.Conn.Close() } diff --git a/vector/key/int_key.go b/vector/key/int_key.go index 73f0fd1..6575d40 100644 --- a/vector/key/int_key.go +++ b/vector/key/int_key.go @@ -12,15 +12,17 @@ type Int struct { } // AllocateInt create a new integer key. -func AllocateInt(row interface{}) (Key, error) { //nolint //ireturn +func AllocateInt(row interface{}) (Key, error) { line, ok := row.(string) if !ok { return nil, errors.Errorf("can't convert interface{} to string: %+v", row) } + num, err := strconv.Atoi(line) if err != nil { return nil, errors.Wrapf(err, "can't convert line %s to int", line) } + return &Int{num}, nil } @@ -40,15 +42,17 @@ type IntFromSlice struct { } // AllocateIntFromSlice create a new integer key from a position in a slice of integers. 
-func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) { //nolint //ireturn
+func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) {
 	line, ok := row.([]string)
 	if !ok {
 		return nil, errors.Errorf("can't convert interface{} to []string: %+v", row)
 	}
+
 	num, err := strconv.ParseInt(line[intIndex], 10, 64)
 	if err != nil {
 		return nil, errors.Wrapf(err, "can't parse int %+v", line[intIndex])
 	}
+
 	return &IntFromSlice{num}, nil
 }
diff --git a/vector/key/tsv_key.go b/vector/key/tsv_key.go
index 9c31d61..509df39 100644
--- a/vector/key/tsv_key.go
+++ b/vector/key/tsv_key.go
@@ -13,12 +13,16 @@ func AllocateTsv(row interface{}, pos ...int) (Key, error) {
 	if !ok {
 		return nil, errors.Errorf("can't convert interface{} to []string: %+v", row)
 	}
+
 	strBuilder := strings.Builder{}
+
 	for i, p := range pos {
 		if len(splitted) < p+1 {
 			return nil, errors.Errorf("can't allocate tsv key line is invalid: %s", row)
 		}
+
 		strBuilder.WriteString(splitted[p])
+
 		if i < len(pos)-1 {
 			strBuilder.WriteString(salt)
 		}
diff --git a/vector/slice_vector.go b/vector/slice_vector.go
index ba52d75..199b147 100644
--- a/vector/slice_vector.go
+++ b/vector/slice_vector.go
@@ -37,7 +37,9 @@ func (v *SliceVec) PushBack(row interface{}) error {
 	if err != nil {
 		return err
 	}
+
 	v.s = append(v.s, &Element{Row: row, Key: k})
+
 	return nil
 }
diff --git a/vector/vector.go b/vector/vector.go
index d6b059d..9961886 100644
--- a/vector/vector.go
+++ b/vector/vector.go
@@ -4,10 +4,11 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/pkg/errors"
+
 	"github.com/askiada/external-sort/reader"
 	"github.com/askiada/external-sort/vector/key"
 	"github.com/askiada/external-sort/writer"
-	"github.com/pkg/errors"
 )
 
 // Allocate define a vector and methods to read and write it.
@@ -54,23 +55,28 @@ func (a *Allocate) Dump(vec Vector, filename string) error {
 	if err != nil {
 		return errors.Errorf("failed creating file: %s", err)
 	}
+
 	datawriter, err := a.FnWriter(file)
 	if err != nil {
 		return errors.Errorf("failed creating writer: %s", err)
 	}
-	for i := 0; i < vec.Len(); i++ {
+
+	for i := range vec.Len() {
 		err = datawriter.Write(vec.Get(i).Row)
 		if err != nil {
 			return errors.Errorf("failed writing file: %s", err)
 		}
 	}
+
 	err = datawriter.Close()
 	if err != nil {
 		return errors.Wrap(err, "can't close chunk writer")
 	}
+
 	err = file.Close()
 	if err != nil {
 		return errors.Wrap(err, "can't close chunk file")
 	}
+
 	return nil
 }
diff --git a/writer/contract.go b/writer/contract.go
index 78b4793..c23fddb 100644
--- a/writer/contract.go
+++ b/writer/contract.go
@@ -3,8 +3,8 @@ package writer
 import "io"
 
 type Writer interface {
-	Write(interface{}) error
-	Close() error
+	Write(row interface{}) (err error)
+	Close() (err error)
 }
 
 type Config func(w io.Writer) (Writer, error)
diff --git a/writer/gzip_separated_values.go b/writer/gzip_separated_values.go
index 4e37a33..a86a1f6 100644
--- a/writer/gzip_separated_values.go
+++ b/writer/gzip_separated_values.go
@@ -20,6 +20,7 @@ func NewGZipSeparatedValues(w io.Writer, separator rune) (Writer, error) {
 		w: csv.NewWriter(gw),
 	}
 	s.w.Comma = separator
+
 	return s, nil
 }
 
@@ -28,18 +29,22 @@ func (s *GZipSeparatedValuesWriter) Write(elem interface{}) error {
 	if !ok {
 		return errors.Errorf("can't convert interface{} to []string: %+v", elem)
 	}
+
 	err := s.w.Write(line)
 	if err != nil {
 		return errors.Wrap(err, "can't write line")
 	}
+
 	return nil
 }
 
 func (s *GZipSeparatedValuesWriter) Close() (err error) {
 	defer func() { err = s.gw.Close() }()
 	s.w.Flush()
+
 	if s.w.Error() != nil {
 		return errors.Wrap(s.w.Error(), "can't close writer")
 	}
+
 	return err
 }
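[Editor's note] Every Close touched in this patch moves to the same shape: a named return plus a deferred close, so the error from closing the gzip stream is not silently dropped. A minimal standalone sketch of the pattern, stdlib only (the writeCompressed helper is hypothetical and not part of the series):

	package main

	import (
		"compress/gzip"
		"fmt"
		"os"
	)

	// writeCompressed gzips data to path. The named return err lets the
	// deferred closes report a failure (for example while flushing the
	// gzip footer) that a plain `defer gw.Close()` would lose.
	func writeCompressed(path string, data []byte) (err error) {
		f, err := os.Create(path)
		if err != nil {
			return fmt.Errorf("create %s: %w", path, err)
		}

		gw := gzip.NewWriter(f)
		defer func() {
			// Only overwrite err when the body succeeded, so the
			// first failure wins.
			if cerr := gw.Close(); cerr != nil && err == nil {
				err = cerr
			}
			if cerr := f.Close(); cerr != nil && err == nil {
				err = cerr
			}
		}()

		_, err = gw.Write(data)

		return err
	}

	func main() {
		if err := writeCompressed("out.gz", []byte("hello\n")); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}
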
writer") } + return err } diff --git a/writer/separated_values.go b/writer/separated_values.go index 4e073ea..0303ce2 100644 --- a/writer/separated_values.go +++ b/writer/separated_values.go @@ -16,6 +16,7 @@ func NewSeparatedValues(w io.Writer, separator rune) Writer { w: csv.NewWriter(w), } s.w.Comma = separator + return s } @@ -24,17 +25,21 @@ func (s *SeparatedValuesWriter) Write(elem interface{}) error { if !ok { return errors.Errorf("can't converte interface{} to []string: %+v", elem) } + err := s.w.Write(line) if err != nil { return errors.Wrap(err, "can't write line") } + return nil } func (s *SeparatedValuesWriter) Close() error { s.w.Flush() + if s.w.Error() != nil { return errors.Wrap(s.w.Error(), "can't close writer") } + return nil } diff --git a/writer/std_writer.go b/writer/std_writer.go index c8ab78e..2398a03 100644 --- a/writer/std_writer.go +++ b/writer/std_writer.go @@ -15,10 +15,11 @@ type StdWriter struct { } // NewStdWriter create a standard writer. -func NewStdWriter(w io.Writer) Writer { //nolint //ireturn +func NewStdWriter(w io.Writer) Writer { s := &StdWriter{ w: bufio.NewWriter(w), } + return s } @@ -27,10 +28,12 @@ func (w *StdWriter) Write(elem interface{}) error { if !ok { return errors.Errorf("can't converte interface{} to string: %+v", elem) } + _, err := w.w.WriteString(line + "\n") if err != nil { return errors.Wrap(err, "can't write string") } + return err } @@ -40,6 +43,7 @@ func (w *StdWriter) Close() error { if err != nil { return errors.Wrap(err, "can't close writer") } + return nil } @@ -50,17 +54,22 @@ type StdSliceWriter struct { } func NewStdSliceWriter(w io.Writer, skipFirst, isGzip bool) Writer { - var newR *bufio.Writer - ssw := &StdSliceWriter{ - skipFirst: skipFirst, - } + var ( + newR *bufio.Writer + ssw = &StdSliceWriter{ + skipFirst: skipFirst, + } + ) + if isGzip { ssw.gw = gzip.NewWriter(w) newR = bufio.NewWriter(ssw.gw) } else { newR = bufio.NewWriter(w) } + ssw.w = newR + return ssw } @@ -69,13 +78,16 @@ func (w *StdSliceWriter) Write(elem interface{}) error { if !ok { return errors.Errorf("can't converte interface{} to string: %+v", elem) } + if w.skipFirst { line = line[1:] } + _, err := w.w.WriteString(strings.Join(line, "##!!##") + "\n") if err != nil { return errors.Wrap(err, "can't write string") } + return err } @@ -84,9 +96,11 @@ func (w *StdSliceWriter) Close() (err error) { if w.gw != nil { defer func() { err = w.gw.Close() }() } + err = w.w.Flush() if err != nil { return errors.Wrap(err, "can't close writer") } + return err }