diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000..03c280b --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,31 @@ +s3: + - bucket/* + - bucket/**/* + +file: + - file/* + - file/**/* + +internal: + - internal/* + - internal/**/* + +reader: + - reader/* + - reader/**/* + +sftp: + - sftp/* + - sftp/**/* + +vector: + - vector/* + - vector/**/* + +writer: + - writer/* + - writer/**/* + +ci: + - .github/* + - .github/**/* diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 0000000..b8724c4 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,61 @@ +name: Continuous Integration + +on: + push: + branches: [master] + pull_request: + branches: + - master + - feature/* + - bugfix/* + - refactor/* + - chore/* + +jobs: + label: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + + steps: + - uses: actions/labeler@v3 + + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '>=1.20.0' + + - uses: actions/cache@v3 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - name: Running Tests + run: chmod -R +rw ./testdata && make ci_tests + + lint: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: golangci-lint + uses: golangci/golangci-lint-action@v2 + with: + version: v1.52.x + args: --timeout 5m0s diff --git a/.gitignore b/.gitignore index 51c59c4..ac81399 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,11 @@ bench* gen* chunk_*.tsv -bin/ \ No newline at end of file +bin/ +*.csv +*.tsv +*.gz +coverage.out +testdata/chunks/*.csv +testdata/chunks/*.tsv +testdata/chunks/*.gz diff --git a/.golangci.yml b/.golangci.yml index fc414e3..ba3f9e4 100644 ---
a/.golangci.yml +++ b/.golangci.yml @@ -1,106 +1,120 @@ linters-settings: - govet: - settings: - printf: - funcs: - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf - enable: - - fieldalignment - # golint: - # min-confidence: 0 - gocyclo: - min-complexity: 15 - maligned: - suggest-new: true - goconst: - min-len: 2 - min-occurrences: 2 - # misspell: # disabled as it was breaking interfaces with FinaliZe - # locale: UK - lll: - line-length: 140 - goimports: - local-prefixes: github.com/golangci/golangci-lint - gocritic: - enabled-tags: - - diagnostic - - experimental - - opinionated - - performance - - style - funlen: - lines: 100 - statements: 50 - godot: - capital: true - unparam: - check-exported: true + cyclop: + skip-tests: true + max-complexity: 15 + exhaustive: + default-signifies-exhaustive: true + funlen: + lines: 110 + statements: 70 + gci: + sections: + - standard + - default + - prefix(github.com/askiada) + - localmodule + custom-order: true + goconst: + min-len: 2 + min-occurrences: 2 + gocritic: + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + gocyclo: + min-complexity: 15 + godot: + capital: true + goimports: + local-prefixes: github.com/askiada/external-sort + govet: + settings: + printf: + funcs: + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf + disable: + - fieldalignment + lll: + line-length: 140 + misspell: + locale: UK + paralleltest: + ignore-missing-subtests: true # Unfortunately, we can't write t.Run("success", testSuccess) + unparam: + check-exported: true + varnamelen: + min-name-length: 2 + 
max-distance: 15 -issues: - # Excluding configuration per-path, per-linter, per-text and per-source - exclude-rules: - - path: _test\.go - linters: - - gosec # security check is not impoerant in tests - - dupl # we usualy duplicate code in tests - - bodyclose - - unparam - fix: true + errcheck: + exclude-functions: + - (*io.PipeWriter).Close -run: - skip-dirs: - - model - - tmp - - bin - - scripts + wrapcheck: + ignoreSigs: + - .Errorf( + - errors.New( + - errors.Unwrap( + - errors.Join( + - .Wrap( + - .Wrapf( + - .WithMessage( + - .WithMessagef( + - .WithStack( + - status.Error( + - Group).Wait() - tests: true - build-tags: - - integration + nlreturn: + block-size: 2 -linters: - disable-all: true - fast: true - enable: - - asciicheck +issues: + # Excluding configuration per-path, per-linter, per-text and per-source + exclude-rules: + - path: _test\.go + linters: - bodyclose - - deadcode - - dogsled - - depguard - - dupl - - errorlint - - gocognit - - goconst + - contextcheck + - dupl # we usually duplicate code in tests + - errcheck + - exhaustive + - funlen - gocritic - - gocyclo - - godot - - godox - - golint - - goprintffuncname - - gosec - - gosimple + - gosec # security check is not important in tests - govet - # - misspell # disabled as it was breaking interfaces with FinaliZe - - nakedret - - nestif - - prealloc - - rowserrcheck - - scopelint - - staticcheck - - stylecheck - - unconvert - # - unparam # Too many false positives on Task interface implementation. 
- - unused - - whitespace - # - wrapcheck - - tparallel + - maintidx + - nlreturn + - revive + - unparam + - varnamelen + - wrapcheck + - wsl + - path: testing + linters: + - errcheck + fix: true + exclude-use-default: false + exclude-dirs: + - model + - tmp + - bin + - scripts -# golangci.com configuration -# https://github.com/golangci/golangci/wiki/Configuration -service: - golangci-lint-version: 1.38.x - prepare: - - echo "here I can run custom commands, but no preparation needed for this repo" +run: + allow-parallel-runners: true + tests: true + build-tags: + - integration + +linters: + enable-all: true + disable: + - depguard # because I don't want to write a dedicated config file. + - execinquery #Marked as deprecated by golangci-lint. + - gomnd # Marked as deprecated by golangci-lint. Replaced with mnd + - nonamedreturns # Conflicts with unnamedResult linter. diff --git a/Makefile b/Makefile index acd6107..5473a9f 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,10 @@ docker_image=askiada/external-sort include ./env.list export $(shell sed 's/=.*//' ./env.list) +.PHONY: lint +lint: + gofumpt -w -l . + golangci-lint run ./... .PHONY: test test: @@ -17,9 +21,13 @@ test: test_race: go test -race ./... -.PHONY: run -run: build - ./bin/external-sort +.PHONY: run_sort +run_sort: build + ./bin/external-sort sort + +.PHONY: run_shuffle +run_shuffle: build + ./bin/external-sort shuffle .PHONY: build build: @@ -29,4 +37,8 @@ build: build_docker: ## Build a docker image from current git sha @docker build \ --build-arg BUILDKIT_INLINE_CACHE=1 \ - -t $(docker_image):$(tag) . \ No newline at end of file + -t $(docker_image):$(tag) . + +.PHONY: ci_tests +ci_tests: ## Run tests for CI environment. + go test -trimpath --timeout=10m -failfast -v -race -covermode=atomic -coverprofile=coverage.out ./... 
diff --git a/bucket/contract.go b/bucket/contract.go new file mode 100644 index 0000000..9b63a55 --- /dev/null +++ b/bucket/contract.go @@ -0,0 +1,64 @@ +package bucket + +import ( + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + + "github.com/askiada/external-sort/internal/progress" +) + +// S3ClientAPI S3 client contract for this repo. +type S3ClientAPI interface { + manager.UploadAPIClient + manager.DownloadAPIClient + s3.HeadObjectAPIClient +} + +// ConfigFunc is a function that can be passed to the New function to configure +// the S3 object. +type ConfigFunc func(s *S3) + +// Region sets the region of the S3 bucket. +func Region(region string) ConfigFunc { + return func(s *S3) { + s.region = region + } +} + +// PartBodyMaxRetries sets the number of retries when performing upload multi part. +func PartBodyMaxRetries(r int) ConfigFunc { + return func(s *S3) { + s.partBodyMaxRetries = r + } +} + +const mbConversion = 1024 * 1024 + +// Buffer is the amount of memory in MB to use for buffering the data. +func Buffer(buffer int) ConfigFunc { + return func(s *S3) { + s.bufferLen = buffer * mbConversion + } +} + +// Client sets the S3 client to use. If you provide this option, we will not be +// able to set the region. +func Client(client S3ClientAPI) ConfigFunc { + return func(s *S3) { + s.s3Client = client + } +} + +// MaxRetries sets the maximum number of retried per request before returning an error. +func MaxRetries(maxRetries int) ConfigFunc { + return func(s *S3) { + s.maxRetries = maxRetries + } +} + +// Progress sets a progress bar to be used when performing bucket actions. +func Progress(p progress.Progress) ConfigFunc { + return func(s *S3) { + s.progress = p + } +} diff --git a/bucket/errors.go b/bucket/errors.go new file mode 100644 index 0000000..7347995 --- /dev/null +++ b/bucket/errors.go @@ -0,0 +1,6 @@ +package bucket + +import "errors" + +// ErrInvalidInput is returned when the input is invalid. 
+var ErrInvalidInput = errors.New("invalid input") diff --git a/bucket/s3.go b/bucket/s3.go new file mode 100644 index 0000000..50f4c93 --- /dev/null +++ b/bucket/s3.go @@ -0,0 +1,127 @@ +// Package bucket implements the io.ReadWriter for communication with the S3 +// API. +package bucket + +import ( + "context" + "io" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/pkg/errors" + + "github.com/askiada/external-sort/internal/progress" +) + +// S3 can read and write from/to S3 buckets using io.Reader and io.Writer +// inputs. +type S3 struct { + s3Client S3ClientAPI + progress progress.Progress + region string + maxRetries int + bufferLen int + partBodyMaxRetries int +} + +const ( + defaultBufferLen = 1024 + defaultMaxRetries = 10 + defaultPartBodyMaxRetries = 3 +) + +// New returns an instance of the S3 struct. +func New(ctx context.Context, cfg ...ConfigFunc) (*S3, error) { + s3Val := &S3{ + region: "eu-west-1", + bufferLen: defaultBufferLen, + maxRetries: defaultMaxRetries, + partBodyMaxRetries: defaultPartBodyMaxRetries, + } + for _, c := range cfg { + c(s3Val) + } + + if s3Val.region == "" { + return nil, errors.Wrap(ErrInvalidInput, "region") + } + + if s3Val.bufferLen <= 0 { + return nil, errors.Wrap(ErrInvalidInput, "buffer length") + } + + if s3Val.s3Client == nil { + cfg, err := config.LoadDefaultConfig(ctx, + config.WithRegion(s3Val.region), + config.WithRetryMaxAttempts(s3Val.maxRetries), + ) + if err != nil { + return nil, errors.New("can't create aws config") + } + + s3Val.s3Client = s3.NewFromConfig(cfg) + } + + return s3Val, nil +} + +// Upload reads from the reader and uploads it to the S3 bucket with the +// filename key. 
+func (s *S3) Upload(ctx context.Context, reader io.Reader, bucket, key string) error { + uploader := manager.NewUploader(s.s3Client, func(u *manager.Uploader) { + u.BufferProvider = manager.NewBufferedReadSeekerWriteToPool(s.bufferLen) + }) + _, err := uploader.Upload(ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Body: reader, + }) + + return errors.Wrap(err, "upload failed") +} + +type seqWriterAt struct { + w io.Writer + progressFunc func(n int) +} + +func (s *seqWriterAt) WriteAt(p []byte, _ int64) (n int, err error) { + n, err = s.w.Write(p) + if s.progressFunc != nil { + s.progressFunc(n) + } + + return n, errors.Wrap(err, "can't write bytes at offset") +} + +// S3FileInfo define the path to a file on S3. +type S3FileInfo struct { + Bucket string + Key string +} + +// Download downloads the file from the S3 bucket with the filename key and +// writes it to the writer. +func (s *S3) Download(ctx context.Context, writer io.Writer, filesinfo ...*S3FileInfo) error { + downloader := manager.NewDownloader(s.s3Client, func(d *manager.Downloader) { + d.PartBodyMaxRetries = s.partBodyMaxRetries + d.PartSize = int64(s.bufferLen) + // we need to force this to be a sequential download. 
+ d.Concurrency = 1 + }) + + ww := &seqWriterAt{writer, nil} + for _, fileinfo := range filesinfo { + _, err := downloader.Download(ctx, ww, &s3.GetObjectInput{ + Bucket: aws.String(fileinfo.Bucket), + Key: aws.String(fileinfo.Key), + }) + if err != nil { + return errors.Wrapf(err, "download failed for bucket %s and key %s", fileinfo.Bucket, fileinfo.Key) + } + } + + return nil +} diff --git a/env.list b/env.list index 169be0f..8045cc0 100644 --- a/env.list +++ b/env.list @@ -1,6 +1,13 @@ -INPUT_PATH=./works.tsv -OUTPUT_PATH=./output.tsv +INPUT_PATHS=./output.tsv.gz +OUTPUT_PATH=./output_shuffled.tsv.gz CHUNK_FOLDER=./data/chunks/ CHUNK_SIZE=1000000 -MAX_WORKERS=10 -OUTPUT_BUFFER_SIZE=1000 \ No newline at end of file +MAX_WORKERS=40 +OUTPUT_BUFFER_SIZE=1000000 +IS_GZIP=true + +S3_REGION=eu-west-1 +S3_BUCKET=blokur-data +S3_RETRY_MAX_ATTEMPTS=10 + +WITH_HEADER=true \ No newline at end of file diff --git a/env_sort.list b/env_sort.list new file mode 100644 index 0000000..55d2dc6 --- /dev/null +++ b/env_sort.list @@ -0,0 +1,13 @@ +INPUT_PATHS=s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.0.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.1.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.2.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.3.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.4.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.5.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.6.tsv.gz 
s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.7.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.8.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.9.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.10.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.11.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.12.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.13.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.14.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.15.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.16.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.17.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.18.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.19.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.20.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.21.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.22.tsv.gz 
s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.23.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.24.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.25.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.26.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.27.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.28.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.29.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.30.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.31.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.32.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.33.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.34.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.35.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.36.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.37.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.38.tsv.gz 
s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.39.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.40.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.41.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.42.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.43.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.44.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.45.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.46.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.47.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.48.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.49.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.50.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.51.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.52.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.53.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.54.tsv.gz 
s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.55.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.56.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.57.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.58.tsv.gz s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz +OUTPUT_PATH=./output.tsv.gz +CHUNK_FOLDER=./data/chunks/ +CHUNK_SIZE=1000000 +MAX_WORKERS=30 +OUTPUT_BUFFER_SIZE=100000 +TSV_FIELDS=1 + +S3_REGION=eu-west-1 +S3_BUCKET=blokur-data +S3_RETRY_MAX_ATTEMPTS=10 + +WITH_HEADER=true \ No newline at end of file diff --git a/file/batchingchannels/batching_channel.go b/file/batchingchannels/batching_channel.go index 6826685..428d416 100644 --- a/file/batchingchannels/batching_channel.go +++ b/file/batchingchannels/batching_channel.go @@ -3,49 +3,55 @@ package batchingchannels import ( "context" - "github.com/askiada/external-sort/vector" + "github.com/pkg/errors" "golang.org/x/sync/errgroup" - "golang.org/x/sync/semaphore" + + "github.com/askiada/external-sort/vector" ) -// BatchingChannel implements the Channel interface, with the change that instead of producing individual elements -// on Out(), it batches together the entire internal buffer each time. Trying to construct an unbuffered batching channel +// BatchingChannel define a standard channel, with the change that instead of producing individual elements +// on Out(), it batches together n elements each time. Trying to construct an unbuffered batching channel // will panic, that configuration is not supported (and provides no benefit over an unbuffered NativeChannel). 
type BatchingChannel struct { - input chan string - output chan vector.Vector - buffer vector.Vector - allocate *vector.Allocate - g *errgroup.Group - sem *semaphore.Weighted - dCtx context.Context - size int - maxWorker int64 + input chan interface{} + output chan vector.Vector + buffer vector.Vector + allocate *vector.Allocate + G *errgroup.Group + internalContext context.Context + size int + maxWorker int } -func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker int64, size int) *BatchingChannel { +// NewBatchingChannel create a batching channel. +func NewBatchingChannel(ctx context.Context, allocate *vector.Allocate, maxWorker, size int) (*BatchingChannel, error) { if size == 0 { - panic("channels: BatchingChannel does not support unbuffered behaviour") + return nil, errors.New("does not support unbuffered behaviour") } + if size < 0 { - panic("channels: invalid negative size in NewBatchingChannel") + return nil, errors.New("does not support negative size") } - g, dCtx := errgroup.WithContext(ctx) - ch := &BatchingChannel{ - input: make(chan string), - output: make(chan vector.Vector), - size: size, - allocate: allocate, - maxWorker: maxWorker, - g: g, - sem: semaphore.NewWeighted(maxWorker), - dCtx: dCtx, + + errGrp, errGrpContext := errgroup.WithContext(ctx) + errGrp.SetLimit(maxWorker) + bChan := &BatchingChannel{ + input: make(chan interface{}), + output: make(chan vector.Vector), + size: size, + allocate: allocate, + maxWorker: maxWorker, + G: errGrp, + internalContext: errGrpContext, } - go ch.batchingBuffer() - return ch + + go bChan.batchingBuffer() + + return bChan, nil } -func (ch *BatchingChannel) In() chan<- string { +// In add element to input channel. +func (ch *BatchingChannel) In() chan<- interface{} { return ch.input } @@ -56,53 +62,59 @@ func (ch *BatchingChannel) Out() <-chan vector.Vector { return ch.output } +// ProcessOut process specified function on each batch. 
func (ch *BatchingChannel) ProcessOut(f func(vector.Vector) error) error { for val := range ch.Out() { - if err := ch.sem.Acquire(ch.dCtx, 1); err != nil { - return err - } - val := val - ch.g.Go(func() error { - defer ch.sem.Release(1) + ch.G.Go(func() error { return f(val) }) } - err := ch.g.Wait() + + err := ch.G.Wait() if err != nil { - return err + return errors.Wrap(err, "one of the task failed") } + return nil } +// Len returns the maximum number of elements in a batch. func (ch *BatchingChannel) Len() int { return ch.size } +// Cap returns the maximum capacity of a batch. func (ch *BatchingChannel) Cap() int { return ch.size } +// Close closes the input channel. func (ch *BatchingChannel) Close() { close(ch.input) } +// batchingBuffer adds input elements to the next batch available. +// When the batch reaches maximum size or the input channel is closed, it is passed to the output channel. func (ch *BatchingChannel) batchingBuffer() { ch.buffer = ch.allocate.Vector(ch.size, ch.allocate.Key) + for { - elem, open := <-ch.input + row, open := <-ch.input if open { - err := ch.buffer.PushBack(elem) + err := ch.buffer.PushBack(row) if err != nil { - ch.g.Go(func() error { - return err + ch.G.Go(func() error { + return errors.Wrap(err, "can't push back row") }) } } else { if ch.buffer.Len() > 0 { ch.output <- ch.buffer } + break } + if ch.buffer.Len() == ch.size { ch.output <- ch.buffer ch.buffer = ch.allocate.Vector(ch.size, ch.allocate.Key) diff --git a/file/batchingchannels/batching_channel_test.go b/file/batchingchannels/batching_channel_test.go index d9a0cce..56a67c8 100644 --- a/file/batchingchannels/batching_channel_test.go +++ b/file/batchingchannels/batching_channel_test.go @@ -7,51 +7,65 @@ import ( "testing" "time" + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/askiada/external-sort/file/batchingchannels" "github.com/askiada/external-sort/vector"
"github.com/askiada/external-sort/vector/key" - "github.com/stretchr/testify/assert" ) -type Int struct { +type intKey struct { value int } -func AllocateInt(line string) (key.Key, error) { +func allocateInt(row interface{}) (key.Key, error) { + line, ok := row.(string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to string: %+v", row) + } num, err := strconv.Atoi(line) if err != nil { return nil, err } - return &Int{num}, nil + + return &intKey{num}, nil } -func (k *Int) Get() int { +func (k *intKey) Get() int { return k.value } -func (k *Int) Less(other key.Key) bool { - return k.value < other.(*Int).value +func (k *intKey) Less(other key.Key) bool { + return k.value < other.(*intKey).value } -func testBatches(t *testing.T, ch *batchingchannels.BatchingChannel) { + +func (k *intKey) Equal(other key.Key) bool { + return k.value == other.(*intKey).value +} + +func testBatches(t *testing.T, bChan *batchingchannels.BatchingChannel) { + t.Helper() maxI := 10000 expectedSum := (maxI - 1) * maxI / 2 - wg := &sync.WaitGroup{} - wgInput := &sync.WaitGroup{} + wgrp := &sync.WaitGroup{} + wgrpInput := &sync.WaitGroup{} maxIn := 100 - wgInput.Add(maxIn) - for j := 0; j < maxIn; j++ { + wgrpInput.Add(maxIn) + for idx := range maxIn { go func(j int) { - defer wgInput.Done() + defer wgrpInput.Done() for i := maxI / maxIn * j; i < maxI*(j+1)/maxIn; i++ { - ch.In() <- strconv.Itoa(i) + bChan.In() <- strconv.Itoa(i) } - }(j) + }(idx) } go func() { - wgInput.Wait() - ch.Close() + wgrpInput.Wait() + bChan.Close() }() got := make(chan *vector.Element, maxI) @@ -61,60 +75,65 @@ func testBatches(t *testing.T, ch *batchingchannels.BatchingChannel) { go func() { defer wgSum.Done() for g := range got { - gotSum += g.Key.(*Int).Get() + gotSum += g.Key.(*intKey).Get() } }() - wg.Add(1) + wgrp.Add(1) go func() { - defer wg.Done() - err := ch.ProcessOut(func(val vector.Vector) error { - for i := 0; i < val.Len(); i++ { + defer wgrp.Done() + err := 
bChan.ProcessOut(func(val vector.Vector) error { + for i := range val.Len() { val := val.Get(i) got <- val } time.Sleep(3 * time.Millisecond) + return nil }) if err != nil { panic(err) } }() - wg.Wait() + wgrp.Wait() close(got) wgSum.Wait() assert.Equal(t, expectedSum, gotSum) } func TestBatchingChannel(t *testing.T) { - allocate := vector.DefaultVector(AllocateInt) - ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50) - testBatches(t, ch) - - ch = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 3) - testBatches(t, ch) - - ch = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 1) - testChannelConcurrentAccessors(t, "batching channel", ch) + allocate := vector.DefaultVector(allocateInt, nil, nil) + bChan, err := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 50) + require.NoError(t, err) + testBatches(t, bChan) + + bChan, err = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 3) + require.NoError(t, err) + testBatches(t, bChan) + + bChan, err = batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 1) + require.NoError(t, err) + testChannelConcurrentAccessors(t, bChan) } func TestBatchingChannelCap(t *testing.T) { - allocate := vector.DefaultVector(AllocateInt) - ch := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 5) - if ch.Cap() != 5 { + allocate := vector.DefaultVector(allocateInt, nil, nil) + bChan, err := batchingchannels.NewBatchingChannel(context.Background(), allocate, 2, 5) + require.NoError(t, err) + if bChan.Cap() != 5 { t.Error("incorrect capacity on infinite channel") } } -func testChannelConcurrentAccessors(t *testing.T, name string, ch *batchingchannels.BatchingChannel) { +func testChannelConcurrentAccessors(_ *testing.T, bChan *batchingchannels.BatchingChannel) { // no asserts here, this is just for the race detector's benefit - go ch.Len() - go ch.Cap() + go bChan.Len() + go bChan.Cap() go 
func() { - ch.In() <- "" + bChan.In() <- "" }() go func() { - <-ch.Out() + <-bChan.Out() }() } diff --git a/file/batchingchannels/doc.go b/file/batchingchannels/doc.go new file mode 100644 index 0000000..9874cca --- /dev/null +++ b/file/batchingchannels/doc.go @@ -0,0 +1,2 @@ +// Package batchingchannels defines a standard channel processing the output per batch. +package batchingchannels diff --git a/file/chunk.go b/file/chunk.go index 77966c0..38266e0 100644 --- a/file/chunk.go +++ b/file/chunk.go @@ -1,19 +1,20 @@ package file import ( - "bufio" "os" + "path/filepath" "sort" - "github.com/askiada/external-sort/vector" - "github.com/pkg/errors" + "github.com/pkg/errors" + "github.com/askiada/external-sort/reader" + "github.com/askiada/external-sort/vector" ) -// chunkInfo Describe a chunk. +// chunkInfo defines a chunk. type chunkInfo struct { file *os.File - scanner *bufio.Scanner + reader reader.Reader buffer vector.Vector filename string } @@ -21,15 +22,25 @@ type chunkInfo struct { // pullSubset Add to vector the specified number of elements. // It stops if there is no elements left to add. func (c *chunkInfo) pullSubset(size int) (err error) { - i := 0 - for i < size && c.scanner.Scan() { - text := c.scanner.Text() - c.buffer.PushBack(text) - i++ + elemIdx := 0 + for elemIdx < size && c.reader.Next() { + row, err := c.reader.Read() + if err != nil { + return errors.Wrap(err, "can't read chunk") + } + + err = c.buffer.PushBack(row) + if err != nil { + return errors.Wrap(err, "can't push back row") + } + + elemIdx++ } - if c.scanner.Err() != nil { - return c.scanner.Err() + + if c.reader.Err() != nil { + return errors.Wrap(c.reader.Err(), "chunk reader encountered an error") } + return nil } @@ -38,24 +49,36 @@ type chunks struct { list []*chunkInfo } -// new Create a new chunk and initialize it. -func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int) error { - f, err := os.Open(chunkPath) +// new creates a new chunk and initialises it.
+func (c *chunks) new(chunkPath string, allocate *vector.Allocate, size int, withHeader bool) error { + chunkFile, err := os.Open(filepath.Clean(chunkPath)) + if err != nil { + return errors.Wrap(err, "can't open chunk file") + } + + rder, err := allocate.FnReader(chunkFile) if err != nil { - return err + return errors.Wrap(err, "can't read chunk file") + } + + if withHeader { + rder.Next() } - scanner := bufio.NewScanner(f) + elem := &chunkInfo{ filename: chunkPath, - file: f, - scanner: scanner, + file: chunkFile, + reader: rder, buffer: allocate.Vector(size, allocate.Key), } + err = elem.pullSubset(size) if err != nil { - return err + return errors.Wrap(err, "can't pull chunk subset") } + c.list = append(c.list, elem) + return nil } @@ -64,9 +87,10 @@ func (c *chunks) close() error { for _, chunk := range c.list { err := chunk.file.Close() if err != nil { - return errors.Wrap(err, "close") + return errors.Wrapf(err, "can't close chunk file %s", chunk.filename) } } + return nil } @@ -75,17 +99,20 @@ func (c *chunks) close() error { func (c *chunks) shrink(toShrink []int) error { for i, shrinkIndex := range toShrink { shrinkIndex -= i + err := c.list[shrinkIndex].file.Close() if err != nil { - return err + return errors.Wrapf(err, "can't close chunk file %s", c.list[shrinkIndex].filename) } + err = os.Remove(c.list[shrinkIndex].filename) if err != nil { - return err + return errors.Wrapf(err, "can't remove chunk file %s", c.list[shrinkIndex].filename) } // we want to preserve order c.list = append(c.list[:shrinkIndex], c.list[shrinkIndex+1:]...) } + return nil } @@ -111,7 +138,7 @@ func (c *chunks) moveFirstChunkToCorrectIndex() { pos := sort.Search(len(c.list), func(i int) bool { return !vector.Less(c.list[i].buffer.Get(0), elem.buffer.Get(0)) }) - // TODO: c.list = c.list[1:] and the following line create an unecessary allocation. + // TODO: c.list = c.list[1:] and the following line create an unnecessary allocation. 
c.list = append(c.list[:pos], append([]*chunkInfo{elem}, c.list[pos:]...)...) } @@ -120,5 +147,6 @@ func (c *chunks) min() (minChunk *chunkInfo, minValue *vector.Element, minIdx in minValue = c.list[0].buffer.Get(0) minIdx = 0 minChunk = c.list[0] + return minChunk, minValue, minIdx } diff --git a/file/file.go b/file/file.go index 5bc397f..b42bcc7 100644 --- a/file/file.go +++ b/file/file.go @@ -1,88 +1,166 @@ package file import ( - "bufio" "context" - "sync" - "io" "path" "strconv" + "sync" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" "github.com/askiada/external-sort/file/batchingchannels" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" - - "github.com/pkg/errors" + "github.com/askiada/external-sort/writer" ) +var logger = logrus.StandardLogger() + +// Info set all parameters to process a file with chunks. type Info struct { - mu *MemUsage - Reader io.Reader - Allocate *vector.Allocate - OutputPath string + mu *memUsage + Allocate *vector.Allocate + InputReader io.Reader + OutputFile io.Writer + outputWriter writer.Writer + + headers interface{} + chunkPaths []string + localMutex sync.Mutex totalRows int + chunkIndex int PrintMemUsage bool + WithHeader bool } -// CreateSortedChunks Scan a file and divide it into small sorted chunks. -// Store all the chunks in a folder an returns all the paths. 
-func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpSize int, maxWorkers int64) ([]string, error) { - fn := "scan and sort and dump" +func (f *Info) check(dumpSize int) error { + f.chunkIndex = 0 + f.chunkPaths = []string{} + if dumpSize <= 0 { - return nil, errors.Wrap(errors.New("dump size must be greater than 0"), fn) + return errors.New("dump size must be greater than 0") } - if f.PrintMemUsage && f.mu == nil { - f.mu = &MemUsage{} + return nil +} + +func (f *Info) processInputReader(batchChan *batchingchannels.BatchingChannel, inputReader reader.Reader) error { + for inputReader.Next() { + if f.PrintMemUsage { + f.mu.Collect() + } + + row, err := inputReader.Read() + if err != nil { + return errors.Wrap(err, "can't read from input reader") + } + + if f.WithHeader && f.headers == nil { + f.headers = row + } else { + batchChan.In() <- row + } + + f.totalRows++ } + batchChan.Close() - err := clearChunkFolder(chunkFolder) - if err != nil { - return nil, errors.Wrap(err, fn) + if inputReader.Err() != nil { + return errors.Wrap(inputReader.Err(), "input reader encountered an error") } - row := 0 - chunkPaths := []string{} - scanner := bufio.NewScanner(f.Reader) - mu := sync.Mutex{} - wg := &sync.WaitGroup{} - wg.Add(1) - batchChan := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) - go func() { - defer wg.Done() - for scanner.Scan() { - if f.PrintMemUsage { - f.mu.Collect() - } - text := scanner.Text() - batchChan.In() <- text - row++ - } - batchChan.Close() - }() - - chunkIdx := 0 - err = batchChan.ProcessOut(func(v vector.Vector) error { - mu.Lock() - chunkIdx++ - chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(chunkIdx)+".tsv") - mu.Unlock() - v.Sort() - err := vector.Dump(v, chunkPath) + + return nil +} + +func (f *Info) processBatch(vec vector.Vector, chunkFolder string) error { + f.localMutex.Lock() + f.chunkIndex++ + chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(f.chunkIndex)+".tsv") 
+ logger.Infoln("Created chunk", chunkPath) + f.localMutex.Unlock() + + vec.Sort() + + if f.WithHeader { + f.localMutex.Lock() + + err := vec.PushFrontNoKey(f.headers) if err != nil { + f.localMutex.Unlock() return err } - mu.Lock() - chunkPaths = append(chunkPaths, chunkPath) - mu.Unlock() + + f.localMutex.Unlock() + } + + err := f.Allocate.Dump(vec, chunkPath) + if err != nil { + return errors.Wrapf(err, "can't dump chunk %s", chunkPath) + } + + f.localMutex.Lock() + f.chunkPaths = append(f.chunkPaths, chunkPath) + f.localMutex.Unlock() + + return nil +} + +func (f *Info) runBatchingChannel( + ctx context.Context, + inputReader reader.Reader, + chunkFolder string, + dumpSize, + maxWorkers int, +) ([]string, error) { + batchChan, err := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + if err != nil { + return nil, errors.Wrap(err, "can't create new batching channel") + } + + batchChan.G.Go(func() error { return f.processInputReader(batchChan, inputReader) }) + + err = batchChan.ProcessOut(func(vec vector.Vector) error { + err := f.processBatch(vec, chunkFolder) + if err != nil { + return errors.Wrap(err, "can't process batch") + } + return nil }) if err != nil { - return nil, errors.Wrap(err, fn) + return nil, errors.Wrap(err, "can't process batching channel") } - wg.Wait() - if scanner.Err() != nil { - return nil, errors.Wrap(scanner.Err(), fn) + + return f.chunkPaths, nil +} + +// CreateSortedChunks Scan a file and divide it into small sorted chunks. +// Store all the chunks in a folder an returns all the paths. 
+func (f *Info) CreateSortedChunks(ctx context.Context, chunkFolder string, dumpSize, maxWorkers int) ([]string, error) { + if err := f.check(dumpSize); err != nil { + return nil, errors.New("can't pass checks") + } + + if f.PrintMemUsage && f.mu == nil { + f.mu = &memUsage{} + } + + err := clearChunkFolder(chunkFolder) + if err != nil { + return nil, errors.Wrap(err, "can't clear chunk folder") + } + + inputReader, err := f.Allocate.FnReader(f.InputReader) + if err != nil { + return nil, errors.Wrap(err, "can't get input reader") } - f.totalRows = row + + chunkPaths, err := f.runBatchingChannel(ctx, inputReader, chunkFolder, dumpSize, maxWorkers) + if err != nil { + return nil, errors.Wrap(err, "can't run batching channel") + } + return chunkPaths, nil } diff --git a/file/shuffle.go b/file/shuffle.go new file mode 100644 index 0000000..ff7462a --- /dev/null +++ b/file/shuffle.go @@ -0,0 +1,133 @@ +// TODO: rework + lint +//nolint +package file + +import ( + "context" + "io" + "math/rand" + "path" + "strconv" + "sync" + + "github.com/askiada/external-sort/file/batchingchannels" + "github.com/askiada/external-sort/reader" + "github.com/askiada/external-sort/vector" + "github.com/askiada/external-sort/vector/key" + "github.com/askiada/external-sort/writer" + "github.com/pkg/errors" +) + +// CreateSortedChunks Scan a file and divide it into small sorted chunks. +// Store all the chunks in a folder an returns all the paths. 
+func (f *Info) Shuffle(ctx context.Context, chunkFolder string, dumpSize, maxWorkers, k int, seed int64, isGzip bool) ([]string, error) { + fn := "scan and shuffle and dump" + if dumpSize <= 0 { + return nil, errors.Wrap(errors.New("dump size must be greater than 0"), fn) + } + + if f.PrintMemUsage && f.mu == nil { + f.mu = &memUsage{} + } + if f.Allocate != nil { + return nil, errors.New("allocate should not be defined when shuffling") + } + f.Allocate = vector.DefaultVector( + func(row interface{}) (key.Key, error) { + return key.AllocateIntFromSlice(row, 0) + }, + func(r io.Reader) (reader.Reader, error) { + return reader.NewStdScanner(r, isGzip) + }, + func(w io.Writer) (writer.Writer, error) { + return writer.NewStdSliceWriter(w, false, isGzip), nil + }, + ) + + err := clearChunkFolder(chunkFolder) + if err != nil { + return nil, errors.Wrap(err, fn) + } + + inputReader, err := f.Allocate.FnReader(f.InputReader) + if err != nil { + return nil, errors.Wrap(err, fn) + } + countRows := 0 + chunkPaths := []string{} + + mu := sync.Mutex{} + r := rand.New(rand.NewSource(seed)) + batchChan, err := batchingchannels.NewBatchingChannel(ctx, f.Allocate, maxWorkers, dumpSize) + if err != nil { + return nil, errors.Wrap(err, "can't create new batching channel") + } + batchChan.G.Go(func() error { + for inputReader.Next() { + if f.PrintMemUsage { + f.mu.Collect() + } + row, err := inputReader.Read() + if err != nil { + return errors.Wrap(err, fn) + } + if f.WithHeader && f.headers == nil { + f.headers = []string{"##!!##", row.(string)} + } else { + newRow := []string{strconv.FormatInt(r.Int63(), 10), row.(string)} + batchChan.In() <- newRow + } + countRows++ + } + batchChan.Close() + if inputReader.Err() != nil { + return errors.Wrap(inputReader.Err(), fn) + } + return nil + }) + + chunkIdx := 0 + err = batchChan.ProcessOut(func(v vector.Vector) error { + mu.Lock() + chunkIdx++ + chunkPath := path.Join(chunkFolder, "chunk_"+strconv.Itoa(chunkIdx)+".tsv") + 
logger.Infoln("Created chunk", chunkPath) + mu.Unlock() + v.Sort() + if f.WithHeader { + err = v.PushFrontNoKey(f.headers) + if err != nil { + return err + } + } + err := f.Allocate.Dump(v, chunkPath) + if err != nil { + return err + } + mu.Lock() + chunkPaths = append(chunkPaths, chunkPath) + mu.Unlock() + return nil + }) + if err != nil { + return nil, errors.Wrap(err, fn) + } + f.totalRows = countRows + + f.Allocate = vector.DefaultVector( + func(row interface{}) (key.Key, error) { + return key.AllocateIntFromSlice(row, 0) + }, + func(r io.Reader) (reader.Reader, error) { + return reader.NewStdSliceScanner(r, isGzip) + }, + func(w io.Writer) (writer.Writer, error) { + return writer.NewStdSliceWriter(w, true, isGzip), nil + }, + ) + err = f.MergeSort(chunkPaths, k, false) + if err != nil { + return nil, errors.Wrap(err, fn) + } + return chunkPaths, nil +} diff --git a/file/sort.go b/file/sort.go index fd90f33..1c384ff 100644 --- a/file/sort.go +++ b/file/sort.go @@ -1,131 +1,237 @@ package file import ( - "bufio" "fmt" - "os" "runtime" + "strings" - "github.com/askiada/external-sort/vector" "github.com/cheggaaa/pb/v3" + "github.com/pkg/errors" + + "github.com/askiada/external-sort/vector" + "github.com/askiada/external-sort/writer" ) -type MemUsage struct { +type memUsage struct { MaxAlloc uint64 MaxSys uint64 NumGc uint32 } -func (mu *MemUsage) Collect() { - var m runtime.MemStats - runtime.ReadMemStats(&m) - if m.Alloc > mu.MaxAlloc { - mu.MaxAlloc = m.Alloc +func (mu *memUsage) Collect() { + var mStats runtime.MemStats + + runtime.ReadMemStats(&mStats) + + if mStats.Alloc > mu.MaxAlloc { + mu.MaxAlloc = mStats.Alloc } - if m.Sys > mu.MaxSys { - mu.MaxSys = m.Sys + + if mStats.Sys > mu.MaxSys { + mu.MaxSys = mStats.Sys } - mu.NumGc = m.NumGC + mu.NumGc = mStats.NumGC } -func (mu *MemUsage) PrintMemUsage() { - fmt.Printf("Max Alloc = %v MiB", bToMb(mu.MaxAlloc)) - fmt.Printf("\tMax Sys = %v MiB", bToMb(mu.MaxSys)) - fmt.Printf("\tNumGC = %v\n", mu.NumGc) +func 
(mu *memUsage) String() string { + builder := strings.Builder{} + builder.WriteString(fmt.Sprintf("Max Alloc = %v MiB", bToMb(mu.MaxAlloc))) + builder.WriteString(fmt.Sprintf(" Max Sys = %v MiB", bToMb(mu.MaxSys))) + builder.WriteString(fmt.Sprintf(" NumGC = %v\n", mu.NumGc)) + + return builder.String() } +const conversionMb = (1 << 20) //nolint + func bToMb(b uint64) uint64 { - return b / 1024 / 1024 + return b / conversionMb } -func (f *Info) MergeSort(chunkPaths []string, k int) (err error) { - output := f.Allocate.Vector(k, f.Allocate.Key) - if f.PrintMemUsage && f.mu == nil { - f.mu = &MemUsage{} - } - // create a chunk per file path +func (f *Info) createChunks(chunkPaths []string, k int) (*chunks, error) { chunks := &chunks{list: make([]*chunkInfo, 0, len(chunkPaths))} for _, chunkPath := range chunkPaths { - err := chunks.new(chunkPath, f.Allocate, k) + err := chunks.new(chunkPath, f.Allocate, k, f.WithHeader) if err != nil { - return err + return nil, errors.Wrapf(err, "can't create chunk %s", chunkPath) + } + } + + return chunks, nil +} + +func (f *Info) handleHeader(output vector.Vector) error { + if f.WithHeader { + err := output.PushFrontNoKey(f.headers) + if err != nil { + return errors.Wrapf(err, "can't add headers %+v", f.headers) } } - outputFile, err := os.Create(f.OutputPath) + return nil +} + +type nextChunk struct { + oldElem *vector.Element +} + +func (nc *nextChunk) get(output vector.Vector, createdChunks *chunks, dropDuplicates bool) (*chunkInfo, int, error) { + minChunk, minValue, minIdx := createdChunks.min() + if (!dropDuplicates || nc.oldElem == nil) || (dropDuplicates && !minValue.Key.Equal(nc.oldElem.Key)) { + err := output.PushBack(minValue.Row) + if err != nil { + return nil, 0, errors.Wrapf(err, "can't push back row %+v", minValue.Row) + } + + nc.oldElem = minValue + } + + return minChunk, minIdx, nil +} + +func updateChunks(createdChunks *chunks, minChunk *chunkInfo, minIdx, k int) error { + minChunk.buffer.FrontShift() + + isEmpty 
:= false + + if minChunk.buffer.Len() == 0 { + err := minChunk.pullSubset(k) + if err != nil { + return errors.Wrapf(err, "can't pull subset from chunk %s", minChunk.filename) + } + + // if after pulling data the chunk buffer is still empty then we can remove it + if minChunk.buffer.Len() == 0 { + isEmpty = true + + err = createdChunks.shrink([]int{minIdx}) + if err != nil { + return errors.Wrapf(err, "can't shrink chunk at index %d", minIdx) + } + } + } + // when we get a new element in the first chunk we need to re-order it + if !isEmpty { + createdChunks.moveFirstChunkToCorrectIndex() + } + + return nil +} + +func (f *Info) prepareMergeSort(output vector.Vector, chunkPaths []string, outputBufferSize int) (*chunks, error) { + err := f.handleHeader(output) + if err != nil { + return nil, errors.Wrap(err, "can't handle headers") + } + + // create a chunk per file path + createdChunks, err := f.createChunks(chunkPaths, outputBufferSize) + if err != nil { + return nil, errors.Wrap(err, "can't create all chunks") + } + + f.outputWriter, err = f.Allocate.FnWriter(f.OutputFile) if err != nil { - return err + return nil, errors.Wrap(err, "can't get output writer file") } - // remember to close the file - defer outputFile.Close() - outputBuffer := bufio.NewWriter(outputFile) + return createdChunks, nil +} +func (f *Info) runMergeSort(createdChunks *chunks, output vector.Vector, outputBufferSize int, dropDuplicates bool) error { bar := pb.StartNew(f.totalRows) - chunks.resetOrder() + defer bar.Finish() + + smallestChunk := &nextChunk{} + + createdChunks.resetOrder() + for { if f.PrintMemUsage { f.mu.Collect() } - if chunks.len() == 0 || output.Len() == k { - err = WriteBuffer(outputBuffer, output) - if err != nil { - return err - } + + err := f.dumpOutput(createdChunks, output, outputBufferSize) + if err != nil { + return errors.Wrap(err, "can't dump output") } - if chunks.len() == 0 { + + if createdChunks.len() == 0 { break } - toShrink := []int{} + // search the smallest 
value across chunk buffers by comparing first elements only - minChunk, minValue, minIdx := chunks.min() - err = output.PushBack(minValue.Line) + minChunk, minIdx, err := smallestChunk.get(output, createdChunks, dropDuplicates) if err != nil { - return err + return errors.Wrap(err, "can't get next chunk with smallest value") } + // remove the first element from the chunk we pulled the smallest value - minChunk.buffer.FrontShift() - isEmpty := false - if minChunk.buffer.Len() == 0 { - err = minChunk.pullSubset(k) - if err != nil { - return err - } - // if after pulling data the chunk buffer is still empty then we can remove it - if minChunk.buffer.Len() == 0 { - isEmpty = true - toShrink = append(toShrink, minIdx) - err = chunks.shrink(toShrink) - if err != nil { - return err - } - } - } - // when we get a new element in the first chunk we need to re-order it - if !isEmpty { - chunks.moveFirstChunkToCorrectIndex() + err = updateChunks(createdChunks, minChunk, minIdx, outputBufferSize) + if err != nil { + return errors.Wrap(err, "can't update chunks") } + bar.Increment() } - err = outputBuffer.Flush() - if err != nil { - return err - } - bar.Finish() + if f.PrintMemUsage { - f.mu.PrintMemUsage() + logger.Debugln(f.mu.String()) } - return chunks.close() + + return nil } -func WriteBuffer(buffer *bufio.Writer, rows vector.Vector) error { - for i := 0; i < rows.Len(); i++ { - _, err := buffer.WriteString(rows.Get(i).Line + "\n") +func (f *Info) dumpOutput(createdChunks *chunks, output vector.Vector, outputBufferSize int) error { + if createdChunks.len() == 0 || output.Len() == outputBufferSize { + err := writeBuffer(f.outputWriter, output) if err != nil { return err } } + + return nil +} + +// MergeSort merge and sort a list of files. +// It is possilbe to drop duplicates and define the maximum size of the output buffer before flush. 
+func (f *Info) MergeSort(chunkPaths []string, outputBufferSize int, dropDuplicates bool) (err error) { + output := f.Allocate.Vector(outputBufferSize, f.Allocate.Key) + + if f.PrintMemUsage && f.mu == nil { + f.mu = &memUsage{} + } + + createdChunks, err := f.prepareMergeSort(output, chunkPaths, outputBufferSize) + if err != nil { + return errors.Wrap(err, "can't prepare merge sort") + } + + defer func() { err = f.outputWriter.Close() }() + + err = f.runMergeSort(createdChunks, output, outputBufferSize, dropDuplicates) + if err != nil { + return errors.Wrap(err, "can't run merge sort") + } + + err = createdChunks.close() + if err != nil { + return errors.Wrap(err, "can't close created chunks") + } + + return err +} + +func writeBuffer(w writer.Writer, rows vector.Vector) error { + for i := range rows.Len() { + err := w.Write(rows.Get(i).Row) + if err != nil { + return errors.Wrap(err, "can't write buffer") + } + } + rows.Reset() + return nil } diff --git a/file/utils.go b/file/utils.go index 5b56ba9..8ad9ea8 100644 --- a/file/utils.go +++ b/file/utils.go @@ -10,23 +10,26 @@ import ( // clearChunkFolder Remove all files from a folder. 
func clearChunkFolder(folder string) error { - fn := "clear folder" err := os.MkdirAll(folder, os.ModePerm) if err != nil { - return errors.Wrap(err, fn) + return errors.Wrap(err, "can't create folder") } + dir, err := os.ReadDir(folder) if err != nil { - return errors.Wrap(err, fn) + return errors.Wrap(err, "can't read chunk folder") } + for _, d := range dir { if !strings.HasPrefix(d.Name(), "chunk") { continue } + err = os.RemoveAll(path.Join(folder, d.Name())) if err != nil { - return errors.Wrap(err, fn) + return errors.Wrap(err, "can't clear chunk folder") } } + return nil } diff --git a/go.mod b/go.mod index 42f2d65..d72b023 100644 --- a/go.mod +++ b/go.mod @@ -1,42 +1,63 @@ module github.com/askiada/external-sort -go 1.17 +go 1.22 require ( - github.com/cheggaaa/pb/v3 v3.0.8 + github.com/aws/aws-sdk-go-v2 v1.18.0 + github.com/aws/aws-sdk-go-v2/config v1.18.23 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.65 + github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1 + github.com/cheggaaa/pb/v3 v3.1.2 github.com/pkg/errors v0.9.1 - github.com/pkg/sftp v1.13.4 - github.com/spf13/cobra v1.2.1 - github.com/spf13/viper v1.8.1 - github.com/stretchr/testify v1.7.0 - golang.org/x/crypto v0.0.0-20220210151621-f4118a5b28e2 - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c + github.com/pkg/sftp v1.13.5 + github.com/sirupsen/logrus v1.9.0 + github.com/spf13/cobra v1.7.0 + github.com/spf13/viper v1.15.0 + github.com/stretchr/testify v1.8.2 + golang.org/x/crypto v0.8.0 + golang.org/x/sync v0.2.0 ) require ( github.com/VividCortex/ewma v1.2.0 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.13.22 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34 // indirect + 
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.12.10 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.18.11 // indirect + github.com/aws/smithy-go v1.13.5 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/fatih/color v1.13.0 // indirect - github.com/fsnotify/fsnotify v1.4.9 // indirect + github.com/fatih/color v1.15.0 // indirect + github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kr/fs v0.1.0 // indirect - github.com/magiconair/properties v1.8.5 // indirect - github.com/mattn/go-colorable v0.1.12 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect - github.com/mattn/go-runewidth v0.0.13 // indirect - github.com/mitchellh/mapstructure v1.4.1 // indirect - github.com/pelletier/go-toml v1.9.3 // indirect + github.com/magiconair/properties v1.8.7 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.18 // indirect + github.com/mattn/go-runewidth v0.0.14 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/pelletier/go-toml/v2 v2.0.7 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/spf13/afero v1.6.0 // indirect - github.com/spf13/cast v1.3.1 // indirect + github.com/rivo/uniseg v0.4.4 // indirect + github.com/spf13/afero v1.9.5 // 
indirect + github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/subosito/gotenv v1.2.0 // indirect - golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect - golang.org/x/text v0.3.6 // indirect - gopkg.in/ini.v1 v1.62.0 // indirect + github.com/subosito/gotenv v1.4.2 // indirect + golang.org/x/sys v0.8.0 // indirect + golang.org/x/text v0.9.0 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 45da8db..8c94da5 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,7 @@ cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.44.3/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= @@ -15,9 +16,7 @@ cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOY cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= -cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= -cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= -cloud.google.com/go v0.81.0/go.mod 
h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= +cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -26,7 +25,6 @@ cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4g cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -36,21 +34,53 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/VividCortex/ewma v1.1.1/go.mod 
h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= -github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= +github.com/aws/aws-sdk-go-v2 v1.18.0 h1:882kkTpSFhdgYRKVZ/VCgf7sd0ru57p2JCxz4/oN5RY= +github.com/aws/aws-sdk-go-v2 v1.18.0/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 h1:dK82zF6kkPeCo8J1e+tGx4JdvDIQzj7ygIoLg8WMuGs= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10/go.mod h1:VeTZetY5KRJLuD/7fkQXMU6Mw7H5m/KP2J5Iy9osMno= +github.com/aws/aws-sdk-go-v2/config v1.18.23 h1:gc3lPsAnZpwfi2exupmgHfva0JiAY2BWDg5JWYlmA28= +github.com/aws/aws-sdk-go-v2/config v1.18.23/go.mod h1:rx0ruaQ+gk3OrLFHRRx56lA//XxP8K8uPzeNiKNuWVY= +github.com/aws/aws-sdk-go-v2/credentials v1.13.22 h1:Hp9rwJS4giQ48xqonRV/s7QcDf/wxF6UY7osRmBabvI= +github.com/aws/aws-sdk-go-v2/credentials v1.13.22/go.mod h1:BfNcm6A9nSd+bzejDcMJ5RE+k6WbkCwWkQil7q4heRk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 h1:jJPgroehGvjrde3XufFIJUZVK5A2L9a3KwSFgKy9n8w= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3/go.mod h1:4Q0UFP0YJf0NrsEuEYHpM9fTSEVnD16Z3uyEF7J9JGM= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.65 h1:4irvSxFf0u7pQdtpmUoDSjvMNpOG/8yDUq3orwd9qdg= 
+github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.65/go.mod h1:BAWKiL53LT19UMewYr9YhZ8xPO69u6NwmGUjSjRwUdM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 h1:kG5eQilShqmJbv11XL1VpyDbaEJzWxd4zRiCG30GSn4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33/go.mod h1:7i0PF1ME/2eUPFcjkVIwq+DOygHEoK92t5cDqNgYbIw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 h1:vFQlirhuM8lLlpI7imKOMsjdQLuN9CPi+k44F/OFVsk= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27/go.mod h1:UrHnn3QV/d0pBZ6QBAEQcqFLf8FAzLmoUfPVIueOvoM= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34 h1:gGLG7yKaXG02/jBlg210R7VgQIotiQntNhsCFejawx8= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34/go.mod h1:Etz2dj6UHYuw+Xw830KfzCfWGMzqvUTCjUj5b76GVDc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25 h1:AzwRi5OKKwo4QNqPf7TjeO+tK8AyOK3GVSwmRPo7/Cs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25/go.mod h1:SUbB4wcbSEyCvqBxv/O/IBf93RbEze7U7OnoTlpPB+g= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 h1:y2+VQzC6Zh2ojtV2LoC0MNwHWc6qXv/j2vrQtlftkdA= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11/go.mod h1:iV4q2hsqtNECrfmlXyord9u4zyuFEJX9eLgLpSPzWA8= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28 h1:vGWm5vTpMr39tEZfQeDiDAMgk+5qsnvRny3FjLpnH5w= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28/go.mod h1:spfrICMD6wCAhjhzHuy6DOZZ+LAIY10UxhUmLzpJTTs= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27 h1:0iKliEXAcCa2qVtRs7Ot5hItA2MsufrphbRFlz1Owxo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27/go.mod h1:EOwBD4J4S5qYszS5/3DpkejfuK+Z5/1uzICfPaZLtqw= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2 h1:NbWkRxEEIRSCqxhsHQuMiTH7yo+JZW1gp8v3elSVMTQ= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2/go.mod h1:4tfW5l4IAB32VWCDEBxCRtR9T4BWy4I4kr1spr8NgZM= +github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1 
h1:O+9nAy9Bb6bJFTpeNFtd9UfHbgxO1o4ZDAM9rQp5NsY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1/go.mod h1:J9kLNzEiHSeGMyN7238EjJmBpCniVzFda75Gxl/NqB8= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.10 h1:UBQjaMTCKwyUYwiVnUt6toEJwGXsLBI6al083tpjJzY= +github.com/aws/aws-sdk-go-v2/service/sso v1.12.10/go.mod h1:ouy2P4z6sJN70fR3ka3wD3Ro3KezSxU6eKGQI2+2fjI= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 h1:PkHIIJs8qvq0e5QybnZoG1K/9QTrLr9OsqCIo59jOBA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10/go.mod h1:AFvkxc8xfBe8XA+5St5XIHHrQQtkxqrRincx4hmMHOk= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.11 h1:uBE+Zj478pfxV98L6SEpvxYiADNjTlMNY714PJLE7uo= +github.com/aws/aws-sdk-go-v2/service/sts v1.18.11/go.mod h1:BgQOMsg8av8jset59jelyPW7NoZcZXLVpDsXunGDrk8= +github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8= +github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cheggaaa/pb/v3 v3.0.8 h1:bC8oemdChbke2FHIIGy9mn4DPJ2caZYQnfbRqwmdCoA= -github.com/cheggaaa/pb/v3 v3.0.8/go.mod h1:UICbiLec/XO6Hw6k+BHEtHeQFzzBH4i2/qk/ow1EJTA= +github.com/cheggaaa/pb/v3 v3.1.2 h1:FIxT3ZjOj9XJl0U4o2XbEhjFfZl7jCVCDOGq1ZAB7wQ= +github.com/cheggaaa/pb/v3 v3.1.2/go.mod h1:SNjnd0yKcW+kw0brSusraeDd5Bf1zBfxAzTL2ss3yQ4= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -58,9 +88,7 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod 
h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -69,20 +97,15 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= -github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= 
-github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -94,7 +117,6 @@ github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -109,9 +131,6 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -122,10 +141,9 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/martian v2.1.0+incompatible/go.mod 
h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -138,157 +156,109 @@ github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= -github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= -github.com/hashicorp/errwrap v1.0.0/go.mod 
h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= +github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= 
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= 
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= -github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= -github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= -github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= -github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= -github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= 
-github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= -github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ= -github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= +github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.14 
h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= +github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/pelletier/go-toml/v2 v2.0.7 h1:muncTPStnKRos5dpVKULv2FVd4bMOhNePj9CjgDb8Us= +github.com/pelletier/go-toml/v2 v2.0.7/go.mod h1:eumQOmlWiOPt5WriQQqoM5y18pDHwha2N+QD+EUNTek= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= -github.com/pkg/sftp v1.13.4 h1:Lb0RYJCmgUcBgZosfoi9Y9sbl6+LJgOIgk/2Y4YjMFg= -github.com/pkg/sftp v1.13.4/go.mod h1:LzqnAvaD5TWeNBsZpfKxSYn1MbjWwOsCIAFFJbpIsK8= +github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= +github.com/pkg/sftp v1.13.5 h1:a3RLUqkyjYRtBTZJZ1VRrKbN3zhuPLlUc3sphVz81go= +github.com/pkg/sftp v1.13.5/go.mod h1:wHDZ0IZX6JcBYRK1TH9bcVq8G7TLpVHYIGJRFnmPfxg= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= 
+github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= -github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= -github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= -github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v1.2.1 h1:+KmjbUw1hriSNMF55oPrkZcb27aECyrj8V2ytv7kWDw= -github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spf13/afero v1.9.5 h1:stMpOSZFs//0Lv29HduCmli3GUfpFoF3Y1Q/aXj/wVM= 
+github.com/spf13/afero v1.9.5/go.mod h1:UBogFpq8E9Hx+xc5CNTTEpTnuHVmXDwZcZcE1eb/UhQ= +github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= +github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.8.1 h1:Kq1fyeebqsBfbjZj4EL7gj2IO0mMaiyjYUWcUsl2O44= -github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= +github.com/spf13/viper v1.15.0 h1:js3yy885G8xwJa6iOISGFwd+qlUo5AvyXb7CiihdtiU= +github.com/spf13/viper v1.15.0/go.mod h1:fFcTBJxvhhzSJiZy8n+PeW6t8l+KeT/uTARa0jHOQLA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 
-github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= -github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= +github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= -go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= -go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= 
-go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/crypto v0.0.0-20220210151621-f4118a5b28e2 h1:XdAboW3BNMv9ocSCOk/u1MFioZGzCNkiJZ19v9Oe3Ig= -golang.org/x/crypto v0.0.0-20220210151621-f4118a5b28e2/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ= +golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -312,7 +282,6 @@ golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRu golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= @@ -323,11 +292,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net 
v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -354,12 +320,9 @@ golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -370,9 +333,6 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod 
h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -383,11 +343,9 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= +golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -397,11 +355,9 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -420,31 +376,29 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys 
v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158 h1:rm+CHSpPEEW2IsXUib1ThaHIjuBVZjxNgSKmBLFfD4c= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -454,7 +408,6 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod 
h1:9Yl7xja0Znq3iFh3 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -464,7 +417,6 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -487,7 +439,6 @@ golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools 
v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= @@ -496,9 +447,8 @@ golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= -golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -522,9 +472,6 @@ google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz513 google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= google.golang.org/api v0.40.0/go.mod 
h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= -google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= -google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= -google.golang.org/api v0.44.0/go.mod h1:EBOGZqzyhtvMDoxwS97ctnh0zUmYY6CxqXsc1AvkYD8= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -555,7 +502,6 @@ google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= @@ -567,12 +513,8 @@ google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto 
v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= -google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -586,13 +528,9 @@ google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3Iji google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc 
v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -603,22 +541,19 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU= -gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/grpc/external_sort_index.proto b/grpc/external_sort_index.proto new file mode 100644 index 0000000..d268b90 --- /dev/null +++ b/grpc/external_sort_index.proto @@ -0,0 +1,38 @@ +// (-- api-linter: core::0215::versioned-packages=disabled +// aip.dev/not-precedent: This simply makes the structure simpler --) +syntax = "proto3"; + +package bk.registration.orchestrator; + +option go_package = "github.com/askiada/external-sort/grpc/_build/go"; + + +service ExternalSort { + rpc SortSV(SortSVRequest) returns (SortSVResponse); +} + +message FileSV { + string path = 1; + bool gzip = 2; + string separator = 3; + repeated Field sort_fields = 4; + bool with_input_header = 5; + +} + + +message SortSVRequest { + + repeated FileSV input = 2; + +} + +message Field { + enum FIELD_TYPE{ + INT = 0; + STRING = 1; + BOOL = 2; + } + int64 index =1; + FIELD_TYPE type =2; +} diff --git a/internal/env.go b/internal/env.go index 852adde..00bc065 100644 --- a/internal/env.go +++ b/internal/env.go @@ 
-8,30 +8,49 @@ import ( // Argument names. const ( - InputFileName = "input_path" + WithHeaderName = "with_header" + InputFileNames = "input_paths" OutputFileName = "output_path" ChunkFolderName = "chunk_folder" ChunkSizeName = "chunk_size" MaxWorkersName = "max_workers" OutputBufferSizeName = "output_buffer_size" + TsvFieldsName = "tsv_fields" + + S3RegionName = "s3_region" + S3RetryMaxAttemptsName = "s3_retry_max_attempts" + + IsGzipName = "is_gzip" ) // Environment variables. var ( - InputFile string + WithHeader bool + InputFiles []string + TsvFields []string OutputFile string ChunkFolder string ChunkSize int - MaxWorkers int64 + MaxWorkers int OutputBufferSize int + + S3Region string + S3RetryMaxAttempts int + IsGzip bool ) func init() { viper.AutomaticEnv() - viper.SetDefault(InputFileName, "") + viper.SetDefault(WithHeaderName, false) + viper.SetDefault(InputFileNames, "") viper.SetDefault(OutputFileName, "") viper.SetDefault(ChunkFolderName, "") viper.SetDefault(ChunkSizeName, 0) viper.SetDefault(MaxWorkersName, 0) viper.SetDefault(OutputBufferSizeName, 0) + viper.SetDefault(TsvFieldsName, []string{"0"}) + + viper.SetDefault(S3RegionName, "eu-west-1") + viper.SetDefault(S3RetryMaxAttemptsName, 10) //nolint //gomnd + viper.SetDefault(IsGzipName, false) } diff --git a/internal/progress/contract.go b/internal/progress/contract.go new file mode 100644 index 0000000..9fd343e --- /dev/null +++ b/internal/progress/contract.go @@ -0,0 +1,69 @@ +// Package progress defines standard and simple progress bar to track file download progress. +package progress + +import ( + "math" + + "github.com/cheggaaa/pb/v3" + "github.com/sirupsen/logrus" +) + +// Progress defines a simple progress bar contract. +type Progress interface { + // Begin sets and starts the progress bar. + Begin(total int64) + // Add increments the progress bar with n elements + Add(n int64) + // End terminates the progress bar + End() +} + +// Pb implements Progress contract using cheggaaa pb v3. 
+type Pb struct { + bar *pb.ProgressBar +} + +// Begin start a new progress bar in byte mode. +func (p *Pb) Begin(total int64) { + p.bar = pb.Full.Start64(total) + p.bar.Set(pb.Bytes, true) +} + +// Add increment the bar by n elements. +func (p *Pb) Add(n int64) { + p.bar.Add64(n) +} + +// End terminates the bar. +func (p *Pb) End() { + p.bar.Finish() +} + +var _ Progress = &Pb{} + +// Basic implements Progress contract using stdout to print status. +type Basic struct { + total float64 + written float64 + milestone int +} + +// Begin start a new progress bar. +func (b *Basic) Begin(total int64) { + b.total = float64(total) +} + +// Add increment the bar by n elements. +func (b *Basic) Add(val int64) { + b.written += float64(val) + progress := int(math.Round(b.written / b.total * 100)) //nolint //gomnd + + if progress >= b.milestone { + b.milestone += 5 // every 5% + + logrus.Debugf("Download from S3 at %3d%%\n\n", progress) + } +} + +// End noop. +func (b *Basic) End() {} diff --git a/internal/rw/rw.go b/internal/rw/rw.go new file mode 100644 index 0000000..0252beb --- /dev/null +++ b/internal/rw/rw.go @@ -0,0 +1,207 @@ +package rw + +import ( + "context" + "io" + "net/url" + "os" + "path/filepath" + "strings" + + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sync/errgroup" + + "github.com/askiada/external-sort/bucket" + "github.com/askiada/external-sort/internal" + "github.com/askiada/external-sort/internal/progress" +) + +var logger = logrus.StandardLogger() + +type InputOutput struct { + s3Client bucket.S3ClientAPI + Input io.Reader + inputPipe *io.PipeReader + Output io.Writer + outputPipe *io.PipeWriter + g *errgroup.Group + internalCtx context.Context +} + +func NewInputOutput(ctx context.Context) *InputOutput { + g, dCtx := errgroup.WithContext(ctx) + + return &InputOutput{ + g: g, + internalCtx: dCtx, + } +} + +func (i *InputOutput) s3Check(ctx 
context.Context) error { + if i.s3Client != nil { + return nil + } + + cfg, err := config.LoadDefaultConfig(ctx, + config.WithRegion(internal.S3Region), + config.WithRetryMaxAttempts(internal.S3RetryMaxAttempts), + ) + if err != nil { + return errors.New("can't create aws config") + } + + i.s3Client = s3.NewFromConfig(cfg) + + return nil +} + +func (i *InputOutput) SetInputReader(ctx context.Context, inputFiles ...string) (err error) { + if strings.HasPrefix(inputFiles[0], "s3") || strings.HasPrefix(inputFiles[0], "S3") { + err = i.s3Check(ctx) + if err != nil { + return errors.Wrap(err, "can't check s3") + } + + s3Api, err := bucket.New(ctx, + bucket.Client(i.s3Client), + bucket.Buffer(1_000_000), + bucket.Progress(&progress.Pb{}), + ) + if err != nil { + return errors.Wrap(err, "can't create s3 client") + } + + files := []*bucket.S3FileInfo{} + + for _, inputFile := range inputFiles { + u, _ := url.Parse(inputFile) + u.Path = strings.TrimLeft(u.Path, "/") + logger.Debugf("Proto: %q, Bucket: %q, Key: %q", u.Scheme, u.Host, u.Path) + files = append(files, &bucket.S3FileInfo{ + Bucket: u.Host, + Key: u.Path, + }) + } + + pr, pw := io.Pipe() + i.Input = pr + i.inputPipe = pr + i.g.Go(func() (err error) { + defer func() { err = pw.Close() }() + + err = s3Api.Download(i.internalCtx, pw, files...) + if err != nil { + return errors.Wrap(err, "can't download files") + } + + return err + }) + } else { + var files []io.Reader + + for _, inputFile := range inputFiles { + f, err := os.Open(filepath.Clean(inputFile)) + if err != nil { + return errors.Wrapf(err, "can't open file %s", inputFile) + } + + files = append(files, f) + } + + i.Input = io.MultiReader(files...) 
+ } + + return nil +} + +func (i *InputOutput) SetOutputWriter(ctx context.Context, outputFile string) (err error) { + if strings.HasPrefix(outputFile, "s3") || strings.HasPrefix(outputFile, "S3") { + err = i.s3Check(ctx) + if err != nil { + return errors.Wrap(err, "can't check s3") + } + + outputURL, err := url.Parse(outputFile) + if err != nil { + return errors.Wrapf(err, "can't parse output url %s", outputFile) + } + + outputURL.Path = strings.TrimLeft(outputURL.Path, "/") + logger.Debugf("Proto: %q, Bucket: %q, Key: %q", outputURL.Scheme, outputURL.Host, outputURL.Path) + + s3Api, err := bucket.New(ctx, + bucket.Client(i.s3Client), + bucket.Buffer(1_000_000), + bucket.Progress(&progress.Pb{}), + ) + if err != nil { + return errors.Wrap(err, "can't create s3 client") + } + + pr, pw := io.Pipe() + i.Output = pw + i.outputPipe = pw + i.g.Go(func() (err error) { + defer func() { err = pr.Close() }() + + err = s3Api.Upload(i.internalCtx, pr, outputURL.Host, outputURL.Path) + if err != nil { + return errors.Wrapf(err, "can't upload file %s", outputFile) + } + + return err + }) + } else { + i.Output, err = os.Create(filepath.Clean(outputFile)) + if err != nil { + return errors.Wrapf(err, "can't create file %s", outputFile) + } + } + + return nil +} + +func (i *InputOutput) Do(f func() error) { + i.g.Go(func() error { + err := f() + if err != nil { + return err + } + + err = i.Close() + if err != nil { + return err + } + + return nil + }) +} + +func (i *InputOutput) Close() error { + if i.inputPipe != nil { + err := i.inputPipe.Close() + if err != nil { + return errors.Wrap(err, "can't close input reader") + } + } + + if i.outputPipe != nil { + err := i.outputPipe.Close() + if err != nil { + return errors.Wrap(err, "can't close output writer") + } + } + + return nil +} + +func (i *InputOutput) Err() error { + if err := i.g.Wait(); err != nil { + return errors.Wrap(err, "one of the go routines went wrong") + } + + return nil +} diff --git a/main.go b/main.go index 
b44da1c..89257d6 100644 --- a/main.go +++ b/main.go @@ -2,69 +2,294 @@ package main import ( "context" - "fmt" - "os" + "io" + "strconv" "time" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + "github.com/spf13/viper" + "github.com/askiada/external-sort/file" "github.com/askiada/external-sort/internal" + "github.com/askiada/external-sort/internal/rw" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" - "github.com/spf13/cobra" - "github.com/spf13/viper" + "github.com/askiada/external-sort/writer" ) -func main() { - rootCmd := &cobra.Command{ - Use: "external-sort", - Short: "Perform an external sorting on an input file", - RunE: rootRun, +var logger = logrus.StandardLogger() + +type command struct { + rootCmd *cobra.Command + sortCmd *cobra.Command + shuffleCmd *cobra.Command +} + +func setFlags(root *command) { + root.rootCmd.PersistentFlags().BoolVarP( + &internal.WithHeader, + internal.WithHeaderName, + "e", + viper.GetBool(internal.WithHeaderName), + "Input file has headers.", + ) + root.rootCmd.PersistentFlags().StringSliceVarP( + &internal.InputFiles, + internal.InputFileNames, + "i", + viper.GetStringSlice(internal.InputFileNames), + "input file path.", + ) + root.rootCmd.PersistentFlags().StringVarP( + &internal.OutputFile, + internal.OutputFileName, + "o", + viper.GetString(internal.OutputFileName), + "output file path.", + ) + root.rootCmd.PersistentFlags().StringVarP( + &internal.ChunkFolder, + internal.ChunkFolderName, + "c", + viper.GetString(internal.ChunkFolderName), + "chunk folder.", + ) + + root.rootCmd.PersistentFlags().IntVarP( + &internal.ChunkSize, + internal.ChunkSizeName, + "s", + viper.GetInt(internal.ChunkSizeName), + "chunk size.", + ) + root.rootCmd.PersistentFlags().IntVarP( + &internal.MaxWorkers, + internal.MaxWorkersName, + "w", + viper.GetInt(internal.MaxWorkersName), + "max worker.", + ) + 
root.rootCmd.PersistentFlags().IntVarP( + &internal.OutputBufferSize, + internal.OutputBufferSizeName, + "b", + viper.GetInt(internal.OutputBufferSizeName), + "output buffer size.", + ) + root.sortCmd.PersistentFlags().StringSliceVarP( + &internal.TsvFields, + internal.TsvFieldsName, + "t", + viper.GetStringSlice(internal.TsvFieldsName), + "", + ) + + root.rootCmd.Flags().StringVar( + &internal.S3Region, + internal.S3RegionName, + viper.GetString(internal.S3RegionName), + "the bucket region", + ) + root.rootCmd.Flags().IntVar( + &internal.S3RetryMaxAttempts, + internal.S3RetryMaxAttemptsName, + viper.GetInt(internal.S3RetryMaxAttemptsName), + "the number of retries per S3 request before failing", + ) + + root.shuffleCmd.PersistentFlags().BoolVarP(&internal.IsGzip, + internal.IsGzipName, + "t", + viper.GetBool(internal.IsGzipName), + "", + ) +} + +func newCommand() *command { + root := &command{ + rootCmd: &cobra.Command{ + Use: "external", + Short: "Perform an external task on an input file", + }, + sortCmd: &cobra.Command{ + Use: "sort", + Short: "Perform an external sorting on an input file", + PreRun: func(cmd *cobra.Command, args []string) { + cmd.SetContext(cmd.Parent().Context()) + }, + RunE: sortRun, + }, + shuffleCmd: &cobra.Command{ + Use: "shuffle", + Short: "Perform an external shuffling on an input file", + PreRun: func(cmd *cobra.Command, args []string) { + cmd.SetContext(cmd.Parent().Context()) + }, + RunE: shuffleRun, + }, + } - rootCmd.PersistentFlags().StringVarP(&internal.InputFile, internal.InputFileName, "i", viper.GetString(internal.InputFileName), "input file path.") - rootCmd.PersistentFlags().StringVarP(&internal.OutputFile, internal.OutputFileName, "o", viper.GetString(internal.OutputFileName), "output file path.") - rootCmd.PersistentFlags().StringVarP(&internal.ChunkFolder, internal.ChunkFolderName, "c", viper.GetString(internal.ChunkFolderName), "chunk folder.") + root.rootCmd.AddCommand(root.sortCmd, root.shuffleCmd) -
rootCmd.PersistentFlags().IntVarP(&internal.ChunkSize, internal.ChunkSizeName, "s", viper.GetInt(internal.ChunkSizeName), "chunk size.") - rootCmd.PersistentFlags().Int64VarP(&internal.MaxWorkers, internal.MaxWorkersName, "w", viper.GetInt64(internal.MaxWorkersName), "max worker.") - rootCmd.PersistentFlags().IntVarP(&internal.OutputBufferSize, internal.OutputBufferSizeName, "b", viper.GetInt(internal.OutputBufferSizeName), "output buffer size.") + return root +} + +func main() { + root := newCommand() + setFlags(root) + + ctx := context.Background() - fmt.Println("Input file", internal.InputFile) - fmt.Println("Output file", internal.OutputFile) - fmt.Println("Chunk foler", internal.ChunkFolder) - cobra.CheckErr(rootCmd.Execute()) + cobra.CheckErr(root.rootCmd.ExecuteContext(ctx)) } -func rootRun(cmd *cobra.Command, args []string) error { +func sortRun(cmd *cobra.Command, _ []string) error { + logger.Infoln("Input files", internal.InputFiles) + logger.Infoln("With header", internal.WithHeader) + logger.Infoln("Output file", internal.OutputFile) + logger.Infoln("Chunk folder", internal.ChunkFolder) + logger.Infoln("TSV Fields", internal.TsvFields) + start := time.Now() - inputPath := internal.InputFile - // open a file - f, err := os.Open(inputPath) + inputOutput := rw.NewInputOutput(cmd.Context()) + + err := inputOutput.SetInputReader(cmd.Context(), internal.InputFiles...) 
+ if err != nil { + return errors.Wrap(err, "can't set input reader") + } + + err = inputOutput.SetOutputWriter(cmd.Context(), internal.OutputFile) if err != nil { - return err + return errors.Wrap(err, "can't set output writer") } - defer f.Close() - fI := &file.Info{ - Reader: f, - Allocate: vector.DefaultVector(func(line string) (key.Key, error) { - return key.AllocateTsv(line, 0) - }), - OutputPath: internal.OutputFile, + + tsvFields := []int{} + + for _, field := range internal.TsvFields { + i, err := strconv.Atoi(field) + if err != nil { + return errors.Wrapf(err, "can't convert field %s", field) + } + + tsvFields = append(tsvFields, i) + } + + fileInfo := &file.Info{ + WithHeader: internal.WithHeader, + InputReader: inputOutput.Input, + OutputFile: inputOutput.Output, + Allocate: vector.DefaultVector( + func(row interface{}) (key.Key, error) { + k, err := key.AllocateTsv(row, tsvFields...) + if err != nil { + return nil, errors.Wrapf(err, "can't allocate tsv %+v", row) + } + + return k, nil + }, + func(r io.Reader) (reader.Reader, error) { + gzipReader, err := reader.NewGZipSeparatedValues(r, '\t') + if err != nil { + return nil, errors.Wrap(err, "can't create Gzip reader") + } + + return gzipReader, nil + }, + func(w io.Writer) (writer.Writer, error) { + gzipWriter, err := writer.NewGZipSeparatedValues(w, '\t') + if err != nil { + return nil, errors.Wrap(err, "can't create Gzip writer") + } + + return gzipWriter, nil + }, + ), PrintMemUsage: false, } - // create small files with maximum 30 rows in each - chunkPaths, err := fI.CreateSortedChunks(context.Background(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers) + inputOutput.Do(func() error { + // create small files with maximum 30 rows in each + chunkPaths, err := fileInfo.CreateSortedChunks(cmd.Context(), internal.ChunkFolder, internal.ChunkSize, internal.MaxWorkers) + if err != nil { + return errors.Wrap(err, "can't create sorted chunks") + } + // perform a merge sort on all the chunks 
files. + // we sort using a buffer so we don't have to load the entire chunks when merging + err = fileInfo.MergeSort(chunkPaths, internal.OutputBufferSize, true) + if err != nil { + return errors.Wrap(err, "can't merge sort") + } + + elapsed := time.Since(start) + logger.Infoln("It took", elapsed) + + return nil + }) + + err = inputOutput.Err() if err != nil { - return err + return errors.Wrap(err, "can't finish") } - // perform a merge sort on all the chunks files. - // we sort using a buffer so we don't have to load the entire chunks when merging - err = fI.MergeSort(chunkPaths, internal.OutputBufferSize) + + return nil +} + +func shuffleRun(cmd *cobra.Command, _ []string) error { + logger.Infoln("Input files", internal.InputFiles) + logger.Infoln("With header", internal.WithHeader) + logger.Infoln("Output file", internal.OutputFile) + logger.Infoln("Chunk folder", internal.ChunkFolder) + logger.Infoln("GZip file", internal.IsGzip) + + start := time.Now() + inputOutput := rw.NewInputOutput(cmd.Context()) + + err := inputOutput.SetInputReader(cmd.Context(), internal.InputFiles...) 
if err != nil { - return err + return errors.Wrap(err, "can't set input reader") } - elapsed := time.Since(start) - fmt.Println(elapsed) + + err = inputOutput.SetOutputWriter(cmd.Context(), internal.OutputFile) + if err != nil { + return errors.Wrap(err, "can't set output writer") + } + + fileInfo := &file.Info{ + WithHeader: internal.WithHeader, + InputReader: inputOutput.Input, + OutputFile: inputOutput.Output, + PrintMemUsage: false, + } + + inputOutput.Do(func() error { + // create small files with maximum 30 rows in each + _, err := fileInfo.Shuffle( + cmd.Context(), + internal.ChunkFolder, + internal.ChunkSize, + internal.MaxWorkers, + internal.OutputBufferSize, + time.Now().Unix(), + internal.IsGzip, + ) + if err != nil { + return errors.Wrap(err, "can't create shuffled chunks") + } + + elapsed := time.Since(start) + + logger.Infoln("It took", elapsed) + + return nil + }) + + err = inputOutput.Err() + if err != nil { + return errors.Wrap(err, "can't finish") + } + return nil } diff --git a/main_bench_test.go b/main_bench_test.go index a600863..d31809e 100644 --- a/main_bench_test.go +++ b/main_bench_test.go @@ -2,38 +2,54 @@ package main_test import ( "context" - "io/ioutil" + "io" "os" "path" "testing" + "github.com/stretchr/testify/assert" + "github.com/askiada/external-sort/file" + "github.com/askiada/external-sort/internal/rw" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" - "github.com/stretchr/testify/assert" + "github.com/askiada/external-sort/writer" ) func BenchmarkMergeSort(b *testing.B) { filename := "test.tsv" + ctx := context.Background() + inputOutput := rw.NewInputOutput(ctx) + err := inputOutput.SetInputReader(ctx, filename) + assert.NoError(b, err) + err = inputOutput.SetOutputWriter(ctx, "testdata/chunks/output.tsv") + assert.NoError(b, err) chunkSize := 10000 bufferSize := 5000 - f, err := os.Open(filename) - assert.NoError(b, err) - - fI := &file.Info{ -
Reader: f, - Allocate: vector.DefaultVector(key.AllocateInt), - OutputPath: "testdata/chunks/output.tsv", + fileInfo := &file.Info{ + InputReader: inputOutput.Input, + Allocate: vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ), + OutputFile: inputOutput.Output, } - chunkPaths, err := fI.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) + inputOutput.Do(func() (err error) { + chunkPaths, err := fileInfo.CreateSortedChunks(context.Background(), "testdata/chunks", chunkSize, 100) + assert.NoError(b, err) + b.ResetTimer() + for i := 0; i < b.N; i++ { + err = fileInfo.MergeSort(chunkPaths, bufferSize, false) + _ = err + } + + return nil + }) + err = inputOutput.Err() assert.NoError(b, err) - b.ResetTimer() - for i := 0; i < b.N; i++ { - err = fI.MergeSort(chunkPaths, bufferSize) - _ = err - } - f.Close() - dir, err := ioutil.ReadDir("testdata/chunks") + dir, err := os.ReadDir("testdata/chunks") assert.NoError(b, err) for _, d := range dir { err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) diff --git a/main_test.go b/main_test.go index 69b54d8..1a7ef49 100644 --- a/main_test.go +++ b/main_test.go @@ -4,43 +4,56 @@ import ( "bufio" "context" "errors" - "io/ioutil" + "io" "os" - "path" "strconv" "testing" + "github.com/stretchr/testify/assert" + "github.com/askiada/external-sort/file" + "github.com/askiada/external-sort/internal/rw" + "github.com/askiada/external-sort/reader" "github.com/askiada/external-sort/vector" "github.com/askiada/external-sort/vector/key" - - "github.com/stretchr/testify/assert" + "github.com/askiada/external-sort/writer" ) -func prepareChunks(ctx context.Context, t *testing.T, allocate *vector.Allocate, filename, outputFilename string, chunkSize int) (*file.Info, []string) { +func prepareChunks( + ctx context.Context, + t *testing.T, + allocate 
*vector.Allocate, + filename, outputFilename string, + chunkSize int, + mergeSort bool, + bufferSize int, + withHeaders bool, + dropDuplicates bool, +) *file.Info { t.Helper() - f, err := os.Open(filename) + inputOutput := rw.NewInputOutput(ctx) + err := inputOutput.SetInputReader(ctx, filename) assert.NoError(t, err) - - fI := &file.Info{ - Reader: f, - Allocate: allocate, - OutputPath: outputFilename, - } - chunkPaths, err := fI.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) + err = inputOutput.SetOutputWriter(ctx, outputFilename) assert.NoError(t, err) - - t.Cleanup(func() { - defer f.Close() - dir, err := ioutil.ReadDir("testdata/chunks") + fileInfo := &file.Info{ + InputReader: inputOutput.Input, + Allocate: allocate, + OutputFile: inputOutput.Output, + WithHeader: withHeaders, + } + inputOutput.Do(func() (err error) { + chunkPaths, err := fileInfo.CreateSortedChunks(ctx, "testdata/chunks", chunkSize, 10) assert.NoError(t, err) - for _, d := range dir { - err = os.RemoveAll(path.Join("testdata/chunks", d.Name())) - assert.NoError(t, err) + if mergeSort { + return fileInfo.MergeSort(chunkPaths, bufferSize, dropDuplicates) } + return nil }) + err = inputOutput.Err() + assert.NoError(t, err) - return fI, chunkPaths + return fileInfo } func TestBasics(t *testing.T) { @@ -60,12 +73,33 @@ func TestBasics(t *testing.T) { outputFilename: "testdata/chunks/output.tsv", }, "100 elems": { - filename: "testdata/100elems.tsv", - expectedOutput: []string{"3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", 
"89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } - allocate := vector.DefaultVector(key.AllocateInt) + for name, tc := range tcs { filename := tc.filename outputFilename := tc.outputFilename @@ -74,13 +108,16 @@ func TestBasics(t *testing.T) { for chunkSize := 1; chunkSize < 152; chunkSize += 10 { for bufferSize := 1; bufferSize < 152; bufferSize += 10 { chunkSize := chunkSize - bufferSize := bufferSize t.Run(name+"_"+strconv.Itoa(chunkSize)+"_"+strconv.Itoa(bufferSize), func(t *testing.T) { ctx := context.Background() - fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize) - fI.OutputPath = outputFilename - err := fI.MergeSort(chunkPaths, bufferSize) - assert.NoError(t, err) + + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) + prepareChunks(ctx, t, allocate, filename, outputFilename, chunkSize, true, bufferSize, false, false) + outputFile, err := os.Open(outputFilename) assert.NoError(t, err) outputScanner := bufio.NewScanner(outputFile) @@ -107,12 +144,89 @@ func Test100Elems(t *testing.T) { 
expectedOutput []string }{ "100 elems": { - filename: "testdata/100elems.tsv", - expectedOutput: []string{"3", "4", "5", "6", "6", "7", "7", "7", "8", "8", "9", "9", "10", "10", "15", "18", "18", "18", "18", "21", "22", "22", "25", "25", "25", "25", "25", "26", "26", "27", "27", "28", "28", "29", "29", "29", "30", "30", "31", "31", "33", "33", "34", "36", "37", "39", "39", "39", "40", "41", "41", "42", "43", "43", "47", "47", "49", "50", "50", "52", "52", "53", "54", "55", "55", "55", "56", "57", "57", "59", "60", "61", "62", "63", "67", "71", "71", "72", "72", "73", "74", "75", "78", "79", "80", "80", "82", "89", "89", "89", "91", "91", "92", "92", "93", "93", "94", "97", "97", "99"}, + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) + 
outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsWithDuplicates(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with duplicates": { + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "7", + "8", "9", "10", "15", "18", + "21", "22", "25", "26", "27", + "28", "29", "30", "31", "33", + "34", "36", "37", "39", "40", + "41", "42", "43", "47", "49", + "50", "52", "53", "54", "55", + "56", "57", "59", "60", "61", + "62", "63", "67", "71", "72", + "73", "74", "75", "78", "79", + "80", "82", "89", "91", "92", + "93", "94", "97", "99", + }, outputFilename: "testdata/chunks/output.tsv", }, } - allocate := vector.DefaultVector(key.AllocateInt) + for name, tc := range tcs { filename := tc.filename outputFilename := tc.outputFilename @@ -120,9 +234,132 @@ func Test100Elems(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, 21) - err := fI.MergeSort(chunkPaths, 10) + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, true) + outputFile, err := os.Open(outputFilename) assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, 
expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsWithHeaders(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers": { + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, false) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + 
assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsWithHeadersWithDuplicates(t *testing.T) { + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers and duplicates": { + filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "7", + "8", "9", "10", "15", "18", + "21", "22", "25", "26", "27", + "28", "29", "30", "31", "33", + "34", "36", "37", "39", "40", + "41", "42", "43", "47", "49", + "50", "52", "53", "54", "55", + "56", "57", "59", "60", "61", + "62", "63", "67", "71", "72", + "73", "74", "75", "78", "79", + "80", "82", "89", "91", "92", + "93", "94", "97", "99", + }, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + allocate := vector.DefaultVector( + key.AllocateInt, + func(r io.Reader) (reader.Reader, error) { return reader.NewStdScanner(r, false) }, + func(w io.Writer) (writer.Writer, error) { return writer.NewStdWriter(w), nil }, + ) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, true, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) outputScanner := bufio.NewScanner(outputFile) @@ -148,7 +385,8 @@ func TestTsvKey(t *testing.T) { }{ "Tsv file": { filename: "testdata/multifields.tsv", - expectedOutput: []string{"3 D equipment", + expectedOutput: []string{ + "3 D equipment", "7 G inflation", "6 H delivery", "9 I child", @@ -157,13 +395,111 @@ func TestTsvKey(t *testing.T) { "1 N guidance", "10 S feedback", "2 T library", - "4 Z news"}, + "4 Z news", + }, outputFilename: "testdata/chunks/output.tsv", }, 
} - allocate := vector.DefaultVector(func(line string) (key.Key, error) { - return key.AllocateTsv(line, 1) + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + + allocate := vector.DefaultVector( + func(row interface{}) (key.Key, error) { return key.AllocateTsv(row, 1) }, + func(r io.Reader) (reader.Reader, error) { return reader.NewSeparatedValues(r, '\t'), nil }, + func(w io.Writer) (writer.Writer, error) { return writer.NewSeparatedValues(w, '\t'), nil }, + ) + prepareChunks(ctx, t, allocate, filename, outputFilename, 21, true, 10, false, false) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func prepareChunksShuffle( + ctx context.Context, + t *testing.T, + filename, outputFilename string, + chunkSize int, + mergeSort bool, + bufferSize int, + withHeaders bool, + dropDuplicates, + isGzip bool, +) *file.Info { + t.Helper() + inputOutput := rw.NewInputOutput(ctx) + err := inputOutput.SetInputReader(ctx, filename) + assert.NoError(t, err) + err = inputOutput.SetOutputWriter(ctx, outputFilename) + assert.NoError(t, err) + fileInfo := &file.Info{ + InputReader: inputOutput.Input, + OutputFile: inputOutput.Output, + WithHeader: withHeaders, + } + inputOutput.Do(func() (err error) { + _, err = fileInfo.Shuffle(ctx, "testdata/chunks", chunkSize, 10, bufferSize, 13, isGzip) + assert.NoError(t, err) + return nil }) + err = inputOutput.Err() + assert.NoError(t, err) + + return fileInfo +} + +func Test100ElemsShuffle(t *testing.T) { + 
t.Skip("to rework") + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems": { + filename: "testdata/100elems.tsv", + expectedOutput: []string{ + "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, + outputFilename: "testdata/chunks/output.tsv", + }, + } + for name, tc := range tcs { filename := tc.filename outputFilename := tc.outputFilename @@ -171,9 +507,127 @@ func TestTsvKey(t *testing.T) { expectedErr := tc.expectedErr t.Run(name, func(t *testing.T) { ctx := context.Background() - fI, chunkPaths := prepareChunks(ctx, t, allocate, filename, outputFilename, 21) - err := fI.MergeSort(chunkPaths, 10) + prepareChunksShuffle(ctx, t, filename, outputFilename, 21, false, 10, false, false, false) + outputFile, err := os.Open(outputFilename) assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsShuffleWithHeaders(t *testing.T) { + t.Skip("to rework") + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers": { + 
filename: "testdata/100elemsWithHeaders.tsv", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", "28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, + outputFilename: "testdata/chunks/output.tsv", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + prepareChunksShuffle(ctx, t, filename, outputFilename, 21, false, 10, true, false, false) + outputFile, err := os.Open(outputFilename) + assert.NoError(t, err) + outputScanner := bufio.NewScanner(outputFile) + count := 0 + for outputScanner.Scan() { + assert.Equal(t, expectedOutput[count], outputScanner.Text()) + count++ + } + assert.NoError(t, outputScanner.Err()) + assert.Equal(t, len(expectedOutput), count) + assert.True(t, errors.Is(err, expectedErr)) + outputFile.Close() + }) + } +} + +func Test100ElemsShuffleGzip(t *testing.T) { + t.Skip("to rework") + tcs := map[string]struct { + filename string + outputFilename string + expectedErr error + expectedOutput []string + }{ + "100 elems with headers": { + filename: "testdata/100elems.tsv.gz", + expectedOutput: []string{ + "headers", "3", "4", "5", "6", "6", + "7", "7", "7", "8", "8", + "9", "9", "10", "10", "15", + "18", "18", "18", "18", "21", + "22", "22", "25", "25", "25", + "25", "25", "26", "26", "27", + "27", "28", 
"28", "29", "29", + "29", "30", "30", "31", "31", + "33", "33", "34", "36", "37", + "39", "39", "39", "40", "41", + "41", "42", "43", "43", "47", + "47", "49", "50", "50", "52", + "52", "53", "54", "55", "55", + "55", "56", "57", "57", "59", + "60", "61", "62", "63", "67", + "71", "71", "72", "72", "73", + "74", "75", "78", "79", "80", + "80", "82", "89", "89", "89", + "91", "91", "92", "92", "93", + "93", "94", "97", "97", "99", + }, + outputFilename: "testdata/chunks/output.tsv.gz", + }, + } + + for name, tc := range tcs { + filename := tc.filename + outputFilename := tc.outputFilename + expectedOutput := tc.expectedOutput + expectedErr := tc.expectedErr + t.Run(name, func(t *testing.T) { + ctx := context.Background() + prepareChunksShuffle(ctx, t, filename, outputFilename, 21, false, 10, true, false, true) outputFile, err := os.Open(outputFilename) assert.NoError(t, err) outputScanner := bufio.NewScanner(outputFile) diff --git a/reader/contract.go b/reader/contract.go new file mode 100644 index 0000000..091267a --- /dev/null +++ b/reader/contract.go @@ -0,0 +1,15 @@ +package reader + +import ( + "io" +) + +// Reader define a basic reader. +type Reader interface { + Next() bool + Read() (interface{}, error) + Err() error +} + +// Config function type to convert a io.Reader to a Reader. 
+type Config func(r io.Reader) (Reader, error) diff --git a/reader/gzip_separated_values.go b/reader/gzip_separated_values.go new file mode 100644 index 0000000..5ff4627 --- /dev/null +++ b/reader/gzip_separated_values.go @@ -0,0 +1,57 @@ +package reader + +import ( + "compress/gzip" + "encoding/csv" + "io" + + "github.com/pkg/errors" +) + +type GZipSeparatedValuesReader struct { + row []string + r *csv.Reader + gr *gzip.Reader + err error +} + +func NewGZipSeparatedValues(r io.Reader, separator rune) (*GZipSeparatedValuesReader, error) { + gr, err := gzip.NewReader(r) + if err != nil { + return nil, errors.Wrap(err, "can't create gzip reader") + } + + s := &GZipSeparatedValuesReader{ + gr: gr, + r: csv.NewReader(gr), + } + s.r.Comma = separator + + return s, nil +} + +func (s *GZipSeparatedValuesReader) Next() bool { + s.row, s.err = s.r.Read() + if errors.Is(s.err, io.EOF) { + s.err = nil + s.gr.Close() + + return false + } + + return true +} + +func (s *GZipSeparatedValuesReader) Read() (interface{}, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *GZipSeparatedValuesReader) Err() error { + return s.err +} + +var _ Reader = &GZipSeparatedValuesReader{} diff --git a/reader/gzip_separated_values_test.go b/reader/gzip_separated_values_test.go new file mode 100644 index 0000000..0793a63 --- /dev/null +++ b/reader/gzip_separated_values_test.go @@ -0,0 +1,54 @@ +package reader_test + +import ( + "bufio" + "context" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/askiada/external-sort/internal/rw" + "github.com/askiada/external-sort/reader" +) + +func Test(t *testing.T) { + t.Skip("to rework") + f, err := os.Open("/mnt/c/Users/Alex/Downloads/recordings.59.tsv.gz") + require.NoError(t, err) + rder, err := reader.NewGZipSeparatedValues(bufio.NewReader(f), '\t') + require.NoError(t, err) + count := 0 + for rder.Next() { + row, err := rder.Read() + 
require.NoError(t, err) + _ = row + count++ + } + assert.Equal(t, 2853701, count) + require.NoError(t, rder.Err()) +} + +func TestS3(t *testing.T) { + t.Skip("to rework") + ctx := context.Background() + i := rw.NewInputOutput(ctx) + err := i.SetInputReader( + ctx, + "s3://blokur-data/ml-title/remote/1/f15c2cf2e3ab46589419e6441b64e3bd/artifacts/input/word2vec/refine/recordings.59.tsv.gz", + ) + require.NoError(t, err) + + gzipReader, err := reader.NewGZipSeparatedValues(i.Input, '\t') + require.NoError(t, err) + count := 0 + for gzipReader.Next() { + row, err := gzipReader.Read() + require.NoError(t, err) + _ = row + count++ + } + assert.Equal(t, 2853701, count) + require.NoError(t, gzipReader.Err()) +} diff --git a/reader/separated_values.go b/reader/separated_values.go new file mode 100644 index 0000000..b2948e4 --- /dev/null +++ b/reader/separated_values.go @@ -0,0 +1,46 @@ +package reader + +import ( + "encoding/csv" + "errors" + "io" +) + +type SeparatedValuesReader struct { + row []string + r *csv.Reader + err error +} + +func NewSeparatedValues(r io.Reader, separator rune) *SeparatedValuesReader { + s := &SeparatedValuesReader{ + r: csv.NewReader(r), + } + s.r.Comma = separator + + return s +} + +func (s *SeparatedValuesReader) Next() bool { + s.row, s.err = s.r.Read() + if errors.Is(s.err, io.EOF) { + s.err = nil + return false + } + + return true +} + +func (s *SeparatedValuesReader) Read() (interface{}, error) { + if s.err != nil { + return nil, s.err + } + + return s.row, nil +} + +func (s *SeparatedValuesReader) Err() error { + return s.err +} + +var _ Reader = &SeparatedValuesReader{} diff --git a/reader/std_scanner.go b/reader/std_scanner.go new file mode 100644 index 0000000..07756b5 --- /dev/null +++ b/reader/std_scanner.go @@ -0,0 +1,112 @@ +package reader + +import ( + "bufio" + "compress/gzip" + "io" + "strings" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var logger = logrus.StandardLogger() + +type StdScanner struct { + r 
*bufio.Scanner + gr *gzip.Reader +} + +func NewStdScanner(r io.Reader, isGzip bool) (Reader, error) { + var ( + newR *bufio.Scanner + s = &StdScanner{} + ) + + if isGzip { + gr, err := gzip.NewReader(r) + if err != nil { + return nil, errors.Wrap(err, "can't create gzip reader") + } + + s.gr = gr + newR = bufio.NewScanner(gr) + } else { + newR = bufio.NewScanner(r) + } + + s.r = newR + + logger.Infoln("Created standard scanner") + + return s, nil +} + +func (s *StdScanner) Next() bool { + next := s.r.Scan() + if !next && s.gr != nil { + s.gr.Close() + } + + return next +} + +func (s *StdScanner) Read() (interface{}, error) { + return s.r.Text(), nil +} + +func (s *StdScanner) Err() error { + return s.r.Err() +} + +type StdSliceScanner struct { + r *bufio.Scanner + gr *gzip.Reader +} + +func NewStdSliceScanner(r io.Reader, isGzip bool) (Reader, error) { + var ( + newR *bufio.Scanner + s = &StdSliceScanner{} + ) + + if isGzip { + gr, err := gzip.NewReader(r) + if err != nil { + return nil, errors.Wrap(err, "can't create gzip reader") + } + + s.gr = gr + newR = bufio.NewScanner(gr) + } else { + newR = bufio.NewScanner(r) + } + + s.r = newR + + return s, nil +} + +func (s *StdSliceScanner) Next() bool { + next := s.r.Scan() + if !next && s.gr != nil { + s.gr.Close() + } + + return next +} + +func (s *StdSliceScanner) Read() (interface{}, error) { + line := s.r.Text() + + before, after, found := strings.Cut(line, "##!!##") + if !found { + return nil, errors.New("can't cut row") + } + + return []string{before, after}, nil +} + +func (s *StdSliceScanner) Err() error { + return s.r.Err() +} diff --git a/sftp/sftp.go b/sftp/sftp.go index 60c9de2..3e60c51 100644 --- a/sftp/sftp.go +++ b/sftp/sftp.go @@ -1,9 +1,11 @@ package sftp import ( - "io/ioutil" "log" + "os" + "path/filepath" + "github.com/pkg/errors" "github.com/pkg/sftp" "golang.org/x/crypto/ssh" ) @@ -15,36 +17,45 @@ type Client struct { func NewSFTPClient(addr, key, user, passphrase string) (*Client, error) { res 
:= &Client{} - pemBytes, err := ioutil.ReadFile(key) + + pemBytes, err := os.ReadFile(filepath.Clean(key)) if err != nil { log.Fatal(err) } + signer, err := ssh.ParsePrivateKeyWithPassphrase(pemBytes, []byte(passphrase)) if err != nil { log.Fatalf("parse key failed:%v", err) } + config := &ssh.ClientConfig{ User: user, - HostKeyCallback: ssh.InsecureIgnoreHostKey(), //nolint + HostKeyCallback: ssh.InsecureIgnoreHostKey(), Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, } + conn, err := ssh.Dial("tcp", addr, config) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't dial with address %s", addr) } + res.Conn = conn + client, err := sftp.NewClient(conn) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't create sftp client with address %s", addr) } + res.Client = client + return res, nil } func (s *Client) Close() error { err := s.Client.Close() if err != nil { - return err + return errors.Wrap(err, "can't close client") } + return s.Conn.Close() } diff --git a/testdata/100elems.tsv.gz b/testdata/100elems.tsv.gz new file mode 100644 index 0000000..cf1b6ad Binary files /dev/null and b/testdata/100elems.tsv.gz differ diff --git a/testdata/100elemsWithHeaders.tsv b/testdata/100elemsWithHeaders.tsv new file mode 100644 index 0000000..ecfc0b9 --- /dev/null +++ b/testdata/100elemsWithHeaders.tsv @@ -0,0 +1,101 @@ +headers +5 +18 +27 +41 +6 +52 +89 +30 +39 +56 +63 +7 +22 +26 +73 +22 +55 +21 +8 +25 +40 +31 +26 +59 +57 +82 +7 +72 +4 +25 +47 +71 +61 +80 +91 +79 +25 +25 +43 +97 +25 +75 +50 +72 +29 +92 +80 +54 +89 +55 +28 +93 +43 +92 +47 +42 +71 +97 +49 +8 +93 +91 +7 +41 +74 +53 +18 +89 +50 +30 +3 +34 +62 +33 +55 +94 +10 +52 +39 +28 +60 +57 +78 +37 +67 +18 +33 +27 +9 +15 +99 +29 +10 +36 +6 +31 +39 +9 +18 +29 \ No newline at end of file diff --git a/vector/element.go b/vector/element.go index e49fb3e..3b9e768 100644 --- a/vector/element.go +++ b/vector/element.go @@ -3,8 +3,8 @@ package vector import 
"github.com/askiada/external-sort/vector/key" type Element struct { - Key key.Key - Line string + Key key.Key + Row interface{} } // Less returns wether v1 is smaller than v2 based on the keys. diff --git a/vector/key/int_key.go b/vector/key/int_key.go index 0744e82..6575d40 100644 --- a/vector/key/int_key.go +++ b/vector/key/int_key.go @@ -1,19 +1,67 @@ package key -import "strconv" +import ( + "strconv" + "github.com/pkg/errors" +) + +// Int define an integer key. type Int struct { value int } -func AllocateInt(line string) (Key, error) { +// AllocateInt create a new integer key. +func AllocateInt(row interface{}) (Key, error) { + line, ok := row.(string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to string: %+v", row) + } + num, err := strconv.Atoi(line) if err != nil { - return nil, err + return nil, errors.Wrapf(err, "can't convert line %s to int", line) } + return &Int{num}, nil } +// Less compare two integer keys. func (k *Int) Less(other Key) bool { - return k.value < other.(*Int).value + return k.value < other.(*Int).value //nolint //forcetypeassert +} + +// Equal check tow integer keys are equal. +func (k *Int) Equal(other Key) bool { + return k.value == other.(*Int).value //nolint //forcetypeassert +} + +// IntFromSlice define an integer key from a position in a slice of integers. +type IntFromSlice struct { + value int64 +} + +// AllocateIntFromSlice create a new integer key from a position in a slice of integers. +func AllocateIntFromSlice(row interface{}, intIndex int) (Key, error) { + line, ok := row.([]string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to []string: %+v", row) + } + + num, err := strconv.ParseInt(line[intIndex], 10, 64) + if err != nil { + return nil, errors.Wrapf(err, "can't parse int %+v", line[intIndex]) + } + + return &IntFromSlice{num}, nil +} + +// Less compare two integer keys. 
+func (k *IntFromSlice) Less(other Key) bool { + return k.value < other.(*IntFromSlice).value //nolint //forcetypeassert +} + +// Equal check two integer keys are equal. +func (k *IntFromSlice) Equal(other Key) bool { + return k.value == other.(*IntFromSlice).value //nolint //forcetypeassert } diff --git a/vector/key/key.go b/vector/key/key.go index eb05ce1..3b45aa7 100644 --- a/vector/key/key.go +++ b/vector/key/key.go @@ -1,6 +1,8 @@ package key +// Key define the interface to compare keys to sort. type Key interface { + Equal(v2 Key) bool // Less returns wether the key is smaller than v2 Less(v2 Key) bool } diff --git a/vector/key/string_key.go b/vector/key/string_key.go index d774e0c..4d6ade8 100644 --- a/vector/key/string_key.go +++ b/vector/key/string_key.go @@ -1,13 +1,43 @@ package key +import "strings" + +// String define a string key. type String struct { value string } +// AllocateString create a new string key. func AllocateString(line string) (Key, error) { return &String{line}, nil } +// Less compare two string keys. func (k *String) Less(other Key) bool { - return k.value < other.(*String).value + return k.value < other.(*String).value //nolint //forcetypeassert +} + +// Equal check two string keys are equal. +func (k *String) Equal(other Key) bool { + return k.value == other.(*String).value //nolint //forcetypeassert +} + +// UpperString define a string key. +type UpperString struct { + value string +} + +// AllocateUpperString create a new upper string key. It trims space and changes the string to uppercase. +func AllocateUpperString(line string) (Key, error) { + return &UpperString{strings.TrimSpace(strings.ToUpper(line))}, nil +} + +// Less compare two upper string keys. +func (k *UpperString) Less(other Key) bool { + return k.value < other.(*UpperString).value //nolint //forcetypeassert +} + +// Equal check two upper string keys are equal. 
+func (k *UpperString) Equal(other Key) bool { + return k.value == other.(*UpperString).value //nolint //forcetypeassert } diff --git a/vector/key/tsv_key.go b/vector/key/tsv_key.go index d3d3f8e..509df39 100644 --- a/vector/key/tsv_key.go +++ b/vector/key/tsv_key.go @@ -6,10 +6,27 @@ import ( "github.com/pkg/errors" ) -func AllocateTsv(line string, pos int) (Key, error) { - splitted := strings.Split(line, "\t") - if len(splitted) < pos+1 { - return nil, errors.Errorf("can't allocate tsv key line is invalid: %s", line) +const salt = "##!##" + +func AllocateTsv(row interface{}, pos ...int) (Key, error) { + splitted, ok := row.([]string) + if !ok { + return nil, errors.Errorf("can't convert interface{} to []string: %+v", row) } - return &String{splitted[pos]}, nil + + strBuilder := strings.Builder{} + + for i, p := range pos { + if len(splitted) < p+1 { + return nil, errors.Errorf("can't allocate tsv key line is invalid: %s", row) + } + + strBuilder.WriteString(splitted[p]) + + if i < len(pos)-1 { + strBuilder.WriteString(salt) + } + } + + return &String{strBuilder.String()}, nil } diff --git a/vector/slice_vector.go b/vector/slice_vector.go index 270015d..199b147 100644 --- a/vector/slice_vector.go +++ b/vector/slice_vector.go @@ -8,7 +8,7 @@ import ( var _ Vector = &SliceVec{} -func AllocateSlice(size int, allocateKey func(line string) (key.Key, error)) Vector { +func AllocateSlice(size int, allocateKey func(row interface{}) (key.Key, error)) Vector { return &SliceVec{ allocateKey: allocateKey, s: make([]*Element, 0, size), @@ -16,7 +16,7 @@ func AllocateSlice(size int, allocateKey func(line string) (key.Key, error)) Vec } type SliceVec struct { - allocateKey func(line string) (key.Key, error) + allocateKey func(row interface{}) (key.Key, error) s []*Element } @@ -32,12 +32,19 @@ func (v *SliceVec) Len() int { return len(v.s) } -func (v *SliceVec) PushBack(line string) error { - k, err := v.allocateKey(line) +func (v *SliceVec) PushBack(row interface{}) error { + 
k, err := v.allocateKey(row) if err != nil { return err } - v.s = append(v.s, &Element{Line: line, Key: k}) + + v.s = append(v.s, &Element{Row: row, Key: k}) + + return nil +} + +func (v *SliceVec) PushFrontNoKey(row interface{}) error { + v.s = append([]*Element{{Row: row}}, v.s...) return nil } diff --git a/vector/vector.go b/vector/vector.go index cc5471e..9961886 100644 --- a/vector/vector.go +++ b/vector/vector.go @@ -1,30 +1,42 @@ package vector import ( - "bufio" "os" + "path/filepath" - "github.com/askiada/external-sort/vector/key" "github.com/pkg/errors" + + "github.com/askiada/external-sort/reader" + "github.com/askiada/external-sort/vector/key" + "github.com/askiada/external-sort/writer" ) +// Allocate define a vector and methods to read and write it. type Allocate struct { - Vector func(int, func(line string) (key.Key, error)) Vector - Key func(line string) (key.Key, error) + Vector func(int, func(row interface{}) (key.Key, error)) Vector + FnReader reader.Config + FnWriter writer.Config + Key func(elem interface{}) (key.Key, error) } -func DefaultVector(allocateKey func(line string) (key.Key, error)) *Allocate { +// DefaultVector define a helper function to allocate a vector. +func DefaultVector(allocateKey func(elem interface{}) (key.Key, error), fnReader reader.Config, fnWr writer.Config) *Allocate { return &Allocate{ - Vector: AllocateSlice, - Key: allocateKey, + FnReader: fnReader, + FnWriter: fnWr, + Vector: AllocateSlice, + Key: allocateKey, } } +// Vector define a basic interface to manipulate a vector. 
type Vector interface { // Get Access i-th element Get(i int) *Element // PushBack Add item at the end - PushBack(line string) error + PushBack(row interface{}) error + // PushFront Add item at the beginning + PushFrontNoKey(row interface{}) error // FrontShift Remove the first element FrontShift() // Len Length of the Vector @@ -35,19 +47,36 @@ type Vector interface { Sort() } -func Dump(v Vector, filename string) error { - file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) +const writeFilePerm = 0o600 + +// Dump copy a vector to a file. +func (a *Allocate) Dump(vec Vector, filename string) error { + file, err := os.OpenFile(filepath.Clean(filename), os.O_CREATE|os.O_WRONLY, writeFilePerm) if err != nil { return errors.Errorf("failed creating file: %s", err) } - datawriter := bufio.NewWriter(file) - for i := 0; i < v.Len(); i++ { - _, err = datawriter.WriteString(v.Get(i).Line + "\n") + + datawriter, err := a.FnWriter(file) + if err != nil { + return errors.Errorf("failed creating writer: %s", err) + } + + for i := range vec.Len() { + err = datawriter.Write(vec.Get(i).Row) if err != nil { return errors.Errorf("failed writing file: %s", err) } } - datawriter.Flush() - file.Close() + + err = datawriter.Close() + if err != nil { + return errors.Wrap(err, "can't close chunk writer") + } + + err = file.Close() + if err != nil { + return errors.Wrap(err, "can't close chunk file") + } + return nil } diff --git a/writer/contract.go b/writer/contract.go new file mode 100644 index 0000000..c23fddb --- /dev/null +++ b/writer/contract.go @@ -0,0 +1,10 @@ +package writer + +import "io" + +type Writer interface { + Write(row interface{}) (err error) + Close() (err error) +} + +type Config func(w io.Writer) (Writer, error) diff --git a/writer/gzip_separated_values.go b/writer/gzip_separated_values.go new file mode 100644 index 0000000..a86a1f6 --- /dev/null +++ b/writer/gzip_separated_values.go @@ -0,0 +1,50 @@ +package writer + +import ( + "compress/gzip" + 
"encoding/csv" + "io" + + "github.com/pkg/errors" +) + +type GZipSeparatedValuesWriter struct { + w *csv.Writer + gw *gzip.Writer +} + +func NewGZipSeparatedValues(w io.Writer, separator rune) (Writer, error) { + gw := gzip.NewWriter(w) + s := &GZipSeparatedValuesWriter{ + gw: gw, + w: csv.NewWriter(gw), + } + s.w.Comma = separator + + return s, nil +} + +func (s *GZipSeparatedValuesWriter) Write(elem interface{}) error { + line, ok := elem.([]string) + if !ok { + return errors.Errorf("can't convert interface{} to []string: %+v", elem) + } + + err := s.w.Write(line) + if err != nil { + return errors.Wrap(err, "can't write line") + } + + return nil +} + +func (s *GZipSeparatedValuesWriter) Close() (err error) { + defer func() { err = s.gw.Close() }() + s.w.Flush() + + if s.w.Error() != nil { + return errors.Wrap(s.w.Error(), "can't close writer") + } + + return err +} diff --git a/writer/separated_values.go b/writer/separated_values.go new file mode 100644 index 0000000..0303ce2 --- /dev/null +++ b/writer/separated_values.go @@ -0,0 +1,45 @@ +package writer + +import ( + "encoding/csv" + "io" + + "github.com/pkg/errors" +) + +type SeparatedValuesWriter struct { + w *csv.Writer +} + +func NewSeparatedValues(w io.Writer, separator rune) Writer { + s := &SeparatedValuesWriter{ + w: csv.NewWriter(w), + } + s.w.Comma = separator + + return s +} + +func (s *SeparatedValuesWriter) Write(elem interface{}) error { + line, ok := elem.([]string) + if !ok { + return errors.Errorf("can't convert interface{} to []string: %+v", elem) + } + + err := s.w.Write(line) + if err != nil { + return errors.Wrap(err, "can't write line") + } + + return nil +} + +func (s *SeparatedValuesWriter) Close() error { + s.w.Flush() + + if s.w.Error() != nil { + return errors.Wrap(s.w.Error(), "can't close writer") + } + + return nil +} diff --git a/writer/std_writer.go b/writer/std_writer.go new file mode 100644 index 0000000..2398a03 --- /dev/null +++ b/writer/std_writer.go @@ -0,0 +1,106 @@ 
+package writer + +import ( + "bufio" + "compress/gzip" + "io" + "strings" + + "github.com/pkg/errors" +) + +// StdWriter implement writer interface with a bufio writer. +type StdWriter struct { + w *bufio.Writer +} + +// NewStdWriter create a standard writer. +func NewStdWriter(w io.Writer) Writer { + s := &StdWriter{ + w: bufio.NewWriter(w), + } + + return s +} + +func (w *StdWriter) Write(elem interface{}) error { + line, ok := elem.(string) + if !ok { + return errors.Errorf("can't convert interface{} to string: %+v", elem) + } + + _, err := w.w.WriteString(line + "\n") + if err != nil { + return errors.Wrap(err, "can't write string") + } + + return err +} + +// Close close the bufio writer. It is the responsibility of the client to close the underlying writer. +func (w *StdWriter) Close() error { + err := w.w.Flush() + if err != nil { + return errors.Wrap(err, "can't close writer") + } + + return nil +} + +type StdSliceWriter struct { + skipFirst bool + w *bufio.Writer + gw *gzip.Writer +} + +func NewStdSliceWriter(w io.Writer, skipFirst, isGzip bool) Writer { + var ( + newR *bufio.Writer + ssw = &StdSliceWriter{ + skipFirst: skipFirst, + } + ) + + if isGzip { + ssw.gw = gzip.NewWriter(w) + newR = bufio.NewWriter(ssw.gw) + } else { + newR = bufio.NewWriter(w) + } + + ssw.w = newR + + return ssw +} + +func (w *StdSliceWriter) Write(elem interface{}) error { + line, ok := elem.([]string) + if !ok { + return errors.Errorf("can't convert interface{} to []string: %+v", elem) + } + + if w.skipFirst { + line = line[1:] + } + + _, err := w.w.WriteString(strings.Join(line, "##!!##") + "\n") + if err != nil { + return errors.Wrap(err, "can't write string") + } + + return err +} + +// Close close the bufio writer. It is the responsibility of the client to close the underlying writer. 
+func (w *StdSliceWriter) Close() (err error) { + if w.gw != nil { + defer func() { err = w.gw.Close() }() + } + + err = w.w.Flush() + if err != nil { + return errors.Wrap(err, "can't close writer") + } + + return err +}