From 9868e8f31eeb193c5a8ea3d9c012a9586ba9ef03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Saksida?= Date: Fri, 6 Mar 2026 16:58:06 -0300 Subject: [PATCH 1/6] Add s3-graphs-zip workflow for archiving CE graphs - Add script to package CE graph JSON files into zip files in S3 - Streams zip files directly to S3 with multipart upload support - Calls preconfigured webhook for notifications - Add Dockerfile for Argo / container orchestration - Add docker-compose.yml with LocalStack for integration test setup - Add workflow template - Hook up legacy CER API to Argo workflows (WIP) --- Gemfile | 1 + Gemfile.lock | 18 + app/api/entities/envelope_download.rb | 4 + app/api/v1/envelopes.rb | 5 + app/jobs/download_envelopes_job.rb | 4 +- app/services/argo_workflows_client.rb | 52 + .../submit_envelope_download_workflow.rb | 79 + .../sync_envelope_download_workflow_status.rb | 125 ++ ...o_workflow_fields_to_envelope_downloads.rb | 7 + db/structure.sql | 228 ++- lib/argo_workflows_api_client.rb | 5 + .../api/workflow_service_api.rb | 1277 +++++++++++++++++ lib/argo_workflows_api_client/api_client.rb | 388 +++++ lib/argo_workflows_api_client/api_error.rb | 57 + .../configuration.rb | 205 +++ lib/argo_workflows_api_client/version.rb | 14 + lib/swagger_docs/models.rb | 8 + scripts/generate_argo_workflows_client.sh | 59 + spec/api/v1/envelopes_spec.rb | 11 +- spec/factories/envelope_downloads.rb | 1 + spec/jobs/download_envelopes_job_spec.rb | 6 +- spec/services/download_envelopes_spec.rb | 2 + .../submit_envelope_download_workflow_spec.rb | 57 + ..._envelope_download_workflow_status_spec.rb | 107 ++ workflows/tasks/s3-graphs-zip/.dockerignore | 4 + workflows/tasks/s3-graphs-zip/.gitignore | 35 + workflows/tasks/s3-graphs-zip/Dockerfile | 13 + workflows/tasks/s3-graphs-zip/README.md | 224 +++ .../tasks/s3-graphs-zip/docker-compose.yml | 35 + workflows/tasks/s3-graphs-zip/main.py | 590 ++++++++ workflows/tasks/s3-graphs-zip/pyproject.toml | 20 + 
.../tests/test_integration_localstack.py | 269 ++++ .../tasks/s3-graphs-zip/tests/test_service.py | 117 ++ workflows/tasks/s3-graphs-zip/uv.lock | 162 +++ .../s3-graphs-zip-workflow-template.yaml | 88 ++ 35 files changed, 4268 insertions(+), 9 deletions(-) create mode 100644 app/services/argo_workflows_client.rb create mode 100644 app/services/submit_envelope_download_workflow.rb create mode 100644 app/services/sync_envelope_download_workflow_status.rb create mode 100644 db/migrate/20260306120000_add_argo_workflow_fields_to_envelope_downloads.rb create mode 100644 lib/argo_workflows_api_client.rb create mode 100644 lib/argo_workflows_api_client/api/workflow_service_api.rb create mode 100644 lib/argo_workflows_api_client/api_client.rb create mode 100644 lib/argo_workflows_api_client/api_error.rb create mode 100644 lib/argo_workflows_api_client/configuration.rb create mode 100644 lib/argo_workflows_api_client/version.rb create mode 100755 scripts/generate_argo_workflows_client.sh create mode 100644 spec/services/submit_envelope_download_workflow_spec.rb create mode 100644 spec/services/sync_envelope_download_workflow_status_spec.rb create mode 100644 workflows/tasks/s3-graphs-zip/.dockerignore create mode 100644 workflows/tasks/s3-graphs-zip/.gitignore create mode 100644 workflows/tasks/s3-graphs-zip/Dockerfile create mode 100644 workflows/tasks/s3-graphs-zip/README.md create mode 100644 workflows/tasks/s3-graphs-zip/docker-compose.yml create mode 100644 workflows/tasks/s3-graphs-zip/main.py create mode 100644 workflows/tasks/s3-graphs-zip/pyproject.toml create mode 100644 workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py create mode 100644 workflows/tasks/s3-graphs-zip/tests/test_service.py create mode 100644 workflows/tasks/s3-graphs-zip/uv.lock create mode 100644 workflows/templates/s3-graphs-zip-workflow-template.yaml diff --git a/Gemfile b/Gemfile index 56a4d836..040a1623 100644 --- a/Gemfile +++ b/Gemfile @@ -25,6 +25,7 @@ gem 'rake', '~> 13.2' 
gem 'rdoc', '~> 6.15.0' gem 'rubyzip', '~> 2.4', require: 'zip' gem 'swagger-blocks', '~> 3.0.0' +gem 'typhoeus', '~> 1.5' # Persistence gem 'activerecord-import', '~> 2.1' diff --git a/Gemfile.lock b/Gemfile.lock index 5d21bcbe..f62b5900 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -79,6 +79,7 @@ GEM benchmark (0.5.0) bigdecimal (4.0.1) builder (3.3.0) + byebug (12.0.0) childprocess (5.1.0) logger (~> 1.5) chronic (0.10.2) @@ -135,11 +136,15 @@ GEM encryptor (3.0.0) erb (6.0.2) erubi (1.13.1) + ethon (0.15.0) + ffi (>= 1.15.0) factory_bot (6.5.6) activesupport (>= 6.1.0) faker (3.6.0) i18n (>= 1.8.11, < 2) + ffi (1.17.3) ffi (1.17.3-x64-mingw-ucrt) + ffi (1.17.3-x86_64-linux-gnu) ffi-compiler (1.3.2) ffi (>= 1.15.5) rake @@ -231,6 +236,7 @@ GEM logger mime-types-data (~> 3.2025, >= 3.2025.0507) mime-types-data (3.2026.0224) + mini_portile2 (2.8.9) minitest (6.0.2) drb (~> 2.0) prism (~> 1.5) @@ -243,8 +249,13 @@ GEM netrc (0.11.0) newrelic_rpm (9.24.0) nio4r (2.7.5) + nokogiri (1.19.1) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.19.1-x64-mingw-ucrt) racc (~> 1.4) + nokogiri (1.19.1-x86_64-linux-gnu) + racc (~> 1.4) ostruct (0.6.3) overcommit (0.68.0) childprocess (>= 0.6.3, < 6) @@ -257,6 +268,8 @@ GEM parser (3.3.10.2) ast (~> 2.4.1) racc + pg (1.6.3) + pg (1.6.3-x86_64-linux) pg_search (2.3.7) activerecord (>= 6.1) activesupport (>= 6.1) @@ -393,6 +406,8 @@ GEM readline sync tsort (0.2.0) + typhoeus (1.5.0) + ethon (>= 0.9.0, < 0.16.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (3.2.0) @@ -416,7 +431,9 @@ GEM zeitwerk (2.7.5) PLATFORMS + aarch64-linux x64-mingw-ucrt + x86_64-linux DEPENDENCIES activejob (= 8.0.2.1) @@ -491,6 +508,7 @@ DEPENDENCIES simplecov (>= 0.21.2) simplecov_json_formatter swagger-blocks (~> 3.0.0) + typhoeus (~> 1.5) uuid (~> 2.3) vcr (~> 6.3) virtus (~> 2.0) diff --git a/app/api/entities/envelope_download.rb b/app/api/entities/envelope_download.rb index c69c2b5d..735752e2 100644 --- 
a/app/api/entities/envelope_download.rb +++ b/app/api/entities/envelope_download.rb @@ -20,6 +20,10 @@ class EnvelopeDownload < Grape::Entity expose :url, documentation: { type: 'string', desc: 'AWS S3 URL' }, if: ->(object) { object.finished? } + + expose :zip_files, + documentation: { type: 'array', is_array: true, desc: 'ZIP files produced by the workflow' }, + if: ->(object) { object.finished? } end end end diff --git a/app/api/v1/envelopes.rb b/app/api/v1/envelopes.rb index cef605b7..786f41e5 100644 --- a/app/api/v1/envelopes.rb +++ b/app/api/v1/envelopes.rb @@ -12,6 +12,7 @@ require 'v1/revisions' require 'v1/envelope_events' require 'download_envelopes_job' +require 'sync_envelope_download_workflow_status' module API module V1 @@ -79,6 +80,10 @@ class Envelopes < MountableAPI desc 'Returns the envelope download' get do + SyncEnvelopeDownloadWorkflowStatus.call( + envelope_download: @envelope_download + ) + present @envelope_download, with: API::Entities::EnvelopeDownload end diff --git a/app/jobs/download_envelopes_job.rb b/app/jobs/download_envelopes_job.rb index b330c82d..559aa0c6 100644 --- a/app/jobs/download_envelopes_job.rb +++ b/app/jobs/download_envelopes_job.rb @@ -1,4 +1,4 @@ -require 'download_envelopes' +require 'submit_envelope_download_workflow' require 'envelope_download' # Create a ZIP archive contaning all of the envelopes from a certain community, @@ -10,7 +10,7 @@ def perform(envelope_download_id) envelope_download = EnvelopeDownload.find_by(id: envelope_download_id) return unless envelope_download - DownloadEnvelopes.call(envelope_download:) + SubmitEnvelopeDownloadWorkflow.call(envelope_download:) rescue StandardError => e Airbrake.notify(e, envelope_download_id:) raise e diff --git a/app/services/argo_workflows_client.rb b/app/services/argo_workflows_client.rb new file mode 100644 index 00000000..d1a22dc3 --- /dev/null +++ b/app/services/argo_workflows_client.rb @@ -0,0 +1,52 @@ +require 'argo_workflows_api_client' +require 'uri' + 
+class ArgoWorkflowsClient + attr_reader :namespace + + def initialize(configuration: build_configuration) + @namespace = ENV.fetch('ARGO_WORKFLOWS_NAMESPACE') + @workflow_service_api = ArgoWorkflowsApiClient::WorkflowServiceApi.new( + ArgoWorkflowsApiClient::ApiClient.new(configuration) + ) + end + + def get_workflow(name:) + @workflow_service_api.workflow_service_get_workflow( + namespace, + name, + return_type: 'Object' + ) + end + + def submit_workflow(template_name:, parameters:, generate_name:) + @workflow_service_api.workflow_service_submit_workflow( + { + namespace:, + resourceKind: 'WorkflowTemplate', + resourceName: template_name, + submitOptions: { + generateName:, + parameters: parameters.map { |key, value| "#{key}=#{value}" } + } + }, + namespace, + return_type: 'Object' + ) + end + + private + + def build_configuration + base_uri = URI.parse(ENV.fetch('ARGO_WORKFLOWS_BASE_URL')) + + ArgoWorkflowsApiClient::Configuration.new.tap do |config| + config.scheme = base_uri.scheme + config.host = [base_uri.host, base_uri.port].compact.join(':') + config.base_path = base_uri.path + config.api_key['Authorization'] = ENV.fetch('ARGO_WORKFLOWS_TOKEN') + config.api_key_prefix['Authorization'] = 'Bearer' + config.timeout = ENV.fetch('ARGO_WORKFLOWS_TIMEOUT_SECONDS', 30).to_i + end + end +end diff --git a/app/services/submit_envelope_download_workflow.rb b/app/services/submit_envelope_download_workflow.rb new file mode 100644 index 00000000..d897f093 --- /dev/null +++ b/app/services/submit_envelope_download_workflow.rb @@ -0,0 +1,79 @@ +require 'argo_workflows_client' + +class SubmitEnvelopeDownloadWorkflow + def self.call(envelope_download:) + new(envelope_download).call + end + + attr_reader :envelope_download + + def initialize(envelope_download) + @envelope_download = envelope_download + end + + def call + workflow = client.submit_workflow( + template_name: ENV.fetch('ARGO_WORKFLOWS_TEMPLATE_NAME'), + generate_name: "#{community_name.tr('_', '-')}-download-", + 
parameters: + ) + workflow_name = workflow.dig(:metadata, :name) + raise 'Argo workflow submission did not return a workflow name' if workflow_name.blank? + + envelope_download.update!( + argo_workflow_name: workflow_name, + argo_workflow_namespace: client.namespace, + finished_at: nil, + internal_error_backtrace: [], + internal_error_message: nil, + started_at: Time.current, + status: :in_progress, + zip_files: [], + url: nil + ) + rescue StandardError => e + envelope_download.update!( + argo_workflow_name: nil, + argo_workflow_namespace: nil, + finished_at: Time.current, + internal_error_backtrace: Array(e.backtrace), + internal_error_message: e.message, + status: :finished, + zip_files: [], + url: nil + ) + raise + end + + private + + def client + @client ||= ArgoWorkflowsClient.new + end + + def community_name + envelope_download.envelope_community.name + end + + def destination_prefix + "#{community_name}/downloads/#{envelope_download.id}" + end + + def parameters + { + 'batch-size' => ENV.fetch('ARGO_WORKFLOWS_BATCH_SIZE', '1000'), + 'aws-region' => ENV.fetch('AWS_REGION'), + 'destination-bucket' => ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET'), + 'destination-prefix' => destination_prefix, + 'environment' => MR.env, + 'max-uncompressed-zip-size-bytes' => ENV.fetch( + 'ARGO_WORKFLOWS_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES', + (200 * 1024 * 1024).to_s + ), + 'max-workers' => ENV.fetch('ARGO_WORKFLOWS_MAX_WORKERS', '4'), + 'source-bucket' => ENV.fetch('ENVELOPE_GRAPHS_BUCKET'), + 'source-prefix' => community_name, + 'task-image' => ENV.fetch('ARGO_WORKFLOWS_TASK_IMAGE') + } + end +end diff --git a/app/services/sync_envelope_download_workflow_status.rb b/app/services/sync_envelope_download_workflow_status.rb new file mode 100644 index 00000000..1d5dea30 --- /dev/null +++ b/app/services/sync_envelope_download_workflow_status.rb @@ -0,0 +1,125 @@ +require 'argo_workflows_client' +require 'aws-sdk-s3' +require 'json' + +class SyncEnvelopeDownloadWorkflowStatus + SUCCESS_PHASE = 
'Succeeded'.freeze + FAILURE_PHASES = %w[Error Failed].freeze + RUNNING_PHASE = 'Running'.freeze + + def self.call(envelope_download:) + new(envelope_download).call + end + + attr_reader :envelope_download + + def initialize(envelope_download) + @envelope_download = envelope_download + end + + def call + return envelope_download if envelope_download.argo_workflow_name.blank? + return envelope_download if envelope_download.finished? && envelope_download.zip_files.present? + + workflow = client.get_workflow(name: envelope_download.argo_workflow_name) + status = workflow.fetch(:status, {}) + phase = status[:phase] + + if phase == SUCCESS_PHASE + mark_success!(status) + elsif FAILURE_PHASES.include?(phase) + mark_failure!(status) + elsif phase == RUNNING_PHASE + mark_in_progress!(status) + end + + envelope_download + rescue ArgoWorkflowsApiClient::ApiError => e + MR.logger.warn("Unable to sync Argo workflow #{envelope_download.argo_workflow_name}: #{e.message}") + envelope_download + end + + private + + def client + @client ||= ArgoWorkflowsClient.new + end + + def community_name + envelope_download.envelope_community.name + end + + def destination_bucket + ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET') + end + + def mark_failure!(status) + envelope_download.update!( + finished_at: parse_time(status[:finishedAt]) || Time.current, + internal_error_backtrace: [], + internal_error_message: status[:message] || "Argo workflow #{status[:phase].to_s.downcase}", + status: :finished, + zip_files: [], + url: nil + ) + end + + def mark_in_progress!(status) + envelope_download.update!( + started_at: parse_time(status[:startedAt]) || envelope_download.started_at || Time.current, + status: :in_progress + ) + end + + def mark_success!(status) + manifest = output_manifest(status) + zip_files = manifest.fetch('zip_files', []) + + if zip_files.present? 
+ envelope_download.update!( + finished_at: parse_time(status[:finishedAt]) || Time.current, + internal_error_backtrace: [], + internal_error_message: nil, + status: :finished, + url: public_url_for(zip_files.first), + zip_files: + ) + else + envelope_download.update!( + finished_at: parse_time(status[:finishedAt]) || Time.current, + internal_error_backtrace: [], + internal_error_message: 'Argo workflow succeeded but did not return any ZIP files', + status: :finished, + zip_files: [], + url: nil + ) + end + end + + def parse_time(value) + Time.zone.parse(value) if value.present? + end + + def s3_client + @s3_client ||= Aws::S3::Client.new(region: ENV.fetch('AWS_REGION')) + end + + def output_manifest(status) + parameter = status.fetch(:outputs, {}) + .fetch(:parameters, []) + .find { |item| item[:name] == 'zip-manifest' } + return {} unless parameter + + JSON.parse(parameter.fetch(:value)) + end + + def public_url_for(key) + s3_client.head_object(bucket: destination_bucket, key:) + Aws::S3::Resource.new(region: ENV.fetch('AWS_REGION')) + .bucket(destination_bucket) + .object(key) + .public_url + rescue Aws::S3::Errors::NotFound, Aws::S3::Errors::NoSuchKey + nil + end +end diff --git a/db/migrate/20260306120000_add_argo_workflow_fields_to_envelope_downloads.rb b/db/migrate/20260306120000_add_argo_workflow_fields_to_envelope_downloads.rb new file mode 100644 index 00000000..bbc35fec --- /dev/null +++ b/db/migrate/20260306120000_add_argo_workflow_fields_to_envelope_downloads.rb @@ -0,0 +1,7 @@ +class AddArgoWorkflowFieldsToEnvelopeDownloads < ActiveRecord::Migration[8.0] + def change + add_column :envelope_downloads, :argo_workflow_name, :string + add_column :envelope_downloads, :argo_workflow_namespace, :string + add_column :envelope_downloads, :zip_files, :jsonb, default: [], null: false + end +end diff --git a/db/structure.sql b/db/structure.sql index fb410fb0..c457398b 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1,3 +1,8 @@ +\restrict 
RlhAm1PKGOPdsOZNMphz6iO9DdiuUyVV63UMT76QE4hhnh8DEXySEcXHsCn7v5q + +-- Dumped from database version 16.13 (Debian 16.13-1.pgdg13+1) +-- Dumped by pg_dump version 16.13 + SET statement_timeout = 0; SET lock_timeout = 0; SET idle_in_transaction_session_timeout = 0; @@ -326,7 +331,10 @@ CREATE TABLE public.envelope_downloads ( created_at timestamp(6) without time zone NOT NULL, updated_at timestamp(6) without time zone NOT NULL, status character varying DEFAULT 'pending'::character varying NOT NULL, - enqueued_at timestamp(6) without time zone + enqueued_at timestamp(6) without time zone, + argo_workflow_name character varying, + argo_workflow_namespace character varying, + zip_files jsonb DEFAULT '[]'::jsonb NOT NULL ); @@ -508,7 +516,24 @@ CREATE TABLE public.indexed_envelope_resources ( payload jsonb DEFAULT '"{}"'::jsonb NOT NULL, public_record boolean DEFAULT true NOT NULL, "search:resourcePublishType" character varying, - publication_status integer DEFAULT 0 NOT NULL + publication_status integer DEFAULT 0 NOT NULL, + "ceterms:name" character varying, + "rdfs:label" character varying, + "rdfs:label_en" character varying, + "rdfs:label_es" character varying, + "skos:note" character varying, + "skos:note_fr_us" character varying, + "skos:note_nl_nl" character varying, + "ceterms:globalJurisdiction" boolean[] DEFAULT '{}'::boolean[] NOT NULL, + "ceterms:temporalCoverage" date[] DEFAULT '{}'::date[] NOT NULL, + "ceterms:startTime" timestamp(6) without time zone[] DEFAULT '{}'::timestamp without time zone[] NOT NULL, + "ceterms:weight" double precision[] DEFAULT '{}'::double precision[] NOT NULL, + "ceterms:medianEarnings" integer[] DEFAULT '{}'::integer[] NOT NULL, + "ceterms:inLanguage" character varying[] DEFAULT '{}'::character varying[] NOT NULL, + "ceterms:email" character varying, + "ceterms:telephone" character varying, + "ceterms:contactType" character varying, + "ceterms:contactType_en" character varying ); @@ -1136,6 +1161,34 @@ CREATE INDEX 
envelope_resources_fts_trigram_idx ON public.envelope_resources USI CREATE INDEX envelopes_resources_id_idx ON public.envelopes USING btree (((processed_resource ->> '@id'::text))); +-- +-- Name: i_ctdl_ceterms_contactType_en_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_contactType_en_fts" ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("ceterms:contactType_en")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_ceterms_contactType_en_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_contactType_en_trgm" ON public.indexed_envelope_resources USING gin ("ceterms:contactType_en" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_ceterms_contactType_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_contactType_fts" ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("ceterms:contactType")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_ceterms_contactType_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_contactType_trgm" ON public.indexed_envelope_resources USING gin ("ceterms:contactType" public.gin_trgm_ops); + + -- -- Name: i_ctdl_ceterms_ctid; Type: INDEX; Schema: public; Owner: - -- @@ -1150,6 +1203,90 @@ CREATE UNIQUE INDEX i_ctdl_ceterms_ctid ON public.indexed_envelope_resources USI CREATE INDEX i_ctdl_ceterms_ctid_trgm ON public.indexed_envelope_resources USING gin ("ceterms:ctid" public.gin_trgm_ops); +-- +-- Name: i_ctdl_ceterms_email_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_email_fts ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("ceterms:email")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_ceterms_email_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_email_trgm ON public.indexed_envelope_resources USING gin 
("ceterms:email" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_ceterms_globalJurisdiction; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_globalJurisdiction" ON public.indexed_envelope_resources USING gin ("ceterms:globalJurisdiction"); + + +-- +-- Name: i_ctdl_ceterms_inLanguage; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_inLanguage" ON public.indexed_envelope_resources USING gin ("ceterms:inLanguage"); + + +-- +-- Name: i_ctdl_ceterms_medianEarnings; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_medianEarnings" ON public.indexed_envelope_resources USING gin ("ceterms:medianEarnings"); + + +-- +-- Name: i_ctdl_ceterms_name_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_name_fts ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("ceterms:name")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_ceterms_name_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_name_trgm ON public.indexed_envelope_resources USING gin ("ceterms:name" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_ceterms_startTime; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX "i_ctdl_ceterms_startTime" ON public.indexed_envelope_resources USING gin ("ceterms:startTime"); + + +-- +-- Name: i_ctdl_ceterms_telephone_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_telephone_fts ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("ceterms:telephone")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_ceterms_telephone_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_telephone_trgm ON public.indexed_envelope_resources USING gin ("ceterms:telephone" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_ceterms_temporalCoverage; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX 
"i_ctdl_ceterms_temporalCoverage" ON public.indexed_envelope_resources USING gin ("ceterms:temporalCoverage"); + + +-- +-- Name: i_ctdl_ceterms_weight; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_ceterms_weight ON public.indexed_envelope_resources USING gin ("ceterms:weight"); + + -- -- Name: i_ctdl_envelope_resource_id; Type: INDEX; Schema: public; Owner: - -- @@ -1178,6 +1315,48 @@ CREATE INDEX i_ctdl_id_trgm ON public.indexed_envelope_resources USING gin ("@id CREATE INDEX i_ctdl_public_record ON public.indexed_envelope_resources USING btree (public_record); +-- +-- Name: i_ctdl_rdfs_label_en_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_rdfs_label_en_fts ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("rdfs:label_en")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_rdfs_label_en_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_rdfs_label_en_trgm ON public.indexed_envelope_resources USING gin ("rdfs:label_en" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_rdfs_label_es_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_rdfs_label_es_fts ON public.indexed_envelope_resources USING gin (to_tsvector('spanish'::regconfig, translate(("rdfs:label_es")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_rdfs_label_es_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_rdfs_label_es_trgm ON public.indexed_envelope_resources USING gin ("rdfs:label_es" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_rdfs_label_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_rdfs_label_fts ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("rdfs:label")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_rdfs_label_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_rdfs_label_trgm ON public.indexed_envelope_resources USING gin ("rdfs:label" 
public.gin_trgm_ops); + + -- -- Name: i_ctdl_search_ownedBy; Type: INDEX; Schema: public; Owner: - -- @@ -1227,6 +1406,48 @@ CREATE INDEX "i_ctdl_search_recordUpdated_desc" ON public.indexed_envelope_resou CREATE INDEX "i_ctdl_search_resourcePublishType" ON public.indexed_envelope_resources USING btree ("search:resourcePublishType"); +-- +-- Name: i_ctdl_skos_note_fr_us_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_skos_note_fr_us_fts ON public.indexed_envelope_resources USING gin (to_tsvector('french'::regconfig, translate(("skos:note_fr_us")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_skos_note_fr_us_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_skos_note_fr_us_trgm ON public.indexed_envelope_resources USING gin ("skos:note_fr_us" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_skos_note_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_skos_note_fts ON public.indexed_envelope_resources USING gin (to_tsvector('english'::regconfig, translate(("skos:note")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_skos_note_nl_nl_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_skos_note_nl_nl_fts ON public.indexed_envelope_resources USING gin (to_tsvector('dutch'::regconfig, translate(("skos:note_nl_nl")::text, '/.'::text, ' '::text))); + + +-- +-- Name: i_ctdl_skos_note_nl_nl_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_skos_note_nl_nl_trgm ON public.indexed_envelope_resources USING gin ("skos:note_nl_nl" public.gin_trgm_ops); + + +-- +-- Name: i_ctdl_skos_note_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX i_ctdl_skos_note_trgm ON public.indexed_envelope_resources USING gin ("skos:note" public.gin_trgm_ops); + + -- -- Name: i_ctdl_type; Type: INDEX; Schema: public; Owner: - -- @@ -1828,10 +2049,13 @@ ALTER TABLE ONLY public.envelopes -- PostgreSQL database dump complete -- +\unrestrict 
RlhAm1PKGOPdsOZNMphz6iO9DdiuUyVV63UMT76QE4hhnh8DEXySEcXHsCn7v5q + SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES ('20260310005238'), +('20260306120000'), ('20251022205617'), ('20250925025616'), ('20250922224518'), diff --git a/lib/argo_workflows_api_client.rb b/lib/argo_workflows_api_client.rb new file mode 100644 index 00000000..28632c9b --- /dev/null +++ b/lib/argo_workflows_api_client.rb @@ -0,0 +1,5 @@ +require 'argo_workflows_api_client/api_client' +require 'argo_workflows_api_client/api_error' +require 'argo_workflows_api_client/version' +require 'argo_workflows_api_client/configuration' +require 'argo_workflows_api_client/api/workflow_service_api' diff --git a/lib/argo_workflows_api_client/api/workflow_service_api.rb b/lib/argo_workflows_api_client/api/workflow_service_api.rb new file mode 100644 index 00000000..24cdf4f6 --- /dev/null +++ b/lib/argo_workflows_api_client/api/workflow_service_api.rb @@ -0,0 +1,1277 @@ +=begin +#Argo Workflows API + +#Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. 
For more information, please see https://argo-workflows.readthedocs.io/en/latest/ + +OpenAPI spec version: VERSION + +Generated by: https://github.com/swagger-api/swagger-codegen.git +Swagger Codegen version: 3.0.78 +=end + +module ArgoWorkflowsApiClient + class WorkflowServiceApi + attr_accessor :api_client + + def initialize(api_client = ApiClient.default) + @api_client = api_client + end + # @param body + # @param namespace + # @param [Hash] opts the optional parameters + # @return [IoArgoprojWorkflowV1alpha1Workflow] + def workflow_service_create_workflow(body, namespace, opts = {}) + data, _status_code, _headers = workflow_service_create_workflow_with_http_info(body, namespace, opts) + data + end + + # @param body + # @param namespace + # @param [Hash] opts the optional parameters + # @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers + def workflow_service_create_workflow_with_http_info(body, namespace, opts = {}) + if @api_client.config.debugging + @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_create_workflow ...' + end + # verify the required parameter 'body' is set + if @api_client.config.client_side_validation && body.nil? + fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_create_workflow" + end + # verify the required parameter 'namespace' is set + if @api_client.config.client_side_validation && namespace.nil? 
+ fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_create_workflow" + end + # resource path + local_var_path = '/api/v1/workflows/{namespace}'.sub('{' + 'namespace' + '}', namespace.to_s) + + # query parameters + query_params = opts[:query_params] || {} + + # header parameters + header_params = opts[:header_params] || {} + # HTTP header 'Accept' (if needed) + header_params['Accept'] = @api_client.select_header_accept(['application/json']) + # HTTP header 'Content-Type' + header_params['Content-Type'] = @api_client.select_header_content_type(['application/json']) + + # form parameters + form_params = opts[:form_params] || {} + + # http body (model) + post_body = opts[:body] || @api_client.object_to_http_body(body) + + return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow' + + auth_names = opts[:auth_names] || ['BearerToken'] + data, status_code, headers = @api_client.call_api(:POST, local_var_path, + :header_params => header_params, + :query_params => query_params, + :form_params => form_params, + :body => post_body, + :auth_names => auth_names, + :return_type => return_type) + + if @api_client.config.debugging + @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_create_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}" + end + return data, status_code, headers + end + # @param namespace + # @param name + # @param [Hash] opts the optional parameters + # @option opts [String] :delete_options_grace_period_seconds The duration in seconds before the object should be deleted. Value must be non-negative integer. The value zero indicates delete immediately. If this value is nil, the default grace period for the specified type will be used. Defaults to a per object value if not specified. zero means delete immediately. +optional. + # @option opts [String] :delete_options_preconditions_uid Specifies the target UID. +optional. 
# @option opts [String] :delete_options_preconditions_resource_version Specifies the target ResourceVersion. +optional.
# @option opts [BOOLEAN] :delete_options_orphan_dependents Deprecated: please use the PropagationPolicy, this field
#   will be deprecated in 1.7. Should the dependent objects be orphaned. If true/false, the "orphan" finalizer will
#   be added to/removed from the object's finalizers list. Either this field or PropagationPolicy may be set, but
#   not both. +optional.
# @option opts [String] :delete_options_propagation_policy Whether and how garbage collection will be performed.
#   Either this field or OrphanDependents may be set, but not both. The default policy is decided by the existing
#   finalizer set in the metadata.finalizers and the resource-specific default policy. Acceptable values are:
#   'Orphan' - orphan the dependents; 'Background' - allow the garbage collector to delete the dependents in the
#   background; 'Foreground' - a cascading policy that deletes all dependents in the foreground. +optional.
# @option opts [Array] :delete_options_dry_run When present, indicates that modifications should not be persisted.
#   An invalid or unrecognized dryRun directive will result in an error response and no further processing of the
#   request. Valid values are: - All: all dry run stages will be processed. +optional +listType=atomic.
# @option opts [BOOLEAN] :delete_options_ignore_store_read_error_with_cluster_breaking_potential If set to true, it
#   will trigger an unsafe deletion of the resource in case the normal deletion flow fails with a corrupt object
#   error. A resource is considered corrupt if it cannot be retrieved from the underlying storage successfully
#   because (a) its data cannot be transformed, e.g. decryption failure, or (b) it fails to decode into an object.
#   NOTE: unsafe deletion ignores finalizer constraints, skips precondition checks, and removes the object from
#   the storage. WARNING: This may potentially break the cluster if the workload associated with the resource
#   being unsafe-deleted relies on normal deletion flow. Use only if you REALLY know what you are doing. The
#   default value is false, and the user must opt in to enable it. +optional.
# @option opts [BOOLEAN] :force
# @return [IoArgoprojWorkflowV1alpha1WorkflowDeleteResponse]
def workflow_service_delete_workflow(namespace, name, opts = {})
  # Delegate to the *_with_http_info variant and keep only the decoded body.
  delete_response, _http_status, _response_headers =
    workflow_service_delete_workflow_with_http_info(namespace, name, opts)
  delete_response
end

# Issues DELETE /api/v1/workflows/{namespace}/{name} through @api_client and returns the decoded body together
# with the HTTP status code and the response headers. The options have the same meaning as on
# #workflow_service_delete_workflow. In addition, opts may carry :query_params, :header_params, :form_params,
# :body, :return_type and :auth_names, which seed or override the corresponding request settings; the
# caller-supplied :query_params and :header_params hashes are copied, never modified.
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @option opts [String] :delete_options_grace_period_seconds The duration in seconds before the object should be
#   deleted. Must be a non-negative integer; zero means delete immediately; nil uses the default grace period for
#   the specified type. +optional.
# @option opts [String] :delete_options_preconditions_uid Specifies the target UID. +optional.
# @option opts [String] :delete_options_preconditions_resource_version Specifies the target ResourceVersion. +optional.
# @option opts [BOOLEAN] :delete_options_orphan_dependents Deprecated in favour of PropagationPolicy: whether the
#   dependent objects should be orphaned. May not be combined with PropagationPolicy. +optional.
# @option opts [String] :delete_options_propagation_policy Whether and how garbage collection will be performed:
#   'Orphan', 'Background' or 'Foreground'. May not be combined with OrphanDependents. +optional.
# @option opts [Array] :delete_options_dry_run When present, modifications are not persisted. Valid values are:
#   - All. Serialised with the "multi" collection format. +optional +listType=atomic.
# @option opts [BOOLEAN] :delete_options_ignore_store_read_error_with_cluster_breaking_potential Opts in to the
#   unsafe deletion of a corrupt object. WARNING: may break the cluster; the default value is false. +optional.
# @option opts [BOOLEAN] :force
# @return [Array<(IoArgoprojWorkflowV1alpha1WorkflowDeleteResponse, Integer, Hash)>] IoArgoprojWorkflowV1alpha1WorkflowDeleteResponse data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and namespace or name is nil
def workflow_service_delete_workflow_with_http_info(namespace, name, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_delete_workflow ...' if config.debugging

  # Both path parameters are required; reject nil before building the request.
  if config.client_side_validation
    if namespace.nil?
      raise ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_delete_workflow"
    end
    if name.nil?
      raise ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_delete_workflow"
    end
  end

  # Fill every placeholder in a single pass using the block form of gsub: the block result is inserted
  # literally, so a backslash sequence such as "\0" in a value is not treated as a back-reference, and a value
  # that itself looks like "{name}" cannot be replaced by a later substitution.
  path_values = { '{namespace}' => namespace.to_s, '{name}' => name.to_s }
  local_var_path = '/api/v1/workflows/{namespace}/{name}'.gsub(/\{(?:namespace|name)\}/) { |token| path_values[token] }

  # Work on a copy so the hash handed in by the caller is left untouched.
  query_params = (opts[:query_params] || {}).dup
  [
    [:'deleteOptions.gracePeriodSeconds', :delete_options_grace_period_seconds],
    [:'deleteOptions.preconditions.uid', :delete_options_preconditions_uid],
    [:'deleteOptions.preconditions.resourceVersion', :delete_options_preconditions_resource_version],
    [:'deleteOptions.orphanDependents', :delete_options_orphan_dependents],
    [:'deleteOptions.propagationPolicy', :delete_options_propagation_policy],
    [:'deleteOptions.dryRun', :delete_options_dry_run],
    [:'deleteOptions.ignoreStoreReadErrorWithClusterBreakingPotential',
     :delete_options_ignore_store_read_error_with_cluster_breaking_potential],
    [:force, :force]
  ].each do |wire_name, option_key|
    value = opts[option_key]
    next if value.nil?

    # dryRun is list-valued and goes through the client's collection encoder.
    value = @api_client.build_collection_param(value, :multi) if option_key == :delete_options_dry_run
    query_params[wire_name] = value
  end

  # Copy for the same reason as query_params; 'Accept' is always set by this method.
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  form_params = opts[:form_params] || {}
  post_body = opts[:body]
  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1WorkflowDeleteResponse'
  auth_names = opts[:auth_names] || ['BearerToken']

  data, status_code, headers = @api_client.call_api(:DELETE, local_var_path,
                                                    header_params: header_params,
                                                    query_params: query_params,
                                                    form_params: form_params,
                                                    body: post_body,
                                                    auth_names: auth_names,
                                                    return_type: return_type)

  if config.debugging
    config.logger.debug "API called: WorkflowServiceApi#workflow_service_delete_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end

# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @option opts [String] :get_options_resource_version resourceVersion sets a constraint on what resource versions a
#   request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions
#   for details. Defaults to unset +optional
# @option opts [String] :fields Fields to be included or excluded in the response. e.g. "spec,status.phase", "-status.nodes".
# @option opts [String] :uid Optional UID to retrieve a specific workflow (useful for archived workflows with the same name).
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_get_workflow(namespace, name, opts = {})
  # Delegate to the *_with_http_info variant and keep only the decoded body.
  workflow, _http_status, _response_headers = workflow_service_get_workflow_with_http_info(namespace, name, opts)
  workflow
end

# Issues GET /api/v1/workflows/{namespace}/{name} through @api_client and returns the decoded body together with
# the HTTP status code and the response headers. opts may additionally carry :query_params, :header_params,
# :form_params, :body, :return_type and :auth_names, which seed or override the corresponding request settings;
# the caller-supplied :query_params and :header_params hashes are copied, never modified.
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @option opts [String] :get_options_resource_version resourceVersion sets a constraint on what resource versions a
#   request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions
#   for details. Defaults to unset +optional
# @option opts [String] :fields Fields to be included or excluded in the response. e.g. "spec,status.phase", "-status.nodes".
# @option opts [String] :uid Optional UID to retrieve a specific workflow (useful for archived workflows with the same name).
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and namespace or name is nil
def workflow_service_get_workflow_with_http_info(namespace, name, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_get_workflow ...' if config.debugging

  # Both path parameters are required; reject nil before building the request.
  if config.client_side_validation
    if namespace.nil?
      raise ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_get_workflow"
    end
    if name.nil?
      raise ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_get_workflow"
    end
  end

  # Single-pass, block-form substitution: values are inserted literally (no "\0"-style back-reference
  # expansion) and a value that looks like a placeholder cannot be substituted a second time.
  path_values = { '{namespace}' => namespace.to_s, '{name}' => name.to_s }
  local_var_path = '/api/v1/workflows/{namespace}/{name}'.gsub(/\{(?:namespace|name)\}/) { |token| path_values[token] }

  # Work on a copy so the hash handed in by the caller is left untouched.
  query_params = (opts[:query_params] || {}).dup
  [
    [:'getOptions.resourceVersion', :get_options_resource_version],
    [:fields, :fields],
    [:uid, :uid]
  ].each do |wire_name, option_key|
    query_params[wire_name] = opts[option_key] unless opts[option_key].nil?
  end

  # Copy for the same reason as query_params; 'Accept' is always set by this method.
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  form_params = opts[:form_params] || {}
  post_body = opts[:body]
  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'
  auth_names = opts[:auth_names] || ['BearerToken']

  data, status_code, headers = @api_client.call_api(:GET, local_var_path,
                                                    header_params: header_params,
                                                    query_params: query_params,
                                                    form_params: form_params,
                                                    body: post_body,
                                                    auth_names: auth_names,
                                                    return_type: return_type)

  if config.debugging
    config.logger.debug "API called: WorkflowServiceApi#workflow_service_get_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end

# @param body
# @param namespace
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_lint_workflow(body, namespace, opts = {})
  # Delegate to the *_with_http_info variant and keep only the decoded body.
  linted, _http_status, _response_headers = workflow_service_lint_workflow_with_http_info(body, namespace, opts)
  linted
end

# @param body
# @param namespace
# @param [Hash] opts the optional parameters
# @return
#   [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
def workflow_service_lint_workflow_with_http_info(body, namespace, opts = {})
  # POSTs the workflow in `body` to /api/v1/workflows/{namespace}/lint through @api_client and returns
  # [data, status_code, headers]. Raises ArgumentError when client-side validation is enabled and body or
  # namespace is nil.
  config = @api_client.config
  config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_lint_workflow ...' if config.debugging

  if config.client_side_validation
    if body.nil?
      raise ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_lint_workflow"
    end
    if namespace.nil?
      raise ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_lint_workflow"
    end
  end

  # Block form of sub: the namespace is inserted literally, so a backslash sequence such as "\0" in the value
  # is not expanded as a back-reference to the matched placeholder.
  local_var_path = '/api/v1/workflows/{namespace}/lint'.sub('{namespace}') { namespace.to_s }

  query_params = opts[:query_params] || {}

  # Copy before adding headers so the hash handed in by the caller is left untouched.
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  form_params = opts[:form_params] || {}

  # An explicit opts[:body] takes precedence over the serialised model.
  post_body = opts[:body] || @api_client.object_to_http_body(body)
  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'
  auth_names = opts[:auth_names] || ['BearerToken']

  data, status_code, headers = @api_client.call_api(:POST, local_var_path,
                                                    header_params: header_params,
                                                    query_params: query_params,
                                                    form_params: form_params,
                                                    body: post_body,
                                                    auth_names: auth_names,
                                                    return_type: return_type)

  if config.debugging
    config.logger.debug "API called: WorkflowServiceApi#workflow_service_lint_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end

# @param namespace
# @param [Hash] opts the optional parameters
# @option opts [String] :list_options_label_selector A selector to restrict the list of returned objects by their
#   labels. Defaults to everything. +optional.
# @option opts [String] :list_options_field_selector A selector to restrict the list of returned objects by their
#   fields. Defaults to everything. +optional.
# @option opts [BOOLEAN] :list_options_watch Watch for changes to the described resources and return them as a
#   stream of add, update, and remove notifications. Specify resourceVersion. +optional.
# @option opts [BOOLEAN] :list_options_allow_watch_bookmarks allowWatchBookmarks requests watch events with type
#   "BOOKMARK". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the
#   server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they
#   assume the server will send any BOOKMARK event during a session. If this is not a watch, this field is
#   ignored. +optional.
# @option opts [String] :list_options_resource_version resourceVersion sets a constraint on what resource versions
#   a request may be served from. See
#   https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to
#   unset +optional
# @option opts [String] :list_options_resource_version_match resourceVersionMatch determines how resourceVersion is
#   applied to list calls. It is highly recommended that resourceVersionMatch be set for list calls where
#   resourceVersion is set. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions
#   for details. Defaults to unset +optional
# @option opts [String] :list_options_timeout_seconds Timeout for the list/watch call.
#   This limits the duration of the call, regardless of any activity or inactivity. +optional.
# @option opts [String] :list_options_limit limit is a maximum number of responses to return for a list call. If
#   more items exist, the server will set the `continue` field on the list metadata to a value that can be used
#   with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the
#   requested amount of items (up to zero items) in the event all requested objects are filtered out, and
#   clients should only use the presence of the continue field to determine whether more results are available.
#   Servers may choose not to support the limit argument and will return all of the available results. If limit
#   is specified and the continue field is empty, clients may assume that no more results are available. This
#   field is not supported if watch is true. The server guarantees that the objects returned when using
#   continue will be identical to issuing a single list call without a limit - that is, no objects created,
#   modified, or deleted after the first request is issued will be included in any subsequent continued
#   requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using
#   limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects
#   are updated during a chunked list, the version of the object that was present at the time the first list
#   result was calculated is returned.
# @option opts [String] :list_options_continue The continue option should be set when retrieving more results from
#   the server. Since this value is server defined, clients may only use the continue value from a previous
#   query result with identical query parameters (except for the value of continue) and the server may reject a
#   continue value it does not recognize. If the specified continue value is no longer valid, whether due to
#   expiration (generally five to fifteen minutes) or a configuration change on the server, the server will
#   respond with a 410 ResourceExpired error together with a continue token. If the client needs a consistent
#   list, it must restart its list without the continue field. Otherwise, the client may send another list
#   request with the token received with the 410 error; the server will respond with a list starting from the
#   next key, but from the latest snapshot, which is inconsistent with the previous list results - objects that
#   are created, modified, or deleted after the first list request will be included in the response, as long as
#   their keys are after the "next key". This field is not supported when watch is true. Clients may start a
#   watch from the last resourceVersion value returned by the server and not miss any modifications.
# @option opts [BOOLEAN] :list_options_send_initial_events `sendInitialEvents=true` may be set together with
#   `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state
#   of objects in the collection. Once all such events have been sent, a synthetic "Bookmark" event will be
#   sent. The bookmark will report the ResourceVersion (RV) corresponding to the set of objects, and be marked
#   with the `"io.k8s.initial-events-end": "true"` annotation. Afterwards, the watch stream will proceed as
#   usual, sending watch events corresponding to changes (subsequent to the RV) to objects watched. When the
#   `sendInitialEvents` option is set, we require the `resourceVersionMatch` option to also be set. The
#   semantics of the watch request are as follows:
#   - `resourceVersionMatch` = NotOlderThan is interpreted as "data at least as new as the provided
#     `resourceVersion`" and the bookmark event is sent when the state is synced to a `resourceVersion` at
#     least as fresh as the one provided by the ListOptions. If `resourceVersion` is unset, this is interpreted
#     as "consistent read" and the bookmark event is sent when the state is synced at least to the moment when
#     the request started being processed.
#   - `resourceVersionMatch` set to any other value or unset: an Invalid error is returned.
#   Defaults to true if `resourceVersion=""` or `resourceVersion="0"` (for backward compatibility reasons) and
#   to false otherwise. +optional
# @option opts [String] :fields Fields to be included or excluded in the response. e.g.
#   "items.spec,items.status.phase", "-items.status.nodes".
# @option opts [String] :name_filter Filter type used for name filtering. Exact | Contains | Prefix. Default to Exact.
# @option opts [String] :created_after
# @option opts [String] :finished_before
# @return [IoArgoprojWorkflowV1alpha1WorkflowList]
def workflow_service_list_workflows(namespace, opts = {})
  # Delegate to the *_with_http_info variant and keep only the decoded body.
  workflow_list, _http_status, _response_headers = workflow_service_list_workflows_with_http_info(namespace, opts)
  workflow_list
end

# @param namespace
# @param [Hash] opts the optional parameters
# @option opts [String] :list_options_label_selector A selector to restrict the list of returned objects by their
#   labels. Defaults to everything. +optional.
# @option opts [String] :list_options_field_selector A selector to restrict the list of returned objects by their
#   fields. Defaults to everything. +optional.
# @option opts [BOOLEAN] :list_options_watch Watch for changes to the described resources and return them as a
#   stream of add, update, and remove notifications. Specify resourceVersion. +optional.
# @option opts [BOOLEAN] :list_options_allow_watch_bookmarks allowWatchBookmarks requests watch events with type
#   "BOOKMARK". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the
#   server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they
#   assume the server will send any BOOKMARK event during a session.
#   If this is not a watch, this field is ignored. +optional.
# @option opts [String] :list_options_resource_version resourceVersion sets a constraint on what resource versions
#   a request may be served from. See
#   https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to
#   unset +optional
# @option opts [String] :list_options_resource_version_match resourceVersionMatch determines how resourceVersion is
#   applied to list calls. It is highly recommended that resourceVersionMatch be set for list calls where
#   resourceVersion is set. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions
#   for details. Defaults to unset +optional
# @option opts [String] :list_options_timeout_seconds Timeout for the list/watch call. This limits the duration of
#   the call, regardless of any activity or inactivity. +optional.
# @option opts [String] :list_options_limit limit is a maximum number of responses to return for a list call. If
#   more items exist, the server will set the `continue` field on the list metadata to a value that can be used
#   with the same initial query to retrieve the next set of results; clients should only use the presence of
#   the continue field to determine whether more results are available. Servers may choose not to support the
#   limit argument and will return all of the available results. This field is not supported if watch is true.
#   Results obtained through continue form a consistent snapshot of the first list call; the full description
#   is on #workflow_service_list_workflows.
# @option opts [String] :list_options_continue The continue option should be set when retrieving more results from
#   the server. Clients may only use the continue value from a previous query result with identical query
#   parameters (except for the value of continue), and the server may reject a value it does not recognize. If
#   the value is no longer valid, the server will respond with a 410 ResourceExpired error together with a
#   continue token. This field is not supported when watch is true. The full description is on
#   #workflow_service_list_workflows.
# @option opts [BOOLEAN] :list_options_send_initial_events `sendInitialEvents=true` may be set together with
#   `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state
#   of objects in the collection, followed by a synthetic "Bookmark" event marked with the
#   `"io.k8s.initial-events-end": "true"` annotation. When this option is set, `resourceVersionMatch` must also
#   be set: NotOlderThan is accepted, any other value or unset returns an Invalid error. Defaults to true if
#   `resourceVersion=""` or `resourceVersion="0"` (for backward compatibility reasons) and to false otherwise.
#   +optional
# @option opts [String] :fields Fields to be included or excluded in the response. e.g.
#   "items.spec,items.status.phase", "-items.status.nodes".
# @option opts [String] :name_filter Filter type used for name filtering. Exact | Contains | Prefix. Default to Exact.
# @option opts [String] :created_after
# @option opts [String] :finished_before
# @return [Array<(IoArgoprojWorkflowV1alpha1WorkflowList, Integer, Hash)>] IoArgoprojWorkflowV1alpha1WorkflowList data, response status code and response headers
def workflow_service_list_workflows_with_http_info(namespace, opts = {})
  # Issues GET /api/v1/workflows/{namespace} through @api_client and returns [data, status_code, headers].
  # Raises ArgumentError when client-side validation is enabled and namespace is nil.
  config = @api_client.config
  config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_list_workflows ...' if config.debugging

  if config.client_side_validation && namespace.nil?
    raise ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_list_workflows"
  end

  # Block form of sub: the namespace is inserted literally, so a backslash sequence such as "\0" in the value
  # is not expanded as a back-reference to the matched placeholder.
  local_var_path = '/api/v1/workflows/{namespace}'.sub('{namespace}') { namespace.to_s }

  # Work on a copy so the hash handed in by the caller is left untouched.
  query_params = (opts[:query_params] || {}).dup
  [
    [:'listOptions.labelSelector', :list_options_label_selector],
    [:'listOptions.fieldSelector', :list_options_field_selector],
    [:'listOptions.watch', :list_options_watch],
    [:'listOptions.allowWatchBookmarks', :list_options_allow_watch_bookmarks],
    [:'listOptions.resourceVersion', :list_options_resource_version],
    [:'listOptions.resourceVersionMatch', :list_options_resource_version_match],
    [:'listOptions.timeoutSeconds', :list_options_timeout_seconds],
    [:'listOptions.limit', :list_options_limit],
    [:'listOptions.continue', :list_options_continue],
    [:'listOptions.sendInitialEvents', :list_options_send_initial_events],
    [:fields, :fields],
    [:nameFilter, :name_filter],
    [:createdAfter, :created_after],
    [:finishedBefore, :finished_before]
  ].each do |wire_name, option_key|
    # Only options that were actually supplied are sent; false is a supplied value.
    query_params[wire_name] = opts[option_key] unless opts[option_key].nil?
  end

  # Copy for the same reason as query_params; 'Accept' is always set by this method.
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  form_params = opts[:form_params] || {}
  post_body = opts[:body]
  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1WorkflowList'
  auth_names = opts[:auth_names] || ['BearerToken']

  data, status_code, headers = @api_client.call_api(:GET, local_var_path,
                                                    header_params: header_params,
                                                    query_params: query_params,
                                                    form_params: form_params,
                                                    body: post_body,
                                                    auth_names: auth_names,
                                                    return_type: return_type)

  if config.debugging
    config.logger.debug "API called: WorkflowServiceApi#workflow_service_list_workflows\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end

# DEPRECATED: Cannot work via HTTP if podName is an empty string. Use WorkflowLogs.
# @param namespace
# @param name
# @param pod_name
# @param [Hash] opts the optional parameters
# @option opts [String] :log_options_container The container for which to stream logs. Defaults to only container
#   if there is one container in the pod. +optional.
# @option opts [BOOLEAN] :log_options_follow Follow the log stream of the pod. Defaults to false. +optional.
# @option opts [BOOLEAN] :log_options_previous Return previous terminated container logs. Defaults to false. +optional.
# @option opts [String] :log_options_since_seconds A relative time in seconds before the current time from which to
#   show logs. If this value precedes the time a pod was started, only logs since the pod start will be
#   returned. If this value is in the future, no logs will be returned. Only one of sinceSeconds or sinceTime
#   may be specified. +optional.
# @option opts [String] :log_options_since_time_seconds Represents seconds of UTC time since Unix epoch
#   1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59Z inclusive.
# @option opts [Integer] :log_options_since_time_nanos Non-negative fractions of a second at nanosecond resolution.
#   Negative second values with fractions must still have non-negative nanos values that count forward in time.
#   Must be from 0 to 999,999,999 inclusive. This field may be limited in precision depending on context.
# @option opts [BOOLEAN] :log_options_timestamps If true, add an RFC3339 or RFC3339Nano timestamp at the beginning
#   of every line of log output. Defaults to false. +optional.
# @option opts [String] :log_options_tail_lines If set, the number of lines from the end of the logs to show. If
#   not specified, logs are shown from the creation of the container or sinceSeconds or sinceTime. Note that
#   when "TailLines" is specified, "Stream" can only be set to nil or "All". +optional.
# @option opts [String] :log_options_limit_bytes If set, the number of bytes to read from the server before
#   terminating the log output. This may not display a complete final line of logging, and may return slightly
#   more or slightly less than the specified limit. +optional.
# @option opts [BOOLEAN] :log_options_insecure_skip_tls_verify_backend insecureSkipTLSVerifyBackend indicates that
#   the apiserver should not confirm the validity of the serving certificate of the backend it is connecting
#   to. This will make the HTTPS connection between the apiserver and the backend insecure. This means the
#   apiserver cannot verify the log data it is receiving came from the real kubelet. If the kubelet is
#   configured to verify the apiserver's TLS credentials, it does not mean the connection to the real kubelet
#   is vulnerable to a man in the middle attack (e.g. an attacker could not intercept the actual log data
#   coming from the real kubelet). +optional.
# @option opts [String] :log_options_stream Specify which container log stream to return to the client. Acceptable
#   values are "All", "Stdout" and "Stderr". If not specified, "All" is used, and both stdout and stderr are
#   returned interleaved. Note that when "TailLines" is specified, "Stream" can only be set to nil or "All".
#   +featureGate=PodLogsQuerySplitStreams +optional.
# @option opts [String] :grep
# @option opts [String] :selector
# @return [StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry]
def workflow_service_pod_logs(namespace, name, pod_name, opts = {})
  # Delegate to the *_with_http_info variant and keep only the decoded body.
  log_stream, _http_status, _response_headers =
    workflow_service_pod_logs_with_http_info(namespace, name, pod_name, opts)
  log_stream
end

# DEPRECATED: Cannot work via HTTP if podName is an empty string. Use WorkflowLogs.
# @param namespace
# @param name
# @param pod_name
# @param [Hash] opts the optional parameters
# @option opts [String] :log_options_container The container for which to stream logs. Defaults to only container
#   if there is one container in the pod. +optional.
# @option opts [BOOLEAN] :log_options_follow Follow the log stream of the pod. Defaults to false. +optional.
# @option opts [BOOLEAN] :log_options_previous Return previous terminated container logs. Defaults to false. +optional.
# @option opts [String] :log_options_since_seconds A relative time in seconds before the current time from which to
#   show logs. If this value precedes the time a pod was started, only logs since the pod start will be
#   returned. If this value is in the future, no logs will be returned. Only one of sinceSeconds or sinceTime
#   may be specified. +optional.
# @option opts [String] :log_options_since_time_seconds Represents seconds of UTC time since Unix epoch
#   1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59Z inclusive.
# @option opts [Integer] :log_options_since_time_nanos Non-negative fractions of a second at nanosecond resolution.
Negative second values with fractions must still have non-negative nanos values that count forward in time. Must be from 0 to 999,999,999 inclusive. This field may be limited in precision depending on context. + # @option opts [BOOLEAN] :log_options_timestamps If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line of log output. Defaults to false. +optional. + # @option opts [String] :log_options_tail_lines If set, the number of lines from the end of the logs to show. If not specified, logs are shown from the creation of the container or sinceSeconds or sinceTime. Note that when \"TailLines\" is specified, \"Stream\" can only be set to nil or \"All\". +optional. + # @option opts [String] :log_options_limit_bytes If set, the number of bytes to read from the server before terminating the log output. This may not display a complete final line of logging, and may return slightly more or slightly less than the specified limit. +optional. + # @option opts [BOOLEAN] :log_options_insecure_skip_tls_verify_backend insecureSkipTLSVerifyBackend indicates that the apiserver should not confirm the validity of the serving certificate of the backend it is connecting to. This will make the HTTPS connection between the apiserver and the backend insecure. This means the apiserver cannot verify the log data it is receiving came from the real kubelet. If the kubelet is configured to verify the apiserver's TLS credentials, it does not mean the connection to the real kubelet is vulnerable to a man in the middle attack (e.g. an attacker could not intercept the actual log data coming from the real kubelet). +optional. + # @option opts [String] :log_options_stream Specify which container log stream to return to the client. Acceptable values are \"All\", \"Stdout\" and \"Stderr\". If not specified, \"All\" is used, and both stdout and stderr are returned interleaved. Note that when \"TailLines\" is specified, \"Stream\" can only be set to nil or \"All\". 
+featureGate=PodLogsQuerySplitStreams +optional. + # @option opts [String] :grep + # @option opts [String] :selector + # @return [Array<(StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry, Integer, Hash)>] StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry data, response status code and response headers + def workflow_service_pod_logs_with_http_info(namespace, name, pod_name, opts = {}) + if @api_client.config.debugging + @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_pod_logs ...' + end + # verify the required parameter 'namespace' is set + if @api_client.config.client_side_validation && namespace.nil? + fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_pod_logs" + end + # verify the required parameter 'name' is set + if @api_client.config.client_side_validation && name.nil? + fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_pod_logs" + end + # verify the required parameter 'pod_name' is set + if @api_client.config.client_side_validation && pod_name.nil? + fail ArgumentError, "Missing the required parameter 'pod_name' when calling WorkflowServiceApi.workflow_service_pod_logs" + end + # resource path + local_var_path = '/api/v1/workflows/{namespace}/{name}/{podName}/log'.sub('{' + 'namespace' + '}', namespace.to_s).sub('{' + 'name' + '}', name.to_s).sub('{' + 'podName' + '}', pod_name.to_s) + + # query parameters + query_params = opts[:query_params] || {} + query_params[:'logOptions.container'] = opts[:'log_options_container'] if !opts[:'log_options_container'].nil? + query_params[:'logOptions.follow'] = opts[:'log_options_follow'] if !opts[:'log_options_follow'].nil? + query_params[:'logOptions.previous'] = opts[:'log_options_previous'] if !opts[:'log_options_previous'].nil? + query_params[:'logOptions.sinceSeconds'] = opts[:'log_options_since_seconds'] if !opts[:'log_options_since_seconds'].nil? 
+ query_params[:'logOptions.sinceTime.seconds'] = opts[:'log_options_since_time_seconds'] if !opts[:'log_options_since_time_seconds'].nil? + query_params[:'logOptions.sinceTime.nanos'] = opts[:'log_options_since_time_nanos'] if !opts[:'log_options_since_time_nanos'].nil? + query_params[:'logOptions.timestamps'] = opts[:'log_options_timestamps'] if !opts[:'log_options_timestamps'].nil? + query_params[:'logOptions.tailLines'] = opts[:'log_options_tail_lines'] if !opts[:'log_options_tail_lines'].nil? + query_params[:'logOptions.limitBytes'] = opts[:'log_options_limit_bytes'] if !opts[:'log_options_limit_bytes'].nil? + query_params[:'logOptions.insecureSkipTLSVerifyBackend'] = opts[:'log_options_insecure_skip_tls_verify_backend'] if !opts[:'log_options_insecure_skip_tls_verify_backend'].nil? + query_params[:'logOptions.stream'] = opts[:'log_options_stream'] if !opts[:'log_options_stream'].nil? + query_params[:'grep'] = opts[:'grep'] if !opts[:'grep'].nil? + query_params[:'selector'] = opts[:'selector'] if !opts[:'selector'].nil? 
+ + # header parameters + header_params = opts[:header_params] || {} + # HTTP header 'Accept' (if needed) + header_params['Accept'] = @api_client.select_header_accept(['application/json']) + + # form parameters + form_params = opts[:form_params] || {} + + # http body (model) + post_body = opts[:body] + + return_type = opts[:return_type] || 'StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry' + + auth_names = opts[:auth_names] || ['BearerToken'] + data, status_code, headers = @api_client.call_api(:GET, local_var_path, + :header_params => header_params, + :query_params => query_params, + :form_params => form_params, + :body => post_body, + :auth_names => auth_names, + :return_type => return_type) + + if @api_client.config.debugging + @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_pod_logs\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}" + end + return data, status_code, headers + end + # @param body + # @param namespace + # @param name + # @param [Hash] opts the optional parameters + # @return [IoArgoprojWorkflowV1alpha1Workflow] + def workflow_service_resubmit_workflow(body, namespace, name, opts = {}) + data, _status_code, _headers = workflow_service_resubmit_workflow_with_http_info(body, namespace, name, opts) + data + end + + # @param body + # @param namespace + # @param name + # @param [Hash] opts the optional parameters + # @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers + def workflow_service_resubmit_workflow_with_http_info(body, namespace, name, opts = {}) + if @api_client.config.debugging + @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_resubmit_workflow ...' + end + # verify the required parameter 'body' is set + if @api_client.config.client_side_validation && body.nil? 
+ fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_resubmit_workflow" + end + # verify the required parameter 'namespace' is set + if @api_client.config.client_side_validation && namespace.nil? + fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_resubmit_workflow" + end + # verify the required parameter 'name' is set + if @api_client.config.client_side_validation && name.nil? + fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_resubmit_workflow" + end + # resource path + local_var_path = '/api/v1/workflows/{namespace}/{name}/resubmit'.sub('{' + 'namespace' + '}', namespace.to_s).sub('{' + 'name' + '}', name.to_s) + + # query parameters + query_params = opts[:query_params] || {} + + # header parameters + header_params = opts[:header_params] || {} + # HTTP header 'Accept' (if needed) + header_params['Accept'] = @api_client.select_header_accept(['application/json']) + # HTTP header 'Content-Type' + header_params['Content-Type'] = @api_client.select_header_content_type(['application/json']) + + # form parameters + form_params = opts[:form_params] || {} + + # http body (model) + post_body = opts[:body] || @api_client.object_to_http_body(body) + + return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow' + + auth_names = opts[:auth_names] || ['BearerToken'] + data, status_code, headers = @api_client.call_api(:PUT, local_var_path, + :header_params => header_params, + :query_params => query_params, + :form_params => form_params, + :body => post_body, + :auth_names => auth_names, + :return_type => return_type) + + if @api_client.config.debugging + @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_resubmit_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}" + end + return data, status_code, headers + end + # @param body + 
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_resume_workflow(body, namespace, name, opts = {})
  # Delegate to the *_with_http_info variant and keep only the data element of its result.
  data, _status_code, _headers = workflow_service_resume_workflow_with_http_info(body, namespace, name, opts)
  data
end

# Sends `PUT /api/v1/workflows/{namespace}/{name}/resume` through `@api_client.call_api` and returns the
# data together with the status code and headers.
#
# @param body request model; converted with `@api_client.object_to_http_body` unless `opts[:body]` is given
# @param namespace value placed in the `{namespace}` path segment
# @param name value placed in the `{name}` path segment
# @param [Hash] opts the optional parameters
# @option opts [Hash] :query_params query parameters forwarded to `call_api`
# @option opts [Hash] :header_params extra headers; copied, then 'Accept' and 'Content-Type' are set
# @option opts [Hash] :form_params form parameters forwarded to `call_api`
# @option opts [Object] :body request body that takes precedence over `body`
# @option opts [String] :return_type overrides the default 'IoArgoprojWorkflowV1alpha1Workflow'
# @option opts [Array<String>] :auth_names overrides the default ['BearerToken']
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_resume_workflow_with_http_info(body, namespace, name, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_resume_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_resume_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_resume_workflow"
  end
  # verify the required parameter 'name' is set
  if @api_client.config.client_side_validation && name.nil?
    fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_resume_workflow"
  end
  # resource path: literal interpolation, so backslashes or "{name}" inside a value are never
  # reinterpreted as substitution syntax (which chained String#sub with a replacement string would do).
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/#{name}/resume"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:PUT, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_resume_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# Calls `workflow_service_retry_workflow_with_http_info` and returns only the data element of its result.
#
# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_retry_workflow(body, namespace, name, opts = {})
  data, _status_code, _headers = workflow_service_retry_workflow_with_http_info(body, namespace, name, opts)
  data
end

# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>]
#   IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_retry_workflow_with_http_info(body, namespace, name, opts = {})
  # Sends PUT /api/v1/workflows/{namespace}/{name}/retry through @api_client.call_api.
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_retry_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_retry_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_retry_workflow"
  end
  # verify the required parameter 'name' is set
  if @api_client.config.client_side_validation && name.nil?
    fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_retry_workflow"
  end
  # resource path: literal interpolation, so backslashes or "{name}" inside a value are never
  # reinterpreted as substitution syntax (which chained String#sub with a replacement string would do).
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/#{name}/retry"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:PUT, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_retry_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# Calls `workflow_service_set_workflow_with_http_info` and returns only the data element of its result.
#
# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_set_workflow(body, namespace, name, opts = {})
  data, _status_code, _headers = workflow_service_set_workflow_with_http_info(body, namespace, name, opts)
  data
end

# Sends `PUT /api/v1/workflows/{namespace}/{name}/set` through `@api_client.call_api` and returns the data
# together with the status code and headers.
#
# @param body request model; converted with `@api_client.object_to_http_body` unless `opts[:body]` is given
# @param namespace value placed in the `{namespace}` path segment
# @param name value placed in the `{name}` path segment
# @param [Hash] opts the optional parameters
# @option opts [Hash] :query_params query parameters forwarded to `call_api`
# @option opts [Hash] :header_params extra headers; copied, then 'Accept' and 'Content-Type' are set
# @option opts [Hash] :form_params form parameters forwarded to `call_api`
# @option opts [Object] :body request body that takes precedence over `body`
# @option opts [String] :return_type overrides the default 'IoArgoprojWorkflowV1alpha1Workflow'
# @option opts [Array<String>] :auth_names overrides the default ['BearerToken']
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_set_workflow_with_http_info(body, namespace, name, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_set_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_set_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_set_workflow"
  end
  # verify the required parameter 'name' is set
  if @api_client.config.client_side_validation && name.nil?
    fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_set_workflow"
  end
  # resource path: literal interpolation (see the retry variant above for the rationale)
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/#{name}/set"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:PUT, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_set_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# Calls `workflow_service_stop_workflow_with_http_info` and returns only the data element of its result.
#
# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_stop_workflow(body, namespace, name, opts = {})
  data, _status_code, _headers = workflow_service_stop_workflow_with_http_info(body, namespace, name, opts)
  data
end

# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>]
#   IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_stop_workflow_with_http_info(body, namespace, name, opts = {})
  # Sends PUT /api/v1/workflows/{namespace}/{name}/stop through @api_client.call_api.
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_stop_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_stop_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_stop_workflow"
  end
  # verify the required parameter 'name' is set
  if @api_client.config.client_side_validation && name.nil?
    fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_stop_workflow"
  end
  # resource path: literal interpolation, so backslashes or "{name}" inside a value are never
  # reinterpreted as substitution syntax (which chained String#sub with a replacement string would do).
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/#{name}/stop"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:PUT, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_stop_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# Calls `workflow_service_submit_workflow_with_http_info` and returns only the data element of its result.
#
# @param body
# @param namespace
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_submit_workflow(body, namespace, opts = {})
  data, _status_code, _headers = workflow_service_submit_workflow_with_http_info(body, namespace, opts)
  data
end

# Sends `POST /api/v1/workflows/{namespace}/submit` through `@api_client.call_api` and returns the data
# together with the status code and headers.
#
# @param body request model; converted with `@api_client.object_to_http_body` unless `opts[:body]` is given
# @param namespace value placed in the `{namespace}` path segment
# @param [Hash] opts the optional parameters
# @option opts [Hash] :query_params query parameters forwarded to `call_api`
# @option opts [Hash] :header_params extra headers; copied, then 'Accept' and 'Content-Type' are set
# @option opts [Hash] :form_params form parameters forwarded to `call_api`
# @option opts [Object] :body request body that takes precedence over `body`
# @option opts [String] :return_type overrides the default 'IoArgoprojWorkflowV1alpha1Workflow'
# @option opts [Array<String>] :auth_names overrides the default ['BearerToken']
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_submit_workflow_with_http_info(body, namespace, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_submit_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_submit_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_submit_workflow"
  end
  # resource path: literal interpolation (see the stop variant above for the rationale)
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/submit"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:POST, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_submit_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# Calls `workflow_service_suspend_workflow_with_http_info` and returns only the data element of its result.
#
# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_suspend_workflow(body, namespace, name, opts = {})
  data, _status_code, _headers = workflow_service_suspend_workflow_with_http_info(body, namespace, name, opts)
  data
end

# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow
#   data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_suspend_workflow_with_http_info(body, namespace, name, opts = {})
  # Sends PUT /api/v1/workflows/{namespace}/{name}/suspend through @api_client.call_api.
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_suspend_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_suspend_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_suspend_workflow"
  end
  # verify the required parameter 'name' is set
  if @api_client.config.client_side_validation && name.nil?
    fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_suspend_workflow"
  end
  # resource path: literal interpolation, so backslashes or "{name}" inside a value are never
  # reinterpreted as substitution syntax (which chained String#sub with a replacement string would do).
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/#{name}/suspend"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:PUT, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_suspend_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# Calls `workflow_service_terminate_workflow_with_http_info` and returns only the data element of its result.
#
# @param body
# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @return [IoArgoprojWorkflowV1alpha1Workflow]
def workflow_service_terminate_workflow(body, namespace, name, opts = {})
  data, _status_code, _headers = workflow_service_terminate_workflow_with_http_info(body, namespace, name, opts)
  data
end

# Sends `PUT /api/v1/workflows/{namespace}/{name}/terminate` through `@api_client.call_api` and returns the
# data together with the status code and headers.
#
# @param body request model; converted with `@api_client.object_to_http_body` unless `opts[:body]` is given
# @param namespace value placed in the `{namespace}` path segment
# @param name value placed in the `{name}` path segment
# @param [Hash] opts the optional parameters
# @option opts [Hash] :query_params query parameters forwarded to `call_api`
# @option opts [Hash] :header_params extra headers; copied, then 'Accept' and 'Content-Type' are set
# @option opts [Hash] :form_params form parameters forwarded to `call_api`
# @option opts [Object] :body request body that takes precedence over `body`
# @option opts [String] :return_type overrides the default 'IoArgoprojWorkflowV1alpha1Workflow'
# @option opts [Array<String>] :auth_names overrides the default ['BearerToken']
# @return [Array<(IoArgoprojWorkflowV1alpha1Workflow, Integer, Hash)>] IoArgoprojWorkflowV1alpha1Workflow data, response status code and response headers
# @raise [ArgumentError] if a required parameter is nil while `config.client_side_validation` is enabled
def workflow_service_terminate_workflow_with_http_info(body, namespace, name, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_terminate_workflow ...'
  end
  # verify the required parameter 'body' is set
  if @api_client.config.client_side_validation && body.nil?
    fail ArgumentError, "Missing the required parameter 'body' when calling WorkflowServiceApi.workflow_service_terminate_workflow"
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_terminate_workflow"
  end
  # verify the required parameter 'name' is set
  if @api_client.config.client_side_validation && name.nil?
    fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_terminate_workflow"
  end
  # resource path: literal interpolation (see the suspend variant above for the rationale)
  # NOTE(review): values are not percent-encoded here; confirm the ApiClient encodes the final URL.
  local_var_path = "/api/v1/workflows/#{namespace}/#{name}/terminate"

  # query parameters
  query_params = opts[:query_params] || {}

  # header parameters: copied so the headers set below do not leak into a hash the caller may reuse
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  # HTTP header 'Content-Type'
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body] || @api_client.object_to_http_body(body)

  return_type = opts[:return_type] || 'IoArgoprojWorkflowV1alpha1Workflow'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:PUT, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_terminate_workflow\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# @param namespace
# @param [Hash] opts the optional parameters
# @option opts [String] :list_options_label_selector A selector to restrict the list of returned objects by their labels. Defaults to everything. +optional.
# @option opts [String] :list_options_field_selector A selector to restrict the list of returned objects by their fields. Defaults to everything. +optional.
+ # @option opts [BOOLEAN] :list_options_watch Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion. +optional. + # @option opts [BOOLEAN] :list_options_allow_watch_bookmarks allowWatchBookmarks requests watch events with type \"BOOKMARK\". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they assume the server will send any BOOKMARK event during a session. If this is not a watch, this field is ignored. +optional. + # @option opts [String] :list_options_resource_version resourceVersion sets a constraint on what resource versions a request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional + # @option opts [String] :list_options_resource_version_match resourceVersionMatch determines how resourceVersion is applied to list calls. It is highly recommended that resourceVersionMatch be set for list calls where resourceVersion is set See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional + # @option opts [String] :list_options_timeout_seconds Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. +optional. + # @option opts [String] :list_options_limit limit is a maximum number of responses to return for a list call. If more items exist, the server will set the `continue` field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. 
Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. + # @option opts [String] :list_options_continue The continue option should be set when retrieving more results from the server. Since this value is server defined, clients may only use the continue value from a previous query result with identical query parameters (except for the value of continue) and the server may reject a continue value it does not recognize. If the specified continue value is no longer valid whether due to expiration (generally five to fifteen minutes) or a configuration change on the server, the server will respond with a 410 ResourceExpired error together with a continue token. If the client needs a consistent list, it must restart their list without the continue field. 
Otherwise, the client may send another list request with the token received with the 410 error, the server will respond with a list starting from the next key, but from the latest snapshot, which is inconsistent from the previous list results - objects that are created, modified, or deleted after the first list request will be included in the response, as long as their keys are after the \"next key\". This field is not supported when watch is true. Clients may start a watch from the last resourceVersion value returned by the server and not miss any modifications. + # @option opts [BOOLEAN] :list_options_send_initial_events `sendInitialEvents=true` may be set together with `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state of objects in the collection. Once all such events have been sent, a synthetic \"Bookmark\" event will be sent. The bookmark will report the ResourceVersion (RV) corresponding to the set of objects, and be marked with `\"io.k8s.initial-events-end\": \"true\"` annotation. Afterwards, the watch stream will proceed as usual, sending watch events corresponding to changes (subsequent to the RV) to objects watched. When `sendInitialEvents` option is set, we require `resourceVersionMatch` option to also be set. The semantic of the watch request is as following: - `resourceVersionMatch` = NotOlderThan is interpreted as \"data at least as new as the provided `resourceVersion`\" and the bookmark event is send when the state is synced to a `resourceVersion` at least as fresh as the one provided by the ListOptions. If `resourceVersion` is unset, this is interpreted as \"consistent read\" and the bookmark event is send when the state is synced at least to the moment when request started being processed. - `resourceVersionMatch` set to any other value or unset Invalid error is returned. 
#   Defaults to true if `resourceVersion=\"\"` or `resourceVersion=\"0\"` (for backward compatibility reasons) and to false otherwise. +optional
# @return [StreamResultOfIoK8sApiCoreV1Event]
def workflow_service_watch_events(namespace, opts = {})
  # Delegate to the *_with_http_info variant, which returns
  # [data, status_code, headers]; only the data element is handed back.
  response_data, = workflow_service_watch_events_with_http_info(namespace, opts)
  response_data
end

# @param namespace
# @param [Hash] opts the optional parameters
# @option opts [String] :list_options_label_selector A selector to restrict the list of returned objects by their labels. Defaults to everything. +optional.
# @option opts [String] :list_options_field_selector A selector to restrict the list of returned objects by their fields. Defaults to everything. +optional.
# @option opts [BOOLEAN] :list_options_watch Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion. +optional.
# @option opts [BOOLEAN] :list_options_allow_watch_bookmarks allowWatchBookmarks requests watch events with type \"BOOKMARK\". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they assume the server will send any BOOKMARK event during a session. If this is not a watch, this field is ignored. +optional.
# @option opts [String] :list_options_resource_version resourceVersion sets a constraint on what resource versions a request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional
# @option opts [String] :list_options_resource_version_match resourceVersionMatch determines how resourceVersion is applied to list calls.
It is highly recommended that resourceVersionMatch be set for list calls where resourceVersion is set See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional + # @option opts [String] :list_options_timeout_seconds Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. +optional. + # @option opts [String] :list_options_limit limit is a maximum number of responses to return for a list call. If more items exist, the server will set the `continue` field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. + # @option opts [String] :list_options_continue The continue option should be set when retrieving more results from the server. 
Since this value is server defined, clients may only use the continue value from a previous query result with identical query parameters (except for the value of continue) and the server may reject a continue value it does not recognize. If the specified continue value is no longer valid whether due to expiration (generally five to fifteen minutes) or a configuration change on the server, the server will respond with a 410 ResourceExpired error together with a continue token. If the client needs a consistent list, it must restart their list without the continue field. Otherwise, the client may send another list request with the token received with the 410 error, the server will respond with a list starting from the next key, but from the latest snapshot, which is inconsistent from the previous list results - objects that are created, modified, or deleted after the first list request will be included in the response, as long as their keys are after the \"next key\". This field is not supported when watch is true. Clients may start a watch from the last resourceVersion value returned by the server and not miss any modifications. + # @option opts [BOOLEAN] :list_options_send_initial_events `sendInitialEvents=true` may be set together with `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state of objects in the collection. Once all such events have been sent, a synthetic \"Bookmark\" event will be sent. The bookmark will report the ResourceVersion (RV) corresponding to the set of objects, and be marked with `\"io.k8s.initial-events-end\": \"true\"` annotation. Afterwards, the watch stream will proceed as usual, sending watch events corresponding to changes (subsequent to the RV) to objects watched. When `sendInitialEvents` option is set, we require `resourceVersionMatch` option to also be set. 
#   The semantic of the watch request is as following: - `resourceVersionMatch` = NotOlderThan is interpreted as \"data at least as new as the provided `resourceVersion`\" and the bookmark event is send when the state is synced to a `resourceVersion` at least as fresh as the one provided by the ListOptions. If `resourceVersion` is unset, this is interpreted as \"consistent read\" and the bookmark event is send when the state is synced at least to the moment when request started being processed. - `resourceVersionMatch` set to any other value or unset Invalid error is returned. Defaults to true if `resourceVersion=\"\"` or `resourceVersion=\"0\"` (for backward compatibility reasons) and to false otherwise. +optional
# @option opts [Hash] :query_params base query parameters; a copy is extended with the listOptions.* values, the caller's hash is left untouched
# @option opts [Hash] :header_params base request headers; a copy receives the 'Accept' header, the caller's hash is left untouched
# @option opts [Hash] :form_params form parameters handed to the API client as-is
# @option opts [Object] :body request body handed to the API client as-is
# @option opts [String] :return_type type name handed to the API client instead of 'StreamResultOfIoK8sApiCoreV1Event'
# @option opts [Array<String>] :auth_names auth scheme names handed to the API client instead of ['BearerToken']
# @return [Array<(StreamResultOfIoK8sApiCoreV1Event, Integer, Hash)>] StreamResultOfIoK8sApiCoreV1Event data, response status code and response headers
# @raise [ArgumentError] if client-side validation is enabled and namespace is nil
def workflow_service_watch_events_with_http_info(namespace, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_watch_events ...'
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_watch_events"
  end

  # resource path
  # Block form of String#sub: the replacement is inserted literally, so a
  # backslash sequence such as "\0" inside the namespace is not expanded.
  local_var_path = '/api/v1/stream/events/{namespace}'.sub('{namespace}') { namespace.to_s }

  # query parameters
  # Work on a copy so the hash supplied by the caller is never mutated.
  query_params = (opts[:query_params] || {}).dup
  # wire name => option key; only options that are not nil are sent.
  {
    :'listOptions.labelSelector' => :list_options_label_selector,
    :'listOptions.fieldSelector' => :list_options_field_selector,
    :'listOptions.watch' => :list_options_watch,
    :'listOptions.allowWatchBookmarks' => :list_options_allow_watch_bookmarks,
    :'listOptions.resourceVersion' => :list_options_resource_version,
    :'listOptions.resourceVersionMatch' => :list_options_resource_version_match,
    :'listOptions.timeoutSeconds' => :list_options_timeout_seconds,
    :'listOptions.limit' => :list_options_limit,
    :'listOptions.continue' => :list_options_continue,
    :'listOptions.sendInitialEvents' => :list_options_send_initial_events
  }.each do |wire_name, option_key|
    value = opts[option_key]
    # `false` is a legitimate value (e.g. watch=false), so test for nil only.
    query_params[wire_name] = value unless value.nil?
  end

  # header parameters (copied for the same reason as query_params)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body]

  return_type = opts[:return_type] || 'StreamResultOfIoK8sApiCoreV1Event'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:GET, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_watch_events\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# @param namespace
# @param [Hash] opts the optional parameters
# @option opts [String] :list_options_label_selector A
selector to restrict the list of returned objects by their labels. Defaults to everything. +optional. + # @option opts [String] :list_options_field_selector A selector to restrict the list of returned objects by their fields. Defaults to everything. +optional. + # @option opts [BOOLEAN] :list_options_watch Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion. +optional. + # @option opts [BOOLEAN] :list_options_allow_watch_bookmarks allowWatchBookmarks requests watch events with type \"BOOKMARK\". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they assume the server will send any BOOKMARK event during a session. If this is not a watch, this field is ignored. +optional. + # @option opts [String] :list_options_resource_version resourceVersion sets a constraint on what resource versions a request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional + # @option opts [String] :list_options_resource_version_match resourceVersionMatch determines how resourceVersion is applied to list calls. It is highly recommended that resourceVersionMatch be set for list calls where resourceVersion is set See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional + # @option opts [String] :list_options_timeout_seconds Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. +optional. + # @option opts [String] :list_options_limit limit is a maximum number of responses to return for a list call. 
If more items exist, the server will set the `continue` field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. + # @option opts [String] :list_options_continue The continue option should be set when retrieving more results from the server. Since this value is server defined, clients may only use the continue value from a previous query result with identical query parameters (except for the value of continue) and the server may reject a continue value it does not recognize. If the specified continue value is no longer valid whether due to expiration (generally five to fifteen minutes) or a configuration change on the server, the server will respond with a 410 ResourceExpired error together with a continue token. 
If the client needs a consistent list, it must restart their list without the continue field. Otherwise, the client may send another list request with the token received with the 410 error, the server will respond with a list starting from the next key, but from the latest snapshot, which is inconsistent from the previous list results - objects that are created, modified, or deleted after the first list request will be included in the response, as long as their keys are after the \"next key\". This field is not supported when watch is true. Clients may start a watch from the last resourceVersion value returned by the server and not miss any modifications. + # @option opts [BOOLEAN] :list_options_send_initial_events `sendInitialEvents=true` may be set together with `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state of objects in the collection. Once all such events have been sent, a synthetic \"Bookmark\" event will be sent. The bookmark will report the ResourceVersion (RV) corresponding to the set of objects, and be marked with `\"io.k8s.initial-events-end\": \"true\"` annotation. Afterwards, the watch stream will proceed as usual, sending watch events corresponding to changes (subsequent to the RV) to objects watched. When `sendInitialEvents` option is set, we require `resourceVersionMatch` option to also be set. The semantic of the watch request is as following: - `resourceVersionMatch` = NotOlderThan is interpreted as \"data at least as new as the provided `resourceVersion`\" and the bookmark event is send when the state is synced to a `resourceVersion` at least as fresh as the one provided by the ListOptions. If `resourceVersion` is unset, this is interpreted as \"consistent read\" and the bookmark event is send when the state is synced at least to the moment when request started being processed. - `resourceVersionMatch` set to any other value or unset Invalid error is returned. 
#   Defaults to true if `resourceVersion=\"\"` or `resourceVersion=\"0\"` (for backward compatibility reasons) and to false otherwise. +optional
# @option opts [String] :fields
# @return [StreamResultOfIoArgoprojWorkflowV1alpha1WorkflowWatchEvent]
def workflow_service_watch_workflows(namespace, opts = {})
  # Delegate to the *_with_http_info variant, which returns
  # [data, status_code, headers]; only the data element is handed back.
  response_data, = workflow_service_watch_workflows_with_http_info(namespace, opts)
  response_data
end

# @param namespace
# @param [Hash] opts the optional parameters
# @option opts [String] :list_options_label_selector A selector to restrict the list of returned objects by their labels. Defaults to everything. +optional.
# @option opts [String] :list_options_field_selector A selector to restrict the list of returned objects by their fields. Defaults to everything. +optional.
# @option opts [BOOLEAN] :list_options_watch Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion. +optional.
# @option opts [BOOLEAN] :list_options_allow_watch_bookmarks allowWatchBookmarks requests watch events with type \"BOOKMARK\". Servers that do not implement bookmarks may ignore this flag and bookmarks are sent at the server's discretion. Clients should not assume bookmarks are returned at any specific interval, nor may they assume the server will send any BOOKMARK event during a session. If this is not a watch, this field is ignored. +optional.
# @option opts [String] :list_options_resource_version resourceVersion sets a constraint on what resource versions a request may be served from. See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional
# @option opts [String] :list_options_resource_version_match resourceVersionMatch determines how resourceVersion is applied to list calls.
It is highly recommended that resourceVersionMatch be set for list calls where resourceVersion is set See https://kubernetes.io/docs/reference/using-api/api-concepts/#resource-versions for details. Defaults to unset +optional + # @option opts [String] :list_options_timeout_seconds Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. +optional. + # @option opts [String] :list_options_limit limit is a maximum number of responses to return for a list call. If more items exist, the server will set the `continue` field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. + # @option opts [String] :list_options_continue The continue option should be set when retrieving more results from the server. 
Since this value is server defined, clients may only use the continue value from a previous query result with identical query parameters (except for the value of continue) and the server may reject a continue value it does not recognize. If the specified continue value is no longer valid whether due to expiration (generally five to fifteen minutes) or a configuration change on the server, the server will respond with a 410 ResourceExpired error together with a continue token. If the client needs a consistent list, it must restart their list without the continue field. Otherwise, the client may send another list request with the token received with the 410 error, the server will respond with a list starting from the next key, but from the latest snapshot, which is inconsistent from the previous list results - objects that are created, modified, or deleted after the first list request will be included in the response, as long as their keys are after the \"next key\". This field is not supported when watch is true. Clients may start a watch from the last resourceVersion value returned by the server and not miss any modifications. + # @option opts [BOOLEAN] :list_options_send_initial_events `sendInitialEvents=true` may be set together with `watch=true`. In that case, the watch stream will begin with synthetic events to produce the current state of objects in the collection. Once all such events have been sent, a synthetic \"Bookmark\" event will be sent. The bookmark will report the ResourceVersion (RV) corresponding to the set of objects, and be marked with `\"io.k8s.initial-events-end\": \"true\"` annotation. Afterwards, the watch stream will proceed as usual, sending watch events corresponding to changes (subsequent to the RV) to objects watched. When `sendInitialEvents` option is set, we require `resourceVersionMatch` option to also be set. 
#   The semantic of the watch request is as following: - `resourceVersionMatch` = NotOlderThan is interpreted as \"data at least as new as the provided `resourceVersion`\" and the bookmark event is send when the state is synced to a `resourceVersion` at least as fresh as the one provided by the ListOptions. If `resourceVersion` is unset, this is interpreted as \"consistent read\" and the bookmark event is send when the state is synced at least to the moment when request started being processed. - `resourceVersionMatch` set to any other value or unset Invalid error is returned. Defaults to true if `resourceVersion=\"\"` or `resourceVersion=\"0\"` (for backward compatibility reasons) and to false otherwise. +optional
# @option opts [String] :fields
# @option opts [Hash] :query_params base query parameters; a copy is extended with the listOptions.* and fields values, the caller's hash is left untouched
# @option opts [Hash] :header_params base request headers; a copy receives the 'Accept' header, the caller's hash is left untouched
# @option opts [Hash] :form_params form parameters handed to the API client as-is
# @option opts [Object] :body request body handed to the API client as-is
# @option opts [String] :return_type type name handed to the API client instead of 'StreamResultOfIoArgoprojWorkflowV1alpha1WorkflowWatchEvent'
# @option opts [Array<String>] :auth_names auth scheme names handed to the API client instead of ['BearerToken']
# @return [Array<(StreamResultOfIoArgoprojWorkflowV1alpha1WorkflowWatchEvent, Integer, Hash)>] StreamResultOfIoArgoprojWorkflowV1alpha1WorkflowWatchEvent data, response status code and response headers
# @raise [ArgumentError] if client-side validation is enabled and namespace is nil
def workflow_service_watch_workflows_with_http_info(namespace, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_watch_workflows ...'
  end
  # verify the required parameter 'namespace' is set
  if @api_client.config.client_side_validation && namespace.nil?
    fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_watch_workflows"
  end

  # resource path
  # Block form of String#sub: the replacement is inserted literally, so a
  # backslash sequence such as "\0" inside the namespace is not expanded.
  local_var_path = '/api/v1/workflow-events/{namespace}'.sub('{namespace}') { namespace.to_s }

  # query parameters
  # Work on a copy so the hash supplied by the caller is never mutated.
  query_params = (opts[:query_params] || {}).dup
  # wire name => option key; only options that are not nil are sent.
  {
    :'listOptions.labelSelector' => :list_options_label_selector,
    :'listOptions.fieldSelector' => :list_options_field_selector,
    :'listOptions.watch' => :list_options_watch,
    :'listOptions.allowWatchBookmarks' => :list_options_allow_watch_bookmarks,
    :'listOptions.resourceVersion' => :list_options_resource_version,
    :'listOptions.resourceVersionMatch' => :list_options_resource_version_match,
    :'listOptions.timeoutSeconds' => :list_options_timeout_seconds,
    :'listOptions.limit' => :list_options_limit,
    :'listOptions.continue' => :list_options_continue,
    :'listOptions.sendInitialEvents' => :list_options_send_initial_events,
    :'fields' => :fields
  }.each do |wire_name, option_key|
    value = opts[option_key]
    # `false` is a legitimate value (e.g. watch=false), so test for nil only.
    query_params[wire_name] = value unless value.nil?
  end

  # header parameters (copied for the same reason as query_params)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:body]

  return_type = opts[:return_type] || 'StreamResultOfIoArgoprojWorkflowV1alpha1WorkflowWatchEvent'

  auth_names = opts[:auth_names] || ['BearerToken']
  data, status_code, headers = @api_client.call_api(:GET, local_var_path,
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type)

  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_watch_workflows\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end

# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @option opts [String] :pod_name
# @option opts [String] :log_options_container The container for which to stream logs. Defaults to only container if there is one container in the pod. +optional.
# @option opts [BOOLEAN] :log_options_follow Follow the log stream of the pod. Defaults to false. +optional.
# @option opts [BOOLEAN] :log_options_previous Return previous terminated container logs. Defaults to false. +optional.
# @option opts [String] :log_options_since_seconds A relative time in seconds before the current time from which to show logs. If this value precedes the time a pod was started, only logs since the pod start will be returned. If this value is in the future, no logs will be returned. Only one of sinceSeconds or sinceTime may be specified. +optional.
# @option opts [String] :log_options_since_time_seconds Represents seconds of UTC time since Unix epoch 1970-01-01T00:00:00Z.
Must be from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59Z inclusive. + # @option opts [Integer] :log_options_since_time_nanos Non-negative fractions of a second at nanosecond resolution. Negative second values with fractions must still have non-negative nanos values that count forward in time. Must be from 0 to 999,999,999 inclusive. This field may be limited in precision depending on context. + # @option opts [BOOLEAN] :log_options_timestamps If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line of log output. Defaults to false. +optional. + # @option opts [String] :log_options_tail_lines If set, the number of lines from the end of the logs to show. If not specified, logs are shown from the creation of the container or sinceSeconds or sinceTime. Note that when \"TailLines\" is specified, \"Stream\" can only be set to nil or \"All\". +optional. + # @option opts [String] :log_options_limit_bytes If set, the number of bytes to read from the server before terminating the log output. This may not display a complete final line of logging, and may return slightly more or slightly less than the specified limit. +optional. + # @option opts [BOOLEAN] :log_options_insecure_skip_tls_verify_backend insecureSkipTLSVerifyBackend indicates that the apiserver should not confirm the validity of the serving certificate of the backend it is connecting to. This will make the HTTPS connection between the apiserver and the backend insecure. This means the apiserver cannot verify the log data it is receiving came from the real kubelet. If the kubelet is configured to verify the apiserver's TLS credentials, it does not mean the connection to the real kubelet is vulnerable to a man in the middle attack (e.g. an attacker could not intercept the actual log data coming from the real kubelet). +optional. + # @option opts [String] :log_options_stream Specify which container log stream to return to the client. Acceptable values are \"All\", \"Stdout\" and \"Stderr\". 
#   If not specified, \"All\" is used, and both stdout and stderr are returned interleaved. Note that when \"TailLines\" is specified, \"Stream\" can only be set to nil or \"All\". +featureGate=PodLogsQuerySplitStreams +optional.
# @option opts [String] :grep
# @option opts [String] :selector
# @return [StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry]
def workflow_service_workflow_logs(namespace, name, opts = {})
  # Delegate to the *_with_http_info variant, which returns
  # [data, status_code, headers]; only the data element is handed back.
  response_data, = workflow_service_workflow_logs_with_http_info(namespace, name, opts)
  response_data
end

# @param namespace
# @param name
# @param [Hash] opts the optional parameters
# @option opts [String] :pod_name
# @option opts [String] :log_options_container The container for which to stream logs. Defaults to only container if there is one container in the pod. +optional.
# @option opts [BOOLEAN] :log_options_follow Follow the log stream of the pod. Defaults to false. +optional.
# @option opts [BOOLEAN] :log_options_previous Return previous terminated container logs. Defaults to false. +optional.
# @option opts [String] :log_options_since_seconds A relative time in seconds before the current time from which to show logs. If this value precedes the time a pod was started, only logs since the pod start will be returned. If this value is in the future, no logs will be returned. Only one of sinceSeconds or sinceTime may be specified. +optional.
# @option opts [String] :log_options_since_time_seconds Represents seconds of UTC time since Unix epoch 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59Z inclusive.
# @option opts [Integer] :log_options_since_time_nanos Non-negative fractions of a second at nanosecond resolution. Negative second values with fractions must still have non-negative nanos values that count forward in time. Must be from 0 to 999,999,999 inclusive. This field may be limited in precision depending on context.
+ # @option opts [BOOLEAN] :log_options_timestamps If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line of log output. Defaults to false. +optional. + # @option opts [String] :log_options_tail_lines If set, the number of lines from the end of the logs to show. If not specified, logs are shown from the creation of the container or sinceSeconds or sinceTime. Note that when \"TailLines\" is specified, \"Stream\" can only be set to nil or \"All\". +optional. + # @option opts [String] :log_options_limit_bytes If set, the number of bytes to read from the server before terminating the log output. This may not display a complete final line of logging, and may return slightly more or slightly less than the specified limit. +optional. + # @option opts [BOOLEAN] :log_options_insecure_skip_tls_verify_backend insecureSkipTLSVerifyBackend indicates that the apiserver should not confirm the validity of the serving certificate of the backend it is connecting to. This will make the HTTPS connection between the apiserver and the backend insecure. This means the apiserver cannot verify the log data it is receiving came from the real kubelet. If the kubelet is configured to verify the apiserver's TLS credentials, it does not mean the connection to the real kubelet is vulnerable to a man in the middle attack (e.g. an attacker could not intercept the actual log data coming from the real kubelet). +optional. + # @option opts [String] :log_options_stream Specify which container log stream to return to the client. Acceptable values are \"All\", \"Stdout\" and \"Stderr\". If not specified, \"All\" is used, and both stdout and stderr are returned interleaved. Note that when \"TailLines\" is specified, \"Stream\" can only be set to nil or \"All\". +featureGate=PodLogsQuerySplitStreams +optional. 
+ # @option opts [String] :grep + # @option opts [String] :selector + # @return [Array<(StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry, Integer, Hash)>] StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry data, response status code and response headers + def workflow_service_workflow_logs_with_http_info(namespace, name, opts = {}) + if @api_client.config.debugging + @api_client.config.logger.debug 'Calling API: WorkflowServiceApi.workflow_service_workflow_logs ...' + end + # verify the required parameter 'namespace' is set + if @api_client.config.client_side_validation && namespace.nil? + fail ArgumentError, "Missing the required parameter 'namespace' when calling WorkflowServiceApi.workflow_service_workflow_logs" + end + # verify the required parameter 'name' is set + if @api_client.config.client_side_validation && name.nil? + fail ArgumentError, "Missing the required parameter 'name' when calling WorkflowServiceApi.workflow_service_workflow_logs" + end + # resource path + local_var_path = '/api/v1/workflows/{namespace}/{name}/log'.sub('{' + 'namespace' + '}', namespace.to_s).sub('{' + 'name' + '}', name.to_s) + + # query parameters + query_params = opts[:query_params] || {} + query_params[:'podName'] = opts[:'pod_name'] if !opts[:'pod_name'].nil? + query_params[:'logOptions.container'] = opts[:'log_options_container'] if !opts[:'log_options_container'].nil? + query_params[:'logOptions.follow'] = opts[:'log_options_follow'] if !opts[:'log_options_follow'].nil? + query_params[:'logOptions.previous'] = opts[:'log_options_previous'] if !opts[:'log_options_previous'].nil? + query_params[:'logOptions.sinceSeconds'] = opts[:'log_options_since_seconds'] if !opts[:'log_options_since_seconds'].nil? + query_params[:'logOptions.sinceTime.seconds'] = opts[:'log_options_since_time_seconds'] if !opts[:'log_options_since_time_seconds'].nil? + query_params[:'logOptions.sinceTime.nanos'] = opts[:'log_options_since_time_nanos'] if !opts[:'log_options_since_time_nanos'].nil? 
+ query_params[:'logOptions.timestamps'] = opts[:'log_options_timestamps'] if !opts[:'log_options_timestamps'].nil? + query_params[:'logOptions.tailLines'] = opts[:'log_options_tail_lines'] if !opts[:'log_options_tail_lines'].nil? + query_params[:'logOptions.limitBytes'] = opts[:'log_options_limit_bytes'] if !opts[:'log_options_limit_bytes'].nil? + query_params[:'logOptions.insecureSkipTLSVerifyBackend'] = opts[:'log_options_insecure_skip_tls_verify_backend'] if !opts[:'log_options_insecure_skip_tls_verify_backend'].nil? + query_params[:'logOptions.stream'] = opts[:'log_options_stream'] if !opts[:'log_options_stream'].nil? + query_params[:'grep'] = opts[:'grep'] if !opts[:'grep'].nil? + query_params[:'selector'] = opts[:'selector'] if !opts[:'selector'].nil? + + # header parameters + header_params = opts[:header_params] || {} + # HTTP header 'Accept' (if needed) + header_params['Accept'] = @api_client.select_header_accept(['application/json']) + + # form parameters + form_params = opts[:form_params] || {} + + # http body (model) + post_body = opts[:body] + + return_type = opts[:return_type] || 'StreamResultOfIoArgoprojWorkflowV1alpha1LogEntry' + + auth_names = opts[:auth_names] || ['BearerToken'] + data, status_code, headers = @api_client.call_api(:GET, local_var_path, + :header_params => header_params, + :query_params => query_params, + :form_params => form_params, + :body => post_body, + :auth_names => auth_names, + :return_type => return_type) + + if @api_client.config.debugging + @api_client.config.logger.debug "API called: WorkflowServiceApi#workflow_service_workflow_logs\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}" + end + return data, status_code, headers + end + end +end diff --git a/lib/argo_workflows_api_client/api_client.rb b/lib/argo_workflows_api_client/api_client.rb new file mode 100644 index 00000000..ac8e234c --- /dev/null +++ b/lib/argo_workflows_api_client/api_client.rb @@ -0,0 +1,388 @@ +=begin +#Argo Workflows API + 
+#Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. For more information, please see https://argo-workflows.readthedocs.io/en/latest/ + +OpenAPI spec version: VERSION + +Generated by: https://github.com/swagger-api/swagger-codegen.git +Swagger Codegen version: 3.0.78 +=end + +require 'date' +require 'json' +require 'logger' +require 'tempfile' +require 'typhoeus' +require 'uri' + +module ArgoWorkflowsApiClient + class ApiClient + # The Configuration object holding settings to be used in the API client. + attr_accessor :config + + # Defines the headers to be used in HTTP requests of all API calls by default. + # + # @return [Hash] + attr_accessor :default_headers + + # Initializes the ApiClient + # @option config [Configuration] Configuration for initializing the object, default to Configuration.default + def initialize(config = Configuration.default) + @config = config + @user_agent = "Swagger-Codegen/#{VERSION}/ruby" + @default_headers = { + 'Content-Type' => 'application/json', + 'User-Agent' => @user_agent + } + end + + def self.default + @@default ||= ApiClient.new + end + + # Call an API with given options. + # + # @return [Array<(Object, Integer, Hash)>] an array of 3 elements: + # the data deserialized from response body (could be nil), response status code and response headers. + def call_api(http_method, path, opts = {}) + request = build_request(http_method, path, opts) + response = request.run + + if @config.debugging + @config.logger.debug "HTTP response body ~BEGIN~\n#{response.body}\n~END~\n" + end + + unless response.success? + if response.timed_out? 
+ fail ApiError.new('Connection timed out') + elsif response.code == 0 + # Errors from libcurl will be made visible here + fail ApiError.new(:code => 0, + :message => response.return_message) + else + fail ApiError.new(:code => response.code, + :response_headers => response.headers, + :response_body => response.body), + response.status_message + end + end + + if opts[:return_type] + data = deserialize(response, opts[:return_type]) + else + data = nil + end + return data, response.code, response.headers + end + + # Builds the HTTP request + # + # @param [String] http_method HTTP method/verb (e.g. POST) + # @param [String] path URL path (e.g. /account/new) + # @option opts [Hash] :header_params Header parameters + # @option opts [Hash] :query_params Query parameters + # @option opts [Hash] :form_params Form parameters + # @option opts [Object] :body HTTP body (JSON/XML) + # @return [Typhoeus::Request] A Typhoeus Request + def build_request(http_method, path, opts = {}) + url = build_request_url(path) + http_method = http_method.to_sym.downcase + + header_params = @default_headers.merge(opts[:header_params] || {}) + query_params = opts[:query_params] || {} + form_params = opts[:form_params] || {} + + update_params_for_auth! header_params, query_params, opts[:auth_names] + + # set ssl_verifyhost option based on @config.verify_ssl_host (true/false) + _verify_ssl_host = @config.verify_ssl_host ?
2 : 0 + + req_opts = { + :method => http_method, + :headers => header_params, + :params => query_params, + :params_encoding => @config.params_encoding, + :timeout => @config.timeout, + :ssl_verifypeer => @config.verify_ssl, + :ssl_verifyhost => _verify_ssl_host, + :sslcert => @config.cert_file, + :sslkey => @config.key_file, + :verbose => @config.debugging + } + + # set custom cert, if provided + req_opts[:cainfo] = @config.ssl_ca_cert if @config.ssl_ca_cert + + if [:post, :patch, :put, :delete].include?(http_method) + req_body = build_request_body(header_params, form_params, opts[:body]) + req_opts.update :body => req_body + if @config.debugging + @config.logger.debug "HTTP request body param ~BEGIN~\n#{req_body}\n~END~\n" + end + end + + request = Typhoeus::Request.new(url, req_opts) + download_file(request) if opts[:return_type] == 'File' + request + end + + # Builds the HTTP request body + # + # @param [Hash] header_params Header parameters + # @param [Hash] form_params Form parameters + # @param [Object] body HTTP body (JSON/XML) + # @return [String, Hash, nil] a Hash of form fields for form content types, otherwise the body as a String (JSON-encoded unless already a String), or nil when no body is given + def build_request_body(header_params, form_params, body) + # http form + if header_params['Content-Type'] == 'application/x-www-form-urlencoded' || + header_params['Content-Type'] == 'multipart/form-data' + data = {} + form_params.each do |key, value| + case value + when ::File, ::Array, nil + # let typhoeus handle File, Array and nil parameters + data[key] = value + else + data[key] = value.to_s + end + end + elsif body + data = body.is_a?(String) ? body : body.to_json + else + data = nil + end + data + end + + # Check if the given MIME is a JSON MIME. + # JSON MIME examples: + # application/json + # application/json; charset=UTF8 + # APPLICATION/JSON + # */* + # @param [String] mime MIME + # @return [Boolean] True if the MIME is application/json + def json_mime?(mime) + (mime == '*/*') || !(mime =~ /Application\/.*json(?!p)(;.*)?/i).nil?
+ end + + # Deserialize the response to the given return type. + # + # @param [Response] response HTTP response + # @param [String] return_type some examples: "User", "Array", "Hash" + def deserialize(response, return_type) + body = response.body + + # handle file downloading - return the File instance processed in request callbacks + # note that response body is empty when the file is written in chunks in request on_body callback + return @tempfile if return_type == 'File' + + return nil if body.nil? || body.empty? + + # return response body directly for String return type + return body if return_type == 'String' + + # ensuring a default content type + content_type = response.headers['Content-Type'] || 'application/json' + + fail "Content-Type is not supported: #{content_type}" unless json_mime?(content_type) + + begin + data = JSON.parse("[#{body}]", :symbolize_names => true)[0] + rescue JSON::ParserError => e + if %w(String Date DateTime).include?(return_type) + data = body + else + raise e + end + end + + convert_to_type data, return_type + end + + # Convert data to the given return type. + # @param [Object] data Data to be converted + # @param [String] return_type Return type + # @return [Mixed] Data in a particular type + def convert_to_type(data, return_type) + return nil if data.nil? + case return_type + when 'String' + data.to_s + when 'Integer' + data.to_i + when 'Float' + data.to_f + when 'Boolean' + data == true + when 'DateTime' + # parse date time (expecting ISO 8601 format) + DateTime.parse data + when 'Date' + # parse date time (expecting ISO 8601 format) + Date.parse data + when 'Object' + # generic object (usually a Hash), return directly + data + when /\AArray<(.+)>\z/ + # e.g. Array + sub_type = $1 + data.map { |item| convert_to_type(item, sub_type) } + when /\AHash\\z/ + # e.g. Hash + sub_type = $1 + {}.tap do |hash| + data.each { |k, v| hash[k] = convert_to_type(v, sub_type) } + end + else + # models, e.g. 
Pet + ArgoWorkflowsApiClient.const_get(return_type).build_from_hash(data) + end + end + + # Save response body into a file in (the defined) temporary folder, using the filename + # from the "Content-Disposition" header if provided, otherwise a random filename. + # The response body is written to the file in chunks in order to handle files which + # size is larger than maximum Ruby String or even larger than the maximum memory a Ruby + # process can use. + # + # @see Configuration#temp_folder_path + def download_file(request) + tempfile = nil + encoding = nil + request.on_headers do |response| + content_disposition = response.headers['Content-Disposition'] + if content_disposition && content_disposition =~ /filename=/i + filename = content_disposition[/filename=['"]?([^'"\s]+)['"]?/, 1] + prefix = sanitize_filename(filename) + else + prefix = 'download-' + end + prefix = prefix + '-' unless prefix.end_with?('-') + encoding = response.body.encoding + tempfile = Tempfile.open(prefix, @config.temp_folder_path, encoding: encoding) + @tempfile = tempfile + end + request.on_body do |chunk| + chunk.force_encoding(encoding) + tempfile.write(chunk) + end + request.on_complete do |response| + if tempfile + tempfile.close + @config.logger.info "Temp file written to #{tempfile.path}, please copy the file to a proper folder "\ + "with e.g. `FileUtils.cp(tempfile.path, '/new/file/path')` otherwise the temp file "\ + "will be deleted automatically with GC. It's also recommended to delete the temp file "\ + "explicitly with `tempfile.delete`" + end + end + end + + # Sanitize filename by removing path. + # e.g. 
../../sun.gif becomes sun.gif + # + # @param [String] filename the filename to be sanitized + # @return [String] the sanitized filename + def sanitize_filename(filename) + filename.gsub(/.*[\/\\]/, '') + end + + def build_request_url(path) + # Ensure a single leading slash and collapse repeated slashes in path + path = "/#{path}".gsub(/\/+/, '/') + @config.base_url + path + end + + # Update header and query params based on authentication settings. + # + # @param [Hash] header_params Header parameters + # @param [Hash] query_params Query parameters + # @param [Array<String>, String] auth_names Authentication scheme name(s) + def update_params_for_auth!(header_params, query_params, auth_names) + Array(auth_names).each do |auth_name| + auth_setting = @config.auth_settings[auth_name] + next unless auth_setting + case auth_setting[:in] + when 'header' then header_params[auth_setting[:key]] = auth_setting[:value] + when 'query' then query_params[auth_setting[:key]] = auth_setting[:value] + else fail ArgumentError, 'Authentication token must be in `query` of `header`' + end + end + end + + # Sets user agent in HTTP header + # + # @param [String] user_agent User agent (e.g. swagger-codegen/ruby/1.0.0) + def user_agent=(user_agent) + @user_agent = user_agent + @default_headers['User-Agent'] = @user_agent + end + + # Return Accept header based on an array of accepts provided. + # @param [Array] accepts array for Accept + # @return [String] the Accept header (e.g. application/json) + def select_header_accept(accepts) + return nil if accepts.nil? || accepts.empty? + # use JSON when present, otherwise use all of the provided + json_accept = accepts.find { |s| json_mime?(s) } + json_accept || accepts.join(',') + end + + # Return Content-Type header based on an array of content types provided. + # @param [Array] content_types array for Content-Type + # @return [String] the Content-Type header (e.g.
application/json) + def select_header_content_type(content_types) + # use application/json by default + return 'application/json' if content_types.nil? || content_types.empty? + # use JSON when present, otherwise use the first one + json_content_type = content_types.find { |s| json_mime?(s) } + json_content_type || content_types.first + end + + # Convert object (array, hash, object, etc) to JSON string. + # @param [Object] model object to be converted into JSON string + # @return [String] JSON string representation of the object + def object_to_http_body(model) + return model if model.nil? || model.is_a?(String) + local_body = nil + if model.is_a?(Array) + local_body = model.map { |m| object_to_hash(m) } + else + local_body = object_to_hash(model) + end + local_body.to_json + end + + # Convert object(non-array) to hash. + # @param [Object] obj object to be converted into a hash + # @return [Hash, Object] the result of obj.to_hash when obj responds to it, otherwise obj unchanged + def object_to_hash(obj) + if obj.respond_to?(:to_hash) + obj.to_hash + else + obj + end + end + + # Build parameter value according to the given collection format. + # @param [Symbol] collection_format one of :csv, :ssv, :tsv, :pipes and :multi + def build_collection_param(param, collection_format) + case collection_format + when :csv + param.join(',') + when :ssv + param.join(' ') + when :tsv + param.join("\t") + when :pipes + param.join('|') + when :multi + # return the array directly as typhoeus will handle it as expected + param + else + fail "unknown collection format: #{collection_format.inspect}" + end + end + end +end diff --git a/lib/argo_workflows_api_client/api_error.rb b/lib/argo_workflows_api_client/api_error.rb new file mode 100644 index 00000000..e164bbfe --- /dev/null +++ b/lib/argo_workflows_api_client/api_error.rb @@ -0,0 +1,57 @@ +=begin +#Argo Workflows API + +#Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes.
For more information, please see https://argo-workflows.readthedocs.io/en/latest/ + +OpenAPI spec version: VERSION + +Generated by: https://github.com/swagger-api/swagger-codegen.git +Swagger Codegen version: 3.0.78 +=end + +module ArgoWorkflowsApiClient + class ApiError < StandardError + attr_reader :code, :response_headers, :response_body + + # Usage examples: + # ApiError.new + # ApiError.new("message") + # ApiError.new(:code => 500, :response_headers => {}, :response_body => "") + # ApiError.new(:code => 404, :message => "Not Found") + def initialize(arg = nil) + if arg.is_a? Hash + if arg.key?(:message) || arg.key?('message') + super(arg[:message] || arg['message']) + else + super arg + end + + arg.each do |k, v| + instance_variable_set "@#{k}", v + end + else + super arg + end + end + + # Override to_s to display a friendly error message + def to_s + message + end + + def message + if @message.nil? + msg = "Error message: the server returns an error" + else + msg = @message + end + + msg += "\nHTTP status code: #{code}" if code + msg += "\nResponse headers: #{response_headers}" if response_headers + msg += "\nResponse body: #{response_body}" if response_body + + msg + end + + end +end diff --git a/lib/argo_workflows_api_client/configuration.rb b/lib/argo_workflows_api_client/configuration.rb new file mode 100644 index 00000000..358245fc --- /dev/null +++ b/lib/argo_workflows_api_client/configuration.rb @@ -0,0 +1,205 @@ +=begin +#Argo Workflows API + +#Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. 
For more information, please see https://argo-workflows.readthedocs.io/en/latest/ + +OpenAPI spec version: VERSION + +Generated by: https://github.com/swagger-api/swagger-codegen.git +Swagger Codegen version: 3.0.78 +=end + +module ArgoWorkflowsApiClient + class Configuration + # Defines url scheme + attr_accessor :scheme + + # Defines url host + attr_accessor :host + + # Defines url base path + attr_accessor :base_path + + # Defines API keys used with API Key authentications. + # + # @return [Hash] key: parameter name, value: parameter value (API key) + # + # @example parameter name is "api_key", API key is "xxx" (e.g. "api_key=xxx" in query string) + # config.api_key['api_key'] = 'xxx' + attr_accessor :api_key + + # Defines API key prefixes used with API Key authentications. + # + # @return [Hash] key: parameter name, value: API key prefix + # + # @example parameter name is "Authorization", API key prefix is "Token" (e.g. "Authorization: Token xxx" in headers) + # config.api_key_prefix['api_key'] = 'Token' + attr_accessor :api_key_prefix + + # Defines the username used with HTTP basic authentication. + # + # @return [String] + attr_accessor :username + + # Defines the password used with HTTP basic authentication. + # + # @return [String] + attr_accessor :password + + # Defines the access token (Bearer) used with OAuth2. + attr_accessor :access_token + + # Set this to enable/disable debugging. When enabled (set to true), HTTP request/response + # details will be logged with `logger.debug` (see the `logger` attribute). + # Default to false. + # + # @return [true, false] + attr_accessor :debugging + + # Defines the logger used for debugging. + # Default to `Rails.logger` (when in Rails) or logging to STDOUT. + # + # @return [#debug] + attr_accessor :logger + + # Defines the temporary folder to store downloaded files + # (for API endpoints that have file response). + # Default to use `Tempfile`. 
+ # + # @return [String] + attr_accessor :temp_folder_path + + # The time limit for HTTP request in seconds. + # Default to 0 (never times out). + attr_accessor :timeout + + # Set this to false to skip client side validation in the operation. + # Default to true. + # @return [true, false] + attr_accessor :client_side_validation + + ### TLS/SSL setting + # Set this to false to skip verifying SSL certificate when calling API from https server. + # Default to true. + # + # @note Do NOT set it to false in production code, otherwise you would face multiple types of cryptographic attacks. + # + # @return [true, false] + attr_accessor :verify_ssl + + ### TLS/SSL setting + # Set this to false to skip verifying SSL host name + # Default to true. + # + # @note Do NOT set it to false in production code, otherwise you would face multiple types of cryptographic attacks. + # + # @return [true, false] + attr_accessor :verify_ssl_host + + ### TLS/SSL setting + # Set this to customize the certificate file to verify the peer. + # + # @return [String] the path to the certificate file + # + # @see The `cainfo` option of Typhoeus, `--cert` option of libcurl. Related source code: + # https://github.com/typhoeus/typhoeus/blob/master/lib/typhoeus/easy_factory.rb#L145 + attr_accessor :ssl_ca_cert + + ### TLS/SSL setting + # Client certificate file (for client certificate) + attr_accessor :cert_file + + ### TLS/SSL setting + # Client private key file (for client certificate) + attr_accessor :key_file + + # Set this to customize parameters encoding of array parameter with multi collectionFormat. + # Default to nil. + # + # @see The params_encoding option of Ethon. 
Related source code: + # https://github.com/typhoeus/ethon/blob/master/lib/ethon/easy/queryable.rb#L96 + attr_accessor :params_encoding + + attr_accessor :inject_format + + attr_accessor :force_ending_format + + def initialize + @scheme = 'http' + @host = 'localhost' + @base_path = 'http://localhost:2746/' + @api_key = {} + @api_key_prefix = {} + @timeout = 0 + @client_side_validation = true + @verify_ssl = true + @verify_ssl_host = true + @params_encoding = nil + @cert_file = nil + @key_file = nil + @debugging = false + @inject_format = false + @force_ending_format = false + @logger = defined?(Rails) ? Rails.logger : Logger.new(STDOUT) + + yield(self) if block_given? + end + + # The default Configuration object. + def self.default + @@default ||= Configuration.new + end + + def configure + yield(self) if block_given? + end + + def scheme=(scheme) + # remove :// from scheme + @scheme = scheme.sub(/:\/\//, '') + end + + def host=(host) + # remove http(s):// and anything after a slash + @host = host.sub(/https?:\/\//, '').split('/').first + end + + def base_path=(base_path) + # Add leading and trailing slashes to base_path + @base_path = "/#{base_path}".gsub(/\/+/, '/') + @base_path = '' if @base_path == '/' + end + + def base_url + "#{scheme}://#{[host, base_path].join('/').gsub(/\/+/, '/')}".sub(/\/+\z/, '') + end + + # Gets API key (with prefix if set). + # @param [String] param_name the parameter name of API key auth + def api_key_with_prefix(param_name) + if @api_key_prefix[param_name] + "#{@api_key_prefix[param_name]} #{@api_key[param_name]}" + else + @api_key[param_name] + end + end + + # Gets Basic Auth token string + def basic_auth_token + 'Basic ' + ["#{username}:#{password}"].pack('m').delete("\r\n") + end + + # Returns Auth Settings hash for api client. 
+ def auth_settings + { + 'BearerToken' => + { + type: 'api_key', + in: 'header', + key: 'Authorization', + value: api_key_with_prefix('Authorization') + }, + } + end + end +end diff --git a/lib/argo_workflows_api_client/version.rb b/lib/argo_workflows_api_client/version.rb new file mode 100644 index 00000000..8d1b7057 --- /dev/null +++ b/lib/argo_workflows_api_client/version.rb @@ -0,0 +1,14 @@ +=begin +#Argo Workflows API + +#Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. For more information, please see https://argo-workflows.readthedocs.io/en/latest/ + +OpenAPI spec version: VERSION + +Generated by: https://github.com/swagger-api/swagger-codegen.git +Swagger Codegen version: 3.0.78 +=end + +module ArgoWorkflowsApiClient + VERSION = '1.0.0' +end diff --git a/lib/swagger_docs/models.rb b/lib/swagger_docs/models.rb index 72b91296..e9d647d4 100644 --- a/lib/swagger_docs/models.rb +++ b/lib/swagger_docs/models.rb @@ -381,6 +381,14 @@ module Models # rubocop:todo Metrics/ModuleLength, Style/Documentation property :url, type: :string, description: 'S3 URL (when finished)' + + property :zip_files do + key :type, :array + key :description, 'ZIP files produced by the workflow when finished' + items do + key :type, :string + end + end end swagger_schema :RetrieveDescriptionSets do diff --git a/scripts/generate_argo_workflows_client.sh b/scripts/generate_argo_workflows_client.sh new file mode 100755 index 00000000..a396f81a --- /dev/null +++ b/scripts/generate_argo_workflows_client.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +SPEC_URL="${ARGO_WORKFLOWS_SWAGGER_URL:-https://raw.githubusercontent.com/argoproj/argo-workflows/main/api/openapi-spec/swagger.json}" +IMAGE="${SWAGGER_CODEGEN_IMAGE:-swaggerapi/swagger-codegen-cli-v3}" +MODULE_NAME="ArgoWorkflowsApiClient" +GEM_NAME="argo_workflows_api_client" + +TMP_DIR="$(mktemp -d)" +cleanup() { + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +require_cmd() { + local cmd="$1" + if ! command -v "$cmd" >/dev/null 2>&1; then + echo "Missing required command: $cmd" >&2 + exit 1 + fi +} + +require_cmd curl +require_cmd docker + +echo "Fetching Argo Workflows Swagger spec from $SPEC_URL" +curl -fsSL "$SPEC_URL" -o "$TMP_DIR/swagger.json" + +echo "Generating Ruby client with $IMAGE" +docker run --rm \ + -v "$TMP_DIR:/local" \ + "$IMAGE" generate \ + -i /local/swagger.json \ + -l ruby \ + -o /local/out \ + -D "moduleName=$MODULE_NAME,gemName=$GEM_NAME" >/dev/null + +DEST_DIR="$ROOT_DIR/lib/argo_workflows_api_client" +API_DIR="$DEST_DIR/api" + +mkdir -p "$API_DIR" + +cp "$TMP_DIR/out/lib/$GEM_NAME/api_client.rb" "$DEST_DIR/api_client.rb" +cp "$TMP_DIR/out/lib/$GEM_NAME/api_error.rb" "$DEST_DIR/api_error.rb" +cp "$TMP_DIR/out/lib/$GEM_NAME/configuration.rb" "$DEST_DIR/configuration.rb" +cp "$TMP_DIR/out/lib/$GEM_NAME/version.rb" "$DEST_DIR/version.rb" +cp "$TMP_DIR/out/lib/$GEM_NAME/api/workflow_service_api.rb" "$API_DIR/workflow_service_api.rb" + +cat > "$ROOT_DIR/lib/argo_workflows_api_client.rb" <<'RUBY' +require 'argo_workflows_api_client/api_client' +require 'argo_workflows_api_client/api_error' +require 'argo_workflows_api_client/version' +require 'argo_workflows_api_client/configuration' +require 'argo_workflows_api_client/api/workflow_service_api' +RUBY + +echo "Updated vendored Argo Workflows client in $DEST_DIR" diff --git a/spec/api/v1/envelopes_spec.rb b/spec/api/v1/envelopes_spec.rb index b77d5ab5..b5555069 100644 --- a/spec/api/v1/envelopes_spec.rb +++ b/spec/api/v1/envelopes_spec.rb @@ -198,10 +198,13 @@ internal_error_message:, 
started_at:, status:, + zip_files:, url: ) end + let(:zip_files) { [] } + # rubocop:todo RSpec/MultipleMemoizedHelpers # rubocop:todo RSpec/NestedGroups context 'in progress' do # rubocop:todo RSpec/ContextWording, RSpec/MultipleMemoizedHelpers, RSpec/NestedGroups @@ -225,14 +228,16 @@ let(:internal_error_message) { Faker::Lorem.sentence } let(:status) { :finished } let(:url) { Faker::Internet.url } + let(:zip_files) { [url] } it 'returns `failed`' do expect { perform_request }.not_to change(EnvelopeDownload, :count) expect_status(:ok) - expect_json_sizes(3) + expect_json_sizes(4) expect_json('finished_at', envelope_download.finished_at.as_json) expect_json('status', 'failed') expect_json('url', url) + expect_json('zip_files', zip_files) end end # rubocop:enable RSpec/MultipleMemoizedHelpers @@ -244,14 +249,16 @@ let(:finished_at) { Time.current } let(:status) { :finished } let(:url) { Faker::Internet.url } + let(:zip_files) { [url, "#{url}/second.zip"] } it 'returns `finished` and URL' do expect { perform_request }.not_to change(EnvelopeDownload, :count) expect_status(:ok) - expect_json_sizes(3) + expect_json_sizes(4) expect_json('finished_at', envelope_download.finished_at.as_json) expect_json('status', 'finished') expect_json('url', url) + expect_json('zip_files', zip_files) end end # rubocop:enable RSpec/MultipleMemoizedHelpers diff --git a/spec/factories/envelope_downloads.rb b/spec/factories/envelope_downloads.rb index caf0a418..5789d505 100644 --- a/spec/factories/envelope_downloads.rb +++ b/spec/factories/envelope_downloads.rb @@ -1,6 +1,7 @@ FactoryBot.define do factory :envelope_download do enqueued_at { Time.current.change(usec: 0) } + zip_files { [] } # rubocop:todo FactoryBot/FactoryAssociationWithStrategy envelope_community { create(:envelope_community, :with_random_name) } # rubocop:enable FactoryBot/FactoryAssociationWithStrategy diff --git a/spec/jobs/download_envelopes_job_spec.rb b/spec/jobs/download_envelopes_job_spec.rb index 726c4bd1..5465e912 
100644 --- a/spec/jobs/download_envelopes_job_spec.rb +++ b/spec/jobs/download_envelopes_job_spec.rb @@ -5,8 +5,8 @@ describe '#perform' do context 'without error' do - it 'calls DownloadEnvelopes' do - allow(DownloadEnvelopes).to receive(:call).with(envelope_download:) + it 'submits the Argo workflow' do + allow(SubmitEnvelopeDownloadWorkflow).to receive(:call).with(envelope_download:) described_class.new.perform(envelope_download.id) end end @@ -18,7 +18,7 @@ allow(Airbrake).to receive(:notify) .with(error, envelope_download_id: envelope_download.id) - allow(DownloadEnvelopes).to receive(:call) + allow(SubmitEnvelopeDownloadWorkflow).to receive(:call) .with(envelope_download:) .and_raise(error) diff --git a/spec/services/download_envelopes_spec.rb b/spec/services/download_envelopes_spec.rb index 5ec381d6..0f5a62ae 100644 --- a/spec/services/download_envelopes_spec.rb +++ b/spec/services/download_envelopes_spec.rb @@ -1,3 +1,5 @@ +require 'download_envelopes' + RSpec.describe DownloadEnvelopes do # rubocop:todo RSpec/MultipleMemoizedHelpers let(:bucket) { double('bucket') } # rubocop:todo RSpec/VerifiedDoubles let(:bucket_name) { 'envelope-downloads-bucket-test' } diff --git a/spec/services/submit_envelope_download_workflow_spec.rb b/spec/services/submit_envelope_download_workflow_spec.rb new file mode 100644 index 00000000..e6bc384e --- /dev/null +++ b/spec/services/submit_envelope_download_workflow_spec.rb @@ -0,0 +1,57 @@ +require 'spec_helper' + +RSpec.describe SubmitEnvelopeDownloadWorkflow do + let(:client) { instance_double(ArgoWorkflowsClient, namespace: 'credreg-staging') } + let(:community) { EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') } + let(:envelope_download) { create(:envelope_download, envelope_community: community) } + let(:workflow) { { metadata: { name: 'ce-registry-download-abc123' } } } + let(:now) { Time.zone.parse('2026-03-06 12:00:00 UTC') } + + before do + allow(ArgoWorkflowsClient).to receive(:new).and_return(client) + 
allow(ENV).to receive(:fetch).and_call_original + allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_TEMPLATE_NAME').and_return('s3-graphs-zip') + allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_TASK_IMAGE').and_return('registry:s3-graphs-zip') + allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_BATCH_SIZE', '1000').and_return('1000') + allow(ENV).to receive(:fetch) + .with('ARGO_WORKFLOWS_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES', '209715200') + .and_return('209715200') + allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_MAX_WORKERS', '4').and_return('4') + allow(ENV).to receive(:fetch).with('AWS_REGION').and_return('us-east-1') + allow(ENV).to receive(:fetch).with('ENVELOPE_DOWNLOADS_BUCKET').and_return('downloads-bucket') + allow(ENV).to receive(:fetch).with('ENVELOPE_GRAPHS_BUCKET').and_return('graphs-bucket') + end + + it 'submits the workflow and marks the download in progress' do + allow(client).to receive(:submit_workflow) + .with( + template_name: 's3-graphs-zip', + generate_name: 'ce-registry-download-', + parameters: { + 'batch-size' => '1000', + 'aws-region' => 'us-east-1', + 'destination-bucket' => 'downloads-bucket', + 'destination-prefix' => "ce_registry/downloads/#{envelope_download.id}", + 'environment' => MR.env, + 'max-uncompressed-zip-size-bytes' => '209715200', + 'max-workers' => '4', + 'source-bucket' => 'graphs-bucket', + 'source-prefix' => 'ce_registry', + 'task-image' => 'registry:s3-graphs-zip' + } + ).and_return(workflow) + + travel_to now do + described_class.call(envelope_download:) + end + + envelope_download.reload + expect(envelope_download.status).to eq('in_progress') + expect(envelope_download.started_at).to eq(now) + expect(envelope_download.finished_at).to be_nil + expect(envelope_download.internal_error_message).to be_nil + expect(envelope_download.argo_workflow_name).to eq('ce-registry-download-abc123') + expect(envelope_download.argo_workflow_namespace).to eq('credreg-staging') + expect(envelope_download.zip_files).to eq([]) + end 
+end diff --git a/spec/services/sync_envelope_download_workflow_status_spec.rb b/spec/services/sync_envelope_download_workflow_status_spec.rb new file mode 100644 index 00000000..a48496a3 --- /dev/null +++ b/spec/services/sync_envelope_download_workflow_status_spec.rb @@ -0,0 +1,107 @@ +require 'spec_helper' + +RSpec.describe SyncEnvelopeDownloadWorkflowStatus do + let(:client) { instance_double(ArgoWorkflowsClient) } + let(:community) { EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') } + let(:envelope_download) do + create( + :envelope_download, + :in_progress, + envelope_community: community, + argo_workflow_name: 'ce-registry-download-abc123', + argo_workflow_namespace: 'credreg-staging' + ) + end + let(:s3_client) { instance_double(Aws::S3::Client) } + let(:s3_resource) { instance_double(Aws::S3::Resource) } + let(:bucket) { instance_double(Aws::S3::Bucket) } + let(:object) { instance_double(Aws::S3::Object, public_url: 'https://downloads.example/batch-00001.zip') } + + before do + allow(ArgoWorkflowsClient).to receive(:new).and_return(client) + allow(ENV).to receive(:fetch).and_call_original + allow(ENV).to receive(:fetch).with('AWS_REGION').and_return('us-east-1') + allow(ENV).to receive(:fetch).with('ENVELOPE_DOWNLOADS_BUCKET').and_return('downloads-bucket') + end + + context 'when the workflow succeeds' do + before do + allow(client).to receive(:get_workflow).with(name: 'ce-registry-download-abc123').and_return( + status: { + phase: 'Succeeded', + finishedAt: '2026-03-06T12:10:00Z', + outputs: { + parameters: [ + { + name: 'zip-manifest', + value: { + batch_count: 2, + destination_bucket: 'downloads-bucket', + destination_prefix: "ce_registry/downloads/#{envelope_download.id}", + total_files: 12, + total_input_bytes: 123_456, + zip_files: [ + "ce_registry/downloads/#{envelope_download.id}/batch-00001.zip", + "ce_registry/downloads/#{envelope_download.id}/batch-00002.zip", + ], + zip_size_bytes: 45_678, + }.to_json + }, + ] + } + } + ) + + 
allow(Aws::S3::Client).to receive(:new).with(region: 'us-east-1').and_return(s3_client) + allow(s3_client).to receive(:head_object).with( + bucket: 'downloads-bucket', + key: "ce_registry/downloads/#{envelope_download.id}/batch-00001.zip" + ).and_return(true) + + allow(Aws::S3::Resource).to receive(:new).with(region: 'us-east-1').and_return(s3_resource) + allow(s3_resource).to receive(:bucket).with('downloads-bucket').and_return(bucket) + allow(bucket).to receive(:object) + .with("ce_registry/downloads/#{envelope_download.id}/batch-00001.zip") + .and_return(object) + end + + it 'stores the download URL and marks the download finished' do + described_class.call(envelope_download:) + + envelope_download.reload + expect(envelope_download.status).to eq('finished') + expect(envelope_download.url).to eq('https://downloads.example/batch-00001.zip') + expect(envelope_download.zip_files).to eq( + [ + "ce_registry/downloads/#{envelope_download.id}/batch-00001.zip", + "ce_registry/downloads/#{envelope_download.id}/batch-00002.zip", + ] + ) + expect(envelope_download.internal_error_message).to be_nil + expect(envelope_download.finished_at).to eq(Time.zone.parse('2026-03-06T12:10:00Z')) + end + end + + context 'when the workflow fails' do + before do + allow(client).to receive(:get_workflow).with(name: 'ce-registry-download-abc123').and_return( + status: { + phase: 'Failed', + finishedAt: '2026-03-06T12:10:00Z', + message: 'zip task failed' + } + ) + end + + it 'marks the download failed' do + described_class.call(envelope_download:) + + envelope_download.reload + expect(envelope_download.status).to eq('finished') + expect(envelope_download.url).to be_nil + expect(envelope_download.zip_files).to eq([]) + expect(envelope_download.internal_error_message).to eq('zip task failed') + expect(envelope_download.finished_at).to eq(Time.zone.parse('2026-03-06T12:10:00Z')) + end + end +end diff --git a/workflows/tasks/s3-graphs-zip/.dockerignore 
b/workflows/tasks/s3-graphs-zip/.dockerignore new file mode 100644 index 00000000..15c2a746 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/.dockerignore @@ -0,0 +1,4 @@ +.venv +__pycache__ +.pytest_cache +*.pyc diff --git a/workflows/tasks/s3-graphs-zip/.gitignore b/workflows/tasks/s3-graphs-zip/.gitignore new file mode 100644 index 00000000..abf53dc9 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/.gitignore @@ -0,0 +1,35 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +*.so +.python-version + +# Virtual environments +.venv/ +venv/ + +# Tooling caches +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.coverage +htmlcov/ + +# Build artifacts +build/ +dist/ +*.egg-info/ + +# Local env/config +.env +.env.* + +# OS/editor files +.DS_Store +.vscode/ +.idea/ + +# LocalStack / Docker overrides +localstack-data/ diff --git a/workflows/tasks/s3-graphs-zip/Dockerfile b/workflows/tasks/s3-graphs-zip/Dockerfile new file mode 100644 index 00000000..0d7e26c8 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN pip install --no-cache-dir boto3 + +COPY main.py /app/main.py + +ENTRYPOINT ["python", "/app/main.py"] +CMD ["--help"] diff --git a/workflows/tasks/s3-graphs-zip/README.md b/workflows/tasks/s3-graphs-zip/README.md new file mode 100644 index 00000000..f0353a9f --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/README.md @@ -0,0 +1,224 @@ +# s3-graphs-zip + +Streams CE graph `.json` objects from one S3 bucket into ZIP batches and uploads +the resulting archives to another bucket. 
+ +## What it does + +- lists JSON files from source bucket/prefix +- groups them into batches based on target ZIP size +- processes batches in parallel +- streams each object directly into a ZIP archive +- streams the ZIP output directly back to S3 +- uses multipart upload automatically for larger archives + +## Requirements + +- Python 3.13+ + +## Local install + +For local testing: + +```bash +uv sync --dev +``` + +## Run locally + +Make sure AWS credentials are available in the environment or via your usual AWS +configuration files. For LocalStack or other S3-compatible endpoints, any dummy +credentials accepted by that service are sufficient. + +```bash +python main.py \ + --source-bucket source-bucket \ + --source-prefix graphs/ \ + --destination-bucket destination-bucket \ + --destination-prefix zipped/ \ + --max-uncompressed-zip-size-bytes 209715200 \ + --max-workers 4 \ + --max-input-files 500 +``` + +## Parameters + +Required parameters: + +- `--source-bucket` + Source S3 bucket that contains the input graph objects. +- `--destination-bucket` + Destination S3 bucket where the generated ZIP archives will be uploaded. + +Optional parameters: + +- `--source-prefix` + Prefix inside the source bucket to scan for input files. Only keys ending in + `.json` under this prefix are included. Default: empty prefix. +- `--destination-prefix` + Prefix inside the destination bucket where ZIP files are written. The task + writes `batch-00001.zip`, `batch-00002.zip`, and so on under this prefix. + Default: empty prefix. +- `--max-uncompressed-zip-size-bytes` + Target maximum total input size per ZIP batch, measured using the source + object sizes reported by S3. This is the primary batching control and is an + estimate of final ZIP size, not an exact compressed-size guarantee. A value of + `209715200` targets about 200 MiB of uncompressed input per ZIP. Default: + `209715200`. 
+- `--batch-size` + Optional maximum number of input `.json` files allowed in a single ZIP batch. + This acts as a safety cap on top of `--max-uncompressed-zip-size-bytes` for + cases where many tiny files would otherwise end up in one archive. Default: + `1000`. +- `--max-workers` + Number of batches to process concurrently. Each worker streams one ZIP archive + to S3 at a time. Default: `4`. +- `--max-input-files` + Optional cap on how many input `.json` files are processed in a run. Useful + for test runs, incremental validation, or limiting blast radius while tuning. + Default: no limit. +- `--region` + AWS region for the S3 client. If omitted, boto3 falls back to standard AWS + region resolution from the environment or AWS config files. +- `--endpoint-url` + Custom S3 endpoint URL for LocalStack or another S3-compatible service. +- `--read-chunk-size` + Number of bytes to read from each source object per streaming read. Increase + it to reduce request overhead; decrease it to lower per-stream memory usage. + Default: `1048576` (1 MiB). +- `--part-size` + Multipart upload part size in bytes for streaming ZIP uploads to S3. Must be + at least `5242880` (5 MiB), which is the S3 multipart minimum. Default: + `8388608` (8 MiB). +- `--manifest-path` + Optional filesystem path where the task writes a JSON manifest describing the + run output. This is used by Argo to capture the produced ZIP file list as a + workflow output parameter. Default: no manifest file is written. + +## Completion webhook + +If `WEBHOOK_URL` is set, the CLI sends a `POST` request to that URL when processing +finishes. This happens for both successful and failed runs. `ENVIRONMENT` controls +the label in the message and defaults to `staging`. 
+ +Example: + +```bash +export WEBHOOK_URL="https://example.com/webhooks/s3-graphs-zip" +export ENVIRONMENT="staging" + +python main.py \ + --source-bucket source-bucket \ + --source-prefix graphs/ \ + --destination-bucket destination-bucket \ + --destination-prefix zipped/run-123 +``` + +The request body is JSON: + +```json +{"text": "..."} +``` + +## Output manifest + +If `--manifest-path` is provided, the task writes a JSON document containing the +uploaded ZIP keys and summary metadata. + +Example: + +```json +{ + "batch_count": 2, + "destination_bucket": "destination-bucket", + "destination_prefix": "zipped/run-123", + "total_files": 12, + "total_input_bytes": 73400320, + "zip_files": [ + "zipped/run-123/batch-00001.zip", + "zipped/run-123/batch-00002.zip" + ], + "zip_size_bytes": 18350080 +} +``` + +The task also prints this same manifest to stdout when processing completes. + +## Destination prefix strategy + +The destination bucket is expected to already exist. + +Use `--destination-prefix` as a run-specific output directory so each execution writes +into its own prefix instead of reusing previous batch keys. + +Example: + +```bash +python main.py \ + --source-bucket source-bucket \ + --source-prefix graphs/ \ + --destination-bucket destination-bucket \ + --destination-prefix "zipped/2026-03-06T14-22-10Z" \ + --max-uncompressed-zip-size-bytes 209715200 \ + --max-workers 4 +``` + +This produces objects like: + +- `zipped/2026-03-06T14-22-10Z/batch-00001.zip` +- `zipped/2026-03-06T14-22-10Z/batch-00002.zip` + +If you reuse the same destination prefix, objects with the same batch key will be overwritten. + +## Test + +Unit tests: + +```bash +uv run pytest -q +``` + +Integration tests against LocalStack: + +```bash +docker compose up --build tests +``` + +The Compose setup starts LocalStack and configures the test container with the +required endpoint and dummy AWS credentials automatically. + +## Docker + +Build: + +```bash +docker build -t s3-graphs-zip . 
+``` + +Run: + +```bash +docker run --rm s3-graphs-zip --help +``` + +Example: + +```bash +docker run --rm \ + -e AWS_ACCESS_KEY_ID=test \ + -e AWS_SECRET_ACCESS_KEY=test \ + -e AWS_DEFAULT_REGION=us-east-1 \ + s3-graphs-zip \ + --source-bucket source-bucket \ + --source-prefix graphs/ \ + --destination-bucket destination-bucket \ + --destination-prefix zipped/ +``` + +For LocalStack or another S3-compatible endpoint, also pass `--endpoint-url`. + +## Notes + +- Only keys ending in `.json` are included. +- Files inside each ZIP are stored relative to `--source-prefix`. +- Output archives are named `batch-00001.zip`, `batch-00002.zip`, etc. diff --git a/workflows/tasks/s3-graphs-zip/docker-compose.yml b/workflows/tasks/s3-graphs-zip/docker-compose.yml new file mode 100644 index 00000000..1fb89bd7 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/docker-compose.yml @@ -0,0 +1,35 @@ +services: + localstack: + image: localstack/localstack:4.1 + ports: + - "4566:4566" + environment: + SERVICES: s3 + DEBUG: 1 + AWS_DEFAULT_REGION: us-east-1 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:4566/_localstack/health"] + interval: 5s + timeout: 3s + retries: 20 + volumes: + - localstack-data:/var/lib/localstack + + tests: + image: python:3.13-slim + depends_on: + localstack: + condition: service_healthy + working_dir: /app + volumes: + - .:/app + environment: + AWS_ENDPOINT_URL: http://localstack:4566 + AWS_DEFAULT_REGION: us-east-1 + AWS_ACCESS_KEY_ID: test + AWS_SECRET_ACCESS_KEY: test + command: > + sh -lc "pip install --no-cache-dir boto3 pytest && python -m pytest -q -m integration" + +volumes: + localstack-data: diff --git a/workflows/tasks/s3-graphs-zip/main.py b/workflows/tasks/s3-graphs-zip/main.py new file mode 100644 index 00000000..7c840b13 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/main.py @@ -0,0 +1,590 @@ +from __future__ import annotations + +import argparse +from concurrent.futures import FIRST_COMPLETED, Future, ThreadPoolExecutor, wait 
+from contextlib import closing +from dataclasses import dataclass +import io +import json +import os +from pathlib import Path +import sys +import time +from typing import Any, Iterable +from urllib import request +import zipfile + +import boto3 + +DEFAULT_TIMEOUT_SECONDS = 30 + + +@dataclass(slots=True) +class BatchResult: + batch_number: int + object_count: int + destination_key: str + zip_size_bytes: int + + +@dataclass(slots=True) +class SourceObject: + key: str + size_bytes: int + + +class S3UploadWriter(io.RawIOBase): + def __init__( + self, + client: Any, + bucket: str, + key: str, + *, + part_size: int = 8 * 1024 * 1024, + multipart_threshold: int | None = None, + content_type: str = "application/zip", + ) -> None: + if part_size < 5 * 1024 * 1024: + raise ValueError("part_size must be at least 5 MiB for S3 multipart uploads") + + self._client = client + self._bucket = bucket + self._key = key + self._part_size = part_size + self._multipart_threshold = multipart_threshold or part_size + self._content_type = content_type + + self._buffer = bytearray() + self._upload_id: str | None = None + self._parts: list[dict[str, Any]] = [] + self._part_number = 1 + self._bytes_written = 0 + self._closed = False + self._aborted = False + + def __enter__(self) -> S3UploadWriter: + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool: + if exc_type is None: + self.close() + else: + self.discard() + return False + + def writable(self) -> bool: + return True + + @property + def bytes_written(self) -> int: + return self._bytes_written + + @property + def closed(self) -> bool: + return self._closed + + def write(self, b: bytes | bytearray) -> int: + if self._closed: + raise ValueError("I/O operation on closed writer") + + if not b: + return 0 + + self._buffer.extend(b) + self._bytes_written += len(b) + + if self._upload_id is None and len(self._buffer) >= self._multipart_threshold: + self._start_multipart_upload() + + if self._upload_id is 
not None: + self._flush_full_parts() + + return len(b) + + def close(self) -> None: + if self._closed: + return + + try: + if self._upload_id is None: + self._client.put_object( + Bucket=self._bucket, + Key=self._key, + Body=bytes(self._buffer), + ContentType=self._content_type, + ) + self._buffer.clear() + else: + if self._buffer: + self._upload_part(bytes(self._buffer)) + self._buffer.clear() + + self._client.complete_multipart_upload( + Bucket=self._bucket, + Key=self._key, + UploadId=self._upload_id, + MultipartUpload={"Parts": self._parts}, + ) + except Exception: + self.discard() + raise + finally: + self._closed = True + super().close() + + def discard(self) -> None: + if self._closed: + return + + self._buffer.clear() + + if self._upload_id is not None and not self._aborted: + self._client.abort_multipart_upload( + Bucket=self._bucket, + Key=self._key, + UploadId=self._upload_id, + ) + self._aborted = True + + self._closed = True + super().close() + + def _start_multipart_upload(self) -> None: + response = self._client.create_multipart_upload( + Bucket=self._bucket, + Key=self._key, + ContentType=self._content_type, + ) + self._upload_id = response["UploadId"] + + def _flush_full_parts(self) -> None: + while len(self._buffer) >= self._part_size: + chunk = bytes(self._buffer[: self._part_size]) + del self._buffer[: self._part_size] + self._upload_part(chunk) + + def _upload_part(self, chunk: bytes) -> None: + response = self._client.upload_part( + Bucket=self._bucket, + Key=self._key, + UploadId=self._upload_id, + PartNumber=self._part_number, + Body=chunk, + ) + self._parts.append( + { + "ETag": response["ETag"], + "PartNumber": self._part_number, + } + ) + self._part_number += 1 + + +def list_json_keys( + client: Any, + bucket: str, + prefix: str = "", + limit: int | None = None, +) -> list[SourceObject]: + if limit is not None and limit <= 0: + raise ValueError("limit must be greater than zero") + + normalized_prefix = prefix.strip("/") + listing_prefix = 
f"{normalized_prefix}/" if normalized_prefix else "" + + paginator = client.get_paginator("list_objects_v2") + keys: list[SourceObject] = [] + + for page in paginator.paginate(Bucket=bucket, Prefix=listing_prefix): + for item in page.get("Contents", []): + key = item["Key"] + if not key.endswith(".json"): + continue + + keys.append(SourceObject(key=key, size_bytes=item["Size"])) + if limit is not None and len(keys) == limit: + return keys + + keys.sort(key=lambda item: item.key) + return keys + + +def chunked_by_estimated_size( + items: list[SourceObject], + *, + max_uncompressed_zip_size_bytes: int, + max_batch_size: int | None = None, +) -> Iterable[list[SourceObject]]: + if max_uncompressed_zip_size_bytes <= 0: + raise ValueError("max_uncompressed_zip_size_bytes must be greater than zero") + if max_batch_size is not None and max_batch_size <= 0: + raise ValueError("max_batch_size must be greater than zero") + + batch: list[SourceObject] = [] + batch_size_bytes = 0 + + for item in items: + would_exceed_size = ( + batch and batch_size_bytes + item.size_bytes > max_uncompressed_zip_size_bytes + ) + would_exceed_count = max_batch_size is not None and len(batch) >= max_batch_size + + if would_exceed_size or would_exceed_count: + yield batch + batch = [] + batch_size_bytes = 0 + + batch.append(item) + batch_size_bytes += item.size_bytes + + if batch: + yield batch + + +def build_destination_key(destination_prefix: str, batch_number: int) -> str: + cleaned = destination_prefix.strip("/") + filename = f"batch-{batch_number:05d}.zip" + return f"{cleaned}/{filename}" if cleaned else filename + + +def stream_objects_to_zip( + client: Any, + *, + source_bucket: str, + source_prefix: str, + destination_bucket: str, + destination_key: str, + object_keys: list[str], + batch_number: int, + read_chunk_size: int = 1024 * 1024, + part_size: int = 8 * 1024 * 1024, +) -> BatchResult: + if read_chunk_size <= 0: + raise ValueError("read_chunk_size must be greater than zero") + + 
normalized_prefix = source_prefix.strip("/") + prefix_with_slash = f"{normalized_prefix}/" if normalized_prefix else "" + + with S3UploadWriter( + client, + destination_bucket, + destination_key, + part_size=part_size, + ) as writer: + with zipfile.ZipFile( + writer, + mode="w", + compression=zipfile.ZIP_DEFLATED, + compresslevel=6, + ) as archive: + for key in object_keys: + arcname = key + if prefix_with_slash and key.startswith(prefix_with_slash): + arcname = key[len(prefix_with_slash) :] + + response = client.get_object(Bucket=source_bucket, Key=key) + with ( + closing(response["Body"]) as body, + archive.open(arcname, mode="w") as archive_entry, + ): + while chunk := body.read(read_chunk_size): + archive_entry.write(chunk) + + return BatchResult( + batch_number=batch_number, + object_count=len(object_keys), + destination_key=destination_key, + zip_size_bytes=writer.bytes_written, + ) + + +def process_batches( + client: Any, + *, + source_bucket: str, + source_prefix: str, + destination_bucket: str, + destination_prefix: str, + max_uncompressed_zip_size_bytes: int, + max_workers: int, + source_objects: list[SourceObject] | None = None, + max_batch_size: int | None = None, + read_chunk_size: int = 1024 * 1024, + part_size: int = 8 * 1024 * 1024, +) -> list[BatchResult]: + source_objects = source_objects or list_json_keys(client, source_bucket, source_prefix) + if not source_objects: + return [] + + results: list[BatchResult] = [] + batch_iter = enumerate( + chunked_by_estimated_size( + source_objects, + max_uncompressed_zip_size_bytes=max_uncompressed_zip_size_bytes, + max_batch_size=max_batch_size, + ), + start=1, + ) + + def submit_batch( + executor: ThreadPoolExecutor, + batch_number: int, + batch_objects: list[SourceObject], + ) -> Future[BatchResult]: + destination_key = build_destination_key(destination_prefix, batch_number) + return executor.submit( + stream_objects_to_zip, + client, + source_bucket=source_bucket, + source_prefix=source_prefix, + 
destination_bucket=destination_bucket, + destination_key=destination_key, + object_keys=[item.key for item in batch_objects], + batch_number=batch_number, + read_chunk_size=read_chunk_size, + part_size=part_size, + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + pending: set[Future[BatchResult]] = set() + + for _ in range(max_workers): + try: + batch_number, batch_objects = next(batch_iter) + except StopIteration: + break + pending.add(submit_batch(executor, batch_number, batch_objects)) + + while pending: + done, pending = wait(pending, return_when=FIRST_COMPLETED) + + completed_results = [future.result() for future in done] + results.extend(completed_results) + + for _ in completed_results: + try: + batch_number, batch_objects = next(batch_iter) + except StopIteration: + continue + pending.add(submit_batch(executor, batch_number, batch_objects)) + + return sorted(results, key=lambda result: result.batch_number) + + +def format_duration(duration_seconds: float) -> str: + total_seconds = max(0, int(round(duration_seconds))) + hours, remainder = divmod(total_seconds, 3600) + minutes, seconds = divmod(remainder, 60) + + if hours: + return f"{hours}h {minutes}m {seconds}s" + if minutes: + return f"{minutes}m {seconds}s" + return f"{seconds}s" + + +def build_processing_done_text( + *, + environment_name: str, + duration_seconds: float, + total_files: int, + zip_size_bytes: int, + destination_bucket: str, + uploaded_location: str, + error_msg: str | None, +) -> str: + dur_str = format_duration(duration_seconds) + zip_size_mb = zip_size_bytes / (1024 * 1024) + + if error_msg: + return ( + f":x: *CE Registry ZIP bundle failed* ({environment_name})\n" + f">*Duration:* {dur_str}\n" + f">*Error:* {error_msg}" + ) + + return ( + f":white_check_mark: *CE Registry ZIP bundle succeeded* ({environment_name})\n" + f">*Files:* {total_files:,}\n" + f">*ZIP size:* {zip_size_mb:.2f} MB\n" + f">*Uploaded:* `s3://{destination_bucket}/{uploaded_location}`\n" + 
f">*Duration:* {dur_str}" + ) + + +def send_processing_done_webhook( + webhook_url: str, + *, + text: str, + timeout: int = DEFAULT_TIMEOUT_SECONDS, +) -> None: + body = json.dumps({"text": text}).encode("utf-8") + req = request.Request( + webhook_url, + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with request.urlopen(req, timeout=timeout) as response: + status_code = getattr(response, "status", None) + if status_code is not None and not 200 <= status_code < 300: + raise RuntimeError(f"webhook returned unexpected status code: {status_code}") + + +def build_output_manifest( + *, + source_objects: list[SourceObject], + destination_bucket: str, + destination_prefix: str, + results: list[BatchResult], +) -> dict[str, Any]: + return { + "batch_count": len(results), + "destination_bucket": destination_bucket, + "destination_prefix": destination_prefix.strip("/"), + "total_files": sum(result.object_count for result in results), + "total_input_bytes": sum(item.size_bytes for item in source_objects), + "zip_files": [result.destination_key for result in results], + "zip_size_bytes": sum(result.zip_size_bytes for result in results), + } + + +def write_output_manifest(manifest_path: str | None, manifest: dict[str, Any]) -> None: + if not manifest_path: + return + + path = Path(manifest_path) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(manifest), encoding="utf-8") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Stream JSON objects from S3 into ZIP batches") + parser.add_argument("--source-bucket", required=True) + parser.add_argument("--source-prefix", default="") + parser.add_argument("--destination-bucket", required=True) + parser.add_argument("--destination-prefix", default="") + parser.add_argument("--max-uncompressed-zip-size-bytes", type=int, default=200 * 1024 * 1024) + parser.add_argument("--batch-size", type=int, default=1000) + 
parser.add_argument("--max-workers", type=int, default=4) + parser.add_argument("--max-input-files", type=int, default=None) + parser.add_argument("--region", default=None) + parser.add_argument("--endpoint-url", default=None) + parser.add_argument("--read-chunk-size", type=int, default=1024 * 1024) + parser.add_argument("--part-size", type=int, default=8 * 1024 * 1024) + parser.add_argument("--manifest-path", default=None) + return parser + + +def get_uploaded_location(destination_prefix: str, results: list[BatchResult]) -> str: + if len(results) == 1: + return results[0].destination_key + + cleaned_prefix = destination_prefix.strip("/") + return f"{cleaned_prefix}/" if cleaned_prefix else "" + + +def send_completion_webhook( + webhook_url: str | None, + *, + environment_name: str, + started_at: float, + destination_bucket: str, + destination_prefix: str, + results: list[BatchResult], + error_msg: str | None, +) -> None: + if not webhook_url: + return + + text = build_processing_done_text( + environment_name=environment_name, + duration_seconds=time.monotonic() - started_at, + total_files=sum(result.object_count for result in results), + zip_size_bytes=sum(result.zip_size_bytes for result in results), + destination_bucket=destination_bucket, + uploaded_location=get_uploaded_location(destination_prefix, results), + error_msg=error_msg, + ) + send_processing_done_webhook(webhook_url, text=text) + + +def main() -> int: + args = build_parser().parse_args() + + client_kwargs = {} + if args.region: + client_kwargs["region_name"] = args.region + if args.endpoint_url: + client_kwargs["endpoint_url"] = args.endpoint_url + + client = boto3.client("s3", **client_kwargs) + webhook_url = os.getenv("WEBHOOK_URL") + environment_name = os.getenv("ENVIRONMENT", "staging") + + started_at = time.monotonic() + results: list[BatchResult] = [] + source_objects = list_json_keys( + client, + args.source_bucket, + args.source_prefix, + limit=args.max_input_files, + ) + + try: + results = 
process_batches( + client, + source_bucket=args.source_bucket, + source_prefix=args.source_prefix, + destination_bucket=args.destination_bucket, + destination_prefix=args.destination_prefix, + max_uncompressed_zip_size_bytes=args.max_uncompressed_zip_size_bytes, + max_workers=args.max_workers, + source_objects=source_objects, + max_batch_size=args.batch_size, + read_chunk_size=args.read_chunk_size, + part_size=args.part_size, + ) + except Exception as exc: + send_completion_webhook( + webhook_url, + environment_name=environment_name, + started_at=started_at, + destination_bucket=args.destination_bucket, + destination_prefix=args.destination_prefix, + results=results, + error_msg=str(exc), + ) + raise + + send_completion_webhook( + webhook_url, + environment_name=environment_name, + started_at=started_at, + destination_bucket=args.destination_bucket, + destination_prefix=args.destination_prefix, + results=results, + error_msg=None, + ) + + manifest = build_output_manifest( + source_objects=source_objects, + destination_bucket=args.destination_bucket, + destination_prefix=args.destination_prefix, + results=results, + ) + write_output_manifest(args.manifest_path, manifest) + print(json.dumps(manifest), file=sys.stdout) + + for result in results: + print( + f"batch={result.batch_number} objects={result.object_count} destination={result.destination_key}", + file=sys.stdout, + ) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/workflows/tasks/s3-graphs-zip/pyproject.toml b/workflows/tasks/s3-graphs-zip/pyproject.toml new file mode 100644 index 00000000..a44486b5 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "s3-graphs-zip" +version = "0.1.0" +description = "Stream JSON objects from one S3 bucket into ZIP batches uploaded to another bucket" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "boto3>=1.37.0", +] + +[dependency-groups] +dev = [ + "pytest>=8.3.5", +] 
+ +[tool.pytest.ini_options] +pythonpath = ["."] +markers = [ + "integration: tests that require LocalStack or another S3-compatible endpoint", +] diff --git a/workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py b/workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py new file mode 100644 index 00000000..5fc66a77 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +from contextlib import contextmanager +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +import io +import json +import os +from queue import Empty, Queue +import subprocess +import sys +from threading import Thread +import zipfile + +import boto3 +import pytest + +from main import process_batches + + +pytestmark = pytest.mark.integration +requires_localstack = pytest.mark.skipif( + not os.getenv("AWS_ENDPOINT_URL"), + reason="AWS_ENDPOINT_URL is not configured", +) + + +def create_s3_client(): + return boto3.client( + "s3", + endpoint_url=os.environ["AWS_ENDPOINT_URL"], + region_name=os.getenv("AWS_DEFAULT_REGION", "us-east-1"), + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID", "test"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY", "test"), + ) + + +def empty_bucket(client, bucket_name: str) -> None: + paginator = client.get_paginator("list_objects_v2") + for page in paginator.paginate(Bucket=bucket_name): + contents = page.get("Contents", []) + if contents: + client.delete_objects( + Bucket=bucket_name, + Delete={"Objects": [{"Key": item["Key"]} for item in contents]}, + ) + + +def ensure_clean_bucket(client, bucket_name: str) -> None: + existing_buckets = {bucket["Name"] for bucket in client.list_buckets().get("Buckets", [])} + if bucket_name in existing_buckets: + empty_bucket(client, bucket_name) + else: + client.create_bucket(Bucket=bucket_name) + + +def delete_bucket(client, bucket_name: str) -> None: + empty_bucket(client, bucket_name) + 
client.delete_bucket(Bucket=bucket_name) + + +def seed_source_bucket(client, bucket_name: str, *, total_graphs: int) -> dict[str, bytes]: + graph_bodies = { + f"graphs/graph-{index:03d}.json": f'{{"graph": {index}}}'.encode() + for index in range(1, total_graphs + 1) + } + for key, body in graph_bodies.items(): + client.put_object(Bucket=bucket_name, Key=key, Body=body) + + client.put_object(Bucket=bucket_name, Key="graphs/ignore.txt", Body=b"ignore") + return graph_bodies + + +def build_batch_key(destination_prefix: str, batch_number: int) -> str: + cleaned_prefix = destination_prefix.strip("/") + filename = f"batch-{batch_number:05d}.zip" + return f"{cleaned_prefix}/{filename}" if cleaned_prefix else filename + + +def assert_uploaded_batches( + client, + *, + destination_bucket: str, + destination_prefix: str, + expected_batch_sizes: list[int], + graph_bodies: dict[str, bytes], +) -> None: + next_graph_index = 1 + + for batch_number, expected_count in enumerate(expected_batch_sizes, start=1): + zip_bytes = client.get_object( + Bucket=destination_bucket, + Key=build_batch_key(destination_prefix, batch_number), + )["Body"].read() + + start = next_graph_index + end = start + expected_count - 1 + expected_names = [f"graph-{index:03d}.json" for index in range(start, end + 1)] + + with zipfile.ZipFile(io.BytesIO(zip_bytes)) as archive: + assert sorted(archive.namelist()) == expected_names + for index in range(start, end + 1): + assert archive.read(f"graph-{index:03d}.json") == graph_bodies[ + f"graphs/graph-{index:03d}.json" + ] + + next_graph_index = end + 1 + + +@contextmanager +def webhook_server(status_code: int = 204): + received: Queue[dict[str, object]] = Queue() + + class Handler(BaseHTTPRequestHandler): + def do_POST(self) -> None: + content_length = int(self.headers.get("Content-Length", "0")) + body = self.rfile.read(content_length) + received.put( + { + "path": self.path, + "headers": dict(self.headers), + "body": body, + } + ) + 
self.send_response(status_code) + self.end_headers() + + def log_message(self, format: str, *args: object) -> None: + return + + server = ThreadingHTTPServer(("127.0.0.1", 0), Handler) + thread = Thread(target=server.serve_forever, daemon=True) + thread.start() + + host, port = server.server_address + try: + yield f"http://{host}:{port}/webhook", received + finally: + server.shutdown() + thread.join(timeout=5) + server.server_close() + + +@requires_localstack +def test_end_to_end_with_localstack() -> None: + client = create_s3_client() + source_bucket = "source-graphs-service" + destination_bucket = "dest-archives-service" + + ensure_clean_bucket(client, source_bucket) + ensure_clean_bucket(client, destination_bucket) + + try: + graph_bodies = seed_source_bucket(client, source_bucket, total_graphs=50) + + results = process_batches( + client, + source_bucket=source_bucket, + source_prefix="graphs/", + destination_bucket=destination_bucket, + destination_prefix="zips", + max_uncompressed_zip_size_bytes=150, + max_workers=4, + read_chunk_size=1024, + part_size=5 * 1024 * 1024, + ) + + assert [result.destination_key for result in results] == [ + "zips/batch-00001.zip", + "zips/batch-00002.zip", + "zips/batch-00003.zip", + "zips/batch-00004.zip", + "zips/batch-00005.zip", + ] + assert_uploaded_batches( + client, + destination_bucket=destination_bucket, + destination_prefix="zips", + expected_batch_sizes=[12, 11, 11, 11, 5], + graph_bodies=graph_bodies, + ) + finally: + delete_bucket(client, source_bucket) + delete_bucket(client, destination_bucket) + + +@requires_localstack +def test_cli_sends_completion_webhook_after_uploading_batches() -> None: + client = create_s3_client() + source_bucket = "source-graphs-webhook" + destination_bucket = "dest-archives-webhook" + + ensure_clean_bucket(client, source_bucket) + ensure_clean_bucket(client, destination_bucket) + + try: + graph_bodies = seed_source_bucket(client, source_bucket, total_graphs=12) + + with webhook_server() as 
(webhook_url, received): + completed = subprocess.run( + [ + sys.executable, + "main.py", + "--source-bucket", + source_bucket, + "--source-prefix", + "graphs/", + "--destination-bucket", + destination_bucket, + "--destination-prefix", + "zips", + "--max-uncompressed-zip-size-bytes", + "150", + "--batch-size", + "100", + "--max-workers", + "2", + "--read-chunk-size", + "1024", + "--part-size", + str(5 * 1024 * 1024), + ], + env={ + **os.environ, + "WEBHOOK_URL": webhook_url, + "ENVIRONMENT": "integration", + }, + capture_output=True, + text=True, + check=False, + ) + + assert completed.returncode == 0, ( + f"stdout:\n{completed.stdout}\n\nstderr:\n{completed.stderr}" + ) + + manifest = json.loads(completed.stdout.splitlines()[0]) + assert manifest["batch_count"] == 1 + assert manifest["total_files"] == 12 + assert manifest["total_input_bytes"] > 0 + assert manifest["zip_files"] == ["zips/batch-00001.zip"] + assert manifest["zip_size_bytes"] > 0 + + request_data = received.get(timeout=5) + headers = request_data["headers"] + body = json.loads(request_data["body"]) + text = body["text"] + + assert request_data["path"] == "/webhook" + assert headers["Content-Type"] == "application/json" + assert text.startswith(":white_check_mark: *CE Registry ZIP bundle succeeded*") + assert "(integration)" in text + assert ">*Files:* 12" in text + assert f">*Uploaded:* `s3://{destination_bucket}/zips/batch-00001.zip`" in text + assert ">*ZIP size:* " in text + assert ">*Duration:* " in text + + with pytest.raises(Empty): + received.get_nowait() + + assert_uploaded_batches( + client, + destination_bucket=destination_bucket, + destination_prefix="zips", + expected_batch_sizes=[12], + graph_bodies=graph_bodies, + ) + finally: + delete_bucket(client, source_bucket) + delete_bucket(client, destination_bucket) diff --git a/workflows/tasks/s3-graphs-zip/tests/test_service.py b/workflows/tasks/s3-graphs-zip/tests/test_service.py new file mode 100644 index 00000000..8716fcec --- /dev/null 
+++ b/workflows/tasks/s3-graphs-zip/tests/test_service.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import pytest + +from main import ( + S3UploadWriter, + SourceObject, + build_destination_key, + build_output_manifest, + chunked_by_estimated_size, + stream_objects_to_zip, +) + + +def test_chunked_by_estimated_size_splits_items_by_target_bytes() -> None: + assert list( + chunked_by_estimated_size( + [ + SourceObject("a", 40), + SourceObject("b", 50), + SourceObject("c", 30), + SourceObject("d", 70), + ], + max_uncompressed_zip_size_bytes=100, + ) + ) == [ + [SourceObject("a", 40), SourceObject("b", 50)], + [SourceObject("c", 30), SourceObject("d", 70)], + ] + + +def test_chunked_by_estimated_size_respects_optional_batch_size_cap() -> None: + assert list( + chunked_by_estimated_size( + [ + SourceObject("a", 10), + SourceObject("b", 10), + SourceObject("c", 10), + ], + max_uncompressed_zip_size_bytes=100, + max_batch_size=2, + ) + ) == [ + [SourceObject("a", 10), SourceObject("b", 10)], + [SourceObject("c", 10)], + ] + + +@pytest.mark.parametrize( + ("max_uncompressed_zip_size_bytes", "max_batch_size", "error_message"), + [ + (0, None, "max_uncompressed_zip_size_bytes must be greater than zero"), + (-1, None, "max_uncompressed_zip_size_bytes must be greater than zero"), + (1, 0, "max_batch_size must be greater than zero"), + (1, -1, "max_batch_size must be greater than zero"), + ], +) +def test_chunked_by_estimated_size_validates_inputs( + max_uncompressed_zip_size_bytes: int, + max_batch_size: int | None, + error_message: str, +) -> None: + with pytest.raises(ValueError, match=error_message): + list( + chunked_by_estimated_size( + [SourceObject("a", 1)], + max_uncompressed_zip_size_bytes=max_uncompressed_zip_size_bytes, + max_batch_size=max_batch_size, + ) + ) + + +def test_build_destination_key_normalizes_prefix() -> None: + assert build_destination_key("out", 7) == "out/batch-00007.zip" + assert build_destination_key("/out/", 7) == 
"out/batch-00007.zip" + assert build_destination_key("", 7) == "batch-00007.zip" + + +def test_build_output_manifest_lists_uploaded_zip_files() -> None: + manifest = build_output_manifest( + source_objects=[], + destination_bucket="dest-bucket", + destination_prefix="/archives/run-1/", + results=[], + ) + + assert manifest == { + "batch_count": 0, + "destination_bucket": "dest-bucket", + "destination_prefix": "archives/run-1", + "total_files": 0, + "total_input_bytes": 0, + "zip_files": [], + "zip_size_bytes": 0, + } + + +@pytest.mark.parametrize("read_chunk_size", [0, -1]) +def test_stream_objects_to_zip_validates_read_chunk_size(read_chunk_size: int) -> None: + with pytest.raises(ValueError, match="read_chunk_size must be greater than zero"): + stream_objects_to_zip( + object(), + source_bucket="source", + source_prefix="graphs/", + destination_bucket="dest", + destination_key="archives/batch-00001.zip", + object_keys=["graphs/1.json"], + batch_number=1, + read_chunk_size=read_chunk_size, + part_size=5 * 1024 * 1024, + ) + + +@pytest.mark.parametrize("part_size", [0, 1024, 5 * 1024 * 1024 - 1]) +def test_s3_upload_writer_validates_minimum_part_size(part_size: int) -> None: + with pytest.raises(ValueError, match="part_size must be at least 5 MiB"): + S3UploadWriter(object(), "dest", "batch.zip", part_size=part_size) diff --git a/workflows/tasks/s3-graphs-zip/uv.lock b/workflows/tasks/s3-graphs-zip/uv.lock new file mode 100644 index 00000000..d5326602 --- /dev/null +++ b/workflows/tasks/s3-graphs-zip/uv.lock @@ -0,0 +1,162 @@ +version = 1 +revision = 3 +requires-python = ">=3.13" + +[[package]] +name = "boto3" +version = "1.42.62" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/7e/c952803c8900f14e6f6158fddbd35da5afb2e3fa68bf498a761e6ba2c2ae/boto3-1.42.62.tar.gz", hash = 
"sha256:6b26ff56c458685caec3d42adde0549f6a55410e557e1f51bebde5c8abcf3037", size = 112848, upload-time = "2026-03-05T21:20:37.755Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/68/b5e82dedd9c8d53a9542df4e3475d2d3ec331eef4a4a801e9c5fa98b583a/boto3-1.42.62-py3-none-any.whl", hash = "sha256:eef0ee08f30e5ed16d8296719808801a827fa0f3126a3e2a9ef9be9eb5e6a313", size = 140556, upload-time = "2026-03-05T21:20:35.354Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.62" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/e7/031f2f03f22817f8a8def7ad1caa138979c20ac35062b055274e0a505c3f/botocore-1.42.62.tar.gz", hash = "sha256:c210dc93b0b81bf72cfe745a7b1c8df765d04bd90b4ac6c8707fbb6714141dae", size = 14966114, upload-time = "2026-03-05T21:20:25.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/57/9bc5c1aad3a354dd7da54ba52d43ee821badb3deedbea4c5117c4bd05eab/botocore-1.42.62-py3-none-any.whl", hash = "sha256:86d327fded96775268ffe8d8bd6ed96c4a1db86cf24eb64ff85233db12dbc287", size = 14638389, upload-time = "2026-03-05T21:20:22.359Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "iniconfig" +version 
= "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size 
= 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = 
"2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "s3-graphs-zip" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "boto3" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + +[package.metadata] +requires-dist = [{ name = "boto3", specifier = ">=1.37.0" }] + +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=8.3.5" }] + +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = 
"sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] diff --git a/workflows/templates/s3-graphs-zip-workflow-template.yaml b/workflows/templates/s3-graphs-zip-workflow-template.yaml new file mode 100644 index 00000000..4d0658fb --- /dev/null +++ b/workflows/templates/s3-graphs-zip-workflow-template.yaml @@ -0,0 +1,88 @@ +apiVersion: argoproj.io/v1alpha1 +kind: WorkflowTemplate +metadata: + name: s3-graphs-zip + labels: + app: credential-registry +spec: + serviceAccountName: main-app-service-account + entrypoint: s3-graphs-zip + arguments: + parameters: + - name: task-image + - name: source-bucket + - name: source-prefix + - name: destination-bucket + - name: destination-prefix + - name: max-uncompressed-zip-size-bytes + value: "209715200" + - name: batch-size + value: "1000" + - name: max-workers + value: "4" + - name: aws-region + - name: environment + value: "staging" + templates: + - name: s3-graphs-zip + inputs: + parameters: + - name: task-image + - name: source-bucket + - name: source-prefix + - name: destination-bucket + - name: destination-prefix + - name: max-uncompressed-zip-size-bytes + - name: batch-size + - name: max-workers + - name: aws-region + - name: environment + metadata: + labels: + app: credential-registry + workflow: s3-graphs-zip + outputs: + parameters: + - name: zip-manifest + valueFrom: + path: /tmp/argo/zip-manifest.json + container: + image: "{{inputs.parameters.task-image}}" + imagePullPolicy: IfNotPresent + args: + - --source-bucket + - "{{inputs.parameters.source-bucket}}" + - --source-prefix + - "{{inputs.parameters.source-prefix}}" + - 
--destination-bucket + - "{{inputs.parameters.destination-bucket}}" + - --destination-prefix + - "{{inputs.parameters.destination-prefix}}" + - --max-uncompressed-zip-size-bytes + - "{{inputs.parameters.max-uncompressed-zip-size-bytes}}" + - --batch-size + - "{{inputs.parameters.batch-size}}" + - --max-workers + - "{{inputs.parameters.max-workers}}" + - --manifest-path + - /tmp/argo/zip-manifest.json + env: + - name: AWS_DEFAULT_REGION + value: "{{inputs.parameters.aws-region}}" + - name: ENVIRONMENT + value: "{{inputs.parameters.environment}}" + resources: + requests: + cpu: "1000m" + memory: "2Gi" + limits: + cpu: "2000m" + memory: "4Gi" + activeDeadlineSeconds: 10800 + retryStrategy: + limit: 2 + retryPolicy: OnFailure + backoff: + duration: "60s" + factor: 2 + maxDuration: "3h" From a6582408f40b91ebf80d25d7bfe9ba138ad1a999 Mon Sep 17 00:00:00 2001 From: Ariel Rolfo Date: Mon, 9 Mar 2026 15:58:46 -0300 Subject: [PATCH 2/6] Update Dockerfile, workflow template --- Dockerfile | 19 +++++++++++++++++++ .../s3-graphs-zip-workflow-template.yaml | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3f7db141..f531d9e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -167,6 +167,25 @@ RUN set -eux; \ done RUN set -eux; \ # Copy commonly required runtime shared libraries (no loop) + cp -a /usr/lib64/libcurl.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libnghttp2.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libidn2.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libpsl.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libssh2.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libunistring.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libnettle.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libhogweed.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libgnutls.so.* 
/runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libgmp.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libbrotlidec.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libbrotlicommon.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libzstd.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libnss3.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libnssutil3.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libsmime3.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libplc4.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libplds4.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ + cp -a /usr/lib64/libnspr4.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ cp -a /usr/lib64/libpq.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ cp -a /usr/lib64/libssl.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ cp -a /usr/lib64/libcrypto.so.* /runtime/usr/lib64/ 2>/dev/null || true; \ diff --git a/workflows/templates/s3-graphs-zip-workflow-template.yaml b/workflows/templates/s3-graphs-zip-workflow-template.yaml index 4d0658fb..3135f63a 100644 --- a/workflows/templates/s3-graphs-zip-workflow-template.yaml +++ b/workflows/templates/s3-graphs-zip-workflow-template.yaml @@ -48,7 +48,8 @@ spec: path: /tmp/argo/zip-manifest.json container: image: "{{inputs.parameters.task-image}}" - imagePullPolicy: IfNotPresent + command: ["python", "/app/main.py"] + imagePullPolicy: Always args: - --source-bucket - "{{inputs.parameters.source-bucket}}" From 2168f55e83ee4fe8f0821ed9d0810f93b35f4df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Saksida?= Date: Tue, 10 Mar 2026 19:29:17 -0300 Subject: [PATCH 3/6] Fix issues with Argo workflows client, S3 client --- app/api/v1/envelopes.rb | 27 ++++- app/services/argo_workflows_client.rb | 6 +- .../submit_envelope_download_workflow.rb | 46 ++++---- .../sync_envelope_download_workflow_status.rb | 37 
+++++-- spec/api/v1/envelopes_spec.rb | 63 +++++++++++ spec/services/argo_workflows_client_spec.rb | 48 +++++++++ .../submit_envelope_download_workflow_spec.rb | 18 +++- ..._envelope_download_workflow_status_spec.rb | 102 ++++++++++++++---- workflows/tasks/s3-graphs-zip/README.md | 2 +- workflows/tasks/s3-graphs-zip/main.py | 11 +- .../s3-graphs-zip-workflow-template.yaml | 13 ++- 11 files changed, 306 insertions(+), 67 deletions(-) create mode 100644 spec/services/argo_workflows_client_spec.rb diff --git a/app/api/v1/envelopes.rb b/app/api/v1/envelopes.rb index 786f41e5..3388a4e3 100644 --- a/app/api/v1/envelopes.rb +++ b/app/api/v1/envelopes.rb @@ -89,12 +89,29 @@ class Envelopes < MountableAPI desc 'Starts an envelope download' post do - @envelope_download.update!( - enqueued_at: Time.current, - status: :pending - ) + should_enqueue = false + + @envelope_download.with_lock do + active_download = @envelope_download.in_progress? || + (@envelope_download.pending? && @envelope_download.enqueued_at.present?) 
+ + unless active_download + @envelope_download.update!( + argo_workflow_name: nil, + argo_workflow_namespace: nil, + enqueued_at: Time.current, + finished_at: nil, + internal_error_backtrace: [], + internal_error_message: nil, + status: :pending, + url: nil, + zip_files: [] + ) + should_enqueue = true + end + end - DownloadEnvelopesJob.perform_later(@envelope_download.id) + DownloadEnvelopesJob.perform_later(@envelope_download.id) if should_enqueue present @envelope_download, with: API::Entities::EnvelopeDownload end end diff --git a/app/services/argo_workflows_client.rb b/app/services/argo_workflows_client.rb index d1a22dc3..0a7d7030 100644 --- a/app/services/argo_workflows_client.rb +++ b/app/services/argo_workflows_client.rb @@ -26,7 +26,7 @@ def submit_workflow(template_name:, parameters:, generate_name:) resourceKind: 'WorkflowTemplate', resourceName: template_name, submitOptions: { - generateName:, + generateName: generate_name, parameters: parameters.map { |key, value| "#{key}=#{value}" } } }, @@ -47,6 +47,10 @@ def build_configuration config.api_key['Authorization'] = ENV.fetch('ARGO_WORKFLOWS_TOKEN') config.api_key_prefix['Authorization'] = 'Bearer' config.timeout = ENV.fetch('ARGO_WORKFLOWS_TIMEOUT_SECONDS', 30).to_i + + # We run this in a secure environment, so it can be disabled + config.verify_ssl = false + config.verify_ssl_host = false end end end diff --git a/app/services/submit_envelope_download_workflow.rb b/app/services/submit_envelope_download_workflow.rb index d897f093..45d2bcf2 100644 --- a/app/services/submit_envelope_download_workflow.rb +++ b/app/services/submit_envelope_download_workflow.rb @@ -12,25 +12,29 @@ def initialize(envelope_download) end def call - workflow = client.submit_workflow( - template_name: ENV.fetch('ARGO_WORKFLOWS_TEMPLATE_NAME'), - generate_name: "#{community_name.tr('_', '-')}-download-", - parameters: - ) - workflow_name = workflow.dig(:metadata, :name) - raise 'Argo workflow submission did not return a workflow 
name' if workflow_name.blank? + envelope_download.with_lock do + return envelope_download if workflow_already_started? - envelope_download.update!( - argo_workflow_name: workflow_name, - argo_workflow_namespace: client.namespace, - finished_at: nil, - internal_error_backtrace: [], - internal_error_message: nil, - started_at: Time.current, - status: :in_progress, - zip_files: [], - url: nil - ) + workflow = client.submit_workflow( + template_name: ENV.fetch('ARGO_WORKFLOWS_TEMPLATE_NAME'), + generate_name: "#{community_name.tr('_', '-')}-download-", + parameters: + ) + workflow_name = workflow.dig(:metadata, :name) + raise 'Argo workflow submission did not return a workflow name' if workflow_name.blank? + + envelope_download.update!( + argo_workflow_name: workflow_name, + argo_workflow_namespace: client.namespace, + finished_at: nil, + internal_error_backtrace: [], + internal_error_message: nil, + started_at: Time.current, + status: :in_progress, + zip_files: [], + url: nil + ) + end rescue StandardError => e envelope_download.update!( argo_workflow_name: nil, @@ -61,7 +65,7 @@ def destination_prefix def parameters { - 'batch-size' => ENV.fetch('ARGO_WORKFLOWS_BATCH_SIZE', '1000'), + 'batch-size' => ENV.fetch('ARGO_WORKFLOWS_BATCH_SIZE', '25000'), 'aws-region' => ENV.fetch('AWS_REGION'), 'destination-bucket' => ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET'), 'destination-prefix' => destination_prefix, @@ -76,4 +80,8 @@ def parameters 'task-image' => ENV.fetch('ARGO_WORKFLOWS_TASK_IMAGE') } end + + def workflow_already_started? + envelope_download.in_progress? && envelope_download.argo_workflow_name.present? 
+ end end diff --git a/app/services/sync_envelope_download_workflow_status.rb b/app/services/sync_envelope_download_workflow_status.rb index 1d5dea30..1ea018a4 100644 --- a/app/services/sync_envelope_download_workflow_status.rb +++ b/app/services/sync_envelope_download_workflow_status.rb @@ -26,7 +26,7 @@ def call phase = status[:phase] if phase == SUCCESS_PHASE - mark_success!(status) + mark_success!(workflow:, status:) elsif FAILURE_PHASES.include?(phase) mark_failure!(status) elsif phase == RUNNING_PHASE @@ -35,6 +35,7 @@ def call envelope_download rescue ArgoWorkflowsApiClient::ApiError => e + mark_missing_workflow_as_failure!(e) if workflow_not_found?(e) MR.logger.warn("Unable to sync Argo workflow #{envelope_download.argo_workflow_name}: #{e.message}") envelope_download end @@ -64,6 +65,19 @@ def mark_failure!(status) ) end + def mark_missing_workflow_as_failure!(error) + envelope_download.update!( + argo_workflow_name: nil, + argo_workflow_namespace: nil, + finished_at: Time.current, + internal_error_backtrace: [], + internal_error_message: "Argo workflow not found: #{error.message}", + status: :finished, + zip_files: [], + url: nil + ) + end + def mark_in_progress!(status) envelope_download.update!( started_at: parse_time(status[:startedAt]) || envelope_download.started_at || Time.current, @@ -71,8 +85,8 @@ def mark_in_progress!(status) ) end - def mark_success!(status) - manifest = output_manifest(status) + def mark_success!(workflow:, status:) + manifest = output_manifest(workflow:, status:) zip_files = manifest.fetch('zip_files', []) if zip_files.present? @@ -104,22 +118,25 @@ def s3_client @s3_client ||= Aws::S3::Client.new(region: ENV.fetch('AWS_REGION')) end - def output_manifest(status) - parameter = status.fetch(:outputs, {}) - .fetch(:parameters, []) - .find { |item| item[:name] == 'zip-manifest' } + def output_manifest(workflow:, status:) + workflow_name = workflow.dig(:metadata, :name) + return {} if workflow_name.blank? 
+ + parameters = status.dig(:nodes, workflow_name.to_sym, :outputs, :parameters) || [] + parameter = parameters.find { |item| item[:name] == 'zip-manifest' } return {} unless parameter JSON.parse(parameter.fetch(:value)) end def public_url_for(key) - s3_client.head_object(bucket: destination_bucket, key:) Aws::S3::Resource.new(region: ENV.fetch('AWS_REGION')) .bucket(destination_bucket) .object(key) .public_url - rescue Aws::S3::Errors::NotFound, Aws::S3::Errors::NoSuchKey - nil + end + + def workflow_not_found?(error) + error.respond_to?(:code) && error.code.to_i == 404 end end diff --git a/spec/api/v1/envelopes_spec.rb b/spec/api/v1/envelopes_spec.rb index b5555069..e994cce6 100644 --- a/spec/api/v1/envelopes_spec.rb +++ b/spec/api/v1/envelopes_spec.rb @@ -346,6 +346,69 @@ expect_json('enqueued_at', now.as_json) expect_json('status', 'pending') end + + it 'clears previous failure fields when retrying a failed download' do + envelope_download.update!( + argo_workflow_name: 'old-workflow', + argo_workflow_namespace: 'credreg-staging', + finished_at: 5.minutes.ago.change(usec: 0), + internal_error_backtrace: ['boom'], + internal_error_message: 'zip task failed', + url: 'https://downloads.example/old.zip', + zip_files: ['old.zip'] + ) + + travel_to now do + expect { perform_request }.to enqueue_job(DownloadEnvelopesJob).with(envelope_download.id) + end + + expect_status(:created) + + envelope_download.reload + expect(envelope_download.status).to eq('pending') + expect(envelope_download.enqueued_at).to eq(now) + expect(envelope_download.finished_at).to be_nil + expect(envelope_download.internal_error_message).to be_nil + expect(envelope_download.internal_error_backtrace).to eq([]) + expect(envelope_download.url).to be_nil + expect(envelope_download.zip_files).to eq([]) + expect(envelope_download.argo_workflow_name).to be_nil + expect(envelope_download.argo_workflow_namespace).to be_nil + + expect_json_sizes(2) + expect_json('enqueued_at', now.as_json) + 
expect_json('status', 'pending') + end + + it 'does not enqueue a duplicate job when the download is already pending' do + envelope_download.update!( + enqueued_at: now, + status: :pending + ) + + expect { perform_request }.to not_enqueue_job(DownloadEnvelopesJob) + + expect_status(:created) + expect(envelope_download.reload.status).to eq('pending') + expect_json_sizes(2) + expect_json('enqueued_at', now.as_json) + expect_json('status', 'pending') + end + + it 'does not enqueue a duplicate job when the download is already in progress' do + envelope_download.update!( + started_at: now, + status: :in_progress + ) + + expect { perform_request }.to not_enqueue_job(DownloadEnvelopesJob) + + expect_status(:created) + expect(envelope_download.reload.status).to eq('in_progress') + expect_json_sizes(2) + expect_json('started_at', now.as_json) + expect_json('status', 'in_progress') + end end # rubocop:enable RSpec/MultipleMemoizedHelpers end diff --git a/spec/services/argo_workflows_client_spec.rb b/spec/services/argo_workflows_client_spec.rb new file mode 100644 index 00000000..ad421551 --- /dev/null +++ b/spec/services/argo_workflows_client_spec.rb @@ -0,0 +1,48 @@ +require 'spec_helper' + +RSpec.describe ArgoWorkflowsClient do + let(:api_client) { instance_double(ArgoWorkflowsApiClient::ApiClient) } + let(:workflow_service_api) { instance_double(ArgoWorkflowsApiClient::WorkflowServiceApi) } + let(:configuration) { instance_double(ArgoWorkflowsApiClient::Configuration) } + let(:workflow) { { metadata: { name: 'ce-registry-download-abc123' } } } + + before do + allow(ENV).to receive(:fetch).and_call_original + allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_NAMESPACE').and_return('credreg-staging') + allow(ArgoWorkflowsApiClient::ApiClient).to receive(:new).with(configuration).and_return(api_client) + allow(ArgoWorkflowsApiClient::WorkflowServiceApi) + .to receive(:new).with(api_client).and_return(workflow_service_api) + end + + describe '#submit_workflow' do + it 
'passes generateName and parameters to the Argo client' do + client = described_class.new(configuration:) + + allow(workflow_service_api).to receive(:workflow_service_submit_workflow) + .with( + { + namespace: 'credreg-staging', + resourceKind: 'WorkflowTemplate', + resourceName: 's3-graphs-zip', + submitOptions: { + generateName: 'ce-registry-download-', + parameters: ['source-prefix=ce_registry', 'destination-bucket=downloads-bucket'] + } + }, + 'credreg-staging', + return_type: 'Object' + ).and_return(workflow) + + result = client.submit_workflow( + template_name: 's3-graphs-zip', + generate_name: 'ce-registry-download-', + parameters: { + 'source-prefix' => 'ce_registry', + 'destination-bucket' => 'downloads-bucket' + } + ) + + expect(result).to eq(workflow) + end + end +end diff --git a/spec/services/submit_envelope_download_workflow_spec.rb b/spec/services/submit_envelope_download_workflow_spec.rb index e6bc384e..aa6a3a53 100644 --- a/spec/services/submit_envelope_download_workflow_spec.rb +++ b/spec/services/submit_envelope_download_workflow_spec.rb @@ -12,7 +12,7 @@ allow(ENV).to receive(:fetch).and_call_original allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_TEMPLATE_NAME').and_return('s3-graphs-zip') allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_TASK_IMAGE').and_return('registry:s3-graphs-zip') - allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_BATCH_SIZE', '1000').and_return('1000') + allow(ENV).to receive(:fetch).with('ARGO_WORKFLOWS_BATCH_SIZE', '25000').and_return('25000') allow(ENV).to receive(:fetch) .with('ARGO_WORKFLOWS_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES', '209715200') .and_return('209715200') @@ -28,7 +28,7 @@ template_name: 's3-graphs-zip', generate_name: 'ce-registry-download-', parameters: { - 'batch-size' => '1000', + 'batch-size' => '25000', 'aws-region' => 'us-east-1', 'destination-bucket' => 'downloads-bucket', 'destination-prefix' => "ce_registry/downloads/#{envelope_download.id}", @@ -54,4 +54,18 @@ 
expect(envelope_download.argo_workflow_namespace).to eq('credreg-staging') expect(envelope_download.zip_files).to eq([]) end + + it 'does not submit a second workflow when one is already in progress' do + envelope_download.update!( + argo_workflow_name: 'existing-workflow', + argo_workflow_namespace: 'credreg-staging', + status: :in_progress + ) + + expect(client).not_to receive(:submit_workflow) + + described_class.call(envelope_download:) + + expect(envelope_download.reload.argo_workflow_name).to eq('existing-workflow') + end end diff --git a/spec/services/sync_envelope_download_workflow_status_spec.rb b/spec/services/sync_envelope_download_workflow_status_spec.rb index a48496a3..63fd51dc 100644 --- a/spec/services/sync_envelope_download_workflow_status_spec.rb +++ b/spec/services/sync_envelope_download_workflow_status_spec.rb @@ -1,6 +1,16 @@ require 'spec_helper' RSpec.describe SyncEnvelopeDownloadWorkflowStatus do + let(:api_error_class) do + Class.new(StandardError) do + attr_reader :code + + def initialize(code, message) + @code = code + super(message) + end + end + end let(:client) { instance_double(ArgoWorkflowsClient) } let(:community) { EnvelopeCommunity.find_or_create_by!(name: 'ce_registry') } let(:envelope_download) do @@ -18,6 +28,7 @@ let(:object) { instance_double(Aws::S3::Object, public_url: 'https://downloads.example/batch-00001.zip') } before do + stub_const('ArgoWorkflowsApiClient::ApiError', api_error_class) allow(ArgoWorkflowsClient).to receive(:new).and_return(client) allow(ENV).to receive(:fetch).and_call_original allow(ENV).to receive(:fetch).with('AWS_REGION').and_return('us-east-1') @@ -27,37 +38,39 @@ context 'when the workflow succeeds' do before do allow(client).to receive(:get_workflow).with(name: 'ce-registry-download-abc123').and_return( + metadata: { + name: 'ce-registry-download-abc123' + }, status: { phase: 'Succeeded', finishedAt: '2026-03-06T12:10:00Z', - outputs: { - parameters: [ - { - name: 'zip-manifest', - value: { - 
batch_count: 2, - destination_bucket: 'downloads-bucket', - destination_prefix: "ce_registry/downloads/#{envelope_download.id}", - total_files: 12, - total_input_bytes: 123_456, - zip_files: [ - "ce_registry/downloads/#{envelope_download.id}/batch-00001.zip", - "ce_registry/downloads/#{envelope_download.id}/batch-00002.zip", - ], - zip_size_bytes: 45_678, - }.to_json - }, - ] + nodes: { + :'ce-registry-download-abc123' => { + outputs: { + parameters: [ + { + name: 'zip-manifest', + value: { + batch_count: 2, + destination_bucket: 'downloads-bucket', + destination_prefix: "ce_registry/downloads/#{envelope_download.id}", + total_files: 12, + total_input_bytes: 123_456, + zip_files: [ + "ce_registry/downloads/#{envelope_download.id}/batch-00001.zip", + "ce_registry/downloads/#{envelope_download.id}/batch-00002.zip", + ], + zip_size_bytes: 45_678, + }.to_json + }, + ] + } + } } } ) allow(Aws::S3::Client).to receive(:new).with(region: 'us-east-1').and_return(s3_client) - allow(s3_client).to receive(:head_object).with( - bucket: 'downloads-bucket', - key: "ce_registry/downloads/#{envelope_download.id}/batch-00001.zip" - ).and_return(true) - allow(Aws::S3::Resource).to receive(:new).with(region: 'us-east-1').and_return(s3_resource) allow(s3_resource).to receive(:bucket).with('downloads-bucket').and_return(bucket) allow(bucket).to receive(:object) @@ -104,4 +117,47 @@ expect(envelope_download.finished_at).to eq(Time.zone.parse('2026-03-06T12:10:00Z')) end end + + context 'when the workflow lookup returns not found' do + let(:api_error) { ArgoWorkflowsApiClient::ApiError.new(404, 'Not Found') } + + before do + allow(client).to receive(:get_workflow) + .with(name: 'ce-registry-download-abc123') + .and_raise(api_error) + allow(MR.logger).to receive(:warn) + end + + it 'marks the download failed' do + described_class.call(envelope_download:) + + envelope_download.reload + expect(envelope_download.status).to eq('finished') + expect(envelope_download.argo_workflow_name).to 
be_nil + expect(envelope_download.argo_workflow_namespace).to be_nil + expect(envelope_download.url).to be_nil + expect(envelope_download.zip_files).to eq([]) + expect(envelope_download.internal_error_message).to eq('Argo workflow not found: Not Found') + expect(envelope_download.finished_at).to be_present + end + end + + context 'when the workflow lookup returns a transient API error' do + let(:api_error) { ArgoWorkflowsApiClient::ApiError.new(500, 'Internal Server Error') } + + before do + allow(client).to receive(:get_workflow) + .with(name: 'ce-registry-download-abc123') + .and_raise(api_error) + allow(MR.logger).to receive(:warn) + end + + it 'leaves the download in progress' do + expect { described_class.call(envelope_download:) } + .not_to change { envelope_download.reload.status } + + expect(envelope_download.argo_workflow_name).to eq('ce-registry-download-abc123') + expect(envelope_download.argo_workflow_namespace).to eq('credreg-staging') + end + end end diff --git a/workflows/tasks/s3-graphs-zip/README.md b/workflows/tasks/s3-graphs-zip/README.md index f0353a9f..a6eeb891 100644 --- a/workflows/tasks/s3-graphs-zip/README.md +++ b/workflows/tasks/s3-graphs-zip/README.md @@ -69,7 +69,7 @@ Optional parameters: Optional maximum number of input `.json` files allowed in a single ZIP batch. This acts as a safety cap on top of `--max-uncompressed-zip-size-bytes` for cases where many tiny files would otherwise end up in one archive. Default: - `1000`. + `25000`. - `--max-workers` Number of batches to process concurrently. Each worker streams one ZIP archive to S3 at a time. Default: `4`. 
diff --git a/workflows/tasks/s3-graphs-zip/main.py b/workflows/tasks/s3-graphs-zip/main.py index 7c840b13..4bf2559c 100644 --- a/workflows/tasks/s3-graphs-zip/main.py +++ b/workflows/tasks/s3-graphs-zip/main.py @@ -19,6 +19,13 @@ DEFAULT_TIMEOUT_SECONDS = 30 +def optional_int(value: str) -> int | None: + if value == "": + return None + + return int(value) + + @dataclass(slots=True) class BatchResult: batch_number: int @@ -466,9 +473,9 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument("--destination-bucket", required=True) parser.add_argument("--destination-prefix", default="") parser.add_argument("--max-uncompressed-zip-size-bytes", type=int, default=200 * 1024 * 1024) - parser.add_argument("--batch-size", type=int, default=1000) + parser.add_argument("--batch-size", type=int, default=25000) parser.add_argument("--max-workers", type=int, default=4) - parser.add_argument("--max-input-files", type=int, default=None) + parser.add_argument("--max-input-files", type=optional_int, default=None) parser.add_argument("--region", default=None) parser.add_argument("--endpoint-url", default=None) parser.add_argument("--read-chunk-size", type=int, default=1024 * 1024) diff --git a/workflows/templates/s3-graphs-zip-workflow-template.yaml b/workflows/templates/s3-graphs-zip-workflow-template.yaml index 3135f63a..fce6ae76 100644 --- a/workflows/templates/s3-graphs-zip-workflow-template.yaml +++ b/workflows/templates/s3-graphs-zip-workflow-template.yaml @@ -16,8 +16,10 @@ spec: - name: destination-prefix - name: max-uncompressed-zip-size-bytes value: "209715200" + - name: max-input-files + value: "" - name: batch-size - value: "1000" + value: "25000" - name: max-workers value: "4" - name: aws-region @@ -33,6 +35,7 @@ spec: - name: destination-bucket - name: destination-prefix - name: max-uncompressed-zip-size-bytes + - name: max-input-files - name: batch-size - name: max-workers - name: aws-region @@ -48,7 +51,7 @@ spec: path: /tmp/argo/zip-manifest.json 
container: image: "{{inputs.parameters.task-image}}" - command: ["python", "/app/main.py"] + command: ["python", "/app/main.py"] imagePullPolicy: Always args: - --source-bucket @@ -61,6 +64,8 @@ spec: - "{{inputs.parameters.destination-prefix}}" - --max-uncompressed-zip-size-bytes - "{{inputs.parameters.max-uncompressed-zip-size-bytes}}" + - --max-input-files + - "{{inputs.parameters.max-input-files}}" - --batch-size - "{{inputs.parameters.batch-size}}" - --max-workers @@ -74,8 +79,8 @@ spec: value: "{{inputs.parameters.environment}}" resources: requests: - cpu: "1000m" - memory: "2Gi" + cpu: "250m" + memory: "256Mi" limits: cpu: "2000m" memory: "4Gi" From f37af88acff227a183c7033a3cd8216c0807b573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Saksida?= Date: Thu, 19 Mar 2026 14:40:57 -0300 Subject: [PATCH 4/6] Reuse downloads when possible Addresses https://github.com/CredentialEngine/ce-registry/issues/33 --- app/api/v1/envelopes.rb | 12 +++-- app/models/envelope.rb | 6 +++ ...last_published_at_to_envelope_downloads.rb | 5 ++ db/structure.sql | 8 +-- spec/api/v1/envelopes_spec.rb | 49 +++++++++++++++++-- spec/factories/envelope_downloads.rb | 1 + 6 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 db/migrate/20260319120000_add_last_published_at_to_envelope_downloads.rb diff --git a/app/api/v1/envelopes.rb b/app/api/v1/envelopes.rb index 3388a4e3..f9d9b4b1 100644 --- a/app/api/v1/envelopes.rb +++ b/app/api/v1/envelopes.rb @@ -90,12 +90,15 @@ class Envelopes < MountableAPI desc 'Starts an envelope download' post do should_enqueue = false + response_status = :ok @envelope_download.with_lock do - active_download = @envelope_download.in_progress? || - (@envelope_download.pending? && @envelope_download.enqueued_at.present?) + active_download = @envelope_download.enqueued_at.present? && + (@envelope_download.in_progress? || @envelope_download.pending?) 
+ current_published_at = Envelope.last_publish_event_at(current_community) + last_published_at = @envelope_download.last_published_at || Time.at(0) - unless active_download + if !active_download && current_published_at&.>(last_published_at) @envelope_download.update!( argo_workflow_name: nil, argo_workflow_namespace: nil, @@ -103,15 +106,18 @@ class Envelopes < MountableAPI finished_at: nil, internal_error_backtrace: [], internal_error_message: nil, + last_published_at: current_published_at, status: :pending, url: nil, zip_files: [] ) should_enqueue = true + response_status = :created end end DownloadEnvelopesJob.perform_later(@envelope_download.id) if should_enqueue + status response_status present @envelope_download, with: API::Entities::EnvelopeDownload end end diff --git a/app/models/envelope.rb b/app/models/envelope.rb index e50b385a..0919185f 100644 --- a/app/models/envelope.rb +++ b/app/models/envelope.rb @@ -118,6 +118,12 @@ def self.select_scope(include_deleted = nil) end end + def self.last_publish_event_at(envelope_community) + EnvelopeVersion + .where(item_type: 'Envelope', envelope_community_id: envelope_community.id) + .maximum(:created_at) + end + def envelope_community_name envelope_community.name end diff --git a/db/migrate/20260319120000_add_last_published_at_to_envelope_downloads.rb b/db/migrate/20260319120000_add_last_published_at_to_envelope_downloads.rb new file mode 100644 index 00000000..bea38550 --- /dev/null +++ b/db/migrate/20260319120000_add_last_published_at_to_envelope_downloads.rb @@ -0,0 +1,5 @@ +class AddLastPublishedAtToEnvelopeDownloads < ActiveRecord::Migration[8.0] + def change + add_column :envelope_downloads, :last_published_at, :datetime + end +end diff --git a/db/structure.sql b/db/structure.sql index c457398b..11d30890 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1,4 +1,4 @@ -\restrict RlhAm1PKGOPdsOZNMphz6iO9DdiuUyVV63UMT76QE4hhnh8DEXySEcXHsCn7v5q +\restrict 
HVig67hq7zLYZnQf6tMgVZ6cHpcSPJO9ABlqm7KUFMXdFajytb3UBg649usLbeJ -- Dumped from database version 16.13 (Debian 16.13-1.pgdg13+1) -- Dumped by pg_dump version 16.13 @@ -334,7 +334,8 @@ CREATE TABLE public.envelope_downloads ( enqueued_at timestamp(6) without time zone, argo_workflow_name character varying, argo_workflow_namespace character varying, - zip_files jsonb DEFAULT '[]'::jsonb NOT NULL + zip_files jsonb DEFAULT '[]'::jsonb NOT NULL, + last_published_at timestamp(6) without time zone ); @@ -2049,11 +2050,12 @@ ALTER TABLE ONLY public.envelopes -- PostgreSQL database dump complete -- -\unrestrict RlhAm1PKGOPdsOZNMphz6iO9DdiuUyVV63UMT76QE4hhnh8DEXySEcXHsCn7v5q +\unrestrict HVig67hq7zLYZnQf6tMgVZ6cHpcSPJO9ABlqm7KUFMXdFajytb3UBg649usLbeJ SET search_path TO "$user", public; INSERT INTO "schema_migrations" (version) VALUES +('20260319120000'), ('20260310005238'), ('20260306120000'), ('20251022205617'), diff --git a/spec/api/v1/envelopes_spec.rb b/spec/api/v1/envelopes_spec.rb index e994cce6..5cb9e751 100644 --- a/spec/api/v1/envelopes_spec.rb +++ b/spec/api/v1/envelopes_spec.rb @@ -286,6 +286,14 @@ post '/envelopes/download', nil, 'Authorization' => "Token #{auth_token}" end + before do + PaperTrail.enabled = true + end + + after do + PaperTrail.enabled = false + end + context 'with invalid token' do let(:auth_token) { 'invalid token' } @@ -305,6 +313,12 @@ # rubocop:todo RSpec/MultipleExpectations it 'creates new pending download and enqueues job' do # rubocop:todo RSpec/ExampleLength # rubocop:enable RSpec/MultipleExpectations + published_at = now - 5.minutes + + travel_to published_at do + create(:envelope, :from_cer, envelope_community:) + end + travel_to now do expect { perform_request }.to change(EnvelopeDownload, :count).by(1) end @@ -313,6 +327,7 @@ envelope_download = EnvelopeDownload.last expect(envelope_download.envelope_community).to eq(envelope_community) + expect(envelope_download.last_published_at).to eq(published_at) 
expect(envelope_download.status).to eq('pending') expect_json_sizes(2) @@ -332,8 +347,31 @@ let!(:envelope_download) do create(:envelope_download, :finished, envelope_community:) end + let(:published_at) { now - 10.minutes } + + before do + travel_to published_at do + create(:envelope, :from_cer, envelope_community:) + end + end + + it 'returns the existing download when no newer publish event exists' do + envelope_download.update!(last_published_at: published_at) + + expect { perform_request }.to not_enqueue_job(DownloadEnvelopesJob) + + expect_status(:ok) + expect(envelope_download.reload.status).to eq('finished') + expect(envelope_download.last_published_at).to eq(published_at) + + expect_json('finished_at', envelope_download.finished_at.as_json) + expect_json('status', 'finished') + end + + it 'enqueues job for existing download when there is a newer publish event' do + previous_publish_time = published_at - 5.minutes + envelope_download.update!(last_published_at: previous_publish_time) - it 'enqueues job for existing download' do travel_to now do expect { perform_request }.to not_change(EnvelopeDownload, :count) .and enqueue_job(DownloadEnvelopesJob).with(envelope_download.id) @@ -341,6 +379,7 @@ expect_status(:created) expect(envelope_download.reload.status).to eq('pending') + expect(envelope_download.last_published_at).to eq(published_at) expect_json_sizes(2) expect_json('enqueued_at', now.as_json) @@ -357,6 +396,7 @@ url: 'https://downloads.example/old.zip', zip_files: ['old.zip'] ) + envelope_download.update!(last_published_at: published_at - 5.minutes) travel_to now do expect { perform_request }.to enqueue_job(DownloadEnvelopesJob).with(envelope_download.id) @@ -370,6 +410,7 @@ expect(envelope_download.finished_at).to be_nil expect(envelope_download.internal_error_message).to be_nil expect(envelope_download.internal_error_backtrace).to eq([]) + expect(envelope_download.last_published_at).to eq(published_at) expect(envelope_download.url).to be_nil 
expect(envelope_download.zip_files).to eq([]) expect(envelope_download.argo_workflow_name).to be_nil @@ -383,12 +424,13 @@ it 'does not enqueue a duplicate job when the download is already pending' do envelope_download.update!( enqueued_at: now, + last_published_at: published_at - 5.minutes, status: :pending ) expect { perform_request }.to not_enqueue_job(DownloadEnvelopesJob) - expect_status(:created) + expect_status(:ok) expect(envelope_download.reload.status).to eq('pending') expect_json_sizes(2) expect_json('enqueued_at', now.as_json) @@ -397,13 +439,14 @@ it 'does not enqueue a duplicate job when the download is already in progress' do envelope_download.update!( + last_published_at: published_at - 5.minutes, started_at: now, status: :in_progress ) expect { perform_request }.to not_enqueue_job(DownloadEnvelopesJob) - expect_status(:created) + expect_status(:ok) expect(envelope_download.reload.status).to eq('in_progress') expect_json_sizes(2) expect_json('started_at', now.as_json) diff --git a/spec/factories/envelope_downloads.rb b/spec/factories/envelope_downloads.rb index 5789d505..9971381d 100644 --- a/spec/factories/envelope_downloads.rb +++ b/spec/factories/envelope_downloads.rb @@ -1,6 +1,7 @@ FactoryBot.define do factory :envelope_download do enqueued_at { Time.current.change(usec: 0) } + last_published_at { nil } zip_files { [] } # rubocop:todo FactoryBot/FactoryAssociationWithStrategy envelope_community { create(:envelope_community, :with_random_name) } From b16cb1b6355106544d148d864f1d6a7b37d2e959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Saksida?= Date: Thu, 19 Mar 2026 14:50:32 -0300 Subject: [PATCH 5/6] Update Swagger --- app/api/entities/envelope_download.rb | 3 +++ db/structure.sql | 4 ++-- lib/swagger_docs/models.rb | 20 +++++++++++++++++++ lib/swagger_docs/sections/envelopes.rb | 11 ++++++++--- spec/api/v1/envelopes_spec.rb | 27 +++++++++++++++++--------- 5 files changed, 51 insertions(+), 14 deletions(-) diff --git 
a/app/api/entities/envelope_download.rb b/app/api/entities/envelope_download.rb index 735752e2..be56f5fd 100644 --- a/app/api/entities/envelope_download.rb +++ b/app/api/entities/envelope_download.rb @@ -5,6 +5,9 @@ class EnvelopeDownload < Grape::Entity expose :display_status, as: :status, documentation: { type: 'string', desc: 'Status of download' } + expose :last_published_at, + documentation: { type: 'string', desc: 'Timestamp of the latest publish event included in this download' } + expose :enqueued_at, documentation: { type: 'string', desc: 'When the download was enqueued' }, if: ->(object) { object.pending? } diff --git a/db/structure.sql b/db/structure.sql index 11d30890..c0e20716 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -1,4 +1,4 @@ -\restrict HVig67hq7zLYZnQf6tMgVZ6cHpcSPJO9ABlqm7KUFMXdFajytb3UBg649usLbeJ +\restrict 0dy55iEgxshkrNexCflHQqeKePQeKS9cgNddI1yhNU2MlrkmlYja0aF7oIYJK1j -- Dumped from database version 16.13 (Debian 16.13-1.pgdg13+1) -- Dumped by pg_dump version 16.13 @@ -2050,7 +2050,7 @@ ALTER TABLE ONLY public.envelopes -- PostgreSQL database dump complete -- -\unrestrict HVig67hq7zLYZnQf6tMgVZ6cHpcSPJO9ABlqm7KUFMXdFajytb3UBg649usLbeJ +\unrestrict 0dy55iEgxshkrNexCflHQqeKePQeKS9cgNddI1yhNU2MlrkmlYja0aF7oIYJK1j SET search_path TO "$user", public; diff --git a/lib/swagger_docs/models.rb b/lib/swagger_docs/models.rb index e9d647d4..b2f9e0f6 100644 --- a/lib/swagger_docs/models.rb +++ b/lib/swagger_docs/models.rb @@ -378,6 +378,26 @@ module Models # rubocop:todo Metrics/ModuleLength, Style/Documentation type: :string, description: 'Status (pending, in progress, finished, or failed)' + property :last_published_at, + type: :string, + format: :'date-time', + description: 'Timestamp of the latest publish event included in this download' + + property :enqueued_at, + type: :string, + format: :'date-time', + description: 'When the download was enqueued' + + property :started_at, + type: :string, + format: :'date-time', + description: 'When 
the download started' + + property :finished_at, + type: :string, + format: :'date-time', + description: 'When the download finished' + property :url, type: :string, description: 'S3 URL (when finished)' diff --git a/lib/swagger_docs/sections/envelopes.rb b/lib/swagger_docs/sections/envelopes.rb index bf5baf57..1868a210 100644 --- a/lib/swagger_docs/sections/envelopes.rb +++ b/lib/swagger_docs/sections/envelopes.rb @@ -62,7 +62,7 @@ module Envelopes # rubocop:todo Metrics/ModuleLength, Style/Documentation swagger_path '/{community_name}/envelopes/download' do operation :get do key :operationId, 'getApiEnvelopesDownload' - key :description, "Returns the download's status and URL" + key :description, "Returns the download's status, publish timestamp, and URLs" key :produces, ['application/json'] key :tags, ['Envelopes'] @@ -76,14 +76,19 @@ module Envelopes # rubocop:todo Metrics/ModuleLength, Style/Documentation operation :post do key :operationId, 'postApiEnvelopesDownloads' - key :description, 'Starts a new download' + key :description, 'Starts a new download when newer publish events exist, otherwise returns the existing download' key :produces, ['application/json'] key :tags, ['Envelopes'] parameter community_name + response 200 do + key :description, 'Existing download object' + schema { key :$ref, :EnvelopeDownload } + end + response 201 do - key :description, 'Download object' + key :description, 'Newly started download object' schema { key :$ref, :EnvelopeDownload } end end diff --git a/spec/api/v1/envelopes_spec.rb b/spec/api/v1/envelopes_spec.rb index 5cb9e751..189f8528 100644 --- a/spec/api/v1/envelopes_spec.rb +++ b/spec/api/v1/envelopes_spec.rb @@ -181,7 +181,8 @@ expect(envelope_download.envelope_community).to eq(envelope_community) expect(envelope_download.status).to eq('pending') - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', nil) expect_json('enqueued_at', nil) expect_json('status', 'pending') end @@ -214,7 +215,8 @@ 
it 'returns `in progress`' do expect { perform_request }.not_to change(EnvelopeDownload, :count) expect_status(:ok) - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', nil) expect_json('started_at', envelope_download.started_at.as_json) expect_json('status', 'in_progress') end @@ -233,7 +235,8 @@ it 'returns `failed`' do expect { perform_request }.not_to change(EnvelopeDownload, :count) expect_status(:ok) - expect_json_sizes(4) + expect_json_sizes(5) + expect_json('last_published_at', nil) expect_json('finished_at', envelope_download.finished_at.as_json) expect_json('status', 'failed') expect_json('url', url) @@ -254,7 +257,8 @@ it 'returns `finished` and URL' do expect { perform_request }.not_to change(EnvelopeDownload, :count) expect_status(:ok) - expect_json_sizes(4) + expect_json_sizes(5) + expect_json('last_published_at', nil) expect_json('finished_at', envelope_download.finished_at.as_json) expect_json('status', 'finished') expect_json('url', url) @@ -330,7 +334,8 @@ expect(envelope_download.last_published_at).to eq(published_at) expect(envelope_download.status).to eq('pending') - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', published_at.as_json) expect_json('enqueued_at', now.as_json) expect_json('status', 'pending') @@ -381,7 +386,8 @@ expect(envelope_download.reload.status).to eq('pending') expect(envelope_download.last_published_at).to eq(published_at) - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', published_at.as_json) expect_json('enqueued_at', now.as_json) expect_json('status', 'pending') end @@ -416,7 +422,8 @@ expect(envelope_download.argo_workflow_name).to be_nil expect(envelope_download.argo_workflow_namespace).to be_nil - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', published_at.as_json) expect_json('enqueued_at', now.as_json) expect_json('status', 'pending') end @@ -432,7 +439,8 @@ expect_status(:ok) 
expect(envelope_download.reload.status).to eq('pending') - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', envelope_download.last_published_at.as_json) expect_json('enqueued_at', now.as_json) expect_json('status', 'pending') end @@ -448,7 +456,8 @@ expect_status(:ok) expect(envelope_download.reload.status).to eq('in_progress') - expect_json_sizes(2) + expect_json_sizes(3) + expect_json('last_published_at', envelope_download.last_published_at.as_json) expect_json('started_at', now.as_json) expect_json('status', 'in_progress') end From f0b99e8d338a6ad91bbb8c87f7cdbbf9133732e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Saksida?= Date: Thu, 26 Mar 2026 15:32:35 -0300 Subject: [PATCH 6/6] Remove workflows folder --- workflows/tasks/s3-graphs-zip/.dockerignore | 4 - workflows/tasks/s3-graphs-zip/.gitignore | 35 - workflows/tasks/s3-graphs-zip/Dockerfile | 13 - workflows/tasks/s3-graphs-zip/README.md | 224 ------- .../tasks/s3-graphs-zip/docker-compose.yml | 35 - workflows/tasks/s3-graphs-zip/main.py | 597 ------------------ workflows/tasks/s3-graphs-zip/pyproject.toml | 20 - .../tests/test_integration_localstack.py | 269 -------- .../tasks/s3-graphs-zip/tests/test_service.py | 117 ---- workflows/tasks/s3-graphs-zip/uv.lock | 162 ----- .../s3-graphs-zip-workflow-template.yaml | 94 --- 11 files changed, 1570 deletions(-) delete mode 100644 workflows/tasks/s3-graphs-zip/.dockerignore delete mode 100644 workflows/tasks/s3-graphs-zip/.gitignore delete mode 100644 workflows/tasks/s3-graphs-zip/Dockerfile delete mode 100644 workflows/tasks/s3-graphs-zip/README.md delete mode 100644 workflows/tasks/s3-graphs-zip/docker-compose.yml delete mode 100644 workflows/tasks/s3-graphs-zip/main.py delete mode 100644 workflows/tasks/s3-graphs-zip/pyproject.toml delete mode 100644 workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py delete mode 100644 workflows/tasks/s3-graphs-zip/tests/test_service.py delete mode 100644 
workflows/tasks/s3-graphs-zip/uv.lock delete mode 100644 workflows/templates/s3-graphs-zip-workflow-template.yaml diff --git a/workflows/tasks/s3-graphs-zip/.dockerignore b/workflows/tasks/s3-graphs-zip/.dockerignore deleted file mode 100644 index 15c2a746..00000000 --- a/workflows/tasks/s3-graphs-zip/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -.venv -__pycache__ -.pytest_cache -*.pyc diff --git a/workflows/tasks/s3-graphs-zip/.gitignore b/workflows/tasks/s3-graphs-zip/.gitignore deleted file mode 100644 index abf53dc9..00000000 --- a/workflows/tasks/s3-graphs-zip/.gitignore +++ /dev/null @@ -1,35 +0,0 @@ -# Python -__pycache__/ -*.py[cod] -*.pyo -*.pyd -*.so -.python-version - -# Virtual environments -.venv/ -venv/ - -# Tooling caches -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ -.coverage -htmlcov/ - -# Build artifacts -build/ -dist/ -*.egg-info/ - -# Local env/config -.env -.env.* - -# OS/editor files -.DS_Store -.vscode/ -.idea/ - -# LocalStack / Docker overrides -localstack-data/ diff --git a/workflows/tasks/s3-graphs-zip/Dockerfile b/workflows/tasks/s3-graphs-zip/Dockerfile deleted file mode 100644 index 0d7e26c8..00000000 --- a/workflows/tasks/s3-graphs-zip/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM python:3.13-slim - -ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 - -WORKDIR /app - -RUN pip install --no-cache-dir boto3 - -COPY main.py /app/main.py - -ENTRYPOINT ["python", "/app/main.py"] -CMD ["--help"] diff --git a/workflows/tasks/s3-graphs-zip/README.md b/workflows/tasks/s3-graphs-zip/README.md deleted file mode 100644 index a6eeb891..00000000 --- a/workflows/tasks/s3-graphs-zip/README.md +++ /dev/null @@ -1,224 +0,0 @@ -# s3-graphs-zip - -Streams CE graph `.json` objects from one S3 bucket into ZIP batches and uploads -the resulting archives to another bucket. 
- -## What it does - -- lists JSON files from source bucket/prefix -- groups them into batches based on target ZIP size -- processes batches in parallel -- streams each object directly into a ZIP archive -- streams the ZIP output directly back to S3 -- uses multipart upload automatically for larger archives - -## Requirements - -- Python 3.13+ - -## Local install - -For local testing: - -```bash -uv sync --dev -``` - -## Run locally - -Make sure AWS credentials are available in the environment or via your usual AWS -configuration files. For LocalStack or other S3-compatible endpoints, any dummy -credentials accepted by that service are sufficient. - -```bash -python main.py \ - --source-bucket source-bucket \ - --source-prefix graphs/ \ - --destination-bucket destination-bucket \ - --destination-prefix zipped/ \ - --max-uncompressed-zip-size-bytes 209715200 \ - --max-workers 4 \ - --max-input-files 500 -``` - -## Parameters - -Required parameters: - -- `--source-bucket` - Source S3 bucket that contains the input graph objects. -- `--destination-bucket` - Destination S3 bucket where the generated ZIP archives will be uploaded. - -Optional parameters: - -- `--source-prefix` - Prefix inside the source bucket to scan for input files. Only keys ending in - `.json` under this prefix are included. Default: empty prefix. -- `--destination-prefix` - Prefix inside the destination bucket where ZIP files are written. The task - writes `batch-00001.zip`, `batch-00002.zip`, and so on under this prefix. - Default: empty prefix. -- `--max-uncompressed-zip-size-bytes` - Target maximum total input size per ZIP batch, measured using the source - object sizes reported by S3. This is the primary batching control and is an - estimate of final ZIP size, not an exact compressed-size guarantee. A value of - `209715200` targets about 200 MiB of uncompressed input per ZIP. Default: - `209715200`. 
-- `--batch-size` - Optional maximum number of input `.json` files allowed in a single ZIP batch. - This acts as a safety cap on top of `--max-uncompressed-zip-size-bytes` for - cases where many tiny files would otherwise end up in one archive. Default: - `25000`. -- `--max-workers` - Number of batches to process concurrently. Each worker streams one ZIP archive - to S3 at a time. Default: `4`. -- `--max-input-files` - Optional cap on how many input `.json` files are processed in a run. Useful - for test runs, incremental validation, or limiting blast radius while tuning. - Default: no limit. -- `--region` - AWS region for the S3 client. If omitted, boto3 falls back to standard AWS - region resolution from the environment or AWS config files. -- `--endpoint-url` - Custom S3 endpoint URL for LocalStack or another S3-compatible service. -- `--read-chunk-size` - Number of bytes to read from each source object per streaming read. Increase - it to reduce request overhead; decrease it to lower per-stream memory usage. - Default: `1048576` (1 MiB). -- `--part-size` - Multipart upload part size in bytes for streaming ZIP uploads to S3. Must be - at least `5242880` (5 MiB), which is the S3 multipart minimum. Default: - `8388608` (8 MiB). -- `--manifest-path` - Optional filesystem path where the task writes a JSON manifest describing the - run output. This is used by Argo to capture the produced ZIP file list as a - workflow output parameter. Default: no manifest file is written. - -## Completion webhook - -If `WEBHOOK_URL` is set, the CLI sends a `POST` request to that URL when processing -finishes. This happens for both successful and failed runs. `ENVIRONMENT` controls -the label in the message and defaults to `staging`. 
- -Example: - -```bash -export WEBHOOK_URL="https://example.com/webhooks/s3-graphs-zip" -export ENVIRONMENT="staging" - -python main.py \ - --source-bucket source-bucket \ - --source-prefix graphs/ \ - --destination-bucket destination-bucket \ - --destination-prefix zipped/run-123 -``` - -The request body is JSON: - -```json -{"text": "..."} -``` - -## Output manifest - -If `--manifest-path` is provided, the task writes a JSON document containing the -uploaded ZIP keys and summary metadata. - -Example: - -```json -{ - "batch_count": 2, - "destination_bucket": "destination-bucket", - "destination_prefix": "zipped/run-123", - "total_files": 12, - "total_input_bytes": 73400320, - "zip_files": [ - "zipped/run-123/batch-00001.zip", - "zipped/run-123/batch-00002.zip" - ], - "zip_size_bytes": 18350080 -} -``` - -The task also prints this same manifest to stdout when processing completes. - -## Destination prefix strategy - -The destination bucket is expected to already exist. - -Use `--destination-prefix` as a run-specific output directory so each execution writes -into its own prefix instead of reusing previous batch keys. - -Example: - -```bash -python main.py \ - --source-bucket source-bucket \ - --source-prefix graphs/ \ - --destination-bucket destination-bucket \ - --destination-prefix "zipped/2026-03-06T14-22-10Z" \ - --max-uncompressed-zip-size-bytes 209715200 \ - --max-workers 4 -``` - -This produces objects like: - -- `zipped/2026-03-06T14-22-10Z/batch-00001.zip` -- `zipped/2026-03-06T14-22-10Z/batch-00002.zip` - -If you reuse the same destination prefix, objects with the same batch key will be overwritten. - -## Test - -Unit tests: - -```bash -uv run pytest -q -``` - -Integration tests against LocalStack: - -```bash -docker compose up --build tests -``` - -The Compose setup starts LocalStack and configures the test container with the -required endpoint and dummy AWS credentials automatically. - -## Docker - -Build: - -```bash -docker build -t s3-graphs-zip . 
-``` - -Run: - -```bash -docker run --rm s3-graphs-zip --help -``` - -Example: - -```bash -docker run --rm \ - -e AWS_ACCESS_KEY_ID=test \ - -e AWS_SECRET_ACCESS_KEY=test \ - -e AWS_DEFAULT_REGION=us-east-1 \ - s3-graphs-zip \ - --source-bucket source-bucket \ - --source-prefix graphs/ \ - --destination-bucket destination-bucket \ - --destination-prefix zipped/ -``` - -For LocalStack or another S3-compatible endpoint, also pass `--endpoint-url`. - -## Notes - -- Only keys ending in `.json` are included. -- Files inside each ZIP are stored relative to `--source-prefix`. -- Output archives are named `batch-00001.zip`, `batch-00002.zip`, etc. diff --git a/workflows/tasks/s3-graphs-zip/docker-compose.yml b/workflows/tasks/s3-graphs-zip/docker-compose.yml deleted file mode 100644 index 1fb89bd7..00000000 --- a/workflows/tasks/s3-graphs-zip/docker-compose.yml +++ /dev/null @@ -1,35 +0,0 @@ -services: - localstack: - image: localstack/localstack:4.1 - ports: - - "4566:4566" - environment: - SERVICES: s3 - DEBUG: 1 - AWS_DEFAULT_REGION: us-east-1 - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4566/_localstack/health"] - interval: 5s - timeout: 3s - retries: 20 - volumes: - - localstack-data:/var/lib/localstack - - tests: - image: python:3.13-slim - depends_on: - localstack: - condition: service_healthy - working_dir: /app - volumes: - - .:/app - environment: - AWS_ENDPOINT_URL: http://localstack:4566 - AWS_DEFAULT_REGION: us-east-1 - AWS_ACCESS_KEY_ID: test - AWS_SECRET_ACCESS_KEY: test - command: > - sh -lc "pip install --no-cache-dir boto3 pytest && python -m pytest -q -m integration" - -volumes: - localstack-data: diff --git a/workflows/tasks/s3-graphs-zip/main.py b/workflows/tasks/s3-graphs-zip/main.py deleted file mode 100644 index 4bf2559c..00000000 --- a/workflows/tasks/s3-graphs-zip/main.py +++ /dev/null @@ -1,597 +0,0 @@ -from __future__ import annotations - -import argparse -from concurrent.futures import FIRST_COMPLETED, Future, 
ThreadPoolExecutor, wait -from contextlib import closing -from dataclasses import dataclass -import io -import json -import os -from pathlib import Path -import sys -import time -from typing import Any, Iterable -from urllib import request -import zipfile - -import boto3 - -DEFAULT_TIMEOUT_SECONDS = 30 - - -def optional_int(value: str) -> int | None: - if value == "": - return None - - return int(value) - - -@dataclass(slots=True) -class BatchResult: - batch_number: int - object_count: int - destination_key: str - zip_size_bytes: int - - -@dataclass(slots=True) -class SourceObject: - key: str - size_bytes: int - - -class S3UploadWriter(io.RawIOBase): - def __init__( - self, - client: Any, - bucket: str, - key: str, - *, - part_size: int = 8 * 1024 * 1024, - multipart_threshold: int | None = None, - content_type: str = "application/zip", - ) -> None: - if part_size < 5 * 1024 * 1024: - raise ValueError("part_size must be at least 5 MiB for S3 multipart uploads") - - self._client = client - self._bucket = bucket - self._key = key - self._part_size = part_size - self._multipart_threshold = multipart_threshold or part_size - self._content_type = content_type - - self._buffer = bytearray() - self._upload_id: str | None = None - self._parts: list[dict[str, Any]] = [] - self._part_number = 1 - self._bytes_written = 0 - self._closed = False - self._aborted = False - - def __enter__(self) -> S3UploadWriter: - return self - - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> bool: - if exc_type is None: - self.close() - else: - self.discard() - return False - - def writable(self) -> bool: - return True - - @property - def bytes_written(self) -> int: - return self._bytes_written - - @property - def closed(self) -> bool: - return self._closed - - def write(self, b: bytes | bytearray) -> int: - if self._closed: - raise ValueError("I/O operation on closed writer") - - if not b: - return 0 - - self._buffer.extend(b) - self._bytes_written += len(b) - - if 
self._upload_id is None and len(self._buffer) >= self._multipart_threshold: - self._start_multipart_upload() - - if self._upload_id is not None: - self._flush_full_parts() - - return len(b) - - def close(self) -> None: - if self._closed: - return - - try: - if self._upload_id is None: - self._client.put_object( - Bucket=self._bucket, - Key=self._key, - Body=bytes(self._buffer), - ContentType=self._content_type, - ) - self._buffer.clear() - else: - if self._buffer: - self._upload_part(bytes(self._buffer)) - self._buffer.clear() - - self._client.complete_multipart_upload( - Bucket=self._bucket, - Key=self._key, - UploadId=self._upload_id, - MultipartUpload={"Parts": self._parts}, - ) - except Exception: - self.discard() - raise - finally: - self._closed = True - super().close() - - def discard(self) -> None: - if self._closed: - return - - self._buffer.clear() - - if self._upload_id is not None and not self._aborted: - self._client.abort_multipart_upload( - Bucket=self._bucket, - Key=self._key, - UploadId=self._upload_id, - ) - self._aborted = True - - self._closed = True - super().close() - - def _start_multipart_upload(self) -> None: - response = self._client.create_multipart_upload( - Bucket=self._bucket, - Key=self._key, - ContentType=self._content_type, - ) - self._upload_id = response["UploadId"] - - def _flush_full_parts(self) -> None: - while len(self._buffer) >= self._part_size: - chunk = bytes(self._buffer[: self._part_size]) - del self._buffer[: self._part_size] - self._upload_part(chunk) - - def _upload_part(self, chunk: bytes) -> None: - response = self._client.upload_part( - Bucket=self._bucket, - Key=self._key, - UploadId=self._upload_id, - PartNumber=self._part_number, - Body=chunk, - ) - self._parts.append( - { - "ETag": response["ETag"], - "PartNumber": self._part_number, - } - ) - self._part_number += 1 - - -def list_json_keys( - client: Any, - bucket: str, - prefix: str = "", - limit: int | None = None, -) -> list[SourceObject]: - if limit is not 
None and limit <= 0: - raise ValueError("limit must be greater than zero") - - normalized_prefix = prefix.strip("/") - listing_prefix = f"{normalized_prefix}/" if normalized_prefix else "" - - paginator = client.get_paginator("list_objects_v2") - keys: list[SourceObject] = [] - - for page in paginator.paginate(Bucket=bucket, Prefix=listing_prefix): - for item in page.get("Contents", []): - key = item["Key"] - if not key.endswith(".json"): - continue - - keys.append(SourceObject(key=key, size_bytes=item["Size"])) - if limit is not None and len(keys) == limit: - return keys - - keys.sort(key=lambda item: item.key) - return keys - - -def chunked_by_estimated_size( - items: list[SourceObject], - *, - max_uncompressed_zip_size_bytes: int, - max_batch_size: int | None = None, -) -> Iterable[list[SourceObject]]: - if max_uncompressed_zip_size_bytes <= 0: - raise ValueError("max_uncompressed_zip_size_bytes must be greater than zero") - if max_batch_size is not None and max_batch_size <= 0: - raise ValueError("max_batch_size must be greater than zero") - - batch: list[SourceObject] = [] - batch_size_bytes = 0 - - for item in items: - would_exceed_size = ( - batch and batch_size_bytes + item.size_bytes > max_uncompressed_zip_size_bytes - ) - would_exceed_count = max_batch_size is not None and len(batch) >= max_batch_size - - if would_exceed_size or would_exceed_count: - yield batch - batch = [] - batch_size_bytes = 0 - - batch.append(item) - batch_size_bytes += item.size_bytes - - if batch: - yield batch - - -def build_destination_key(destination_prefix: str, batch_number: int) -> str: - cleaned = destination_prefix.strip("/") - filename = f"batch-{batch_number:05d}.zip" - return f"{cleaned}/{filename}" if cleaned else filename - - -def stream_objects_to_zip( - client: Any, - *, - source_bucket: str, - source_prefix: str, - destination_bucket: str, - destination_key: str, - object_keys: list[str], - batch_number: int, - read_chunk_size: int = 1024 * 1024, - part_size: int = 
8 * 1024 * 1024, -) -> BatchResult: - if read_chunk_size <= 0: - raise ValueError("read_chunk_size must be greater than zero") - - normalized_prefix = source_prefix.strip("/") - prefix_with_slash = f"{normalized_prefix}/" if normalized_prefix else "" - - with S3UploadWriter( - client, - destination_bucket, - destination_key, - part_size=part_size, - ) as writer: - with zipfile.ZipFile( - writer, - mode="w", - compression=zipfile.ZIP_DEFLATED, - compresslevel=6, - ) as archive: - for key in object_keys: - arcname = key - if prefix_with_slash and key.startswith(prefix_with_slash): - arcname = key[len(prefix_with_slash) :] - - response = client.get_object(Bucket=source_bucket, Key=key) - with ( - closing(response["Body"]) as body, - archive.open(arcname, mode="w") as archive_entry, - ): - while chunk := body.read(read_chunk_size): - archive_entry.write(chunk) - - return BatchResult( - batch_number=batch_number, - object_count=len(object_keys), - destination_key=destination_key, - zip_size_bytes=writer.bytes_written, - ) - - -def process_batches( - client: Any, - *, - source_bucket: str, - source_prefix: str, - destination_bucket: str, - destination_prefix: str, - max_uncompressed_zip_size_bytes: int, - max_workers: int, - source_objects: list[SourceObject] | None = None, - max_batch_size: int | None = None, - read_chunk_size: int = 1024 * 1024, - part_size: int = 8 * 1024 * 1024, -) -> list[BatchResult]: - source_objects = source_objects or list_json_keys(client, source_bucket, source_prefix) - if not source_objects: - return [] - - results: list[BatchResult] = [] - batch_iter = enumerate( - chunked_by_estimated_size( - source_objects, - max_uncompressed_zip_size_bytes=max_uncompressed_zip_size_bytes, - max_batch_size=max_batch_size, - ), - start=1, - ) - - def submit_batch( - executor: ThreadPoolExecutor, - batch_number: int, - batch_objects: list[SourceObject], - ) -> Future[BatchResult]: - destination_key = build_destination_key(destination_prefix, batch_number) - 
return executor.submit( - stream_objects_to_zip, - client, - source_bucket=source_bucket, - source_prefix=source_prefix, - destination_bucket=destination_bucket, - destination_key=destination_key, - object_keys=[item.key for item in batch_objects], - batch_number=batch_number, - read_chunk_size=read_chunk_size, - part_size=part_size, - ) - - with ThreadPoolExecutor(max_workers=max_workers) as executor: - pending: set[Future[BatchResult]] = set() - - for _ in range(max_workers): - try: - batch_number, batch_objects = next(batch_iter) - except StopIteration: - break - pending.add(submit_batch(executor, batch_number, batch_objects)) - - while pending: - done, pending = wait(pending, return_when=FIRST_COMPLETED) - - completed_results = [future.result() for future in done] - results.extend(completed_results) - - for _ in completed_results: - try: - batch_number, batch_objects = next(batch_iter) - except StopIteration: - continue - pending.add(submit_batch(executor, batch_number, batch_objects)) - - return sorted(results, key=lambda result: result.batch_number) - - -def format_duration(duration_seconds: float) -> str: - total_seconds = max(0, int(round(duration_seconds))) - hours, remainder = divmod(total_seconds, 3600) - minutes, seconds = divmod(remainder, 60) - - if hours: - return f"{hours}h {minutes}m {seconds}s" - if minutes: - return f"{minutes}m {seconds}s" - return f"{seconds}s" - - -def build_processing_done_text( - *, - environment_name: str, - duration_seconds: float, - total_files: int, - zip_size_bytes: int, - destination_bucket: str, - uploaded_location: str, - error_msg: str | None, -) -> str: - dur_str = format_duration(duration_seconds) - zip_size_mb = zip_size_bytes / (1024 * 1024) - - if error_msg: - return ( - f":x: *CE Registry ZIP bundle failed* ({environment_name})\n" - f">*Duration:* {dur_str}\n" - f">*Error:* {error_msg}" - ) - - return ( - f":white_check_mark: *CE Registry ZIP bundle succeeded* ({environment_name})\n" - f">*Files:* 
{total_files:,}\n" - f">*ZIP size:* {zip_size_mb:.2f} MB\n" - f">*Uploaded:* `s3://{destination_bucket}/{uploaded_location}`\n" - f">*Duration:* {dur_str}" - ) - - -def send_processing_done_webhook( - webhook_url: str, - *, - text: str, - timeout: int = DEFAULT_TIMEOUT_SECONDS, -) -> None: - body = json.dumps({"text": text}).encode("utf-8") - req = request.Request( - webhook_url, - data=body, - headers={"Content-Type": "application/json"}, - method="POST", - ) - - with request.urlopen(req, timeout=timeout) as response: - status_code = getattr(response, "status", None) - if status_code is not None and not 200 <= status_code < 300: - raise RuntimeError(f"webhook returned unexpected status code: {status_code}") - - -def build_output_manifest( - *, - source_objects: list[SourceObject], - destination_bucket: str, - destination_prefix: str, - results: list[BatchResult], -) -> dict[str, Any]: - return { - "batch_count": len(results), - "destination_bucket": destination_bucket, - "destination_prefix": destination_prefix.strip("/"), - "total_files": sum(result.object_count for result in results), - "total_input_bytes": sum(item.size_bytes for item in source_objects), - "zip_files": [result.destination_key for result in results], - "zip_size_bytes": sum(result.zip_size_bytes for result in results), - } - - -def write_output_manifest(manifest_path: str | None, manifest: dict[str, Any]) -> None: - if not manifest_path: - return - - path = Path(manifest_path) - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(manifest), encoding="utf-8") - - -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description="Stream JSON objects from S3 into ZIP batches") - parser.add_argument("--source-bucket", required=True) - parser.add_argument("--source-prefix", default="") - parser.add_argument("--destination-bucket", required=True) - parser.add_argument("--destination-prefix", default="") - 
parser.add_argument("--max-uncompressed-zip-size-bytes", type=int, default=200 * 1024 * 1024) - parser.add_argument("--batch-size", type=int, default=25000) - parser.add_argument("--max-workers", type=int, default=4) - parser.add_argument("--max-input-files", type=optional_int, default=None) - parser.add_argument("--region", default=None) - parser.add_argument("--endpoint-url", default=None) - parser.add_argument("--read-chunk-size", type=int, default=1024 * 1024) - parser.add_argument("--part-size", type=int, default=8 * 1024 * 1024) - parser.add_argument("--manifest-path", default=None) - return parser - - -def get_uploaded_location(destination_prefix: str, results: list[BatchResult]) -> str: - if len(results) == 1: - return results[0].destination_key - - cleaned_prefix = destination_prefix.strip("/") - return f"{cleaned_prefix}/" if cleaned_prefix else "" - - -def send_completion_webhook( - webhook_url: str | None, - *, - environment_name: str, - started_at: float, - destination_bucket: str, - destination_prefix: str, - results: list[BatchResult], - error_msg: str | None, -) -> None: - if not webhook_url: - return - - text = build_processing_done_text( - environment_name=environment_name, - duration_seconds=time.monotonic() - started_at, - total_files=sum(result.object_count for result in results), - zip_size_bytes=sum(result.zip_size_bytes for result in results), - destination_bucket=destination_bucket, - uploaded_location=get_uploaded_location(destination_prefix, results), - error_msg=error_msg, - ) - send_processing_done_webhook(webhook_url, text=text) - - -def main() -> int: - args = build_parser().parse_args() - - client_kwargs = {} - if args.region: - client_kwargs["region_name"] = args.region - if args.endpoint_url: - client_kwargs["endpoint_url"] = args.endpoint_url - - client = boto3.client("s3", **client_kwargs) - webhook_url = os.getenv("WEBHOOK_URL") - environment_name = os.getenv("ENVIRONMENT", "staging") - - started_at = time.monotonic() - results: 
list[BatchResult] = [] - source_objects = list_json_keys( - client, - args.source_bucket, - args.source_prefix, - limit=args.max_input_files, - ) - - try: - results = process_batches( - client, - source_bucket=args.source_bucket, - source_prefix=args.source_prefix, - destination_bucket=args.destination_bucket, - destination_prefix=args.destination_prefix, - max_uncompressed_zip_size_bytes=args.max_uncompressed_zip_size_bytes, - max_workers=args.max_workers, - source_objects=source_objects, - max_batch_size=args.batch_size, - read_chunk_size=args.read_chunk_size, - part_size=args.part_size, - ) - except Exception as exc: - send_completion_webhook( - webhook_url, - environment_name=environment_name, - started_at=started_at, - destination_bucket=args.destination_bucket, - destination_prefix=args.destination_prefix, - results=results, - error_msg=str(exc), - ) - raise - - send_completion_webhook( - webhook_url, - environment_name=environment_name, - started_at=started_at, - destination_bucket=args.destination_bucket, - destination_prefix=args.destination_prefix, - results=results, - error_msg=None, - ) - - manifest = build_output_manifest( - source_objects=source_objects, - destination_bucket=args.destination_bucket, - destination_prefix=args.destination_prefix, - results=results, - ) - write_output_manifest(args.manifest_path, manifest) - print(json.dumps(manifest), file=sys.stdout) - - for result in results: - print( - f"batch={result.batch_number} objects={result.object_count} destination={result.destination_key}", - file=sys.stdout, - ) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/workflows/tasks/s3-graphs-zip/pyproject.toml b/workflows/tasks/s3-graphs-zip/pyproject.toml deleted file mode 100644 index a44486b5..00000000 --- a/workflows/tasks/s3-graphs-zip/pyproject.toml +++ /dev/null @@ -1,20 +0,0 @@ -[project] -name = "s3-graphs-zip" -version = "0.1.0" -description = "Stream JSON objects from one S3 bucket into ZIP batches 
uploaded to another bucket" -readme = "README.md" -requires-python = ">=3.13" -dependencies = [ - "boto3>=1.37.0", -] - -[dependency-groups] -dev = [ - "pytest>=8.3.5", -] - -[tool.pytest.ini_options] -pythonpath = ["."] -markers = [ - "integration: tests that require LocalStack or another S3-compatible endpoint", -] diff --git a/workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py b/workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py deleted file mode 100644 index 5fc66a77..00000000 --- a/workflows/tasks/s3-graphs-zip/tests/test_integration_localstack.py +++ /dev/null @@ -1,269 +0,0 @@ -from __future__ import annotations - -from contextlib import contextmanager -from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer -import io -import json -import os -from queue import Empty, Queue -import subprocess -import sys -from threading import Thread -import zipfile - -import boto3 -import pytest - -from main import process_batches - - -pytestmark = pytest.mark.integration -requires_localstack = pytest.mark.skipif( - not os.getenv("AWS_ENDPOINT_URL"), - reason="AWS_ENDPOINT_URL is not configured", -) - - -def create_s3_client(): - return boto3.client( - "s3", - endpoint_url=os.environ["AWS_ENDPOINT_URL"], - region_name=os.getenv("AWS_DEFAULT_REGION", "us-east-1"), - aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID", "test"), - aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY", "test"), - ) - - -def empty_bucket(client, bucket_name: str) -> None: - paginator = client.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=bucket_name): - contents = page.get("Contents", []) - if contents: - client.delete_objects( - Bucket=bucket_name, - Delete={"Objects": [{"Key": item["Key"]} for item in contents]}, - ) - - -def ensure_clean_bucket(client, bucket_name: str) -> None: - existing_buckets = {bucket["Name"] for bucket in client.list_buckets().get("Buckets", [])} - if bucket_name in existing_buckets: - 
empty_bucket(client, bucket_name) - else: - client.create_bucket(Bucket=bucket_name) - - -def delete_bucket(client, bucket_name: str) -> None: - empty_bucket(client, bucket_name) - client.delete_bucket(Bucket=bucket_name) - - -def seed_source_bucket(client, bucket_name: str, *, total_graphs: int) -> dict[str, bytes]: - graph_bodies = { - f"graphs/graph-{index:03d}.json": f'{{"graph": {index}}}'.encode() - for index in range(1, total_graphs + 1) - } - for key, body in graph_bodies.items(): - client.put_object(Bucket=bucket_name, Key=key, Body=body) - - client.put_object(Bucket=bucket_name, Key="graphs/ignore.txt", Body=b"ignore") - return graph_bodies - - -def build_batch_key(destination_prefix: str, batch_number: int) -> str: - cleaned_prefix = destination_prefix.strip("/") - filename = f"batch-{batch_number:05d}.zip" - return f"{cleaned_prefix}/{filename}" if cleaned_prefix else filename - - -def assert_uploaded_batches( - client, - *, - destination_bucket: str, - destination_prefix: str, - expected_batch_sizes: list[int], - graph_bodies: dict[str, bytes], -) -> None: - next_graph_index = 1 - - for batch_number, expected_count in enumerate(expected_batch_sizes, start=1): - zip_bytes = client.get_object( - Bucket=destination_bucket, - Key=build_batch_key(destination_prefix, batch_number), - )["Body"].read() - - start = next_graph_index - end = start + expected_count - 1 - expected_names = [f"graph-{index:03d}.json" for index in range(start, end + 1)] - - with zipfile.ZipFile(io.BytesIO(zip_bytes)) as archive: - assert sorted(archive.namelist()) == expected_names - for index in range(start, end + 1): - assert archive.read(f"graph-{index:03d}.json") == graph_bodies[ - f"graphs/graph-{index:03d}.json" - ] - - next_graph_index = end + 1 - - -@contextmanager -def webhook_server(status_code: int = 204): - received: Queue[dict[str, object]] = Queue() - - class Handler(BaseHTTPRequestHandler): - def do_POST(self) -> None: - content_length = 
int(self.headers.get("Content-Length", "0")) - body = self.rfile.read(content_length) - received.put( - { - "path": self.path, - "headers": dict(self.headers), - "body": body, - } - ) - self.send_response(status_code) - self.end_headers() - - def log_message(self, format: str, *args: object) -> None: - return - - server = ThreadingHTTPServer(("127.0.0.1", 0), Handler) - thread = Thread(target=server.serve_forever, daemon=True) - thread.start() - - host, port = server.server_address - try: - yield f"http://{host}:{port}/webhook", received - finally: - server.shutdown() - thread.join(timeout=5) - server.server_close() - - -@requires_localstack -def test_end_to_end_with_localstack() -> None: - client = create_s3_client() - source_bucket = "source-graphs-service" - destination_bucket = "dest-archives-service" - - ensure_clean_bucket(client, source_bucket) - ensure_clean_bucket(client, destination_bucket) - - try: - graph_bodies = seed_source_bucket(client, source_bucket, total_graphs=50) - - results = process_batches( - client, - source_bucket=source_bucket, - source_prefix="graphs/", - destination_bucket=destination_bucket, - destination_prefix="zips", - max_uncompressed_zip_size_bytes=150, - max_workers=4, - read_chunk_size=1024, - part_size=5 * 1024 * 1024, - ) - - assert [result.destination_key for result in results] == [ - "zips/batch-00001.zip", - "zips/batch-00002.zip", - "zips/batch-00003.zip", - "zips/batch-00004.zip", - "zips/batch-00005.zip", - ] - assert_uploaded_batches( - client, - destination_bucket=destination_bucket, - destination_prefix="zips", - expected_batch_sizes=[12, 11, 11, 11, 5], - graph_bodies=graph_bodies, - ) - finally: - delete_bucket(client, source_bucket) - delete_bucket(client, destination_bucket) - - -@requires_localstack -def test_cli_sends_completion_webhook_after_uploading_batches() -> None: - client = create_s3_client() - source_bucket = "source-graphs-webhook" - destination_bucket = "dest-archives-webhook" - - 
ensure_clean_bucket(client, source_bucket) - ensure_clean_bucket(client, destination_bucket) - - try: - graph_bodies = seed_source_bucket(client, source_bucket, total_graphs=12) - - with webhook_server() as (webhook_url, received): - completed = subprocess.run( - [ - sys.executable, - "main.py", - "--source-bucket", - source_bucket, - "--source-prefix", - "graphs/", - "--destination-bucket", - destination_bucket, - "--destination-prefix", - "zips", - "--max-uncompressed-zip-size-bytes", - "150", - "--batch-size", - "100", - "--max-workers", - "2", - "--read-chunk-size", - "1024", - "--part-size", - str(5 * 1024 * 1024), - ], - env={ - **os.environ, - "WEBHOOK_URL": webhook_url, - "ENVIRONMENT": "integration", - }, - capture_output=True, - text=True, - check=False, - ) - - assert completed.returncode == 0, ( - f"stdout:\n{completed.stdout}\n\nstderr:\n{completed.stderr}" - ) - - manifest = json.loads(completed.stdout.splitlines()[0]) - assert manifest["batch_count"] == 1 - assert manifest["total_files"] == 12 - assert manifest["total_input_bytes"] > 0 - assert manifest["zip_files"] == ["zips/batch-00001.zip"] - assert manifest["zip_size_bytes"] > 0 - - request_data = received.get(timeout=5) - headers = request_data["headers"] - body = json.loads(request_data["body"]) - text = body["text"] - - assert request_data["path"] == "/webhook" - assert headers["Content-Type"] == "application/json" - assert text.startswith(":white_check_mark: *CE Registry ZIP bundle succeeded*") - assert "(integration)" in text - assert ">*Files:* 12" in text - assert f">*Uploaded:* `s3://{destination_bucket}/zips/batch-00001.zip`" in text - assert ">*ZIP size:* " in text - assert ">*Duration:* " in text - - with pytest.raises(Empty): - received.get_nowait() - - assert_uploaded_batches( - client, - destination_bucket=destination_bucket, - destination_prefix="zips", - expected_batch_sizes=[12], - graph_bodies=graph_bodies, - ) - finally: - delete_bucket(client, source_bucket) - 
delete_bucket(client, destination_bucket) diff --git a/workflows/tasks/s3-graphs-zip/tests/test_service.py b/workflows/tasks/s3-graphs-zip/tests/test_service.py deleted file mode 100644 index 8716fcec..00000000 --- a/workflows/tasks/s3-graphs-zip/tests/test_service.py +++ /dev/null @@ -1,117 +0,0 @@ -from __future__ import annotations - -import pytest - -from main import ( - S3UploadWriter, - SourceObject, - build_destination_key, - build_output_manifest, - chunked_by_estimated_size, - stream_objects_to_zip, -) - - -def test_chunked_by_estimated_size_splits_items_by_target_bytes() -> None: - assert list( - chunked_by_estimated_size( - [ - SourceObject("a", 40), - SourceObject("b", 50), - SourceObject("c", 30), - SourceObject("d", 70), - ], - max_uncompressed_zip_size_bytes=100, - ) - ) == [ - [SourceObject("a", 40), SourceObject("b", 50)], - [SourceObject("c", 30), SourceObject("d", 70)], - ] - - -def test_chunked_by_estimated_size_respects_optional_batch_size_cap() -> None: - assert list( - chunked_by_estimated_size( - [ - SourceObject("a", 10), - SourceObject("b", 10), - SourceObject("c", 10), - ], - max_uncompressed_zip_size_bytes=100, - max_batch_size=2, - ) - ) == [ - [SourceObject("a", 10), SourceObject("b", 10)], - [SourceObject("c", 10)], - ] - - -@pytest.mark.parametrize( - ("max_uncompressed_zip_size_bytes", "max_batch_size", "error_message"), - [ - (0, None, "max_uncompressed_zip_size_bytes must be greater than zero"), - (-1, None, "max_uncompressed_zip_size_bytes must be greater than zero"), - (1, 0, "max_batch_size must be greater than zero"), - (1, -1, "max_batch_size must be greater than zero"), - ], -) -def test_chunked_by_estimated_size_validates_inputs( - max_uncompressed_zip_size_bytes: int, - max_batch_size: int | None, - error_message: str, -) -> None: - with pytest.raises(ValueError, match=error_message): - list( - chunked_by_estimated_size( - [SourceObject("a", 1)], - max_uncompressed_zip_size_bytes=max_uncompressed_zip_size_bytes, - 
max_batch_size=max_batch_size, - ) - ) - - -def test_build_destination_key_normalizes_prefix() -> None: - assert build_destination_key("out", 7) == "out/batch-00007.zip" - assert build_destination_key("/out/", 7) == "out/batch-00007.zip" - assert build_destination_key("", 7) == "batch-00007.zip" - - -def test_build_output_manifest_lists_uploaded_zip_files() -> None: - manifest = build_output_manifest( - source_objects=[], - destination_bucket="dest-bucket", - destination_prefix="/archives/run-1/", - results=[], - ) - - assert manifest == { - "batch_count": 0, - "destination_bucket": "dest-bucket", - "destination_prefix": "archives/run-1", - "total_files": 0, - "total_input_bytes": 0, - "zip_files": [], - "zip_size_bytes": 0, - } - - -@pytest.mark.parametrize("read_chunk_size", [0, -1]) -def test_stream_objects_to_zip_validates_read_chunk_size(read_chunk_size: int) -> None: - with pytest.raises(ValueError, match="read_chunk_size must be greater than zero"): - stream_objects_to_zip( - object(), - source_bucket="source", - source_prefix="graphs/", - destination_bucket="dest", - destination_key="archives/batch-00001.zip", - object_keys=["graphs/1.json"], - batch_number=1, - read_chunk_size=read_chunk_size, - part_size=5 * 1024 * 1024, - ) - - -@pytest.mark.parametrize("part_size", [0, 1024, 5 * 1024 * 1024 - 1]) -def test_s3_upload_writer_validates_minimum_part_size(part_size: int) -> None: - with pytest.raises(ValueError, match="part_size must be at least 5 MiB"): - S3UploadWriter(object(), "dest", "batch.zip", part_size=part_size) diff --git a/workflows/tasks/s3-graphs-zip/uv.lock b/workflows/tasks/s3-graphs-zip/uv.lock deleted file mode 100644 index d5326602..00000000 --- a/workflows/tasks/s3-graphs-zip/uv.lock +++ /dev/null @@ -1,162 +0,0 @@ -version = 1 -revision = 3 -requires-python = ">=3.13" - -[[package]] -name = "boto3" -version = "1.42.62" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "botocore" }, - { name = "jmespath" }, - 
{ name = "s3transfer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f7/7e/c952803c8900f14e6f6158fddbd35da5afb2e3fa68bf498a761e6ba2c2ae/boto3-1.42.62.tar.gz", hash = "sha256:6b26ff56c458685caec3d42adde0549f6a55410e557e1f51bebde5c8abcf3037", size = 112848, upload-time = "2026-03-05T21:20:37.755Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/68/b5e82dedd9c8d53a9542df4e3475d2d3ec331eef4a4a801e9c5fa98b583a/boto3-1.42.62-py3-none-any.whl", hash = "sha256:eef0ee08f30e5ed16d8296719808801a827fa0f3126a3e2a9ef9be9eb5e6a313", size = 140556, upload-time = "2026-03-05T21:20:35.354Z" }, -] - -[[package]] -name = "botocore" -version = "1.42.62" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jmespath" }, - { name = "python-dateutil" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/af/e7/031f2f03f22817f8a8def7ad1caa138979c20ac35062b055274e0a505c3f/botocore-1.42.62.tar.gz", hash = "sha256:c210dc93b0b81bf72cfe745a7b1c8df765d04bd90b4ac6c8707fbb6714141dae", size = 14966114, upload-time = "2026-03-05T21:20:25.518Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/57/9bc5c1aad3a354dd7da54ba52d43ee821badb3deedbea4c5117c4bd05eab/botocore-1.42.62-py3-none-any.whl", hash = "sha256:86d327fded96775268ffe8d8bd6ed96c4a1db86cf24eb64ff85233db12dbc287", size = 14638389, upload-time = "2026-03-05T21:20:22.359Z" }, -] - -[[package]] -name = "colorama" -version = "0.4.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", 
hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, -] - -[[package]] -name = "iniconfig" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, -] - -[[package]] -name = "jmespath" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, -] - -[[package]] -name = "packaging" -version = "26.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, -] - -[[package]] -name = "pluggy" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, -] - -[[package]] -name = "pygments" -version = "2.19.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, -] - -[[package]] -name = "pytest" -version = "9.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "iniconfig" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "pygments" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, -] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, -] - -[[package]] -name = "s3-graphs-zip" -version = "0.1.0" -source = { virtual = "." 
} -dependencies = [ - { name = "boto3" }, -] - -[package.dev-dependencies] -dev = [ - { name = "pytest" }, -] - -[package.metadata] -requires-dist = [{ name = "boto3", specifier = ">=1.37.0" }] - -[package.metadata.requires-dev] -dev = [{ name = "pytest", specifier = ">=8.3.5" }] - -[[package]] -name = "s3transfer" -version = "0.16.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "botocore" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, -] - -[[package]] -name = "six" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, -] - -[[package]] -name = "urllib3" -version = "2.6.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = 
"sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, -] diff --git a/workflows/templates/s3-graphs-zip-workflow-template.yaml b/workflows/templates/s3-graphs-zip-workflow-template.yaml deleted file mode 100644 index fce6ae76..00000000 --- a/workflows/templates/s3-graphs-zip-workflow-template.yaml +++ /dev/null @@ -1,94 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: WorkflowTemplate -metadata: - name: s3-graphs-zip - labels: - app: credential-registry -spec: - serviceAccountName: main-app-service-account - entrypoint: s3-graphs-zip - arguments: - parameters: - - name: task-image - - name: source-bucket - - name: source-prefix - - name: destination-bucket - - name: destination-prefix - - name: max-uncompressed-zip-size-bytes - value: "209715200" - - name: max-input-files - value: "" - - name: batch-size - value: "25000" - - name: max-workers - value: "4" - - name: aws-region - - name: environment - value: "staging" - templates: - - name: s3-graphs-zip - inputs: - parameters: - - name: task-image - - name: source-bucket - - name: source-prefix - - name: destination-bucket - - name: destination-prefix - - name: max-uncompressed-zip-size-bytes - - name: max-input-files - - name: batch-size - - name: max-workers - - name: aws-region - - name: environment - metadata: - labels: - app: credential-registry - workflow: s3-graphs-zip - outputs: - parameters: - - name: zip-manifest - valueFrom: - path: /tmp/argo/zip-manifest.json - container: - image: "{{inputs.parameters.task-image}}" - command: ["python", "/app/main.py"] - imagePullPolicy: Always - args: - - --source-bucket - - 
"{{inputs.parameters.source-bucket}}" - - --source-prefix - - "{{inputs.parameters.source-prefix}}" - - --destination-bucket - - "{{inputs.parameters.destination-bucket}}" - - --destination-prefix - - "{{inputs.parameters.destination-prefix}}" - - --max-uncompressed-zip-size-bytes - - "{{inputs.parameters.max-uncompressed-zip-size-bytes}}" - - --max-input-files - - "{{inputs.parameters.max-input-files}}" - - --batch-size - - "{{inputs.parameters.batch-size}}" - - --max-workers - - "{{inputs.parameters.max-workers}}" - - --manifest-path - - /tmp/argo/zip-manifest.json - env: - - name: AWS_DEFAULT_REGION - value: "{{inputs.parameters.aws-region}}" - - name: ENVIRONMENT - value: "{{inputs.parameters.environment}}" - resources: - requests: - cpu: "250m" - memory: "256Mi" - limits: - cpu: "2000m" - memory: "4Gi" - activeDeadlineSeconds: 10800 - retryStrategy: - limit: 2 - retryPolicy: OnFailure - backoff: - duration: "60s" - factor: 2 - maxDuration: "3h"