Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,25 @@ RUN set -eux; \
done
RUN set -eux; \
# Copy commonly required runtime shared libraries (no loop)
cp -a /usr/lib64/libcurl.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libnghttp2.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libidn2.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libpsl.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libssh2.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libunistring.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libnettle.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libhogweed.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libgnutls.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libgmp.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libbrotlidec.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libbrotlicommon.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libzstd.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libnss3.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libnssutil3.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libsmime3.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libplc4.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libplds4.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libnspr4.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libpq.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libssl.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
cp -a /usr/lib64/libcrypto.so.* /runtime/usr/lib64/ 2>/dev/null || true; \
Expand Down
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ gem 'rake', '~> 13.2'
gem 'rdoc', '~> 6.15.0'
gem 'rubyzip', '~> 2.4', require: 'zip'
gem 'swagger-blocks', '~> 3.0.0'
gem 'typhoeus', '~> 1.5'

# Persistence
gem 'activerecord-import', '~> 2.1'
Expand Down
18 changes: 18 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ GEM
benchmark (0.5.0)
bigdecimal (4.0.1)
builder (3.3.0)
byebug (12.0.0)
childprocess (5.1.0)
logger (~> 1.5)
chronic (0.10.2)
Expand Down Expand Up @@ -135,11 +136,15 @@ GEM
encryptor (3.0.0)
erb (6.0.2)
erubi (1.13.1)
ethon (0.15.0)
ffi (>= 1.15.0)
factory_bot (6.5.6)
activesupport (>= 6.1.0)
faker (3.6.0)
i18n (>= 1.8.11, < 2)
ffi (1.17.3)
ffi (1.17.3-x64-mingw-ucrt)
ffi (1.17.3-x86_64-linux-gnu)
ffi-compiler (1.3.2)
ffi (>= 1.15.5)
rake
Expand Down Expand Up @@ -231,6 +236,7 @@ GEM
logger
mime-types-data (~> 3.2025, >= 3.2025.0507)
mime-types-data (3.2026.0224)
mini_portile2 (2.8.9)
minitest (6.0.2)
drb (~> 2.0)
prism (~> 1.5)
Expand All @@ -243,8 +249,13 @@ GEM
netrc (0.11.0)
newrelic_rpm (9.24.0)
nio4r (2.7.5)
nokogiri (1.19.1)
mini_portile2 (~> 2.8.2)
racc (~> 1.4)
nokogiri (1.19.1-x64-mingw-ucrt)
racc (~> 1.4)
nokogiri (1.19.1-x86_64-linux-gnu)
racc (~> 1.4)
ostruct (0.6.3)
overcommit (0.68.0)
childprocess (>= 0.6.3, < 6)
Expand All @@ -257,6 +268,8 @@ GEM
parser (3.3.10.2)
ast (~> 2.4.1)
racc
pg (1.6.3)
pg (1.6.3-x86_64-linux)
pg_search (2.3.7)
activerecord (>= 6.1)
activesupport (>= 6.1)
Expand Down Expand Up @@ -393,6 +406,8 @@ GEM
readline
sync
tsort (0.2.0)
typhoeus (1.5.0)
ethon (>= 0.9.0, < 0.16.0)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unicode-display_width (3.2.0)
Expand All @@ -416,7 +431,9 @@ GEM
zeitwerk (2.7.5)

PLATFORMS
aarch64-linux
x64-mingw-ucrt
x86_64-linux

DEPENDENCIES
activejob (= 8.0.2.1)
Expand Down Expand Up @@ -491,6 +508,7 @@ DEPENDENCIES
simplecov (>= 0.21.2)
simplecov_json_formatter
swagger-blocks (~> 3.0.0)
typhoeus (~> 1.5)
uuid (~> 2.3)
vcr (~> 6.3)
virtus (~> 2.0)
Expand Down
7 changes: 7 additions & 0 deletions app/api/entities/envelope_download.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ class EnvelopeDownload < Grape::Entity
expose :display_status, as: :status,
documentation: { type: 'string', desc: 'Status of download' }

expose :last_published_at,
documentation: { type: 'string', desc: 'Timestamp of the latest publish event included in this download' }

expose :enqueued_at,
documentation: { type: 'string', desc: 'When the download was enqueued' },
if: ->(object) { object.pending? }
Expand All @@ -20,6 +23,10 @@ class EnvelopeDownload < Grape::Entity
expose :url,
documentation: { type: 'string', desc: 'AWS S3 URL' },
if: ->(object) { object.finished? }

expose :zip_files,
documentation: { type: 'array', is_array: true, desc: 'ZIP files produced by the workflow' },
if: ->(object) { object.finished? }
end
end
end
38 changes: 33 additions & 5 deletions app/api/v1/envelopes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
require 'v1/revisions'
require 'v1/envelope_events'
require 'download_envelopes_job'
require 'sync_envelope_download_workflow_status'

module API
module V1
Expand Down Expand Up @@ -79,17 +80,44 @@ class Envelopes < MountableAPI

# Runs SyncEnvelopeDownloadWorkflowStatus for the current download and then renders the
# record with the EnvelopeDownload entity.
# NOTE(review): presumably the sync step refreshes the record from the workflow's state
# before it is presented — that service is not visible here; confirm.
desc 'Returns the envelope download'
get do
SyncEnvelopeDownloadWorkflowStatus.call(
envelope_download: @envelope_download
)

present @envelope_download, with: API::Entities::EnvelopeDownload
end

# Requests a new download for the current community. Responds with :created when a new
# run is enqueued by the guarded branch below and with :ok otherwise; the record is
# rendered in both cases.
desc 'Starts an envelope download'
post do
# NOTE(review): this unconditional update! (and the unconditional perform_later further
# down) sits next to a guarded version of the same statement. It looks like the old and
# new sides of a diff shown together — confirm which lines are actually in the file.
@envelope_download.update!(
enqueued_at: Time.current,
status: :pending
)
should_enqueue = false
response_status = :ok

# The check and the state reset happen under a row lock on the download record.
@envelope_download.with_lock do
# A download counts as active once it has been enqueued and is pending or in progress.
active_download = @envelope_download.enqueued_at.present? &&
(@envelope_download.in_progress? || @envelope_download.pending?)
current_published_at = Envelope.last_publish_event_at(current_community)
# Falls back to the epoch when the download has no last_published_at yet.
last_published_at = @envelope_download.last_published_at || Time.at(0)

# Safe navigation: when current_published_at is nil the condition is falsy and nothing
# is enqueued.
if !active_download && current_published_at&.>(last_published_at)
# Clears the previous run's workflow reference, error details and outputs, and
# records the publish timestamp this run is based on.
@envelope_download.update!(
argo_workflow_name: nil,
argo_workflow_namespace: nil,
enqueued_at: Time.current,
finished_at: nil,
internal_error_backtrace: [],
internal_error_message: nil,
last_published_at: current_published_at,
status: :pending,
url: nil,
zip_files: []
)
should_enqueue = true
response_status = :created
end
end

DownloadEnvelopesJob.perform_later(@envelope_download.id)
# The guarded enqueue runs after the with_lock block has finished.
DownloadEnvelopesJob.perform_later(@envelope_download.id) if should_enqueue
status response_status
present @envelope_download, with: API::Entities::EnvelopeDownload
end
end
Expand Down
4 changes: 2 additions & 2 deletions app/jobs/download_envelopes_job.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
require 'download_envelopes'
require 'submit_envelope_download_workflow'
require 'envelope_download'

# Create a ZIP archive containing all of the envelopes from a certain community,
Expand All @@ -10,7 +10,7 @@ def perform(envelope_download_id)
envelope_download = EnvelopeDownload.find_by(id: envelope_download_id)
return unless envelope_download

DownloadEnvelopes.call(envelope_download:)
SubmitEnvelopeDownloadWorkflow.call(envelope_download:)
rescue StandardError => e
Airbrake.notify(e, envelope_download_id:)
raise e
Expand Down
6 changes: 6 additions & 0 deletions app/models/envelope.rb
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ def self.select_scope(include_deleted = nil)
end
end

# Looks up the most recent `created_at` among EnvelopeVersion rows whose
# item_type is 'Envelope' and that belong to the given community.
#
# @param envelope_community [#id] community whose versions are inspected
# @return the value of `maximum(:created_at)` on that scope
#   (presumably nil when the community has no matching rows — confirm)
def self.last_publish_event_at(envelope_community)
  community_versions = EnvelopeVersion.where(
    item_type: 'Envelope',
    envelope_community_id: envelope_community.id
  )

  community_versions.maximum(:created_at)
end

def envelope_community_name
envelope_community.name
end
Expand Down
56 changes: 56 additions & 0 deletions app/services/argo_workflows_client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
require 'argo_workflows_api_client'
require 'uri'

# Small client for the Argo Workflows API, built on the generated
# ArgoWorkflowsApiClient bindings.
#
# Environment variables:
#   ARGO_WORKFLOWS_NAMESPACE       - namespace used for every request (always read)
#
# Read only when no +configuration+ is injected:
#   ARGO_WORKFLOWS_BASE_URL        - scheme, host, port and base path of the server
#   ARGO_WORKFLOWS_TOKEN           - stored as the 'Authorization' API key with the
#                                    'Bearer' prefix
#   ARGO_WORKFLOWS_TIMEOUT_SECONDS - client timeout, 30 when unset
#   ARGO_WORKFLOWS_VERIFY_SSL      - "1", "true", "yes" or "on" (case-insensitive)
#                                    turns TLS peer and host verification on;
#                                    verification stays off when unset
class ArgoWorkflowsClient
  # Values of ARGO_WORKFLOWS_VERIFY_SSL that enable TLS verification.
  TRUTHY_ENV_VALUES = %w[1 true yes on].freeze
  private_constant :TRUTHY_ENV_VALUES

  attr_reader :namespace

  # @param configuration [ArgoWorkflowsApiClient::Configuration] client settings;
  #   built from the environment when omitted
  # @raise [KeyError] when a required environment variable is missing
  def initialize(configuration: build_configuration)
    @namespace = ENV.fetch('ARGO_WORKFLOWS_NAMESPACE')
    @workflow_service_api = ArgoWorkflowsApiClient::WorkflowServiceApi.new(
      ArgoWorkflowsApiClient::ApiClient.new(configuration)
    )
  end

  # Fetches a workflow by name from the configured namespace.
  #
  # @param name [String] workflow name
  # @return the response of the generated client, requested with return_type 'Object'
  def get_workflow(name:)
    @workflow_service_api.workflow_service_get_workflow(
      namespace,
      name,
      return_type: 'Object'
    )
  end

  # Submits a new workflow from a WorkflowTemplate in the configured namespace.
  #
  # @param template_name [String] name of the WorkflowTemplate to run
  # @param parameters [Hash] workflow parameters; each pair is sent as "key=value"
  # @param generate_name [String] value passed as the submit option generateName
  # @return the response of the generated client, requested with return_type 'Object'
  def submit_workflow(template_name:, parameters:, generate_name:)
    @workflow_service_api.workflow_service_submit_workflow(
      {
        namespace:,
        resourceKind: 'WorkflowTemplate',
        resourceName: template_name,
        submitOptions: {
          generateName: generate_name,
          parameters: parameters.map { |key, value| "#{key}=#{value}" }
        }
      },
      namespace,
      return_type: 'Object'
    )
  end

  private

  # Builds the generated client's configuration from the environment.
  def build_configuration
    base_uri = URI.parse(ENV.fetch('ARGO_WORKFLOWS_BASE_URL'))
    verify_ssl = verify_ssl?

    ArgoWorkflowsApiClient::Configuration.new.tap do |config|
      config.scheme = base_uri.scheme
      config.host = [base_uri.host, base_uri.port].compact.join(':')
      config.base_path = base_uri.path
      config.api_key['Authorization'] = ENV.fetch('ARGO_WORKFLOWS_TOKEN')
      config.api_key_prefix['Authorization'] = 'Bearer'
      config.timeout = ENV.fetch('ARGO_WORKFLOWS_TIMEOUT_SECONDS', 30).to_i

      # Verification is off by default because the service is expected to run in a
      # secure environment; set ARGO_WORKFLOWS_VERIFY_SSL to enable it, since the
      # bearer token travels over this connection.
      config.verify_ssl = verify_ssl
      config.verify_ssl_host = verify_ssl
    end
  end

  # Whether TLS verification was requested through ARGO_WORKFLOWS_VERIFY_SSL.
  def verify_ssl?
    TRUTHY_ENV_VALUES.include?(ENV.fetch('ARGO_WORKFLOWS_VERIFY_SSL', 'false').strip.downcase)
  end
end
87 changes: 87 additions & 0 deletions app/services/submit_envelope_download_workflow.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
require 'argo_workflows_client'

# Submits the Argo workflow for an envelope download and records the outcome of
# the submission on the download record.
class SubmitEnvelopeDownloadWorkflow
  # Fallbacks used when the matching ARGO_WORKFLOWS_* variable is not set.
  DEFAULT_BATCH_SIZE = '25000'
  DEFAULT_MAX_WORKERS = '4'
  DEFAULT_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES = (200 * 1024 * 1024).to_s
  private_constant :DEFAULT_BATCH_SIZE, :DEFAULT_MAX_WORKERS,
                   :DEFAULT_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES

  attr_reader :envelope_download

  # Convenience entry point: builds the service and runs it.
  def self.call(envelope_download:)
    new(envelope_download).call
  end

  def initialize(envelope_download)
    @envelope_download = envelope_download
  end

  # Submits the workflow while holding the record's lock and marks the download
  # as in progress. Returns the record untouched when it is already in progress
  # with a workflow name. On any StandardError the download is marked finished
  # with the error details and the error is re-raised.
  def call
    envelope_download.with_lock do
      return envelope_download if workflow_already_started?

      mark_in_progress(submitted_workflow_name)
    end
  rescue StandardError => e
    record_failure(e)
    raise
  end

  private

  # Submits the workflow and returns the name from the response; raises when
  # the response carries no name.
  def submitted_workflow_name
    response = client.submit_workflow(
      template_name: ENV.fetch('ARGO_WORKFLOWS_TEMPLATE_NAME'),
      generate_name: "#{community_name.tr('_', '-')}-download-",
      parameters: parameters
    )
    name = response.dig(:metadata, :name)
    raise 'Argo workflow submission did not return a workflow name' if name.blank?

    name
  end

  # Stores the workflow reference and clears leftovers from a previous run.
  def mark_in_progress(workflow_name)
    envelope_download.update!(
      cleared_outputs.merge(
        argo_workflow_name: workflow_name,
        argo_workflow_namespace: client.namespace,
        finished_at: nil,
        internal_error_backtrace: [],
        internal_error_message: nil,
        started_at: Time.current,
        status: :in_progress
      )
    )
  end

  # Marks the download as finished and keeps the error message and backtrace.
  def record_failure(error)
    envelope_download.update!(
      cleared_outputs.merge(
        argo_workflow_name: nil,
        argo_workflow_namespace: nil,
        finished_at: Time.current,
        internal_error_backtrace: Array(error.backtrace),
        internal_error_message: error.message,
        status: :finished
      )
    )
  end

  # Attributes that drop any previously produced files.
  def cleared_outputs
    { zip_files: [], url: nil }
  end

  def client
    @client ||= ArgoWorkflowsClient.new
  end

  def community_name
    envelope_download.envelope_community.name
  end

  def destination_prefix
    "#{community_name}/downloads/#{envelope_download.id}"
  end

  # Workflow parameters, keyed by the parameter names passed to the client.
  def parameters
    {
      'batch-size' => ENV.fetch('ARGO_WORKFLOWS_BATCH_SIZE', DEFAULT_BATCH_SIZE),
      'aws-region' => ENV.fetch('AWS_REGION'),
      'destination-bucket' => ENV.fetch('ENVELOPE_DOWNLOADS_BUCKET'),
      'destination-prefix' => destination_prefix,
      'environment' => MR.env,
      'max-uncompressed-zip-size-bytes' => ENV.fetch(
        'ARGO_WORKFLOWS_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES',
        DEFAULT_MAX_UNCOMPRESSED_ZIP_SIZE_BYTES
      ),
      'max-workers' => ENV.fetch('ARGO_WORKFLOWS_MAX_WORKERS', DEFAULT_MAX_WORKERS),
      'source-bucket' => ENV.fetch('ENVELOPE_GRAPHS_BUCKET'),
      'source-prefix' => community_name,
      'task-image' => ENV.fetch('ARGO_WORKFLOWS_TASK_IMAGE')
    }
  end

  # True when the record is in progress and already references a workflow.
  def workflow_already_started?
    return false unless envelope_download.in_progress?

    envelope_download.argo_workflow_name.present?
  end
end
Loading
Loading