From 9b40be6e8847fccf357b9d978241cf75dd62d8b8 Mon Sep 17 00:00:00 2001 From: erick-gege Date: Thu, 4 Jun 2026 15:24:06 -0500 Subject: [PATCH] fix: masked env vars passed as literal string to spiders --- estela-api/api/serializers/cronjob.py | 20 +++++++++++++++++++ estela-api/api/serializers/job_specific.py | 6 ++++++ estela-api/core/cronjob.py | 10 +++++++--- estela-api/core/tasks.py | 13 ++++++++++++ estela-api/docs/api.yaml | 2 +- .../src/pages/CronjobCreateModal/index.tsx | 7 +++++++ estela-web/src/pages/JobCreateModal/index.tsx | 8 ++++++++ .../generated-api/models/SpiderJobEnvVar.ts | 3 ++- 8 files changed, 64 insertions(+), 5 deletions(-) diff --git a/estela-api/api/serializers/cronjob.py b/estela-api/api/serializers/cronjob.py index 759e8059..0e734097 100644 --- a/estela-api/api/serializers/cronjob.py +++ b/estela-api/api/serializers/cronjob.py @@ -1,3 +1,5 @@ +import logging + from croniter import croniter from rest_framework import serializers @@ -16,6 +18,8 @@ SpiderJobTag, ) +logger = logging.getLogger(__name__) + class SpiderCronJobSerializer(serializers.ModelSerializer): cargs = SpiderJobArgSerializer( @@ -107,6 +111,22 @@ def create(self, validated_data): SpiderJobArg.objects.create(cronjob=cronjob, **arg) for env_var in env_vars_data: + evid = env_var.pop("evid", None) + if evid: + source = SpiderJobEnvVar.objects.filter(evid=evid).first() + if source: + SpiderJobEnvVar.objects.create( + cronjob=cronjob, + name=source.name, + value=source.value, + masked=source.masked, + ) + continue + logger.warning( + "evid=%s not found when creating env var '%s' for cronjob %s — skipping.", + evid, env_var.get("name"), cronjob.cjid, + ) + continue SpiderJobEnvVar.objects.create(cronjob=cronjob, **env_var) for tag_data in tags_data: diff --git a/estela-api/api/serializers/job_specific.py b/estela-api/api/serializers/job_specific.py index 415f6068..a9e3f355 100644 --- a/estela-api/api/serializers/job_specific.py +++ b/estela-api/api/serializers/job_specific.py @@ -10,6 +10,12 @@ class Meta: class SpiderJobEnvVarSerializer(serializers.ModelSerializer): + evid = serializers.IntegerField( + required=False, + allow_null=True, + help_text="A unique integer value identifying this job env variable.", + ) + class Meta: model = SpiderJobEnvVar fields = ("evid", "name", "value", "masked") diff --git a/estela-api/core/cronjob.py b/estela-api/core/cronjob.py index 1fe6ede3..29ae65e7 100644 --- a/estela-api/core/cronjob.py +++ b/estela-api/core/cronjob.py @@ -2,13 +2,14 @@ from django_celery_beat.models import CrontabSchedule, PeriodicTask +from core.models import SpiderCronJob from core.tasks import launch_job def create_cronjob(name, key, args, env_vars, tags, schedule, data_expiry_days=None, resource_tier=None): minute, hour, day_of_month, month, day_of_week = schedule.split(" ") cjid, sid, pid = key.split(".") - data = {"cronjob": cjid, "args": args, "env_vars": env_vars, "tags": tags} + data = {"cronjob": cjid, "args": args, "env_vars": [], "tags": tags} if resource_tier: data["resource_tier"] = resource_tier schedule, _ = CrontabSchedule.objects.get_or_create( @@ -28,8 +29,11 @@ def create_cronjob(name, key, args, env_vars, tags, schedule, data_expiry_days=N def run_cronjob_once(data): - env_vars = data.get("cenv_vars") or [] - env_vars = [ev for ev in env_vars if ev.get("value") is not None] + cronjob = SpiderCronJob.objects.get(cjid=data.get("cjid")) + env_vars = [ + {"name": ev.name, "value": ev.value, "masked": ev.masked} + for ev in cronjob.cenv_vars.all() + ] _data = { "cronjob": data.get("cjid"), "args": data.get("cargs"), diff --git a/estela-api/core/tasks.py b/estela-api/core/tasks.py index c905dc2e..66f10b22 100644 --- a/estela-api/core/tasks.py +++ b/estela-api/core/tasks.py @@ -25,6 +25,7 @@ Project, ProxyProvider, Spider, + SpiderCronJob, SpiderJob, UsageRecord, ) @@ -274,6 +275,18 @@ def launch_job(sid_, data_, data_expiry_days=None, token=None): resource_tier = data_.pop("resource_tier", None) + cjid = data_.get("cronjob") + if cjid: + cronjob = SpiderCronJob.objects.filter(cjid=cjid).first() + if cronjob: + cronjob_env_vars = { + ev.name: {"name": ev.name, "value": ev.value, "masked": ev.masked} + for ev in cronjob.cenv_vars.all() + } + extra_env_vars = {ev["name"]: ev for ev in data_.get("env_vars", [])} + cronjob_env_vars.update(extra_env_vars) + data_["env_vars"] = list(cronjob_env_vars.values()) + serializer = SpiderJobCreateSerializer(data=data_) serializer.is_valid(raise_exception=True) diff --git a/estela-api/docs/api.yaml b/estela-api/docs/api.yaml index 8cde91f4..cce201a9 100644 --- a/estela-api/docs/api.yaml +++ b/estela-api/docs/api.yaml @@ -1886,7 +1886,7 @@ definitions: title: Evid description: A unique integer value identifying this job env variable. type: integer - readOnly: true + x-nullable: true name: title: Name description: Env variable name. diff --git a/estela-web/src/pages/CronjobCreateModal/index.tsx b/estela-web/src/pages/CronjobCreateModal/index.tsx index ca6e34c8..373e4f0c 100644 --- a/estela-web/src/pages/CronjobCreateModal/index.tsx +++ b/estela-web/src/pages/CronjobCreateModal/index.tsx @@ -87,6 +87,7 @@ interface ArgsData { } interface EnvVarsData { + evid?: number; name: string; value: string; key: number; @@ -678,14 +679,20 @@ export default function CronjobCreateModal({ openModal, spider, projectId }: Cro cronjobData.envVars.map((envVar: EnvVarsData) => { const index = envVarsData.findIndex((element: SpiderJobEnvVar) => element.name === envVar.name); + const resolvedEvid = + envVar.masked && envVar.value === "__MASKED__" + ? envVar.evid ?? (index !== -1 ? envVarsData[index].evid : undefined) + : undefined; if (index != -1) { envVarsData[index] = { + evid: resolvedEvid, name: envVar.name, value: envVar.value, masked: envVar.masked, }; } else { envVarsData.push({ + evid: resolvedEvid, name: envVar.name, value: envVar.value, masked: envVar.masked, diff --git a/estela-web/src/pages/JobCreateModal/index.tsx b/estela-web/src/pages/JobCreateModal/index.tsx index 2996a940..6e1fd4f1 100644 --- a/estela-web/src/pages/JobCreateModal/index.tsx +++ b/estela-web/src/pages/JobCreateModal/index.tsx @@ -48,6 +48,7 @@ interface ArgsData { } interface EnvVarsData { + evid?: number; name: string; value: string; key: number; @@ -138,6 +139,7 @@ export default function JobCreateModal({ const [jobData, setJobData] = useState({ args: initialArgs.map((arg, index) => ({ ...arg, key: index })), envVars: initialEnvVars.map((envVar, index) => ({ + evid: envVar.evid, name: envVar.name, value: envVar.masked ? "__MASKED__" : envVar.value, masked: envVar.masked || false, @@ -448,14 +450,20 @@ export default function JobCreateModal({ jobData.envVars.map((envVar: EnvVarsData) => { const index = envVarsData.findIndex((element: SpiderJobEnvVar) => element.name === envVar.name); + const resolvedEvid = + envVar.masked && envVar.value === "__MASKED__" + ? envVar.evid ?? (index !== -1 ? envVarsData[index].evid : undefined) + : undefined; if (index != -1) { envVarsData[index] = { + evid: resolvedEvid, name: envVar.name, value: envVar.value, masked: envVar.masked, }; } else { envVarsData.push({ + evid: resolvedEvid, name: envVar.name, value: envVar.value, masked: envVar.masked, diff --git a/estela-web/src/services/api/generated-api/models/SpiderJobEnvVar.ts b/estela-web/src/services/api/generated-api/models/SpiderJobEnvVar.ts index 0f69797f..cc9c4a4f 100644 --- a/estela-web/src/services/api/generated-api/models/SpiderJobEnvVar.ts +++ b/estela-web/src/services/api/generated-api/models/SpiderJobEnvVar.ts @@ -24,7 +24,7 @@ export interface SpiderJobEnvVar { * @type {number} * @memberof SpiderJobEnvVar */ - readonly evid?: number; + evid?: number | null; /** * Env variable name. * @type {string} @@ -71,6 +71,7 @@ export function SpiderJobEnvVarToJSON(value?: SpiderJobEnvVar | null): any { } return { + 'evid': value.evid, 'name': value.name, 'value': value.value, 'masked': value.masked,