diff --git a/estela-api/api/serializers/deploy.py b/estela-api/api/serializers/deploy.py index 955a9eff..5571c23c 100644 --- a/estela-api/api/serializers/deploy.py +++ b/estela-api/api/serializers/deploy.py @@ -1,8 +1,12 @@ from rest_framework import serializers +from django.conf import settings from api.serializers.project import UserDetailSerializer from api.serializers.spider import SpiderSerializer from core.models import Deploy, Spider +from engines.kubernetes import KubernetesEngine + +_k8s = KubernetesEngine() class DeploySerializer(serializers.ModelSerializer): @@ -14,6 +18,15 @@ class DeploySerializer(serializers.ModelSerializer): def get_spiders_count(self, obj): return obj.spiders.count() + def to_representation(self, instance): + data = super().to_representation(instance) + if instance.status == Deploy.BUILDING_STATUS: + namespace = getattr(settings, "K8S_NAMESPACE", "default") + stage = _k8s.get_deploy_stage(instance.did, namespace) + if stage: + data["status"] = stage + return data + class Meta: model = Deploy fields = ["did", "project", "user", "status", "spiders_count", "created"] diff --git a/estela-api/api/serializers/job.py b/estela-api/api/serializers/job.py index 12fe4913..d71abdcd 100644 --- a/estela-api/api/serializers/job.py +++ b/estela-api/api/serializers/job.py @@ -8,6 +8,8 @@ SpiderJobEnvVarSerializer, SpiderJobTagSerializer, ) +import redis as redis_lib +from django.conf import settings from api.utils import ( delete_stats_from_redis, update_stats_from_redis, @@ -38,6 +40,7 @@ class SpiderJobSerializer(serializers.ModelSerializer): spider = serializers.SerializerMethodField("get_spider") storage_size = serializers.SerializerMethodField("get_storage_size") database_insertion_progress = serializers.SerializerMethodField("get_database_insertion_progress") + peak_memory = serializers.SerializerMethodField("get_peak_memory") class Meta: model = SpiderJob @@ -60,6 +63,7 @@ class Meta: "database_insertion_progress", "storage_size", "resource_tier", + "peak_memory", ) def get_spider(self, instance): @@ -85,6 +89,37 @@ def get_database_insertion_progress(self, instance): # Return the actual database insertion progress value from the model return instance.database_insertion_progress + def get_peak_memory(self, instance): + if instance.status == SpiderJob.RUNNING_STATUS: + try: + redis_conn = redis_lib.from_url(settings.REDIS_URL) + raw_stats = redis_conn.hgetall(f"scrapy_stats_{instance.key}") + if raw_stats: + job_stats = {key.decode(): value.decode() for key, value in raw_stats.items()} + mem = job_stats.get("resources/peak_memory_bytes") or job_stats.get("memusage/max") + if mem: + mem_bytes = int(float(mem)) + if mem_bytes > 0: + return mem_bytes + except Exception: + pass + else: + try: + if spiderdata_db_client.get_connection(): + pid = str(instance.spider.project.pid) + job_collection_name = get_collection_name(instance, "stats") + job_stats = spiderdata_db_client.get_job_stats(pid, job_collection_name) + if job_stats: + for stat in job_stats: + mem = stat.get("resources/peak_memory_bytes") + if mem is not None: + mem_bytes = int(float(mem)) + if mem_bytes > 0: + return mem_bytes + except Exception: + pass + return None + class SpiderJobCreateEnvVarSerializer(serializers.Serializer): evid = serializers.IntegerField(required=False, help_text="Env var id.") diff --git a/estela-api/core/models.py b/estela-api/core/models.py index 3f7f8f56..aac8b036 100644 --- a/estela-api/core/models.py +++ b/estela-api/core/models.py @@ -205,11 +205,13 @@ def last_modified(self): class Deploy(models.Model): SUCCESS_STATUS = "SUCCESS" BUILDING_STATUS = "BUILDING" + DOWNLOADING_STATUS = "DOWNLOADING" FAILURE_STATUS = "FAILURE" CANCELED_STATUS = "CANCELED" STATUS_OPTIONS = [ (SUCCESS_STATUS, "Success"), (BUILDING_STATUS, "Building"), + (DOWNLOADING_STATUS, "Downloading"), (FAILURE_STATUS, "Failure"), (CANCELED_STATUS, "Canceled"), ] diff --git a/estela-api/core/tasks.py b/estela-api/core/tasks.py index 654bd053..d930d47b 100644 --- a/estela-api/core/tasks.py +++ b/estela-api/core/tasks.py @@ -587,5 +587,5 @@ def update_mongodb_insertion_progress(): logging.info(f"Job {job.jid} excluded after {stall_count} cycles with no progress") except Exception as e: logging.error(f"Error updating progress for job {job.jid}: {str(e)}") - + logging.info(f"Completed MongoDB insertion progress updates") diff --git a/estela-api/core/views.py b/estela-api/core/views.py index 1c679ad2..f5f81d23 100644 --- a/estela-api/core/views.py +++ b/estela-api/core/views.py @@ -43,6 +43,7 @@ def launch_deploy_job(pid, did, container_image): ) + def send_verification_email(user, request): mail_subject = "Activate your estela account." to_email = user.email diff --git a/estela-api/docs/api.yaml b/estela-api/docs/api.yaml index 1d57037a..b0134ee0 100644 --- a/estela-api/docs/api.yaml +++ b/estela-api/docs/api.yaml @@ -1753,6 +1753,7 @@ definitions: memory_quota: title: Memory quota type: integer + readOnly: true User: required: - username @@ -2254,6 +2255,7 @@ definitions: enum: - SUCCESS - BUILDING + - DOWNLOADING - FAILURE - CANCELED spiders_count: @@ -2282,6 +2284,7 @@ definitions: enum: - SUCCESS - BUILDING + - DOWNLOADING - FAILURE - CANCELED created: @@ -2311,6 +2314,7 @@ definitions: enum: - SUCCESS - BUILDING + - DOWNLOADING - FAILURE - CANCELED spiders_names: @@ -2429,6 +2433,10 @@ definitions: - XLARGE - HUGE - XHUGE + peak_memory: + title: Peak memory + type: string + readOnly: true ProjectJob: required: - results diff --git a/estela-api/engines/kubernetes.py b/estela-api/engines/kubernetes.py index f80571df..c12d6f53 100644 --- a/estela-api/engines/kubernetes.py +++ b/estela-api/engines/kubernetes.py @@ -230,6 +230,23 @@ def read_job_status(self, name, namespace="default", api_instance=None): return self.Status(api_response.status) + def get_deploy_stage(self, did, namespace="default"): + try: + core_api = client.CoreV1Api() + batch_api = self.get_api_instance() + job_name = f"deploy-project-{did}" + batch_api.read_namespaced_job(job_name, namespace) + pods = core_api.list_namespaced_pod(namespace, label_selector=f"job-name={job_name}") + if not pods.items: + return None + init_statuses = pods.items[0].status.init_container_statuses or [] + for i, ics in enumerate(init_statuses): + if ics.state and (ics.state.running or ics.state.waiting): + return "DOWNLOADING" if i == 0 else "BUILDING" + except Exception: + pass + return None + def _create_build_volumes(self): """Create shared volume for build containers""" return [ diff --git a/estela-web/src/pages/DeployListPage/index.tsx b/estela-web/src/pages/DeployListPage/index.tsx index 1a4aa75e..d1ea53aa 100644 --- a/estela-web/src/pages/DeployListPage/index.tsx +++ b/estela-web/src/pages/DeployListPage/index.tsx @@ -10,9 +10,16 @@ import WelcomeDeploy from "../../assets/images/welcomeDeploy.svg"; import "./styles.scss"; import { API_BASE_URL } from "../../constants"; import { ApiService, AuthService } from "../../services"; -import { ApiProjectsDeploysListRequest, Deploy, UserDetail } from "../../services/api"; +import { + ApiProjectsDeploysListRequest, + ApiProjectsJobsRequest, + Deploy, + DeployStatusEnum, + UserDetail, +} from "../../services/api"; import { resourceNotAllowedNotification, Spin, PaginationItem } from "../../shared"; import { convertDateToString } from "../../utils"; +import { TourStore } from "../../tour"; const { Content } = Layout; const { Text, Paragraph } = Typography; @@ -29,6 +36,52 @@ interface RouteParams { projectId: string; } +const STAGE_LABELS: Record = { + DOWNLOADING: "Downloading project", + BUILDING: "Building image", +}; + +const STAGE_STEP: Record = { + DOWNLOADING: 1, + BUILDING: 2, +}; + +const DeployStageProgress = ({ stage }: { stage: string }) => { + const label = STAGE_LABELS[stage] || stage; + const stepIndex = STAGE_STEP[stage] || 1; + + return ( +
+
+ + + {label} · step {stepIndex} of 2 + +
+
+ {[1, 2].map((step) => ( +
+ ))} +
+
+ ); +}; + +const ACTIVE_STAGES = [DeployStatusEnum.Downloading, DeployStatusEnum.Building]; + export class DeployListPage extends Component, DeployListPageState> { PAGE_SIZE = 10; state: DeployListPageState = { @@ -83,8 +136,8 @@ export class DeployListPage extends Component, dataIndex: "status", render: (state: string): ReactElement => ( - {state === "BUILDING" ? ( - Waiting + {ACTIVE_STAGES.includes(state as DeployStatusEnum) ? ( + ) : state === "SUCCESS" ? ( Completed ) : ( @@ -98,6 +151,7 @@ export class DeployListPage extends Component, ]; async componentDidMount(): Promise { + TourStore.setRoute("deploys"); await this.getProjectDeploys(1); } @@ -132,6 +186,20 @@ export class DeployListPage extends Component, loaded: true, modalIsOpen: results.count === 0, }); + TourStore.setDeploys(deploys); + + const successCount = deploys.filter((d) => d.status === "SUCCESS").length; + if (successCount === 1) { + const jobsParams: ApiProjectsJobsRequest = { pid: this.projectId, page: 1, pageSize: 1 }; + this.apiService + .apiProjectsJobs(jobsParams) + .then((response) => { + TourStore.setProjectHasJobs(response.count > 0); + }) + .catch(() => TourStore.setProjectHasJobs(false)); + } else { + TourStore.setProjectHasJobs(false); + } }, (error: unknown) => { error; diff --git a/estela-web/src/pages/DeployListPage/styles.scss b/estela-web/src/pages/DeployListPage/styles.scss index c72a36ac..a4ed3d7e 100644 --- a/estela-web/src/pages/DeployListPage/styles.scss +++ b/estela-web/src/pages/DeployListPage/styles.scss @@ -4,4 +4,24 @@ .ant-table-thead .ant-table-cell { background-color: white; border:none; +} + +.deploy-stage-dot { + width: 8px; + height: 8px; + border-radius: 50%; + background-color: #4D47C3; + animation: pulseDot 2s infinite; + flex-shrink: 0; +} + +@keyframes pulseDot { + 0% { box-shadow: 0 0 0 0 rgba(77,71,195,0.55); } + 70% { box-shadow: 0 0 0 6px rgba(77,71,195,0); } + 100% { box-shadow: 0 0 0 0 rgba(77,71,195,0); } +} + +@keyframes shimmer { + 0% { background-position: -120px 0; } + 100% { background-position: 120px 0; } } \ No newline at end of file diff --git a/estela-web/src/pages/JobCreateModal/help.ts b/estela-web/src/pages/JobCreateModal/help.ts new file mode 100644 index 00000000..c28e9d7f --- /dev/null +++ b/estela-web/src/pages/JobCreateModal/help.ts @@ -0,0 +1,33 @@ +export const JOB_FIELD_HELP = { + spider: "The spider to run. Each project can have multiple spiders, one per scraping target.", + + persistence: + "How long the items extracted by this job will be retained before being deleted. " + + "Choose 'Forever' to keep them indefinitely.", + + tier: + "CPU and memory allocated to this job. Higher tiers run faster but consume more " + + "credits. DEFAULT is fine for most spiders.", + + args: + "Command-line arguments passed to your spider on start (e.g. start_url=https://example.com). " + + "Available in the spider via self..", + + envProject: + "Variables defined at the project level. They are inherited by every job, in every " + + "spider of this project.", + + envSpider: + "Variables defined on this spider. They are inherited by every job of this spider " + + "and override project variables.", + + envJob: + "Variables for this job only. They override spider and project variables. Use the " + + "eye icon to mask sensitive values like API keys.", + + proxy: "Route this job's requests through a proxy server. Useful for IP rotation or " + "geo-targeted scraping.", + + tags: + "Labels for organizing and filtering jobs later (e.g. 'production', 'monitoring'). " + + "Pure metadata, no behaviour change.", +}; diff --git a/estela-web/src/pages/JobCreateModal/index.tsx b/estela-web/src/pages/JobCreateModal/index.tsx index a3f33b7e..2996a940 100644 --- a/estela-web/src/pages/JobCreateModal/index.tsx +++ b/estela-web/src/pages/JobCreateModal/index.tsx @@ -1,6 +1,6 @@ -import React, { useState, useEffect } from "react"; +import React, { useState, useEffect, useRef } from "react"; import { Modal, Button, message, Row, Select, Space, Input, Tag, Checkbox, Tooltip } from "antd"; -import { EyeInvisibleOutlined } from "@ant-design/icons"; +import { EyeInvisibleOutlined, QuestionCircleOutlined } from "@ant-design/icons"; import type { CheckboxChangeEvent } from "antd/es/checkbox"; import { ApiProjectsSpidersJobsCreateRequest, @@ -20,6 +20,8 @@ import { ProxySettings } from "../../components/ProxySettingsPage"; import { resourceNotAllowedNotification, invalidDataNotification, incorrectDataNotification } from "../../shared"; import { DEFAULT_RESOURCE_TIER, PREDEFINED_TIERS } from "../../constants"; import { checkExternalError } from "../../defaultComponents"; +import { TourStore } from "../../tour"; +import { JOB_FIELD_HELP } from "./help"; import Run from "../../assets/icons/play.svg"; import Add from "../../assets/icons/add.svg"; @@ -101,6 +103,17 @@ const dataPersistenceOptions = [ { label: "Forever", key: 7, value: 720 }, ]; +function FieldLabel({ label, help, className }: { label: string; help: string; className?: string }) { + return ( +
+ {label} + + + +
+ ); +} + export default function JobCreateModal({ openModal, spider, @@ -153,6 +166,11 @@ export default function JobCreateModal({ pid: projectId, sid: "", }); + const runBtnRef = useRef(null); + + useEffect(() => { + TourStore.setRunButtonEl(runBtnRef.current); + }, []); // MaskedTag component was replaced with inline implementation using EyeInvisibleOutlined @@ -460,6 +478,8 @@ export default function JobCreateModal({ }; apiService.apiProjectsSpidersJobsCreate(requests).then( (response: SpiderJobCreate) => { + TourStore.markStepSeen("step-3"); + sessionStorage.setItem("tour_just_created", "true"); setLoading(false); // Close the modal first if an onClose callback is provided if (onClose) { @@ -552,26 +572,36 @@ export default function JobCreateModal({ } }, [projectEnvVars, spiderEnvVars, jobData]); + useEffect(() => { + TourStore.setNewJobModalOpen(open); + return () => { + TourStore.setNewJobModalOpen(false); + }; + }, [open]); + return ( <> {!hideRunButton && ( - +
+ +
)} {externalComponent} NEW JOB

} footer={null} > - -

Spider

+ + -

Resource Tier

+