Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions estela-api/api/serializers/deploy.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from rest_framework import serializers
from django.conf import settings

from api.serializers.project import UserDetailSerializer
from api.serializers.spider import SpiderSerializer
from core.models import Deploy, Spider
from engines.kubernetes import KubernetesEngine

_k8s = KubernetesEngine()


class DeploySerializer(serializers.ModelSerializer):
Expand All @@ -14,6 +18,15 @@ class DeploySerializer(serializers.ModelSerializer):
def get_spiders_count(self, obj):
    """Return how many spiders are attached to the serialized deploy.

    Args:
        obj: The object being serialized; must expose a ``spiders``
            attribute with a ``count()`` method.

    Returns:
        Whatever ``obj.spiders.count()`` returns.
    """
    related_spiders = obj.spiders
    return related_spiders.count()

def to_representation(self, instance):
    """Serialize ``instance``, refining the status of deploys still building.

    The regular serializer output is produced first. Only when the stored
    status is ``Deploy.BUILDING_STATUS`` is the Kubernetes engine asked for
    the current stage of the deploy job; a truthy answer replaces the
    ``status`` entry of the payload, otherwise the stored status is kept.

    Args:
        instance: The deploy being serialized.

    Returns:
        The serialized representation, possibly with ``status`` overridden.
    """
    payload = super().to_representation(instance)
    if instance.status != Deploy.BUILDING_STATUS:
        return payload

    # The namespace is optional in settings; "default" is used when it is absent.
    k8s_namespace = getattr(settings, "K8S_NAMESPACE", "default")
    live_stage = _k8s.get_deploy_stage(instance.did, k8s_namespace)
    if live_stage:
        payload["status"] = live_stage
    return payload

class Meta:
model = Deploy
fields = ["did", "project", "user", "status", "spiders_count", "created"]
Expand Down
35 changes: 35 additions & 0 deletions estela-api/api/serializers/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
SpiderJobEnvVarSerializer,
SpiderJobTagSerializer,
)
import redis as redis_lib
from django.conf import settings
from api.utils import (
delete_stats_from_redis,
update_stats_from_redis,
Expand Down Expand Up @@ -38,6 +40,7 @@ class SpiderJobSerializer(serializers.ModelSerializer):
spider = serializers.SerializerMethodField("get_spider")
storage_size = serializers.SerializerMethodField("get_storage_size")
database_insertion_progress = serializers.SerializerMethodField("get_database_insertion_progress")
peak_memory = serializers.SerializerMethodField("get_peak_memory")

class Meta:
model = SpiderJob
Expand All @@ -60,6 +63,7 @@ class Meta:
"database_insertion_progress",
"storage_size",
"resource_tier",
"peak_memory",
)

def get_spider(self, instance):
Expand All @@ -85,6 +89,37 @@ def get_database_insertion_progress(self, instance):
# Return the actual database insertion progress value from the model
return instance.database_insertion_progress

def get_peak_memory(self, instance):
    """Return the job's peak memory usage in bytes, or ``None`` when unknown.

    Jobs whose status is ``SpiderJob.RUNNING_STATUS`` are looked up in the
    Redis hash ``scrapy_stats_<instance.key>``: ``resources/peak_memory_bytes``
    is preferred and ``memusage/max`` is the fallback. Jobs in any other
    status are looked up through ``spiderdata_db_client`` in the job's
    ``stats`` collection, where only ``resources/peak_memory_bytes`` is read.

    The lookup is best-effort: any backend failure is logged and ``None`` is
    returned, so a stats outage never breaks job serialization.

    Args:
        instance: The job being serialized.

    Returns:
        A positive ``int`` number of bytes, or ``None`` if no usable value
        was found.
    """
    # NOTE(review): docs/api.yaml declares ``peak_memory`` as ``string`` while
    # this method returns an int (or None) — confirm which one is intended.
    import logging

    logger = logging.getLogger(__name__)

    def as_text(value):
        # Redis returns bytes unless the client decodes responses itself.
        return value.decode() if isinstance(value, bytes) else value

    def as_positive_bytes(raw):
        """Coerce a raw stat value to a positive int, or None if unusable."""
        if raw is None:
            return None
        try:
            parsed = int(float(raw))
        except (TypeError, ValueError, OverflowError):
            return None
        return parsed if parsed > 0 else None

    job_ref = getattr(instance, "jid", "?")

    if instance.status == SpiderJob.RUNNING_STATUS:
        try:
            redis_conn = redis_lib.from_url(settings.REDIS_URL)
            try:
                raw_stats = redis_conn.hgetall(f"scrapy_stats_{instance.key}")
            finally:
                # A fresh client (and pool) is built on every call; release
                # its sockets instead of leaking them until garbage collection.
                redis_conn.connection_pool.disconnect()
            job_stats = {as_text(key): as_text(value) for key, value in raw_stats.items()}
            for stat_key in ("resources/peak_memory_bytes", "memusage/max"):
                peak = as_positive_bytes(job_stats.get(stat_key))
                if peak is not None:
                    return peak
        except Exception:
            # Deliberately broad: this is a best-effort read at the
            # serializer boundary, but the failure must stay visible.
            logger.warning("Could not read peak memory from Redis for job %s", job_ref, exc_info=True)
        return None

    try:
        if not spiderdata_db_client.get_connection():
            return None
        pid = str(instance.spider.project.pid)
        job_collection_name = get_collection_name(instance, "stats")
        stored_stats = spiderdata_db_client.get_job_stats(pid, job_collection_name)
        for stat in stored_stats or ():
            # A malformed value in one document no longer aborts the scan.
            peak = as_positive_bytes(stat.get("resources/peak_memory_bytes"))
            if peak is not None:
                return peak
    except Exception:
        # Same best-effort policy as the Redis branch above.
        logger.warning("Could not read peak memory from stored stats for job %s", job_ref, exc_info=True)
    return None


class SpiderJobCreateEnvVarSerializer(serializers.Serializer):
evid = serializers.IntegerField(required=False, help_text="Env var id.")
Expand Down
2 changes: 2 additions & 0 deletions estela-api/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,13 @@ def last_modified(self):
class Deploy(models.Model):
SUCCESS_STATUS = "SUCCESS"
BUILDING_STATUS = "BUILDING"
DOWNLOADING_STATUS = "DOWNLOADING"
FAILURE_STATUS = "FAILURE"
CANCELED_STATUS = "CANCELED"
STATUS_OPTIONS = [
(SUCCESS_STATUS, "Success"),
(BUILDING_STATUS, "Building"),
(DOWNLOADING_STATUS, "Downloading"),
(FAILURE_STATUS, "Failure"),
(CANCELED_STATUS, "Canceled"),
]
Expand Down
2 changes: 1 addition & 1 deletion estela-api/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,5 +587,5 @@ def update_mongodb_insertion_progress():
logging.info(f"Job {job.jid} excluded after {stall_count} cycles with no progress")
except Exception as e:
logging.error(f"Error updating progress for job {job.jid}: {str(e)}")

logging.info(f"Completed MongoDB insertion progress updates")
1 change: 1 addition & 0 deletions estela-api/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def launch_deploy_job(pid, did, container_image):
)



def send_verification_email(user, request):
mail_subject = "Activate your estela account."
to_email = user.email
Expand Down
8 changes: 8 additions & 0 deletions estela-api/docs/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1753,6 +1753,7 @@ definitions:
memory_quota:
title: Memory quota
type: integer
readOnly: true
User:
required:
- username
Expand Down Expand Up @@ -2254,6 +2255,7 @@ definitions:
enum:
- SUCCESS
- BUILDING
- DOWNLOADING
- FAILURE
- CANCELED
spiders_count:
Expand Down Expand Up @@ -2282,6 +2284,7 @@ definitions:
enum:
- SUCCESS
- BUILDING
- DOWNLOADING
- FAILURE
- CANCELED
created:
Expand Down Expand Up @@ -2311,6 +2314,7 @@ definitions:
enum:
- SUCCESS
- BUILDING
- DOWNLOADING
- FAILURE
- CANCELED
spiders_names:
Expand Down Expand Up @@ -2429,6 +2433,10 @@ definitions:
- XLARGE
- HUGE
- XHUGE
peak_memory:
title: Peak memory
type: string
readOnly: true
ProjectJob:
required:
- results
Expand Down
17 changes: 17 additions & 0 deletions estela-api/engines/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,23 @@ def read_job_status(self, name, namespace="default", api_instance=None):

return self.Status(api_response.status)

def get_deploy_stage(self, did, namespace="default"):
    """Report which stage the deploy job ``deploy-project-<did>`` is in.

    The first pod labelled ``job-name=<job name>`` is inspected. The first
    init container found in a running or waiting state decides the result:
    index 0 maps to ``"DOWNLOADING"``, any later index to ``"BUILDING"``.

    Args:
        did: Deploy identifier used to build the job name.
        namespace: Kubernetes namespace to query.

    Returns:
        ``"DOWNLOADING"``, ``"BUILDING"``, or ``None`` when there is no pod,
        no init container is running/waiting, or any error occurs.
    """
    stage = None
    try:
        core_api = client.CoreV1Api()
        batch_api = self.get_api_instance()
        job_name = f"deploy-project-{did}"
        # The result is discarded; presumably this call only exists so that a
        # missing job raises and ends the lookup — TODO confirm.
        batch_api.read_namespaced_job(job_name, namespace)
        pod_list = core_api.list_namespaced_pod(namespace, label_selector=f"job-name={job_name}")
        if pod_list.items:
            init_states = pod_list.items[0].status.init_container_statuses or []
            for position, container_status in enumerate(init_states):
                state = container_status.state
                if state and (state.running or state.waiting):
                    stage = "BUILDING" if position else "DOWNLOADING"
                    break
    except Exception:
        # Best-effort lookup: every failure is reported as "stage unknown".
        stage = None
    return stage

def _create_build_volumes(self):
"""Create shared volume for build containers"""
return [
Expand Down
74 changes: 71 additions & 3 deletions estela-web/src/pages/DeployListPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ import WelcomeDeploy from "../../assets/images/welcomeDeploy.svg";
import "./styles.scss";
import { API_BASE_URL } from "../../constants";
import { ApiService, AuthService } from "../../services";
import { ApiProjectsDeploysListRequest, Deploy, UserDetail } from "../../services/api";
import {
ApiProjectsDeploysListRequest,
ApiProjectsJobsRequest,
Deploy,
DeployStatusEnum,
UserDetail,
} from "../../services/api";
import { resourceNotAllowedNotification, Spin, PaginationItem } from "../../shared";
import { convertDateToString } from "../../utils";
import { TourStore } from "../../tour";

const { Content } = Layout;
const { Text, Paragraph } = Typography;
Expand All @@ -29,6 +36,52 @@ interface RouteParams {
projectId: string;
}

// Label displayed for each in-progress deploy stage, keyed by status value.
const STAGE_LABELS: Record<string, string> = {
DOWNLOADING: "Downloading project",
BUILDING: "Building image",
};

// 1-based step number of each stage; DeployStageProgress renders "step N of 2".
const STAGE_STEP: Record<string, number> = {
DOWNLOADING: 1,
BUILDING: 2,
};

/**
 * Two-segment progress indicator for a deploy that is still in progress.
 *
 * Shows a dot, the stage label with "step N of 2", and a bar with one
 * segment per step: steps before the current one are filled, the current one
 * gets the "shimmer" animation, and later ones are grey. Unknown stages fall
 * back to the raw stage string as label and to step 1.
 */
const DeployStageProgress = ({ stage }: { stage: string }) => {
    const currentStep = STAGE_STEP[stage] || 1;
    const caption = STAGE_LABELS[stage] || stage;

    const segments = [1, 2].map((step) => {
        const isCurrent = step === currentStep;

        let backgroundColor = "#E5E7EB";
        if (step < currentStep) {
            backgroundColor = "#4D47C3";
        } else if (isCurrent) {
            backgroundColor = "transparent";
        }

        return (
            <div
                key={step}
                className="h-full flex-1 rounded-sm"
                style={{
                    backgroundColor,
                    backgroundImage: isCurrent
                        ? "linear-gradient(90deg, transparent 0%, #4D47C3 50%, transparent 100%)"
                        : undefined,
                    backgroundSize: isCurrent ? "200% 100%" : undefined,
                    animation: isCurrent ? "shimmer 1.5s infinite linear" : undefined,
                }}
            />
        );
    });

    return (
        <div className="flex flex-col gap-1">
            <div className="flex items-center gap-1.5">
                <span className="deploy-stage-dot" />
                <Text className="text-estela-black-medium text-xs">
                    {caption} <span className="text-estela-black-medium/50">· step {currentStep} of 2</span>
                </Text>
            </div>
            <div className="flex gap-0.5" style={{ width: 160, height: 4 }}>
                {segments}
            </div>
        </div>
    );
};

// Deploy statuses rendered with DeployStageProgress instead of a status tag.
const ACTIVE_STAGES = [DeployStatusEnum.Downloading, DeployStatusEnum.Building];

export class DeployListPage extends Component<RouteComponentProps<RouteParams>, DeployListPageState> {
PAGE_SIZE = 10;
state: DeployListPageState = {
Expand Down Expand Up @@ -83,8 +136,8 @@ export class DeployListPage extends Component<RouteComponentProps<RouteParams>,
dataIndex: "status",
render: (state: string): ReactElement => (
<Content style={{ display: "flex", alignItems: "center" }}>
{state === "BUILDING" ? (
<Tag className="border-0 text-s bg-estela-blue-low rounded-md text-estela-yellow">Waiting</Tag>
{ACTIVE_STAGES.includes(state as DeployStatusEnum) ? (
<DeployStageProgress stage={state} />
) : state === "SUCCESS" ? (
<Tag className="border-0 text-s bg-estela-blue-low rounded-md text-estela-green">Completed</Tag>
) : (
Expand All @@ -98,6 +151,7 @@ export class DeployListPage extends Component<RouteComponentProps<RouteParams>,
];

// On mount: tell the tour store that the "deploys" route is active, then load
// the deploy list (the argument 1 is presumably the page number — verify
// against getProjectDeploys).
async componentDidMount(): Promise<void> {
TourStore.setRoute("deploys");
await this.getProjectDeploys(1);
}

Expand Down Expand Up @@ -132,6 +186,20 @@ export class DeployListPage extends Component<RouteComponentProps<RouteParams>,
loaded: true,
modalIsOpen: results.count === 0,
});
TourStore.setDeploys(deploys);

const successCount = deploys.filter((d) => d.status === "SUCCESS").length;
if (successCount === 1) {
const jobsParams: ApiProjectsJobsRequest = { pid: this.projectId, page: 1, pageSize: 1 };
this.apiService
.apiProjectsJobs(jobsParams)
.then((response) => {
TourStore.setProjectHasJobs(response.count > 0);
})
.catch(() => TourStore.setProjectHasJobs(false));
} else {
TourStore.setProjectHasJobs(false);
}
},
(error: unknown) => {
error;
Expand Down
20 changes: 20 additions & 0 deletions estela-web/src/pages/DeployListPage/styles.scss
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,24 @@
// Table header cells: white background and no border.
.ant-table-thead .ant-table-cell {
background-color: white;
border:none;
}

// 8px round dot shown next to the deploy stage label; pulses via `pulseDot`.
.deploy-stage-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background-color: #4D47C3;
animation: pulseDot 2s infinite;
// Keep the dot at its fixed size inside a flex row.
flex-shrink: 0;
}

// Pulse: a ring (box-shadow) grows from 0 to 6px while fading to transparent,
// then resets.
@keyframes pulseDot {
0% { box-shadow: 0 0 0 0 rgba(77,71,195,0.55); }
70% { box-shadow: 0 0 0 6px rgba(77,71,195,0); }
100% { box-shadow: 0 0 0 0 rgba(77,71,195,0); }
}

// Shimmer: slides the background from -120px to 120px horizontally. Referenced
// by name from the inline `animation` style of the active progress segment.
@keyframes shimmer {
0% { background-position: -120px 0; }
100% { background-position: 120px 0; }
}
33 changes: 33 additions & 0 deletions estela-web/src/pages/JobCreateModal/help.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
 * Help text for the fields of the job creation modal, keyed by field name.
 * Long entries are split at sentence boundaries; the concatenated values are
 * plain strings.
 */
export const JOB_FIELD_HELP = {
    spider: "The spider to run. Each project can have multiple spiders, one per scraping target.",

    persistence:
        "How long the items extracted by this job will be retained before being deleted. " +
        "Choose 'Forever' to keep them indefinitely.",

    tier:
        "CPU and memory allocated to this job. " +
        "Higher tiers run faster but consume more credits. " +
        "DEFAULT is fine for most spiders.",

    args:
        "Command-line arguments passed to your spider on start (e.g. start_url=https://example.com). " +
        "Available in the spider via self.<name>.",

    envProject:
        "Variables defined at the project level. " +
        "They are inherited by every job, in every spider of this project.",

    envSpider:
        "Variables defined on this spider. " +
        "They are inherited by every job of this spider and override project variables.",

    envJob:
        "Variables for this job only. " +
        "They override spider and project variables. " +
        "Use the eye icon to mask sensitive values like API keys.",

    proxy:
        "Route this job's requests through a proxy server. " +
        "Useful for IP rotation or geo-targeted scraping.",

    tags:
        "Labels for organizing and filtering jobs later (e.g. 'production', 'monitoring'). " +
        "Pure metadata, no behaviour change.",
};
Loading
Loading