Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions database_adapters/db_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,12 @@ def get_jobs_set_stats(self, database_name, jobs_ids):
return list(result)

def get_paginated_dataset_data(
self, database_name, collection_name, page, page_size
self, database_name, collection_name, page, page_size, query=None
):
collection = self.client[database_name][collection_name]
mongo_filter = query or {}
result = (
collection.find({}, {"_id": False})
collection.find(mongo_filter, {"_id": False})
.skip(page_size * (page - 1))
.limit(page_size)
)
Expand All @@ -214,6 +215,10 @@ def get_estimated_item_count(self, database_name, collection_name):
collection = self.client[database_name][collection_name]
return collection.estimated_document_count()

def count_documents(self, database_name, collection_name, query=None):
    """Return how many documents in the collection match ``query``.

    A missing or empty ``query`` is replaced by ``{}`` before it is handed
    to the collection's ``count_documents`` call.
    """
    target = self.client[database_name][collection_name]
    mongo_filter = query if query else {}
    return target.count_documents(mongo_filter)

def get_estimated_item_size(self, database_name, collection_name):
database = self.client[database_name]
document_size = database.command("collstats", collection_name)["avgObjSize"]
Expand Down
49 changes: 43 additions & 6 deletions estela-api/api/views/job_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import redis

from django.conf import settings
Expand Down Expand Up @@ -37,7 +38,22 @@ def get_parameters(self, request):
page_size = int(
request.query_params.get("page_size", self.DEFAULT_PAGINATION_SIZE)
)
return page, data_type, page_size
search = request.query_params.get("search", "").strip()
level = request.query_params.get("level", "").strip().upper()
return page, data_type, page_size, search, level

def build_log_filter(self, search, level):
    """Build a MongoDB filter for log documents from a search text and a level.

    Args:
        search: Free-text term, matched case-insensitively as a literal
            substring of the ``log`` field. Empty means no text filter.
        level: Log level name (e.g. ``"ERROR"``). Empty or ``"ALL"`` means
            no level filter.

    Returns:
        ``None`` when neither filter applies, the single condition dict when
        exactly one applies, or ``{"$and": [...]}`` when both apply.
    """
    conditions = []
    if level and level != "ALL":
        # Match the Scrapy log format: "] LEVEL: ". The level is user input,
        # so it is escaped just like the search term; left raw, a caller
        # could inject arbitrary (or invalid) regex syntax into the query.
        conditions.append({"log": {"$regex": rf"\] {re.escape(level)}: "}})
    if search:
        # Escaping prevents ReDoS and invalid-regex errors from user input.
        conditions.append(
            {"log": {"$regex": re.escape(search), "$options": "i"}}
        )
    if not conditions:
        return None
    return {"$and": conditions} if len(conditions) > 1 else conditions[0]

def get_paginated_link(self, page_number):
if page_number < 1:
Expand Down Expand Up @@ -85,10 +101,24 @@ def get_paginated_link(self, page_number):
type=openapi.TYPE_STRING,
required=False,
),
openapi.Parameter(
"search",
openapi.IN_QUERY,
description="Search term to filter log entries.",
type=openapi.TYPE_STRING,
required=False,
),
openapi.Parameter(
"level",
openapi.IN_QUERY,
description="Log level filter (DEBUG, INFO, WARNING, ERROR, CRITICAL).",
type=openapi.TYPE_STRING,
required=False,
),
],
)
def list(self, request, *args, **kwargs):
page, data_type, page_size = self.get_parameters(request)
page, data_type, page_size, search, level = self.get_parameters(request)
if page_size > self.MAX_PAGINATION_SIZE or page_size < self.MIN_PAGINATION_SIZE:
raise ParseError({"error": errors.INVALID_PAGE_SIZE})
if page_size < 1:
Expand All @@ -101,9 +131,16 @@ def list(self, request, *args, **kwargs):
job = SpiderJob.objects.filter(jid=kwargs["jid"]).get()
job_collection_name = get_collection_name(job, data_type)

count = spiderdata_db_client.get_estimated_item_count(
kwargs["pid"], job_collection_name
)
log_filter = self.build_log_filter(search, level) if data_type == "logs" else None

if log_filter:
count = spiderdata_db_client.count_documents(
kwargs["pid"], job_collection_name, log_filter
)
else:
count = spiderdata_db_client.get_estimated_item_count(
kwargs["pid"], job_collection_name
)

if data_type == "stats":
if job.status == SpiderJob.RUNNING_STATUS:
Expand Down Expand Up @@ -134,7 +171,7 @@ def list(self, request, *args, **kwargs):
return Response(response)
else:
result = spiderdata_db_client.get_paginated_dataset_data(
kwargs["pid"], job_collection_name, page, page_size
kwargs["pid"], job_collection_name, page, page_size, query=log_filter
)

return Response(
Expand Down
10 changes: 10 additions & 0 deletions estela-api/docs/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,16 @@ paths:
description: Spider job data type.
required: false
type: string
- name: search
in: query
description: Search term to filter log entries.
required: false
type: string
- name: level
in: query
description: Log level filter (DEBUG, INFO, WARNING, ERROR, CRITICAL).
required: false
type: string
responses:
'200':
description: ''
Expand Down
12 changes: 12 additions & 0 deletions estela-web/src/pages/CronJobDetailPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ interface SpiderJobData {
cronjob: number | null | undefined;
tags: TagsData[] | undefined;
args: ArgsData[] | undefined;
itemCount: number | undefined;
}

interface OptionDataRepeat {
Expand Down Expand Up @@ -283,6 +284,16 @@ export class CronJobDetailPage extends Component<RouteComponentProps<RouteParams
</Content>
),
},
{
title: "ITEMS",
dataIndex: "itemCount",
key: "itemCount",
render: (itemCount: number): ReactElement => (
<Content>
<span className="text-xs text-estela-black-medium">{itemCount ?? 0}</span>
</Content>
),
},
];

async componentDidMount(): Promise<void> {
Expand Down Expand Up @@ -427,6 +438,7 @@ export class CronJobDetailPage extends Component<RouteComponentProps<RouteParams
status: job.jobStatus,
tags: job.tags,
cronjob: job.cronjob,
itemCount: job.itemCount,
}));
return { data: data, count: response.count, current: page };
};
Expand Down
123 changes: 80 additions & 43 deletions estela-web/src/pages/JobDataPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ const getData = async (
spiderId: string,
jobId: string,
pageSize?: number,
search?: string,
level?: string,
): Promise<InlineResponse2008> => {
const requestParams: ApiProjectsSpidersJobsDataListRequest = {
pid: projectId,
Expand All @@ -144,6 +146,8 @@ const getData = async (
type: type_,
page: page,
pageSize: pageSize ?? PAGE_SIZE,
search: search || undefined,
level: level || undefined,
};
return apiService.apiProjectsSpidersJobsDataList(requestParams).then(
(response) => {
Expand Down Expand Up @@ -647,6 +651,23 @@ export function JobRequestsData({ projectId, spiderId, jobId }: JobsDataProps) {
);
}

// Options listed in the logs "Level" dropdown; "ALL" is the initial selection.
const LOG_LEVELS = ["ALL", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"];

// Text color class used when rendering each level label in the logs table.
// "ALL" has no entry because it is a filter choice, not a level parsed from a line.
const LEVEL_COLORS: Record<string, string> = {
DEBUG: "text-gray-500",
INFO: "text-green-600",
WARNING: "text-yellow-500",
ERROR: "text-red-500",
CRITICAL: "text-red-800",
};

// Captures the level name from a log line: a "]" followed by whitespace,
// one of the known level names, a colon, and one whitespace character.
const LEVEL_RE = /\]\s+(DEBUG|INFO|WARNING|ERROR|CRITICAL):\s/;

/**
 * Extract the log level from a raw log line.
 *
 * Returns the level name captured by LEVEL_RE, or "INFO" when the line
 * contains no recognizable level marker.
 */
function parseLogLevel(logText: string): string {
    const found = logText.match(LEVEL_RE);
    if (found === null) {
        return "INFO";
    }
    return found[1];
}

export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
const [openModal, setOpenModal] = useState(false);
const [loadedDeleteButton, setLoadedDeleteButton] = useState(false);
Expand All @@ -655,52 +676,78 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
const [count, setCount] = useState(0);
const [loaded, setLoaded] = useState(false);
const [logs, setLogs] = useState<Dictionary[]>([]);
const [searchInput, setSearchInput] = useState("");
const [levelInput, setLevelInput] = useState("ALL");
const [activeSearch, setActiveSearch] = useState("");
const [activeLevel, setActiveLevel] = useState("ALL");

useEffect(() => {
getData("logs", 1, projectId, spiderId, jobId).then((response) => {
let data: Dictionary[] = [];
if (response.results?.length) {
const safe_data: unknown[] = response.results ?? [];
data = safe_data as Dictionary[];
setLogs(data);
setCurrent(1);
setCount(response.count);
}
const fetchLogs = (page: number, search: string, level: string) => {
setLoaded(false);
getData("logs", page, projectId, spiderId, jobId, PAGE_SIZE, search, level).then((response) => {
const safe_data: unknown[] = response.results ?? [];
setLogs(safe_data as Dictionary[]);
setCurrent(page);
setCount(response.count ?? 0);
setLoaded(true);
});
};

useEffect(() => {
fetchLogs(1, "", "ALL");
}, []);

const onLogsPageChange = async (page: number): Promise<void> => {
setLoaded(false);
await getData("logs", page, projectId, spiderId, jobId).then((response) => {
let data: Dictionary[] = [];
if (response.results?.length) {
const safe_data: unknown[] = response.results ?? [];
data = safe_data as Dictionary[];
setLogs(data);
setCurrent(page);
setCount(response.count);
}
setLoaded(true);
});
const applyFilters = () => {
setActiveSearch(searchInput);
setActiveLevel(levelInput);
fetchLogs(1, searchInput, levelInput);
};

const onLogsPageChange = (page: number) => {
fetchLogs(page, activeSearch, activeLevel);
};

return (
<Content className="bg-metal content-padding">
{loaded ? (
<>
<Row className="flow-root my-2 w-full space-x-2" align="middle">
<Col className="flex float-left items-center space-x-3">
<Col className="flex float-left items-center space-x-2">
<Text className="text-estela-black-medium text-sm">Search:</Text>
<Input disabled className="w-36 h-10 rounded-2xl" placeholder="Enter a word..." />
<Input
className="w-44 h-10 rounded-2xl"
placeholder="Enter a word..."
value={searchInput}
onChange={(e) => setSearchInput(e.target.value)}
onPressEnter={applyFilters}
/>
</Col>
<Col className="flex float-left items-center space-x-2">
<Text className="text-estela-black-medium text-sm">Level:</Text>
<Dropdown
overlay={
<Menu onClick={({ key }) => setLevelInput(key as string)}>
{LOG_LEVELS.map((lvl) => (
<Menu.Item key={lvl}>{lvl}</Menu.Item>
))}
</Menu>
}
>
<Button
size="large"
className="flex items-center w-32 stroke-estela-blue-full border-estela-blue-low bg-estela-blue-low text-estela-blue-full hover:text-estela-blue-full text-sm hover:border-estela rounded-2xl"
>
<Text className="float-left text-sm text-estela-black-medium">{levelInput}</Text>
<ArrowDown className="h-3.5 w-4 ml-auto" />
</Button>
</Dropdown>
</Col>
<Col className="flex float-left">
<Button
disabled
size="large"
onClick={applyFilters}
className="flex items-center mr-2 stroke-estela-blue-full border-estela-blue-low bg-estela-blue-low text-estela-blue-full hover:text-estela-blue-full text-sm hover:border-estela rounded-2xl"
>
Update
Apply
</Button>
</Col>
<Col className="flex float-right">
Expand Down Expand Up @@ -762,22 +809,10 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
</Dropdown>
</Tooltip>
</Col>
<Col className="flex float-right">
<Button
disabled
size="large"
className="flex items-center mr-2 stroke-estela-blue-full border-estela-blue-low bg-estela-blue-low text-estela-blue-full hover:text-estela-blue-full text-sm hover:border-estela rounded-2xl"
>
Go
</Button>
</Col>
<Col className="flex float-right items-center space-x-3">
<Input disabled className="w-36 h-10 rounded-2xl" placeholder="Go to line..." />
</Col>
</Row>
<Content className="bg-white content-padding">
<Row align="middle" className="grid grid-cols-12 py-1 px-2 rounded-lg bg-estela-blue-low">
<Col className=" col-start-2 col-span-3">
<Col className="col-start-2 col-span-3">
<Text className="font-bold estela-black-full text-xs">TIME</Text>
</Col>
<Col className="col-span-2">
Expand All @@ -789,8 +824,10 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
</Row>
{logs.map((log: Dictionary, index: number) => {
const logDate = log.datetime
? new Date(parseFloat(log.datetime) * 1000).toDateString()
? new Date(parseFloat(log.datetime) * 1000).toLocaleString()
: "no date";
const level = parseLogLevel(log.log ?? "");
const levelColor = LEVEL_COLORS[level] ?? "text-estela-black-medium";
return (
<Row
key={index}
Expand All @@ -805,10 +842,10 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) {
</Text>
</Col>
<Col className="col-span-3">
<Text className="text-estela-black-medium">{logDate}</Text>
<Text className="text-estela-black-medium text-xs">{logDate}</Text>
</Col>
<Col className="col-span-2">
<Text className="text-estela-black-medium">INFO</Text>
<Text className={`font-semibold text-xs ${levelColor}`}>{level}</Text>
</Col>
<Col className="col-span-6">
<Paragraph
Expand Down
9 changes: 9 additions & 0 deletions estela-web/src/pages/ProjectCronJobListPage/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ interface SpiderCronJobData {
dataStatus: string | undefined;
tags: TagsData[] | undefined;
args: Args[] | undefined;
resourceTier: string | undefined;
}

interface ProjectCronJobListPageState {
Expand Down Expand Up @@ -165,6 +166,13 @@ export class ProjectCronJobListPage extends Component<RouteComponentProps<RouteP
</Link>
),
},
{
title: "RESOURCE TIER",
dataIndex: "resourceTier",
key: "resourceTier",
align: "center" as const,
render: (tier: string): ReactElement => <Tag color="blue">{(tier || "DEFAULT").toUpperCase()}</Tag>,
},
{
title: "ARGUMENTS",
dataIndex: "args",
Expand Down Expand Up @@ -241,6 +249,7 @@ export class ProjectCronJobListPage extends Component<RouteComponentProps<RouteP
dataStatus: cronjob.dataStatus,
tags: cronjob.ctags,
args: cronjob.cargs,
resourceTier: cronjob.resourceTier,
}));
const cronjobs: SpiderCronJobData[] = data;
this.setState({ cronjobs: [...cronjobs], loadedCronjobs: true, count: response.count, current: page });
Expand Down
Loading
Loading