def build_log_filter(self, search, level):
    """Build a MongoDB filter for log documents from a search text and a level.

    Both inputs originate from request query parameters, so each one is
    regex-escaped before being embedded in a ``$regex`` clause. Escaping
    keeps user input from being interpreted as a pattern, which would
    otherwise allow widening the match (e.g. ``.*``), triggering invalid
    regex errors (e.g. ``(``) or expensive backtracking.

    Args:
        search: Free-text term to look for inside the ``log`` field
            (case-insensitive, matched literally). Falsy means "no search".
        level: Log level name such as ``"ERROR"``. Falsy or ``"ALL"`` means
            "no level restriction". Matching is case-sensitive.

    Returns:
        ``None`` when neither filter applies, a single ``{"log": {...}}``
        condition when exactly one applies, or ``{"$and": [...]}`` combining
        the level condition followed by the search condition.
    """
    # Function-scope import keeps this method self-contained.
    import re

    conditions = []
    if level and level != "ALL":
        # Match the Scrapy log format: "] LEVEL: ". The level is escaped so
        # that only the literal level name can match.
        conditions.append({"log": {"$regex": f"\\] {re.escape(level)}: "}})
    if search:
        # escape prevents ReDoS and invalid regex errors
        conditions.append(
            {"log": {"$regex": re.escape(search), "$options": "i"}}
        )
    if not conditions:
        return None
    # A lone condition is returned bare; "$and" is only needed for two.
    return {"$and": conditions} if len(conditions) > 1 else conditions[0]
spiderdata_db_client.get_paginated_dataset_data( - kwargs["pid"], job_collection_name, page, page_size + kwargs["pid"], job_collection_name, page, page_size, query=log_filter ) return Response( diff --git a/estela-api/docs/api.yaml b/estela-api/docs/api.yaml index b0134ee0..8cde91f4 100644 --- a/estela-api/docs/api.yaml +++ b/estela-api/docs/api.yaml @@ -1186,6 +1186,16 @@ paths: description: Spider job data type. required: false type: string + - name: search + in: query + description: Search term to filter log entries. + required: false + type: string + - name: level + in: query + description: Log level filter (DEBUG, INFO, WARNING, ERROR, CRITICAL). + required: false + type: string responses: '200': description: '' diff --git a/estela-web/src/pages/CronJobDetailPage/index.tsx b/estela-web/src/pages/CronJobDetailPage/index.tsx index 2a7d45fc..f118e707 100644 --- a/estela-web/src/pages/CronJobDetailPage/index.tsx +++ b/estela-web/src/pages/CronJobDetailPage/index.tsx @@ -87,6 +87,7 @@ interface SpiderJobData { cronjob: number | null | undefined; tags: TagsData[] | undefined; args: ArgsData[] | undefined; + itemCount: number | undefined; } interface OptionDataRepeat { @@ -283,6 +284,16 @@ export class CronJobDetailPage extends Component ), }, + { + title: "ITEMS", + dataIndex: "itemCount", + key: "itemCount", + render: (itemCount: number): ReactElement => ( + + {itemCount ?? 0} + + ), + }, ]; async componentDidMount(): Promise { @@ -427,6 +438,7 @@ export class CronJobDetailPage extends Component => { const requestParams: ApiProjectsSpidersJobsDataListRequest = { pid: projectId, @@ -144,6 +146,8 @@ const getData = async ( type: type_, page: page, pageSize: pageSize ?? 
PAGE_SIZE, + search: search || undefined, + level: level || undefined, }; return apiService.apiProjectsSpidersJobsDataList(requestParams).then( (response) => { @@ -647,6 +651,23 @@ export function JobRequestsData({ projectId, spiderId, jobId }: JobsDataProps) { ); } +const LOG_LEVELS = ["ALL", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]; + +const LEVEL_COLORS: Record = { + DEBUG: "text-gray-500", + INFO: "text-green-600", + WARNING: "text-yellow-500", + ERROR: "text-red-500", + CRITICAL: "text-red-800", +}; + +const LEVEL_RE = /\]\s+(DEBUG|INFO|WARNING|ERROR|CRITICAL):\s/; + +function parseLogLevel(logText: string): string { + const match = LEVEL_RE.exec(logText); + return match ? match[1] : "INFO"; +} + export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) { const [openModal, setOpenModal] = useState(false); const [loadedDeleteButton, setLoadedDeleteButton] = useState(false); @@ -655,34 +676,34 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) { const [count, setCount] = useState(0); const [loaded, setLoaded] = useState(false); const [logs, setLogs] = useState([]); + const [searchInput, setSearchInput] = useState(""); + const [levelInput, setLevelInput] = useState("ALL"); + const [activeSearch, setActiveSearch] = useState(""); + const [activeLevel, setActiveLevel] = useState("ALL"); - useEffect(() => { - getData("logs", 1, projectId, spiderId, jobId).then((response) => { - let data: Dictionary[] = []; - if (response.results?.length) { - const safe_data: unknown[] = response.results ?? []; - data = safe_data as Dictionary[]; - setLogs(data); - setCurrent(1); - setCount(response.count); - } + const fetchLogs = (page: number, search: string, level: string) => { + setLoaded(false); + getData("logs", page, projectId, spiderId, jobId, PAGE_SIZE, search, level).then((response) => { + const safe_data: unknown[] = response.results ?? 
[]; + setLogs(safe_data as Dictionary[]); + setCurrent(page); + setCount(response.count ?? 0); setLoaded(true); }); + }; + + useEffect(() => { + fetchLogs(1, "", "ALL"); }, []); - const onLogsPageChange = async (page: number): Promise => { - setLoaded(false); - await getData("logs", page, projectId, spiderId, jobId).then((response) => { - let data: Dictionary[] = []; - if (response.results?.length) { - const safe_data: unknown[] = response.results ?? []; - data = safe_data as Dictionary[]; - setLogs(data); - setCurrent(page); - setCount(response.count); - } - setLoaded(true); - }); + const applyFilters = () => { + setActiveSearch(searchInput); + setActiveLevel(levelInput); + fetchLogs(1, searchInput, levelInput); + }; + + const onLogsPageChange = (page: number) => { + fetchLogs(page, activeSearch, activeLevel); }; return ( @@ -690,17 +711,43 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) { {loaded ? ( <> - + Search: - + setSearchInput(e.target.value)} + onPressEnter={applyFilters} + /> + + + Level: + setLevelInput(key as string)}> + {LOG_LEVELS.map((lvl) => ( + {lvl} + ))} + + } + > + + @@ -762,22 +809,10 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) { - - - - - - - + TIME @@ -789,8 +824,10 @@ export function JobLogsData({ projectId, spiderId, jobId }: JobsDataProps) { {logs.map((log: Dictionary, index: number) => { const logDate = log.datetime - ? new Date(parseFloat(log.datetime) * 1000).toDateString() + ? new Date(parseFloat(log.datetime) * 1000).toLocaleString() : "no date"; + const level = parseLogLevel(log.log ?? ""); + const levelColor = LEVEL_COLORS[level] ?? 
"text-estela-black-medium"; return ( - {logDate} + {logDate} - INFO + {level} ), }, + { + title: "RESOURCE TIER", + dataIndex: "resourceTier", + key: "resourceTier", + align: "center" as const, + render: (tier: string): ReactElement => {(tier || "DEFAULT").toUpperCase()}, + }, { title: "ARGUMENTS", dataIndex: "args", @@ -241,6 +249,7 @@ export class ProjectCronJobListPage extends Component ), }, + { + title: "RESOURCE TIER", + dataIndex: "resourceTier", + key: "resourceTier", + render: (tier: string): ReactElement => {(tier || "DEFAULT").toUpperCase()}, + }, { title: "SCHEDULED JOB", key: "info", @@ -232,6 +239,7 @@ export class ProjectJobListPage extends Component job.status === "WAITING"); diff --git a/estela-web/src/pages/SpiderDetailPage/index.tsx b/estela-web/src/pages/SpiderDetailPage/index.tsx index e493f356..9bc9b0e2 100644 --- a/estela-web/src/pages/SpiderDetailPage/index.tsx +++ b/estela-web/src/pages/SpiderDetailPage/index.tsx @@ -61,6 +61,8 @@ interface SpiderJobData { cronjob: number | null | undefined; args: SpiderJobArg[] | undefined; tags: SpiderJobTag[] | undefined; + resourceTier: string | undefined; + itemCount: number | undefined; } interface SpiderDetailPageState { @@ -146,6 +148,13 @@ export class SpiderDetailPage extends Component ), }, + { + title: "RESOURCE TIER", + dataIndex: "resourceTier", + key: "resourceTier", + align: "center" as const, + render: (tier: string): ReactElement => {(tier || "DEFAULT").toUpperCase()}, + }, { title: "SCHEDULED JOB", dataIndex: "cronjob", @@ -200,6 +209,14 @@ export class SpiderDetailPage extends Component <> ), }, + { + title: "ITEMS", + dataIndex: "itemCount", + key: "itemCount", + render: (itemCount: number): ReactElement => ( + {itemCount ?? 
0} + ), + }, ]; async componentDidMount(): Promise { @@ -285,6 +302,8 @@ export class SpiderDetailPage extends Component tags: job.tags, jobStatus: job.jobStatus, cronjob: job.cronjob, + resourceTier: job.resourceTier, + itemCount: job.itemCount, })); return { data, count: response.count, current: page }; }; diff --git a/estela-web/src/services/api/generated-api/apis/ApiApi.ts b/estela-web/src/services/api/generated-api/apis/ApiApi.ts index 8000303e..d8048734 100644 --- a/estela-web/src/services/api/generated-api/apis/ApiApi.ts +++ b/estela-web/src/services/api/generated-api/apis/ApiApi.ts @@ -388,6 +388,8 @@ export interface ApiProjectsSpidersJobsDataListRequest { page?: number; pageSize?: number; type?: string; + search?: string; + level?: string; } export interface ApiProjectsSpidersJobsListRequest { @@ -2231,6 +2233,14 @@ export class ApiApi extends runtime.BaseAPI { queryParameters['type'] = requestParameters.type; } + if (requestParameters.search !== undefined) { + queryParameters['search'] = requestParameters.search; + } + + if (requestParameters.level !== undefined) { + queryParameters['level'] = requestParameters.level; + } + const headerParameters: runtime.HTTPHeaders = {}; if (this.configuration && (this.configuration.username !== undefined || this.configuration.password !== undefined)) {