Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
531 changes: 531 additions & 0 deletions client/src/components/app/extractions/SampleModal.tsx

Large diffs are not rendered by default.

21 changes: 19 additions & 2 deletions client/src/components/app/extractions/detail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ import {
resolveCrawlPageUrl,
trpc,
} from "@/utils";
import { CookingPot, LibraryBig, List } from "lucide-react";
import { CookingPot, LibraryBig, List, Pipette } from "lucide-react";
import { useState } from "react";
import { Bar, BarChart, XAxis, YAxis } from "recharts";
import { Link, useLocation, useParams } from "wouter";
import { displayRecipeDetails } from "../recipes/util";
import AuditLogModal from "./AuditLogModal";
import SampleModal from "./SampleModal";
import { displayStepType } from "./utils";

function displayStepParent(steps: CrawlStep[], parentId: number) {
Expand Down Expand Up @@ -139,6 +140,7 @@ export default function ExtractionDetail() {
const [lockedCancel, setLockedCancel] = useState(true);
const [lockedDelete, setLockDelete] = useState(true);
const [auditLogModalOpen, setAuditLogModalOpen] = useState(false);
const [sampleModalOpen, setSampleModalOpen] = useState(false);
const { toast } = useToast();
const [, navigate] = useLocation();
const query = trpc.extractions.detail.useQuery(
Expand Down Expand Up @@ -898,8 +900,16 @@ export default function ExtractionDetail() {
) : null}
</div>
<Card className="mt-4">
<CardHeader>
<CardHeader className="flex flex-row items-center justify-between space-y-0">
<CardTitle>Extraction Steps</CardTitle>
<Button
variant="outline"
size="sm"
onClick={() => setSampleModalOpen(true)}
>
<Pipette className="w-3.5 h-3.5 mr-2" />
Sample
</Button>
</CardHeader>
<CardContent>
<Tabs defaultValue="table">
Expand Down Expand Up @@ -987,6 +997,13 @@ export default function ExtractionDetail() {
open={auditLogModalOpen}
onOpenChange={setAuditLogModalOpen}
/>
<SampleModal
extractionId={extractionIdNum}
extractionStatus={extraction.status}
open={sampleModalOpen}
onOpenChange={setSampleModalOpen}
recipeUrl={extraction.recipe?.url}
/>
</>
);
}
53 changes: 45 additions & 8 deletions client/src/components/app/extractions/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,26 @@ import {
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { concisePrintDate, prettyPrintDate, resolveCrawlPageUrl, trpc } from "@/utils";
import { ExternalLink } from "lucide-react";
import { useState } from "react";
import { useParams } from "wouter";
import { useEffect, useState } from "react";
import { useLocation, useParams, useSearch } from "wouter";
import { base64Img } from "./utils";

// Tab selected when the URL carries no `tabName` query parameter, or carries
// one that is not listed in VALID_TAB_VALUES.
const DEFAULT_TAB = "data";

// Accepted values for the `tabName` query parameter. Anything else triggers a
// redirect to DEFAULT_TAB. Kept as a plain string[] so `.includes()` can be
// called with an arbitrary string read from the URL.
// NOTE(review): entries are expected to match the `value` props of the
// TabsTrigger/TabsContent elements built in CrawlPageDetail — confirm when
// adding or renaming a tab.
const VALID_TAB_VALUES = [
"data",
"raw_content",
"screenshot",
"simplified_content",
"operation_logs",
];

export default function CrawlPageDetail() {
const { extractionId, stepId, crawlPageId } = useParams();
const [, navigate] = useLocation();
const search = useSearch();
const basePath = "~" + new URL(window.location.href).pathname;

const crawlPageQuery = trpc.extractions.crawlPageDetail.useQuery(
{ crawlPageId: parseInt(crawlPageId || "") },
{ enabled: !!crawlPageId }
Expand All @@ -40,7 +54,22 @@ export default function CrawlPageDetail() {
(typeof simulateExtractionQuery)["mutateAsync"]
> | null>
>(null);
if (!crawlPageQuery.data) {

const item = crawlPageQuery.data;

useEffect(() => {
const sp = new URLSearchParams(search || "");
const tabName = sp.get("tabName");
const shouldRedirect =
!tabName || !VALID_TAB_VALUES.includes(tabName || "");

if (shouldRedirect) {
sp.set("tabName", DEFAULT_TAB);
navigate(`${basePath}?${sp.toString()}`, { replace: true });
}
}, [basePath, navigate, search]);

if (!item) {
return null;
}

Expand All @@ -53,8 +82,6 @@ export default function CrawlPageDetail() {
}
};

const item = crawlPageQuery.data;

const breadCrumbs = [
{ label: "Extractions", href: "/" },
{ label: `Extraction #${extractionId}`, href: `/${extractionId}` },
Expand Down Expand Up @@ -123,8 +150,6 @@ export default function CrawlPageDetail() {
</TabsContent>,
];

const defaultTab = "data";

if (item.markdownContent) {
tabTriggers.push(
<TabsTrigger key="simplified_content" value="simplified_content">Simplified Content</TabsTrigger>
Expand Down Expand Up @@ -191,6 +216,18 @@ export default function CrawlPageDetail() {
tabContents.splice(1, 0, <TabsContent key="screenshot" value="screenshot">{screenshot}</TabsContent>);
}

const tabNameFromUrl = new URLSearchParams(search || "").get("tabName");
if (tabNameFromUrl && !VALID_TAB_VALUES.includes(tabNameFromUrl)) {
return null;
}
const currentTab = tabNameFromUrl ?? DEFAULT_TAB;

const onTabChange = (value: string) => {
const sp = new URLSearchParams(search || "");
sp.set("tabName", value);
navigate(`${basePath}?${sp.toString()}`);
};

const formattedSimulatedData = simulatedExtractedData?.data
? JSON.stringify(simulatedExtractedData?.data, null, 2)
: null;
Expand Down Expand Up @@ -259,7 +296,7 @@ export default function CrawlPageDetail() {
)}
</div>

<Tabs defaultValue={defaultTab}>
<Tabs value={currentTab} onValueChange={onTabChange}>
<TabsList className="w-full mb-4">{tabTriggers}</TabsList>
<div className="border border-dashed p-4 text-xs overflow-auto">
{tabContents}
Expand Down
35 changes: 6 additions & 29 deletions client/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,35 +30,12 @@ export type CrawlPage = ItemType<
type DatasetItemsResponse = Exclude<RouterOutput["datasets"]["items"], null>;
export type DatasetItem = ItemType<DatasetItemsResponse["items"]["results"]>;

export enum ExtractionStatus {
WAITING = "WAITING",
IN_PROGRESS = "IN_PROGRESS",
COMPLETE = "COMPLETE",
STALE = "STALE",
CANCELLED = "CANCELLED",
}

export enum PageStatus {
WAITING = "WAITING",
IN_PROGRESS = "IN_PROGRESS",
DOWNLOADED = "DOWNLOADED",
SUCCESS = "SUCCESS",
EXTRACTED_NO_DATA = "EXTRACTED_NO_DATA",
ERROR = "ERROR",
}

export enum RecipeDetectionStatus {
WAITING = "WAITING",
IN_PROGRESS = "IN_PROGRESS",
SUCCESS = "SUCCESS",
ERROR = "ERROR",
}

export enum Step {
FETCH_ROOT = "FETCH_ROOT",
FETCH_PAGINATED = "FETCH_PAGINATED",
FETCH_LINKS = "FETCH_LINKS",
}
export {
ExtractionStatus,
PageStatus,
RecipeDetectionStatus,
Step,
} from "../../common/types";

export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs));
Expand Down
9 changes: 9 additions & 0 deletions common/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { toTitleCase } from "./utils";

export enum CatalogueType {
COURSES = "COURSES",
LEARNING_PROGRAMS = "LEARNING_PROGRAMS",
Expand Down Expand Up @@ -112,6 +114,13 @@ export enum PageStatus {
ERROR = "ERROR",
}

/**
 * Display-ready list of every `PageStatus` member.
 *
 * Each entry pairs the raw enum value with a human-readable label produced by
 * swapping underscores for spaces and title-casing the result.
 */
export const UIPageStatus: { value: PageStatus; label: string }[] = (
  Object.values(PageStatus) as PageStatus[]
).map((status) => {
  // Enum values are SCREAMING_SNAKE_CASE; break them into words first.
  const spaced = String(status).replace(/_/g, " ");
  return { value: status, label: toTitleCase(spaced) };
});

export enum RecipeDetectionStatus {
WAITING = "WAITING",
IN_PROGRESS = "IN_PROGRESS",
Expand Down
6 changes: 6 additions & 0 deletions common/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Title-cases a string word by word.
 *
 * Words are delimited by single space characters only. For each word the
 * first character is upper-cased and the remainder lower-cased. Runs of
 * consecutive spaces are preserved because empty segments map to empty
 * strings and are re-joined with a space.
 *
 * @param str - Text to convert.
 * @returns The converted text, with the same number of space-separated segments.
 */
export function toTitleCase(str: string): string {
  const titled: string[] = [];
  for (const segment of str.split(" ")) {
    const head = segment.charAt(0).toUpperCase();
    const tail = segment.slice(1).toLowerCase();
    titled.push(head + tail);
  }
  return titled.join(" ");
}
16 changes: 11 additions & 5 deletions server/src/data/extractions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -831,8 +831,7 @@ export async function createModelApiCallLog(
callSite: string,
inputTokenCount: number,
outputTokenCount: number,
datasetId?: number,
crawlPageId?: number
options?: { datasetId?: number; crawlPageId?: number }
) {
const result = await db
.insert(modelApiCalls)
Expand All @@ -843,8 +842,8 @@ export async function createModelApiCallLog(
callSite,
input_token_count: inputTokenCount,
output_token_count: outputTokenCount,
datasetId,
crawlPageId,
datasetId: options?.datasetId,
crawlPageId: options?.crawlPageId,
})
.returning();
return result[0];
Expand Down Expand Up @@ -923,11 +922,18 @@ export async function findFailedAndNoDataPageIds(crawlStepId: number) {
)
)
.groupBy(crawlPages.id)
.having(sql`count(${dataItems.id}) = 0`);
.having(eq(count(dataItems.id), 0));

return [...new Set(failedIds.concat(noDataIds).map((p) => p.id))];
}

/**
 * Deletes the row in `extractions` whose `id` column equals the given id.
 *
 * @param id - Identifier of the extraction to remove.
 * @returns Whatever the underlying delete query resolves to.
 * NOTE(review): whether dependent rows (steps, pages, data items) are removed
 * as well depends on the schema's foreign-key rules — confirm before relying
 * on it.
 */
export async function destroyExtraction(id: number) {
  const matchesId = eq(extractions.id, id);
  return db.delete(extractions).where(matchesId);
}

export {
findSampledPagesForExtraction,
type SamplePagesOptions,
type SampleSortOption,
type SampledPageRow,
} from "./extractionsSample";
Loading