diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 5e1f0d2a3f..6e63f5c4e5 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -531,6 +531,22 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response: file_name = ( f"{FileViewTypes.SUMMARIZE.lower()}/{filename_without_extension}.txt" ) + + # For ORIGINAL view, check if a converted PDF exists for preview + if view_type == FileViewTypes.ORIGINAL and file_converter_plugin: + converted_name = f"converted/{filename_without_extension}.pdf" + try: + contents = PromptStudioFileHelper.fetch_file_contents( + file_name=converted_name, + org_id=UserSessionUtils.get_organization_id(request), + user_id=custom_tool.created_by.user_id, + tool_id=str(custom_tool.tool_id), + allowed_content_types=allowed_content_types, + ) + return Response({"data": contents}, status=status.HTTP_200_OK) + except (FileNotFoundError, FileNotFound): + pass # No converted file — fall through to return original + try: contents = PromptStudioFileHelper.fetch_file_contents( file_name=file_name, @@ -557,15 +573,29 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response: file_name = uploaded_file.name file_data = uploaded_file file_type = uploaded_file.content_type - # Convert non-PDF files + if file_converter_plugin and file_type != "application/pdf": file_converter_service = file_converter_plugin["service_class"]() - file_data, file_name = file_converter_service.process_file( - uploaded_file, file_name - ) + if file_converter_service.should_convert_to_pdf(file_type): + # Convert and store in converted/ subdir for preview + converted_data, converted_name = file_converter_service.process_file( + uploaded_file, file_name + ) + PromptStudioFileHelper.upload_converted_for_ide( + org_id=UserSessionUtils.get_organization_id(request), + 
user_id=custom_tool.created_by.user_id, + tool_id=str(custom_tool.tool_id), + file_name=converted_name, + file_data=converted_data, + ) + # Reset uploaded_file for storing original in main dir + uploaded_file.seek(0) + file_data = uploaded_file + # else: CSV/TXT/Excel — file_data stays as original, no conversion logger.info(f"Uploading file: {file_name}" if file_name else "Uploading file") + # Store original file in main dir (always the original) PromptStudioFileHelper.upload_for_ide( org_id=UserSessionUtils.get_organization_id(request), user_id=custom_tool.created_by.user_id, @@ -574,7 +604,7 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response: file_data=file_data, ) - # Create a record in the db for the file + # Create a record in the db for the file (document_name = original filename) document = PromptStudioDocumentHelper.create( tool_id=str(custom_tool.tool_id), document_name=file_name ) diff --git a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py index 2d1d990673..23a05633fd 100644 --- a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py +++ b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py @@ -38,6 +38,7 @@ def get_or_create_prompt_studio_subdirectory( file_path = str(Path(base_path) / org_id / user_id / tool_id) extract_file_path = str(Path(file_path) / "extract") summarize_file_path = str(Path(file_path) / "summarize") + converted_file_path = str(Path(file_path) / "converted") if is_create: fs_instance = EnvHelper.get_storage( storage_type=StorageType.PERMANENT, @@ -46,6 +47,7 @@ def get_or_create_prompt_studio_subdirectory( fs_instance.mkdir(file_path, create_parents=True) fs_instance.mkdir(extract_file_path, create_parents=True) fs_instance.mkdir(summarize_file_path, create_parents=True) + fs_instance.mkdir(converted_file_path, create_parents=True) return str(file_path) @staticmethod @@ -81,6 +83,38 @@ def 
@staticmethod
def upload_converted_for_ide(
    org_id: str, user_id: str, tool_id: str, file_data: Any, file_name: str
) -> None:
    """Store a converted PDF in the ``converted/`` subdirectory for preview.

    The tool's directory tree is requested with ``is_create=True`` and the
    payload is written in binary mode to
    ``<tool directory>/converted/<file_name>``.

    Args:
        org_id (str): Organization ID
        user_id (str): User ID
        tool_id (str): ID of the prompt studio tool
        file_data (Any): File data (bytes or file-like object)
        file_name (str): Name of the converted file. Only the final path
            component is used, so the file always lands directly inside
            ``converted/``.

    Raises:
        ValueError: If ``file_name`` has no usable final component
            (empty, ``.`` or ``..``).
    """
    # Keep only the last path component so a name carrying directory parts
    # (e.g. "../x.pdf") cannot be written outside converted/.
    safe_name = Path(file_name).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid converted file name: {file_name!r}")

    fs_instance = EnvHelper.get_storage(
        storage_type=StorageType.PERMANENT,
        env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE,
    )
    tool_dir = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory(
        org_id=org_id,
        is_create=True,
        user_id=user_id,
        tool_id=str(tool_id),
    )
    # Accept either raw bytes or a file-like object exposing read().
    payload = file_data if isinstance(file_data, bytes) else file_data.read()
    fs_instance.write(
        path=str(Path(tool_dir) / "converted" / safe_name),
        mode="wb",
        data=payload,
    )
+ ) # Check if the file type is in the allowed list elif file_content_type not in allowed_content_types: raise InvalidFileType(f"File type '{file_content_type}' is not allowed.") @@ -178,7 +221,7 @@ def delete_for_ide(org_id: str, user_id: str, tool_id: str, file_name: str) -> b # Delete the source file fs_instance.rm(str(Path(file_system_path) / file_name)) # Delete all related files for cascade delete - directories = ["extract/", "extract/metadata/", "summarize/"] + directories = ["extract/", "extract/metadata/", "summarize/", "converted/"] base_file_name, _ = os.path.splitext(file_name) # Delete related files file_paths = PromptStudioFileHelper._find_files( diff --git a/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx b/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx index e1fb4015bc..bfc11126e1 100644 --- a/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx +++ b/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx @@ -25,7 +25,6 @@ import { ManageDocsModal } from "../manage-docs-modal/ManageDocsModal"; import { PdfViewer } from "../pdf-viewer/PdfViewer"; import { TextViewerPre } from "../text-viewer-pre/TextViewerPre"; import usePostHogEvents from "../../../hooks/usePostHogEvents"; -import { TextViewer } from "../text-viewer/TextViewer"; let items = [ { @@ -247,17 +246,27 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { const processGetDocsResponse = (data, viewType, mimeType) => { if (viewType === viewTypes.original) { - const base64String = data || ""; - const blob = base64toBlobWithMime(base64String, mimeType); - setFileData({ blob, mimeType }); - const reader = new FileReader(); - reader.readAsDataURL(blob); - reader.onload = () => { - setFileUrl(reader.result); - }; - reader.onerror = () => { - throw new Error("Fail to load the file"); - }; + if (mimeType === "application/pdf") { + // Existing flow: base64 → blob → PdfViewer + 
const base64String = data || ""; + const blob = base64toBlobWithMime(base64String, mimeType); + setFileData({ blob, mimeType }); + const reader = new FileReader(); + reader.readAsDataURL(blob); + reader.onload = () => { + setFileUrl(reader.result); + }; + reader.onerror = () => { + throw new Error("Fail to load the file"); + }; + } else { + // Non-PDF file (CSV, TXT, Excel, or non-convertible) + // data is text, not base64 + setFileUrl(""); + setFileData({ blob: null, mimeType }); + // Auto-switch to Raw View for non-PDF files + setActiveKey("2"); + } } else if (viewType === viewTypes.extract) { setExtractTxt(data?.data); } @@ -345,16 +354,19 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { }; const renderDoc = (docName, fileUrl, highlightData) => { - const fileType = docName?.split(".").pop().toLowerCase(); // Get the file extension - switch (fileType) { - case "pdf": - return ; - case "txt": - case "md": - return ; - default: - return
Unsupported file type: {fileType}
; + // Use mimeType from response for rendering decisions + if (fileData.mimeType === "application/pdf") { + return ; } + // Non-PDF: show placeholder message + return ( +
+ + Document preview is not available for this file type. Please index the + document and switch to Raw View. + +
+ ); }; return ( @@ -467,7 +479,10 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { 0} + isContentAvailable={ + fileUrl?.length > 0 || + (fileData.mimeType && fileData.mimeType !== "application/pdf") + } setOpenManageDocsModal={setOpenManageDocsModal} errMsg={fileErrMsg} > diff --git a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx index 310447e25a..8660477a12 100644 --- a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx +++ b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx @@ -589,8 +589,18 @@ function ManageDocsModal({ return; // Stop further execution } - // If the file is not a PDF, show the modal for confirmation - if (fileType !== "application/pdf") { + // File types that can be uploaded directly without conversion + const DIRECT_UPLOAD_TYPES = new Set([ + "application/pdf", + "text/plain", + "text/csv", + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel.sheet.macroenabled.12", + ]); + + if (!DIRECT_UPLOAD_TYPES.has(fileType)) { + // Non-direct types: show ConfirmMultiDoc modal or error if (!ConfirmMultiDoc) { setAlertDetails({ type: "error", @@ -600,7 +610,7 @@ function ManageDocsModal({ setFileToUpload(file); // Store the file to be uploaded setIsModalVisible(true); // Show the modal } else { - // If the file is a PDF, proceed with the upload immediately + // PDF, CSV, TXT, Excel — proceed with the upload immediately resolve(file); } };