From 66c59595d5259519dd7fcdad334f34bc2b663a8e Mon Sep 17 00:00:00 2001 From: "donghyuck, son" Date: Thu, 30 Apr 2026 12:01:10 +0900 Subject: [PATCH] [ai-assisted] feat(vector): add projection visualization api MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: - Closes #380 Why: - 관리자 화면에서 기존 벡터 저장 테이블을 기반으로 2D 산점도 시각화 API가 필요하다. What: - 기존 tb_ai_document_chunk를 변경하지 않고 projection 상태/좌표 테이블과 V603 migration을 추가했다. - PCA 기반 projection generator, JDBC repository, 비동기 job service, search visualization service를 추가했다. - /api/mgmt/ai/vectors/projections, /items/{vectorItemId}, /search-visualization API와 클라이언트용 DTO를 추가했다. - metadata allowlist, query 길이 제한, projection item 상한, executor rejection 처리, projection scope 기반 검색 제한을 반영했다. - README와 CHANGELOG에 API 사용 흐름과 제약을 문서화했다. Validation: - ./gradlew :studio-platform-ai:test :starter:studio-platform-starter-ai:test :starter:studio-platform-starter-ai-web:test && git diff --check: PASS - ./gradlew test: PASS AI / Subagent Usage: - AI-assisted: Yes - Subagent used: Yes - Delegated scope: code review and security review for issue #380 - Main author validation: subagent findings were reviewed, data exposure and lifecycle issues were addressed, targeted and full tests passed --- CHANGELOG.md | 3 + .../studio-platform-starter-ai-web/README.md | 86 ++++++ .../build.gradle.kts | 1 + .../autoconfigure/AiWebAutoConfiguration.java | 122 +++++++++ .../web/controller/AiWebExceptionHandler.java | 1 + .../VectorVisualizationMgmtController.java | 219 +++++++++++++++ .../ProjectionCreateRequest.java | 14 + .../ProjectionCreateResponse.java | 4 + .../ProjectionDetailResponse.java | 18 ++ .../visualization/ProjectionListResponse.java | 6 + .../ProjectionPointResponse.java | 14 + .../ProjectionPointsResponse.java | 10 + .../ProjectionSummaryResponse.java | 13 + .../VectorItemDetailResponse.java | 16 ++ .../VectorSearchVisualizationRequest.java | 14 + .../VectorSearchVisualizationResponse.java | 19 ++ 
...VectorVisualizationMgmtControllerTest.java | 133 +++++++++ .../vector/PgVectorStoreAdapterV2.java | 14 +- .../DefaultVectorProjectionJobService.java | 75 ++++++ .../DefaultVectorProjectionService.java | 144 ++++++++++ ...faultVectorSearchVisualizationService.java | 211 +++++++++++++++ .../JdbcExistingVectorItemRepository.java | 236 ++++++++++++++++ .../JdbcVectorProjectionPointRepository.java | 205 ++++++++++++++ .../JdbcVectorProjectionRepository.java | 185 +++++++++++++ .../JdbcVectorProjectionSql.java | 48 ++++ .../VectorProjectionCreateCommand.java | 19 ++ .../VectorProjectionJobService.java | 6 + .../VectorProjectionService.java | 20 ++ .../VectorSearchVisualizationCommand.java | 15 ++ .../VectorSearchVisualizationResult.java | 23 ++ .../VectorSearchVisualizationService.java | 6 + .../DefaultVectorProjectionServiceTest.java | 254 ++++++++++++++++++ ...tVectorSearchVisualizationServiceTest.java | 173 ++++++++++++ studio-platform-ai/README.md | 21 ++ .../ExistingVectorItemRepository.java | 17 ++ .../PcaVectorProjectionGenerator.java | 167 ++++++++++++ .../visualization/ProjectionAlgorithm.java | 5 + .../visualization/ProjectionPointPage.java | 10 + .../visualization/ProjectionPointView.java | 18 ++ .../visualization/ProjectionStatus.java | 9 + .../core/vector/visualization/VectorItem.java | 23 ++ .../visualization/VectorProjection.java | 46 ++++ .../VectorProjectionGenerator.java | 11 + .../visualization/VectorProjectionPoint.java | 13 + .../VectorProjectionPointRepository.java | 18 ++ .../VectorProjectionRepository.java | 18 ++ .../VectorVisualizationMetadataSanitizer.java | 49 ++++ .../V603__create_vector_projection_tables.sql | 33 +++ .../V603__create_vector_projection_tables.sql | 33 +++ .../V603__create_vector_projection_tables.sql | 33 +++ .../src/main/resources/sql/ai-sqlset.xml | 8 +- .../PcaVectorProjectionGeneratorTest.java | 38 +++ 52 files changed, 2892 insertions(+), 5 deletions(-) create mode 100644 
starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtController.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateRequest.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionDetailResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionListResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointsResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionSummaryResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorItemDetailResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationRequest.java create mode 100644 starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationResponse.java create mode 100644 starter/studio-platform-starter-ai-web/src/test/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtControllerTest.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionJobService.java create mode 100644 
starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionService.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationService.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcExistingVectorItemRepository.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionPointRepository.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionRepository.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionSql.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionCreateCommand.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionJobService.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionService.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationCommand.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationResult.java create mode 100644 starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationService.java create mode 100644 starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionServiceTest.java create mode 100644 
starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationServiceTest.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ExistingVectorItemRepository.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGenerator.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionAlgorithm.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointPage.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointView.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionStatus.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorItem.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjection.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionGenerator.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPoint.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPointRepository.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionRepository.java create mode 100644 studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorVisualizationMetadataSanitizer.java create mode 100644 studio-platform-ai/src/main/resources/schema/ai/mariadb/V603__create_vector_projection_tables.sql create mode 100644 
studio-platform-ai/src/main/resources/schema/ai/mysql/V603__create_vector_projection_tables.sql create mode 100644 studio-platform-ai/src/main/resources/schema/ai/postgres/V603__create_vector_projection_tables.sql create mode 100644 studio-platform-ai/src/test/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGeneratorTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 3687ee78..ad51a95a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ ## 2026-04-28 ### 변경됨 +- 이슈 #380 대응으로 기존 `tb_ai_document_chunk`를 원본으로 사용하는 vector projection visualization API를 추가했다. +- `tb_ai_vector_projection`, `tb_ai_vector_projection_point` migration을 추가하고, 비동기 PCA job으로 2D 좌표를 미리 계산해 저장하도록 했다. +- `/api/mgmt/ai/vectors/projections`, `/api/mgmt/ai/vectors/items/{vectorItemId}`, `/api/mgmt/ai/vectors/search-visualization` 관리자 API와 클라이언트 산점도용 응답 DTO를 추가했다. - 이슈 #371 대응으로 `studio-platform-thumbnail`에 PPTX/DOCX/HWP/HWPX 문서 썸네일 renderer를 추가했다. - PPTX는 Apache POI slide renderer로 실제 slide thumbnail을 생성하고, DOCX/HWP/HWPX는 `FileContentExtractionService`의 구조화 추출 결과로 preview thumbnail을 생성한다. - `studio.thumbnail.renderers..*` configuration metadata 및 README 예시를 추가했다. 
diff --git a/starter/studio-platform-starter-ai-web/README.md b/starter/studio-platform-starter-ai-web/README.md index 1412f339..09b670d3 100644 --- a/starter/studio-platform-starter-ai-web/README.md +++ b/starter/studio-platform-starter-ai-web/README.md @@ -83,6 +83,12 @@ studio: | `POST` | `{mgmtBasePath}/embedding` | 텍스트 임베딩 벡터 생성 | `services:ai_embedding write` | | `POST` | `{mgmtBasePath}/vectors` | 벡터 문서 업서트 | `services:ai_vector read` | | `POST` | `{mgmtBasePath}/vectors/search` | 벡터 유사도 검색 | `services:ai_vector read` | +| `POST` | `{mgmtBasePath}/vectors/projections` | 벡터 2D projection 생성 job 요청 | `services:ai_vector admin` | +| `GET` | `{mgmtBasePath}/vectors/projections` | 벡터 projection 목록 조회 | `services:ai_vector read` | +| `GET` | `{mgmtBasePath}/vectors/projections/{projectionId}` | 벡터 projection 상세 조회 | `services:ai_vector read` | +| `GET` | `{mgmtBasePath}/vectors/projections/{projectionId}/points` | 산점도 렌더링용 projection point 조회 | `services:ai_vector admin` | +| `GET` | `{mgmtBasePath}/vectors/items/{vectorItemId}` | 벡터 항목 상세 조회 | `services:ai_vector admin` | +| `POST` | `{mgmtBasePath}/vectors/search-visualization` | 검색어 기반 projection highlight 좌표 조회 | `services:ai_vector admin` | | `POST` | `{mgmtBasePath}/rag/index` | 문서 RAG 인덱싱 | `services:ai_rag read` | | `POST` | `{mgmtBasePath}/rag/search` | RAG 시맨틱 검색 | `services:ai_rag read` | | `GET` | `{mgmtBasePath}/rag/jobs` | RAG 색인 job 목록 조회 | `services:ai_rag read` | @@ -101,6 +107,86 @@ studio: > `studio.ai.endpoints.enabled=false`이면 위 AI web endpoint 전체가 등록되지 않는다. +### Vector Projection Visualization + +관리자 화면에서 기존 `tb_ai_document_chunk` 벡터를 2D 산점도로 표시할 수 있도록 projection API를 제공한다. +원본 embedding 테이블은 변경하지 않고, `tb_ai_vector_projection`과 `tb_ai_vector_projection_point`에 +projection job 상태와 미리 계산된 좌표만 저장한다. 화면 요청 시마다 고차원 벡터를 다시 projection하지 않는다. + +기본 알고리즘은 `PCA`다. v1 구현은 Java 내장 연산으로 PCA 좌표를 계산하고, 후속 UMAP/t-SNE는 +`VectorProjectionGenerator` 구현을 추가해 확장한다. `targetTypes`가 비어 있으면 전체 vector item을 대상으로 한다. 
+`filters`는 v1에서 metadata equality 조건만 사용하며 null 값은 무시한다. 한 projection job은 최대 +1,000개 vector item, 2,048 embedding dimension까지 처리한다. 더 큰 범위는 `targetTypes`나 metadata filter로 나눠 생성한다. +projection 생성과 point/item/search visualization 조회는 object별 ACL을 행마다 평가하지 않는 corpus-level 관리 API이므로 +`services:ai_vector admin` 권한이 필요하다. + +Projection 생성: + +```http +POST /api/mgmt/ai/vectors/projections +Authorization: Bearer <token> +Content-Type: application/json + +{ + "name": "NCS-과정-청크 벡터맵", + "targetTypes": ["NCS_UNIT", "COURSE", "COURSE_CHUNK"], + "algorithm": "PCA", + "filters": { + "useYn": "Y" + } +} +``` + +응답은 즉시 `REQUESTED`를 반환한다. 서버는 비동기 job에서 `PROCESSING`으로 전환한 뒤 기존 +`tb_ai_document_chunk`의 embedding을 읽어 좌표를 만들고, 기존 point를 삭제 후 재생성한다. +완료 시 `COMPLETED`, 실패 시 `FAILED`와 `errorMessage`를 저장한다. + +```json +{ + "data": { + "projectionId": "proj-20260430010000-a1b2c3d4", + "status": "REQUESTED", + "message": "벡터 시각화 좌표 생성 작업이 요청되었습니다." + } +} +``` + +산점도 point 조회: + +```http +GET /api/mgmt/ai/vectors/projections/{projectionId}/points?targetType=COURSE_CHUNK&keyword=java&limit=2000&offset=0 +Authorization: Bearer <token> +``` + +`limit` 기본값은 2000, 최대값은 5000이다. projection 상태가 `COMPLETED`가 아니면 +`409 Conflict`와 `PROJECTION_NOT_READY` detail을 반환하므로 클라이언트는 목록/상세 API를 polling하다가 +완료 후 point를 요청해야 한다. point 응답은 산점도 렌더링에 필요한 `vectorItemId`, `targetType`, +`sourceId`, `label`, `x`, `y`, `clusterId`, `metadata`를 함께 제공한다. 응답 metadata에서는 +embedding, 원문 text/content, keyword text 같은 대용량/민감 가능성이 큰 필드는 반환하지 않고, +표시용 allowlist metadata만 반환한다. + +검색어 기반 시각화: + +```http +POST /api/mgmt/ai/vectors/search-visualization +Authorization: Bearer <token> +Content-Type: application/json + +{ + "projectionId": "proj-20260430010000-a1b2c3d4", + "query": "자바 백엔드 개발자가 되고 싶어", + "targetTypes": ["NCS_UNIT", "COURSE", "COURSE_CHUNK"], + "topK": 10 +} +``` + +이 API는 기존 `EmbeddingPort`와 `VectorStorePort` 검색을 재사용한다. 검색 결과의 `vectorItemId`와 +projection point를 매칭하고, query 위치는 매칭된 Top-K point 좌표의 평균으로 계산한다. 
+매칭 point가 없으면 `query.x`, `query.y`는 `null`, `results`는 빈 배열로 200 응답한다. +검색은 선택된 projection의 `targetTypes`와 `filters` 범위를 기준으로 제한하고, 요청 `targetTypes`가 있으면 +projection 범위와 교집합인 type만 대상으로 한다. `query`는 provider 비용과 지연을 제한하기 위해 최대 +2,000자까지 허용한다. + ### RAG Index Job Management RAG 운영 화면은 신규 job API를 사용해 색인 실행 상태, 단계별 로그, 색인된 chunk를 조회할 수 있다. diff --git a/starter/studio-platform-starter-ai-web/build.gradle.kts b/starter/studio-platform-starter-ai-web/build.gradle.kts index 2830ffc3..98650763 100644 --- a/starter/studio-platform-starter-ai-web/build.gradle.kts +++ b/starter/studio-platform-starter-ai-web/build.gradle.kts @@ -23,6 +23,7 @@ dependencies { compileOnly("org.springframework.boot:spring-boot-starter-web") compileOnly("org.springframework.boot:spring-boot-starter-validation") compileOnly("org.springframework.boot:spring-boot-starter-security") + compileOnly("org.springframework:spring-jdbc") compileOnly("com.fasterxml.jackson.core:jackson-databind") implementation("com.github.ben-manes.caffeine:caffeine:${property("caffeineVersion")}") diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/autoconfigure/AiWebAutoConfiguration.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/autoconfigure/AiWebAutoConfiguration.java index 244f4895..db0d8682 100644 --- a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/autoconfigure/AiWebAutoConfiguration.java +++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/autoconfigure/AiWebAutoConfiguration.java @@ -16,6 +16,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.core.env.Environment; import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; import org.springframework.lang.Nullable; import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; @@ -28,9 +29,23 
@@ import studio.one.platform.ai.core.embedding.EmbeddingPort; import studio.one.platform.ai.core.registry.AiProviderRegistry; import studio.one.platform.ai.core.vector.VectorStorePort; +import studio.one.platform.ai.core.vector.visualization.ExistingVectorItemRepository; +import studio.one.platform.ai.core.vector.visualization.PcaVectorProjectionGenerator; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionGenerator; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository; import studio.one.platform.ai.service.pipeline.RagPipelineService; import studio.one.platform.ai.service.pipeline.RagPipelineOptions; import studio.one.platform.ai.service.prompt.PromptRenderer; +import studio.one.platform.ai.service.visualization.DefaultVectorProjectionJobService; +import studio.one.platform.ai.service.visualization.DefaultVectorProjectionService; +import studio.one.platform.ai.service.visualization.DefaultVectorSearchVisualizationService; +import studio.one.platform.ai.service.visualization.JdbcExistingVectorItemRepository; +import studio.one.platform.ai.service.visualization.JdbcVectorProjectionPointRepository; +import studio.one.platform.ai.service.visualization.JdbcVectorProjectionRepository; +import studio.one.platform.ai.service.visualization.VectorProjectionJobService; +import studio.one.platform.ai.service.visualization.VectorProjectionService; +import studio.one.platform.ai.service.visualization.VectorSearchVisualizationService; import studio.one.platform.ai.web.controller.AiWebExceptionHandler; import studio.one.platform.ai.web.controller.AiInfoController; import studio.one.platform.ai.web.controller.ChatController; @@ -42,6 +57,7 @@ import studio.one.platform.ai.web.controller.RagIndexJobController; import studio.one.platform.ai.web.controller.RagIndexJobEndpointSecurity; import 
studio.one.platform.ai.web.controller.VectorController; +import studio.one.platform.ai.web.controller.VectorVisualizationMgmtController; import studio.one.platform.ai.web.service.ConversationChatService; import studio.one.platform.ai.web.service.InMemoryConversationRepository; import studio.one.platform.ai.web.service.InMemoryChatMemoryStore; @@ -137,6 +153,80 @@ VectorController vectorController( ragPipelineOptions(ragPipelineProperties)); } + @Bean + @ConditionalOnMissingBean + VectorProjectionGenerator vectorProjectionGenerator() { + return new PcaVectorProjectionGenerator(); + } + + @Bean(name = "vectorProjectionExecutor") + @ConditionalOnMissingBean(name = "vectorProjectionExecutor") + Executor vectorProjectionExecutor() { + ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); + executor.setThreadNamePrefix("vector-projection-"); + executor.setCorePoolSize(1); + executor.setMaxPoolSize(2); + executor.setQueueCapacity(10); + executor.initialize(); + return executor; + } + + @Bean + @ConditionalOnBean({VectorProjectionRepository.class, VectorProjectionPointRepository.class, ExistingVectorItemRepository.class}) + @ConditionalOnMissingBean + VectorProjectionJobService vectorProjectionJobService( + VectorProjectionRepository projectionRepository, + VectorProjectionPointRepository pointRepository, + ExistingVectorItemRepository itemRepository, + ObjectProvider<VectorProjectionGenerator> generators) { + return new DefaultVectorProjectionJobService( + projectionRepository, + pointRepository, + itemRepository, + generators.orderedStream().toList()); + } + + @Bean + @ConditionalOnBean(VectorProjectionJobService.class) + @ConditionalOnMissingBean + VectorProjectionService vectorProjectionService( + VectorProjectionRepository projectionRepository, + VectorProjectionPointRepository pointRepository, + ExistingVectorItemRepository itemRepository, + VectorProjectionJobService jobService, + @Qualifier("vectorProjectionExecutor") Executor vectorProjectionExecutor) { + return new 
DefaultVectorProjectionService( + projectionRepository, + pointRepository, + itemRepository, + jobService, + vectorProjectionExecutor); + } + + @Bean + @ConditionalOnBean({EmbeddingPort.class, VectorStorePort.class, VectorProjectionRepository.class, + VectorProjectionPointRepository.class}) + @ConditionalOnMissingBean + VectorSearchVisualizationService vectorSearchVisualizationService( + EmbeddingPort embeddingPort, + VectorStorePort vectorStorePort, + VectorProjectionRepository projectionRepository, + VectorProjectionPointRepository pointRepository) { + return new DefaultVectorSearchVisualizationService( + embeddingPort, + vectorStorePort, + projectionRepository, + pointRepository); + } + + @Bean + @ConditionalOnBean(VectorProjectionService.class) + VectorVisualizationMgmtController vectorVisualizationMgmtController( + VectorProjectionService projectionService, + @Nullable VectorSearchVisualizationService searchVisualizationService) { + return new VectorVisualizationMgmtController(projectionService, searchVisualizationService); + } + @Bean RagController ragController( RagPipelineService ragPipelineService, @@ -229,4 +319,36 @@ AiInfoController aiInfoController( @Nullable VectorStorePort vectorStorePort) { return new AiInfoController(properties, chatProperties, environment, vectorStorePort); } + + @Configuration(proxyBeanMethods = false) + @ConditionalOnClass(NamedParameterJdbcTemplate.class) + static class VectorProjectionJdbcConfiguration { + + @Bean + @ConditionalOnBean(NamedParameterJdbcTemplate.class) + @ConditionalOnMissingBean + ExistingVectorItemRepository existingVectorItemRepository( + NamedParameterJdbcTemplate jdbcTemplate, + ObjectMapper objectMapper) { + return new JdbcExistingVectorItemRepository(jdbcTemplate, objectMapper); + } + + @Bean + @ConditionalOnBean(NamedParameterJdbcTemplate.class) + @ConditionalOnMissingBean + VectorProjectionRepository vectorProjectionRepository( + NamedParameterJdbcTemplate jdbcTemplate, + ObjectMapper objectMapper) { + 
return new JdbcVectorProjectionRepository(jdbcTemplate, objectMapper); + } + + @Bean + @ConditionalOnBean(NamedParameterJdbcTemplate.class) + @ConditionalOnMissingBean + VectorProjectionPointRepository vectorProjectionPointRepository( + NamedParameterJdbcTemplate jdbcTemplate, + ObjectMapper objectMapper) { + return new JdbcVectorProjectionPointRepository(jdbcTemplate, objectMapper); + } + } } diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/AiWebExceptionHandler.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/AiWebExceptionHandler.java index 240ab80d..6a36c01c 100644 --- a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/AiWebExceptionHandler.java +++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/AiWebExceptionHandler.java @@ -17,6 +17,7 @@ ChatController.class, EmbeddingController.class, VectorController.class, + VectorVisualizationMgmtController.class, RagController.class, RagChunkPreviewController.class, RagIndexJobController.class, diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtController.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtController.java new file mode 100644 index 00000000..44003139 --- /dev/null +++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtController.java @@ -0,0 +1,219 @@ +package studio.one.platform.ai.web.controller; + +import java.util.Locale; + +import jakarta.validation.Valid; + +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.lang.Nullable; +import org.springframework.security.access.prepost.PreAuthorize; +import 
org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; + +import studio.one.platform.ai.core.vector.visualization.ProjectionAlgorithm; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointView; +import studio.one.platform.ai.core.vector.visualization.VectorVisualizationMetadataSanitizer; +import studio.one.platform.ai.core.vector.visualization.VectorItem; +import studio.one.platform.ai.core.vector.visualization.VectorProjection; +import studio.one.platform.ai.service.visualization.VectorProjectionCreateCommand; +import studio.one.platform.ai.service.visualization.VectorProjectionService; +import studio.one.platform.ai.service.visualization.VectorSearchVisualizationCommand; +import studio.one.platform.ai.service.visualization.VectorSearchVisualizationResult; +import studio.one.platform.ai.service.visualization.VectorSearchVisualizationService; +import studio.one.platform.ai.web.dto.visualization.ProjectionCreateRequest; +import studio.one.platform.ai.web.dto.visualization.ProjectionCreateResponse; +import studio.one.platform.ai.web.dto.visualization.ProjectionDetailResponse; +import studio.one.platform.ai.web.dto.visualization.ProjectionListResponse; +import studio.one.platform.ai.web.dto.visualization.ProjectionPointResponse; +import studio.one.platform.ai.web.dto.visualization.ProjectionPointsResponse; +import studio.one.platform.ai.web.dto.visualization.ProjectionSummaryResponse; 
+import studio.one.platform.ai.web.dto.visualization.VectorItemDetailResponse; +import studio.one.platform.ai.web.dto.visualization.VectorSearchVisualizationRequest; +import studio.one.platform.ai.web.dto.visualization.VectorSearchVisualizationResponse; +import studio.one.platform.constant.PropertyKeys; +import studio.one.platform.web.dto.ApiResponse; + +@RestController +@RequestMapping("${" + PropertyKeys.AI.Endpoints.MGMT_BASE_PATH + ":/api/mgmt/ai}/vectors") +@Validated +public class VectorVisualizationMgmtController { + + private final VectorProjectionService projectionService; + @Nullable + private final VectorSearchVisualizationService searchVisualizationService; + + public VectorVisualizationMgmtController( + VectorProjectionService projectionService, + @Nullable VectorSearchVisualizationService searchVisualizationService) { + this.projectionService = projectionService; + this.searchVisualizationService = searchVisualizationService; + } + + @PostMapping("/projections") + @PreAuthorize("@endpointAuthz.can('services:ai_vector','admin')") + public ResponseEntity<ApiResponse<ProjectionCreateResponse>> createProjection( + @Valid @RequestBody ProjectionCreateRequest request) { + VectorProjection projection = projectionService.create(new VectorProjectionCreateCommand( + request.name(), + algorithm(request.algorithm()), + request.targetTypes(), + request.filters(), + null)); + return ResponseEntity.ok(ApiResponse.ok(new ProjectionCreateResponse( + projection.projectionId(), + projection.status().name(), + "벡터 시각화 좌표 생성 작업이 요청되었습니다."))); + } + + @GetMapping("/projections") + @PreAuthorize("@endpointAuthz.can('services:ai_vector','read')") + public ResponseEntity<ApiResponse<ProjectionListResponse>> listProjections( + @RequestParam(defaultValue = "50") int limit, + @RequestParam(defaultValue = "0") int offset) { + return ResponseEntity.ok(ApiResponse.ok(new ProjectionListResponse( + projectionService.list(limit, offset).stream() + .map(this::summary) + .toList()))); + } + + @GetMapping("/projections/{projectionId}") + 
@PreAuthorize("@endpointAuthz.can('services:ai_vector','read')") + public ResponseEntity> projection( + @PathVariable String projectionId) { + return ResponseEntity.ok(ApiResponse.ok(detail(projectionService.get(projectionId)))); + } + + @GetMapping("/projections/{projectionId}/points") + @PreAuthorize("@endpointAuthz.can('services:ai_vector','admin')") + public ResponseEntity> points( + @PathVariable String projectionId, + @RequestParam(required = false) String targetType, + @RequestParam(required = false) String clusterId, + @RequestParam(required = false) String keyword, + @RequestParam(defaultValue = "2000") int limit, + @RequestParam(defaultValue = "0") int offset) { + VectorProjection projection = projectionService.get(projectionId); + ProjectionPointPage page = projectionService.points( + projectionId, + targetType, + clusterId, + keyword, + limit, + offset); + return ResponseEntity.ok(ApiResponse.ok(new ProjectionPointsResponse( + projectionId, + projection.algorithm().name(), + page.totalCount(), + page.items().stream().map(this::point).toList()))); + } + + @GetMapping("/items/{vectorItemId}") + @PreAuthorize("@endpointAuthz.can('services:ai_vector','admin')") + public ResponseEntity> item(@PathVariable String vectorItemId) { + return ResponseEntity.ok(ApiResponse.ok(itemDetail(projectionService.item(vectorItemId)))); + } + + @PostMapping("/search-visualization") + @PreAuthorize("@endpointAuthz.can('services:ai_vector','admin')") + public ResponseEntity> searchVisualization( + @Valid @RequestBody VectorSearchVisualizationRequest request) { + if (searchVisualizationService == null) { + throw new ResponseStatusException(HttpStatus.SERVICE_UNAVAILABLE, "Vector search visualization is not configured"); + } + VectorSearchVisualizationResult result = searchVisualizationService.search(new VectorSearchVisualizationCommand( + request.projectionId(), + request.query(), + request.targetTypes(), + request.topK(), + request.minScore())); + return 
ResponseEntity.ok(ApiResponse.ok(searchResponse(result))); + } + + private ProjectionAlgorithm algorithm(String value) { + if (value == null || value.isBlank()) { + return ProjectionAlgorithm.PCA; + } + try { + return ProjectionAlgorithm.valueOf(value.trim().toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException ex) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "UNSUPPORTED_PROJECTION_ALGORITHM", ex); + } + } + + private ProjectionSummaryResponse summary(VectorProjection projection) { + return new ProjectionSummaryResponse( + projection.projectionId(), + projection.name(), + projection.algorithm().name(), + projection.status().name(), + projection.itemCount(), + projection.createdAt(), + projection.completedAt()); + } + + private ProjectionDetailResponse detail(VectorProjection projection) { + return new ProjectionDetailResponse( + projection.projectionId(), + projection.name(), + projection.algorithm().name(), + projection.status().name(), + projection.targetTypes(), + projection.filters(), + projection.itemCount(), + projection.errorMessage(), + projection.createdAt(), + projection.completedAt()); + } + + private ProjectionPointResponse point(ProjectionPointView point) { + return new ProjectionPointResponse( + point.vectorItemId(), + point.targetType(), + point.sourceId(), + point.label(), + point.x(), + point.y(), + point.clusterId(), + point.metadata()); + } + + private VectorItemDetailResponse itemDetail(VectorItem item) { + return new VectorItemDetailResponse( + item.vectorItemId(), + item.targetType(), + item.sourceId(), + item.label(), + item.contentText(), + item.embeddingModel(), + item.embeddingDimension(), + VectorVisualizationMetadataSanitizer.sanitize(item.metadata()), + item.createdAt()); + } + + private VectorSearchVisualizationResponse searchResponse(VectorSearchVisualizationResult result) { + return new VectorSearchVisualizationResponse( + new VectorSearchVisualizationResponse.QueryPoint( + result.query().label(), + 
result.query().x(), + result.query().y()), + result.results().stream() + .map(point -> new VectorSearchVisualizationResponse.ResultPoint( + point.vectorItemId(), + point.targetType(), + point.sourceId(), + point.label(), + point.x(), + point.y(), + point.similarity())) + .toList()); + } +} diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateRequest.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateRequest.java new file mode 100644 index 00000000..d76845bc --- /dev/null +++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateRequest.java @@ -0,0 +1,14 @@ +package studio.one.platform.ai.web.dto.visualization; + +import java.util.List; +import java.util.Map; + +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; + +public record ProjectionCreateRequest( + @NotBlank @Size(max = 200) String name, + List targetTypes, + String algorithm, + Map filters) { +} diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateResponse.java new file mode 100644 index 00000000..92c9eced --- /dev/null +++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionCreateResponse.java @@ -0,0 +1,4 @@ +package studio.one.platform.ai.web.dto.visualization; + +public record ProjectionCreateResponse(String projectionId, String status, String message) { +} diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionDetailResponse.java 
b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionDetailResponse.java
new file mode 100644
index 00000000..2f23f258
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionDetailResponse.java
@@ -0,0 +1,18 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+
+public record ProjectionDetailResponse(
+        String projectionId,
+        String name,
+        String algorithm,
+        String status,
+        List<String> targetTypes,
+        Map<String, Object> filters, // NOTE(review): value type inferred from usage - confirm
+        int itemCount,
+        String errorMessage,
+        Instant createdAt,
+        Instant completedAt) {
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionListResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionListResponse.java
new file mode 100644
index 00000000..ae8c4844
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionListResponse.java
@@ -0,0 +1,6 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.util.List;
+
+public record ProjectionListResponse(List<ProjectionSummaryResponse> items) {
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointResponse.java
new file mode 100644
index 00000000..95ebd35e
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointResponse.java
@@ -0,0 +1,14 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.util.Map;
+
+public record ProjectionPointResponse(
+        String vectorItemId,
+        String targetType,
+        String sourceId,
+        String label,
+        double x,
+        double y,
+        String clusterId,
+        Map<String, Object> metadata) {
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointsResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointsResponse.java
new file mode 100644
index 00000000..82843ca9
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionPointsResponse.java
@@ -0,0 +1,10 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.util.List;
+
+public record ProjectionPointsResponse(
+        String projectionId,
+        String algorithm,
+        long totalCount,
+        List<ProjectionPointResponse> items) {
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionSummaryResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionSummaryResponse.java
new file mode 100644
index 00000000..b1c379ae
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/ProjectionSummaryResponse.java
@@ -0,0 +1,13 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.time.Instant;
+
+public record ProjectionSummaryResponse(
+        String projectionId,
+        String name,
+        String algorithm,
+        String status,
+        int itemCount,
+        Instant createdAt,
+        Instant completedAt) {
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorItemDetailResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorItemDetailResponse.java
new file mode 100644
index 00000000..d2808dd1
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorItemDetailResponse.java
@@ -0,0 +1,16 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.time.Instant;
+import java.util.Map;
+
+public record VectorItemDetailResponse(
+        String vectorItemId,
+        String targetType,
+        String sourceId,
+        String label,
+        String text,
+        String embeddingModel,
+        Integer dimension,
+        Map<String, Object> metadata,
+        Instant createdAt) {
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationRequest.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationRequest.java
new file mode 100644
index 00000000..be46057f
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationRequest.java
@@ -0,0 +1,14 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.util.List;
+
+import jakarta.validation.constraints.NotBlank;
+import jakarta.validation.constraints.Size;
+
+public record VectorSearchVisualizationRequest(
+        @NotBlank String projectionId,
+        @NotBlank @Size(max = 2000) String query,
+        List<String> targetTypes, // optional; empty or null means "use the projection's own scope"
+        Integer topK, // optional; the service applies its default and upper bound
+        Double minScore) { // optional similarity threshold
+}
diff --git a/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationResponse.java b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationResponse.java
new file mode 100644
index 00000000..b456adad
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/main/java/studio/one/platform/ai/web/dto/visualization/VectorSearchVisualizationResponse.java
@@ -0,0 +1,19 @@
+package studio.one.platform.ai.web.dto.visualization;
+
+import java.util.List;
+
+public record VectorSearchVisualizationResponse(QueryPoint query, List<ResultPoint> results) {
+
+    public record QueryPoint(String label, Double x, Double y) {
+    }
+
+    public record ResultPoint(
+            String
vectorItemId,
+            String targetType,
+            String sourceId,
+            String label,
+            double x,
+            double y,
+            Double similarity) {
+    }
+}
diff --git a/starter/studio-platform-starter-ai-web/src/test/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtControllerTest.java b/starter/studio-platform-starter-ai-web/src/test/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtControllerTest.java
new file mode 100644
index 00000000..136c30b8
--- /dev/null
+++ b/starter/studio-platform-starter-ai-web/src/test/java/studio/one/platform/ai/web/controller/VectorVisualizationMgmtControllerTest.java
@@ -0,0 +1,127 @@
+package studio.one.platform.ai.web.controller;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.jupiter.api.Test;
+import org.springframework.http.HttpStatus;
+import org.springframework.web.server.ResponseStatusException;
+
+import studio.one.platform.ai.core.vector.visualization.ProjectionAlgorithm;
+import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage;
+import studio.one.platform.ai.core.vector.visualization.ProjectionPointView;
+import studio.one.platform.ai.core.vector.visualization.ProjectionStatus;
+import studio.one.platform.ai.core.vector.visualization.VectorItem;
+import studio.one.platform.ai.core.vector.visualization.VectorProjection;
+import studio.one.platform.ai.service.visualization.VectorProjectionService;
+import studio.one.platform.ai.service.visualization.VectorSearchVisualizationService;
+import studio.one.platform.ai.web.dto.visualization.ProjectionCreateRequest;
+import studio.one.platform.ai.web.dto.visualization.VectorSearchVisualizationRequest;
+
+/** Plain unit tests for the controller: services are Mockito mocks, methods are called directly. */
+class VectorVisualizationMgmtControllerTest {
+
+    private static final String PROJECTION_ID = "proj-1";
+    private static final String CHUNK_ID = "chunk-1";
+    private static final String TARGET_TYPE = "COURSE_CHUNK";
+
+    @Test
+    void createProjectionDefaultsToPcaAndReturnsRequestedStatus() {
+        VectorProjectionService service = mock(VectorProjectionService.class);
+        when(service.create(any())).thenReturn(projection(ProjectionStatus.REQUESTED));
+        ProjectionCreateRequest request = new ProjectionCreateRequest(
+                "NCS map", List.of(TARGET_TYPE), null, Map.of("useYn", "Y"));
+
+        var created = controllerWithSearch(service).createProjection(request).getBody().getData();
+
+        assertThat(created.projectionId()).isEqualTo(PROJECTION_ID);
+        assertThat(created.status()).isEqualTo("REQUESTED");
+        verify(service).create(any());
+    }
+
+    @Test
+    void pointsReturnsClientOrientedShape() {
+        VectorProjectionService service = mock(VectorProjectionService.class);
+        ProjectionPointView view = new ProjectionPointView(
+                CHUNK_ID, TARGET_TYPE, "course-1", "Java 기본 문법", 0.1, 0.2, null, Map.of("chunkIndex", 3));
+        when(service.get(PROJECTION_ID)).thenReturn(projection(ProjectionStatus.COMPLETED));
+        when(service.points(PROJECTION_ID, TARGET_TYPE, null, "java", 2000, 0))
+                .thenReturn(new ProjectionPointPage(1, List.of(view)));
+
+        var page = controllerWithSearch(service)
+                .points(PROJECTION_ID, TARGET_TYPE, null, "java", 2000, 0)
+                .getBody()
+                .getData();
+
+        assertThat(page.algorithm()).isEqualTo("PCA");
+        assertThat(page.items()).singleElement().satisfies(point -> {
+            assertThat(point.vectorItemId()).isEqualTo(CHUNK_ID);
+            assertThat(point.label()).isEqualTo("Java 기본 문법");
+            assertThat(point.metadata()).containsEntry("chunkIndex", 3);
+        });
+    }
+
+    @Test
+    void itemDetailDoesNotReturnEmbeddingMetadata() {
+        VectorProjectionService service = mock(VectorProjectionService.class);
+        VectorItem stored = new VectorItem(
+                CHUNK_ID,
+                TARGET_TYPE,
+                "course-1",
+                "label",
+                "text",
+                List.of(0.1, 0.2),
+                "model",
+                2,
+                Map.of("embedding", List.of(0.1, 0.2), "chunkIndex", 1),
+                Instant.now());
+        when(service.item(CHUNK_ID)).thenReturn(stored);
+
+        var detail = new VectorVisualizationMgmtController(service, null).item(CHUNK_ID).getBody().getData();
+
+        assertThat(detail.dimension()).isEqualTo(2);
+        assertThat(detail.metadata()).doesNotContainKey("embedding");
+    }
+
+    @Test
+    void searchVisualizationRequiresConfiguredSearchService() {
+        VectorVisualizationMgmtController controller =
+                new VectorVisualizationMgmtController(mock(VectorProjectionService.class), null);
+        VectorSearchVisualizationRequest request =
+                new VectorSearchVisualizationRequest(PROJECTION_ID, "java", List.of(), 10, null);
+
+        assertThatThrownBy(() -> controller.searchVisualization(request))
+                .isInstanceOf(ResponseStatusException.class)
+                .extracting("statusCode")
+                .satisfies(status -> assertThat(status).isEqualTo(HttpStatus.SERVICE_UNAVAILABLE));
+    }
+
+    /** Builds a controller around the given projection service and a mocked search service. */
+    private VectorVisualizationMgmtController controllerWithSearch(VectorProjectionService service) {
+        return new VectorVisualizationMgmtController(service, mock(VectorSearchVisualizationService.class));
+    }
+
+    /** PCA projection fixture in the given status. */
+    private VectorProjection projection(ProjectionStatus status) {
+        return new VectorProjection(
+                PROJECTION_ID,
+                "map",
+                ProjectionAlgorithm.PCA,
+                status,
+                List.of(TARGET_TYPE),
+                Map.of(),
+                1,
+                null,
+                null,
+                Instant.parse("2026-04-30T00:00:00Z"),
+                null);
+    }
+}
diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/adapters/vector/PgVectorStoreAdapterV2.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/adapters/vector/PgVectorStoreAdapterV2.java
index 4e93f947..f474decc 100644
--- a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/adapters/vector/PgVectorStoreAdapterV2.java
+++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/adapters/vector/PgVectorStoreAdapterV2.java
@@ -41,11 +41,23 @@
public VectorSearchResult mapRow(ResultSet rs, int rowNum) throws SQLException { String metadataJson = rs.getString("metadata"); double distance = rs.getDouble("distance"); double score = 1.0d / (1.0d + distance); - Map metadata = Json.read(metadataJson); + Map metadata = new HashMap<>(Json.read(metadataJson)); + addRowId(metadata, rs); String documentId = Objects.toString(metadata.getOrDefault("documentId", objectId), objectId); VectorDocument document = new VectorDocument(documentId, content, metadata, List.of()); return new VectorSearchResult(document, score); } + + private void addRowId(Map metadata, ResultSet rs) { + try { + long rowId = rs.getLong("id"); + if (!rs.wasNull()) { + metadata.putIfAbsent("_vectorRowId", "row-" + rowId); + } + } catch (SQLException ignored) { + // Older test SQL/result sets may not expose the physical row id. + } + } }; private static final Pattern ORDER_BY_PATTERN = Pattern.compile("\\border\\s+by\\b", Pattern.CASE_INSENSITIVE); diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionJobService.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionJobService.java new file mode 100644 index 00000000..6e94c549 --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionJobService.java @@ -0,0 +1,75 @@ +package studio.one.platform.ai.service.visualization; + +import java.time.Instant; +import java.util.List; +import java.util.Objects; + +import lombok.extern.slf4j.Slf4j; +import studio.one.platform.ai.core.vector.visualization.ExistingVectorItemRepository; +import studio.one.platform.ai.core.vector.visualization.ProjectionStatus; +import studio.one.platform.ai.core.vector.visualization.VectorItem; +import studio.one.platform.ai.core.vector.visualization.VectorProjection; +import 
studio.one.platform.ai.core.vector.visualization.VectorProjectionGenerator;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionPoint;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository;
+
+/**
+ * Executes one projection job: loads the vector items in the projection's scope, runs the
+ * generator matching the projection's algorithm, replaces the stored points and records the
+ * outcome. Failures never propagate to the caller; they are logged and the projection is
+ * marked {@link ProjectionStatus#FAILED} with a reason.
+ */
+@Slf4j
+public class DefaultVectorProjectionJobService implements VectorProjectionJobService {
+
+    private static final int MAX_EMBEDDING_DIMENSIONS = 2_048;
+    private static final String NO_ITEMS_MESSAGE = "No vector items with embeddings were found";
+
+    private final VectorProjectionRepository projectionRepository;
+    private final VectorProjectionPointRepository pointRepository;
+    private final ExistingVectorItemRepository itemRepository;
+    private final List<VectorProjectionGenerator> generators;
+
+    public DefaultVectorProjectionJobService(
+            VectorProjectionRepository projectionRepository,
+            VectorProjectionPointRepository pointRepository,
+            ExistingVectorItemRepository itemRepository,
+            List<VectorProjectionGenerator> generators) {
+        this.projectionRepository = Objects.requireNonNull(projectionRepository, "projectionRepository");
+        this.pointRepository = Objects.requireNonNull(pointRepository, "pointRepository");
+        this.itemRepository = Objects.requireNonNull(itemRepository, "itemRepository");
+        this.generators = List.copyOf(Objects.requireNonNull(generators, "generators"));
+    }
+
+    /**
+     * Runs the projection job for {@code projectionId}.
+     *
+     * <p>The status is set to PROCESSING first; on success the previous points are deleted,
+     * the new ones saved and the projection marked completed. Any exception inside the job
+     * is caught and turned into a FAILED status.
+     */
+    @Override
+    public void run(String projectionId) {
+        projectionRepository.updateStatus(projectionId, ProjectionStatus.PROCESSING, null, null);
+        try {
+            VectorProjection projection = projectionRepository.findById(projectionId)
+                    .orElseThrow(() -> new IllegalStateException("Projection not found: " + projectionId));
+            VectorProjectionGenerator generator = generatorFor(projection);
+            List<VectorItem> items = itemRepository.findItems(projection.targetTypes(), projection.filters());
+            // Validate the scope before invoking the generator so an empty scope is reported
+            // with a clear message instead of whatever the generator does with no input.
+            if (items.isEmpty()) {
+                throw new IllegalStateException(NO_ITEMS_MESSAGE);
+            }
+            if (items.size() > ExistingVectorItemRepository.DEFAULT_MAX_PROJECTION_ITEMS) {
+                throw new IllegalStateException("Projection scope is too large. Limit targetTypes or filters.");
+            }
+            if (items.stream().anyMatch(this::exceedsMaxDimensions)) {
+                throw new IllegalStateException("Projection embedding dimension is too large");
+            }
+            List<VectorProjectionPoint> points = generator.generate(projectionId, items, Instant.now());
+            if (points.isEmpty()) {
+                throw new IllegalStateException(NO_ITEMS_MESSAGE);
+            }
+            pointRepository.deleteByProjectionId(projectionId);
+            pointRepository.saveAll(points);
+            projectionRepository.markCompleted(projectionId, points.size(), Instant.now());
+        } catch (Exception ex) {
+            log.warn("Vector projection job failed. projectionId={}", projectionId, ex);
+            markFailed(projectionId, ex);
+        }
+    }
+
+    /** Items without an embedding are left for the generator to handle; only oversized ones are rejected. */
+    private boolean exceedsMaxDimensions(VectorItem item) {
+        return item.embedding() != null && item.embedding().size() > MAX_EMBEDDING_DIMENSIONS;
+    }
+
+    /** Persists the FAILED status; a failure to persist is logged rather than thrown into the executor. */
+    private void markFailed(String projectionId, Exception cause) {
+        try {
+            projectionRepository.updateStatus(
+                    projectionId,
+                    ProjectionStatus.FAILED,
+                    failureMessage(cause),
+                    Instant.now());
+        } catch (RuntimeException ex) {
+            log.error("Could not record projection failure. projectionId={}", projectionId, ex);
+        }
+    }
+
+    /** Exceptions such as NullPointerException may carry no message; fall back to the type name. */
+    private String failureMessage(Exception cause) {
+        String message = cause.getMessage();
+        return message == null || message.isBlank() ? cause.getClass().getSimpleName() : message;
+    }
+
+    private VectorProjectionGenerator generatorFor(VectorProjection projection) {
+        return generators.stream()
+                .filter(generator -> generator.algorithm() == projection.algorithm())
+                .findFirst()
+                .orElseThrow(() -> new IllegalArgumentException("UNSUPPORTED_PROJECTION_ALGORITHM"));
+    }
+}
diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionService.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionService.java
new file mode 100644
index 00000000..280e2baa
--- /dev/null
+++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionService.java
@@ -0,0 +1,144 @@
+package studio.one.platform.ai.service.visualization;
+
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.concurrent.Executor;
+import java.util.concurrent.RejectedExecutionException;
+
+import org.springframework.http.HttpStatus;
+import
org.springframework.web.server.ResponseStatusException;
+
+import studio.one.platform.ai.core.vector.visualization.ExistingVectorItemRepository;
+import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage;
+import studio.one.platform.ai.core.vector.visualization.ProjectionStatus;
+import studio.one.platform.ai.core.vector.visualization.VectorItem;
+import studio.one.platform.ai.core.vector.visualization.VectorProjection;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository;
+
+/**
+ * Default projection service: validates and stores projection requests, hands the actual
+ * computation to {@link VectorProjectionJobService} on the supplied executor, and serves
+ * read access to projections, their points and individual vector items.
+ */
+public class DefaultVectorProjectionService implements VectorProjectionService {
+
+    private static final int MAX_LIMIT = 5_000;
+    /** Page size used when the caller passes a non-positive limit. */
+    private static final int DEFAULT_LIMIT = 2_000;
+    private static final int MAX_NAME_LENGTH = 200;
+    private static final int MAX_TARGET_TYPES_LENGTH = 500;
+    /** Formatter is immutable and thread-safe, so one shared instance serves every id. */
+    private static final DateTimeFormatter PROJECTION_ID_TIMESTAMP =
+            DateTimeFormatter.ofPattern("yyyyMMddHHmmss", Locale.ROOT)
+                    .withZone(java.time.ZoneOffset.UTC);
+
+    private final VectorProjectionRepository projectionRepository;
+    private final VectorProjectionPointRepository pointRepository;
+    private final ExistingVectorItemRepository itemRepository;
+    private final VectorProjectionJobService jobService;
+    private final Executor executor;
+
+    public DefaultVectorProjectionService(
+            VectorProjectionRepository projectionRepository,
+            VectorProjectionPointRepository pointRepository,
+            ExistingVectorItemRepository itemRepository,
+            VectorProjectionJobService jobService,
+            Executor executor) {
+        this.projectionRepository = Objects.requireNonNull(projectionRepository, "projectionRepository");
+        this.pointRepository = Objects.requireNonNull(pointRepository, "pointRepository");
+        this.itemRepository = Objects.requireNonNull(itemRepository, "itemRepository");
+        this.jobService = Objects.requireNonNull(jobService, "jobService");
+        this.executor = Objects.requireNonNull(executor, "executor");
+    }
+
+    /**
+     * Validates the command, persists the projection in its requested state and queues the job.
+     *
+     * @throws ResponseStatusException 400 for a blank/too long name or too long target types,
+     *         503 when the executor rejects the job (the projection is then marked FAILED)
+     */
+    @Override
+    public VectorProjection create(VectorProjectionCreateCommand command) {
+        String name = normalize(command.name());
+        if (name == null) {
+            throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "name must not be blank");
+        }
+        if (name.length() > MAX_NAME_LENGTH) {
+            throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "name must be at most " + MAX_NAME_LENGTH + " characters");
+        }
+        // targetTypes is optional in the web request, so treat a missing list as "no restriction"
+        // instead of failing with a NullPointerException.
+        List<String> requestedTypes = command.targetTypes() == null ? List.of() : command.targetTypes();
+        List<String> targetTypes = requestedTypes.stream()
+                .map(this::normalize)
+                .filter(Objects::nonNull)
+                .toList();
+        if (String.join(",", targetTypes).length() > MAX_TARGET_TYPES_LENGTH) {
+            throw new ResponseStatusException(HttpStatus.BAD_REQUEST,
+                    "targetTypes must fit within " + MAX_TARGET_TYPES_LENGTH + " characters");
+        }
+        VectorProjection projection = VectorProjection.requested(
+                newProjectionId(),
+                name,
+                command.algorithm(),
+                targetTypes,
+                command.filters(),
+                normalize(command.createdBy()),
+                Instant.now());
+        projectionRepository.save(projection);
+        try {
+            executor.execute(() -> jobService.run(projection.projectionId()));
+        } catch (RejectedExecutionException ex) {
+            // The row already exists; record why it will never be processed.
+            projectionRepository.updateStatus(
+                    projection.projectionId(),
+                    ProjectionStatus.FAILED,
+                    "Projection job could not be queued",
+                    Instant.now());
+            throw new ResponseStatusException(HttpStatus.SERVICE_UNAVAILABLE, "PROJECTION_JOB_QUEUE_UNAVAILABLE", ex);
+        }
+        return projection;
+    }
+
+    /** Lists projections; the limit is clamped and a negative offset is treated as zero. */
+    @Override
+    public List<VectorProjection> list(int limit, int offset) {
+        return projectionRepository.findAll(clampLimit(limit), Math.max(0, offset));
+    }
+
+    /** @throws ResponseStatusException 404 when the projection does not exist */
+    @Override
+    public VectorProjection get(String projectionId) {
+        return projectionRepository.findById(projectionId)
+                .orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND, "PROJECTION_NOT_FOUND"));
+    }
+
+    /**
+     * Returns one page of points of a completed projection. Blank filter values are ignored.
+     *
+     * @throws ResponseStatusException 404 for an unknown projection, 409 when it is not completed
+     */
+    @Override
+    public ProjectionPointPage points(
+            String projectionId,
+            String targetType,
+            String clusterId,
+            String keyword,
+            int limit,
+            int offset) {
+        VectorProjection projection = get(projectionId);
+        if (projection.status() != ProjectionStatus.COMPLETED) {
+            throw new ResponseStatusException(HttpStatus.CONFLICT, "PROJECTION_NOT_READY");
+        }
+        return pointRepository.findPage(
+                projectionId,
+                normalize(targetType),
+                normalize(clusterId),
+                normalize(keyword),
+                clampLimit(limit),
+                Math.max(0, offset));
+    }
+
+    /** @throws ResponseStatusException 404 when the vector item does not exist */
+    @Override
+    public VectorItem item(String vectorItemId) {
+        return itemRepository.findByVectorItemId(vectorItemId)
+                .orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND, "VECTOR_ITEM_NOT_FOUND"));
+    }
+
+    private int clampLimit(int limit) {
+        int effective = limit <= 0 ? DEFAULT_LIMIT : limit;
+        return Math.min(effective, MAX_LIMIT);
+    }
+
+    /** Id format: {@code proj-<UTC yyyyMMddHHmmss>-<first 8 characters of a random UUID>}. */
+    private String newProjectionId() {
+        String timestamp = PROJECTION_ID_TIMESTAMP.format(Instant.now());
+        return "proj-" + timestamp + "-" + UUID.randomUUID().toString().substring(0, 8);
+    }
+
+    /** Trims the value; {@code null} and blank input both become {@code null}. */
+    private String normalize(String value) {
+        if (value == null) {
+            return null;
+        }
+        String trimmed = value.trim();
+        return trimmed.isEmpty() ? null : trimmed;
+    }
+}
diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationService.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationService.java
new file mode 100644
index 00000000..8af8c879
--- /dev/null
+++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationService.java
@@ -0,0 +1,211 @@
+package studio.one.platform.ai.service.visualization;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.springframework.http.HttpStatus;
+import org.springframework.web.server.ResponseStatusException;
+
+import studio.one.platform.ai.core.MetadataFilter;
+import studio.one.platform.ai.core.embedding.EmbeddingPort;
+import studio.one.platform.ai.core.embedding.EmbeddingRequest;
+import
studio.one.platform.ai.core.embedding.EmbeddingResponse;
+import studio.one.platform.ai.core.vector.VectorSearchHit;
+import studio.one.platform.ai.core.vector.VectorSearchRequest;
+import studio.one.platform.ai.core.vector.VectorSearchResults;
+import studio.one.platform.ai.core.vector.VectorStorePort;
+import studio.one.platform.ai.core.vector.visualization.ProjectionPointView;
+import studio.one.platform.ai.core.vector.visualization.ProjectionStatus;
+import studio.one.platform.ai.core.vector.visualization.VectorProjection;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository;
+import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository;
+
+/**
+ * Runs a vector search restricted to the scope of a completed projection and maps the hits
+ * onto that projection's stored 2D points.
+ *
+ * <p>The query point is not projected itself: its coordinates are the arithmetic mean of the
+ * returned result points, or {@code null} when nothing matched.
+ */
+public class DefaultVectorSearchVisualizationService implements VectorSearchVisualizationService {
+
+    private static final int DEFAULT_TOP_K = 10;
+
+    private final EmbeddingPort embeddingPort;
+    private final VectorStorePort vectorStorePort;
+    private final VectorProjectionRepository projectionRepository;
+    private final VectorProjectionPointRepository pointRepository;
+
+    public DefaultVectorSearchVisualizationService(
+            EmbeddingPort embeddingPort,
+            VectorStorePort vectorStorePort,
+            VectorProjectionRepository projectionRepository,
+            VectorProjectionPointRepository pointRepository) {
+        this.embeddingPort = Objects.requireNonNull(embeddingPort, "embeddingPort");
+        this.vectorStorePort = Objects.requireNonNull(vectorStorePort, "vectorStorePort");
+        this.projectionRepository = Objects.requireNonNull(projectionRepository, "projectionRepository");
+        this.pointRepository = Objects.requireNonNull(pointRepository, "pointRepository");
+    }
+
+    /**
+     * Searches within the projection's target types and filters and returns the hits that have
+     * a point in the projection, in search order.
+     *
+     * @throws ResponseStatusException 400 for a blank query, 404 for an unknown projection,
+     *         409 when the projection is not completed, 500 when embedding or search fails
+     */
+    @Override
+    public VectorSearchVisualizationResult search(VectorSearchVisualizationCommand command) {
+        String query = normalize(command.query());
+        if (query == null) {
+            throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "query must not be blank");
+        }
+        VectorProjection projection = projectionRepository.findById(command.projectionId())
+                .orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND, "PROJECTION_NOT_FOUND"));
+        if (projection.status() != ProjectionStatus.COMPLETED) {
+            throw new ResponseStatusException(HttpStatus.CONFLICT, "PROJECTION_NOT_READY");
+        }
+        List<String> projectionTypes = normalizedDistinct(projection.targetTypes());
+        List<String> requestedTypes = normalizedDistinct(command.targetTypes());
+        List<String> effectiveTargetTypes = effectiveTargetTypes(projectionTypes, requestedTypes);
+        if (!projectionTypes.isEmpty() && !requestedTypes.isEmpty() && effectiveTargetTypes.isEmpty()) {
+            // Requested types lie entirely outside the projection's scope: nothing can match,
+            // so answer before spending an embedding call.
+            return emptyResult(query);
+        }
+        int topK = effectiveTopK(command.topK());
+        List<VectorSearchHit> hits = searchHits(
+                query,
+                embed(query),
+                effectiveTargetTypes,
+                MetadataFilter.of(projection.filters(), Map.of(), Map.of()),
+                topK,
+                command.minScore());
+        // First occurrence wins so that the best-ranked score is kept per item id.
+        Map<String, Double> similarityById = new LinkedHashMap<>();
+        for (VectorSearchHit hit : hits) {
+            String vectorItemId = vectorItemId(hit);
+            if (vectorItemId != null) {
+                similarityById.putIfAbsent(vectorItemId, hit.score());
+            }
+        }
+        if (similarityById.isEmpty()) {
+            // No ids to resolve; avoid querying the point repository with an empty id list.
+            return emptyResult(query);
+        }
+        List<String> vectorItemIds = new ArrayList<>(similarityById.keySet());
+        Map<String, ProjectionPointView> pointById = new LinkedHashMap<>();
+        for (ProjectionPointView point : pointRepository.findByVectorItemIds(projection.projectionId(), vectorItemIds)) {
+            pointById.put(point.vectorItemId(), point);
+        }
+        List<VectorSearchVisualizationResult.ResultPoint> results = new ArrayList<>();
+        for (String vectorItemId : vectorItemIds) {
+            ProjectionPointView point = pointById.get(vectorItemId);
+            if (point == null) {
+                // Hit is not part of this projection.
+                continue;
+            }
+            results.add(new VectorSearchVisualizationResult.ResultPoint(
+                    point.vectorItemId(),
+                    point.targetType(),
+                    point.sourceId(),
+                    point.label(),
+                    point.x(),
+                    point.y(),
+                    similarityById.get(vectorItemId)));
+        }
+        if (results.isEmpty()) {
+            return emptyResult(query);
+        }
+        Double x = results.stream().mapToDouble(VectorSearchVisualizationResult.ResultPoint::x).average().orElse(0.0d);
+        Double y = results.stream().mapToDouble(VectorSearchVisualizationResult.ResultPoint::y).average().orElse(0.0d);
+        return new VectorSearchVisualizationResult(
+                new VectorSearchVisualizationResult.QueryPoint(query, x, y),
+                results);
+    }
+
+    /** Result with no hits and no coordinates for the query point. */
+    private VectorSearchVisualizationResult emptyResult(String query) {
+        return new VectorSearchVisualizationResult(
+                new VectorSearchVisualizationResult.QueryPoint(query, null, null),
+                List.of());
+    }
+
+    // NOTE(review): element type Double is inferred from how embeddings are built elsewhere
+    // in this change; confirm against EmbeddingResponse before merging.
+    private List<Double> embed(String query) {
+        try {
+            EmbeddingResponse response = embeddingPort.embed(new EmbeddingRequest(List.of(query)));
+            return response.vectors().get(0).values();
+        } catch (RuntimeException ex) {
+            throw new ResponseStatusException(HttpStatus.INTERNAL_SERVER_ERROR, "EMBEDDING_FAILED", ex);
+        }
+    }
+
+    /**
+     * Executes the search. Without target types a single filtered search is run; otherwise one
+     * search per target type is run and the merged hits are re-ranked by score and cut to topK.
+     */
+    private List<VectorSearchHit> searchHits(
+            String query,
+            List<Double> embedding,
+            List<String> targetTypes,
+            MetadataFilter filter,
+            int topK,
+            Double minScore) {
+        VectorSearchRequest baseRequest = new VectorSearchRequest(
+                embedding,
+                query,
+                topK,
+                filter,
+                minScore,
+                false,
+                true);
+        try {
+            if (targetTypes == null || targetTypes.isEmpty()) {
+                return vectorStorePort.searchWithFilter(baseRequest).hits().stream()
+                        .filter(hit -> minScore == null || hit.score() >= minScore)
+                        .limit(topK)
+                        .toList();
+            }
+            return targetTypes.stream()
+                    .filter(value -> value != null && !value.isBlank())
+                    .flatMap(targetType -> vectorStorePort.searchByObject(targetType.trim(), null, baseRequest).stream())
+                    .map(result -> VectorSearchHit.from(result, false, true))
+                    .filter(hit -> minScore == null || hit.score() >= minScore)
+                    .sorted(Comparator.comparingDouble(VectorSearchHit::score).reversed())
+                    .limit(topK)
+                    .toList();
+        } catch (RuntimeException ex) {
+            throw new ResponseStatusException(HttpStatus.INTERNAL_SERVER_ERROR, "PROJECTION_SEARCH_FAILED", ex);
+        }
+    }
+
+    /** Resolves the item id from hit metadata: chunkId, then _vectorRowId, then documentId, then the hit id. */
+    private String vectorItemId(VectorSearchHit hit) {
+        for (String key : List.of("chunkId", "_vectorRowId", "documentId")) {
+            Object value = hit.metadata().get(key);
+            if (value != null && !value.toString().isBlank()) {
+                return value.toString();
+            }
+        }
+        return hit.id();
+    }
+
+    /**
+     * Intersects the requested types with the projection's types. If either side is empty the
+     * other side is used as-is, so a request can never widen a scoped projection.
+     */
+    private List<String> effectiveTargetTypes(List<String> projectionTargetTypes, List<String> requestedTargetTypes) {
+        List<String> projectionTypes = normalizedDistinct(projectionTargetTypes);
+        List<String> requestedTypes = normalizedDistinct(requestedTargetTypes);
+        if (projectionTypes.isEmpty()) {
+            return requestedTypes;
+        }
+        if (requestedTypes.isEmpty()) {
+            return projectionTypes;
+        }
+        return requestedTypes.stream()
+                .filter(projectionTypes::contains)
+                .toList();
+    }
+
+    /** Trims every value, drops blanks and duplicates; {@code null} input yields an empty list. */
+    private List<String> normalizedDistinct(List<String> values) {
+        if (values == null || values.isEmpty()) {
+            return List.of();
+        }
+        return values.stream()
+                .map(this::normalize)
+                .filter(Objects::nonNull)
+                .distinct()
+                .toList();
+    }
+
+    /** Defaults a missing topK and clamps it to the range 1..{@code VectorSearchRequest.MAX_TOP_K}. */
+    private int effectiveTopK(Integer topK) {
+        if (topK == null) {
+            return DEFAULT_TOP_K;
+        }
+        return Math.max(1, Math.min(topK, VectorSearchRequest.MAX_TOP_K));
+    }
+
+    private String normalize(String value) {
+        if (value == null) {
+            return null;
+        }
+        String trimmed = value.trim();
+        return trimmed.isEmpty() ?
null : trimmed; + } +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcExistingVectorItemRepository.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcExistingVectorItemRepository.java new file mode 100644 index 00000000..5ca34abb --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcExistingVectorItemRepository.java @@ -0,0 +1,236 @@ +package studio.one.platform.ai.service.visualization; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; +import studio.one.platform.ai.core.vector.VectorRecord; +import studio.one.platform.ai.core.vector.visualization.ExistingVectorItemRepository; +import studio.one.platform.ai.core.vector.visualization.VectorItem; + +public class JdbcExistingVectorItemRepository implements ExistingVectorItemRepository { + + private static final TypeReference> MAP_TYPE = new TypeReference<>() {}; + private static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?"); + private static final Pattern FILTER_KEY_PATTERN = Pattern.compile("[A-Za-z0-9_.-]+"); + private static final List LABEL_KEYS = List.of( + "sourceName", "title", "filename", "fileName", "name", "headingPath", "sourceRef"); + + private final 
NamedParameterJdbcTemplate jdbcTemplate; + private final ObjectMapper objectMapper; + private final RowMapper rowMapper = this::mapItem; + private final boolean postgres; + + public JdbcExistingVectorItemRepository(NamedParameterJdbcTemplate jdbcTemplate, ObjectMapper objectMapper) { + this.jdbcTemplate = jdbcTemplate; + this.objectMapper = objectMapper; + this.postgres = JdbcVectorProjectionSql.isPostgres(jdbcTemplate); + } + + @Override + public List findItems(List targetTypes, Map filters) { + MapSqlParameterSource params = new MapSqlParameterSource(); + String targetClause = ""; + if (targetTypes != null && !targetTypes.isEmpty()) { + targetClause = " AND object_type IN (:targetTypes)"; + params.addValue("targetTypes", targetTypes); + } + String filterClause = filterClause(filters, params); + params.addValue("limit", ExistingVectorItemRepository.DEFAULT_MAX_PROJECTION_ITEMS + 1); + List items = jdbcTemplate.query(""" + SELECT id, object_type, object_id, chunk_index, text, embedding, metadata, created_at + FROM tb_ai_document_chunk + WHERE embedding IS NOT NULL + """ + targetClause + filterClause + """ + ORDER BY object_type, object_id, chunk_index, id + LIMIT :limit + """, params, rowMapper); + return items; + } + + @Override + public Optional findByVectorItemId(String vectorItemId) { + return findByVectorItemIds(List.of(vectorItemId)).stream().findFirst(); + } + + @Override + public List findByVectorItemIds(Collection vectorItemIds) { + List ids = vectorItemIds == null ? 
List.of() : vectorItemIds.stream() + .filter(Objects::nonNull) + .map(String::trim) + .filter(value -> !value.isBlank()) + .distinct() + .toList(); + if (ids.isEmpty()) { + return List.of(); + } + return jdbcTemplate.query(""" + SELECT id, object_type, object_id, chunk_index, text, embedding, metadata, created_at + FROM tb_ai_document_chunk + WHERE """ + jsonText(null, "chunkId") + """ + IN (:ids) + OR """ + rowVectorItemId("id") + """ + IN (:ids) + OR """ + jsonText(null, "documentId") + """ + IN (:ids) + ORDER BY object_type, object_id, chunk_index, id + """, new MapSqlParameterSource("ids", ids), rowMapper); + } + + private VectorItem mapItem(ResultSet rs, int rowNum) throws SQLException { + long rowId = rs.getLong("id"); + String objectType = rs.getString("object_type"); + String objectId = rs.getString("object_id"); + String text = rs.getString("text"); + Map metadata = readJson(rs.getString("metadata")); + metadata.putIfAbsent("_vectorRowId", "row-" + rowId); + metadata.putIfAbsent("objectType", objectType); + metadata.putIfAbsent("objectId", objectId); + metadata.putIfAbsent("chunkIndex", rs.getInt("chunk_index")); + String rowVectorItemId = "row-" + rowId; + String vectorItemId = firstText(metadata, VectorRecord.KEY_CHUNK_ID); + if (vectorItemId == null) { + vectorItemId = rowVectorItemId; + } + String label = label(metadata, objectId); + List embedding = parseEmbedding(rs.getObject("embedding")); + return new VectorItem( + vectorItemId, + objectType, + objectId, + label, + text, + embedding, + firstText(metadata, "embeddingModel"), + integer(metadata.get("embeddingDimension"), embedding.isEmpty() ? 
null : embedding.size()), + metadata, + instant(rs.getTimestamp("created_at"))); + } + + private String filterClause(Map filters, MapSqlParameterSource params) { + if (filters == null || filters.isEmpty()) { + return ""; + } + StringBuilder clause = new StringBuilder(); + int index = 0; + for (Map.Entry entry : filters.entrySet()) { + Object expected = entry.getValue(); + if (expected == null) { + continue; + } + String key = entry.getKey(); + if (key == null || !FILTER_KEY_PATTERN.matcher(key).matches()) { + continue; + } + String keyParam = "filterKey" + index; + String valueParam = "filterValue" + index; + clause.append(" AND ").append(jsonText(null, ":" + keyParam)).append(" = :").append(valueParam); + params.addValue(keyParam, key); + params.addValue(valueParam, String.valueOf(expected)); + index++; + } + return clause.toString(); + } + + private Map readJson(String value) { + if (value == null || value.isBlank()) { + return new LinkedHashMap<>(); + } + try { + return new LinkedHashMap<>(objectMapper.readValue(value, MAP_TYPE)); + } catch (Exception ex) { + return new LinkedHashMap<>(); + } + } + + private String label(Map metadata, String fallback) { + for (String key : LABEL_KEYS) { + String value = firstText(metadata, key); + if (value != null) { + return value; + } + } + return fallback; + } + + private String firstText(Map metadata, String key) { + Object value = metadata.get(key); + if (value == null) { + return null; + } + String text = value instanceof Iterable iterable ? join(iterable) : value.toString(); + text = text.trim(); + return text.isBlank() ? 
null : text; + } + + private String join(Iterable values) { + StringBuilder builder = new StringBuilder(); + for (Object value : values) { + if (value == null) { + continue; + } + String text = value.toString().trim(); + if (text.isBlank()) { + continue; + } + if (builder.length() > 0) { + builder.append(" > "); + } + builder.append(text); + } + return builder.toString(); + } + + private Integer integer(Object value, Integer fallback) { + if (value instanceof Number number) { + return number.intValue(); + } + if (value instanceof String text && !text.isBlank()) { + try { + return Integer.valueOf(text.trim()); + } catch (NumberFormatException ignored) { + return fallback; + } + } + return fallback; + } + + private List parseEmbedding(Object value) { + if (value == null) { + return List.of(); + } + String text = value.toString(); + Matcher matcher = NUMBER_PATTERN.matcher(text); + List values = new ArrayList<>(); + while (matcher.find()) { + values.add(Double.valueOf(matcher.group())); + } + return values; + } + + private Instant instant(Timestamp timestamp) { + return timestamp == null ? 
null : timestamp.toInstant(); + } + + private String jsonText(String alias, String keyExpression) { + return JdbcVectorProjectionSql.jsonText(alias, keyExpression, postgres); + } + + private String rowVectorItemId(String idExpression) { + return JdbcVectorProjectionSql.rowVectorItemId(idExpression, postgres); + } +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionPointRepository.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionPointRepository.java new file mode 100644 index 00000000..986c3719 --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionPointRepository.java @@ -0,0 +1,205 @@ +package studio.one.platform.ai.service.visualization; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointView; +import studio.one.platform.ai.core.vector.visualization.VectorVisualizationMetadataSanitizer; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPoint; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository; + +public class JdbcVectorProjectionPointRepository implements VectorProjectionPointRepository { + + private static final TypeReference> MAP_TYPE = new TypeReference<>() {}; 
+ private final NamedParameterJdbcTemplate jdbcTemplate; + private final ObjectMapper objectMapper; + private final RowMapper rowMapper = this::mapView; + private final boolean postgres; + + public JdbcVectorProjectionPointRepository(NamedParameterJdbcTemplate jdbcTemplate, ObjectMapper objectMapper) { + this.jdbcTemplate = jdbcTemplate; + this.objectMapper = objectMapper; + this.postgres = JdbcVectorProjectionSql.isPostgres(jdbcTemplate); + } + + @Override + public void deleteByProjectionId(String projectionId) { + jdbcTemplate.update("DELETE FROM tb_ai_vector_projection_point WHERE projection_id = :projectionId", + new MapSqlParameterSource("projectionId", projectionId)); + } + + @Override + public void saveAll(List points) { + if (points == null || points.isEmpty()) { + return; + } + MapSqlParameterSource[] params = points.stream() + .map(point -> new MapSqlParameterSource() + .addValue("projectionId", point.projectionId()) + .addValue("vectorItemId", point.vectorItemId()) + .addValue("x", point.x()) + .addValue("y", point.y()) + .addValue("clusterId", point.clusterId()) + .addValue("displayOrder", point.displayOrder()) + .addValue("createdAt", point.createdAt() == null ? null : Timestamp.from(point.createdAt()))) + .toArray(MapSqlParameterSource[]::new); + jdbcTemplate.batchUpdate(""" + INSERT INTO tb_ai_vector_projection_point( + projection_id, vector_item_id, x, y, cluster_id, display_order, created_at) + VALUES ( + :projectionId, :vectorItemId, :x, :y, :clusterId, :displayOrder, :createdAt) + """, params); + } + + @Override + public ProjectionPointPage findPage( + String projectionId, + String targetType, + String clusterId, + String keyword, + int limit, + int offset) { + MapSqlParameterSource params = new MapSqlParameterSource() + .addValue("projectionId", projectionId) + .addValue("targetType", targetType) + .addValue("clusterId", clusterId) + .addValue("keyword", keyword == null ? 
null : "%" + keyword.toLowerCase(Locale.ROOT) + "%") + .addValue("limit", limit) + .addValue("offset", offset); + String where = whereClause(targetType, clusterId, keyword); + Long total = jdbcTemplate.queryForObject(""" + SELECT COUNT(*) + FROM tb_ai_vector_projection_point p + JOIN tb_ai_document_chunk c + ON p.vector_item_id = """ + pointJoinExpression() + """ + WHERE p.projection_id = :projectionId + """ + where, params, Long.class); + List items = jdbcTemplate.query(""" + SELECT p.vector_item_id, c.object_type, c.object_id, c.text, c.metadata, + p.x, p.y, p.cluster_id + FROM tb_ai_vector_projection_point p + JOIN tb_ai_document_chunk c + ON p.vector_item_id = """ + pointJoinExpression() + """ + WHERE p.projection_id = :projectionId + """ + where + """ + ORDER BY """ + JdbcVectorProjectionSql.orderByDisplayOrder(postgres) + """ + LIMIT :limit OFFSET :offset + """, params, rowMapper); + return new ProjectionPointPage(total == null ? 0L : total, items); + } + + @Override + public List findByVectorItemIds(String projectionId, Collection vectorItemIds) { + List ids = vectorItemIds == null ? 
List.of() : vectorItemIds.stream() + .filter(value -> value != null && !value.isBlank()) + .distinct() + .toList(); + if (ids.isEmpty()) { + return List.of(); + } + return jdbcTemplate.query(""" + SELECT p.vector_item_id, c.object_type, c.object_id, c.text, c.metadata, + p.x, p.y, p.cluster_id + FROM tb_ai_vector_projection_point p + JOIN tb_ai_document_chunk c + ON p.vector_item_id = """ + pointJoinExpression() + """ + WHERE p.projection_id = :projectionId + AND p.vector_item_id IN (:vectorItemIds) + """, new MapSqlParameterSource() + .addValue("projectionId", projectionId) + .addValue("vectorItemIds", ids), rowMapper); + } + + @Override + public java.util.Optional findByVectorItemId(String projectionId, String vectorItemId) { + return findByVectorItemIds(projectionId, List.of(vectorItemId)).stream().findFirst(); + } + + private String whereClause(String targetType, String clusterId, String keyword) { + StringBuilder where = new StringBuilder(); + if (targetType != null) { + where.append(" AND c.object_type = :targetType"); + } + if (clusterId != null) { + where.append(" AND p.cluster_id = :clusterId"); + } + if (keyword != null) { + where.append(""" + AND ( + LOWER(c.object_id) LIKE :keyword + OR LOWER(c.text) LIKE :keyword + OR LOWER(COALESCE(""" + labelExpressions() + """ + , '')) LIKE :keyword + ) + """); + } + return where.toString(); + } + + private ProjectionPointView mapView(ResultSet rs, int rowNum) throws SQLException { + Map metadata = readJson(rs.getString("metadata")); + String label = label(metadata, rs.getString("object_id")); + return new ProjectionPointView( + rs.getString("vector_item_id"), + rs.getString("object_type"), + rs.getString("object_id"), + label, + rs.getDouble("x"), + rs.getDouble("y"), + rs.getString("cluster_id"), + VectorVisualizationMetadataSanitizer.sanitize(metadata)); + } + + private Map readJson(String value) { + if (value == null || value.isBlank()) { + return Map.of(); + } + try { + return objectMapper.readValue(value, 
MAP_TYPE); + } catch (Exception ex) { + return Map.of(); + } + } + + private String label(Map metadata, String fallback) { + for (String key : List.of("sourceName", "title", "filename", "fileName", "name", "headingPath", "sourceRef")) { + Object value = metadata.get(key); + if (value != null && !value.toString().isBlank()) { + return value.toString(); + } + } + return fallback; + } + + private String pointJoinExpression() { + return "COALESCE(NULLIF(" + jsonText("c", "chunkId") + ",''), " + + JdbcVectorProjectionSql.rowVectorItemId("c.id", postgres) + ")"; + } + + private String labelExpressions() { + return String.join(", ", List.of( + jsonText("c", "sourceName"), + jsonText("c", "title"), + jsonText("c", "filename"), + jsonText("c", "fileName"), + jsonText("c", "name"), + jsonText("c", "headingPath"), + jsonText("c", "sourceRef"))); + } + + private String jsonText(String alias, String key) { + return JdbcVectorProjectionSql.jsonText(alias, key, postgres); + } + +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionRepository.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionRepository.java new file mode 100644 index 00000000..6dbf463c --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionRepository.java @@ -0,0 +1,185 @@ +package studio.one.platform.ai.service.visualization; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.Instant; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.springframework.jdbc.core.ConnectionCallback; +import org.springframework.jdbc.core.RowMapper; 
+import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; +import studio.one.platform.ai.core.vector.visualization.ProjectionAlgorithm; +import studio.one.platform.ai.core.vector.visualization.ProjectionStatus; +import studio.one.platform.ai.core.vector.visualization.VectorProjection; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository; + +public class JdbcVectorProjectionRepository implements VectorProjectionRepository { + + private static final TypeReference> MAP_TYPE = new TypeReference<>() {}; + + private final NamedParameterJdbcTemplate jdbcTemplate; + private final ObjectMapper objectMapper; + private final RowMapper rowMapper = this::mapProjection; + private final boolean postgres; + + public JdbcVectorProjectionRepository(NamedParameterJdbcTemplate jdbcTemplate, ObjectMapper objectMapper) { + this.jdbcTemplate = jdbcTemplate; + this.objectMapper = objectMapper; + this.postgres = isPostgres(jdbcTemplate); + } + + @Override + public void save(VectorProjection projection) { + String filterExpression = postgres ? 
"CAST(:filters AS jsonb)" : ":filters"; + jdbcTemplate.update(""" + INSERT INTO tb_ai_vector_projection( + projection_id, name, algorithm, status, target_types, filter_json, + item_count, error_message, created_by, created_at, completed_at) + VALUES ( + :projectionId, :name, :algorithm, :status, :targetTypes, %s, + :itemCount, :errorMessage, :createdBy, :createdAt, :completedAt) + """.formatted(filterExpression), params(projection)); + } + + @Override + public Optional findById(String projectionId) { + List items = jdbcTemplate.query(""" + SELECT projection_id, name, algorithm, status, target_types, filter_json, + item_count, error_message, created_by, created_at, completed_at + FROM tb_ai_vector_projection + WHERE projection_id = :projectionId + """, new MapSqlParameterSource("projectionId", projectionId), rowMapper); + return items.stream().findFirst(); + } + + @Override + public List findAll(int limit, int offset) { + return jdbcTemplate.query(""" + SELECT projection_id, name, algorithm, status, target_types, filter_json, + item_count, error_message, created_by, created_at, completed_at + FROM tb_ai_vector_projection + WHERE status <> 'DELETED' + ORDER BY created_at DESC + LIMIT :limit OFFSET :offset + """, new MapSqlParameterSource() + .addValue("limit", limit) + .addValue("offset", offset), rowMapper); + } + + @Override + public void updateStatus(String projectionId, ProjectionStatus status, String errorMessage, Instant completedAt) { + jdbcTemplate.update(""" + UPDATE tb_ai_vector_projection + SET status = :status, + error_message = :errorMessage, + completed_at = :completedAt + WHERE projection_id = :projectionId + """, new MapSqlParameterSource() + .addValue("projectionId", projectionId) + .addValue("status", status.name()) + .addValue("errorMessage", errorMessage) + .addValue("completedAt", timestamp(completedAt))); + } + + @Override + public void markCompleted(String projectionId, int itemCount, Instant completedAt) { + jdbcTemplate.update(""" + UPDATE 
tb_ai_vector_projection + SET status = 'COMPLETED', + item_count = :itemCount, + error_message = NULL, + completed_at = :completedAt + WHERE projection_id = :projectionId + """, new MapSqlParameterSource() + .addValue("projectionId", projectionId) + .addValue("itemCount", itemCount) + .addValue("completedAt", timestamp(completedAt))); + } + + private MapSqlParameterSource params(VectorProjection projection) { + return new MapSqlParameterSource() + .addValue("projectionId", projection.projectionId()) + .addValue("name", projection.name()) + .addValue("algorithm", projection.algorithm().name()) + .addValue("status", projection.status().name()) + .addValue("targetTypes", String.join(",", projection.targetTypes())) + .addValue("filters", writeJson(projection.filters())) + .addValue("itemCount", projection.itemCount()) + .addValue("errorMessage", projection.errorMessage()) + .addValue("createdBy", projection.createdBy()) + .addValue("createdAt", timestamp(projection.createdAt())) + .addValue("completedAt", timestamp(projection.completedAt())); + } + + private VectorProjection mapProjection(ResultSet rs, int rowNum) throws SQLException { + return new VectorProjection( + rs.getString("projection_id"), + rs.getString("name"), + ProjectionAlgorithm.valueOf(rs.getString("algorithm")), + ProjectionStatus.valueOf(rs.getString("status")), + readTargetTypes(rs.getString("target_types")), + readJson(rs.getString("filter_json")), + rs.getInt("item_count"), + rs.getString("error_message"), + rs.getString("created_by"), + instant(rs.getTimestamp("created_at")), + instant(rs.getTimestamp("completed_at"))); + } + + private List readTargetTypes(String text) { + if (text == null || text.isBlank()) { + return List.of(); + } + return Arrays.stream(text.split(",")) + .map(String::trim) + .filter(value -> !value.isBlank()) + .toList(); + } + + private String writeJson(Map value) { + try { + return objectMapper.writeValueAsString(value == null ? 
Map.of() : value); + } catch (Exception ex) { + throw new IllegalArgumentException("Invalid projection filter JSON", ex); + } + } + + private Map readJson(String value) { + if (value == null || value.isBlank()) { + return Map.of(); + } + try { + return objectMapper.readValue(value, MAP_TYPE).entrySet().stream() + .filter(entry -> entry.getValue() != null) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } catch (Exception ex) { + return Map.of(); + } + } + + private Timestamp timestamp(Instant instant) { + return instant == null ? null : Timestamp.from(instant); + } + + private Instant instant(Timestamp timestamp) { + return timestamp == null ? null : timestamp.toInstant(); + } + + private boolean isPostgres(NamedParameterJdbcTemplate template) { + try { + return Boolean.TRUE.equals(template.getJdbcOperations().execute((ConnectionCallback) connection -> { + String productName = connection.getMetaData().getDatabaseProductName(); + return productName != null && productName.toLowerCase(java.util.Locale.ROOT).contains("postgres"); + })); + } catch (RuntimeException ex) { + return true; + } + } +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionSql.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionSql.java new file mode 100644 index 00000000..c445db7d --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/JdbcVectorProjectionSql.java @@ -0,0 +1,48 @@ +package studio.one.platform.ai.service.visualization; + +import java.util.Locale; + +import org.springframework.jdbc.core.ConnectionCallback; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; + +final class JdbcVectorProjectionSql { + + private JdbcVectorProjectionSql() { + } + + static boolean isPostgres(NamedParameterJdbcTemplate template) { + try { + return 
Boolean.TRUE.equals(template.getJdbcOperations().execute((ConnectionCallback) connection -> { + String productName = connection.getMetaData().getDatabaseProductName(); + return productName != null && productName.toLowerCase(Locale.ROOT).contains("postgres"); + })); + } catch (RuntimeException ex) { + return true; + } + } + + static String jsonText(String alias, String keyExpression, boolean postgres) { + String column = alias == null || alias.isBlank() ? "metadata" : alias + ".metadata"; + if (postgres) { + return column + " ->> " + (keyExpression.startsWith(":") ? keyExpression : "'" + keyExpression.replace("'", "''") + "'"); /* bind parameters (":name") pass through; literal keys must be quoted or PostgreSQL parses them as column names */ + } + String path = keyExpression.startsWith(":") + ? "CONCAT('$.', " + keyExpression + ")" + : "'$." + keyExpression + "'"; + return "JSON_UNQUOTE(JSON_EXTRACT(" + column + ", " + path + "))"; + } + + static String rowVectorItemId(String idExpression, boolean postgres) { + if (postgres) { + return "'row-' || " + idExpression; + } + return "CONCAT('row-', " + idExpression + ")"; + } + + static String orderByDisplayOrder(boolean postgres) { + if (postgres) { + return "p.display_order NULLS LAST, p.vector_item_id"; + } + return "p.display_order IS NULL, p.display_order, p.vector_item_id"; + } +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionCreateCommand.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionCreateCommand.java new file mode 100644 index 00000000..94a539a2 --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionCreateCommand.java @@ -0,0 +1,19 @@ +package studio.one.platform.ai.service.visualization; + +import java.util.List; +import java.util.Map; + +import studio.one.platform.ai.core.vector.visualization.ProjectionAlgorithm; + +public record VectorProjectionCreateCommand( + String name, + ProjectionAlgorithm algorithm, + List targetTypes, + Map filters, + String createdBy) { + + public 
VectorProjectionCreateCommand { + targetTypes = targetTypes == null ? List.of() : List.copyOf(targetTypes); + filters = filters == null ? Map.of() : Map.copyOf(filters); + } +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionJobService.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionJobService.java new file mode 100644 index 00000000..455595d7 --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionJobService.java @@ -0,0 +1,6 @@ +package studio.one.platform.ai.service.visualization; + +public interface VectorProjectionJobService { + + void run(String projectionId); +} diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionService.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionService.java new file mode 100644 index 00000000..f0790f2d --- /dev/null +++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorProjectionService.java @@ -0,0 +1,20 @@ +package studio.one.platform.ai.service.visualization; + +import java.util.List; + +import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage; +import studio.one.platform.ai.core.vector.visualization.VectorItem; +import studio.one.platform.ai.core.vector.visualization.VectorProjection; + +public interface VectorProjectionService { + + VectorProjection create(VectorProjectionCreateCommand command); + + List list(int limit, int offset); + + VectorProjection get(String projectionId); + + ProjectionPointPage points(String projectionId, String targetType, String clusterId, String keyword, int limit, int offset); + + VectorItem item(String vectorItemId); +} diff --git 
a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationCommand.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationCommand.java
new file mode 100644
index 00000000..d91eb3f1
--- /dev/null
+++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationCommand.java
@@ -0,0 +1,16 @@
+package studio.one.platform.ai.service.visualization;
+
+import java.util.List;
+
+/** Input of a search visualization request; {@code targetTypes} is copied and never {@code null}. */
+public record VectorSearchVisualizationCommand(
+        String projectionId,
+        String query,
+        List<String> targetTypes,
+        Integer topK,
+        Double minScore) {
+
+    public VectorSearchVisualizationCommand {
+        targetTypes = targetTypes == null ? List.of() : List.copyOf(targetTypes);
+    }
+}
diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationResult.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationResult.java
new file mode 100644
index 00000000..8fd18ba7
--- /dev/null
+++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationResult.java
@@ -0,0 +1,24 @@
+package studio.one.platform.ai.service.visualization;
+
+import java.util.List;
+
+/** Search visualization output: the query point plus result points; {@code results} is never {@code null}. */
+public record VectorSearchVisualizationResult(QueryPoint query, List<ResultPoint> results) {
+
+    public VectorSearchVisualizationResult {
+        results = results == null ? List.of() : List.copyOf(results);
+    }
+
+    public record QueryPoint(String label, Double x, Double y) {
+    }
+
+    public record ResultPoint(
+            String vectorItemId,
+            String targetType,
+            String sourceId,
+            String label,
+            double x,
+            double y,
+            Double similarity) {
+    }
+}
diff --git a/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationService.java b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationService.java
new file mode 100644
index 00000000..5995cfd6
--- /dev/null
+++ b/starter/studio-platform-starter-ai/src/main/java/studio/one/platform/ai/service/visualization/VectorSearchVisualizationService.java
@@ -0,0 +1,7 @@
+package studio.one.platform.ai.service.visualization;
+
+/** Runs a search described by a command and returns its visualization result. */
+public interface VectorSearchVisualizationService {
+
+    VectorSearchVisualizationResult search(VectorSearchVisualizationCommand command);
+}
diff --git a/starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionServiceTest.java b/starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionServiceTest.java
new file mode 100644
index 00000000..31b63e64
--- /dev/null
+++ b/starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorProjectionServiceTest.java
@@ -0,0 +1,254 @@
+package studio.one.platform.ai.service.visualization;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.RejectedExecutionException;
+
+import org.junit.jupiter.api.Test;
+import 
org.springframework.web.server.ResponseStatusException; + +import studio.one.platform.ai.core.vector.visualization.ExistingVectorItemRepository; +import studio.one.platform.ai.core.vector.visualization.ProjectionAlgorithm; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointView; +import studio.one.platform.ai.core.vector.visualization.ProjectionStatus; +import studio.one.platform.ai.core.vector.visualization.VectorItem; +import studio.one.platform.ai.core.vector.visualization.VectorProjection; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionGenerator; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPoint; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository; + +class DefaultVectorProjectionServiceTest { + + @Test + void createStoresRequestedProjectionAndRunsJob() { + FakeProjectionRepository projections = new FakeProjectionRepository(); + FakePointRepository points = new FakePointRepository(); + FakeItemRepository items = new FakeItemRepository(List.of(item("chunk-1"))); + DefaultVectorProjectionJobService job = new DefaultVectorProjectionJobService( + projections, + points, + items, + List.of(new VectorProjectionGenerator() { + @Override + public ProjectionAlgorithm algorithm() { + return ProjectionAlgorithm.PCA; + } + + @Override + public List generate(String projectionId, List sourceItems, Instant createdAt) { + return sourceItems.stream() + .map(source -> new VectorProjectionPoint(projectionId, source.vectorItemId(), 0.1, 0.2, null, 0, createdAt)) + .toList(); + } + })); + DefaultVectorProjectionService service = new DefaultVectorProjectionService( + projections, + points, + items, + job, + Runnable::run); + + VectorProjection projection = service.create(new VectorProjectionCreateCommand( + 
"map", + ProjectionAlgorithm.PCA, + List.of("COURSE_CHUNK"), + Map.of(), + "tester")); + + assertThat(projection.status()).isEqualTo(ProjectionStatus.REQUESTED); + VectorProjection saved = projections.findById(projection.projectionId()).orElseThrow(); + assertThat(saved.status()).isEqualTo(ProjectionStatus.COMPLETED); + assertThat(saved.itemCount()).isEqualTo(1); + assertThat(points.points).hasSize(1); + } + + @Test + void pointsRejectsProjectionThatIsNotCompleted() { + FakeProjectionRepository projections = new FakeProjectionRepository(); + projections.save(VectorProjection.requested( + "proj-1", + "map", + ProjectionAlgorithm.PCA, + List.of(), + Map.of(), + null, + Instant.now())); + DefaultVectorProjectionService service = new DefaultVectorProjectionService( + projections, + new FakePointRepository(), + new FakeItemRepository(List.of()), + projectionId -> {}, + Runnable::run); + + assertThatThrownBy(() -> service.points("proj-1", null, null, null, 2000, 0)) + .isInstanceOf(ResponseStatusException.class) + .hasMessageContaining("PROJECTION_NOT_READY"); + } + + @Test + void createMarksProjectionFailedWhenJobCannotBeQueued() { + FakeProjectionRepository projections = new FakeProjectionRepository(); + FakePointRepository points = new FakePointRepository(); + FakeItemRepository items = new FakeItemRepository(List.of(item("chunk-1"))); + DefaultVectorProjectionService service = new DefaultVectorProjectionService( + projections, + points, + items, + projectionId -> {}, + task -> { + throw new RejectedExecutionException("queue full"); + }); + + assertThatThrownBy(() -> service.create(new VectorProjectionCreateCommand( + "map", + ProjectionAlgorithm.PCA, + List.of("COURSE_CHUNK"), + Map.of(), + "tester"))) + .isInstanceOf(ResponseStatusException.class) + .hasMessageContaining("PROJECTION_JOB_QUEUE_UNAVAILABLE"); + assertThat(projections.projections.values()).singleElement() + .extracting(VectorProjection::status) + .isEqualTo(ProjectionStatus.FAILED); + } + + @Test + 
void createRejectsTooLongNameBeforePersistence() { + FakeProjectionRepository projections = new FakeProjectionRepository(); + DefaultVectorProjectionService service = new DefaultVectorProjectionService( + projections, + new FakePointRepository(), + new FakeItemRepository(List.of()), + projectionId -> {}, + Runnable::run); + + assertThatThrownBy(() -> service.create(new VectorProjectionCreateCommand( + "x".repeat(201), + ProjectionAlgorithm.PCA, + List.of(), + Map.of(), + "tester"))) + .isInstanceOf(ResponseStatusException.class) + .hasMessageContaining("name must be at most 200 characters"); + assertThat(projections.projections).isEmpty(); + } + + private static VectorItem item(String id) { + return new VectorItem(id, "COURSE_CHUNK", "course-1", "label", "text", List.of(0.1, 0.2), "model", 2, Map.of(), Instant.now()); + } + + private static final class FakeProjectionRepository implements VectorProjectionRepository { + private final Map projections = new LinkedHashMap<>(); + + @Override + public void save(VectorProjection projection) { + projections.put(projection.projectionId(), projection); + } + + @Override + public Optional findById(String projectionId) { + return Optional.ofNullable(projections.get(projectionId)); + } + + @Override + public List findAll(int limit, int offset) { + return projections.values().stream().skip(offset).limit(limit).toList(); + } + + @Override + public void updateStatus(String projectionId, ProjectionStatus status, String errorMessage, Instant completedAt) { + VectorProjection current = projections.get(projectionId); + projections.put(projectionId, new VectorProjection( + current.projectionId(), + current.name(), + current.algorithm(), + status, + current.targetTypes(), + current.filters(), + current.itemCount(), + errorMessage, + current.createdBy(), + current.createdAt(), + completedAt)); + } + + @Override + public void markCompleted(String projectionId, int itemCount, Instant completedAt) { + VectorProjection current = 
projections.get(projectionId); + projections.put(projectionId, new VectorProjection( + current.projectionId(), + current.name(), + current.algorithm(), + ProjectionStatus.COMPLETED, + current.targetTypes(), + current.filters(), + itemCount, + null, + current.createdBy(), + current.createdAt(), + completedAt)); + } + } + + private static final class FakePointRepository implements VectorProjectionPointRepository { + private final List points = new ArrayList<>(); + + @Override + public void deleteByProjectionId(String projectionId) { + points.removeIf(point -> point.projectionId().equals(projectionId)); + } + + @Override + public void saveAll(List points) { + this.points.addAll(points); + } + + @Override + public ProjectionPointPage findPage(String projectionId, String targetType, String clusterId, String keyword, int limit, int offset) { + return new ProjectionPointPage(points.size(), List.of()); + } + + @Override + public List findByVectorItemIds(String projectionId, Collection vectorItemIds) { + return List.of(); + } + + @Override + public Optional findByVectorItemId(String projectionId, String vectorItemId) { + return Optional.empty(); + } + } + + private static final class FakeItemRepository implements ExistingVectorItemRepository { + private final List items; + + private FakeItemRepository(List items) { + this.items = items; + } + + @Override + public List findItems(List targetTypes, Map filters) { + return items; + } + + @Override + public Optional findByVectorItemId(String vectorItemId) { + return items.stream().filter(item -> item.vectorItemId().equals(vectorItemId)).findFirst(); + } + + @Override + public List findByVectorItemIds(Collection vectorItemIds) { + return items.stream().filter(item -> vectorItemIds.contains(item.vectorItemId())).toList(); + } + } +} diff --git a/starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationServiceTest.java 
b/starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationServiceTest.java new file mode 100644 index 00000000..de143ded --- /dev/null +++ b/starter/studio-platform-starter-ai/src/test/java/studio/one/platform/ai/service/visualization/DefaultVectorSearchVisualizationServiceTest.java @@ -0,0 +1,173 @@ +package studio.one.platform.ai.service.visualization; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Instant; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import org.junit.jupiter.api.Test; + +import studio.one.platform.ai.core.embedding.EmbeddingPort; +import studio.one.platform.ai.core.embedding.EmbeddingResponse; +import studio.one.platform.ai.core.embedding.EmbeddingVector; +import studio.one.platform.ai.core.vector.VectorSearchHit; +import studio.one.platform.ai.core.vector.VectorSearchRequest; +import studio.one.platform.ai.core.vector.VectorSearchResults; +import studio.one.platform.ai.core.vector.VectorStorePort; +import studio.one.platform.ai.core.vector.visualization.ProjectionAlgorithm; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointPage; +import studio.one.platform.ai.core.vector.visualization.ProjectionPointView; +import studio.one.platform.ai.core.vector.visualization.ProjectionStatus; +import studio.one.platform.ai.core.vector.visualization.VectorProjection; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPoint; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionPointRepository; +import studio.one.platform.ai.core.vector.visualization.VectorProjectionRepository; + +class DefaultVectorSearchVisualizationServiceTest { + + @Test + void searchReturnsQueryCentroidAndMatchingProjectionPoints() { 
+ EmbeddingPort embeddingPort = mock(EmbeddingPort.class); + VectorStorePort vectorStorePort = mock(VectorStorePort.class); + VectorProjectionRepository projections = mock(VectorProjectionRepository.class); + VectorProjectionPointRepository points = new FakePointRepository(List.of( + new ProjectionPointView("chunk-1", "COURSE_CHUNK", "course-1", "Java", 0.2, 0.4, null, Map.of()), + new ProjectionPointView("chunk-2", "COURSE_CHUNK", "course-2", "Spring", 0.6, 0.8, null, Map.of()))); + when(projections.findById("proj-1")).thenReturn(Optional.of(projection())); + when(embeddingPort.embed(any())).thenReturn(new EmbeddingResponse(List.of( + new EmbeddingVector("query", List.of(0.1, 0.2))))); + when(vectorStorePort.searchWithFilter(any(VectorSearchRequest.class))).thenReturn(VectorSearchResults.of(List.of( + new VectorSearchHit("chunk-1", "doc-1", "chunk-1", null, null, 0.9, null, null, null, null, null, + Map.of("chunkId", "chunk-1")), + new VectorSearchHit("chunk-2", "doc-2", "chunk-2", null, null, 0.8, null, null, null, null, null, + Map.of("chunkId", "chunk-2"))), 1L)); + DefaultVectorSearchVisualizationService service = new DefaultVectorSearchVisualizationService( + embeddingPort, + vectorStorePort, + projections, + points); + + VectorSearchVisualizationResult result = service.search(new VectorSearchVisualizationCommand( + "proj-1", + "java", + List.of(), + 10, + null)); + + assertThat(result.results()).hasSize(2); + assertThat(result.query().x()).isCloseTo(0.4, org.assertj.core.data.Offset.offset(0.0001)); + assertThat(result.query().y()).isCloseTo(0.6, org.assertj.core.data.Offset.offset(0.0001)); + } + + @Test + void searchReturnsNullQueryPointWhenNoProjectionPointMatches() { + EmbeddingPort embeddingPort = mock(EmbeddingPort.class); + VectorStorePort vectorStorePort = mock(VectorStorePort.class); + VectorProjectionRepository projections = mock(VectorProjectionRepository.class); + when(projections.findById("proj-1")).thenReturn(Optional.of(projection())); + 
when(embeddingPort.embed(any())).thenReturn(new EmbeddingResponse(List.of( + new EmbeddingVector("query", List.of(0.1, 0.2))))); + when(vectorStorePort.searchWithFilter(any(VectorSearchRequest.class))).thenReturn(VectorSearchResults.of(List.of( + new VectorSearchHit("missing", "doc-1", "missing", null, null, 0.9, null, null, null, null, null, + Map.of("chunkId", "missing"))), 1L)); + DefaultVectorSearchVisualizationService service = new DefaultVectorSearchVisualizationService( + embeddingPort, + vectorStorePort, + projections, + new FakePointRepository(List.of())); + + VectorSearchVisualizationResult result = service.search(new VectorSearchVisualizationCommand( + "proj-1", + "java", + List.of(), + 10, + null)); + + assertThat(result.results()).isEmpty(); + assertThat(result.query().x()).isNull(); + assertThat(result.query().y()).isNull(); + } + + @Test + void searchUsesRowVectorItemIdWhenChunkIdIsAbsent() { + EmbeddingPort embeddingPort = mock(EmbeddingPort.class); + VectorStorePort vectorStorePort = mock(VectorStorePort.class); + VectorProjectionRepository projections = mock(VectorProjectionRepository.class); + VectorProjectionPointRepository points = new FakePointRepository(List.of( + new ProjectionPointView("row-7", "COURSE_CHUNK", "course-1", "Java", 0.2, 0.4, null, Map.of()))); + when(projections.findById("proj-1")).thenReturn(Optional.of(projection())); + when(embeddingPort.embed(any())).thenReturn(new EmbeddingResponse(List.of( + new EmbeddingVector("query", List.of(0.1, 0.2))))); + when(vectorStorePort.searchWithFilter(any(VectorSearchRequest.class))).thenReturn(VectorSearchResults.of(List.of( + new VectorSearchHit("doc-1", "doc-1", "doc-1", null, null, 0.9, null, null, null, null, null, + Map.of("_vectorRowId", "row-7", "documentId", "doc-1"))), 1L)); + DefaultVectorSearchVisualizationService service = new DefaultVectorSearchVisualizationService( + embeddingPort, + vectorStorePort, + projections, + points); + + VectorSearchVisualizationResult result = 
service.search(new VectorSearchVisualizationCommand( + "proj-1", + "java", + List.of(), + 10, + null)); + + assertThat(result.results()).singleElement() + .extracting(VectorSearchVisualizationResult.ResultPoint::vectorItemId) + .isEqualTo("row-7"); + } + + private VectorProjection projection() { + return new VectorProjection( + "proj-1", + "map", + ProjectionAlgorithm.PCA, + ProjectionStatus.COMPLETED, + List.of(), + Map.of(), + 2, + null, + null, + Instant.now(), + Instant.now()); + } + + private static final class FakePointRepository implements VectorProjectionPointRepository { + private final List points; + + private FakePointRepository(List points) { + this.points = points; + } + + @Override + public void deleteByProjectionId(String projectionId) { + } + + @Override + public void saveAll(List points) { + } + + @Override + public ProjectionPointPage findPage(String projectionId, String targetType, String clusterId, String keyword, int limit, int offset) { + return new ProjectionPointPage(points.size(), points); + } + + @Override + public List findByVectorItemIds(String projectionId, Collection vectorItemIds) { + return points.stream().filter(point -> vectorItemIds.contains(point.vectorItemId())).toList(); + } + + @Override + public Optional findByVectorItemId(String projectionId, String vectorItemId) { + return points.stream().filter(point -> point.vectorItemId().equals(vectorItemId)).findFirst(); + } + } +} diff --git a/studio-platform-ai/README.md b/studio-platform-ai/README.md index d529c944..6a274a29 100644 --- a/studio-platform-ai/README.md +++ b/studio-platform-ai/README.md @@ -158,6 +158,27 @@ Keyword metadata는 trim, blank 제거, case-insensitive 중복 제거를 거친 - `VectorSearchRequest.includeText=false`이면 `VectorSearchHit.text()`는 `null`일 수 있고, `includeMetadata=false`이면 `metadata()`는 empty map일 수 있다. - `VectorStorePort.searchWithFilter(...)`는 filtered-search override를 위한 확장점이며 기본 구현은 `searchRecords(...)`에 위임한다. 
- `VectorStorePort.existsByContentHash(...)`의 기본 `false`는 미구현 fallback이다. content hash deduplication이 필요한 adapter는 반드시 override해야 한다. + +## Vector projection visualization contract + +관리자 산점도 화면은 원본 embedding 저장소와 2D projection 좌표를 분리해서 사용한다. +기존 원본 벡터 테이블 `tb_ai_document_chunk`는 변경하지 않고, projection 상태와 좌표는 +AI migration `V603__create_vector_projection_tables.sql`이 추가하는 `tb_ai_vector_projection`, +`tb_ai_vector_projection_point`에 저장한다. + +Core 계약은 `studio.one.platform.ai.core.vector.visualization` 패키지에 둔다. + +| 타입 | 설명 | +|---|---| +| `VectorItem` | 기존 벡터 테이블 row를 화면/좌표 생성용 item으로 변환한 모델 | +| `VectorProjection` | projection job 상태, 대상 target type, filter, item count | +| `VectorProjectionPoint` | projection별 2D 좌표 저장 모델 | +| `VectorProjectionGenerator` | PCA/UMAP/t-SNE 같은 projection 알고리즘 확장점 | +| `ExistingVectorItemRepository` | 기존 벡터 테이블을 읽는 adapter 포트 | +| `VectorProjectionRepository` / `VectorProjectionPointRepository` | projection 상태와 좌표 저장소 포트 | + +기본 `PcaVectorProjectionGenerator`는 추가 의존성 없이 PCA 좌표를 계산한다. 화면 API는 원본 embedding 값을 +반환하지 않으며, metadata는 표시용 allowlist로 제한한다. 원문 text는 벡터 항목 상세 조회에서만 제공한다. - 기존 `VectorDocument`와 `VectorSearchResult`는 기존 호출자 호환성을 위해 유지한다. - 새 context assembly 계약은 아직 만들지 않는다. web context 조립은 `starter-ai-web`의 `RagContextBuilder`, chunk 주변 문맥 확장은 `studio-platform-chunking`의 `ChunkContextExpander`를 우선 사용한다. 
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ExistingVectorItemRepository.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ExistingVectorItemRepository.java
new file mode 100644
index 00000000..51186964
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ExistingVectorItemRepository.java
@@ -0,0 +1,18 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/** Adapter port that reads items from the existing vector table. */
+public interface ExistingVectorItemRepository {
+
+    int DEFAULT_MAX_PROJECTION_ITEMS = 1_000;
+
+    List<VectorItem> findItems(List<String> targetTypes, Map<String, Object> filters);
+
+    Optional<VectorItem> findByVectorItemId(String vectorItemId);
+
+    List<VectorItem> findByVectorItemIds(Collection<String> vectorItemIds);
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGenerator.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGenerator.java
new file mode 100644
index 00000000..4b344074
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGenerator.java
@@ -0,0 +1,204 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Dependency-free PCA projection for management visualization.
+ *
+ * <p>Embeddings are truncated to the shortest embedding among the usable items, mean-centered, and
+ * projected onto two covariance axes found by power iteration. Coordinates are then scaled so the
+ * largest absolute value is 1.
+ */
+public class PcaVectorProjectionGenerator implements VectorProjectionGenerator {
+
+    private static final int POWER_ITERATIONS = 60;
+
+    /** Residual norm below which no second axis orthogonal to the first is considered to exist. */
+    private static final double DEGENERATE_AXIS_TOLERANCE = 1.0e-12d;
+
+    @Override
+    public ProjectionAlgorithm algorithm() {
+        return ProjectionAlgorithm.PCA;
+    }
+
+    /**
+     * Projects the items that carry a non-empty embedding onto two dimensions.
+     *
+     * @param projectionId identifier copied into every generated point
+     * @param items source items; items with a null or empty embedding are skipped
+     * @param createdAt timestamp copied into every generated point
+     * @return one point per usable item, in input order, or an empty list when nothing is usable
+     */
+    @Override
+    public List<VectorProjectionPoint> generate(String projectionId, List<VectorItem> items, Instant createdAt) {
+        List<VectorItem> usable = items.stream()
+                .filter(item -> item.embedding() != null && !item.embedding().isEmpty())
+                .toList();
+        if (usable.isEmpty()) {
+            return List.of();
+        }
+        int dimensions = usable.stream()
+                .map(VectorItem::embedding)
+                .mapToInt(List::size)
+                .min()
+                .orElse(0);
+        if (dimensions <= 0) {
+            return List.of();
+        }
+        double[][] centered = centeredMatrix(usable, dimensions);
+        double[][] covariance = covariance(centered, dimensions);
+        double[] first = principalComponent(covariance, null);
+        double[] second = dimensions == 1 ? new double[] {0.0d} : principalComponent(covariance, first);
+        List<double[]> coordinates = new ArrayList<>(usable.size());
+        for (double[] vector : centered) {
+            coordinates.add(new double[] {dot(vector, first), dot(vector, second)});
+        }
+        normalizeCoordinates(coordinates);
+        List<VectorProjectionPoint> points = new ArrayList<>(usable.size());
+        for (int i = 0; i < usable.size(); i++) {
+            VectorItem item = usable.get(i);
+            double[] coordinate = coordinates.get(i);
+            points.add(new VectorProjectionPoint(
+                    projectionId,
+                    item.vectorItemId(),
+                    coordinate[0],
+                    coordinate[1],
+                    null,
+                    i,
+                    createdAt));
+        }
+        return points;
+    }
+
+    /** Returns the embeddings (first {@code dimensions} values) with the per-column mean removed. */
+    private double[][] centeredMatrix(List<VectorItem> items, int dimensions) {
+        double[] means = new double[dimensions];
+        for (VectorItem item : items) {
+            for (int i = 0; i < dimensions; i++) {
+                means[i] += item.embedding().get(i);
+            }
+        }
+        for (int i = 0; i < dimensions; i++) {
+            means[i] /= items.size();
+        }
+        double[][] centered = new double[items.size()][dimensions];
+        for (int row = 0; row < items.size(); row++) {
+            List<Double> embedding = items.get(row).embedding();
+            for (int col = 0; col < dimensions; col++) {
+                centered[row][col] = embedding.get(col) - means[col];
+            }
+        }
+        return centered;
+    }
+
+    /** Builds the symmetric sample covariance matrix (divisor {@code n - 1}, at least 1). */
+    private double[][] covariance(double[][] centered, int dimensions) {
+        double[][] covariance = new double[dimensions][dimensions];
+        int divisor = Math.max(1, centered.length - 1);
+        for (double[] row : centered) {
+            for (int i = 0; i < dimensions; i++) {
+                for (int j = i; j < dimensions; j++) {
+                    covariance[i][j] += row[i] * row[j] / divisor;
+                }
+            }
+        }
+        for (int i = 0; i < dimensions; i++) {
+            for (int j = 0; j < i; j++) {
+                covariance[i][j] = covariance[j][i];
+            }
+        }
+        return covariance;
+    }
+
+    /**
+     * Runs power iteration on {@code matrix}, optionally deflating against {@code orthogonalTo}.
+     *
+     * @param matrix square covariance matrix
+     * @param orthogonalTo unit axis the result must be orthogonal to, or {@code null} for the first axis
+     * @return a unit axis, or an all-zero vector when no direction orthogonal to {@code orthogonalTo} remains
+     */
+    private double[] principalComponent(double[][] matrix, double[] orthogonalTo) {
+        int dimensions = matrix.length;
+        double[] vector = new double[dimensions];
+        for (int i = 0; i < dimensions; i++) {
+            vector[i] = 1.0d / Math.sqrt(dimensions);
+        }
+        for (int iteration = 0; iteration < POWER_ITERATIONS; iteration++) {
+            double[] next = multiply(matrix, vector);
+            if (orthogonalTo != null) {
+                subtractProjection(next, orthogonalTo);
+            }
+            normalize(next);
+            vector = next;
+        }
+        if (orthogonalTo != null) {
+            // normalize() restarts from the first basis vector when the deflated iterate collapses,
+            // and rounding residue can be amplified by normalization; either may leave a component
+            // along the first axis. Re-orthogonalize once more so the second axis never duplicates it.
+            subtractProjection(vector, orthogonalTo);
+            if (Math.sqrt(dot(vector, vector)) <= DEGENERATE_AXIS_TOLERANCE) {
+                return new double[dimensions];
+            }
+            normalize(vector);
+        }
+        return vector;
+    }
+
+    private double[] multiply(double[][] matrix, double[] vector) {
+        double[] result = new double[vector.length];
+        for (int row = 0; row < matrix.length; row++) {
+            for (int col = 0; col < vector.length; col++) {
+                result[row] += matrix[row][col] * vector[col];
+            }
+        }
+        return result;
+    }
+
+    /** Removes from {@code vector} its component along {@code basis} (assumed unit length). */
+    private void subtractProjection(double[] vector, double[] basis) {
+        double scale = dot(vector, basis);
+        for (int i = 0; i < vector.length; i++) {
+            vector[i] -= scale * basis[i];
+        }
+    }
+
+    /** Scales {@code vector} to unit length; a zero or NaN norm resets it to the first basis vector. */
+    private void normalize(double[] vector) {
+        double norm = Math.sqrt(dot(vector, vector));
+        if (norm == 0.0d || Double.isNaN(norm)) {
+            for (int i = 0; i < vector.length; i++) {
+                vector[i] = i == 0 ? 1.0d : 0.0d;
+            }
+            return;
+        }
+        for (int i = 0; i < vector.length; i++) {
+            vector[i] /= norm;
+        }
+    }
+
+    private double dot(double[] left, double[] right) {
+        double result = 0.0d;
+        for (int i = 0; i < Math.min(left.length, right.length); i++) {
+            result += left[i] * right[i];
+        }
+        return result;
+    }
+
+    /** Divides every coordinate by the largest absolute coordinate; no-op when that maximum is 0 or NaN. */
+    private void normalizeCoordinates(List<double[]> coordinates) {
+        double maxAbs = coordinates.stream()
+                .flatMap(values -> List.of(Math.abs(values[0]), Math.abs(values[1])).stream())
+                .max(Comparator.naturalOrder())
+                .orElse(0.0d);
+        if (maxAbs <= 0.0d || Double.isNaN(maxAbs)) {
+            return;
+        }
+        for (double[] coordinate : coordinates) {
+            coordinate[0] /= maxAbs;
+            coordinate[1] /= maxAbs;
+        }
+    }
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionAlgorithm.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionAlgorithm.java
new file mode 100644
index 00000000..bb7054b0
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionAlgorithm.java
@@ -0,0 +1,5 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+public enum ProjectionAlgorithm {
+    PCA
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointPage.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointPage.java
new file mode 100644
index 00000000..5a6fdca7
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointPage.java
@@ -0,0 +1,10 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.util.List;
+
+public record ProjectionPointPage(long totalCount, List<ProjectionPointView> items) {
+
+    public ProjectionPointPage {
+        items = items == null ? 
List.of() : List.copyOf(items);
+    }
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointView.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointView.java
new file mode 100644
index 00000000..d81cad8e
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionPointView.java
@@ -0,0 +1,19 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.util.Map;
+
+/** Read model of a projected point; {@code metadata} is copied and never {@code null}. */
+public record ProjectionPointView(
+        String vectorItemId,
+        String targetType,
+        String sourceId,
+        String label,
+        double x,
+        double y,
+        String clusterId,
+        Map<String, Object> metadata) {
+
+    public ProjectionPointView {
+        metadata = metadata == null ? Map.of() : Map.copyOf(metadata);
+    }
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionStatus.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionStatus.java
new file mode 100644
index 00000000..014f4916
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/ProjectionStatus.java
@@ -0,0 +1,10 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+/** Status values a projection can carry. */
+public enum ProjectionStatus {
+    REQUESTED,
+    PROCESSING,
+    COMPLETED,
+    FAILED,
+    DELETED
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorItem.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorItem.java
new file mode 100644
index 00000000..644bf1e2
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorItem.java
@@ -0,0 +1,24 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+
+/** Existing vector-table row as an item for display and coordinate generation; collections are copied, never null. */
+public record VectorItem(
+        String vectorItemId,
+        String targetType,
+        String sourceId,
+        String label,
+        String contentText,
+        List<Double> embedding,
+        String embeddingModel,
+        Integer embeddingDimension,
+        Map<String, Object> metadata,
+        Instant createdAt) {
+
+    public VectorItem {
+        metadata = metadata == null ? Map.of() : Map.copyOf(metadata);
+        embedding = embedding == null ? List.of() : List.copyOf(embedding);
+    }
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjection.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjection.java
new file mode 100644
index 00000000..e62790ec
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjection.java
@@ -0,0 +1,48 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+
+/** Projection job state together with its target types, filters, and item count. */
+public record VectorProjection(
+        String projectionId,
+        String name,
+        ProjectionAlgorithm algorithm,
+        ProjectionStatus status,
+        List<String> targetTypes,
+        Map<String, Object> filters,
+        int itemCount,
+        String errorMessage,
+        String createdBy,
+        Instant createdAt,
+        Instant completedAt) {
+
+    public VectorProjection {
+        targetTypes = targetTypes == null ? List.of() : List.copyOf(targetTypes);
+        filters = filters == null ? Map.of() : Map.copyOf(filters);
+    }
+
+    /** Creates a projection in {@link ProjectionStatus#REQUESTED} state with zero items and no completion time. */
+    public static VectorProjection requested(
+            String projectionId,
+            String name,
+            ProjectionAlgorithm algorithm,
+            List<String> targetTypes,
+            Map<String, Object> filters,
+            String createdBy,
+            Instant createdAt) {
+        return new VectorProjection(
+                projectionId,
+                name,
+                algorithm,
+                ProjectionStatus.REQUESTED,
+                targetTypes,
+                filters,
+                0,
+                null,
+                createdBy,
+                createdAt,
+                null);
+    }
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionGenerator.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionGenerator.java
new file mode 100644
index 00000000..5ca8fce1
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionGenerator.java
@@ -0,0 +1,12 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.time.Instant;
+import java.util.List;
+
+/** Extension point for projection algorithms; each implementation reports the algorithm it provides. */
+public interface VectorProjectionGenerator {
+
+    ProjectionAlgorithm algorithm();
+
+    List<VectorProjectionPoint> generate(String projectionId, List<VectorItem> items, Instant createdAt);
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPoint.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPoint.java
new file mode 100644
index 00000000..0377b270
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPoint.java
@@ -0,0 +1,14 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.time.Instant;
+
+/** Stored 2D coordinate of one vector item within one projection. */
+public record VectorProjectionPoint(
+        String projectionId,
+        String vectorItemId,
+        double x,
+        double y,
+        String clusterId,
+        Integer displayOrder,
+        Instant createdAt) {
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPointRepository.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPointRepository.java
new file mode 100644
index 00000000..b777f5cc
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionPointRepository.java
@@ -0,0 +1,19 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Optional;
+
+/** Storage port for projection coordinates. */
+public interface VectorProjectionPointRepository {
+
+    void deleteByProjectionId(String projectionId);
+
+    void saveAll(List<VectorProjectionPoint> points);
+
+    ProjectionPointPage findPage(String projectionId, String targetType, String clusterId, String keyword, int limit, int offset);
+
+    List<ProjectionPointView> findByVectorItemIds(String projectionId, Collection<String> vectorItemIds);
+
+    Optional<ProjectionPointView> findByVectorItemId(String projectionId, String vectorItemId);
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionRepository.java b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionRepository.java
new file mode 100644
index 00000000..9f08f101
--- /dev/null
+++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorProjectionRepository.java
@@ -0,0 +1,19 @@
+package studio.one.platform.ai.core.vector.visualization;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.Optional;
+
+/** Storage port for projection state. */
+public interface VectorProjectionRepository {
+
+    void save(VectorProjection projection);
+
+    Optional<VectorProjection> findById(String projectionId);
+
+    List<VectorProjection> findAll(int limit, int offset);
+
+    void updateStatus(String projectionId, ProjectionStatus status, String errorMessage, Instant completedAt);
+
+    void markCompleted(String projectionId, int itemCount, Instant completedAt);
+}
diff --git a/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorVisualizationMetadataSanitizer.java 
b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorVisualizationMetadataSanitizer.java new file mode 100644 index 00000000..10481700 --- /dev/null +++ b/studio-platform-ai/src/main/java/studio/one/platform/ai/core/vector/visualization/VectorVisualizationMetadataSanitizer.java @@ -0,0 +1,49 @@ +package studio.one.platform.ai.core.vector.visualization; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +public final class VectorVisualizationMetadataSanitizer { + + private static final Set ALLOWED_KEYS = Set.of( + "objectType", + "objectId", + "documentId", + "chunkId", + "chunkIndex", + "chunkOrder", + "chunkType", + "sourceName", + "title", + "filename", + "fileName", + "name", + "headingPath", + "sourceRef", + "page", + "slide", + "sourceFormat", + "embeddingProvider", + "embeddingProfileId", + "embeddingModel", + "embeddingDimension", + "createdAt", + "indexedAt"); + + private VectorVisualizationMetadataSanitizer() { + } + + public static Map sanitize(Map metadata) { + if (metadata == null || metadata.isEmpty()) { + return Map.of(); + } + Map result = new LinkedHashMap<>(); + for (Map.Entry entry : metadata.entrySet()) { + if (ALLOWED_KEYS.contains(entry.getKey())) { + result.put(entry.getKey(), entry.getValue()); + } + } + return result; + } +} diff --git a/studio-platform-ai/src/main/resources/schema/ai/mariadb/V603__create_vector_projection_tables.sql b/studio-platform-ai/src/main/resources/schema/ai/mariadb/V603__create_vector_projection_tables.sql new file mode 100644 index 00000000..ab0ed54c --- /dev/null +++ b/studio-platform-ai/src/main/resources/schema/ai/mariadb/V603__create_vector_projection_tables.sql @@ -0,0 +1,33 @@ +CREATE TABLE IF NOT EXISTS tb_ai_vector_projection ( + projection_id VARCHAR(100) PRIMARY KEY, + name VARCHAR(200) NOT NULL, + algorithm VARCHAR(30) NOT NULL, + status VARCHAR(30) NOT NULL, + target_types VARCHAR(500), + filter_json LONGTEXT, + item_count INT NOT NULL 
DEFAULT 0, + error_message LONGTEXT, + created_by VARCHAR(100), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP NULL +); + +CREATE INDEX idx_ai_vector_projection_status + ON tb_ai_vector_projection(status, created_at); + +CREATE TABLE IF NOT EXISTS tb_ai_vector_projection_point ( + projection_id VARCHAR(100) NOT NULL, + vector_item_id VARCHAR(100) NOT NULL, + x DOUBLE NOT NULL, + y DOUBLE NOT NULL, + cluster_id VARCHAR(100), + display_order INT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (projection_id, vector_item_id) +); + +CREATE INDEX idx_ai_vector_projection_point_order + ON tb_ai_vector_projection_point(projection_id, display_order); + +CREATE INDEX idx_ai_vector_projection_point_cluster + ON tb_ai_vector_projection_point(projection_id, cluster_id); diff --git a/studio-platform-ai/src/main/resources/schema/ai/mysql/V603__create_vector_projection_tables.sql b/studio-platform-ai/src/main/resources/schema/ai/mysql/V603__create_vector_projection_tables.sql new file mode 100644 index 00000000..208efc7a --- /dev/null +++ b/studio-platform-ai/src/main/resources/schema/ai/mysql/V603__create_vector_projection_tables.sql @@ -0,0 +1,33 @@ +CREATE TABLE IF NOT EXISTS tb_ai_vector_projection ( + projection_id VARCHAR(100) PRIMARY KEY, + name VARCHAR(200) NOT NULL, + algorithm VARCHAR(30) NOT NULL, + status VARCHAR(30) NOT NULL, + target_types VARCHAR(500), + filter_json JSON, + item_count INT NOT NULL DEFAULT 0, + error_message LONGTEXT, + created_by VARCHAR(100), + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP NULL +); + +CREATE INDEX idx_ai_vector_projection_status + ON tb_ai_vector_projection(status, created_at); + +CREATE TABLE IF NOT EXISTS tb_ai_vector_projection_point ( + projection_id VARCHAR(100) NOT NULL, + vector_item_id VARCHAR(100) NOT NULL, + x DOUBLE NOT NULL, + y DOUBLE NOT NULL, + cluster_id VARCHAR(100), + display_order INT, + created_at TIMESTAMP NOT 
NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (projection_id, vector_item_id) +); + +CREATE INDEX idx_ai_vector_projection_point_order + ON tb_ai_vector_projection_point(projection_id, display_order); + +CREATE INDEX idx_ai_vector_projection_point_cluster + ON tb_ai_vector_projection_point(projection_id, cluster_id); diff --git a/studio-platform-ai/src/main/resources/schema/ai/postgres/V603__create_vector_projection_tables.sql b/studio-platform-ai/src/main/resources/schema/ai/postgres/V603__create_vector_projection_tables.sql new file mode 100644 index 00000000..39f5cb0b --- /dev/null +++ b/studio-platform-ai/src/main/resources/schema/ai/postgres/V603__create_vector_projection_tables.sql @@ -0,0 +1,33 @@ +CREATE TABLE IF NOT EXISTS tb_ai_vector_projection ( + projection_id VARCHAR(100) PRIMARY KEY, + name VARCHAR(200) NOT NULL, + algorithm VARCHAR(30) NOT NULL, + status VARCHAR(30) NOT NULL, + target_types VARCHAR(500), + filter_json JSONB DEFAULT '{}'::jsonb, + item_count INT NOT NULL DEFAULT 0, + error_message TEXT, + created_by VARCHAR(100), + created_at TIMESTAMP NOT NULL DEFAULT now(), + completed_at TIMESTAMP NULL +); + +CREATE INDEX IF NOT EXISTS idx_ai_vector_projection_status + ON tb_ai_vector_projection(status, created_at DESC); + +CREATE TABLE IF NOT EXISTS tb_ai_vector_projection_point ( + projection_id VARCHAR(100) NOT NULL, + vector_item_id VARCHAR(100) NOT NULL, + x DOUBLE PRECISION NOT NULL, + y DOUBLE PRECISION NOT NULL, + cluster_id VARCHAR(100), + display_order INT, + created_at TIMESTAMP NOT NULL DEFAULT now(), + PRIMARY KEY (projection_id, vector_item_id) +); + +CREATE INDEX IF NOT EXISTS idx_ai_vector_projection_point_order + ON tb_ai_vector_projection_point(projection_id, display_order); + +CREATE INDEX IF NOT EXISTS idx_ai_vector_projection_point_cluster + ON tb_ai_vector_projection_point(projection_id, cluster_id); diff --git a/studio-platform-ai/src/main/resources/sql/ai-sqlset.xml b/studio-platform-ai/src/main/resources/sql/ai-sqlset.xml 
index 1249bbdb..c648f0bf 100644 --- a/studio-platform-ai/src/main/resources/sql/ai-sqlset.xml +++ b/studio-platform-ai/src/main/resources/sql/ai-sqlset.xml @@ -12,7 +12,7 @@ :vector) AS distance + SELECT id, object_id, text, metadata, (embedding <-> :vector) AS distance FROM tb_ai_document_chunk ORDER BY embedding <-> :vector ASC LIMIT :limit @@ -28,7 +28,7 @@ :vector) AS distance + SELECT id, object_id, text, metadata, (embedding <-> :vector) AS distance FROM tb_ai_document_chunk WHERE (:objectType IS NULL OR object_type = :objectType) AND (:objectId IS NULL OR object_id = :objectId) @@ -39,7 +39,7 @@ :vector) AS distance, ts_rank_cd(to_tsvector('simple', text || ' ' || COALESCE(metadata->>'keywordsText','')), plainto_tsquery(:query)) AS bm25, ((embedding <-> :vector) * :vectorWeight) - (COALESCE(ts_rank_cd(to_tsvector('simple', text || ' ' || COALESCE(metadata->>'keywordsText','')), plainto_tsquery(:query)),0) * :lexicalWeight) AS hybrid @@ -51,7 +51,7 @@ :vector) AS distance, ts_rank_cd(to_tsvector('simple', text || ' ' || COALESCE(metadata->>'keywordsText','')), plainto_tsquery(:query)) AS bm25, ((embedding <-> :vector) * :vectorWeight) - (COALESCE(ts_rank_cd(to_tsvector('simple', text || ' ' || COALESCE(metadata->>'keywordsText','')), plainto_tsquery(:query)),0) * :lexicalWeight) AS hybrid diff --git a/studio-platform-ai/src/test/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGeneratorTest.java b/studio-platform-ai/src/test/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGeneratorTest.java new file mode 100644 index 00000000..891a3d5a --- /dev/null +++ b/studio-platform-ai/src/test/java/studio/one/platform/ai/core/vector/visualization/PcaVectorProjectionGeneratorTest.java @@ -0,0 +1,38 @@ +package studio.one.platform.ai.core.vector.visualization; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.time.Instant; +import java.util.List; +import java.util.Map; + +import 
org.junit.jupiter.api.Test; + +class PcaVectorProjectionGeneratorTest { + + private final PcaVectorProjectionGenerator generator = new PcaVectorProjectionGenerator(); + + @Test + void generateProjectsEmbeddingsToNormalizedPoints() { + List points = generator.generate("proj-1", List.of( + item("a", List.of(1.0, 0.0, 0.0)), + item("b", List.of(0.0, 1.0, 0.0)), + item("c", List.of(0.0, 0.0, 1.0))), Instant.parse("2026-04-30T00:00:00Z")); + + assertThat(points).hasSize(3); + assertThat(points).allSatisfy(point -> { + assertThat(point.projectionId()).isEqualTo("proj-1"); + assertThat(point.x()).isBetween(-1.0, 1.0); + assertThat(point.y()).isBetween(-1.0, 1.0); + }); + } + + @Test + void generateReturnsEmptyWhenNoEmbeddingExists() { + assertThat(generator.generate("proj-1", List.of(item("a", List.of())), Instant.now())).isEmpty(); + } + + private VectorItem item(String id, List embedding) { + return new VectorItem(id, "TYPE", "source", id, "text", embedding, "model", embedding.size(), Map.of(), Instant.now()); + } +}