diff --git a/.github/workflows/onprem-build-nightly.yaml b/.github/workflows/onprem-build-nightly.yaml index 92b7c3c..fbde147 100644 --- a/.github/workflows/onprem-build-nightly.yaml +++ b/.github/workflows/onprem-build-nightly.yaml @@ -77,6 +77,11 @@ jobs: with: context: graphrag-ui/ file: ./graphrag-ui/Dockerfile + # The UI Dockerfile reads VERSION from a named ``repo`` context; + # docker/build-push-action interprets undeclared contexts as image + # refs and tries to pull docker.io/library/repo:latest otherwise. + build-contexts: | + repo=. push: true tags: | ${{ env.IMAGE }} diff --git a/.github/workflows/onprem-build-test.yaml b/.github/workflows/onprem-build-test.yaml index 40d2914..8e1f9b0 100644 --- a/.github/workflows/onprem-build-test.yaml +++ b/.github/workflows/onprem-build-test.yaml @@ -63,6 +63,11 @@ jobs: with: context: graphrag-ui/ file: ./graphrag-ui/Dockerfile + # The UI Dockerfile reads VERSION from a named ``repo`` context; + # docker/build-push-action interprets undeclared contexts as image + # refs and tries to pull docker.io/library/repo:latest otherwise. + build-contexts: | + repo=. push: true tags: | tigergraph/graphrag-ui:${{steps.get-image.outputs.IMAGE}} diff --git a/.github/workflows/onprem-build.yaml b/.github/workflows/onprem-build.yaml index 9fb26a4..d28dfd4 100644 --- a/.github/workflows/onprem-build.yaml +++ b/.github/workflows/onprem-build.yaml @@ -43,7 +43,22 @@ jobs: echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT VERSION=$(cat VERSION) echo "VERSION=$VERSION" >> $GITHUB_OUTPUT - + + # Only builds triggered from main update the :latest tag. Manual + # rebuilds of older release branches (e.g. release_1.3.1) skip + # :latest so a backport build doesn't overwrite the most recent + # published image. + - name: Decide whether to update :latest + id: latest-check + run: | + if [ "${GITHUB_REF}" = "refs/heads/main" ]; then + echo "push_latest=true" >> $GITHUB_OUTPUT + echo "Building from main — :latest will be updated." 
+ else + echo "push_latest=false" >> $GITHUB_OUTPUT + echo "Not on main (ref=${GITHUB_REF}) — :latest will be left as-is." + fi + - name: Build and push Docker image GraphRAG uses: docker/build-push-action@v5 with: @@ -53,8 +68,8 @@ jobs: tags: | tigergraph/graphrag:${{steps.get-image.outputs.IMAGE}} tigergraph/graphrag:${{steps.get-image.outputs.VERSION}} - tigergraph/graphrag:latest - + ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/graphrag:latest' || '' }} + - name: Build and push Docker image ECC uses: docker/build-push-action@v5 with: @@ -64,7 +79,7 @@ jobs: tags: | tigergraph/graphrag-ecc:${{steps.get-image.outputs.IMAGE}} tigergraph/graphrag-ecc:${{steps.get-image.outputs.VERSION}} - tigergraph/graphrag-ecc:latest + ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/graphrag-ecc:latest' || '' }} - name: Build and push Docker image chat-history uses: docker/build-push-action@v5 @@ -75,18 +90,23 @@ jobs: tags: | tigergraph/chat-history:${{steps.get-image.outputs.IMAGE}} tigergraph/chat-history:${{steps.get-image.outputs.VERSION}} - tigergraph/chat-history:latest - + ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/chat-history:latest' || '' }} + - name: Build and push Docker image graphrag-ui uses: docker/build-push-action@v5 with: context: graphrag-ui/ file: ./graphrag-ui/Dockerfile + # The UI Dockerfile reads VERSION from a named ``repo`` context; + # docker/build-push-action interprets undeclared contexts as image + # refs and tries to pull docker.io/library/repo:latest otherwise. + build-contexts: | + repo=. 
push: true tags: | tigergraph/graphrag-ui:${{steps.get-image.outputs.IMAGE}} tigergraph/graphrag-ui:${{steps.get-image.outputs.VERSION}} - tigergraph/graphrag-ui:latest + ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/graphrag-ui:latest' || '' }} # - name: Set SSH key # run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index b4c1d94..a50cf0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## [1.4.1] + +### Added +- **Token login** — the sign-in page adds a "Use token login" option with a choice of API Token or Secret, alongside the default username / password. The signed-in username and roles are resolved from TigerGraph after login so the UI shows the real user. +- **Pre-flight upload conflict check** — a new endpoint reports which planned filenames already exist before the bytes are sent. The upload dialog uses it to prompt the user once with the conflicting names and offer Replace or Skip; large files no longer have to cross the wire twice when a collision is hit. + +### Changed +- **Every request authenticates as the signed-in user**, end to end — graph operations, chat history, traces, and knowledge-graph rebuilds all run under the caller's identity (username / password, secret, or API token). +- **TigerGraph token handling is automatic** — an API token is obtained from the caller's credentials only when the database requires one, unless a static API token is configured. The `getToken` config option is no longer needed and is now ignored. +- **Sample documents are visible in the upload dialog after schema extraction.** Earlier, files used for schema extraction landed in a hidden per-request subdirectory and disappeared from the dialog. They now live alongside regular uploads, and overwriting one drops the cached extract so the next ingest sees the new bytes. 
+- **Schema extraction requires an explicit sample list.** The endpoint no longer treats a missing or empty `filenames` field as "use every JSONL in the temp folder," which silently mixed in stale samples from prior sessions. Callers must name each sample explicitly. +- **One schema extraction at a time per graph.** Concurrent attempts on the same graph are rejected with 409 instead of racing on the shared sample folder. +- **Document Ingestion dialog reflects server-side state on reopen.** Closing the dialog mid-conversion and reopening it no longer leaves the *Ingest* button incorrectly enabled. The dialog asks the server which operation, if any, currently holds the graph lock, and polls until that operation completes — so the button stays disabled, the uploaded files list re-populates, and the next upload doesn't collide with the prior conversion. +- **Conflict prompts use the app's styled dialog** instead of the browser default. Choosing *Cancel* now aborts the upload cleanly — the button and status message reset right away. +- **The vector store recovers without a container restart.** When the initial connection to TigerGraph fails (e.g. cold start, transient network blip), the service used to stay broken until the operator restarted the container — chat connections were rejected silently with WebSocket close 1013. The vector store now retries automatically in the background (10s → 30s → 60s → 120s → 300s backoff), and a new ``POST /ui/admin/retry_embedding_store`` lets superusers force a retry immediately after fixing the underlying issue. +- **Chat stays available when vector search is unavailable.** The chat WebSocket no longer closes hard with 1013 on vector-store failures. Instead it accepts the connection, surfaces a notice to the client, and lets graph-traversal questions answer normally — only questions that genuinely require a vector lookup fail, and they fail gracefully through the synthesizer. 
+- **PDF ingestion is faster on image-heavy documents.** Image-description workers now run with a larger parallel pool, and tiny decorative images skip the multimodal LLM entirely. On AWS Bedrock deployments the connection pool default is also raised so concurrent describe calls no longer queue behind a 20-connection cap. +- **Image description is tunable per graph or globally.** Two new `graphrag_config` keys — `extract_images` and `min_image_dim_px` — control whether the multimodal LLM is invoked on extracted images and the smallest image dimension that goes to the LLM (smaller images skip the call). Both are editable from the *GraphRAG Configuration* page in the UI, globally or per graph. Disabling does not alter the Image vertex type or loading job, so re-enabling later requires no schema change. The multimodal describe pass now reuses `default_concurrency` instead of a separate knob, so one setting tunes parallelism across the pipeline. +- **Community search falls back to hybrid search when it returns nothing or fails.** Auto-selected community queries that miss (no relevant community summaries) or hit a retriever error are now retried once with hybrid graph-hop search before returning a "couldn't find" answer. Manually-picked community search is unchanged. + +### Removed +- **A configured static `apiToken` no longer overrides per-user credentials.** It is used only for the service's background operations; interactive requests always authenticate as the signed-in user. + ## [1.4.0] ### Added diff --git a/README.md b/README.md index ce4f10d..4e85aff 100644 --- a/README.md +++ b/README.md @@ -423,7 +423,7 @@ For examples of how to ingest documents through the backend API, refer to the ** ## More Detailed Configurations ### DB configuration -Copy the below into `configs/server_config.json` and edit the `hostname` and `getToken` fields to match your database's configuration. If token authentication is enabled in TigerGraph, set `getToken` to `true`. 
Set the timeout, memory threshold, and thread limit parameters as desired to control how much of the database's resources are consumed when answering a question. +Copy the below into `configs/server_config.json` and edit the `hostname` field to match your database's configuration. Token authentication is handled automatically — an API token is obtained from the username/password when the database requires one, unless a static API token is configured. Set the timeout, memory threshold, and thread limit parameters as desired to control how much of the database's resources are consumed when answering a question. ```json { @@ -433,7 +433,6 @@ Copy the below into `configs/server_config.json` and edit the `hostname` and `ge "gsPort": "14240", "username": "tigergraph", "password": "tigergraph", - "getToken": false, "default_timeout": 300, "default_mem_threshold": 5000, "default_thread_limit": 8 @@ -448,9 +447,8 @@ Copy the below into `configs/server_config.json` and edit the `hostname` and `ge | `gsPort` | string | `"14240"` | GSQL port for TigerGraph admin operations. | | `username` | string | `"tigergraph"` | TigerGraph database username. | | `password` | string | `"tigergraph"` | TigerGraph database password. | -| `getToken` | bool | `false` | Set to `true` if token authentication is enabled on TigerGraph. | | `graphname` | string | `""` | Default graph name. Usually left empty (selected at runtime). | -| `apiToken` | string | `""` | Pre-generated API token. If set, token-based auth is used instead of username/password. | +| `apiToken` | string | `""` | Optional pre-generated token for the service's background operations. Interactive requests always authenticate as the signed-in user. | | `default_timeout` | int | `300` | Default query timeout in seconds. | | `default_mem_threshold` | int | `5000` | Memory threshold (MB) for query execution. | | `default_thread_limit` | int | `8` | Max threads for query execution. 
| @@ -500,6 +498,8 @@ Copy the below code into `configs/server_config.json`. You shouldn’t need to c | `doc_process_switch` | bool | `true` | Enable/disable document processing during knowledge graph build. | | `entity_extraction_switch` | bool | same as `doc_process_switch` | Enable/disable entity extraction during knowledge graph build. | | `community_detection_switch` | bool | same as `entity_extraction_switch` | Enable/disable community detection during knowledge graph build. | +| `extract_images` | bool | `true` | Run the multimodal LLM on images extracted from documents to generate alt-text. Set to `false` to skip the image-description pass entirely — much faster, at the cost of losing image content from retrieval. Configurable per graph. | +| `min_image_dim_px` | int | `100` | Smallest side (in px) an image must have to be sent to the multimodal LLM. Smaller images are tagged "decorative image" without an LLM call. Configurable per graph. | | `load_batch_size` | int | `500` | Batch size for document loading. | | `upsert_delay` | int | `0` | Delay in seconds between loading batches. | | `default_concurrency` | int | `10` | Base concurrency level for parallel processing. Configurable per graph. | @@ -773,12 +773,19 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d "model_kwargs": { "temperature": 0, }, + "boto3_config": { + "max_pool_connections": 50, + "read_timeout": 300, + "retries": 5 + }, "prompt_path": "./common/prompts/aws_bedrock_claude3haiku/" } } } ``` +`boto3_config` is optional (the defaults shown above are also the built-in defaults). The same block can be set on `embedding_service` when using Bedrock embeddings. 
+ #### Ollama ```json @@ -916,8 +923,9 @@ A bad answer at step 4 is rarely fixed by editing the response prompt; usually i | Answers miss context that's clearly in the source | chunks too small or no overlap | raise `chunk_size`; bump `overlap_size` (default 1/8 of `chunk_size`); lower `threshold` (`semantic`) | | Tables / figures get fragmented | wrong chunker for the source | use `markdown` for markdown / docs converted to markdown; use `html` for HTML pages with structure; use `regex` with a custom `pattern` for structured logs | | Cross-section reasoning fails | no overlap | increase `overlap_size` to ~25% of `chunk_size` | +| Long tables get split mid-row and the answer loses column headers | `chunk_size` (default `2048`) is smaller than the table's serialized length | raise `chunker_config.chunk_size` to fit the largest table whole — for table-heavy regulator / industry reports, **`4096`–`8192` is often the right range** | -Default starting point for prose: `chunker: "semantic"`, `threshold: 0.95`, `chunker_config.method: "percentile"`. Move to `markdown` chunker with `chunk_size: 2048` and `overlap_size: 256` if your source is markdown-heavy and table integrity matters. +Default starting point for prose: `chunker: "semantic"`, `threshold: 0.95`, `chunker_config.method: "percentile"`. Move to `markdown` chunker with `chunk_size: 2048` and `overlap_size: 256` if your source is markdown-heavy and table integrity matters. For corpora dominated by large statistical tables (regulatory reports, fiscal yearbooks, multi-year financial summaries), start with `markdown`/`html` chunker and `chunk_size: 8192` so each table stays in one chunk. ### 3. Extraction — make the graph clean before tuning retrieval @@ -985,6 +993,11 @@ When customizing: - **`reuse_embedding: true`** skips re-embedding identical text — major saving on re-ingest of unchanged documents. 
- **Choose `llm_model` thoughtfully** — entity / relationship extraction tolerates cheaper / faster models (Haiku, Nova-lite, Flash); response synthesis benefits from stronger ones (Sonnet, GPT-4-class). The `multimodal_service` is independent — set it to a vision-capable model only when you actually ingest images. - **`load_batch_size`** and **`upsert_delay`** control ingestion pressure on TigerGraph. Defaults are fine for most loads; lower the batch size if you see write timeouts. +- **Image-description speed.** On image-heavy documents, every image is sent to the multimodal LLM, which dominates ingest time. Tune via `graphrag_config` (global or per graph) — both knobs are also editable from the *GraphRAG Configuration* page in the UI: + - `extract_images` (default `true`) — set to `false` to skip image description entirely. + - `min_image_dim_px` (default `100`) — smaller images are tagged "decorative image" without an LLM call. + - Multimodal calls share the same `default_concurrency` semaphore as the rest of the pipeline — raise it to parallelize more describe calls; lower it if the multimodal provider's rate limit is hit. + - AWS Bedrock users can further tune connection pool sizing via `boto3_config` in `llm_config`. ### 7. A working tuning loop @@ -1024,6 +1037,12 @@ First, make sure that all your LLM service provider configuration files are work docker compose up -d --build ``` +> **Windows developers:** the repo's top-level `configs/nginx.conf` and `configs/server_config.json` are symlinks intended for POSIX shells and don't resolve on Windows. Before running `docker compose up -d` from the repo root, overwrite them with the tutorial copies: +> ```sh +> cp docs/tutorials/configs/nginx.conf configs/nginx.conf +> cp docs/tutorials/configs/server_config.json configs/server_config.json +> ``` + If you want to use Weights And Biases for logging the test results, your WandB API key needs to be set in an environment variable on the host machine. 
```sh diff --git a/VERSION b/VERSION index 88c5fb8..347f583 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.0 +1.4.1 diff --git a/common/config.py b/common/config.py index dd59ad1..cd51d6a 100644 --- a/common/config.py +++ b/common/config.py @@ -519,6 +519,11 @@ def get_llm_service(service_config: dict) -> LLM_Model: _embedding_store_ready = threading.Event() _embedding_stores: dict = {} _embedding_stores_lock = threading.Lock() +# Serializes default-store init across the background retry loop, the manual +# /ui/admin/retry_embedding_store endpoint, and reset_embedding_store callers +# (db-config reload). Without it two _init_embedding_store threads could run +# concurrently and stomp ``embedding_store`` + ``service_status``. +_embedding_store_init_lock = threading.Lock() service_status["embedding_store"] = { "status": "initializing", "error": "Embedding store is still initializing", @@ -532,6 +537,9 @@ def _build_embedding_store(graphname: str = "") -> TigerGraphEmbeddingStore: ``embedding_service`` for the model) so the result reflects the current config. """ + # A static apiToken stays a service-side credential here; otherwise + # pyTigerGraph mints a REST++ token from the service username/password + # on demand, so no explicit getToken() is needed. conn = TigerGraphConnection( host=db_config.get("hostname", "http://tigergraph"), username=db_config.get("username", "tigergraph"), @@ -541,8 +549,6 @@ def _build_embedding_store(graphname: str = "") -> TigerGraphEmbeddingStore: graphname=graphname or db_config.get("graphname", ""), apiToken=db_config.get("apiToken", ""), ) - if not db_config.get("apiToken") and db_config.get("getToken"): - conn.getToken() store = TigerGraphEmbeddingStore( conn, @@ -559,16 +565,21 @@ def _init_embedding_store(): """Background thread target. Builds the default embedding store without blocking module import — TigerGraph may be slow on first connect, and we don't want app startup to wait on it. 
+ + Serialized via ``_embedding_store_init_lock`` so concurrent calls + (initial startup + background retry loop + manual retry endpoint + + db-config reload) cannot stomp the shared globals. """ global embedding_store - try: - embedding_store = _build_embedding_store() - service_status["embedding_store"] = {"status": "ok", "error": None} - except Exception as e: - service_status["embedding_store"] = {"status": "error", "error": str(e)} - logger.error(f"Failed to initialize embedding store: {e}") - finally: - _embedding_store_ready.set() + with _embedding_store_init_lock: + try: + embedding_store = _build_embedding_store() + service_status["embedding_store"] = {"status": "ok", "error": None} + except Exception as e: + service_status["embedding_store"] = {"status": "error", "error": str(e)} + logger.error(f"Failed to initialize embedding store: {e}") + finally: + _embedding_store_ready.set() def get_embedding_store(graphname: str | None = None, timeout: float = 0): @@ -631,8 +642,43 @@ def reset_embedding_store() -> None: threading.Thread(target=_init_embedding_store, daemon=True).start() +def _retry_embedding_store_loop(): + """Daemon target. While the embedding store is in error state, + retry the build every so often so a transient TigerGraph outage + self-heals without a container restart. Backs off after each + failure (10s → 30s → 60s → 120s → 300s cap). + """ + import time + backoff = [10, 30, 60, 120, 300] + attempt = 0 + while True: + # Wait for the initial one-shot init to complete (success or + # failure) before starting the retry cadence. 
+ _embedding_store_ready.wait() + if service_status["embedding_store"]["status"] != "error": + attempt = 0 + time.sleep(backoff[-1]) + continue + delay = backoff[min(attempt, len(backoff) - 1)] + attempt += 1 + time.sleep(delay) + if service_status["embedding_store"]["status"] != "error": + attempt = 0 + continue + logger.info( + f"Retrying embedding store init (attempt {attempt}, " + f"next backoff {backoff[min(attempt, len(backoff)-1)]}s)…" + ) + _embedding_store_ready.clear() + _init_embedding_store() + if service_status["embedding_store"]["status"] == "ok": + logger.info("Embedding store init recovered.") + attempt = 0 + + if os.getenv("INIT_EMBED_STORE", "true") == "true": threading.Thread(target=_init_embedding_store, daemon=True).start() + threading.Thread(target=_retry_embedding_store_loop, daemon=True).start() def reload_llm_config(new_llm_config: dict = None): diff --git a/common/db/connections.py b/common/db/connections.py index fab87c3..8b0840c 100644 --- a/common/db/connections.py +++ b/common/db/connections.py @@ -120,94 +120,31 @@ def get_db_connection_pwd_manual( return conn def elevate_db_connection_to_token(host, username, password, graphname, async_conn: bool = False) -> TigerGraphConnectionProxy: - # If a pre-existing apiToken is provided in config, use it directly - # and skip the getToken() call to avoid conflicts. 
- static_token = db_config.get("apiToken", "") - - if static_token: - LogWriter.info("Using pre-configured apiToken from db_config") - if async_conn: - conn = AsyncTigerGraphConnection( - host=host, - username=username, - password=password, - graphname=graphname, - apiToken=static_token, - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240"), - ) - else: - conn = TigerGraphConnection( - host=host, - username=username, - password=password, - graphname=graphname, - apiToken=static_token, - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240"), - ) - return conn - - conn = TigerGraphConnection( - host=host, - username=username, - password=password, - graphname=graphname, - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240") - ) - - if db_config.get("getToken"): - try: - apiToken = conn.getToken()[0] - except HTTPError: - LogWriter.error("Failed to get token") - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect username or password", - headers={"WWW-Authenticate": "Basic"}, - ) - except TigerGraphException as e: - LogWriter.error(f"Failed to get token: {e}") - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Failed to get token - is the database running?" - ) + # pyTigerGraph determines on its own whether a REST++ token is needed + # and mints one from the username/password when so; we just build the + # connection with the caller's credentials. 
+ if async_conn: + conn = AsyncTigerGraphConnection( + host=host, + username=username, + password=password, + graphname=graphname, + restppPort=db_config.get("restppPort", "9000"), + gsPort=db_config.get("gsPort", "14240") + ) - if async_conn: - conn = AsyncTigerGraphConnection( - host=host, - username=username, - password=password, - graphname=graphname, - apiToken=apiToken, - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240") - ) - else: - conn = TigerGraphConnection( - host=db_config["hostname"], - username=username, - password=password, - graphname=graphname, - apiToken=apiToken, - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240") - ) + # temp fix for path + if conn.restppPort == conn.gsPort and "/restpp" not in conn.restppUrl: + conn.restppUrl = conn.restppUrl+"/restpp" else: - if async_conn: - conn = AsyncTigerGraphConnection( - host=host, - username=username, - password=password, - graphname=graphname, - restppPort=db_config.get("restppPort", "9000"), - gsPort=db_config.get("gsPort", "14240") - ) - - # temp fix for path - if conn.restppPort == conn.gsPort and "/restpp" not in conn.restppUrl: - conn.restppUrl = conn.restppUrl+"/restpp" + conn = TigerGraphConnection( + host=host, + username=username, + password=password, + graphname=graphname, + restppPort=db_config.get("restppPort", "9000"), + gsPort=db_config.get("gsPort", "14240") + ) return conn diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index 6f170d0..e032c54 100644 --- a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -209,7 +209,7 @@ def __init__(self, config): boto3_config = config.get("boto3_config", {}) client_config = botocore.config.Config( - max_pool_connections=boto3_config.get("max_pool_connections", 20), + max_pool_connections=boto3_config.get("max_pool_connections", 50), read_timeout=boto3_config.get("read_timeout", 300), 
retries={"max_attempts": boto3_config.get("retries", 5)}, ) diff --git a/common/llm_services/aws_bedrock_service.py b/common/llm_services/aws_bedrock_service.py index ce6056c..4e6096f 100644 --- a/common/llm_services/aws_bedrock_service.py +++ b/common/llm_services/aws_bedrock_service.py @@ -93,7 +93,7 @@ def __init__(self, config): boto3_config = config.get("boto3_config", {}) client_config = botocore.config.Config( - max_pool_connections=boto3_config.get("max_pool_connections", 20), + max_pool_connections=boto3_config.get("max_pool_connections", 50), read_timeout=boto3_config.get("read_timeout", 300), retries={"max_attempts": boto3_config.get("retries", 5)}, ) diff --git a/common/llm_services/base_llm.py b/common/llm_services/base_llm.py index bf24588..e5f04dc 100644 --- a/common/llm_services/base_llm.py +++ b/common/llm_services/base_llm.py @@ -498,6 +498,9 @@ def chatbot_response_prompt(self): - **Tables**: every row, including the header, starts on a new line. - **Output as JSON** — escape characters as needed so the response is valid JSON. Include every field required by the format instructions; set unknown fields to empty. - Treat context keys as citations only when asked; otherwise do NOT include citations in the final answer. +- **Match the question's language.** Write the entire response (titles, bullet labels, prose, numeric formatting) in the same language the user asked in. Keep proper-noun terms (BSI, DeFi, GDP, etc.) in their original script. +- **Quote exact values from the source.** Numbers, units, time periods, and named entities must appear verbatim — do not round, approximate, or translate units. If the source says `10,678億円`, write `10,678億円`, not `about 10 trillion yen`. +- **For comparison or "which is the highest" questions, list each candidate's value before stating the conclusion.** Show the working — do not jump directly to a one-line answer. 
## Inputs - **Question**: {question} diff --git a/common/requirements.txt b/common/requirements.txt index f4d5ac6..a52f992 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -140,7 +140,7 @@ python-multipart==0.0.20 python-iso639==2025.2.18 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph>=2.0.3 +pyTigerGraph>=2.0.4 pytz==2025.2 PyYAML==6.0.2 rapidfuzz==3.13.0 diff --git a/common/utils/graph_locks.py b/common/utils/graph_locks.py index 9d09b5b..684a30b 100644 --- a/common/utils/graph_locks.py +++ b/common/utils/graph_locks.py @@ -14,6 +14,10 @@ _graph_locks: Dict[str, threading.Lock] = {} _locks_dict_lock = threading.Lock() +# Records the operation currently holding each graph's lock so the UI can +# reflect long-running work when a dialog remounts. +_current_operations: Dict[str, str] = {} + # Global rebuild lock (only one rebuild at a time across all graphs) # Use asyncio.Lock for async operations _rebuild_lock: Optional[asyncio.Lock] = None @@ -41,36 +45,49 @@ def get_graph_lock(graphname: str) -> threading.Lock: def acquire_graph_lock(graphname: str, operation: str = "operation") -> bool: """ Try to acquire lock for a graph. Returns True if acquired, False if already locked. - + Args: graphname: Name of the graph to lock - operation: Description of the operation (for logging) + operation: Description of the operation (for logging and status reporting) """ lock = get_graph_lock(graphname) acquired = lock.acquire(blocking=False) - + if acquired: + _current_operations[graphname] = operation logger.info(f"Lock acquired for graph '{graphname}' - {operation}") else: logger.warning(f"Lock already held for graph '{graphname}' - {operation} blocked") - + return acquired def release_graph_lock(graphname: str, operation: str = "operation"): """ Release the lock for a graph. 
- + Args: graphname: Name of the graph to unlock operation: Description of the operation (for logging) """ lock = get_graph_lock(graphname) if lock.locked(): + _current_operations.pop(graphname, None) lock.release() logger.info(f"Lock released for graph '{graphname}' - {operation} completed") +def get_current_operation(graphname: str) -> Optional[str]: + """Return the operation name currently holding ``graphname``'s lock, + or ``None`` if the lock is free. Used by status endpoints so the UI + can reflect long-running work that's still in flight on the server. + """ + lock = _graph_locks.get(graphname) + if lock is None or not lock.locked(): + return None + return _current_operations.get(graphname) + + def raise_if_locked(graphname: str, operation: str = "operation"): """ Try to acquire lock or raise HTTPException with 409 Conflict status. diff --git a/common/utils/image_data_extractor.py b/common/utils/image_data_extractor.py index 575264a..6be929c 100644 --- a/common/utils/image_data_extractor.py +++ b/common/utils/image_data_extractor.py @@ -1,6 +1,7 @@ import base64 import io import logging +import os from langchain_core.messages import HumanMessage, SystemMessage from common.config import get_llm_service, get_multimodal_config @@ -9,6 +10,54 @@ _multimodal_client = None _multimodal_provider = None + +def _graphrag_cfg(graphname=None) -> dict: + try: + from common.config import get_graphrag_config + return get_graphrag_config(graphname) or {} + except Exception: + return {} + + +def should_extract_images(graphname=None) -> bool: + """Whether to run the multimodal LLM on extracted images. Resolved from + per-graph or global ``graphrag_config.extract_images``; defaults to True.""" + cfg = _graphrag_cfg(graphname) + if "extract_images" in cfg: + return bool(cfg["extract_images"]) + return True + + +def min_image_dim_px(graphname=None) -> int: + """Smallest side (in px) an image must have to be sent to the LLM. 
+ Resolved from per-graph or global ``graphrag_config.min_image_dim_px``; + defaults to 100.""" + cfg = _graphrag_cfg(graphname) + try: + return int(cfg.get("min_image_dim_px", 100)) + except (TypeError, ValueError): + return 100 + + +def image_describe_workers(graphname=None) -> int: + """Per-PDF thread-pool size for the multimodal describe pass. + Reuses ``graphrag_config.default_concurrency`` (defaults to 10) so + deployments only tune one concurrency knob.""" + cfg = _graphrag_cfg(graphname) + try: + return max(1, int(cfg.get("default_concurrency", 10))) + except (TypeError, ValueError): + return 10 + + +def is_decorative(description: str) -> bool: + """True when the multimodal LLM signalled the image carries no + retrieval-worthy content. Robust to trailing punctuation / case.""" + if not description: + return False + cleaned = description.strip().lower().rstrip(".").strip() + return cleaned == "decorative image" + def _get_client(): global _multimodal_client, _multimodal_provider if _multimodal_client is None and get_multimodal_config(): @@ -38,7 +87,6 @@ def describe_image_with_llm(file_path): """ try: from PIL import Image as PILImage - import os import time client = _get_client() @@ -67,17 +115,28 @@ def describe_image_with_llm(file_path): "captions, and footnotes; (2) the data and structure of " "any chart, graph, or table — name the chart type, the " "axes / columns, and the values or trend the chart " - "actually shows; (3) the entities, relationships, or " - "process steps in any diagram or flowchart; (4) any logo " - "or branding mark, identified by name. Do NOT describe " - "layout, background color, decorative styling, slide " - "templates, or generic visual impressions — those add " - "no retrieval value. If the image is purely decorative " - "(no text, no data, no diagram), reply with just " - "\"decorative image\" and nothing else. Respond as a " - "SINGLE plain-text paragraph — no markdown headings, no " - "bullet lists, no blank lines. 
The reply is used " - "verbatim as the alt-text inside `![alt](url)`." + "actually shows. For time-series charts (line / bar / " + "stacked bar with a time-period axis), TRANSCRIBE every " + "(period, value) pair you can read in the format " + "`period: value; period: value; …` — do not summarize " + "the trend in place of the values; (3) the entities, " + "relationships, or process steps in any diagram or " + "flowchart; (4) any logo or branding mark, identified by " + "name. Do NOT describe layout, background color, " + "decorative styling, slide templates, or generic visual " + "impressions — those add no retrieval value. Write the " + "description in the same language as the text inside the " + "image; if the image has no text, infer the document's " + "language from any visible labels, captions, or branding " + "and match that. Default to English only if no language " + "signal is present. EXCEPTION: if the image is purely " + "decorative (no text, no data, no diagram), reply with " + "exactly the English phrase \"decorative image\" " + "(lowercase, no punctuation, no translation) and nothing " + "else — this is a fixed sentinel, never localized. " + "Respond as a SINGLE plain-text paragraph — no markdown " + "headings, no bullet lists, no blank lines. The reply is " + "used verbatim as the alt-text inside `![alt](url)`." 
), }, _build_image_content_block(image_base64, "image/jpeg"), diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py index d8df543..4459f60 100644 --- a/common/utils/text_extractors.py +++ b/common/utils/text_extractors.py @@ -236,10 +236,16 @@ def _write_to_jsonl(self, jsonl_file, doc_entries): json_line = json.dumps(doc_data, ensure_ascii=False) f.write(json_line + '\n') - async def _process_folder_async(self, folder_path, graphname, temp_folder, max_concurrent=10): + async def _process_folder_async(self, folder_path, graphname, temp_folder, filenames=None, max_concurrent=10): """ Async version of process_folder for parallel file processing. Creates one JSONL file per input file. + + When *filenames* is supplied, only files whose basename appears + in that list are processed; everything else in the folder is + ignored. This lets a caller (e.g. the sample-doc schema-extraction + flow) reuse a shared upload directory without re-converting + files that belong to a previous request. """ logger.info(f"Processing local folder ASYNC: {folder_path} for graph: {graphname} (max_concurrent={max_concurrent})") @@ -255,10 +261,14 @@ async def _process_folder_async(self, folder_path, graphname, temp_folder, max_c os.makedirs(temp_folder, exist_ok=True) logger.info(f"Saving processed documents to: {temp_folder}") + allowed_basenames = set(filenames) if filenames is not None else None + def safe_walk(path): try: for item in path.iterdir(): - if item.name.startswith(('.', '~', '$')) or 'BROMIUM' in item.name.upper(): + # ``_schema_*`` subdirs hold sample-doc staging + # and must not be re-ingested as regular documents. 
+ if item.name.startswith(('.', '~', '$', '_schema_')) or 'BROMIUM' in item.name.upper(): continue if item.is_file(): yield item @@ -274,6 +284,8 @@ def safe_walk(path): if file_path.is_file(): if file_path.name.startswith(('.', '~', '$')) or 'BROMIUM' in file_path.name.upper(): continue + if allowed_basenames is not None and file_path.name not in allowed_basenames: + continue file_ext = file_path.suffix.lower() if file_ext == '.jsonl': dest = os.path.join(temp_folder, file_path.name) @@ -451,7 +463,15 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None): try: import pymupdf4llm from PIL import Image as PILImage - from common.utils.image_data_extractor import describe_image_with_llm + from common.utils.image_data_extractor import ( + describe_image_with_llm, + image_describe_workers, + is_decorative, + min_image_dim_px, + should_extract_images, + ) + + _is_decorative = is_decorative # Ensure clean slate - remove folder if it exists from failed previous run if image_output_folder.exists(): @@ -527,40 +547,47 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None): "position": 0 }] # Phase 1 — describe + base64-encode every image in parallel. - # Each worker hits Bedrock for the description and reads the - # image off disk, so they're I/O-bound; a small thread pool - # cuts wall-clock proportionally for image-heavy PDFs. - # Markdown mutations stay in phase 2 (next loop) because - # insert_description_by_id / replace_path_with_tg_protocol - # mutate the same shared string and must run in deterministic - # order. Concurrency cap is intentionally small to stay below - # Bedrock's per-account throttle. - image_workers = int(os.environ.get("PDF_IMAGE_CONCURRENCY", "8")) + # Each worker is I/O-bound (one multimodal request + a disk read), + # so a thread pool cuts wall-clock proportionally for image-heavy + # PDFs. 
Markdown mutations stay in phase 2 because + # insert_description_by_id / replace_path_with_tg_protocol mutate + # the same shared string and must run in deterministic order. + image_workers = image_describe_workers(graphname) + extract_images_enabled = should_extract_images(graphname) + min_dim = min_image_dim_px(graphname) def _describe_and_encode(img_ref: dict) -> dict: """Run on a worker thread. Returns one of: * ``{"ok": True, "img_ref", "description", "image_base64", "width", "height"}`` + * ``{"ok": True, "img_ref", "skip": True}`` for decorative + or too-small images that should be dropped from the JSONL * ``{"ok": False, "img_ref", "error"}`` Never raises. """ try: img_path = Path(img_ref["path"]) - description = describe_image_with_llm(str(img_path)) - pil_image = PILImage.open(img_path) - if pil_image.mode != "RGB": - pil_image = pil_image.convert("RGB") - buffer = io.BytesIO() - pil_image.save(buffer, format="JPEG", quality=95) - image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8") - return { - "ok": True, - "img_ref": img_ref, - "description": description, - "image_base64": image_base64, - "width": pil_image.width, - "height": pil_image.height, - } + with PILImage.open(img_path) as pil_image: + too_small = ( + pil_image.width < min_dim or pil_image.height < min_dim + ) + if not extract_images_enabled or too_small: + return {"ok": True, "skip": True, "img_ref": img_ref} + description = describe_image_with_llm(str(img_path)) + if _is_decorative(description): + return {"ok": True, "skip": True, "img_ref": img_ref} + rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB") + buffer = io.BytesIO() + rgb_image.save(buffer, format="JPEG", quality=95) + image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8") + return { + "ok": True, + "img_ref": img_ref, + "description": description, + "image_base64": image_base64, + "width": pil_image.width, + "height": pil_image.height, + } except Exception as img_error: # noqa: 
BLE001 — keep going return {"ok": False, "img_ref": img_ref, "error": img_error} @@ -593,6 +620,16 @@ def _describe_and_encode(img_ref: dict) -> dict: ) continue + if d.get("skip"): + skipped_path = img_ref.get("path", "") + if skipped_path: + markdown_content = re.sub( + r'!\[.*?\]\(' + re.escape(skipped_path) + r'\)', + "", + markdown_content, + ) + continue + image_id = img_ref["image_id"] markdown_content = insert_description_by_id( markdown_content, image_id, d["description"] @@ -658,12 +695,29 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None): """ try: from PIL import Image as PILImage - from common.utils.image_data_extractor import describe_image_with_llm + from common.utils.image_data_extractor import ( + describe_image_with_llm, + is_decorative, + min_image_dim_px, + should_extract_images, + ) pil_image = PILImage.open(file_path) - if pil_image.width < 100 or pil_image.height < 100: - pass + min_dim = min_image_dim_px(graphname) + if not should_extract_images(graphname) or ( + pil_image.width < min_dim or pil_image.height < min_dim + ): + logger.info( + f"Skipping standalone image {file_path}: decorative or below " + f"min dimension ({min_dim}px)" + ) + return [] description = describe_image_with_llm(str(Path(file_path).absolute())) + if is_decorative(description): + logger.info( + f"Skipping standalone image {file_path}: LLM marked as decorative" + ) + return [] buffer = io.BytesIO() if pil_image.mode != 'RGB': pil_image = pil_image.convert('RGB') diff --git a/docs/tutorials/configs/server_config.json b/docs/tutorials/configs/server_config.json index da46e28..2ee25f4 100644 --- a/docs/tutorials/configs/server_config.json +++ b/docs/tutorials/configs/server_config.json @@ -3,7 +3,6 @@ "hostname": "http://tigergraph", "restppPort": "14240", "gsPort": "14240", - "getToken": false, "default_timeout": 300, "default_mem_threshold": 5000, "default_thread_limit": 8 diff --git a/docs/tutorials/configs/server_config.json.gemini 
b/docs/tutorials/configs/server_config.json.gemini index 1e8740e..7a2da90 100644 --- a/docs/tutorials/configs/server_config.json.gemini +++ b/docs/tutorials/configs/server_config.json.gemini @@ -3,7 +3,6 @@ "hostname": "http://tigergraph", "restppPort": "14240", "gsPort": "14240", - "getToken": false, "default_timeout": 300, "default_mem_threshold": 5000, "default_thread_limit": 8 diff --git a/docs/tutorials/configs/server_config.json.openai b/docs/tutorials/configs/server_config.json.openai index da46e28..2ee25f4 100644 --- a/docs/tutorials/configs/server_config.json.openai +++ b/docs/tutorials/configs/server_config.json.openai @@ -3,7 +3,6 @@ "hostname": "http://tigergraph", "restppPort": "14240", "gsPort": "14240", - "getToken": false, "default_timeout": 300, "default_mem_threshold": 5000, "default_thread_limit": 8 diff --git a/graphrag-ui/src/actions/ActionProvider.tsx b/graphrag-ui/src/actions/ActionProvider.tsx index c73c182..196a5b4 100644 --- a/graphrag-ui/src/actions/ActionProvider.tsx +++ b/graphrag-ui/src/actions/ActionProvider.tsx @@ -88,13 +88,20 @@ const ActionProvider: React.FC = ({ ); const { sendMessage, lastMessage, readyState } = useWebSocket(WS_URL, { onOpen: () => { - // Send authentication credentials - const creds = sessionStorage.getItem("creds"); - console.log("Sending credentials, length:", creds ? creds.length : 0); - queryGraphragWs2(creds!); - - // Send RAG pattern - //sendMessage(selectedRagPattern); + // Defensive: the route guard normally ensures ``auth`` is set + // before the chat page mounts, but idle-timeout expiry mid-session + // or a logout from another tab can clear it before the WebSocket + // (re)opens. Without this check we'd send "null" as the auth + // header and the server would close the WebSocket with no + // user-actionable message. + const creds = sessionStorage.getItem("auth"); + if (!creds) { + console.error("No auth credentials available; redirecting to login"); + alert("Your session has expired. 
Please log in again."); + window.location.href = "/"; + return; + } + queryGraphragWs2(creds); // Send conversation ID (or "new" for new conversation) const conversationId = conversationManager.getCurrentConversationId(); diff --git a/graphrag-ui/src/components/CustomChatMessage.tsx b/graphrag-ui/src/components/CustomChatMessage.tsx index 43937ef..4043c24 100755 --- a/graphrag-ui/src/components/CustomChatMessage.tsx +++ b/graphrag-ui/src/components/CustomChatMessage.tsx @@ -105,7 +105,7 @@ const AuthenticatedImage: FC<{ src: string; alt: string }> = ({ src, alt }) => { const fetchImage = async () => { try { // Get credentials from sessionStorage (same pattern as Interact.tsx and SideMenu.tsx) - const creds = sessionStorage.getItem("creds"); + const creds = sessionStorage.getItem("auth"); if (!creds) { console.error("No credentials found in sessionStorage"); setError(true); @@ -119,7 +119,7 @@ const AuthenticatedImage: FC<{ src: string; alt: string }> = ({ src, alt }) => { // Fetch image with authentication header const response = await fetch(src, { headers: { - Authorization: `Basic ${creds}`, + Authorization: creds!, }, credentials: 'include', // Include credentials in CORS requests }); @@ -259,7 +259,7 @@ export const CustomChatMessage: FC = ({ // HTTPBasic returns 401 + ``WWW-Authenticate: Basic`` and // the browser pops up its native auth dialog. Better to // tell the user to sign in again than to flash that popup. - const creds = sessionStorage.getItem("creds"); + const creds = sessionStorage.getItem("auth"); if (!creds) { await alert("Your session has expired. Please log in again."); return; @@ -270,7 +270,7 @@ export const CustomChatMessage: FC = ({ try { const probe = await fetch(`/ui/trace/${messageId}`, { method: "GET", - headers: { Authorization: `Basic ${creds}` }, + headers: { Authorization: creds! 
}, }); if (!probe.ok) { await alert("Trace log not found."); diff --git a/graphrag-ui/src/components/Interact.tsx b/graphrag-ui/src/components/Interact.tsx index ae93539..f5fff48 100644 --- a/graphrag-ui/src/components/Interact.tsx +++ b/graphrag-ui/src/components/Interact.tsx @@ -39,14 +39,14 @@ export const Interactions: FC = ({ const canViewTrace = isSuperuser || isGlobalDesigner || isGraphAdmin; const sendFeedback = async (action: Feedback, message: Message) => { - const creds = sessionStorage.getItem("creds"); + const creds = sessionStorage.getItem("auth"); setFeedback(action); message.feedback = action; await fetch(`${GRAPHRAG_URL}/ui/feedback`, { method: "POST", body: JSON.stringify(message), headers: { - Authorization: `Basic ${creds}`, + Authorization: creds!, "Content-Type": "application/json", }, }); diff --git a/graphrag-ui/src/components/Login.tsx b/graphrag-ui/src/components/Login.tsx index d753124..170c712 100644 --- a/graphrag-ui/src/components/Login.tsx +++ b/graphrag-ui/src/components/Login.tsx @@ -1,100 +1,116 @@ "use client"; -import { useContext, createContext, useState, useEffect } from "react"; +import { useState, useEffect } from "react"; import { useNavigate } from "react-router-dom"; import { useTranslation } from "react-i18next"; -import { zodResolver } from "@hookform/resolvers/zod"; -import { useForm } from "react-hook-form"; -import { z } from "zod"; - import { Button } from "@/components/ui/button"; -import { - Form, - FormControl, - FormField, - FormItem, - FormDescription, - FormMessage, -} from "@/components/ui/form"; import { Input } from "@/components/ui/input"; import { LANGUAGES } from "../constants"; -const formSchema = z.object({ - email: z.string().min(2, { - message: "Username must be at least 2 characters.", - }), - password: z.string().min(2, { - message: "Password must be at least 2 characters.", - }), -}); - +// TigerGraph's native sentinel for secret-based auth. 
pyTigerGraph +// handles this directly when sent as plain username/password. +const SECRET_USERNAME = "__GSQL__secret"; const WS_URL = "/ui/ui-login"; +type TokenType = "apiToken" | "secret"; + +// Style applied to every credential input so Chrome on macOS doesn't +// clip descenders / underscores when rendering long values. +const INPUT_CLIP_FIX: React.CSSProperties = { + WebkitAppearance: "none", + appearance: "none", + lineHeight: "1.5", +}; + +const INPUT_STYLE = "dark:border-[#3D3D3D] h-14 py-3 dark:bg-shadeA"; + export function Login() { const { i18n, t } = useTranslation(); - const [user, setUser] = useState(""); - const [token, setToken] = useState(sessionStorage.getItem("site") || ""); + const [useTokenLogin, setUseTokenLogin] = useState(false); + const [tokenType, setTokenType] = useState("apiToken"); + const [email, setEmail] = useState(""); + const [password, setPassword] = useState(""); + const [tokenValue, setTokenValue] = useState(""); const [hint, setHint] = useState(""); + const [submitting, setSubmitting] = useState(false); const navigate = useNavigate(); useEffect(() => { - const parseStore = JSON.parse(sessionStorage.getItem("site") || "{}"); - setToken(parseStore); - }, []); - - const loginAction = async (data: z.infer) => { - const creds = btoa(`${data.email}:${data.password}`); - const username = data.email; + setHint(""); + }, [useTokenLogin, tokenType, email, password, tokenValue]); + + const loginAction = async (e: React.FormEvent) => { + e.preventDefault(); + + // Build the full Authorization header value. API tokens use + // ``Bearer ``; classic user/pass and TG ``__GSQL__secret`` + // logins use ``Basic ``. Backend resolves the real TG + // identity from SHOW USER and returns it in the response. + let authHeader: string; + let typedUsername = ""; + if (useTokenLogin) { + const value = tokenValue.trim(); + if (value.length < 8) { + setHint( + tokenType === "apiToken" + ? "Please enter an API token." + : "Please enter a secret." 
+ ); + return; + } + if (tokenType === "apiToken") { + authHeader = `Bearer ${value}`; + } else { + authHeader = `Basic ${btoa(`${SECRET_USERNAME}:${value}`)}`; + } + } else { + const username = email.trim(); + if (username.length < 2 || password.length < 2) { + setHint("Please enter your username and password."); + return; + } + authHeader = `Basic ${btoa(`${username}:${password}`)}`; + typedUsername = username; + } + setSubmitting(true); try { - const res = await fetch("/ui/ui-login", { + const res = await fetch(WS_URL, { method: "POST", - headers: { - Authorization: `Basic ${creds}`, - }, + headers: { Authorization: authHeader }, }); if (res.ok) { const data = await res.json(); - sessionStorage.setItem("creds", creds); + sessionStorage.setItem("auth", authHeader); sessionStorage.setItem("site", JSON.stringify(data)); - setUser(username); - sessionStorage.setItem("username", username); + // Server-resolved username works in every mode; fall back to + // the typed value for classic logins on older backends. + const resolved = + (typeof data.username === "string" && data.username) || + typedUsername; + if (resolved) sessionStorage.setItem("username", resolved); + else sessionStorage.removeItem("username"); navigate("/chat"); } else if (res.status === 401 || res.status === 403) { - setHint("Invalid credentials"); - navigate("/"); + setHint( + useTokenLogin ? "Invalid or unauthorized token." : "Invalid credentials." + ); } else { setHint(`Server error (${res.status}). Please try again later.`); - navigate("/"); } } catch { setHint("Unable to connect to the server. 
Please try again later."); - navigate("/"); + } finally { + setSubmitting(false); } }; - const logOut = () => { - setUser(""); - setToken(""); - sessionStorage.removeItem("site"); - navigate("/"); - }; - - const form = useForm>({ - resolver: zodResolver(formSchema), - defaultValues: { - email: "", - password: "", - }, - }); - const onChangeLang = (e: React.ChangeEvent) => { - const lang_code = e.target.value; - i18n.changeLanguage(lang_code); + i18n.changeLanguage(e.target.value); }; return ( @@ -106,70 +122,101 @@ export function Login() {
TigerGraph GraphRAG -

+

{t("login")}

-
- - ( - <> - - - - - - - - - )} - /> - ( - <> - - - - - - - - - )} + + {useTokenLogin ? ( + <> +
+ + +
+
+ setTokenValue(e.target.value)} + autoComplete="off" + className={INPUT_STYLE} + style={INPUT_CLIP_FIX} + /> +
+ + ) : ( + <> +
+ setEmail(e.target.value)} + autoComplete="username" + className={INPUT_STYLE} + style={INPUT_CLIP_FIX} + /> +
+
+ setPassword(e.target.value)} + autoComplete="current-password" + className={INPUT_STYLE} + style={INPUT_CLIP_FIX} + /> +
+ + )} + +
+ { + setUseTokenLogin(e.target.checked); + setTokenValue(""); + setPassword(""); + }} /> - {/* - {t("forgotPassword")} - */} - - - {
-
- - {hint} - -
} - - {/* - {t("signUp")} - */} - - + +
+ + + +
+
+ + {hint} + +
+ setExtractImages(e.target.checked)} + /> + + +

+ Sends each extracted image to the multimodal LLM for alt-text. Disable to skip image content entirely. +

+ + +
+ + setMinImageDimPx(e.target.value)} + disabled={!extractImages} + /> +

+ Smallest side (in px) an image must have to be described. +

+
)} @@ -994,7 +1073,7 @@ const GraphRAGConfig = () => { )} - - - setDraftProposal((p) => - p - ? { - ...p, - vertices: p.vertices.map((vv, i) => - i === vIdx ? { ...vv, name: e.target.value } : vv - ), - } - : p - ) - } - disabled={isInitializing || isExtractingSchema} - className="flex-1 h-8 text-sm dark:border-[#3D3D3D] dark:bg-shadeA" - /> +
+ + setDraftProposal((p) => + p + ? { + ...p, + vertices: p.vertices.map((vv, i) => + i === vIdx ? { ...vv, name: e.target.value } : vv + ), + } + : p + ) + } + disabled={isInitializing || isExtractingSchema} + className="flex-1 bg-transparent outline-none border-0 p-0 text-sm text-black dark:text-white placeholder:text-muted-foreground disabled:opacity-50" + style={INPUT_CLIP_FIX} + /> +
{collapsedVertices.has(vIdx) && ( {v.attributes.length} attr{v.attributes.length === 1 ? "" : "s"} @@ -1568,26 +1603,30 @@ const KGAdmin = () => { {!collapsedVertices.has(vIdx) && (<> - - setDraftProposal((p) => - p - ? { - ...p, - vertices: p.vertices.map((vv, i) => - i === vIdx - ? { ...vv, description: e.target.value } - : vv - ), - } - : p - ) - } - disabled={isInitializing || isExtractingSchema} - className="h-8 text-sm dark:border-[#3D3D3D] dark:bg-shadeA" - /> +
+ + setDraftProposal((p) => + p + ? { + ...p, + vertices: p.vertices.map((vv, i) => + i === vIdx + ? { ...vv, description: e.target.value } + : vv + ), + } + : p + ) + } + disabled={isInitializing || isExtractingSchema} + className="flex-1 bg-transparent outline-none border-0 p-0 text-sm text-black dark:text-white placeholder:text-muted-foreground disabled:opacity-50" + style={INPUT_CLIP_FIX} + /> +
Attributes ({v.attributes.length}); primary key id auto-added {attributesCollapsed && ( @@ -1802,26 +1841,30 @@ const KGAdmin = () => { > {collapsedEdges.has(eIdx) ? "▶" : "▼"} - - setDraftProposal((p) => - p - ? { - ...p, - edges: p.edges.map((ee, i) => - i === eIdx - ? { ...ee, name: ev.target.value } - : ee - ), - } - : p - ) - } - disabled={isInitializing || isExtractingSchema} - className="flex-1 h-8 text-sm dark:border-[#3D3D3D] dark:bg-shadeA" - /> +
+ + setDraftProposal((p) => + p + ? { + ...p, + edges: p.edges.map((ee, i) => + i === eIdx + ? { ...ee, name: ev.target.value } + : ee + ), + } + : p + ) + } + disabled={isInitializing || isExtractingSchema} + className="flex-1 bg-transparent outline-none border-0 p-0 text-sm text-black dark:text-white placeholder:text-muted-foreground disabled:opacity-50" + style={INPUT_CLIP_FIX} + /> +
{collapsedEdges.has(eIdx) && ( {e.pairs.length} pair{e.pairs.length === 1 ? "" : "s"}, {e.attributes.length} attr @@ -1847,26 +1890,30 @@ const KGAdmin = () => {
{!collapsedEdges.has(eIdx) && (<> - - setDraftProposal((p) => - p - ? { - ...p, - edges: p.edges.map((ee, i) => - i === eIdx - ? { ...ee, description: ev.target.value } - : ee - ), - } - : p - ) - } - disabled={isInitializing || isExtractingSchema} - className="h-8 text-sm dark:border-[#3D3D3D] dark:bg-shadeA" - /> +
+ + setDraftProposal((p) => + p + ? { + ...p, + edges: p.edges.map((ee, i) => + i === eIdx + ? { ...ee, description: ev.target.value } + : ee + ), + } + : p + ) + } + disabled={isInitializing || isExtractingSchema} + className="flex-1 bg-transparent outline-none border-0 p-0 text-sm text-black dark:text-white placeholder:text-muted-foreground disabled:opacity-50" + style={INPUT_CLIP_FIX} + /> +
Endpoints (FROM → TO):
@@ -2191,7 +2238,6 @@ const KGAdmin = () => {