tigergraph · chengbiao-jin · Jun 9, 2026 · May 17, 2026 · May 19, 2026 · May 21, 2026
diff --git a/.github/workflows/onprem-build-nightly.yaml b/.github/workflows/onprem-build-nightly.yaml
@@ -77,6 +77,11 @@ jobs:
       with:
         context: graphrag-ui/
         file: ./graphrag-ui/Dockerfile
+        # The UI Dockerfile reads VERSION from a named ``repo`` context.
+        # Without this entry the build treats ``repo`` as an image ref
+        # and tries to pull docker.io/library/repo:latest.
+        build-contexts: |
+          repo=.
         push: true
         tags: |
            ${{ env.IMAGE }}

diff --git a/.github/workflows/onprem-build-test.yaml b/.github/workflows/onprem-build-test.yaml
@@ -63,6 +63,11 @@ jobs:
       with:
         context: graphrag-ui/
         file: ./graphrag-ui/Dockerfile
+        # The UI Dockerfile reads VERSION from a named ``repo`` context.
+        # Without this entry the build treats ``repo`` as an image ref
+        # and tries to pull docker.io/library/repo:latest.
+        build-contexts: |
+          repo=.
         push: true
         tags: |
           tigergraph/graphrag-ui:${{steps.get-image.outputs.IMAGE}}

diff --git a/.github/workflows/onprem-build.yaml b/.github/workflows/onprem-build.yaml
@@ -43,7 +43,22 @@ jobs:
         echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT
         VERSION=$(cat VERSION)
         echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
-
+
+    # Only builds from main move the :latest tag. Rebuilds of older
+    # release branches (e.g. release_1.3.1) set push_latest=false so a
+    # backport build doesn't overwrite the most recent published image;
+    # the build steps' tags lists read steps.latest-check.outputs.push_latest.
+    - name: Decide whether to update :latest
+      id: latest-check
+      run: |
+        if [ "${GITHUB_REF}" = "refs/heads/main" ]; then
+          echo "push_latest=true" >> "$GITHUB_OUTPUT"
+          echo "Building from main — :latest will be updated."
+        else
+          echo "push_latest=false" >> "$GITHUB_OUTPUT"
+          echo "Not on main (ref=${GITHUB_REF}) — :latest will be left as-is."
+        fi
+
     - name: Build and push Docker image GraphRAG
       uses: docker/build-push-action@v5
       with:
@@ -53,8 +68,8 @@ jobs:
         tags: |
           tigergraph/graphrag:${{steps.get-image.outputs.IMAGE}}
           tigergraph/graphrag:${{steps.get-image.outputs.VERSION}}
-          tigergraph/graphrag:latest
- 
+          ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/graphrag:latest' || '' }}
+
     - name: Build and push Docker image ECC
       uses: docker/build-push-action@v5
       with:
@@ -64,7 +79,7 @@ jobs:
         tags: |
           tigergraph/graphrag-ecc:${{steps.get-image.outputs.IMAGE}}
           tigergraph/graphrag-ecc:${{steps.get-image.outputs.VERSION}}
-          tigergraph/graphrag-ecc:latest
+          ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/graphrag-ecc:latest' || '' }}
 
     - name: Build and push Docker image chat-history
       uses: docker/build-push-action@v5
@@ -75,18 +90,23 @@ jobs:
         tags: |
           tigergraph/chat-history:${{steps.get-image.outputs.IMAGE}}
           tigergraph/chat-history:${{steps.get-image.outputs.VERSION}}
-          tigergraph/chat-history:latest
-     
+          ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/chat-history:latest' || '' }}
+
     - name: Build and push Docker image graphrag-ui
       uses: docker/build-push-action@v5
       with:
         context: graphrag-ui/
         file: ./graphrag-ui/Dockerfile
+        # The UI Dockerfile reads VERSION from a named ``repo`` context.
+        # Without this entry the build treats ``repo`` as an image ref
+        # and tries to pull docker.io/library/repo:latest.
+        build-contexts: |
+          repo=.
         push: true
         tags: |
           tigergraph/graphrag-ui:${{steps.get-image.outputs.IMAGE}}
           tigergraph/graphrag-ui:${{steps.get-image.outputs.VERSION}}
-          tigergraph/graphrag-ui:latest
+          ${{ steps.latest-check.outputs.push_latest == 'true' && 'tigergraph/graphrag-ui:latest' || '' }}
 
 #    - name: Set SSH key
 #      run: |

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,28 @@
 # Changelog
 
+## [1.4.1]
+
+### Added
+- **Token login** — the sign-in page adds a "Use token login" option with a choice of API Token or Secret, alongside the default username / password. The signed-in username and roles are resolved from TigerGraph after login so the UI shows the real user.
+- **Pre-flight upload conflict check** — a new endpoint reports which planned filenames already exist before the bytes are sent. The upload dialog uses it to prompt the user once with the conflicting names and offer Replace or Skip; large files no longer have to cross the wire twice when a collision is hit.
+
+### Changed
+- **Every request authenticates as the signed-in user**, end to end — graph operations, chat history, traces, and knowledge-graph rebuilds all run under the caller's identity (username / password, secret, or API token).
+- **TigerGraph token handling is automatic** — an API token is obtained from the caller's credentials only when the database requires one, unless a static API token is configured. The `getToken` config option is no longer needed and is now ignored.
+- **Sample documents are visible in the upload dialog after schema extraction.** Earlier, files used for schema extraction landed in a hidden per-request subdirectory and disappeared from the dialog. They now live alongside regular uploads, and overwriting one drops the cached extract so the next ingest sees the new bytes.
+- **Schema extraction requires an explicit sample list.** The endpoint no longer treats a missing or empty `filenames` field as "use every JSONL in the temp folder," which silently mixed in stale samples from prior sessions. Callers must name each sample explicitly.
+- **One schema extraction at a time per graph.** Concurrent attempts on the same graph are rejected with 409 instead of racing on the shared sample folder.
+- **Document Ingestion dialog reflects server-side state on reopen.** Closing the dialog mid-conversion and reopening it no longer leaves the *Ingest* button incorrectly enabled. The dialog asks the server which operation, if any, currently holds the graph lock, and polls until that operation completes — so the button stays disabled, the uploaded files list re-populates, and the next upload doesn't collide with the prior conversion.
+- **Conflict prompts use the app's styled dialog** instead of the browser default. Choosing *Cancel* now aborts the upload cleanly — the button and status message reset right away.
+- **The vector store recovers without a container restart.** When the initial connection to TigerGraph fails (e.g. cold start, transient network blip), the service used to stay broken until the operator restarted the container — chat connections were rejected silently with WebSocket close 1013. The vector store now retries automatically in the background (10s → 30s → 60s → 120s → 300s backoff), and a new `POST /ui/admin/retry_embedding_store` endpoint lets superusers force a retry immediately after fixing the underlying issue.
+- **Chat stays available when vector search is unavailable.** The chat WebSocket no longer closes hard with 1013 on vector-store failures. Instead it accepts the connection, surfaces a notice to the client, and lets graph-traversal questions answer normally — only questions that genuinely require a vector lookup fail, and they fail gracefully through the synthesizer.
+- **PDF ingestion is faster on image-heavy documents.** Image-description workers now run with a larger parallel pool, and tiny decorative images skip the multimodal LLM entirely. On AWS Bedrock deployments the connection pool default is also raised so concurrent describe calls no longer queue behind a 20-connection cap.
+- **Image description is tunable per graph or globally.** Two new `graphrag_config` keys — `extract_images` and `min_image_dim_px` — control whether the multimodal LLM is invoked on extracted images and the smallest image dimension that goes to the LLM (smaller images skip the call). Both are editable from the *GraphRAG Configuration* page in the UI, globally or per graph. Disabling does not alter the Image vertex type or loading job, so re-enabling later requires no schema change. The multimodal describe pass now reuses `default_concurrency` instead of a separate knob, so one setting tunes parallelism across the pipeline.
+- **Community search falls back to hybrid search when it returns nothing or fails.** Auto-selected community queries that miss (no relevant community summaries) or hit a retriever error are now retried once with hybrid graph-hop search before returning a "couldn't find" answer. Manually-picked community search is unchanged.
+
+### Removed
+- **A configured static `apiToken` no longer overrides per-user credentials.** It is used only for the service's background operations; interactive requests always authenticate as the signed-in user.
+
 ## [1.4.0]
 
 ### Added

diff --git a/README.md b/README.md
@@ -423,7 +423,7 @@ For examples of how to ingest documents through the backend API, refer to the **
 ## More Detailed Configurations
 
 ### DB configuration
-Copy the below into `configs/server_config.json` and edit the `hostname` and `getToken` fields to match your database's configuration. If token authentication is enabled in TigerGraph, set `getToken` to `true`. Set the timeout, memory threshold, and thread limit parameters as desired to control how much of the database's resources are consumed when answering a question.
+Copy the below into `configs/server_config.json` and edit the `hostname` field to match your database's configuration. Token authentication is handled automatically — an API token is obtained from the username/password when the database requires one, unless a static API token is configured. Set the timeout, memory threshold, and thread limit parameters as desired to control how much of the database's resources are consumed when answering a question.
 
 ```json
 {
@@ -433,7 +433,6 @@ Copy the below into `configs/server_config.json` and edit the `hostname` and `ge
         "gsPort": "14240",
         "username": "tigergraph",
         "password": "tigergraph",
-        "getToken": false,
         "default_timeout": 300,
         "default_mem_threshold": 5000,
         "default_thread_limit": 8
@@ -448,9 +447,8 @@ Copy the below into `configs/server_config.json` and edit the `hostname` and `ge
 | `gsPort` | string | `"14240"` | GSQL port for TigerGraph admin operations. |
 | `username` | string | `"tigergraph"` | TigerGraph database username. |
 | `password` | string | `"tigergraph"` | TigerGraph database password. |
-| `getToken` | bool | `false` | Set to `true` if token authentication is enabled on TigerGraph. |
 | `graphname` | string | `""` | Default graph name. Usually left empty (selected at runtime). |
-| `apiToken` | string | `""` | Pre-generated API token. If set, token-based auth is used instead of username/password. |
+| `apiToken` | string | `""` | Optional pre-generated token for the service's background operations. Interactive requests always authenticate as the signed-in user. |
 | `default_timeout` | int | `300` | Default query timeout in seconds. |
 | `default_mem_threshold` | int | `5000` | Memory threshold (MB) for query execution. |
 | `default_thread_limit` | int | `8` | Max threads for query execution. |
@@ -500,6 +498,8 @@ Copy the below code into `configs/server_config.json`. You shouldn’t need to c
 | `doc_process_switch` | bool | `true` | Enable/disable document processing during knowledge graph build. |
 | `entity_extraction_switch` | bool | same as `doc_process_switch` | Enable/disable entity extraction during knowledge graph build. |
 | `community_detection_switch` | bool | same as `entity_extraction_switch` | Enable/disable community detection during knowledge graph build. |
+| `extract_images` | bool | `true` | Run the multimodal LLM on images extracted from documents to generate alt-text. Set to `false` to skip the image-description pass entirely — much faster, at the cost of losing image content from retrieval. Configurable per graph. |
+| `min_image_dim_px` | int | `100` | Smallest side (in px) an image must have to be sent to the multimodal LLM. Smaller images are tagged "decorative image" without an LLM call. Configurable per graph. |
 | `load_batch_size` | int | `500` | Batch size for document loading. |
 | `upsert_delay` | int | `0` | Delay in seconds between loading batches. |
 | `default_concurrency` | int | `10` | Base concurrency level for parallel processing. Configurable per graph. |
@@ -773,12 +773,19 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
             "model_kwargs": {
                 "temperature": 0,
             },
+            "boto3_config": {
+                "max_pool_connections": 50,
+                "read_timeout": 300,
+                "retries": 5
+            },
             "prompt_path": "./common/prompts/aws_bedrock_claude3haiku/"
         }
     }
 }
 ```
 
+`boto3_config` is optional (the defaults shown above are also the built-in defaults). The same block can be set on `embedding_service` when using Bedrock embeddings.
+
 #### Ollama
 
 ```json
@@ -916,8 +923,9 @@ A bad answer at step 4 is rarely fixed by editing the response prompt; usually i
 | Answers miss context that's clearly in the source | chunks too small or no overlap | raise `chunk_size`; bump `overlap_size` (default 1/8 of `chunk_size`); lower `threshold` (`semantic`) |
 | Tables / figures get fragmented | wrong chunker for the source | use `markdown` for markdown / docs converted to markdown; use `html` for HTML pages with structure; use `regex` with a custom `pattern` for structured logs |
 | Cross-section reasoning fails | no overlap | increase `overlap_size` to ~25% of `chunk_size` |
+| Long tables get split mid-row and the answer loses column headers | `chunk_size` (default `2048`) is smaller than the table's serialized length | raise `chunker_config.chunk_size` to fit the largest table whole — for table-heavy regulatory / industry reports, **`4096`–`8192` is often the right range** |
 
-Default starting point for prose: `chunker: "semantic"`, `threshold: 0.95`, `chunker_config.method: "percentile"`. Move to `markdown` chunker with `chunk_size: 2048` and `overlap_size: 256` if your source is markdown-heavy and table integrity matters.
+Default starting point for prose: `chunker: "semantic"`, `threshold: 0.95`, `chunker_config.method: "percentile"`. Move to the `markdown` chunker with `chunk_size: 2048` and `overlap_size: 256` if your source is markdown-heavy and table integrity matters. For corpora dominated by large statistical tables (regulatory reports, fiscal yearbooks, multi-year financial summaries), start with the `markdown` or `html` chunker and `chunk_size: 8192` so each table stays in one chunk.
 
 ### 3. Extraction — make the graph clean before tuning retrieval
 
@@ -985,6 +993,11 @@ When customizing:
 - **`reuse_embedding: true`** skips re-embedding identical text — major saving on re-ingest of unchanged documents.
 - **Choose `llm_model` thoughtfully** — entity / relationship extraction tolerates cheaper / faster models (Haiku, Nova-lite, Flash); response synthesis benefits from stronger ones (Sonnet, GPT-4-class). The `multimodal_service` is independent — set it to a vision-capable model only when you actually ingest images.
 - **`load_batch_size`** and **`upsert_delay`** control ingestion pressure on TigerGraph. Defaults are fine for most loads; lower the batch size if you see write timeouts.
+- **Image-description speed.** On image-heavy documents, every image is sent to the multimodal LLM, which dominates ingest time. Tune via `graphrag_config` (global or per graph) — both knobs are also editable from the *GraphRAG Configuration* page in the UI:
+    - `extract_images` (default `true`) — set to `false` to skip image description entirely.
+    - `min_image_dim_px` (default `100`) — smaller images are tagged "decorative image" without an LLM call.
+    - Multimodal calls share the same `default_concurrency` semaphore as the rest of the pipeline — raise it to parallelize more describe calls; lower it if the multimodal provider's rate limit is hit.
+    - AWS Bedrock users can further tune connection pool sizing via `boto3_config` in `llm_config`.
 
 ### 7. A working tuning loop
 
@@ -1024,6 +1037,12 @@ First, make sure that all your LLM service provider configuration files are work
 docker compose up -d --build
 ```
 
+> **Windows developers:** the repo's top-level `configs/nginx.conf` and `configs/server_config.json` are symlinks that resolve on POSIX systems but not on Windows. Before running `docker compose up -d` from the repo root, overwrite them with the tutorial copies:
+> ```sh
+> cp docs/tutorials/configs/nginx.conf configs/nginx.conf
+> cp docs/tutorials/configs/server_config.json configs/server_config.json
+> ```
+
 If you want to use Weights And Biases for logging the test results, your WandB API key needs to be set in an environment variable on the host machine.
 
 ```sh

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.4.0
+1.4.1