diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index dcb39aa..dbaeb1f 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ "plugins": [ { "name": "kbagent", - "version": "0.52.1", + "version": "0.53.0", "source": "./plugins/kbagent", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "category": "development" diff --git a/CLAUDE.md b/CLAUDE.md index a95271f..6b5c228 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -447,6 +447,17 @@ kbagent component list [--project NAME] [--type TYPE] [--query QUERY] kbagent component detail --component-id ID [--project NAME] kbagent config new --component-id ID [--name NAME] [--project NAME] [--output-dir DIR] [--push --no-files --description D --configuration JSON|@file|- --configuration-file PATH --no-validate --branch ID --dry-run] +# sync: GitOps -- configs as local files. init/pull/push/diff are filesystem-local (no serve REST surface). +kbagent sync init --project ALIAS [--directory DIR] [--git-branching] [--adopt-existing] +kbagent sync pull --project ALIAS [--all-projects] [--force] [--dry-run] [--with-samples] [--no-storage] [--no-jobs] [--job-limit N] [--branch ID] +# `sync pull --force` is conflict-aware (since 0.53.0): locally-modified config whose remote is UNCHANGED is preserved (delta stays pushable, never silently re-stamped); a true merge conflict (local AND remote both changed since last pull) aborts (exit 1, SYNC_CONFLICT, --json lists details.conflicts); local-untouched + remote-changed takes remote. Discard local edits on purpose by deleting the file/dir then pulling. +kbagent sync status [--directory DIR] +kbagent sync diff --project ALIAS [--all-projects] [--directory DIR] [--branch ID] +kbagent sync push --project ALIAS [--all-projects] [--dry-run] [--force] [--allow-plaintext-on-encrypt-failure] [--branch ID] [--no-name-drift-warnings] +kbagent sync branch-link --project ALIAS (--branch-id ID | --branch-name NAME) [--directory DIR] +kbagent sync branch-unlink [--directory DIR] +kbagent sync branch-status [--directory DIR] + kbagent dev-portal identity add --alias A --username U [--password P | --password-stdin] [--role-hint vendor|admin] [--vendor V] [--portal-url URL] kbagent dev-portal identity list diff --git a/plugins/kbagent/.claude-plugin/plugin.json b/plugins/kbagent/.claude-plugin/plugin.json index 8f8a7ec..bdaf980 100644 --- a/plugins/kbagent/.claude-plugin/plugin.json +++ b/plugins/kbagent/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "kbagent", - "version": "0.52.1", + "version": "0.53.0", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "author": { "name": "Keboola", diff --git a/plugins/kbagent/agents/keboola-expert.md b/plugins/kbagent/agents/keboola-expert.md index e6b04f4..65871f6 100644 --- a/plugins/kbagent/agents/keboola-expert.md +++ b/plugins/kbagent/agents/keboola-expert.md @@ -304,6 +304,14 @@ success, not a failure. misleading "bucket not found" until the prod table is branch-local. Run `kbagent storage clone-table --project P --table-id T --branch ` first (one-way default->branch). See `gotchas.md`. +- **`sync pull --force` is conflict-aware, not a blind overwrite** (0.53.0+): + a locally-modified config whose remote is UNCHANGED is preserved (its pending + delta stays pushable); a true merge conflict (local AND remote both changed + since last pull) ABORTS the pull with exit 1 / `SYNC_CONFLICT` instead of + silently discarding work (pre-0.53.0 it silently corrupted the baseline and + stranded the edits). Safe to force-pull an unrelated config while you have + un-pushed edits elsewhere. To discard a local edit on purpose, delete the + file/dir then pull. See `gotchas.md`. - **`storage truncate-table` is row-only; schema and dependents are preserved** (0.32.0+): the underlying call is diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index a2be8c8..2072c7f 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -204,7 +204,7 @@ Requires the project to be added with its **master ('owner') Storage API token** ## Sync (GitOps) - `sync init --project ALIAS [--directory DIR] [--git-branching] [--adopt-existing]` -- initialize sync working directory; `--adopt-existing` (since v0.22.0) adopts a `.keboola/manifest.json` already written by the kbc Go CLI without overwriting (idempotent; validates `project_id` against the alias token) -- `sync pull --project ALIAS [--all-projects] [--force] [--dry-run] [--with-samples] [--no-storage] [--no-jobs] [--job-limit N] [--branch ID]` -- download configs to local files. For large projects (>100 configs), automatically fetches jobs per-config when the grouped API limit is insufficient. `--branch` (0.47.0+) per-invocation dev-branch override, beats every other branch source. +- `sync pull --project ALIAS [--all-projects] [--force] [--dry-run] [--with-samples] [--no-storage] [--no-jobs] [--job-limit N] [--branch ID]` -- download configs to local files. For large projects (>100 configs), automatically fetches jobs per-config when the grouped API limit is insufficient. `--force` is conflict-aware (since 0.53.0): a locally-modified config whose remote is unchanged is **preserved** (pending delta stays pushable, never silently re-stamped); a true merge conflict (local AND remote both changed since last pull) **aborts** the pull (exit 1, `SYNC_CONFLICT`; `--json` lists `details.conflicts`); local-untouched + remote-changed takes remote. To discard local edits on purpose, delete the file/dir and pull. `--branch` (0.47.0+) per-invocation dev-branch override, beats every other branch source. - `sync push --project ALIAS [--all-projects] [--dry-run] [--force] [--allow-plaintext-on-encrypt-failure] [--branch ID] [--no-name-drift-warnings]` -- push local changes (auto-encrypts secrets, fails if encryption fails). Fresh-CREATE writeback updates placeholder manifest entries in place (since 0.47.0) and propagates any `KBC.configuration.*` metadata via `set_config_metadata`. Fresh-CREATE variable binding (since 0.47.2): when a `keboola.variables` config + its values row are created alongside a transformation in the same push, the transformation's `variables_id` / `variables_values_id` placeholders are rebound to the assigned ULIDs and the row's `values` are hoisted even without a `_keboola` block, so `job run` succeeds with no post-push `config variables-set` step (unresolvable/ambiguous links surface a `variable_link` entry in `errors[]`, never a broken link). `--branch` (0.47.0+) per-invocation override; when no `/` subtree exists on disk (since 0.47.2) the local default tree (`main/`) is promoted to the target branch (API writes still target the branch id); `--no-name-drift-warnings` (0.47.0+) drops the cosmetic warnings array. - `sync diff --project ALIAS [--all-projects] [--branch ID]` -- 3-way diff (local vs base vs remote), detects conflicts. `--branch` (0.47.0+) per-invocation dev-branch override. - `sync status [--directory DIR]` -- show locally modified/added/deleted configs diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index cf982df..46ecc01 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -2426,3 +2426,24 @@ Enter just hung until Ctrl-C. The helper now branches on `sys.stdin.isatty()`: TTY uses `getpass.getpass()` (hidden, line-based, Enter confirms); pipe (`echo $PASS | kbagent dev-portal identity add --password-stdin`) still reads to EOF. + +## `sync pull --force` preserves un-pushed edits and aborts on conflict (since v0.53.0) + +`--force` is **conflict-aware**, not a blind overwrite. Pre-0.53.0 a force-pull +run while you had un-pushed local edits to a config whose remote was unchanged +**silently corrupted the sync baseline**: it re-stamped the manifest `pull_hash` +from the *edited* on-disk file, so `sync diff` / `sync push` then reported "in +sync" and shipped nothing while the remote still held the old config -- the edits +were stranded with no signal. Fixed: `--force` now branches on the 3-way state. + +- Local edited, remote unchanged -> **preserved** (pending delta stays pushable). +- Local edited AND remote also changed (true conflict) -> **aborts** with exit 1 + and error code `SYNC_CONFLICT`; the `--json` envelope carries + `details.conflicts: [{scope, component_id, config_id, config_name, path, row_id?}]`. + Resolve via `sync diff` then `sync push`-or-discard, then pull again. +- Local untouched, remote changed -> takes remote (unchanged behavior). + +Consequence: `--force` no longer discards non-conflicting local edits. To +intentionally drop a local edit, delete the file (or the config dir) and pull. +Applies at config and row granularity. `--all-projects` reports a per-project +conflict as that project's error without aborting the rest of the batch. diff --git a/plugins/kbagent/skills/kbagent/references/sync-workflow.md b/plugins/kbagent/skills/kbagent/references/sync-workflow.md index bab9f76..cd9a3ef 100644 --- a/plugins/kbagent/skills/kbagent/references/sync-workflow.md +++ b/plugins/kbagent/skills/kbagent/references/sync-workflow.md @@ -342,7 +342,8 @@ Stored in `.keboola/branch-mapping.json`: ## Key behaviors - **Pull is idempotent**: re-running pull when nothing changed writes zero files -- **Pull protects local edits**: modified files are skipped (use `--force` to overwrite) +- **Pull protects local edits**: locally-modified files are skipped by default +- **`--force` is conflict-aware (since 0.53.0)**: see below -- it no longer blindly overwrites - **Push only sends local changes**: remote_modified and conflict changes are skipped - **Encrypted values**: nonce differences are ignored in diff (no false positives) - **New configs**: push auto-assigns IDs from the API, updates manifest @@ -350,3 +351,23 @@ Stored in `.keboola/branch-mapping.json`: - **Jobs are per-config**: `_jobs.jsonl` shows recent N jobs (default 5) with status + timing - **Data samples auto-trim**: tables with >30 columns export only first 30 (API sync limit) - **Encrypted columns masked**: columns starting with `#` show `***ENCRYPTED***` in samples + +## `sync pull --force` is conflict-aware (since 0.53.0) + +`--force` no longer blindly overwrites locally-modified configs. It branches on +the 3-way diff state per config (and per row): + +- **Local edited, remote UNCHANGED** -> the file and its sync baseline are + **preserved**. The pending delta stays visible to `sync diff` / `sync push`. + `--force` does **not** discard the edit and does **not** silently re-stamp the + baseline (that was the pre-0.53.0 data-loss bug). +- **Local edited AND remote also changed** since the last pull (a true merge + conflict) -> the pull **aborts before writing anything** with exit code 1 and + error code `SYNC_CONFLICT`, listing every conflicting config/row. Resolve with + `sync diff`, then `sync push` your edits (or discard them), then pull again. +- **Local untouched, remote changed** -> `--force` takes remote as before. + +> Safe to run `sync pull --force` to refresh an unrelated config even while you +> have un-pushed edits elsewhere: non-conflicting edits survive; a real conflict +> stops you loudly instead of losing work. To intentionally drop a local edit, +> delete the file (or the config directory) and pull. diff --git a/pyproject.toml b/pyproject.toml index 6e6ee62..63a0acf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "keboola-agent-cli" -version = "0.52.1" +version = "0.53.0" description = "AI-friendly CLI for managing Keboola projects" readme = "README.md" requires-python = ">=3.12" diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 4df1e85..14b5864 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -8,6 +8,9 @@ # Ordered newest-first. Each value is a list of brief one-line descriptions. CHANGELOG: dict[str, list[str]] = { + "0.53.0": [ + 'Fix (`sync pull --force`, silent baseline corruption -> data loss): a config with un-pushed local edits is no longer silently de-synced when a force-pull runs while the remote is unchanged. Repro (reported on v0.51.1, project 5785): pull a config, edit its `_config.yml` (`sync diff` correctly shows `1 to update`), then run `sync pull --force` -- typically to resolve an *unrelated* config\'s conflict. Pre-0.53.0, `--force` skipped the "locally modified" guard in `SyncService.pull()`, so for a config whose remote had not changed the `remote_unchanged` short-circuit re-stamped the manifest `pull_hash` from the *edited on-disk file*. Afterwards `sync diff` and `sync push` both reported "in sync" and a real `push` shipped nothing, while the live remote still held the old config -- the local edits were stranded with no visible signal. Root cause was an interaction of two individually-reasonable decisions: `--force` bypassing the overwrite guard, and the `diff` `local_override_hashes` optimization that skips re-reading a file whose hash matches `pull_hash` (so the edited content was never even compared). The fix splits `--force` behaviour by 3-way diff state, per the maintainer decision: (b) local edited + remote UNCHANGED -> the file AND its 3-way base (`pull_hash` + `pull_config_hash`) are preserved, so the pending delta stays visible to `sync push` (no data loss, no silent revert); (a) local edited + remote ALSO changed since the last pull (a true merge conflict) -> the pull aborts before writing anything with the new `SyncConflictError` (exit 1, error code `SYNC_CONFLICT`), listing every conflicting config/row so the user resolves it (`sync diff`, then `push` or discard, then pull again). A no-conflict force-pull (remote changed, local untouched) still takes remote as before. Applies at config and row granularity. Note: `--force` no longer discards un-pushed non-conflicting edits -- that was the dangerous behaviour; to intentionally drop local edits, delete the file (or the config dir) and pull. New: `errors.SyncConflictError` + `ErrorCode.SYNC_CONFLICT`; `SyncService._detect_force_pull_conflicts` / `_is_conflict` (read-only pre-pass that runs before any write); `commands/sync.py` catches the error and prints a red per-config conflict block (human) or a `SYNC_CONFLICT` envelope with a `details.conflicts` array (`--json`). The pull `--force` help now documents the preserve/abort semantics. `--all-projects` surfaces a per-project conflict as a structured entry (`error_code: SYNC_CONFLICT` + the `conflicts` list, matching the single-project envelope) without aborting the batch. Tests: `tests/test_sync_force_pull_baseline.py` (config + row, preserve case b, abort case a, remote-only-changed takes remote, `--all-projects` structured conflict), `tests/test_sync_cli.py` (exit 1 + human/JSON conflict envelope), and `tests/test_e2e.py::TestE2ESyncWorkflow::test_sync_force_pull_conflict_aware` (real Storage: preserve when remote unchanged, then `SYNC_CONFLICT` after a remote mutation).', + ], "0.52.1": [ 'Fix (docs/UX, swap-tables wording): completes the swap-tables semantics correction shipped in 0.52.0, which left four co-located surfaces still claiming the swap is dev-branch-only or "rejected on production" -- now false after that fix. The user-facing `ConfigError` raised on a missing `--branch` (exit 5) no longer says "The Storage API rejects this on production"; it now reads "swap-tables requires a branch ... Any branch works, including the default/production branch." The same stale wording was corrected in `commands-reference.md`, the `swap-tables` command docstring (which feeds the auto-generated `SKILL.md` decision table via `make skill-gen`), and the `AGENT_CONTEXT` block (`kbagent context`). A CLI test that mocked and asserted the old phantom "dev branch" error string (it passed only because the mock short-circuited the real service) was fixed to match the real message, and the `swap_tables` `Args` docstring "Dev branch ID" became "any branch accepted, including the default/production branch". No behaviour change: `branch_id` stays mandatory (the swap is branch-scoped). `clone-table` wording is intentionally untouched -- clone legitimately targets a dev branch. Surfaced by the `kbagent-pr-reviewer` self-review passes on keboola/cli#368 and keboola/cli#373.', "Maintenance: dependency bumps merged since 0.52.0 -- `pip` 26.0.1->26.1 and `python-multipart` 0.0.26->0.0.27 (the latter on the `kbagent serve` multipart path). Build/transitive only; no API or behaviour change.", diff --git a/src/keboola_agent_cli/commands/context.py b/src/keboola_agent_cli/commands/context.py index b6d8802..3ca0856 100644 --- a/src/keboola_agent_cli/commands/context.py +++ b/src/keboola_agent_cli/commands/context.py @@ -814,6 +814,12 @@ kbagent sync pull --project ALIAS [--all-projects] [--force] [--dry-run] [--with-samples] [--no-storage] [--no-jobs] [--job-limit N] [--branch ID] Download configs as local files. Idempotent, protects local modifications. + --force (semantics corrected since 0.53.0): re-pull over locally-modified configs. + A config edited locally whose remote is UNCHANGED is PRESERVED (its pending delta stays + pushable -- NOT discarded, NOT silently re-stamped). A true merge conflict (the config + changed BOTH locally and on the remote since the last pull) ABORTS the pull (exit 1, + SYNC_CONFLICT) listing each conflict; resolve via sync diff then push-or-discard, then pull. + To intentionally drop local edits, delete the file/dir and pull. Applies to rows too. --job-limit controls max recent jobs per config (default 5). For large projects, automatically falls back to per-config job fetching to ensure all configs get job history. Auto-detects renamed configs and renames local directories to match (uses git mv in git repos). diff --git a/src/keboola_agent_cli/commands/sync.py b/src/keboola_agent_cli/commands/sync.py index 3b1023a..8b0e782 100644 --- a/src/keboola_agent_cli/commands/sync.py +++ b/src/keboola_agent_cli/commands/sync.py @@ -9,7 +9,7 @@ import typer -from ..errors import ConfigError, ErrorCode, KeboolaApiError +from ..errors import ConfigError, ErrorCode, KeboolaApiError, SyncConflictError from ._helpers import check_cli_permission, get_formatter, get_service, map_error_to_exit_code sync_app = typer.Typer(help="Sync project configurations with local filesystem") @@ -248,6 +248,23 @@ def _format_diff_result(formatter: Any, result: dict) -> None: formatter.console.print(f" {len(remote_only)} new remote-only config(s)") +def _format_conflict_list(formatter: Any, conflicts: list[dict[str, str]]) -> None: + """Print the per-config force-pull conflict list (human mode only).""" + if not conflicts: + return + n = len(conflicts) + formatter.console.print( + f"\n[bold red]Merge conflict:[/bold red] {n} config(s) changed BOTH " + f"locally and on the remote since the last pull:" + ) + for c in conflicts: + label = "row" if c.get("scope") == "row" else "config" + formatter.console.print( + f" [red]![/red] {c.get('component_id')}/{c.get('config_id')} " + f"[dim]({label})[/dim] {c.get('config_name', '')}" + ) + + def _format_push_result(formatter: Any, result: dict) -> None: """Format a single-project push result for human output.""" status = result.get("status", "") @@ -409,7 +426,12 @@ def sync_pull( force: bool = typer.Option( False, "--force", - help="Overwrite local files without checking for modifications", + help=( + "Force re-pull. Locally-modified configs whose remote is unchanged " + "are PRESERVED (kept as pending changes for `sync push`); a true " + "merge conflict (local AND remote both changed since the last pull) " + "aborts the pull so you can resolve it." + ), ), dry_run: bool = typer.Option( False, @@ -531,6 +553,16 @@ def sync_pull( max_samples=max_samples, branch_override=branch, ) + except SyncConflictError as exc: + if not formatter.json_mode: + _format_conflict_list(formatter, exc.conflicts) + formatter.error( + message=exc.message, + error_code=exc.error_code, + project=project or "", + details={"conflicts": exc.conflicts}, + ) + raise typer.Exit(code=1) from None except FileNotFoundError as exc: formatter.error(message=str(exc), error_code=ErrorCode.NOT_INITIALIZED) raise typer.Exit(code=1) from None diff --git a/src/keboola_agent_cli/errors.py b/src/keboola_agent_cli/errors.py index ba4225a..ea5c988 100644 --- a/src/keboola_agent_cli/errors.py +++ b/src/keboola_agent_cli/errors.py @@ -88,6 +88,7 @@ class ErrorCode(StrEnum): # Sync PARENT_CONFIG_NOT_TRACKED = "PARENT_CONFIG_NOT_TRACKED" VARIABLE_LINK_UNRESOLVED = "VARIABLE_LINK_UNRESOLVED" + SYNC_CONFLICT = "SYNC_CONFLICT" # Encryption ENCRYPTION_FAILED = "ENCRYPTION_FAILED" @@ -176,6 +177,41 @@ def __init__(self, message: str) -> None: self.message = message +class SyncConflictError(Exception): + """Raised when ``sync pull --force`` would overwrite locally-modified + configs whose remote **also** changed since the last pull -- a true 3-way + merge conflict (local and remote both diverged from the synced base). + + ``--force`` deliberately bypasses the "preserve locally-modified files" + guard, so without this check it would silently adopt the edited on-disk + file as the new synced baseline (issue: force-pull baseline corruption). + Rather than discard un-pushed work, the pull aborts *before writing + anything* and asks the user to resolve each conflict (push or discard + local edits, then pull again). + + ``conflicts`` carries one dict per conflicting config/row so the command + layer can list them. Each dict has ``component_id``, ``config_id``, + ``config_name``, ``path``, ``scope`` (``"config"`` or ``"row"``), and an + optional ``row_id``. + """ + + def __init__(self, conflicts: list[dict[str, str]]) -> None: + self.conflicts = conflicts + n = len(conflicts) + plural = "s" if n != 1 else "" + message = ( + f"{n} config{plural} ha{'ve' if n != 1 else 's'} un-pushed local " + f"edits AND changed on the remote since the last pull (merge " + f"conflict). `sync pull --force` refuses to overwrite them so your " + f"local work is not lost. Resolve each conflict first: review with " + f"`kbagent sync diff`, then either `kbagent sync push` your local " + f"edits or discard them, and pull again." + ) + super().__init__(message) + self.message = message + self.error_code = ErrorCode.SYNC_CONFLICT + + class PermissionDeniedError(Exception): """Raised when an operation is blocked by the permission policy.""" @@ -196,6 +232,7 @@ def __init__(self, operation: str, message: str = "") -> None: ErrorCode.NOT_FOUND: "not_found", ErrorCode.CONFIG_ERROR: "configuration", ErrorCode.VALIDATION_ERROR: "validation", + ErrorCode.SYNC_CONFLICT: "conflict", ErrorCode.PERMISSION_DENIED: "authorization", ErrorCode.DP_LOGIN_FAILED: "authentication", ErrorCode.DP_MFA_REQUIRED: "authentication", diff --git a/src/keboola_agent_cli/services/sync_service.py b/src/keboola_agent_cli/services/sync_service.py index a2fa96f..b266221 100644 --- a/src/keboola_agent_cli/services/sync_service.py +++ b/src/keboola_agent_cli/services/sync_service.py @@ -34,7 +34,7 @@ STORAGE_DIR_NAME, STORAGE_SAMPLES_DIR_NAME, ) -from ..errors import ConfigError, ErrorCode, KeboolaApiError +from ..errors import ConfigError, ErrorCode, KeboolaApiError, SyncConflictError from ..sync.code_extraction import extract_code_files, merge_code_files from ..sync.config_format import ( api_config_to_local, @@ -343,6 +343,111 @@ def _adopt_existing_manifest( # pull # ------------------------------------------------------------------ + def _is_conflict( + self, + config_file: Path, + old_pull_hash: str, + old_cfg_hash: str, + api_cfg_hash: str, + ) -> bool: + """True iff the file is locally modified AND the remote also changed. + + A 3-way conflict needs both a stored ``pull_hash`` (the synced file + state) and a stored ``pull_config_hash`` (the synced remote state); + without either we cannot prove a conflict, so return False -- be + conservative, ``--force`` must not abort on incomplete bookkeeping. + A missing local file is not a content conflict (nothing to lose). + """ + if not old_pull_hash or not old_cfg_hash: + return False + if not config_file.exists(): + return False + locally_modified = self._file_hash(config_file) != old_pull_hash + remote_changed = api_cfg_hash != old_cfg_hash + return locally_modified and remote_changed + + def _detect_force_pull_conflicts( + self, + components: list[dict[str, Any]], + branch_dir: Path, + *, + existing_keys: set[str], + existing_paths: dict[str, str], + existing_file_hashes: dict[str, str], + existing_config_hashes: dict[str, str], + existing_rows: dict[str, dict[str, str]], + ) -> list[dict[str, str]]: + """Return configs/rows a ``--force`` pull would clobber as conflicts. + + A *conflict* is a config (or row) that is BOTH locally modified (its + on-disk ``_config.yml`` hash differs from the manifest ``pull_hash``) + AND changed on the remote since the last pull (the freshly fetched + config hash differs from ``pull_config_hash``). That is the only case + where ``--force`` must stop: local and remote have diverged, so neither + "take remote" nor "keep local" is safe without the user deciding. + + Configs only locally modified (remote unchanged) are NOT conflicts -- + ``--force`` preserves them so their pending delta stays pushable. + Brand-new remote configs and configs whose local file is missing are + skipped (nothing local to lose). Read-only: hashes but writes nothing. + """ + conflicts: list[dict[str, str]] = [] + for component in components: + component_id = component.get("id", "") + if component_id in ALWAYS_IGNORED_COMPONENTS: + continue + for cfg in component.get("configurations", []): + config_id = str(cfg.get("id", "")) + lookup_key = f"{component_id}/{config_id}" + if lookup_key not in existing_keys: + continue # brand-new remote config -- nothing local to lose + + rel_path = existing_paths.get(lookup_key, "") + api_cfg_hash = config_hash(api_config_to_local(component_id, cfg, config_id)) + if self._is_conflict( + branch_dir / rel_path / CONFIG_FILENAME, + existing_file_hashes.get(lookup_key, ""), + existing_config_hashes.get(lookup_key, ""), + api_cfg_hash, + ): + conflicts.append( + { + "scope": "config", + "component_id": component_id, + "config_id": config_id, + "config_name": str(cfg.get("name", "untitled")), + "path": rel_path, + } + ) + + # Row-level conflicts (same 3-way rule, per row). + config_dir = branch_dir / rel_path + for row in cfg.get("rows", []): + row_id = str(row.get("id", "")) + existing_row = existing_rows.get(f"{component_id}/{config_id}/{row_id}") + if not existing_row: + continue + row_rel_path = existing_row.get("path", "") + if self._is_conflict( + config_dir / row_rel_path / CONFIG_FILENAME, + existing_row.get("pull_hash", ""), + existing_row.get("pull_config_hash", ""), + config_hash(api_row_to_local(row, component_id)), + ): + conflicts.append( + { + "scope": "row", + "component_id": component_id, + "config_id": config_id, + "config_name": ( + f"{cfg.get('name', 'untitled')}/{row.get('name', 'untitled')}" + ), + "path": f"{rel_path}/{row_rel_path}", + "row_id": row_id, + } + ) + return conflicts + def pull( self, alias: str, @@ -476,6 +581,29 @@ def pull( "pull_config_hash": r.metadata.get("pull_config_hash", ""), } + # Force-pull conflict guard (force-pull baseline corruption fix). + # ``--force`` bypasses the "preserve locally-modified files" guard + # below, so a config edited locally AND changed on the remote since the + # last pull (a true 3-way conflict) would be silently overwritten -- and + # when the remote is unchanged, its baseline would instead be re-stamped + # from the edited file, stranding the un-pushed edits. Detect such + # conflicts up front and abort BEFORE writing anything (the read-only + # API fetch above has already happened; nothing is on disk yet), so the + # user can resolve them. Non-force pull preserves locally-modified + # files and surfaces conflicts via ``sync diff``, so it needs no abort. + if force: + conflicts = self._detect_force_pull_conflicts( + components, + branch_dir, + existing_keys=existing_keys, + existing_paths=existing_paths, + existing_file_hashes=existing_file_hashes, + existing_config_hashes=existing_config_hashes, + existing_rows=existing_rows, + ) + if conflicts: + raise SyncConflictError(conflicts) + for component in components: component_id = component.get("id", "") if component_id in ALWAYS_IGNORED_COMPONENTS: @@ -555,11 +683,16 @@ def pull( api_cfg_hash = config_hash(local_data) pull_cfg_hash = api_cfg_hash - # Detect local modifications: if file hash differs from - # pull_hash stored in manifest, the user edited the file. - # Skip overwrite unless --force to avoid losing local work. + # Detect local modifications: if the file hash differs from the + # pull_hash stored in manifest, the user edited the file -- so + # preserve it instead of overwriting. This now runs even under + # ``--force``: a force-pull that reaches here for a modified file + # has already passed the conflict guard above (so the remote is + # unchanged), and re-stamping the baseline from the edited file + # would silently strand the un-pushed edits. Preserving keeps + # the pending delta visible to ``sync push``. locally_modified = False - if not is_new and not force: + if not is_new: old_file_hash = existing_file_hashes.get(lookup_key, "") if old_file_hash: config_file = config_dir / CONFIG_FILENAME @@ -676,8 +809,12 @@ def pull( row_file = row_dir / CONFIG_FILENAME old_row_file_hash = existing_row["pull_hash"] if existing_row else "" old_row_cfg_hash = existing_row["pull_config_hash"] if existing_row else "" + # Runs even under ``--force``: a force-pull reaching a + # modified row has passed the conflict guard (remote + # unchanged), so preserve the row rather than re-stamp its + # baseline from the edited file and strand the edits. row_locally_modified = False - if existing_row and not force and old_row_file_hash and row_file.exists(): + if existing_row and old_row_file_hash and row_file.exists(): row_locally_modified = self._file_hash(row_file) != old_row_file_hash if row_locally_modified: @@ -2343,6 +2480,17 @@ def _worker(alias: str) -> None: ) results[alias] = result success_count += 1 + except SyncConflictError as exc: + # Preserve the structured conflict so a programmatic / AI + # consumer of `--all-projects --json` can tell a merge conflict + # apart from any other error and read the conflicting configs -- + # the single-project path emits the same code + conflicts. + results[alias] = { + "error": exc.message, + "error_code": exc.error_code, + "conflicts": exc.conflicts, + } + failed_count += 1 except Exception as exc: results[alias] = {"error": str(exc)} failed_count += 1 diff --git a/tests/test_e2e.py b/tests/test_e2e.py index fe995b2..fca70ba 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -3964,6 +3964,115 @@ def test_sync_workflow(self) -> None: ) assert data["status"] == "ok" + def test_sync_force_pull_conflict_aware(self) -> None: + """`sync pull --force` is conflict-aware (0.53.0+), end-to-end. + + Locks both halves of the baseline-corruption fix against real Storage: + + * (b) local edited, remote UNCHANGED -> force-pull PRESERVES the edit + (does not silently re-stamp the baseline); ``sync diff`` afterward + still reports the config as modified. + * (a) local edited AND remote also changed -> force-pull ABORTS with + exit 1 and error code ``SYNC_CONFLICT``. + + Creates + cleans up a dedicated config so the test is idempotent. + """ + import yaml as _yaml + + from keboola_agent_cli.client import KeboolaClient + from keboola_agent_cli.constants import CONFIG_FILENAME + + cfg: dict = {} + try: + with KeboolaClient(stack_url=self.url, token=self.token) as api: + cfg = api.create_config( + component_id=TEST_COMPONENT_ID, + name=f"{RUN_ID}-forcepull", + description="E2E force-pull conflict fixture", + configuration={"parameters": {"db": {"host": "orig.example.com"}}}, + ) + cfg_id = str(cfg["id"]) + + # --- init + pull, locate the config's _config.yml --- + _step("7a", "sync init + pull (force-pull fixture)") + self._run_ok( + "sync", "init", "--project", self.alias, "--directory", str(self.project_dir) + ) + self._run_ok( + "sync", "pull", "--project", self.alias, "--directory", str(self.project_dir) + ) + matches = [ + p + for p in self.project_dir.rglob(CONFIG_FILENAME) + if "rows" not in p.relative_to(self.project_dir).parts + and str( + _yaml.safe_load(p.read_text(encoding="utf-8")) + .get("_keboola", {}) + .get("config_id") + ) + == cfg_id + ] + assert len(matches) == 1, f"config YAML not found after pull: {matches}" + config_file = matches[0] + + # --- edit locally --- + local = _yaml.safe_load(config_file.read_text(encoding="utf-8")) + local.setdefault("parameters", {})["_e2e_marker"] = "x" + config_file.write_text(_yaml.dump(local, default_flow_style=False), encoding="utf-8") + + # --- (b) force-pull, remote UNCHANGED -> edit preserved --- + _step("7b", "force-pull preserves edit when remote unchanged") + self._run_ok( + "sync", + "pull", + "--project", + self.alias, + "--directory", + str(self.project_dir), + "--force", + ) + diff_after = self._run_ok( + "sync", "diff", "--project", self.alias, "--directory", str(self.project_dir) + ) + modified = [c for c in diff_after["data"]["changes"] if c["change_type"] == "modified"] + assert any(c["config_id"] == cfg_id for c in modified), ( + f"force-pull stranded the un-pushed edit: {diff_after['data']['summary']}" + ) + + # --- (a) mutate remote, force-pull -> SYNC_CONFLICT abort --- + _step("7c", "force-pull aborts on a true conflict") + with KeboolaClient(stack_url=self.url, token=self.token) as api: + api.update_config( + component_id=TEST_COMPONENT_ID, + config_id=cfg_id, + configuration={"parameters": {"db": {"host": "remote-moved.example.com"}}}, + change_description="e2e force-pull conflict", + ) + conflict_result = self._run( + "sync", + "pull", + "--project", + self.alias, + "--directory", + str(self.project_dir), + "--force", + ) + assert conflict_result.exit_code == 1, ( + f"expected exit 1 on conflict, got {conflict_result.exit_code}" + ) + envelope = json.loads(conflict_result.output) + assert envelope["status"] == "error" + assert envelope["error"]["code"] == "SYNC_CONFLICT" + assert any(c["config_id"] == cfg_id for c in envelope["error"]["details"]["conflicts"]) + finally: + cfg_id = cfg.get("id") if cfg else None + if cfg_id: + try: + with KeboolaClient(stack_url=self.url, token=self.token) as api: + api.delete_config(component_id=TEST_COMPONENT_ID, config_id=cfg_id) + except Exception as exc: + print(f" [cleanup] Failed to delete {TEST_COMPONENT_ID}/{cfg_id}: {exc}") + def test_sync_push_variable_row_round_trip(self) -> None: """PR1 P0-1 acceptance: edit a keboola.variables values row, push, pull back. diff --git a/tests/test_sync_cli.py b/tests/test_sync_cli.py index 0992b9b..0b1c0fd 100644 --- a/tests/test_sync_cli.py +++ b/tests/test_sync_cli.py @@ -14,7 +14,7 @@ from keboola_agent_cli.cli import app from keboola_agent_cli.config_store import ConfigStore -from keboola_agent_cli.errors import ConfigError, KeboolaApiError +from keboola_agent_cli.errors import ConfigError, KeboolaApiError, SyncConflictError from keboola_agent_cli.models import ProjectConfig from keboola_agent_cli.services.project_service import ProjectService @@ -467,6 +467,85 @@ def test_sync_pull_api_error(self, tmp_path: Path) -> None: assert result.exit_code == 3 # auth error + def _conflict_error(self) -> SyncConflictError: + return SyncConflictError( + [ + { + "scope": "config", + "component_id": "keboola.ex-http", + "config_id": "cfg-001", + "config_name": "My HTTP Extractor", + "path": "extractor/keboola.ex-http/my-http-extractor", + } + ] + ) + + def test_sync_pull_force_conflict_human(self, tmp_path: Path) -> None: + """sync pull --force aborts with exit 1 and lists the conflict (human).""" + config_dir = tmp_path / "config" + config_dir.mkdir() + store = _setup_config(config_dir, {"prod": {"token": TEST_TOKEN}}) + + mock_sync = _make_sync_service_mock() + mock_sync.pull.side_effect = self._conflict_error() + + with ( + patch("keboola_agent_cli.cli.ConfigStore") as MockStore, + patch("keboola_agent_cli.cli.ProjectService") as MockProjService, + patch("keboola_agent_cli.cli.SyncService") as MockSyncService, + ): + MockStore.return_value = store + MockProjService.return_value = ProjectService(config_store=store) + MockSyncService.return_value = mock_sync + + result = runner.invoke( + app, + ["sync", "pull", "--project", "prod", "--force", "--directory", str(tmp_path)], + ) + + assert result.exit_code == 1 + out = _strip_ansi(result.output) + assert "conflict" in out.lower() + assert "keboola.ex-http/cfg-001" in out + + def test_sync_pull_force_conflict_json(self, tmp_path: Path) -> None: + """sync pull --force conflict emits a SYNC_CONFLICT error envelope (JSON).""" + config_dir = tmp_path / "config" + config_dir.mkdir() + store = _setup_config(config_dir, {"prod": {"token": TEST_TOKEN}}) + + mock_sync = _make_sync_service_mock() + mock_sync.pull.side_effect = self._conflict_error() + + with ( + patch("keboola_agent_cli.cli.ConfigStore") as MockStore, + patch("keboola_agent_cli.cli.ProjectService") as MockProjService, + patch("keboola_agent_cli.cli.SyncService") as MockSyncService, + ): + MockStore.return_value = store + MockProjService.return_value = ProjectService(config_store=store) + MockSyncService.return_value = mock_sync + + result = runner.invoke( + app, + [ + "--json", + "sync", + "pull", + "--project", + "prod", + "--force", + "--directory", + str(tmp_path), + ], + ) + + assert result.exit_code == 1 + payload = json.loads(result.output) + assert payload["status"] == "error" + assert payload["error"]["code"] == "SYNC_CONFLICT" + assert payload["error"]["details"]["conflicts"][0]["config_id"] == "cfg-001" + # =================================================================== # sync status CLI tests diff --git a/tests/test_sync_force_pull_baseline.py b/tests/test_sync_force_pull_baseline.py new file mode 100644 index 0000000..a7b2f94 --- /dev/null +++ b/tests/test_sync_force_pull_baseline.py @@ -0,0 +1,271 @@ +"""Regression tests for the `sync pull --force` baseline-corruption bug. + +Field report (kbagent v0.51.1, project 5785): a user has un-pushed local edits +to config A, then runs ``sync pull --force`` (typically to resolve an +*unrelated* config B's conflict). ``--force`` bypassed the +``locally_modified`` guard in ``SyncService.pull()``, so when config A's remote +was unchanged the ``remote_unchanged`` short-circuit re-stamped A's manifest +``pull_hash`` from the *edited on-disk file*. Afterwards ``sync diff`` / +``sync push`` believed A was in sync and silently shipped nothing -- the local +edits were stranded while the remote still held the old config. + +The fix splits ``--force`` behaviour by 3-way diff state: + +* (b) local edited, remote UNCHANGED -> preserve the file AND the 3-way base, + so the pending delta stays visible to ``sync push`` (no data loss). +* (a) local edited, remote ALSO changed (true merge conflict) -> abort the pull + with ``SyncConflictError`` before writing anything; the user resolves. + +These tests pin both halves, at config and row granularity. +""" + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest +import yaml + +from helpers import setup_single_project +from keboola_agent_cli.constants import CONFIG_FILENAME +from keboola_agent_cli.errors import SyncConflictError +from keboola_agent_cli.models import TokenVerifyResponse +from keboola_agent_cli.services.sync_service import SyncService + +SAMPLE_VERIFY_TOKEN = TokenVerifyResponse( + token_id="tok-001", + token_description="kbagent-cli", + project_id=258, + project_name="Production", + owner_name="My Org", +) + +SAMPLE_BRANCHES = [ + {"id": 12345, "name": "Main", "isDefault": True}, +] + + +def _http_extractor(base_url: str, rows: list | None = None) -> list: + """Single keboola.ex-http config carrying ``base_url`` (+ optional rows).""" + return [ + { + "id": "keboola.ex-http", + "type": "extractor", + "configurations": [ + { + "id": "cfg-001", + "name": "My HTTP Extractor", + "description": "Fetches data", + "configuration": {"parameters": {"baseUrl": base_url}}, + "rows": rows if rows is not None else [], + } + ], + }, + ] + + +def _row(path: str) -> dict: + return { + "id": "row-001", + "name": "Users Endpoint", + "description": "", + "configuration": {"parameters": {"path": path}}, + } + + +# Remote states used across the tests. +REMOTE_V1 = _http_extractor("https://api.example.com") +REMOTE_V2 = _http_extractor("https://api-v2.example.com") # remote moved on +REMOTE_V1_WITH_ROW = _http_extractor("https://api.example.com", rows=[_row("/users")]) +REMOTE_V2_WITH_ROW = _http_extractor("https://api.example.com", rows=[_row("/people")]) + + +def _make_mock_client( + verify_token_response: TokenVerifyResponse | None = None, + components_response: list | None = None, + branches_response: list | None = None, +) -> MagicMock: + client = MagicMock() + client.__enter__ = MagicMock(return_value=client) + client.__exit__ = MagicMock(return_value=False) + if verify_token_response: + client.verify_token.return_value = verify_token_response + if components_response is not None: + client.list_components_with_configs.return_value = components_response + if branches_response is not None: + client.list_dev_branches.return_value = branches_response + return client + + +def _svc(store, components: list | None = None) -> SyncService: + return SyncService( + config_store=store, + client_factory=lambda url, token: _make_mock_client( + verify_token_response=SAMPLE_VERIFY_TOKEN, + components_response=components, + branches_response=SAMPLE_BRANCHES, + ), + ) + + +def _init_and_pull(tmp_config_dir: Path, project_root: Path, remote: list) -> object: + """init + first pull, returning the ConfigStore.""" + store = setup_single_project(tmp_config_dir) + _svc(store).init_sync(alias="prod", project_root=project_root) + _svc(store, remote).pull(alias="prod", project_root=project_root) + return store + + +def _config_yml(project_root: Path, under_rows: bool) -> Path: + """Locate the config or row _config.yml under the pulled tree.""" + files = list(project_root.rglob(CONFIG_FILENAME)) + matches = [f for f in files if ("rows" in f.parts) == under_rows] + assert len(matches) == 1, f"expected exactly one {'row' if under_rows else 'config'} file" + return matches[0] + + +def _edit_param(config_file: Path, key: str, value: str) -> None: + data = yaml.safe_load(config_file.read_text(encoding="utf-8")) + data["parameters"][key] = value + config_file.write_text(yaml.dump(data, default_flow_style=False), encoding="utf-8") + + +# =================================================================== +# Config-level +# =================================================================== + + +def test_force_pull_preserves_unpushed_local_edits(tmp_config_dir: Path, tmp_path: Path) -> None: + """(b) force-pull, remote unchanged -> local edit preserved, still pushable.""" + project_root = tmp_path / "project" + project_root.mkdir() + store = _init_and_pull(tmp_config_dir, project_root, REMOTE_V1) + + config_file = _config_yml(project_root, under_rows=False) + _edit_param(config_file, "baseUrl", "https://changed.example.com") + + # Healthy before the force-pull. + assert _svc(store, REMOTE_V1).diff("prod", project_root)["summary"]["modified"] == 1 + + # Force-pull with the SAME remote (run to adopt some *other* config's state). + _svc(store, REMOTE_V1).pull("prod", project_root, force=True) + + # The on-disk file still holds the edit (force did not revert it). + after = yaml.safe_load(config_file.read_text(encoding="utf-8")) + assert after["parameters"]["baseUrl"] == "https://changed.example.com" + + # And the pending delta is STILL detected -- the bug stranded it silently. + diff_after = _svc(store, REMOTE_V1).diff("prod", project_root) + assert diff_after["summary"]["modified"] == 1, ( + "force-pull (remote unchanged) silently dropped the un-pushed local edit" + ) + assert diff_after["changes"][0]["change_type"] == "modified" + + +def test_force_pull_aborts_on_true_conflict(tmp_config_dir: Path, tmp_path: Path) -> None: + """(a) force-pull, remote ALSO changed -> abort with SyncConflictError.""" + project_root = tmp_path / "project" + project_root.mkdir() + store = _init_and_pull(tmp_config_dir, project_root, REMOTE_V1) + + config_file = _config_yml(project_root, under_rows=False) + _edit_param(config_file, "baseUrl", "https://changed.example.com") + + with pytest.raises(SyncConflictError) as excinfo: + _svc(store, REMOTE_V2).pull("prod", project_root, force=True) + + conflicts = excinfo.value.conflicts + assert len(conflicts) == 1 + assert conflicts[0]["scope"] == "config" + assert conflicts[0]["component_id"] == "keboola.ex-http" + assert conflicts[0]["config_id"] == "cfg-001" + + # Abort left the local edit intact (nothing was written). + after = yaml.safe_load(config_file.read_text(encoding="utf-8")) + assert after["parameters"]["baseUrl"] == "https://changed.example.com" + + # The conflict remains visible to diff (base preserved, not corrupted). + diff_after = _svc(store, REMOTE_V2).diff("prod", project_root) + assert diff_after["summary"].get("conflict", 0) == 1 + + +def test_force_pull_no_conflict_when_only_remote_changed( + tmp_config_dir: Path, tmp_path: Path +) -> None: + """Remote changed but local untouched is NOT a conflict -- force takes remote.""" + project_root = tmp_path / "project" + project_root.mkdir() + store = _init_and_pull(tmp_config_dir, project_root, REMOTE_V1) + + # No local edit; remote moved to V2. + _svc(store, REMOTE_V2).pull("prod", project_root, force=True) + + config_file = _config_yml(project_root, under_rows=False) + after = yaml.safe_load(config_file.read_text(encoding="utf-8")) + assert after["parameters"]["baseUrl"] == "https://api-v2.example.com" + + +# =================================================================== +# Row-level (same 3-way rule per row) +# =================================================================== + + +def test_force_pull_preserves_unpushed_row_edits(tmp_config_dir: Path, tmp_path: Path) -> None: + """(b) row edited, remote unchanged -> row preserved, still pushable.""" + project_root = tmp_path / "project" + project_root.mkdir() + store = _init_and_pull(tmp_config_dir, project_root, REMOTE_V1_WITH_ROW) + + row_file = _config_yml(project_root, under_rows=True) + _edit_param(row_file, "path", "/changed") + + _svc(store, REMOTE_V1_WITH_ROW).pull("prod", project_root, force=True) + + after = yaml.safe_load(row_file.read_text(encoding="utf-8")) + assert after["parameters"]["path"] == "/changed" + + diff_after = _svc(store, REMOTE_V1_WITH_ROW).diff("prod", project_root) + assert diff_after["summary"]["modified"] == 1, "force-pull stranded the un-pushed row edit" + + +def test_force_pull_aborts_on_row_conflict(tmp_config_dir: Path, tmp_path: Path) -> None: + """(a) row edited AND remote row changed -> abort with SyncConflictError.""" + project_root = tmp_path / "project" + project_root.mkdir() + store = _init_and_pull(tmp_config_dir, project_root, REMOTE_V1_WITH_ROW) + + row_file = _config_yml(project_root, under_rows=True) + _edit_param(row_file, "path", "/changed") + + with pytest.raises(SyncConflictError) as excinfo: + _svc(store, REMOTE_V2_WITH_ROW).pull("prod", project_root, force=True) + + scopes = {c["scope"] for c in excinfo.value.conflicts} + assert "row" in scopes + + +# =================================================================== +# --all-projects keeps the conflict structured (not a flat string) +# =================================================================== + + +def test_force_pull_all_projects_emits_structured_conflict( + tmp_config_dir: Path, tmp_path: Path +) -> None: + """`pull_all` must surface SYNC_CONFLICT + conflicts, not just `str(exc)`.""" + base_dir = tmp_path / "base" + project_root = base_dir / "prod" + project_root.mkdir(parents=True) + store = _init_and_pull(tmp_config_dir, project_root, REMOTE_V1) + + config_file = _config_yml(project_root, under_rows=False) + _edit_param(config_file, "baseUrl", "https://changed.example.com") + + # Remote moved on -> conflict. pull_all catches it per-project; assert it + # keeps the structured payload a JSON consumer needs (not a flat string). + result = _svc(store, REMOTE_V2).pull_all(base_dir, force=True) + proj = result["projects"]["prod"] + assert proj.get("error_code") == "SYNC_CONFLICT" + assert proj.get("conflicts"), "conflicts list must be present on the error entry" + assert proj["conflicts"][0]["config_id"] == "cfg-001" + assert result["summary"]["failed"] == 1 + assert result["summary"]["success"] == 0 diff --git a/uv.lock b/uv.lock index 25b9d2f..d9a3575 100644 --- a/uv.lock +++ b/uv.lock @@ -496,7 +496,7 @@ wheels = [ [[package]] name = "keboola-agent-cli" -version = "0.52.1" +version = "0.53.0" source = { editable = "." } dependencies = [ { name = "croniter" },