From 4611ff0f08661381a191bf0f5b41bcb095f870ab Mon Sep 17 00:00:00 2001 From: Pablo Pardo Garcia Date: Fri, 5 Sep 2025 21:34:07 +0200 Subject: [PATCH 1/5] fix Quick Start docs --- README.md | 198 ++++++++++++++++++++++++++++++++++--------------- pyproject.toml | 2 +- 2 files changed, 139 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index d4103ee..8369eb4 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ - + @@ -36,72 +36,148 @@ pip install glassflow ## Quick Start +### Initialize client + ```python -from glassflow.etl import Pipeline +from glassflow.etl import Client + +# Initialize GlassFlow client +client = Client(host="your-glassflow-etl-url") +``` +### Create a pipeline +```python pipeline_config = { - "pipeline_id": "test-pipeline", - "source": { - "type": "kafka", - "provider": "aiven", - "connection_params": { - "brokers": ["localhoust:9092"], - "protocol": "SASL_SSL", - "mechanism": "SCRAM-SHA-256", - "username": "user", - "password": "pass" - } - "topics": [ - { - "consumer_group_initial_offset": "earliest", - "id": "test-topic", - "name": "test-topic", - "schema": { - "type": "json", - "fields": [ - {"name": "id", "type": "string" }, - {"name": "email", "type": "string"} - ] + "pipeline_id": "my-pipeline-id", + "source": { + "type": "kafka", + "connection_params": { + "brokers": [ + "http://my.kafka.broker:9093" + ], + "protocol": "PLAINTEXT", + "skip_auth": True + }, + "topics": [ + { + "consumer_group_initial_offset": "latest", + "name": "users", + "schema": { + "type": "json", + "fields": [ + { + "name": "event_id", + "type": "string" + }, + { + "name": "user_id", + "type": "string" + }, + { + "name": "name", + "type": "string" + }, + { + "name": "email", + "type": "string" + }, + { + "name": "created_at", + "type": "string" + } + ] + }, + "deduplication": { + "enabled": True, + "id_field": "event_id", + "id_field_type": "string", + "time_window": "1h" + } + } + ] + }, + "join": { + "enabled": False + }, + "sink": { + "type": 
"clickhouse", + "host": "http://my.clickhouse.server", + "port": "9000", + "database": "default", + "username": "default", + "password": "c2VjcmV0", + "secure": False, + "max_batch_size": 1000, + "max_delay_time": "30s", + "table": "users_dedup", + "table_mapping": [ + { + "source_id": "users", + "field_name": "event_id", + "column_name": "event_id", + "column_type": "UUID" }, - "deduplication": { - "id_field": "id", - "id_field_type": "string", - "time_window": "1h", - "enabled": True + { + "source_id": "users", + "field_name": "user_id", + "column_name": "user_id", + "column_type": "UUID" + }, + { + "source_id": "users", + "field_name": "created_at", + "column_name": "created_at", + "column_type": "DateTime" + }, + { + "source_id": "users", + "field_name": "name", + "column_name": "name", + "column_type": "String" + }, + { + "source_id": "users", + "field_name": "email", + "column_name": "email", + "column_type": "String" } - } - ], - }, - "sink": { - "type": "clickhouse", - "host": "localhost:8443", - "port": 8443, - "database": "test", - "username": "default", - "password": "pass", - "table_mapping": [ - { - "source_id": "test_table", - "field_name": "id", - "column_name": "user_id", - "column_type": "UUID" - }, - { - "source_id": "test_table", - "field_name": "email", - "column_name": "email", - "column_type": "String" - } - ] - } + ] + } } -# Create a pipeline from a JSON configuration -pipeline = Pipeline(pipeline_config) +# Create a pipeline +pipeline = client.create_pipeline(pipeline_config) +``` + + +### Get pipeline -# Create the pipeline -pipeline.create() +```python +# Get a pipeline by ID +pipeline = client.get_pipeline("my-pipeline-id") +``` + +### List pipelines + +```python +pipelines = client.list_pipelines() +for pipeline in pipelines: + print(f"Pipeline ID: {pipeline['pipeline_id']}") + print(f"Name: {pipeline['name']}") + print(f"Transformation Type: {pipeline['transformation_type']}") + print(f"Created At: {pipeline['created_at']}") + 
print(f"State: {pipeline['state']}") +``` + +### Delete pipeline + +```python +# Delete a pipeline +client.delete_pipeline("my-pipeline-id") + +# Or delete via pipeline instance +pipeline.delete() ``` ## Pipeline Configuration @@ -119,8 +195,10 @@ export GF_TRACKING_ENABLED=false 2. Programmatically using the `disable_tracking` method: ```python -pipeline = Pipeline(pipeline_config) -pipeline.disable_tracking() +from glassflow.etl import Client + +client = Client(host="my-glassflow-host") +client.disable_tracking() ``` The tracking collects anonymous information about: diff --git a/pyproject.toml b/pyproject.toml index 7b6d74f..a15624d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "glassflow" dynamic = ["version"] -description = "GlassFlow Clickhouse ETL Python SDK: Create GlassFlow pipelines between Kafka and ClickHouse" +description = "GlassFlow Python SDK: Create GlassFlow pipelines between Kafka and ClickHouse" authors = [ {name = "GlassFlow", email = "hello@glassflow.dev"} ] From 8348f8a6d1d421e3842649ee9110670a86790015 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 5 Sep 2025 19:35:05 +0000 Subject: [PATCH 2/5] chore: bump version to 3.0.1 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 4a36342..cb2b00e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0 +3.0.1 From ca2b13992e4d66e0010c284cf7115d651aeb7bbc Mon Sep 17 00:00:00 2001 From: Pablo Pardo Garcia Date: Fri, 5 Sep 2025 22:03:11 +0200 Subject: [PATCH 3/5] roll back version bump --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index cb2b00e..4a36342 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.1 +3.0.0 From ef4a9a49e97ba43f9aa33910595b2ae0823ffd9e Mon Sep 17 00:00:00 2001 From: Pablo Pardo Garcia Date: Fri, 5 Sep 2025 22:03:49 +0200 Subject: [PATCH 4/5] only bump version with bump labels --- .github/workflows/version-bump.yml | 10 +++++++--- 1 
file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml index 2bb5a09..c18f4a8 100644 --- a/.github/workflows/version-bump.yml +++ b/.github/workflows/version-bump.yml @@ -9,6 +9,10 @@ on: jobs: determine-bump: runs-on: ubuntu-latest + if: | + contains(github.event.pull_request.labels.*.name, 'bump:major') || + contains(github.event.pull_request.labels.*.name, 'bump:minor') || + contains(github.event.pull_request.labels.*.name, 'bump:patch') outputs: bump_type: ${{ steps.bump_type.outputs.bump_type }} steps: @@ -31,9 +35,9 @@ jobs: needs: determine-bump runs-on: ubuntu-latest if: | - ${{ github.event.label.name == 'bump:major' }} || - ${{ github.event.label.name == 'bump:minor' }} || - ${{ github.event.label.name == 'bump:patch' }} + contains(github.event.pull_request.labels.*.name, 'bump:major') || + contains(github.event.pull_request.labels.*.name, 'bump:minor') || + contains(github.event.pull_request.labels.*.name, 'bump:patch') steps: - name: Checkout repository From 08ca0a1201f6f5a3ec9ecb8bb1d8a019363ac6d2 Mon Sep 17 00:00:00 2001 From: Pablo Pardo Garcia Date: Fri, 5 Sep 2025 22:07:00 +0200 Subject: [PATCH 5/5] change readme title --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8369eb4..5580c06 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# GlassFlow ETL Python SDK +# GlassFlow Python SDK