diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml index 2bb5a09..c18f4a8 100644 --- a/.github/workflows/version-bump.yml +++ b/.github/workflows/version-bump.yml @@ -9,6 +9,10 @@ on: jobs: determine-bump: runs-on: ubuntu-latest + if: | + contains(github.event.pull_request.labels.*.name, 'bump:major') || + contains(github.event.pull_request.labels.*.name, 'bump:minor') || + contains(github.event.pull_request.labels.*.name, 'bump:patch') outputs: bump_type: ${{ steps.bump_type.outputs.bump_type }} steps: @@ -31,9 +35,9 @@ jobs: needs: determine-bump runs-on: ubuntu-latest if: | - ${{ github.event.label.name == 'bump:major' }} || - ${{ github.event.label.name == 'bump:minor' }} || - ${{ github.event.label.name == 'bump:patch' }} + contains(github.event.pull_request.labels.*.name, 'bump:major') || + contains(github.event.pull_request.labels.*.name, 'bump:minor') || + contains(github.event.pull_request.labels.*.name, 'bump:patch') steps: - name: Checkout repository diff --git a/README.md b/README.md index d4103ee..5580c06 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# GlassFlow ETL Python SDK +# GlassFlow Python SDK
-
+
@@ -36,72 +36,148 @@ pip install glassflow
## Quick Start
+### Initialize client
+
```python
-from glassflow.etl import Pipeline
+from glassflow.etl import Client
+
+# Initialize GlassFlow client
+client = Client(host="your-glassflow-etl-url")
+```
+### Create a pipeline
+```python
pipeline_config = {
- "pipeline_id": "test-pipeline",
- "source": {
- "type": "kafka",
- "provider": "aiven",
- "connection_params": {
- "brokers": ["localhoust:9092"],
- "protocol": "SASL_SSL",
- "mechanism": "SCRAM-SHA-256",
- "username": "user",
- "password": "pass"
- }
- "topics": [
- {
- "consumer_group_initial_offset": "earliest",
- "id": "test-topic",
- "name": "test-topic",
- "schema": {
- "type": "json",
- "fields": [
- {"name": "id", "type": "string" },
- {"name": "email", "type": "string"}
- ]
+ "pipeline_id": "my-pipeline-id",
+ "source": {
+ "type": "kafka",
+ "connection_params": {
+ "brokers": [
+ "http://my.kafka.broker:9093"
+ ],
+ "protocol": "PLAINTEXT",
+ "skip_auth": True
+ },
+ "topics": [
+ {
+ "consumer_group_initial_offset": "latest",
+ "name": "users",
+ "schema": {
+ "type": "json",
+ "fields": [
+ {
+ "name": "event_id",
+ "type": "string"
+ },
+ {
+ "name": "user_id",
+ "type": "string"
+ },
+ {
+ "name": "name",
+ "type": "string"
+ },
+ {
+ "name": "email",
+ "type": "string"
+ },
+ {
+ "name": "created_at",
+ "type": "string"
+ }
+ ]
+ },
+ "deduplication": {
+ "enabled": True,
+ "id_field": "event_id",
+ "id_field_type": "string",
+ "time_window": "1h"
+ }
+ }
+ ]
+ },
+ "join": {
+ "enabled": False
+ },
+ "sink": {
+ "type": "clickhouse",
+ "host": "http://my.clickhouse.server",
+ "port": "9000",
+ "database": "default",
+ "username": "default",
+ "password": "c2VjcmV0",
+ "secure": False,
+ "max_batch_size": 1000,
+ "max_delay_time": "30s",
+ "table": "users_dedup",
+ "table_mapping": [
+ {
+ "source_id": "users",
+ "field_name": "event_id",
+ "column_name": "event_id",
+ "column_type": "UUID"
},
- "deduplication": {
- "id_field": "id",
- "id_field_type": "string",
- "time_window": "1h",
- "enabled": True
+ {
+ "source_id": "users",
+ "field_name": "user_id",
+ "column_name": "user_id",
+ "column_type": "UUID"
+ },
+ {
+ "source_id": "users",
+ "field_name": "created_at",
+ "column_name": "created_at",
+ "column_type": "DateTime"
+ },
+ {
+ "source_id": "users",
+ "field_name": "name",
+ "column_name": "name",
+ "column_type": "String"
+ },
+ {
+ "source_id": "users",
+ "field_name": "email",
+ "column_name": "email",
+ "column_type": "String"
}
- }
- ],
- },
- "sink": {
- "type": "clickhouse",
- "host": "localhost:8443",
- "port": 8443,
- "database": "test",
- "username": "default",
- "password": "pass",
- "table_mapping": [
- {
- "source_id": "test_table",
- "field_name": "id",
- "column_name": "user_id",
- "column_type": "UUID"
- },
- {
- "source_id": "test_table",
- "field_name": "email",
- "column_name": "email",
- "column_type": "String"
- }
- ]
- }
+ ]
+ }
}
-# Create a pipeline from a JSON configuration
-pipeline = Pipeline(pipeline_config)
+# Create a pipeline
+pipeline = client.create_pipeline(pipeline_config)
+```
+
+
+## Get pipeline
-# Create the pipeline
-pipeline.create()
+```python
+# Get a pipeline by ID
+pipeline = client.get_pipeline("my-pipeline-id")
+```
+
+### List pipelines
+
+```python
+pipelines = client.list_pipelines()
+for pipeline in pipelines:
+ print(f"Pipeline ID: {pipeline['pipeline_id']}")
+ print(f"Name: {pipeline['name']}")
+ print(f"Transformation Type: {pipeline['transformation_type']}")
+ print(f"Created At: {pipeline['created_at']}")
+ print(f"State: {pipeline['state']}")
+```
+
+### Delete pipeline
+
+```python
+# Delete a pipeline
+client.delete_pipeline("my-pipeline-id")
+
+# Or delete via pipeline instance
+pipeline.delete()
```
## Pipeline Configuration
@@ -119,8 +195,10 @@ export GF_TRACKING_ENABLED=false
2. Programmatically using the `disable_tracking` method:
```python
-pipeline = Pipeline(pipeline_config)
-pipeline.disable_tracking()
+from glassflow.etl import Client
+
+client = Client(host="my-glassflow-host")
+client.disable_tracking()
```
The tracking collects anonymous information about:
diff --git a/pyproject.toml b/pyproject.toml
index 7b6d74f..a15624d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "glassflow"
dynamic = ["version"]
-description = "GlassFlow Clickhouse ETL Python SDK: Create GlassFlow pipelines between Kafka and ClickHouse"
+description = "GlassFlow Python SDK: Create GlassFlow pipelines between Kafka and ClickHouse"
authors = [
{name = "GlassFlow", email = "hello@glassflow.dev"}
]