Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions .github/workflows/version-bump.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ on:
jobs:
determine-bump:
runs-on: ubuntu-latest
if: |
contains(github.event.pull_request.labels.*.name, 'bump:major') ||
contains(github.event.pull_request.labels.*.name, 'bump:minor') ||
contains(github.event.pull_request.labels.*.name, 'bump:patch')
outputs:
bump_type: ${{ steps.bump_type.outputs.bump_type }}
steps:
Expand All @@ -31,9 +35,9 @@ jobs:
needs: determine-bump
runs-on: ubuntu-latest
if: |
${{ github.event.label.name == 'bump:major' }} ||
${{ github.event.label.name == 'bump:minor' }} ||
${{ github.event.label.name == 'bump:patch' }}
contains(github.event.pull_request.labels.*.name, 'bump:major') ||
contains(github.event.pull_request.labels.*.name, 'bump:minor') ||
contains(github.event.pull_request.labels.*.name, 'bump:patch')

steps:
- name: Checkout repository
Expand Down
200 changes: 139 additions & 61 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# GlassFlow ETL Python SDK
# GlassFlow Python SDK

<p align="left">
<a target="_blank" href="https://pypi.python.org/pypi/glassflow">
<img src="https://img.shields.io/pypi/v/glassflow.svg?labelColor=&color=e69e3a">
</a>
<a target="_blank" href="https://github.com/glassflow/glassflow-python-sdk/blob/main/LICENSE">
<a target="_blank" href="https://github.com/glassflow/glassflow-python-sdk/blob/main/LICENSE.md">
<img src="https://img.shields.io/pypi/l/glassflow.svg?labelColor=&color=e69e3a">
</a>
<a target="_blank" href="https://pypi.python.org/pypi/glassflow">
Expand Down Expand Up @@ -36,72 +36,148 @@ pip install glassflow

## Quick Start

### Initialize client

```python
from glassflow.etl import Pipeline
from glassflow.etl import Client

# Initialize GlassFlow client
client = Client(host="your-glassflow-etl-url")
```

### Create a pipeline

```python
pipeline_config = {
"pipeline_id": "test-pipeline",
"source": {
"type": "kafka",
"provider": "aiven",
"connection_params": {
"brokers": ["localhoust:9092"],
"protocol": "SASL_SSL",
"mechanism": "SCRAM-SHA-256",
"username": "user",
"password": "pass"
}
"topics": [
{
"consumer_group_initial_offset": "earliest",
"id": "test-topic",
"name": "test-topic",
"schema": {
"type": "json",
"fields": [
{"name": "id", "type": "string" },
{"name": "email", "type": "string"}
]
"pipeline_id": "my-pipeline-id",
"source": {
"type": "kafka",
"connection_params": {
"brokers": [
"http://my.kafka.broker:9093"
],
"protocol": "PLAINTEXT",
"skip_auth": True
},
"topics": [
{
"consumer_group_initial_offset": "latest",
"name": "users",
"schema": {
"type": "json",
"fields": [
{
"name": "event_id",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "name",
"type": "string"
},
{
"name": "email",
"type": "string"
},
{
"name": "created_at",
"type": "string"
}
]
},
"deduplication": {
"enabled": True,
"id_field": "event_id",
"id_field_type": "string",
"time_window": "1h"
}
}
]
},
"join": {
"enabled": False
},
"sink": {
"type": "clickhouse",
"host": "http://my.clickhouse.server",
"port": "9000",
"database": "default",
"username": "default",
"password": "c2VjcmV0",
"secure": False,
"max_batch_size": 1000,
"max_delay_time": "30s",
"table": "users_dedup",
"table_mapping": [
{
"source_id": "users",
"field_name": "event_id",
"column_name": "event_id",
"column_type": "UUID"
},
"deduplication": {
"id_field": "id",
"id_field_type": "string",
"time_window": "1h",
"enabled": True
{
"source_id": "users",
"field_name": "user_id",
"column_name": "user_id",
"column_type": "UUID"
},
{
"source_id": "users",
"field_name": "created_at",
"column_name": "created_at",
"column_type": "DateTime"
},
{
"source_id": "users",
"field_name": "name",
"column_name": "name",
"column_type": "String"
},
{
"source_id": "users",
"field_name": "email",
"column_name": "email",
"column_type": "String"
}
}
],
},
"sink": {
"type": "clickhouse",
"host": "localhost:8443",
"port": 8443,
"database": "test",
"username": "default",
"password": "pass",
"table_mapping": [
{
"source_id": "test_table",
"field_name": "id",
"column_name": "user_id",
"column_type": "UUID"
},
{
"source_id": "test_table",
"field_name": "email",
"column_name": "email",
"column_type": "String"
}
]
}
]
}
}

# Create a pipeline from a JSON configuration
pipeline = Pipeline(pipeline_config)
# Create a pipeline
pipeline = client.create_pipeline(pipeline_config)
```


## Get pipeline

# Create the pipeline
pipeline.create()
```python
# Get a pipeline by ID
pipeline = client.get_pipeline("my-pipeline-id")
```

### List pipelines

```python
pipelines = client.list_pipelines()
for pipeline in pipelines:
print(f"Pipeline ID: {pipeline['pipeline_id']}")
print(f"Name: {pipeline['name']}")
print(f"Transformation Type: {pipeline['transformation_type']}")
print(f"Created At: {pipeline['created_at']}")
print(f"State: {pipeline['state']}")
```

### Delete pipeline

```python
# Delete a pipeline
client.delete_pipeline("my-pipeline-id")

# Or delete via pipeline instance
pipeline.delete()
```

## Pipeline Configuration
Expand All @@ -119,8 +195,10 @@ export GF_TRACKING_ENABLED=false

2. Programmatically using the `disable_tracking` method:
```python
pipeline = Pipeline(pipeline_config)
pipeline.disable_tracking()
from glassflow.etl import Client

client = Client(host="my-glassflow-host")
client.disable_tracking()
```

The tracking collects anonymous information about:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "glassflow"
dynamic = ["version"]
description = "GlassFlow Clickhouse ETL Python SDK: Create GlassFlow pipelines between Kafka and ClickHouse"
description = "GlassFlow Python SDK: Create GlassFlow pipelines between Kafka and ClickHouse"
authors = [
{name = "GlassFlow", email = "hello@glassflow.dev"}
]
Expand Down