Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
89 commits
Select commit Hold shift + click to select a range
4ef0006
test(integration): add tmpfs Postgres harness
Cianidos Apr 22, 2026
d70bbdc
feat(runner): copy .ts/.sql/.json siblings for multi-file workloads
Cianidos Apr 22, 2026
f0f9038
feat(cmd): add dstparse, tpch-dists, tpch-answers JSON tools
Cianidos Apr 22, 2026
c9d66cb
fix(cmd): resolve linter_fix issues in A5 tools
Cianidos Apr 22, 2026
ee1ed54
feat(datagen): add seed composition primitives
Cianidos Apr 22, 2026
42dc49a
feat(datagen): add datagen.proto skeleton with core messages
Cianidos Apr 22, 2026
907f44f
feat(datagen): add Expr evaluator with core arms
Cianidos Apr 22, 2026
f9f4633
feat(datagen): add closed stdlib with 10 primitives
Cianidos Apr 22, 2026
b758d64
feat(datagen): add DAG topo sort with cycle + ref validation
Cianidos Apr 22, 2026
30dbce3
feat(datagen): add flat population runtime with seek
Cianidos Apr 22, 2026
d955e0a
feat(datagen): add deterministic per-attr null handling
Cianidos Apr 22, 2026
25b0031
feat(driver): add within-table parallel insert utility
Cianidos Apr 22, 2026
6b8b7a9
feat(datagen): add Loader with global worker cap and FIFO admission
Cianidos Apr 22, 2026
26fefc8
feat(datagen): add TS surface skeleton with Rel/Attr/Expr/Dict/std
Cianidos Apr 22, 2026
9d523b5
test(integration): datagen pipeline end-to-end smoke on tmpfs pg
Cianidos Apr 22, 2026
f11316d
feat(datagen): add Relationship, Side, Degree, Strategy, Lookup to proto
Cianidos Apr 22, 2026
b83622d
feat(datagen): add Relationship iteration, block slots, and Lookup
Cianidos Apr 22, 2026
f620863
feat(datagen): add TS builders for Relationship, Lookup, Block
Cianidos Apr 22, 2026
add51d2
test(integration): relationship runtime parent-child smoke on tmpfs pg
Cianidos Apr 22, 2026
b0ed3c8
feat(datagen): add StreamDraw primitives and Choose with stream_id
Cianidos Apr 22, 2026
b00d6ec
feat(datagen): add Cohort schedules with LRU and persistence
Cianidos Apr 22, 2026
ab6602f
feat(datagen): add Uniform degree and SCD-2 row-split
Cianidos Apr 22, 2026
b68fcc3
feat(datagen): complete TS Draw/Dict/Cohort/Choose builders
Cianidos Apr 22, 2026
67ba635
test(integration): 4-table stage-D smoke with Uniform degree and SCD-2
Cianidos Apr 22, 2026
4635be3
test(integration): TPC-B seed load and balance invariant on tmpfs pg
Cianidos Apr 22, 2026
884fbcc
test(integration): TPC-C framework capability proof at WAREHOUSES=1
Cianidos Apr 22, 2026
3673fb0
feat(driver): wire InsertSpec from TS through xk6air to pg driver
Cianidos Apr 22, 2026
dc972f7
fix(static): embed datagen.ts and disambiguate InsertMethod export
Cianidos Apr 22, 2026
442afd0
feat(tpcb): rewrite workload with Rel.table and driver.insertSpec
Cianidos Apr 22, 2026
b723605
feat(tpcc): rewrite load with Rel.table and driver.insertSpec
Cianidos Apr 22, 2026
2b061f5
feat(driver): implement InsertSpec for mysql, ydb, picodata
Cianidos Apr 22, 2026
cd0605c
feat(tpch): generate distributions.json and answers_sf1.json
Cianidos Apr 22, 2026
f672a62
feat(tpch): full workload with 8-table load, text helper, and query v…
Cianidos Apr 22, 2026
197d0d1
feat(tpch): spec-compliant orderkeys, dates, prices, variable degree
Cianidos Apr 22, 2026
f1c553b
fix(datagen): TS stdlib wrappers use camelCase to match Go registry
Cianidos Apr 22, 2026
635c74f
feat(datagen): add Draw.grammar two-phase template walker
Cianidos Apr 22, 2026
8bf60e1
test(integration): multi-DB tmpfs harness for pg/mysql/picodata/ydb
Cianidos Apr 22, 2026
a0384bf
fix(tpcc,tpcb): ydb dialect compatibility
Cianidos Apr 22, 2026
23f2080
style(integration): normalize import order via goimports
Cianidos Apr 22, 2026
baaaf87
test(integration): extract shared Expr/Attr proto builders into helpe…
Cianidos Apr 22, 2026
61fe7ff
docs(workloads): add per-workload READMEs for tpcb, tpcc, tpch
Cianidos Apr 22, 2026
7fd0224
fix(driver-ydb): promote time.Time to addressable for BulkUpsert
Cianidos Apr 22, 2026
0f7c286
feat(tpch): mysql, picodata, ydb dialect SQLs
Cianidos Apr 22, 2026
ec566c4
feat(datagen): add Expr.litFloat, std.parseInt, std.parseFloat, Attr.…
Cianidos Apr 22, 2026
a27a42f
refactor(tpch,tpcc): use Expr.litFloat and Attr.dictAtInt instead of …
Cianidos Apr 22, 2026
17b087d
feat(datagen): add Expr.litNull and Literal.null arm
Cianidos Apr 22, 2026
e46f293
feat(datagen): add std.permuteIndex stdlib primitive
Cianidos Apr 22, 2026
387bc9c
feat(tpcc): spec parity (c_last syllables, ORIGINAL marker, determini…
Cianidos Apr 22, 2026
943157a
refactor(datagen): extract per-arm kernels from stream_draw
Cianidos Apr 23, 2026
efd1b70
feat(xk6air): sobek-bound Draw structs with init-time bounds (iter 2)
Cianidos Apr 23, 2026
8c869d7
feat(datagen-ts): DrawRT namespace with sample/next/seek/reset (iter 2)
Cianidos Apr 23, 2026
ffb17af
test(runtime): add DrawRT showcase + API contract test
Cianidos Apr 23, 2026
013ea79
refactor(tpcb): migrate tx-time randomness to DrawRT
Cianidos Apr 23, 2026
d566957
refactor(tpcc): migrate tx-time randomness to DrawRT
Cianidos Apr 23, 2026
846db13
refactor(procs): migrate load+tx paths to new framework for tpcb and …
Cianidos Apr 23, 2026
603a693
refactor(simple): rewrite demo workload to exercise new framework only
Cianidos Apr 23, 2026
88a5ab2
refactor(datagen-ts): remove R.*/Dist.*/AB.*/S.* legacy surface
Cianidos Apr 23, 2026
8774e9f
refactor(xk6air): remove generator_wrappers + legacy exports
Cianidos Apr 23, 2026
d27ce3e
refactor(driver): remove InsertValues legacy load path from all drivers
Cianidos Apr 23, 2026
d517fb2
refactor(datagen): relocate ResolveSeed and delete pkg/common/generate
Cianidos Apr 23, 2026
e803512
refactor(proto): remove Generation + InsertDescriptor + QueryParam wire
Cianidos Apr 23, 2026
d16a0ef
refactor(datagen-ts): rebind defaultInsertMethod to new InsertMethod …
Cianidos Apr 23, 2026
9841ba1
refactor(workloads): rename 'populate' step to 'load_data' for consis…
Cianidos Apr 23, 2026
b3c4830
feat(workloads): parameterize load workers via LOAD_WORKERS env
Cianidos Apr 23, 2026
ccc861c
docs(bench): parallelism sweep 1/2/4/8 workers
Cianidos Apr 23, 2026
39221e4
fix(driver-noop): honour parallelism.workers via RunParallel
Cianidos Apr 23, 2026
6f537f6
fix(datagen-lookup): per-clone registry to stop concurrent-map race
Cianidos Apr 23, 2026
4b36449
docs(bench): parallelism rerun after noop+lookup fixes
Cianidos Apr 23, 2026
a152c07
docs(stage-i): mark parallelism gaps closed
Cianidos Apr 23, 2026
26e90f1
feat(tpcc): parameterize load workers via LOAD_WORKERS env
Cianidos Apr 23, 2026
00f68c0
docs(bench): tpcc W=50 pg parallelism sweep
Cianidos Apr 23, 2026
942f52d
fix(datagen-cohort): per-clone registry to stop concurrent slotCache …
Cianidos Apr 23, 2026
16f2b9c
test(datagen-runtime): determinism sweep across all primitives
Cianidos Apr 23, 2026
ab1df2c
refactor(datagen): delete unused loader package; parallelism.workers …
Cianidos Apr 23, 2026
06c03af
docs: sweep handoff + populate/load_data stragglers post-wi5
Cianidos Apr 23, 2026
3eb9c34
feat(proto): add DRIVER_TYPE_CSV enum value
Cianidos Apr 23, 2026
0a0e321
feat(driver-csv): ephemeral CSV driver, URL-configured, NATIVE-only
Cianidos Apr 23, 2026
a8ce775
fix(helpers): skip pool config for csv driver like noop
Cianidos Apr 24, 2026
43ef8d1
test(driver-csv): smoke + integration + determinism cells
Cianidos Apr 24, 2026
0851cad
test(driver-csv): golden SHA256 for tpcb SF=1 reference output
Cianidos Apr 24, 2026
1fd1f88
docs: document csv ephemeral driver
Cianidos Apr 24, 2026
29654bc
docs: authoritative datagen framework guide
Cianidos Apr 24, 2026
8eda093
docs: parallelism contract + tuning guide
Cianidos Apr 24, 2026
57b2450
fix(linter):
Cianidos Apr 24, 2026
a854d61
fix(drivers): default bulk size 500=>2500
Cianidos Apr 24, 2026
80a37a3
fix(tpcc): c_last incorrect index
Cianidos Apr 24, 2026
b8375a1
feat(ydb,ts): partitioning, indexes
Cianidos Apr 27, 2026
5981431
fix(tx,tpcc): non ydb indexes error
Cianidos Apr 27, 2026
6969979
feat(ydb): fall back to internalCA
Cianidos Apr 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,19 @@ app-deps: # Install application dependencies in ./bin
.PHONY: proto
proto: .check-bins
rm -rf $(CURDIR)/pkg/common/proto/*
rm -rf $(CURDIR)/pkg/datagen/dgproto
rm -rf $(PROTO_BUILD_TARGET_DIR)/ts
mkdir -p $(PROTO_BUILD_TARGET_DIR)/ts/stroppy
mkdir -p $(PROTO_BUILD_TARGET_DIR)/docs
mkdir -p $(PROTO_BUILD_TARGET_DIR)/go
$(MAKE) .easyp-gen && $(MAKE) .build-proto-ts-sdk
# NOTE: easyp generates the code into the right place 'proto/stroppy' by itself
printf '// Code generated by stroppy. DO NOT EDIT.\npackage stroppy\n\nconst Version = "%s"\n' "$(VERSION)" > ./pkg/common/proto/stroppy/version.stroppy.pb.go
# datagen.proto declares go_package=".../pkg/datagen/dgproto"; relocate its
# generated files from the source-relative layout into that package.
mkdir -p $(CURDIR)/pkg/datagen/dgproto
mv $(CURDIR)/pkg/common/proto/stroppy/datagen.pb.go $(CURDIR)/pkg/datagen/dgproto/datagen.pb.go
mv $(CURDIR)/pkg/common/proto/stroppy/datagen.pb.validate.go $(CURDIR)/pkg/datagen/dgproto/datagen.pb.validate.go

cp $(PROTO_BUILD_TARGET_DIR)/ts/stroppy.pb.ts $(CURDIR)/internal/static/
cp $(PROTO_BUILD_TARGET_DIR)/ts/stroppy.pb.js $(CURDIR)/internal/static/
Expand Down Expand Up @@ -241,6 +247,32 @@ tests: # Run tests with coverage
go test -race ./... -coverprofile=coverage.out


##
## Reference-data JSON regeneration (build-time, run with upstream inputs)
##

.PHONY: gen-tpcds-json gen-tpch-json

# gen-tpcds-json needs TPCDS_TOOLS_DIR (the dsdgen tools directory holding the
# .dst files). When it is unset the target prints the reason on stderr and
# exits 2. The path is quoted on the go run line so a directory containing
# spaces reaches dstparse as a single -in argument.
gen-tpcds-json: # Regenerate workloads/tpcds/distributions.json from upstream .dst files
	@if [ -z "$(TPCDS_TOOLS_DIR)" ]; then \
		echo "error: TPCDS_TOOLS_DIR must point to the dsdgen tools directory holding .dst files (e.g. /path/to/DSGen/tools)" >&2; \
		exit 2; \
	fi
	go run ./cmd/dstparse -in "$(TPCDS_TOOLS_DIR)" -out workloads/tpcds/distributions.json

# gen-tpch-json needs two inputs: TPCH_DISTS (the upstream dists.dss file) and
# TPCH_ANSWERS_DIR (the upstream answers/ directory). Each guard prints its
# message on stderr and exits 2 when the variable is unset. Both paths are
# quoted on the go run lines so values containing spaces stay one argument.
gen-tpch-json: # Regenerate workloads/tpch/distributions.json and answers_sf1.json from upstream files
	@if [ -z "$(TPCH_DISTS)" ]; then \
		echo "error: TPCH_DISTS must point to upstream dists.dss" >&2; \
		exit 2; \
	fi
	@if [ -z "$(TPCH_ANSWERS_DIR)" ]; then \
		echo "error: TPCH_ANSWERS_DIR must point to the upstream answers/ directory (q*.out / *.ans)" >&2; \
		exit 2; \
	fi
	go run ./cmd/tpch-dists -in "$(TPCH_DISTS)" -out workloads/tpch/distributions.json
	go run ./cmd/tpch-answers -in "$(TPCH_ANSWERS_DIR)" -out workloads/tpch/answers_sf1.json


# K6/Stroppy build section

.PHONY: build-k6 build-k6-debug build-debug build build-all
Expand Down Expand Up @@ -350,3 +382,37 @@ ts-test: # Run TypeScript unit tests

ts-watch: # Watch TypeScript files and run tests automatically
cd internal/static && npm run test:watch

##
## Tmpfs Postgres integration harness
##

.PHONY: tmpfs-up tmpfs-down tmpfs-clean tmpfs-psql

# Brings up the services declared in test/compose.tmpfs.yml in detached mode;
# --wait makes the command block until compose reports them ready.
tmpfs-up: # Start tmpfs Postgres container for integration tests
	docker compose -f test/compose.tmpfs.yml up -d --wait

# Tears the same compose project down; -v also removes its volumes.
tmpfs-down: # Stop and remove tmpfs Postgres container and volumes
	docker compose -f test/compose.tmpfs.yml down -v

# Runs tmpfs-down and then tmpfs-up through recursive make; the && means the
# harness is not restarted if the teardown step fails.
tmpfs-clean: # Recycle the tmpfs Postgres container; discards all data
	$(MAKE) tmpfs-down && $(MAKE) tmpfs-up

# Opens an interactive psql session (user postgres, database stroppy) inside
# the container named stroppy-pg-tmpfs.
# NOTE(review): the container name is presumably set in test/compose.tmpfs.yml — confirm.
tmpfs-psql: # Open psql shell into the tmpfs Postgres container
	docker exec -it stroppy-pg-tmpfs psql -U postgres -d stroppy

##
## Multi-DB tmpfs integration harness (postgres + mysql + picodata + ydb)
##

.PHONY: tmpfs-all-up tmpfs-all-down tmpfs-all-clean

# Two steps: first start the four database services detached and wait on them
# (--wait), then run the picodata-init service without -d, i.e. attached.
# NOTE(review): picodata-init is presumably a one-shot bootstrap job that must
# run after the databases are up — confirm in test/compose.tmpfs-all.yml.
tmpfs-all-up: # Start all 4 DBs (pg, mysql, picodata, ydb) on non-default ports
	docker compose -f test/compose.tmpfs-all.yml up -d --wait pg-tmpfs-all mysql-tmpfs-all picodata-tmpfs-all ydb-tmpfs-all
	docker compose -f test/compose.tmpfs-all.yml up picodata-init

# Tears down the whole compose project; -v also removes its volumes.
tmpfs-all-down: # Stop + remove all 4 DBs and their volumes
	docker compose -f test/compose.tmpfs-all.yml down -v

# Runs tmpfs-all-down and then tmpfs-all-up through recursive make; the &&
# means the harness is not restarted if the teardown step fails.
tmpfs-all-clean: # Recycle the 4-DB harness; discards all data
	$(MAKE) tmpfs-all-down && $(MAKE) tmpfs-all-up
192 changes: 192 additions & 0 deletions cmd/dstparse/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// dstparse transforms TPC-DS dsdgen `.dst` distribution files into the
// uniform Dict-shaped JSON document consumed by the relations data
// generator. Given a directory the tool parses every `.dst` file in it
// (non-recursive) and merges the distributions into one document; given
// a single file it parses just that one.
//
// Usage:
//
// dstparse -in <dir-or-file> [-out <path>] [-source <label>] [-pretty] [-version <tag>]
//
// The `.dst` files are upstream TPC-DS artifacts; this tool is the
// one-way boundary that imports them at build time. The generated JSON
// is what stroppy ships under workloads/tpcds/.
package main

import (
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
)

// exitUsage is the process exit code used for CLI usage errors.
const exitUsage = 2

// outFilePerm is the permission mode for emitted JSON files. Parsed
// reference data is not secret but does not need to be world-readable.
const outFilePerm = 0o600

// errInput is the sentinel wrapped by CLI input errors.
var errInput = errors.New("input error")

// main parses the command-line flags, resolves -in into a list of .dst
// files, merges their distributions into one document and emits that
// document as JSON: to the -out path when given, otherwise to stdout
// followed by a newline.
//
// Exit status is exitUsage when -in is missing and 1 for any stat,
// directory listing, parse, marshal or write failure. All diagnostics go to
// stderr so stdout carries nothing but the JSON document.
func main() {
	in := flag.String("in", "", "directory or single .dst file (required)")
	out := flag.String("out", "", "output JSON path (stdout when omitted)")
	version := flag.String("version", "1", "schema version string embedded in output")
	sourceLabel := flag.String("source", "", "human-readable source label (defaults to input basename)")
	pretty := flag.Bool("pretty", false, "emit indented JSON")

	flag.Parse()

	if *in == "" {
		fmt.Fprintln(os.Stderr, "dstparse: -in is required")
		flag.Usage()
		os.Exit(exitUsage)
	}

	// Stat once up front: the result decides between single-file and
	// directory mode for both file collection and the source label.
	info, err := os.Stat(*in)
	if err != nil {
		fmt.Fprintf(os.Stderr, "dstparse: stat %s: %v\n", *in, err)
		os.Exit(1)
	}

	files, err := collectInputFiles(*in, info.IsDir())
	if err != nil {
		fmt.Fprintf(os.Stderr, "dstparse: %v\n", err)
		os.Exit(1)
	}

	root := &doc{
		Version:       *version,
		Distributions: map[string]*dict{},
	}

	if err := mergeFiles(root, files); err != nil {
		fmt.Fprintf(os.Stderr, "dstparse: %v\n", err)
		os.Exit(1)
	}

	root.Source = buildSourceLabel(*sourceLabel, *in, files, info.IsDir())

	var data []byte
	if *pretty {
		data, err = json.MarshalIndent(root, "", " ")
	} else {
		data, err = json.Marshal(root)
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "dstparse: marshal: %v\n", err)
		os.Exit(1)
	}

	if *out == "" {
		// A failed stdout write (closed pipe, full disk behind a shell
		// redirect) would otherwise leave truncated JSON behind a zero
		// exit status, so it is reported like any other write failure.
		if _, err := os.Stdout.Write(append(data, '\n')); err != nil {
			fmt.Fprintf(os.Stderr, "dstparse: write stdout: %v\n", err)
			os.Exit(1)
		}

		return
	}

	if err := os.WriteFile(*out, data, outFilePerm); err != nil {
		fmt.Fprintf(os.Stderr, "dstparse: write %s: %v\n", *out, err)
		os.Exit(1)
	}

	fmt.Fprintf(os.Stderr, "dstparse: wrote %s (%d distributions, %d bytes)\n",
		*out, len(root.Distributions), len(data))
}

// mergeFiles folds every listed .dst file into root, in slice order, by
// handing each path to mergeOneFile. Processing stops at the first failure
// (including a distribution name that an earlier file already supplied)
// and that error is returned unchanged; nil means every file merged.
func mergeFiles(root *doc, files []string) error {
	for i := range files {
		err := mergeOneFile(root, files[i])
		if err != nil {
			return err
		}
	}

	return nil
}

// mergeOneFile loads the .dst file at path, runs it through parseStream and
// registers each resulting distribution in root.Distributions under its
// name.
//
// Read and parse failures are wrapped with the offending path. A name that
// is already present in root yields an error wrapping errInput; entries
// merged before the clash stay in root.
func mergeOneFile(root *doc, path string) error {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		return fmt.Errorf("read %s: %w", path, readErr)
	}

	parsed, parseErr := parseStream(bytes.NewReader(contents))
	if parseErr != nil {
		return fmt.Errorf("parse %s: %w", path, parseErr)
	}

	for _, entry := range parsed {
		name := entry.name

		_, taken := root.Distributions[name]
		if taken {
			return fmt.Errorf("%w: duplicate distribution %q (in %s)", errInput, name, path)
		}

		root.Distributions[name] = entry.toDict()
	}

	return nil
}

// collectInputFiles turns the -in argument into the list of files to parse.
//
// When isDir is false the argument is returned as the only element, with no
// extension check. When isDir is true the directory is listed one level
// deep: sub-directories are ignored, entries whose extension equals ".dst"
// (compared case-insensitively) are kept, and the joined paths come back in
// ascending string order.
//
// Errors: a directory listing failure is wrapped with the directory name; a
// directory without any matching entry yields an error wrapping errInput.
func collectInputFiles(in string, isDir bool) ([]string, error) {
	if !isDir {
		return []string{in}, nil
	}

	entries, err := os.ReadDir(in)
	if err != nil {
		return nil, fmt.Errorf("read dir %s: %w", in, err)
	}

	var matches []string

	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() || !strings.EqualFold(filepath.Ext(name), ".dst") {
			continue
		}

		matches = append(matches, filepath.Join(in, name))
	}

	if len(matches) == 0 {
		return nil, fmt.Errorf("%w: no .dst files in %s", errInput, in)
	}

	sort.Strings(matches)

	return matches, nil
}

// buildSourceLabel produces the label that main stores in the document's
// Source field.
//
// Precedence: a non-empty explicit label is returned verbatim; for a
// single-file input the label is the base name of in; for a directory it is
// "<dir>/{a,b,...}", where <dir> is the base name of in and the braces hold
// each file's base name with its extension removed, in the order given.
func buildSourceLabel(explicit, in string, files []string, isDir bool) string {
	switch {
	case explicit != "":
		return explicit
	case !isDir:
		return filepath.Base(in)
	}

	stems := make([]string, 0, len(files))

	for _, file := range files {
		leaf := filepath.Base(file)
		stems = append(stems, strings.TrimSuffix(leaf, filepath.Ext(file)))
	}

	return fmt.Sprintf("%s/{%s}", filepath.Base(in), strings.Join(stems, ","))
}
Loading
Loading