diff --git a/README.md b/README.md
index 0e10221..792ee46 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# Dataforge
-Ferramenta para geração de datasets sintéticos **relacionais** com integridade referencial garantida. Disponível via interface visual no navegador e via linha de comando (CLI). Ideal para testar pipelines de dados, popular bancos de desenvolvimento e criar fixtures para modelos dbt — sem usar dados sensíveis.
+Ferramenta para geração de datasets sintéticos **relacionais** com integridade referencial garantida. Disponível via **interface visual no navegador** e via **linha de comando (CLI)**. Suporta múltiplos formatos de saída (CSV, JSON, Parquet, Avro), carga direta em bancos SQL (PostgreSQL, MySQL, SQLite) e upload automático para nuvem (GCS, S3, Azure). Ideal para testar pipelines de dados, popular bancos de desenvolvimento, criar fixtures para modelos dbt e gerar dados de demonstração — sem usar dados sensíveis.
---
@@ -71,10 +71,12 @@ A interface visual roda em `http://localhost:5173` e é a forma principal de uso
- **Save as Default** — salva o schema no servidor (`src/dataforge/schemas/`) para reutilização futura
- **Run Generator** — executa o CLI diretamente da interface com configuração visual completa:
- Formatos de saída (CSV, JSON, Parquet, Avro) e modo JSON (flat/nested)
- - Destino: **local**, **nuvem** (GCS, S3, Azure) ou **banco de dados** (PostgreSQL, MySQL, SQLite) com teste de conexão e conexões salvas
+ - Destino: **local** (com seletor de pasta nativo no Windows), **nuvem** (GCS, S3, Azure) ou **banco de dados** (PostgreSQL, MySQL, SQLite) com teste de conexão e conexões salvas
+ - Credenciais cloud inseridas diretamente na UI (GCS JSON, S3 Access Key/Secret, Azure Connection String) com suporte a **perfis salvos** — salve e carregue credenciais por nome sem precisar de arquivos externos
- Particionamento Hive-style por tabela
- Modo recorrente, seed e incrementos de coluna
- Logs de execução em tempo real com botão de parada
+ - Botão **? Help** com referência de todos os campos
O diagrama é atualizado em tempo real e mostra as relações entre tabelas com setas representando FKs.
@@ -361,12 +363,30 @@ docker compose run --rm cli generate -d ecommerce -f parquet --partition-by "ord
## Upload em nuvem
-Coloque o arquivo de credenciais na pasta `credentials/` do projeto (ela é montada no container em `/app/credentials/`).
+Há duas formas de fornecer credenciais cloud ao Dataforge:
+
+### Opção 1 — Interface Visual (recomendada)
+
+Na seção **Destination → Cloud** do Run Generator, insira as credenciais diretamente na UI:
+
+| Provider | Campos |
+|----------|--------|
+| Google Cloud Storage | JSON completo da Service Account |
+| Amazon S3 | Access Key ID, Secret Access Key e Region |
+| Azure Blob Storage | Connection String |
+
+Clique em **Save credentials** para salvar um perfil nomeado localmente (`credentials/profiles.json`). Perfis salvos aparecem no topo da seção Cloud e podem ser carregados com um clique.
+
+> **Nota:** o seletor de pasta (📁) no destino Local só funciona quando o Dataforge roda localmente no Windows. No Docker, digite o caminho manualmente (ex: `/app/output/dados`).
+
+### Opção 2 — Arquivo na pasta `credentials/`
+
+Coloque o arquivo de credenciais na pasta `credentials/` do projeto (ela é montada no container em `/app/credentials/`). As credenciais da UI têm prioridade; a pasta serve de fallback.
### Google Cloud Storage
```bash
-# Usando arquivo de service account
+# Via arquivo de service account (fallback)
docker compose run --rm cli generate -d ecommerce -f parquet \
--upload gcs \
--bucket meu-bucket \
@@ -377,10 +397,11 @@ docker compose run --rm cli generate -d ecommerce -f parquet \
### Amazon S3
```bash
-# Autenticação via variáveis de ambiente no docker-compose ou inline
+# Via variáveis de ambiente
docker compose run --rm \
-e AWS_ACCESS_KEY_ID=... \
-e AWS_SECRET_ACCESS_KEY=... \
+ -e AWS_DEFAULT_REGION=us-east-1 \
cli generate -d hr -f csv \
--upload s3 \
--bucket meu-bucket \
diff --git a/src/dataforge/frontend/src/App.tsx b/src/dataforge/frontend/src/App.tsx
index 948a43a..d2ee125 100644
--- a/src/dataforge/frontend/src/App.tsx
+++ b/src/dataforge/frontend/src/App.tsx
@@ -615,6 +615,43 @@ export default function App() {
const [showRunPanel, setShowRunPanel] = useState(false);
const [showRunHelp, setShowRunHelp] = useState(false);
+ const [canBrowseFolder, setCanBrowseFolder] = useState(false);
+ const [credProfiles, setCredProfiles] = useState<{ name: string; provider: string }[]>([]);
+ const [saveCredName, setSaveCredName] = useState('');
+ const [showSaveCredInput, setShowSaveCredInput] = useState(false);
+
+ React.useEffect(() => {
+ fetch('/api/capabilities').then(r => r.json()).then(d => setCanBrowseFolder(!!d.browseFolder)).catch(() => {});
+ fetchCredProfiles();
+ }, []);
+
+ const fetchCredProfiles = () => {
+ fetch('/api/credential-profiles').then(r => r.json()).then(setCredProfiles).catch(() => {});
+ };
+
+ const handleSaveCredProfile = async () => {
+ const name = saveCredName.trim();
+ if (!name) return;
+ await fetch('/api/credential-profiles', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ name, provider: runConfig.uploadTarget, creds: runConfig.cloudCreds }),
+ });
+ setSaveCredName('');
+ setShowSaveCredInput(false);
+ fetchCredProfiles();
+ };
+
+ const handleLoadCredProfile = async (name: string) => {
+ const res = await fetch(`/api/credential-profiles/${encodeURIComponent(name)}`);
+ const profile = await res.json();
+ setRunConfig(r => ({ ...r, uploadTarget: profile.provider, cloudCreds: profile.creds }));
+ };
+
+ const handleDeleteCredProfile = async (name: string) => {
+ await fetch(`/api/credential-profiles/${encodeURIComponent(name)}`, { method: 'DELETE' });
+ fetchCredProfiles();
+ };
const [runConfig, setRunConfig] = useState<{
formats: string[],
destination: 'local' | 'cloud' | 'database',
@@ -634,6 +671,13 @@ export default function App() {
tablesToInclude: string[],
columnsFilter: string,
increments: Array<{ table: string; column: string; step: string; unit: string }>,
+ cloudCreds: {
+ gcsJson: string,
+ s3AccessKey: string,
+ s3SecretKey: string,
+ s3Region: string,
+ azureConnStr: string,
+ },
}>({
formats: ['csv'],
destination: 'local' as 'local' | 'cloud' | 'database',
@@ -653,6 +697,13 @@ export default function App() {
tablesToInclude: [],
columnsFilter: '',
increments: [],
+ cloudCreds: {
+ gcsJson: '',
+ s3AccessKey: '',
+ s3SecretKey: '',
+ s3Region: 'us-east-1',
+ azureConnStr: '',
+ },
});
const [runLogs, setRunLogs] = useState('');
const [isRunning, setIsRunning] = useState(false);
@@ -688,6 +739,7 @@ export default function App() {
tables: runConfig.tablesToInclude.length > 0 ? runConfig.tablesToInclude : undefined,
columns: runConfig.columnsFilter.trim() ? runConfig.columnsFilter.trim().split('\n').filter(Boolean) : undefined,
increments: runConfig.increments.filter(i => i.table && i.column && i.step !== ''),
+ cloudCreds: runConfig.destination === 'cloud' ? runConfig.cloudCreds : undefined,
})
});
@@ -1297,7 +1349,7 @@ export default function App() {
setRunConfig(r => ({...r, outputDir: e.target.value}))} style={{ flex: 1, padding: '0.5rem' }} placeholder="e.g. output" />
-
+ }
)}
@@ -1324,6 +1376,29 @@ export default function App() {
{/* Cloud */}
{runConfig.destination === 'cloud' && (
+
+ {/* Saved credential profiles */}
+ {credProfiles.filter(p => p.provider === runConfig.uploadTarget).length > 0 && (
+
+
+
+ {credProfiles.filter(p => p.provider === runConfig.uploadTarget).map(p => (
+
+
+
+
+ ))}
+
+
+ )}
+
+ {/* Provider */}
+
+ {/* Bucket + Prefix */}
-
- ✓ Credentials auto-loaded from credentials/
-
+
+ {/* Credentials — per provider */}
+
+
+
Credentials
+ {showSaveCredInput ? (
+
+ setSaveCredName(e.target.value)}
+ onKeyDown={e => { if (e.key === 'Enter') handleSaveCredProfile(); if (e.key === 'Escape') setShowSaveCredInput(false); }}
+ placeholder="Profile name..."
+ autoFocus
+ style={{ padding: '0.25rem 0.5rem', fontSize: '0.78rem', background: 'rgba(255,255,255,0.07)', border: '1px solid rgba(255,255,255,0.15)', borderRadius: '5px', color: 'white', width: '130px' }}
+ />
+
+
+
+ ) : (
+
+ )}
+
+
+ {runConfig.uploadTarget === 'gcs' && (
+
+
+
+ )}
+
+ {runConfig.uploadTarget === 's3' && (
+
+
+
+
+ setRunConfig(r => ({...r, cloudCreds: {...r.cloudCreds, s3Region: e.target.value}}))} style={{ width: '100%', padding: '0.5rem', boxSizing: 'border-box' }} placeholder="us-east-1" />
+
+
+ )}
+
+ {runConfig.uploadTarget === 'azure' && (
+
+
+ setRunConfig(r => ({...r, cloudCreds: {...r.cloudCreds, azureConnStr: e.target.value}}))} style={{ width: '100%', padding: '0.5rem', boxSizing: 'border-box' }} placeholder="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net" />
+
+ )}
+
)}
@@ -1690,9 +1834,24 @@ export default function App() {
}
+ {(() => {
+ const validationError =
+ runConfig.destination === 'cloud' && !runConfig.bucket.trim()
+ ? 'Bucket / Container is required for cloud upload.'
+ : null;
+ return validationError ? (
+
+ ⚠ {validationError}
+
+ ) : null;
+ })()}
+
+ {(() => {
+ const disabled = isRunning || (runConfig.destination === 'cloud' && !runConfig.bucket.trim());
+ return (
-
)}
+ );
+ })()}
diff --git a/src/dataforge/frontend/vite.config.ts b/src/dataforge/frontend/vite.config.ts
index 8c63051..a1eec92 100644
--- a/src/dataforge/frontend/vite.config.ts
+++ b/src/dataforge/frontend/vite.config.ts
@@ -16,7 +16,18 @@ const cliRunnerPlugin = () => ({
name: 'cli-runner',
configureServer(server: any) {
server.middlewares.use(async (req: any, res: any, next: any) => {
+ if (req.url === '/api/capabilities' && req.method === 'GET') {
+ res.setHeader('Content-Type', 'application/json')
+ res.end(JSON.stringify({ browseFolder: process.platform === 'win32' }))
+ return
+ }
+
if (req.url === '/api/browse-folder' && req.method === 'GET') {
+ if (process.platform !== 'win32') {
+ res.setHeader('Content-Type', 'application/json')
+ res.end(JSON.stringify({ path: '', unsupported: true }))
+ return
+ }
if (browseInProgress) {
res.setHeader('Content-Type', 'application/json')
res.end(JSON.stringify({ path: '' }))
@@ -72,7 +83,7 @@ const cliRunnerPlugin = () => ({
req.on('end', () => {
try {
const data = JSON.parse(body);
- const { yamlStr, formats, outputDir, uploadTarget, bucket, prefix, partitionByTable, jsonMode, seed, dbUrl, ifExists, dbSchema, recurrence, count, credentials, rows, tables: tablesToInclude, columns: columnsToInclude, increments } = data;
+ const { yamlStr, formats, outputDir, uploadTarget, bucket, prefix, partitionByTable, jsonMode, seed, dbUrl, ifExists, dbSchema, recurrence, count, credentials, rows, tables: tablesToInclude, columns: columnsToInclude, increments, cloudCreds } = data;
const baseDir = resolve(__dirname, '../../../');
// Cloud and database-only runs use a temp dir that is cleaned up after
@@ -114,7 +125,7 @@ const cliRunnerPlugin = () => ({
if (col) args.push('--partition-by', `${table}:${col}`);
}
}
- // Resolve credentials from the fixed credentials/ folder
+ // Resolve credentials: UI input takes priority, falls back to credentials/ folder
const credentialsDir = resolve(baseDir, 'credentials');
const extraEnv: Record<string, string> = {};
if (uploadTarget) {
@@ -122,16 +133,35 @@ const cliRunnerPlugin = () => ({
if (bucket?.trim()) args.push('--bucket', bucket.trim());
if (prefix?.trim()) args.push('--prefix', prefix.trim());
- if (existsSync(credentialsDir)) {
- const credFiles = readdirSync(credentialsDir);
- if (uploadTarget === 'gcs') {
- const jsonFile = credFiles.find(f => f.endsWith('.json'));
+ const creds = cloudCreds ?? {};
+
+ if (uploadTarget === 'gcs') {
+ if (creds.gcsJson?.trim()) {
+ // Write JSON key to temp file
+ const tempGcsKey = join(tmpdir(), `df_gcs_key_${Date.now()}.json`);
+ writeFileSync(tempGcsKey, creds.gcsJson.trim(), 'utf-8');
+ args.push('--credentials', tempGcsKey);
+ } else if (existsSync(credentialsDir)) {
+ const jsonFile = readdirSync(credentialsDir).find(f => f.endsWith('.json'));
if (jsonFile) args.push('--credentials', resolve(credentialsDir, jsonFile));
- } else if (uploadTarget === 's3') {
- const awsFile = credFiles.find(f => f === 'credentials' || f.endsWith('.ini') || f.endsWith('.csv'));
+ }
+ } else if (uploadTarget === 's3') {
+ if (creds.s3AccessKey?.trim() && creds.s3SecretKey?.trim()) {
+ // Write AWS credentials file to temp
+ const tempAwsCreds = join(tmpdir(), `df_aws_creds_${Date.now()}.ini`);
+ const awsContent = `[default]\naws_access_key_id = ${creds.s3AccessKey.trim()}\naws_secret_access_key = ${creds.s3SecretKey.trim()}\n`;
+ writeFileSync(tempAwsCreds, awsContent, 'utf-8');
+ extraEnv['AWS_SHARED_CREDENTIALS_FILE'] = tempAwsCreds;
+ if (creds.s3Region?.trim()) extraEnv['AWS_DEFAULT_REGION'] = creds.s3Region.trim();
+ } else if (existsSync(credentialsDir)) {
+ const awsFile = readdirSync(credentialsDir).find(f => f === 'credentials' || f.endsWith('.ini') || f.endsWith('.csv'));
if (awsFile) extraEnv['AWS_SHARED_CREDENTIALS_FILE'] = resolve(credentialsDir, awsFile);
- } else if (uploadTarget === 'azure') {
- const azFile = credFiles.find(f => f.endsWith('.txt') || f === 'connection_string');
+ }
+ } else if (uploadTarget === 'azure') {
+ if (creds.azureConnStr?.trim()) {
+ extraEnv['AZURE_STORAGE_CONNECTION_STRING'] = creds.azureConnStr.trim();
+ } else if (existsSync(credentialsDir)) {
+ const azFile = readdirSync(credentialsDir).find(f => f.endsWith('.txt') || f === 'connection_string');
if (azFile) extraEnv['AZURE_STORAGE_CONNECTION_STRING'] = readFileSync(resolve(credentialsDir, azFile), 'utf-8').trim();
}
}
@@ -683,6 +713,64 @@ DATASET DESCRIPTION:
return;
}
+ // ── Credentials profiles ──────────────────────────────────────────────
+ const profilesPath = resolve(resolve(__dirname, '../../../'), 'credentials', 'profiles.json')
+
+    const readProfiles = (): Record<string, { provider: string; creds: Record<string, string> }> => {
+ try { return JSON.parse(readFileSync(profilesPath, 'utf-8')); } catch { return {}; }
+ }
+    const writeProfiles = (data: Record<string, { provider: string; creds: Record<string, string> }>) => {
+ mkdirSync(resolve(profilesPath, '..'), { recursive: true });
+ writeFileSync(profilesPath, JSON.stringify(data, null, 2), 'utf-8');
+ }
+
+ if (req.url === '/api/credential-profiles' && req.method === 'GET') {
+ const profiles = readProfiles();
+ res.setHeader('Content-Type', 'application/json');
+ res.end(JSON.stringify(Object.keys(profiles).map(name => ({ name, provider: profiles[name].provider }))));
+ return;
+ }
+
+ if (req.url === '/api/credential-profiles' && req.method === 'POST') {
+ let body = '';
+ req.on('data', (chunk: Buffer) => { body += chunk.toString(); });
+ req.on('end', () => {
+ try {
+ const { name, provider, creds } = JSON.parse(body);
+ if (!name?.trim()) { res.statusCode = 400; res.end(JSON.stringify({ error: 'Name is required.' })); return; }
+ const profiles = readProfiles();
+ profiles[name.trim()] = { provider, creds };
+ writeProfiles(profiles);
+ res.setHeader('Content-Type', 'application/json');
+ res.end(JSON.stringify({ success: true }));
+ } catch (e: any) {
+ res.statusCode = 500;
+ res.end(JSON.stringify({ error: e.message }));
+ }
+ });
+ return;
+ }
+
+ const credProfileMatch = req.url?.match(/^\/api\/credential-profiles\/(.+)$/);
+ if (credProfileMatch && req.method === 'DELETE') {
+ const name = decodeURIComponent(credProfileMatch[1]);
+ const profiles = readProfiles();
+ delete profiles[name];
+ writeProfiles(profiles);
+ res.setHeader('Content-Type', 'application/json');
+ res.end(JSON.stringify({ success: true }));
+ return;
+ }
+
+ if (credProfileMatch && req.method === 'GET') {
+ const name = decodeURIComponent(credProfileMatch[1]);
+ const profiles = readProfiles();
+ if (!profiles[name]) { res.statusCode = 404; res.end(JSON.stringify({ error: 'Not found' })); return; }
+ res.setHeader('Content-Type', 'application/json');
+ res.end(JSON.stringify(profiles[name]));
+ return;
+ }
+
next();
});
}