diff --git a/package-lock.json b/package-lock.json
index 6a7a827..3356918 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "draftapply",
-  "version": "1.0.0",
+  "version": "2.4.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "draftapply",
-      "version": "1.0.0",
+      "version": "2.4.0",
       "license": "MIT",
       "devDependencies": {
         "vitest": "^2.1.0"
diff --git a/shared/role-profile-service.js b/shared/role-profile-service.js
index 06b335b..8a687d4 100644
--- a/shared/role-profile-service.js
+++ b/shared/role-profile-service.js
@@ -122,6 +122,41 @@ export const ROLE_PROFILES = [
     ],
     positioning: 'Position the CV around analytical judgement, decision support, data quality, stakeholder requirements, and reporting outcomes. Avoid advanced modelling claims unless the CV proves them.',
   },
+  {
+    id: 'data_engineer',
+    family: 'Data Engineering',
+    domain: 'data_engineering',
+    aliases: [
+      'data engineer',
+      'database architect',
+      'data architect',
+      'database developer',
+      'database programmer',
+      'data warehouse developer',
+      'data warehousing specialist',
+      'etl developer',
+    ],
+    titlePatterns: [
+      /\bdata\s+engineer\b/i,
+      /\bdatabase\s+architect\b/i,
+      /\bdata\s+architect\b/i,
+      /\bdata\s+warehouse\b/i,
+      /\bdata\s+warehousing\b/i,
+      /\betl\s+developer\b/i,
+    ],
+    credibilitySignals: ['data pipelines', 'ETL/ELT delivery', 'data modelling', 'data quality', 'warehouse performance', 'reliable datasets'],
+    evidencePatterns: [/\b(etl|elt)\b/i, /\bpipeline\b/i, /\bdata\s+model/i, /\bwarehouse\b/i, /\b(sql|postgres|mysql|snowflake|bigquery|redshift)\b/i, /\b(airflow|prefect|dagster)\b/i, /\bdbt\b/i],
+    riskClaims: ['enterprise data strategy ownership', 'org-wide data governance ownership', 'security/compliance sign-off', '24/7 production on-call'],
+    transferableEvidence: ['backend engineering -> data service reliability', 'analytics/reporting -> dataset quality improvement', 'platform engineering -> workflow orchestration and observability'],
+    skillCategories: [
+      { label: 'Pipelines & Orchestration', skills: ['ETL/ELT', 'Batch Pipelines', 'Streaming Basics', 'Workflow Orchestration'] },
+      { label: 'Data Modelling & Warehousing', skills: ['Dimensional Modelling', 'Schema Design', 'Data Warehousing', 'Performance Tuning'] },
+      { label: 'Quality & Reliability', skills: ['Data Quality Checks', 'Testing', 'Monitoring', 'Incident Response'] },
+      { label: 'SQL & Storage', skills: ['SQL', 'PostgreSQL', 'Snowflake', 'BigQuery', 'Redshift'] },
+      { label: 'Automation & Governance', skills: ['Automation', 'Documentation', 'Lineage Basics', 'Access Controls Basics'] },
+    ],
+    positioning: 'Position the CV around reliable data pipelines, data modelling, warehouse performance, data quality, and dependable datasets for stakeholders. Avoid data strategy/governance ownership claims unless explicitly proven.',
+  },
   {
     id: 'customer_success',
     family: 'Customer Success / Support',
diff --git a/tests/role-profile-service.test.js b/tests/role-profile-service.test.js
index 4e81309..4d490c9 100644
--- a/tests/role-profile-service.test.js
+++ b/tests/role-profile-service.test.js
@@ -24,6 +24,13 @@ describe('RoleProfileService', () => {
     expect(service.classify({ jobTitle: 'Healthcare Administrator' })?.id).toBe('healthcare_admin');
   });
 
+  it('classifies data-engineering job titles into the data engineering role profile', () => {
+    expect(service.classify({ jobTitle: 'Data Engineer' })?.id).toBe('data_engineer');
+    expect(service.classify({ jobTitle: 'Database Architect' })?.id).toBe('data_engineer');
+    expect(service.classify({ jobTitle: 'ETL Developer' })?.id).toBe('data_engineer');
+    expect(service.enrichJDData({ jobTitle: 'Data Engineer' }).domain).toBe('data_engineering');
+  });
+
   it('enriches parsed JD data with positioning, credibility signals, risks, and skill categories', () => {
     const enriched = service.enrichJDData({
       jobTitle: 'Product Manager',