diff --git a/CHANGELOG.md b/CHANGELOG.md index c385a38c..11a33c6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,20 @@ ## Under the hood ## Fixes +# edu_edfi_source v0.3.3 +## New features +- Add column `last_modified_timestamp` to every base table (via `source_edfi3` macro). This includes timestamps of deletes, which is helpful for tracking down when deletes occurred. +## Under the hood +- Add explicit namespacing for macro call: `edu_edfi_source.extract_descriptor()` within `gen_skey()` so `gen_skey()` can be used outside this package +## Fixes +- Force staff_unique_id and student_unique_id to lower in construction of `k_staff` and `k_student`. This is needed for keys to match foreign keys generated using `gen_skey()` macro. + +# edu_edfi_source v0.3.2 +## New features +- Add `base_ef3__staff_education_organization_employment_associations` +- Add `stg_ef3__staff_education_organization_employment_associations` +- Add `stg_ef3__staff__races` + # edu_edfi_source v0.3.1 ## Fixes - Fix to `stg_ef3__student_assessments`: remove deleted records diff --git a/dbt_project.yml b/dbt_project.yml index 39a961cd..ffedd82f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,7 +1,7 @@ config-version: 2 name: 'edu_edfi_source' -version: '0.3.0' +version: '0.3.3' require-dbt-version: [">=1.0.0", "<2.0.0"] # This setting configures which "profile" dbt uses for this project. 
diff --git a/macros/gen_skey.sql b/macros/gen_skey.sql index a0821ff5..86d3cb5d 100644 --- a/macros/gen_skey.sql +++ b/macros/gen_skey.sql @@ -252,7 +252,7 @@ {#- hack: if key contains Descriptor, parse value out -#} {% if 'Descriptor' in skey_var or 'descriptor' in skey_var %} - {%- set concatted_keys = extract_descriptor(concatted_keys) %} + {%- set concatted_keys = edu_edfi_source.extract_descriptor(concatted_keys) %} {% endif %} {#- grow the output object with the new key -#} {% do output.append(concatted_keys) %} diff --git a/macros/source_edfi3.sql b/macros/source_edfi3.sql index 98e699bc..03cbe024 100644 --- a/macros/source_edfi3.sql +++ b/macros/source_edfi3.sql @@ -4,7 +4,8 @@ {% if join_deletes %} select api_data.*, - get_ignore_case(deletes_data.v, 'id')::string is not null as is_deleted + get_ignore_case(deletes_data.v, 'id')::string is not null as is_deleted, + coalesce(deletes_data.pull_timestamp, api_data.pull_timestamp) as last_modified_timestamp {# deletes_data.pull_timestamp when non-null, otherwise api_data.pull_timestamp #} from {{ source('raw_edfi_3', resource) }} as api_data @@ -17,7 +18,8 @@ ) {% else %} - select *, false as is_deleted from {{ source('raw_edfi_3', resource) }} + select *, false as is_deleted, pull_timestamp as last_modified_timestamp + from {{ source('raw_edfi_3', resource) }} {% endif %} diff --git a/models/staging/edfi_3/base/base_ef3__student_cte_program_associations.sql b/models/staging/edfi_3/base/base_ef3__student_cte_program_associations.sql new file mode 100644 index 00000000..d362e773 --- /dev/null +++ b/models/staging/edfi_3/base/base_ef3__student_cte_program_associations.sql @@ -0,0 +1,38 @@ +with student_cte_program_associations as ( {# raw rows for this resource; source_edfi3 also supplies is_deleted and last_modified_timestamp #} + {{ edu_edfi_source.source_edfi3('student_cte_program_associations') }} +), +renamed as ( {# typed, snake_case columns extracted from the semi-structured column v #} + select + tenant_code, + api_year, + pull_timestamp, + last_modified_timestamp, + file_row_number, + filename, + is_deleted, + v:id::string as record_guid + + , v:beginDate::date as program_enroll_begin_date + , v:endDate::date as program_enroll_end_date + , 
v:educationOrganizationReference as education_organization_reference + , v:educationOrganizationReference:educationOrganizationId::int as ed_org_id + , v:programReference as program_reference + , v:programReference:educationOrganizationId::int as program_ed_org_id + , v:programReference:programName::string as program_name + , {{ edu_edfi_source.extract_descriptor('v:programReference:programTypeDescriptor::string') }} as program_type + , v:studentReference as student_reference + , v:studentReference:studentUniqueId::string as student_unique_id + , v:ctePrograms::array as v_cte_programs + , v:cteProgramServices::array as v_cte_program_services {# flattened into rows by stg_ef3__stu_cte__program_services #} + , v:nonTraditionalGenderStatus::boolean as non_traditional_gender_status + , v:participationStatus::string as participation_status + , v:privateCTEProgram::boolean as private_cte_program + , v:programParticipationStatuses::array as v_program_participation_statuses + , {{ edu_edfi_source.extract_descriptor('v:reasonExitedDescriptor::string') }} as reason_exited + , v:servedOutsideOfRegularSession::boolean as served_outside_of_regular_session + , v:services::array as v_services + , {{ edu_edfi_source.extract_descriptor('v:technicalSkillsAssessmentDescriptor::string') }} as technical_skills_assessment + , v:_ext as v_ext + from student_cte_program_associations +) +select * from renamed diff --git a/models/staging/edfi_3/stage/stg_ef3__staffs.sql b/models/staging/edfi_3/stage/stg_ef3__staffs.sql index 5ebfa97a..fa18ddf0 100644 --- a/models/staging/edfi_3/stage/stg_ef3__staffs.sql +++ b/models/staging/edfi_3/stage/stg_ef3__staffs.sql @@ -7,7 +7,7 @@ keyed as ( {{ dbt_utils.surrogate_key( [ 'tenant_code', - 'staff_unique_id' + 'lower(staff_unique_id)' ] ) }} as k_staff, base_staffs.* @@ -23,4 +23,4 @@ deduped as ( ) }} ) -select * from deduped \ No newline at end of file +select * from deduped diff --git a/models/staging/edfi_3/stage/stg_ef3__stu_cte__program_services.sql 
b/models/staging/edfi_3/stage/stg_ef3__stu_cte__program_services.sql
new file mode 100644
index 00000000..d87465a7
--- /dev/null
+++ b/models/staging/edfi_3/stage/stg_ef3__stu_cte__program_services.sql
@@ -0,0 +1,26 @@
+{# One row per element of v_cte_program_services on each student CTE program
+   association. `value` is the array element produced by lateral flatten; the
+   parent row's keys are repeated on every flattened row. #}
+with stg_ef3__student_cte_program_associations as (
+    select * from {{ ref('stg_ef3__student_cte_program_associations') }}
+),
+flattened as (
+    select
+        tenant_code,
+        api_year,
+        k_student,
+        k_student_xyear,
+        ed_org_id,
+        k_lea,
+        k_school,
+        k_program,
+        school_year
+
+        , {{ edu_edfi_source.extract_descriptor('value:cteProgramServiceDescriptor::string') }} as cte_program_service
+        , value:beginDate::date as begin_date
+        , value:endDate::date as end_date
+
+    from stg_ef3__student_cte_program_associations,
+        lateral flatten(input => v_cte_program_services)
+)
+select * from flattened
diff --git a/models/staging/edfi_3/stage/stg_ef3__student_cte_program_associations.sql b/models/staging/edfi_3/stage/stg_ef3__student_cte_program_associations.sql
new file mode 100644
index 00000000..faa7a1a9
--- /dev/null
+++ b/models/staging/edfi_3/stage/stg_ef3__student_cte_program_associations.sql
@@ -0,0 +1,30 @@
+{# Staging model for student CTE program associations: filters out deleted
+   records, adds surrogate keys, and keeps the most recently pulled row per
+   partition key. #}
+with base_student_cte_program_associations as (
+    select *
+    from {{ ref('base_ef3__student_cte_program_associations') }}
+    where not is_deleted
+),
+keyed as (
+    select
+        {{ edu_edfi_source.gen_skey('k_student') }},
+        {{ edu_edfi_source.gen_skey('k_student_xyear') }},
+        {{ edu_edfi_source.gen_skey('k_program') }},
+        {{ edorg_ref(annualize=False) }},
+        api_year as school_year,
+        *
+        {{ extract_extension(model_name=this.name, flatten=True) }}
+    from base_student_cte_program_associations
+),
+deduped as (
+    {# The base model aliases beginDate to program_enroll_begin_date, so that is
+       the column to partition on; `begin_date` does not exist in `keyed`. #}
+    {{
+        dbt_utils.deduplicate(
+            relation='keyed',
+            partition_by='k_student, k_program, program_enroll_begin_date',
+            order_by='pull_timestamp desc'
+        )
+    }}
+)
+select * from deduped
diff --git a/models/staging/edfi_3/stage/stg_ef3__students.sql b/models/staging/edfi_3/stage/stg_ef3__students.sql
index
afe8c0cf..0716d0de 100644 --- a/models/staging/edfi_3/stage/stg_ef3__students.sql +++ b/models/staging/edfi_3/stage/stg_ef3__students.sql @@ -9,7 +9,7 @@ keyed as ( [ 'tenant_code', 'api_year', - 'student_unique_id' + 'lower(student_unique_id)' ] ) }} as k_student, {{ dbt_utils.surrogate_key( @@ -31,4 +31,4 @@ deduped as ( ) }} ) -select * from deduped \ No newline at end of file +select * from deduped