diff --git a/src/tableselection.jl b/src/tableselection.jl index 1da3fd8a..14bf376e 100644 --- a/src/tableselection.jl +++ b/src/tableselection.jl @@ -2,56 +2,47 @@ # Licensed under the MIT License. See LICENSE in the project root. # ------------------------------------------------------------------ -struct TableSelection{T,C} +""" + TableSelection(table, names) + +Stores a sub-`table` with given column `names`. +""" +struct TableSelection{T,N} table::T - cols::C - ncols::Int - names::Vector{Symbol} - onames::Vector{Symbol} - mapnames::Dict{Symbol,Symbol} - - function TableSelection(table::T, names, onames) where {T} - cols = Tables.columns(table) - _assert(onames ⊆ Tables.columnnames(cols), "all selected columns must exist in the table") - ncols = length(names) - mapnames = Dict(zip(names, onames)) - new{T,typeof(cols)}(table, cols, ncols, names, onames, mapnames) - end + names::NTuple{N,Symbol} end -function Base.:(==)(a::TableSelection, b::TableSelection) - a.names != b.names && return false - a.onames != b.onames && return false - all(nm -> Tables.getcolumn(a, nm) == Tables.getcolumn(b, nm), a.names) +function TableSelection(table, names) + cols = Tables.columns(table) + _assert(names ⊆ Tables.columnnames(cols), "invalid columns for table selection") + TableSelection(table, Tuple(names)) end -function Base.show(io::IO, t::TableSelection) - println(io, "TableSelection") - pretty_table(io, t, vcrop_mode=:middle, newline_at_end=false) -end - -# Tables.jl interface Tables.istable(::Type{<:TableSelection}) = true + Tables.columnaccess(::Type{<:TableSelection}) = true + Tables.columns(t::TableSelection) = t + Tables.columnnames(t::TableSelection) = t.names -function Tables.getcolumn(t::TableSelection, i::Int) - 1 ≤ i ≤ t.ncols || error("Table has no column with index $i.") - Tables.getcolumn(t.cols, t.mapnames[t.names[i]]) -end +Tables.getcolumn(t::TableSelection, i::Int) = Tables.getcolumn(Tables.columns(t.table), t.names[i]) -function Tables.getcolumn(t::TableSelection, nm::Symbol) - nm ∉ t.names && error("Table has no column $nm.") - Tables.getcolumn(t.cols, t.mapnames[nm]) -end +Tables.getcolumn(t::TableSelection, nm::Symbol) = Tables.getcolumn(Tables.columns(t.table), nm) Tables.materializer(t::TableSelection) = Tables.materializer(t.table) function Tables.schema(t::TableSelection) - schema = Tables.schema(t.cols) - names = schema.names - types = schema.types - inds = indexin(t.onames, collect(names)) - Tables.Schema(t.names, types[inds]) + schema = Tables.schema(t.table) + tnames = collect(t.names) + snames = collect(schema.names) + inds = indexin(tnames, snames) + names = schema.names[inds] + types = schema.types[inds] + Tables.Schema(names, types) +end + +function Base.show(io::IO, t::TableSelection) + println(io, "TableSelection") + pretty_table(io, t, vcrop_mode=:bottom, newline_at_end=false) end diff --git a/src/transforms/select.jl b/src/transforms/select.jl index 01382de6..a1feb244 100644 --- a/src/transforms/select.jl +++ b/src/transforms/select.jl @@ -45,16 +45,19 @@ Select(pairs::Pair{C,S}...) where {C<:Column,S<:AbstractString} = Select() = throw(ArgumentError("cannot create Select transform without arguments")) -# utils -_newnames(::Nothing, select) = select -_newnames(names::Vector{Symbol}, select) = names - function applyfeat(transform::Select, feat, prep) cols = Tables.columns(feat) names = collect(Tables.columnnames(cols)) - select = transform.selector(names) - newnames = _newnames(transform.newnames, select) - newfeat = TableSelection(feat, newnames, select) + + # lazy selection of columns + snames = transform.selector(names) + stable = TableSelection(feat, snames) + + # rename if necessary + nnames = transform.newnames + rename = isnothing(nnames) ? Identity() : Rename(nnames) + newfeat = stable |> rename + newfeat, nothing end @@ -92,9 +95,8 @@ Reject(::AllSelector) = throw(ArgumentError("cannot reject all columns")) function applyfeat(transform::Reject, feat, prep) cols = Tables.columns(feat) names = Tables.columnnames(cols) - reject = transform.selector(names) - select = setdiff(names, reject) - strans = Select(select) - newfeat, _ = applyfeat(strans, feat, prep) + snames = transform.selector(names) + select = Select(setdiff(names, snames)) + newfeat, _ = applyfeat(select, feat, prep) newfeat, nothing end diff --git a/test/tableselection.jl b/test/tableselection.jl index 84ed42c5..63f0bbd3 100644 --- a/test/tableselection.jl +++ b/test/tableselection.jl @@ -8,14 +8,13 @@ t = Table(; a, b, c, d, e, f) # Tables.jl interface - select = [:a, :b, :e] - newnames = select - s = TT.TableSelection(t, newnames, select) + names = [:a, :b, :e] + s = TT.TableSelection(t, names) @test Tables.istable(s) == true @test Tables.columnaccess(s) == true @test Tables.rowaccess(s) == false @test Tables.columns(s) === s - @test Tables.columnnames(s) == [:a, :b, :e] + @test Tables.columnnames(s) == (:a, :b, :e) @test Tables.schema(s).names == (:a, :b, :e) @test Tables.schema(s).types == (Float64, Float64, Float64) @test Tables.materializer(s) == Tables.materializer(t) @@ -26,36 +25,16 @@ @test Tables.getcolumn(s, 1) == Tables.getcolumn(cols, 1) @test Tables.getcolumn(s, 3) == Tables.getcolumn(cols, :e) - # selectin with renaming - select = [:c, :d, :f] - newnames = [:x, :y, :z] - s = TT.TableSelection(t, newnames, select) - @test Tables.columnnames(s) == [:x, :y, :z] - @test Tables.getcolumn(s, :x) == t.c - @test Tables.getcolumn(s, :y) == t.d - @test Tables.getcolumn(s, :z) == t.f - @test Tables.getcolumn(s, 1) == t.c - @test Tables.getcolumn(s, 2) == t.d - @test Tables.getcolumn(s, 3) == t.f - # row table - select = [:a, :b, :e] - newnames = select + names = [:a, :b, :e] rt = Tables.rowtable(t) - s = TT.TableSelection(rt, newnames, select) + s = TT.TableSelection(rt, names) cols = Tables.columns(rt) @test Tables.getcolumn(s, :a) == Tables.getcolumn(cols, :a) @test Tables.getcolumn(s, 1) == Tables.getcolumn(cols, 1) @test Tables.getcolumn(s, 3) == Tables.getcolumn(cols, :e) # throws - @test_throws AssertionError TT.TableSelection(t, [:a, :b, :z], [:a, :b, :z]) - @test_throws AssertionError TT.TableSelection(t, [:x, :y, :z], [:c, :d, :k]) - s = TT.TableSelection(t, [:a, :b, :e], [:a, :b, :e]) - @test_throws ErrorException Tables.getcolumn(s, :f) - @test_throws ErrorException Tables.getcolumn(s, 4) - s = TT.TableSelection(t, [:x, :y, :z], [:c, :d, :f]) - @test_throws ErrorException Tables.getcolumn(s, :c) - @test_throws ErrorException Tables.getcolumn(s, 4) - @test_throws ErrorException Tables.getcolumn(s, -2) + @test_throws AssertionError TT.TableSelection(t, [:a, :b, :z]) + @test_throws AssertionError TT.TableSelection(t, [:x, :y, :z]) end diff --git a/test/transforms/sample.jl b/test/transforms/sample.jl index 5304ea2f..67ccb4dd 100644 --- a/test/transforms/sample.jl +++ b/test/transforms/sample.jl @@ -16,7 +16,7 @@ @test n.b ⊆ t.b @test n.c ⊆ t.c - T = Sample(30, replace=true, ordered=true) + T = Sample(30, replace=true, ordered=true, rng=StableRNG(1)) n, c = apply(T, t) trows = Tables.rowtable(t) @test unique(Tables.rowtable(n)) == trows diff --git a/test/transforms/select.jl b/test/transforms/select.jl index 57a30522..7660a97b 100644 --- a/test/transforms/select.jl +++ b/test/transforms/select.jl @@ -11,65 +11,65 @@ T = Select(:f, :d) n, c = apply(T, t) - @test Tables.columnnames(n) == [:f, :d] + @test Tables.columnnames(n) == (:f, :d) T = Select(:f, :d, :b) n, c = apply(T, t) - @test Tables.columnnames(n) == [:f, :d, :b] + @test Tables.columnnames(n) == (:f, :d, :b) T = Select(:d, :c, :b) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) T = Select(:e, :c, :b, :a) n, c = apply(T, t) - @test Tables.columnnames(n) == [:e, :c, :b, :a] + @test Tables.columnnames(n) == (:e, :c, :b, :a) # selection with tuples T = Select((:e, :c, :b, :a)) n, c = apply(T, t) - @test Tables.columnnames(n) == [:e, :c, :b, :a] + @test Tables.columnnames(n) == (:e, :c, :b, :a) # selection with vectors T = Select([:e, :c, :b, :a]) n, c = apply(T, t) - @test Tables.columnnames(n) == [:e, :c, :b, :a] + @test Tables.columnnames(n) == (:e, :c, :b, :a) # selection with strings T = Select("d", "c", "b") n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) # selection with tuple of strings T = Select(("d", "c", "b")) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) # selection with vector of strings T = Select(["d", "c", "b"]) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) # selection with integers T = Select(4, 3, 2) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) # selection with tuple of integers T = Select((4, 3, 2)) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) # selection with vector of integers T = Select([4, 3, 2]) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :c, :b] + @test Tables.columnnames(n) == (:d, :c, :b) # reapply test T = Select(:b, :c, :d) n1, c1 = apply(T, t) n2 = reapply(T, t, c1) - @test n1 == n2 + @test Table(n1) == Table(n2) # selection with renaming a = rand(10) @@ -81,34 +81,34 @@ # integer => symbol T = Select(1 => :x, 3 => :y) n, c = apply(T, t) - @test Tables.columnnames(n) == [:x, :y] + @test Tables.columnnames(n) == (:x, :y) @test Tables.getcolumn(n, :x) == t.a @test Tables.getcolumn(n, :y) == t.c # integer => string T = Select(2 => "x", 4 => "y") n, c = apply(T, t) - @test Tables.columnnames(n) == [:x, :y] + @test Tables.columnnames(n) == (:x, :y) @test Tables.getcolumn(n, :x) == t.b @test Tables.getcolumn(n, :y) == t.d # symbol => symbol T = Select(:a => :x, :c => :y) n, c = apply(T, t) - @test Tables.columnnames(n) == [:x, :y] + @test Tables.columnnames(n) == (:x, :y) @test Tables.getcolumn(n, :x) == t.a @test Tables.getcolumn(n, :y) == t.c # symbol => string T = Select(:b => "x", :d => "y") n, c = apply(T, t) - @test Tables.columnnames(n) == [:x, :y] + @test Tables.columnnames(n) == (:x, :y) @test Tables.getcolumn(n, :x) == t.b @test Tables.getcolumn(n, :y) == t.d T = Select(:a => :x1, :b => :x2, :c => :x3, :d => :x4) n, c = apply(T, t) - @test Tables.columnnames(n) == [:x1, :x2, :x3, :x4] + @test Tables.columnnames(n) == (:x1, :x2, :x3, :x4) @test Tables.getcolumn(n, :x1) == t.a @test Tables.getcolumn(n, :x2) == t.b @test Tables.getcolumn(n, :x3) == t.c @@ -117,20 +117,20 @@ # string => symbol T = Select("a" => :x, "c" => :y) n, c = apply(T, t) - @test Tables.columnnames(n) == [:x, :y] + @test Tables.columnnames(n) == (:x, :y) @test Tables.getcolumn(n, :x) == t.a @test Tables.getcolumn(n, :y) == t.c # string => string T = Select("b" => "x", "d" => "y") n, c = apply(T, t) - @test Tables.columnnames(n) == [:x, :y] + @test Tables.columnnames(n) == (:x, :y) @test Tables.getcolumn(n, :x) == t.b @test Tables.getcolumn(n, :y) == t.d T = Select("a" => "x1", "b" => "x2", "c" => "x3", "d" => "x4") n, c = apply(T, t) - @test Tables.columnnames(n) == [:x1, :x2, :x3, :x4] + @test Tables.columnnames(n) == (:x1, :x2, :x3, :x4) @test Tables.getcolumn(n, :x1) == t.a @test Tables.getcolumn(n, :x2) == t.b @test Tables.getcolumn(n, :x3) == t.c @@ -142,20 +142,21 @@ T = Select(:a => :x, :c => :y) n, c = apply(T, rt) - @test Tables.columnnames(n) == [:x, :y] - @test Tables.getcolumn(n, :x) == Tables.getcolumn(cols, :a) - @test Tables.getcolumn(n, :y) == Tables.getcolumn(cols, :c) + ncols = Tables.columns(n) + @test Tables.columnnames(ncols) == (:x, :y) + @test Tables.getcolumn(ncols, :x) == Tables.getcolumn(cols, :a) + @test Tables.getcolumn(ncols, :y) == Tables.getcolumn(cols, :c) # reapply test T = Select(:b => :x, :d => :y) n1, c1 = apply(T, t) n2 = reapply(T, t, c1) - @test n1 == n2 + @test Table(n1) == Table(n2) # selection with Regex T = Select(r"[dcb]") n, c = apply(T, t) - @test Tables.columnnames(n) == [:b, :c, :d] # the order of columns is preserved + @test Tables.columnnames(n) == (:b, :c, :d) # the order of columns is preserved x1 = rand(10) x2 = rand(10) @@ -166,18 +167,18 @@ # select columns whose names contain the character x T = Select(r"x") n, c = apply(T, t) - @test Tables.columnnames(n) == [:x1, :x2] + @test Tables.columnnames(n) == (:x1, :x2) # select columns whose names contain the character y T = Select(r"y") n, c = apply(T, t) - @test Tables.columnnames(n) == [:y1, :y2] + @test Tables.columnnames(n) == (:y1, :y2) # row table rt = Tables.rowtable(t) T = Select(r"y") n, c = apply(T, rt) - @test Tables.columnnames(n) == [:y1, :y2] + @test Tables.columnnames(Tables.columns(n)) == (:y1, :y2) # throws: Select without arguments @test_throws ArgumentError Select() @@ -212,70 +213,70 @@ end T = Reject(:f, :d) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :b, :c, :e] + @test Tables.columnnames(n) == (:a, :b, :c, :e) T = Reject(:f, :d, :b) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :c, :e] + @test Tables.columnnames(n) == (:a, :c, :e) T = Reject(:d, :c, :b) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) T = Reject(:e, :c, :b, :a) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :f] + @test Tables.columnnames(n) == (:d, :f) # rejection with tuples T = Reject((:e, :c, :b, :a)) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :f] + @test Tables.columnnames(n) == (:d, :f) # rejection with vectors T = Reject([:e, :c, :b, :a]) n, c = apply(T, t) - @test Tables.columnnames(n) == [:d, :f] + @test Tables.columnnames(n) == (:d, :f) # rejection with strings T = Reject("d", "c", "b") n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) # rejection with tuple of strings T = Reject(("d", "c", "b")) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) # rejection with vector of strings T = Reject(["d", "c", "b"]) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) # rejection with integers T = Reject(4, 3, 2) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) # rejection with tuple of integers T = Reject((4, 3, 2)) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) # rejection with vector of integers T = Reject([4, 3, 2]) n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] + @test Tables.columnnames(n) == (:a, :e, :f) # reapply test T = Reject(:b, :c, :d) n1, c1 = apply(T, t) n2 = reapply(T, t, c1) - @test n1 == n2 + @test Table(n1) == Table(n2) # rejection with Regex T = Reject(r"[dcb]") n, c = apply(T, t) - @test Tables.columnnames(n) == [:a, :e, :f] # the order of columns is preserved + @test Tables.columnnames(n) == (:a, :e, :f) # the order of columns is preserved x1 = rand(10) x2 = rand(10) @@ -286,18 +287,18 @@ end # reject columns whose names contain the character x T = Reject(r"x") n, c = apply(T, t) - @test Tables.columnnames(n) == [:y1, :y2] + @test Tables.columnnames(n) == (:y1, :y2) # reject columns whose names contain the character y T = Reject(r"y") n, c = apply(T, t) - @test Tables.columnnames(n) == [:x1, :x2] + @test Tables.columnnames(n) == (:x1, :x2) # row table rt = Tables.rowtable(t) T = Reject(r"y") n, c = apply(T, rt) - @test Tables.columnnames(n) == [:x1, :x2] + @test Tables.columnnames(n) == (:x1, :x2) # throws: Reject without arguments @test_throws ArgumentError Reject()