Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "StringViews"
uuid = "354b36f9-a18e-4713-926e-db85100087ba"
authors = ["Steven G. Johnson <stevenj@alum.mit.edu>"]
version = "1.3.7"
version = "2.0"

[compat]
julia = "1.6"
Expand Down
43 changes: 31 additions & 12 deletions src/StringViews.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,20 @@ Unlike Julia's built-in `String` type (which also wraps UTF-8 data), the
instance, and does not take "ownership" of or modify the array. Otherwise,
a `StringView` is intended to be usable in any context where you might
have otherwise used `String`.

In Julia 1.14, the `StringView` type will be included in the Julia `Base` module,
at which point the `StringViews` module will become a trivial stub.
"""
module StringViews

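# The fallback implementation in the `else` branch below is no longer needed once
# `Base` itself provides `StringView`; see https://github.com/JuliaLang/julia/pull/60526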
if isdefined(Base, :StringView)

const SVRegexMatch = RegexMatch
export SVRegexMatch

else

export StringView, SVRegexMatch

"""
Expand All @@ -27,27 +39,31 @@ in the buffer.
"""
struct StringView{T<:AbstractVector{UInt8}} <: AbstractString
    data::T

    # Inner constructor: validates the wrapped array before storing it in `data`.
    function StringView{T}(data::T) where {T<:AbstractVector{UInt8}}
        # The StringViews code currently assumes that `data` is indexed from 1.
        Base.require_one_based_indexing(data)

        # Reject non-concrete parameters such as `StringView{AbstractVector{UInt8}}`:
        # allowing them would complicate the implementation without any
        # usability benefit.
        isconcretetype(T) ||
            throw(ArgumentError("StringView must be parameterized with a concrete type"))

        return new{T}(data)
    end
end

# A `StringView` whose data is either a `DenseVector{UInt8}` or a
# `Base.FastContiguousSubArray` of a `DenseVector{UInt8}`.
const DenseStringView = StringView{<:Union{DenseVector{UInt8},<:Base.FastContiguousSubArray{UInt8,1,<:DenseVector{UInt8}}}}
# Julia's built-in `String`, or a `SubString` of one.
const StringAndSub = Union{String,SubString{String}}
# Any `StringView`, or a `SubString` of one.
const StringViewAndSub = Union{StringView,SubString{<:StringView}}
# A `DenseStringView`, or a `SubString` of one.
const DenseStringViewAndSub = Union{DenseStringView,SubString{<:DenseStringView}}

# Conversions between string views, byte arrays and `String`.

# A view wrapping a `Vector{UInt8}` hands back that very array (no copy is made).
function Base.Vector{UInt8}(s::StringView{Vector{UInt8}})
    return s.data
end

# Wrap a byte vector, taking the type parameter from the argument's type.
function StringView(v::AbstractVector{UInt8})
    return StringView{typeof(v)}(v)
end

# Any other view (or substring of a view) is converted via its code units.
function Base.Vector{UInt8}(s::StringViewAndSub)
    return Vector{UInt8}(codeunits(s))
end

# `Array{UInt8}` conversion defers to the `Vector{UInt8}` conversion.
function Base.Array{UInt8}(s::StringViewAndSub)
    return Vector{UInt8}(s)
end

# Build a `String` by copying the code units into a fresh string buffer.
function Base.String(s::StringViewAndSub)
    bytes = Base.StringVector(ncodeunits(s))
    copyto!(bytes, codeunits(s))
    return String(bytes)
end

# Constructing a view from an existing view returns that view unchanged.
function StringView(s::StringView)
    return s
end

function StringView{S}(s::StringView{S}) where {S<:AbstractVector{UInt8}}
    return s
end

# A `String` is wrapped through its code units.
function StringView(s::String)
    return StringView(codeunits(s))
end

# IOBuffer constructor (note that buf.data is always 1-based).
# Wraps a view of `buf.data[r]`; by default `r` is `1:(buf.ptr - 1)`.
# Unless bounds checking is elided, throws a `BoundsError` when `r` is not
# contained in `1:buf.size`.
@inline function StringView(
    buf::IOBuffer, r::OrdinalRange{<:Integer,<:Integer}=Base.OneTo(buf.ptr-1)
)
    @boundscheck begin
        if !issubset(r, Base.OneTo(buf.size))
            throw(BoundsError(buf, r))
        end
    end
    return StringView(@view buf.data[r])
end

# Return a new `StringView` that wraps a copy of the underlying data.
function Base.copy(s::StringView)
    return StringView(copy(s.data))
end

Base.Symbol(s::DenseStringViewAndSub) =
Expand Down Expand Up @@ -88,9 +104,10 @@ end
# Comparing a string view (or a substring of one) against a `String` or
# `SubString{String}` delegates to the comparison with the arguments swapped.
function Base.:(==)(s1::StringViewAndSub, s2::StringAndSub)
    return s2 == s1
end

# `typemin` of a string-view type is an empty view of that type.
Base.typemin(::Type{StringView{Vector{UInt8}}}) = StringView(Vector{UInt8}(undef, 0))
# Defined exactly once: a second definition with the same signature would
# overwrite this one. It is built from `codeunits("")` so that only the generic
# byte-vector constructor is required.
Base.typemin(::Type{StringView{Base.CodeUnits{UInt8,String}}}) = StringView(codeunits(""))
# Instance form: forward to the method for the instance's type.
Base.typemin(::T) where {T<:StringView} = typemin(T)
# `one` and `oneunit` both return the same empty view as `typemin`, whether
# given an instance or a type.
Base.one(::Union{T,Type{T}}) where {T<:StringView} = typemin(T)
Base.oneunit(::Union{T,Type{T}}) where {T<:StringView} = typemin(T)

if VERSION < v"1.10.0-DEV.1007" # JuliaLang/julia#47880
Base.isvalid(s::DenseStringViewAndSub) = ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) ≠ 0
Expand Down Expand Up @@ -141,4 +158,6 @@ include("parse.jl")
include("util.jl")
include("search.jl")

end

end # module
4 changes: 2 additions & 2 deletions src/decoding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ end
# Convert the endpoints of an integer unit range to `Int` and index `s` with
# the resulting `UnitRange{Int}`.
function Base.getindex(s::StringView, r::UnitRange{<:Integer})
    lo, hi = Int(first(r)), Int(last(r))
    return s[lo:hi]
end

# Index `s` with the code-unit range `r`, returning a `StringView` over a copy
# of the selected bytes.
#
# Unless bounds checking is elided, this checks that `r` is in bounds and raises
# a string index error when either endpoint of `r` is not a valid index of `s`.
@inline function Base.getindex(s::StringView, r::UnitRange{Int})
    # Return a `StringView` for empty ranges too, so that the result type does
    # not depend on the value of `r`.
    isempty(r) && return StringView(s.data[1:0])
    i, j = first(r), last(r)
    @boundscheck begin
        checkbounds(s, r)
        @inbounds isvalid(s, i) || Base.string_index_err(s, i)
        @inbounds isvalid(s, j) || Base.string_index_err(s, j)
    end
    # Extend `j` to the last code unit before the index that follows `j`, so
    # the whole final character is included.
    j = nextind(s, j) - 1
    return StringView(s.data[i:j])
end

Base.length(s::StringView) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
Expand Down
50 changes: 20 additions & 30 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ s = StringView(b)
ss = SubString(s, 2, 5) # "ooba"
abc = StringView(0x61:0x63)
invalid = StringView([0x8b, 0x52, 0x9b, 0x8d])
su = StringView("föôẞαr")
stringview(s::String) = StringView(codeunits(s)) # convenience constructor
su = stringview("föôẞαr")

@testset "construction/conversion" begin
@test StringView(s) === s
@test Vector{UInt8}(s) === Array{UInt8}(s) === codeunits(s) === b
@test Vector{UInt8}(s) == Array{UInt8}(s) == codeunits(s) === b
@test Vector{UInt8}(StringView(@view b[1:3])) == b[1:3]
@test codeunits(String(s)) == s.data
@test Vector{UInt8}(abc) == collect(0x61:0x63)
Expand All @@ -21,21 +21,11 @@ su = StringView("föôẞαr")
@test c == "foobar"
@test c.data !== s.data

buf = IOBuffer()
write(buf, s)
@test StringView(buf) == s == StringView(buf, 0x01:0x06)
@test StringView(buf, 3:5) == "oba" == StringView(buf, 0x03:0x01:0x05)
write(buf, "baz")
@test StringView(buf) == s * "baz"
@test String(take!(buf)) == s * "baz"
@test StringView(buf) == ""
@test_throws BoundsError StringView(buf, 3:4)

@test StringView("foo") isa StringView{Base.CodeUnits{UInt8,String}}
@test stringview("foo") isa StringView{Base.CodeUnits{UInt8,String}}

@test s isa StringViews.DenseStringView
@test StringView(@view b[1:3]) isa StringViews.DenseStringView
@test StringView("foo") isa StringViews.DenseStringView
@test stringview("foo") isa StringViews.DenseStringView
@test StringView(@view codeunits("foobar")[1:3]) isa StringViews.DenseStringView

@test pointer(s) == pointer(b) == Base.unsafe_convert(Ptr{UInt8}, s)
Expand Down Expand Up @@ -63,8 +53,8 @@ end

@test Base.print_to_string(ss) == "ooba"

@test cmp("foobar","bar") == cmp(ss,"bar") == -cmp("bar",ss) == cmp(ss,StringView("bar"))
@test ss == StringView("ooba") == "ooba" == ss == "ooba"
@test cmp("foobar","bar") == cmp(ss,"bar") == -cmp("bar",ss) == cmp(ss,stringview("bar"))
@test ss == stringview("ooba") == "ooba" == ss == "ooba"
@test isvalid(ss)
end

Expand Down Expand Up @@ -94,17 +84,17 @@ end
@test findnext(r"[aeiou]+", s, 1) == 2:3
@test findnext(r"[aeiou]+", ss, 1) == 1:2

sv = StringView(codeunits("foo 1234 bar"))
sv = stringview("foo 1234 bar")
@test match(r"[0-9]+", sv).match.string === sv
@test eltype(eachmatch(r"[0-9]+", sv)) == SVRegexMatch{typeof(sv)}

# Regex match of substring of stringview
strv = only(match(r"^([a-z]+)$", SubString(StringView((b"abc")))))
strv = only(match(r"^([a-z]+)$", SubString(StringView(b"abc"))))
@test typeof(strv) == SubString{StringView{typeof(b"abc")}}
end

@testset "named subpatterns" begin
m = match(r"(?<a>.)(.)(?<b>.)", StringView(codeunits("xyz")))
m = match(r"(?<a>.)(.)(?<b>.)", stringview("xyz"))
@test haskey(m, :a)
@test haskey(m, 2)
@test haskey(m, "b")
Expand All @@ -118,7 +108,7 @@ end
@testset "parsing" begin
for val in (true, 1234, 1234.5, 1234.5f0, 4.5+3.25im)
sval = string(val)
for str in (StringView(sval), SubString("foo"*sval*"bar", 4, 3+length(sval)))
for str in (stringview(sval), SubString("foo"*sval*"bar", 4, 3+length(sval)))
@test parse(typeof(val), str) === val
end
end
Expand All @@ -139,18 +129,18 @@ end
@test findnext(==("ba"), str, 1) === findnext(==("ba"), sS, 1)
@test findprev(==("ba"), str, n) === findprev(==("ba"), sS, n)
end
@test chomp(StringView("foo\n")) == "foo"
@test chomp(stringview("foo\n")) == "foo"

# issue #5
let v = [0x32, 0x30, 0x32, 0x31, 0x2d, 0x31, 0x31, 0x2d, 0x31, 0x30, 0x20, 0x32, 0x31, 0x3a, 0x34, 0x32, 0x3a, 0x30, 0x35, 0x2e, 0x31, 0x31, 0x35, 0x38, 0x30, 0x37],
pat = r"(\.[\d]{3})\d+" => s"\g<1>"
@test replace(String(copy(v)), pat) == replace(StringView(v), pat)
end

@test findfirst(==('ø'), StringView("abc")) === nothing
@test findfirst(==('ø'), StringView("abæø")) == 5
@test findlast(==('ø'), StringView("abc")) === nothing
@test findlast(==('ø'), StringView("abæø")) == 5
@test findfirst(==('ø'), stringview("abc")) === nothing
@test findfirst(==('ø'), stringview("abæø")) == 5
@test findlast(==('ø'), stringview("abc")) === nothing
@test findlast(==('ø'), stringview("abæø")) == 5
end

@testset "replace" begin
Expand All @@ -164,8 +154,8 @@ end
end

@testset "miscellaneous" begin
@test cmp("foobar","bar") == cmp(s,"bar") == -cmp("bar",s) == cmp(s,StringView("bar"))
@test s == StringView("foobar") == "foobar" == s == "foobar" != StringView("bar")
@test cmp("foobar","bar") == cmp(s,"bar") == -cmp("bar",s) == cmp(s,stringview("bar"))
@test s == stringview("foobar") == "foobar" == s == "foobar" != stringview("bar")
@test cmp(abc, "bar") == cmp("abc","bar")

@test Base.typemin(s) isa StringView{Vector{UInt8}}
Expand All @@ -176,7 +166,7 @@ end
@test oneunit(su) == oneunit(typeof(su)) == one(su) == ""

@test isascii(s)
@test !isascii(StringView("fööbār"))
@test !isascii(stringview("fööbār"))

@test isvalid(s)
@test isvalid(abc)
Expand All @@ -188,5 +178,5 @@ end
end

# issue #12
@test_throws StringIndexError StringView(codeunits("fooα"))[1:5]
@test_throws StringIndexError stringview("fooα")[1:5]
end