diff --git a/src/hist.jl b/src/hist.jl index e9bf281ca..eb5dd056b 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -1,4 +1,6 @@ -import Base: show, ==, push!, append! +using Base.Cartesian + +import Base: show, ==, push!, append!, float, norm, normalize, normalize! # Mechanism for temporary deprecation of default for "closed" (because default # value has changed). After deprecation is lifed, remove "_check_closed_arg" @@ -14,6 +16,17 @@ function _check_closed_arg(closed::Symbol, funcsym) end +## Fast getindex function for multiple arrays, returns a tuple of array elements +@inline Base.@propagate_inbounds @generated function _multi_getindex(i::Integer, c::AbstractArray...) + N = length(c) + result_expr = Expr(:tuple) + for j in 1:N + push!(result_expr.args, :(c[$j][i])) + end + result_expr +end + + ## nice-valued ranges for histograms function histrange{T}(v::AbstractArray{T}, n::Integer, closed::Symbol=:default_left) closed = _check_closed_arg(closed,:histrange) @@ -116,22 +129,24 @@ type Histogram{T<:Real,N,E} <: AbstractHistogram{T,N,E} edges::E weights::Array{T,N} closed::Symbol + isdensity::Bool function (::Type{Histogram{T,N,E}}){T,N,E}(edges::NTuple{N,AbstractArray}, - weights::Array{T,N}, closed::Symbol) + weights::Array{T,N}, closed::Symbol, isdensity::Bool=false) closed == :right || closed == :left || error("closed must :left or :right") + isdensity && !(T <: AbstractFloat) && error("Density histogram must have float-type weights") map(x -> length(x)-1,edges) == size(weights) || error("Histogram edge vectors must be 1 longer than corresponding weight dimensions") - new{T,N,E}(edges,weights,closed) + new{T,N,E}(edges,weights,closed,isdensity) end end -Histogram{T,N}(edges::NTuple{N,AbstractVector},weights::AbstractArray{T,N},closed::Symbol=:default_left) = - Histogram{T,N,typeof(edges)}(edges,weights,_check_closed_arg(closed,:Histogram)) +Histogram{T,N}(edges::NTuple{N,AbstractVector},weights::AbstractArray{T,N},closed::Symbol=:default_left, isdensity::Bool=false) = + Histogram{T,N,typeof(edges)}(edges,weights,_check_closed_arg(closed,:Histogram),isdensity) -Histogram{T,N}(edges::NTuple{N,AbstractVector},::Type{T},closed::Symbol=:default_left) = - Histogram(edges,zeros(T,map(x -> length(x)-1,edges)...),_check_closed_arg(closed,:Histogram)) +Histogram{T,N}(edges::NTuple{N,AbstractVector},::Type{T},closed::Symbol=:default_left, isdensity::Bool=false) = + Histogram(edges,zeros(T,map(x -> length(x)-1,edges)...),_check_closed_arg(closed,:Histogram),isdensity) -Histogram{N}(edges::NTuple{N,AbstractVector},closed::Symbol=:default_left) = - Histogram(edges,Int,_check_closed_arg(closed,:Histogram)) +Histogram{N}(edges::NTuple{N,AbstractVector},closed::Symbol=:default_left, isdensity::Bool=false) = + Histogram(edges,Int,_check_closed_arg(closed,:Histogram),isdensity) function show(io::IO, h::AbstractHistogram) println(io, typeof(h)) @@ -140,100 +155,259 @@ function show(io::IO, h::AbstractHistogram) println(io," ",e) end println(io,"weights: ",h.weights) - print(io,"closed: ",h.closed) + println(io,"closed: ",h.closed) + print(io,"isdensity: ",h.isdensity) end -(==)(h1::Histogram,h2::Histogram) = (==)(h1.edges,h2.edges) && (==)(h1.weights,h2.weights) && (==)(h1.closed,h2.closed) +(==)(h1::Histogram,h2::Histogram) = (==)(h1.edges,h2.edges) && (==)(h1.weights,h2.weights) && (==)(h1.closed,h2.closed) && (==)(h1.isdensity,h2.isdensity) -# 1-dimensional -Histogram{T}(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:default_left) = - Histogram((edge,), weights, _check_closed_arg(closed,:Histogram)) -Histogram{T}(edge::AbstractVector, ::Type{T}, closed::Symbol=:default_left) = - Histogram(edge, zeros(T,length(edge)-1), _check_closed_arg(closed,:Histogram)) +binindex{T,E}(h::AbstractHistogram{T,1,E}, x::Real) = binindex(h, (x,))[1] -Histogram(edge::AbstractVector,closed::Symbol=:default_left) = - Histogram(edge, Int, _check_closed_arg(closed,:Histogram)) +binindex{T,N,E}(h::Histogram{T,N,E}, xs::NTuple{N,Real}) = + map((edge, x) -> _edge_binindex(edge, h.closed, x), h.edges, xs) -function push!{T,E}(h::Histogram{T,1,E}, x::Real,w::Real) - i = if h.closed == :right - searchsortedfirst(h.edges[1], x) - 1 +@inline function _edge_binindex(edge::AbstractVector, closed::Symbol, x::Real) + if closed == :right + searchsortedfirst(edge, x) - 1 else - searchsortedlast(h.edges[1], x) - end - if 1 <= i <= length(h.weights) - @inbounds h.weights[i] += w + searchsortedlast(edge, x) end - h end + + +binvolume{T,E}(h::AbstractHistogram{T,1,E}, binidx::Integer) = binvolume(h, (binidx,)) +binvolume{V,T,E}(::Type{V}, h::AbstractHistogram{T,1,E}, binidx::Integer) = binvolume(V, h, (binidx,)) + +binvolume{T,N,E}(h::Histogram{T,N,E}, binidx::NTuple{N,Integer}) = + binvolume(promote_type(map(eltype, h.edges)...), h, binidx) + +binvolume{V,T,N,E}(::Type{V}, h::Histogram{T,N,E}, binidx::NTuple{N,Integer}) = + prod(map((edge, i) -> _edge_binvolume(V, edge, i), h.edges, binidx)) + +@inline _edge_binvolume{V}(::Type{V}, edge::AbstractVector, i::Integer) = V(edge[i+1]) - V(edge[i]) +@inline _edge_binvolume{V}(::Type{V}, edge::Range, i::Integer) = V(step(edge)) +@inline _edge_binvolume(edge::AbstractVector, i::Integer) = _edge_binvolume(eltype(edge), edge, i) + + +# 1-dimensional + +Histogram{T}(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:default_left, isdensity::Bool=false) = + Histogram((edge,), weights, closed, isdensity) + +Histogram{T}(edge::AbstractVector, ::Type{T}, closed::Symbol=:default_left, isdensity::Bool=false) = + Histogram((edge,), T, closed, isdensity) + +Histogram(edge::AbstractVector, closed::Symbol=:default_left, isdensity::Bool=false) = + Histogram((edge,), closed, isdensity) + + +push!{T,E}(h::AbstractHistogram{T,1,E}, x::Real, w::Real) = push!(h, (x,), w) push!{T,E}(h::AbstractHistogram{T,1,E}, x::Real) = push!(h,x,one(T)) +append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector) = append!(h, (v,)) +append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector, wv::Union{AbstractVector,WeightVec}) = append!(h, (v,), wv) -function append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector) - for x in v - push!(h,x) - end - h -end -function append!{T}(h::AbstractHistogram{T,1}, v::AbstractVector,wv::WeightVec) - for (x,w) in zip(v,wv.values) - push!(h,x,w) - end - h -end -fit(::Type{Histogram},v::AbstractVector, edg::AbstractVector; closed::Symbol=:default_left) = - append!(Histogram(edg,_check_closed_arg(closed,:fit)), v) -fit(::Type{Histogram},v::AbstractVector; closed::Symbol=:default_left, nbins=sturges(length(v))) = begin - closed = _check_closed_arg(closed,:fit) - fit(Histogram, v, histrange(v,nbins,closed); closed=closed) -end +fit{T}(::Type{Histogram{T}},v::AbstractVector, edg::AbstractVector; closed::Symbol=:default_left) = + fit(Histogram{T},(v,), (edg,), closed=closed) +fit{T}(::Type{Histogram{T}},v::AbstractVector; closed::Symbol=:default_left, nbins=sturges(length(v))) = + fit(Histogram{T},(v,); closed=closed, nbins=nbins) +fit{T}(::Type{Histogram{T}},v::AbstractVector, wv::WeightVec, edg::AbstractVector; closed::Symbol=:default_left) = + fit(Histogram{T},(v,), wv, (edg,), closed=closed) +fit{T}(::Type{Histogram{T}},v::AbstractVector, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(v))) = + fit(Histogram{T}, (v,), wv; closed=closed, nbins=nbins) + +fit{W}(::Type{Histogram}, v::AbstractVector, wv::WeightVec{W}, args...; kwargs...) = fit(Histogram{W}, v, wv, args...; kwargs...) -fit{W}(::Type{Histogram},v::AbstractVector, wv::WeightVec{W}, edg::AbstractVector; closed::Symbol=:default_left) = - append!(Histogram(edg,W,_check_closed_arg(closed,:fit)), v, wv) -fit(::Type{Histogram},v::AbstractVector, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(v))) = begin - closed = _check_closed_arg(closed,:fit) - fit(Histogram, v, wv, histrange(v,nbins,closed); closed=closed) -end # N-dimensional + function push!{T,N}(h::Histogram{T,N},xs::NTuple{N,Real},w::Real) - is = if h.closed == :right - map((edge, x) -> searchsortedfirst(edge,x) - 1, h.edges, xs) - else - map(searchsortedlast, h.edges, xs) + h.isdensity && error("Density histogram must have float-type weights") + idx = binindex(h, xs) + if checkbounds(Bool, h.weights, idx...) + @inbounds h.weights[idx...] += w end - try - h.weights[is...] += w - catch e - isa(e,BoundsError) || rethrow(e) + h +end + +function push!{T<:AbstractFloat,N}(h::Histogram{T,N},xs::NTuple{N,Real},w::Real) + idx = binindex(h, xs) + if checkbounds(Bool, h.weights, idx...) + @inbounds h.weights[idx...] += h.isdensity ? w / binvolume(h, idx) : w end h end + push!{T,N}(h::AbstractHistogram{T,N},xs::NTuple{N,Real}) = push!(h,xs,one(T)) + function append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}) - for xs in zip(vs...) - push!(h,xs) + @inbounds for i in eachindex(vs...) + xs = _multi_getindex(i, vs...) + push!(h, xs, one(T)) end h end -function append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector},wv::WeightVec) - for (xs,w) in zip(zip(vs...),wv.values) - push!(h,xs,w) +function append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::AbstractVector) + @inbounds for i in eachindex(wv, vs...) + xs = _multi_getindex(i, vs...) + push!(h, xs, wv[i]) end h end +append!{T,N}(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::WeightVec) = append!(h, vs, values(wv)) -fit{N}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) = - append!(Histogram(edges,_check_closed_arg(closed,:fit)), vs) -fit{N}(::Type{Histogram}, vs::NTuple{N,AbstractVector}; closed::Symbol=:default_left, nbins=sturges(length(vs[1]))) = begin + +fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) = + append!(Histogram(edges, T, _check_closed_arg(closed,:fit), false), vs) + +fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:default_left, isdensity::Bool=false, nbins=sturges(length(vs[1]))) = begin closed = _check_closed_arg(closed,:fit) - fit(Histogram, vs, histrange(vs,nbins,closed); closed=closed) + fit(Histogram{T}, vs, histrange(vs,nbins,closed); closed=closed) end -fit{N,W}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) = - append!(Histogram(edges,W,_check_closed_arg(closed,:fit)), vs, wv) -fit{N}(::Type{Histogram},vs::NTuple{N,AbstractVector}, wv::WeightVec; closed::Symbol=:default_left, nbins=sturges(length(vs[1]))) = begin +fit{T,N,W}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:default_left) = + append!(Histogram(edges, T, _check_closed_arg(closed,:fit), false), vs, wv) + +fit{T,N}(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::WeightVec; closed::Symbol=:default_left, isdensity::Bool=false, nbins=sturges(length(vs[1]))) = begin closed = _check_closed_arg(closed,:fit) - fit(Histogram, vs, wv, histrange(vs,nbins,closed); closed=closed) + fit(Histogram{T}, vs, wv, histrange(vs,nbins,closed); closed=closed) +end + +fit(::Type{Histogram}, args...; kwargs...) = fit(Histogram{Int}, args...; kwargs...) +fit{N,W}(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::WeightVec{W}, args...; kwargs...) = fit(Histogram{W}, vs, wv, args...; kwargs...) + + +# Get a suitable high-precision type for the norm of a histogram. +@generated function norm_type{T, N, E}(h::Histogram{T, N, E}) + args = [:( eltype(edges[$d]) ) for d = 1:N] + quote + edges = h.edges + norm_type(promote_type(T, $(args...))) + end +end + +norm_type{T<:Integer}(::Type{T}) = promote_type(T, Int64) +norm_type{T<:AbstractFloat}(::Type{T}) = promote_type(T, Float64) + + +""" + norm(h::Histogram) + +Calculate the norm of histogram `h` as the absolute value of its integral. +""" +@generated function norm{T, N, E}(h::Histogram{T, N, E}) + quote + edges = h.edges + weights = h.weights + SumT = norm_type(h) + v_0 = 1 + s_0 = zero(SumT) + @inbounds @nloops( + $N, i, weights, + d -> begin + v_{$N-d+1} = v_{$N-d} * _edge_binvolume(SumT, edges[d], i_d) + s_{$N-d+1} = zero(SumT) + end, + d -> begin + s_{$N-d} += s_{$N-d+1} + end, + begin + $(Symbol("s_$(N)")) += (@nref $N weights i) * $(Symbol("v_$N")) + end + ) + s_0 + end +end + + +float{T<:AbstractFloat, N, E}(h::Histogram{T, N, E}) = h + +float{T, N, E}(h::Histogram{T, N, E}) = Histogram(h.edges, float(h.weights), h.closed, h.isdensity) + + + +""" + normalize!{T<:AbstractFloat, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf) + +Normalize the histogram `h` and optionally scale one or more auxiliary weight +arrays appropriately. See description of `normalize` for details. Returns `h`. +""" +@generated function normalize!{T<:AbstractFloat, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf) + quote + edges = h.edges + weights = h.weights + + for A in aux_weights + (size(A) != size(weights)) && throw(DimensionMismatch("aux_weights must have same size as histogram weights")) + end + + if mode == :none + # nothing to do + elseif mode == :pdf || mode == :density + if h.isdensity + if mode == :pdf + # histogram already represents a density, just divide weights by norm + s = 1/norm(h) + weights .*= s + for A in aux_weights + A .*= s + end + else + # histogram already represents a density, nothing to do + end + else + # Divide weights by bin volume, for :pdf also divide by sum of weights + SumT = norm_type(h) + vs_0 = (mode == :pdf) ? sum(SumT(x) for x in weights) : one(SumT) + @inbounds @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin + (@nref $N weights i) /= $(Symbol("vs_$N")) + for A in aux_weights + (@nref $N A i) /= $(Symbol("vs_$N")) + end + end + end + h.isdensity = true + else mode != :pdf && mode != :density + throw(ArgumentError("Normalization mode must be :pdf, :density or :none")) + end + h + end +end + + +""" + normalize{T, N, E}(h::Histogram{T, N, E}; mode::Symbol = :pdf) + +Normalize the histogram `h`. + +Valid values for `mode` are: + +* `:pdf`: Normalize by sum of weights and bin sizes. Resulting histogram + has norm 1 and represents a PDF. +* `:density`: Normalize by bin sizes only. Resulting histogram represents + count density of input and does not have norm 1. Will not modify the + histogram if it already represents a density (`h.isdensity == 1`). +* `:none`: Leaves histogram unchanged. Useful to simplify code that has to + conditionally apply different modes of normalization. +""" +normalize{T, N, E}(h::Histogram{T, N, E}; mode::Symbol = :pdf) = + normalize!(deepcopy(float(h)), mode = mode) + + +""" + normalize{T, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf) + +Normalize the histogram `h` and rescales one or more auxiliary weight arrays +at the same time (`aux_weights` may, e.g., contain estimated statistical +uncertainties). The values of the auxiliary arrays are scaled by the same +factor as the corresponding histogram weight values. Returns a tuple of the +normalized histogram and scaled auxiliary weights. +""" +function normalize{T, N, E}(h::Histogram{T, N, E}, aux_weights::Array{T,N}...; mode::Symbol = :pdf) + h_fltcp = deepcopy(float(h)) + aux_weights_fltcp = map(x -> deepcopy(float(x)), aux_weights) + normalize!(h_fltcp, aux_weights_fltcp..., mode = mode) + (h_fltcp, aux_weights_fltcp...) end diff --git a/test/hist.jl b/test/hist.jl index cad5641ce..adaee8466 100644 --- a/test/hist.jl +++ b/test/hist.jl @@ -3,79 +3,189 @@ using StatsBase using Base.Test -# test hist -@test sum(fit(Histogram,[1,2,3], closed=:left).weights) == 3 # FIXME: closed -@test fit(Histogram,Int[], closed=:left).weights == Int[] # FIXME: closed -@test fit(Histogram,[1], closed=:left).weights == [1] # FIXME: closed -@test fit(Histogram,[1,2,3],[0,2,4], closed=:left) == Histogram([0,2,4],[1,2], :left) # FIXME: closed -@test fit(Histogram,[1,2,3],[0,2,4], closed=:left) != Histogram([0,2,4],[1,1], :left) # FIXME: closed -@test fit(Histogram,[1,2,3],0:2:4, closed=:left) == Histogram(0:2:4,[1,2], :left) # FIXME: closed -@test all(fit(Histogram,[0:99;]/100,0.0:0.01:1.0, closed=:left).weights .==1) # FIXME: closed -@test fit(Histogram,[1,1,1,1,1], closed=:left).weights[1] == 5 # FIXME: closed -@test sum(fit(Histogram,(rand(100),rand(100)), closed=:left).weights) == 100 # FIXME: closed -@test fit(Histogram,1:100,nbins=5,closed=:right).weights == [20,20,20,20,20] -@test fit(Histogram,1:100,nbins=5,closed=:left).weights == [19,20,20,20,20,1] -@test fit(Histogram,0:99,nbins=5,closed=:right).weights == [1,20,20,20,20,19] -@test fit(Histogram,0:99,nbins=5,closed=:left).weights == [20,20,20,20,20] - -# FIXME: closed (all lines in this block): -@test fit(Histogram,(0:99,0:99),nbins=5, closed=:left).weights == diagm([20,20,20,20,20]) -@test fit(Histogram,(0:99,0:99),nbins=(5,5), closed=:left).weights == diagm([20,20,20,20,20]) - -# FIXME: closed (all lines in this block): -@test fit(Histogram,0:99,weights(ones(100)),nbins=5, closed=:left).weights == [20,20,20,20,20] -@test fit(Histogram,0:99,weights(2*ones(100)),nbins=5, closed=:left).weights == [40,40,40,40,40] - -# FIXME: closed (all lines in this block): -@test eltype(fit(Histogram,1:100,weights(ones(Int,100)),nbins=5, closed=:left).weights) == Int -@test eltype(fit(Histogram,1:100,weights(ones(Float64,100)),nbins=5, closed=:left).weights) == Float64 - -# histrange -# Note: atm histrange must be qualified -@test StatsBase.histrange(Float64[], 0, :left) == 0.0:1.0:0.0 -@test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0 -@test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0 -@test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0 - -@test StatsBase.histrange([0.201,0.299], 10, :left) == 0.2:0.01:0.3 -@test StatsBase.histrange([0.2,0.299], 10, :left) == 0.2:0.01:0.3 -@test StatsBase.histrange([0.2,0.3], 10, :left) == 0.2:0.01:0.31 -@test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31 -@test StatsBase.histrange([0.2,0.3], 10, :right) == 0.19:0.01:0.3 -@test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3 - -@test StatsBase.histrange([200.1,299.9], 10, :left) == 200.0:10.0:300.0 -@test StatsBase.histrange([200.0,299.9], 10, :left) == 200.0:10.0:300.0 -@test StatsBase.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0 -@test StatsBase.histrange([200.0,300.0], 10, :right) == 190.0:10.0:300.0 - -@test StatsBase.histrange(Int64[1:5;], 1, :left) == 0:5:10 -@test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20 - -# FIXME: closed (all lines in this block): -@test StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0 -@test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0 -@test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0 -@test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0 -@test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0 -@test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0 - -# Base issue #13326 -l,h = extrema(StatsBase.histrange([typemin(Int),typemax(Int)], 10, :left)) # FIXME: closed -@test l <= typemin(Int) -@test h >= typemax(Int) - -# FIXME: closed (all lines in this block): -@test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left) -@test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left) -@test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left) -@test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left) -@test_throws ArgumentError StatsBase.histrange(Float64[],-1, :left) -@test_throws ArgumentError StatsBase.histrange([0.], 0, :left) - - -# hist show -show_h = sprint(show, fit(Histogram,[0,1,2], closed=:left)) # FIXME: closed -@test contains(show_h, "edges:\n 0.0:1.0:3.0") -@test contains(show_h, "weights: $([1,1,1])") -@test contains(show_h, "closed: left") +@testset "StatsBase.Histogram" begin + + +@testset "Histogram binindex and binvolume" begin + edg1 = -2:0.5:9 + edg1f0 = -2:0.5f0:9 + edg2 = [-2, -1, 2, 7, 19] + h1 = Histogram(edg1, :left) + h2 = Histogram((edg1, edg2), :left) + h3 = Histogram((edg1f0, edg2), :left) + @test StatsBase.binindex(h1, -0.5) == 4 + @test StatsBase.binindex(h2, (1.5, 2)) == (8, 3) + + @test [StatsBase.binvolume(h1, i) for i in indices(h1.weights, 1)] ≈ diff(edg1) + @test [StatsBase.binvolume(h2, (i,j)) for i in indices(h2.weights, 1), j in indices(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)' + + @test typeof(StatsBase.binvolume(h2, (1,1))) == Float64 + @test typeof(StatsBase.binvolume(h3, (1,1))) == Float32 + @test typeof(StatsBase.binvolume(Float64, h3, (1,1))) == Float64 +end + + +@testset "Histogram append" begin + # FIXME: closed (all lines in this block): + @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99).weights ≈ [40,40,40,40,40] + @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99).weights ≈ [2,2,2,2,2] + @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99, fill(2, 200)).weights ≈ [80,80,80,80,80] + @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99, fill(2, 200)).weights ≈ [4,4,4,4,4] +end + + +@testset "Histogram fit" begin + # FIXME: closed (all lines in this block): + @test sum(fit(Histogram,[1,2,3], closed=:left).weights) == 3 # FIXME: closed + @test fit(Histogram,Int[], closed=:left).weights == Int[] # FIXME: closed + @test fit(Histogram,[1], closed=:left).weights == [1] # FIXME: closed + @test fit(Histogram,[1,2,3],[0,2,4], closed=:left) == Histogram([0,2,4],[1,2], :left) # FIXME: closed + @test fit(Histogram,[1,2,3],[0,2,4], closed=:left) != Histogram([0,2,4],[1,1], :left) # FIXME: closed + @test fit(Histogram,[1,2,3],0:2:4, closed=:left) == Histogram(0:2:4,[1,2], :left) # FIXME: closed + @test all(fit(Histogram,[0:99;]/100,0.0:0.01:1.0, closed=:left).weights .==1) # FIXME: closed + @test fit(Histogram,[1,1,1,1,1], closed=:left).weights[1] == 5 # FIXME: closed + @test sum(fit(Histogram,(rand(100),rand(100)), closed=:left).weights) == 100 # FIXME: closed + @test fit(Histogram,1:100,nbins=5,closed=:right).weights == [20,20,20,20,20] + @test fit(Histogram,1:100,nbins=5,closed=:left).weights == [19,20,20,20,20,1] + @test fit(Histogram,0:99,nbins=5,closed=:right).weights == [1,20,20,20,20,19] + @test fit(Histogram,0:99,nbins=5,closed=:left).weights == [20,20,20,20,20] + + # FIXME: closed (all lines in this block): + @test fit(Histogram,(0:99,0:99),nbins=5, closed=:left).weights == diagm([20,20,20,20,20]) + @test fit(Histogram,(0:99,0:99),nbins=(5,5), closed=:left).weights == diagm([20,20,20,20,20]) + + # FIXME: closed (all lines in this block): + @test fit(Histogram,0:99,weights(ones(100)),nbins=5, closed=:left).weights == [20,20,20,20,20] + @test fit(Histogram,0:99,weights(2*ones(100)),nbins=5, closed=:left).weights == [40,40,40,40,40] + @test fit(Histogram{Int32},0:99,weights(2*ones(100)),nbins=5, closed=:left).weights == [40,40,40,40,40] + @test fit(Histogram{Float32},0:99,weights(2*ones(100)),nbins=5, closed=:left).weights == [40,40,40,40,40] +end + + +@testset "Histogram element type" begin + # FIXME: closed (all lines in this block): + @test eltype(fit(Histogram,1:100,weights(ones(Int,100)),nbins=5, closed=:left).weights) == Int + @test eltype(fit(Histogram{Float32},1:100,weights(ones(Int,100)),nbins=5, closed=:left).weights) == Float32 + @test eltype(fit(Histogram,1:100,weights(ones(Float64,100)),nbins=5, closed=:left).weights) == Float64 + @test eltype(fit(Histogram{Float32},1:100,weights(ones(Float64,100)),nbins=5, closed=:left).weights) == Float32 +end + + +@testset "histrange" begin + # Note: atm histrange must be qualified + @test StatsBase.histrange(Float64[], 0, :left) == 0.0:1.0:0.0 + @test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0 + @test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0 + @test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0 + + @test StatsBase.histrange([0.201,0.299], 10, :left) == 0.2:0.01:0.3 + @test StatsBase.histrange([0.2,0.299], 10, :left) == 0.2:0.01:0.3 + @test StatsBase.histrange([0.2,0.3], 10, :left) == 0.2:0.01:0.31 + @test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31 + @test StatsBase.histrange([0.2,0.3], 10, :right) == 0.19:0.01:0.3 + @test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3 + + @test StatsBase.histrange([200.1,299.9], 10, :left) == 200.0:10.0:300.0 + @test StatsBase.histrange([200.0,299.9], 10, :left) == 200.0:10.0:300.0 + @test StatsBase.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0 + @test StatsBase.histrange([200.0,300.0], 10, :right) == 190.0:10.0:300.0 + + @test StatsBase.histrange(Int64[1:5;], 1, :left) == 0:5:10 + @test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20 + + # FIXME: closed (all lines in this block): + @test StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0 + @test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0 + @test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0 + @test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0 + @test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0 + @test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0 + + # Base issue #13326 + l,h = extrema(StatsBase.histrange([typemin(Int),typemax(Int)], 10, :left)) # FIXME: closed + @test l <= typemin(Int) + @test h >= typemax(Int) + + # FIXME: closed (all lines in this block): + @test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left) + @test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left) + @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left) + @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left) + @test_throws ArgumentError StatsBase.histrange(Float64[],-1, :left) + @test_throws ArgumentError StatsBase.histrange([0.], 0, :left) +end + + +@testset "Histogram show" begin + # hist show + show_h = sprint(show, fit(Histogram,[0,1,2], closed=:left)) # FIXME: closed + @test contains(show_h, "edges:\n 0.0:1.0:3.0") + @test contains(show_h, "weights: $([1,1,1])") + @test contains(show_h, "closed: left") + @test contains(show_h, "isdensity: false") +end + + +@testset "Histogram norm and normalize" begin + rng = MersenneTwister(345678) + edges = ( + cumsum(rand(rng) * rand(rng, 9)), + cumsum(rand(rng, 1:10) * rand(rng, 1:100, 11)), + cumsum(5 * rand(rng) * rand(rng, 14)) + ) + + n = 100000 + + data = ( + maximum(edges[1]) * (randn(rng, n) / 6 + 0.5), + rand(rng, 1:maximum(edges[2]), n), + maximum(edges[3]) * rand(rng, n) + ) + + h = fit(Histogram, data, edges, closed = :left) + + weight_sum = sum(h.weights) + bin_vols = [ x * y * z for x in diff(edges[1]), y in diff(edges[2]), z in diff(edges[3])] + + @test norm(h) ≈ sum(h.weights .* bin_vols) + + @test normalize(h, mode = :none) == h + + + h_pdf = normalize(h, mode = :pdf) + @test h_pdf.weights ≈ h.weights ./ bin_vols ./ weight_sum + @test h_pdf.isdensity == true + @test norm(h_pdf) ≈ 1 + @test normalize(h_pdf, mode = :pdf) == h_pdf + @test normalize(h_pdf, mode = :density) == h_pdf + + h_density = normalize(h, mode = :density) + @test h_density.weights ≈ h.weights ./ bin_vols + @test h_density.isdensity == true + @test norm(h_density) ≈ weight_sum + @test normalize(h_density, mode = :pdf) == + Histogram(h_density.edges, h_density.weights .* (1/norm(h_density)), h_density.closed, true) + @test normalize(h_density, mode = :pdf).weights ≈ h_pdf.weights + @test normalize(h_density, mode = :density) == h_density + + h2 = deepcopy(float(h)) + mod_h2 = normalize!(h2, mode = :density) + @test mod_h2 === h2 && mod_h2.weights === h2.weights + @test h2.weights == h_density.weights + + aux_weights = sqrt.(h.weights) + divor0 = (a,b) -> (a == 0 && b == 0) ? 0 : a/b + divor0_cmp = (a_n, a_d, b_n, b_d) -> maximum(abs.(map(divor0, a_n, a_d) - map(divor0, b_n, b_d))) < 1e-10 + + h_pdf2, h_pdf2_aux = normalize(float(h), aux_weights, mode = :pdf) + @test divor0_cmp(h_pdf2_aux, aux_weights, h_pdf2.weights, h.weights) + + h_density2, h_density2_aux = normalize(float(h), aux_weights, mode = :density) + @test divor0_cmp(h_density2_aux, aux_weights, h_density2.weights, h.weights) + + h_density3, h_density3_aux = normalize(h_density2, h_density2_aux, mode = :pdf) + @test divor0_cmp(h_density3_aux, h_density2_aux, h_density3.weights, h_density2.weights) +end + + +end # @testset "StatsBase.Histogram"