From 05b8704e51d2e946da2aca44a1e08e54ab5af4d7 Mon Sep 17 00:00:00 2001 From: Michael Krabbe Borregaard Date: Wed, 23 Aug 2017 15:27:36 +0200 Subject: [PATCH] Add proportion normalization to histograms (#293) * Add proportion normalization to histograms * Fix tests * Change name to :fraction * Remove redundant words * Change implementation when isdensity == true * update tests to match new behaviour * Improve implementation/tests of Histogram normalization in :fraction mode * Replace :fraction with :probability --- src/hist.jl | 39 +++++++++++++++++++++++++++------------ test/hist.jl | 9 +++++++++ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/hist.jl b/src/hist.jl index 82b006ea1..691aef965 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -401,9 +401,9 @@ arrays appropriately. See description of `normalize` for details. Returns `h`. if mode == :none # nothing to do - elseif mode == :pdf || mode == :density + elseif mode == :pdf || mode == :density || mode == :probability if h.isdensity - if mode == :pdf + if mode == :pdf || mode == :probability # histogram already represents a density, just divide weights by norm s = 1/norm(h) weights .*= s @@ -411,22 +411,31 @@ arrays appropriately. See description of `normalize` for details. Returns `h`. A .*= s end else - # histogram already represents a density, nothing to do + # :density - histogram already represents a density, nothing to do end else - # Divide weights by bin volume, for :pdf also divide by sum of weights - SumT = norm_type(h) - vs_0 = (mode == :pdf) ? sum(SumT(x) for x in weights) : one(SumT) - @inbounds @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin - (@nref $N weights i) /= $(Symbol("vs_$N")) + if mode == :pdf || mode == :density + # Divide weights by bin volume, for :pdf also divide by sum of weights + SumT = norm_type(h) + vs_0 = (mode == :pdf) ? sum(SumT(x) for x in weights) : one(SumT) + @inbounds @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin + (@nref $N weights i) /= $(Symbol("vs_$N")) + for A in aux_weights + (@nref $N A i) /= $(Symbol("vs_$N")) + end + end + h.isdensity = true + else + # :probability - divide weights by sum of weights + nf = inv(sum(weights)) + weights .*= nf for A in aux_weights - (@nref $N A i) /= $(Symbol("vs_$N")) + A .*= nf end end end - h.isdensity = true - else mode != :pdf && mode != :density - throw(ArgumentError("Normalization mode must be :pdf, :density or :none")) + else + throw(ArgumentError("Normalization mode must be :pdf, :density, :probability or :none")) end h end @@ -445,8 +454,14 @@ Valid values for `mode` are: * `:density`: Normalize by bin sizes only. Resulting histogram represents count density of input and does not have norm 1. Will not modify the histogram if it already represents a density (`h.isdensity == 1`). +* `:probability`: Normalize by sum of weights only. Resulting histogram + represents the fraction of probability mass for each bin and does not have + norm 1. * `:none`: Leaves histogram unchanged. Useful to simplify code that has to conditionally apply different modes of normalization. + +Successive application of both `:probability` and `:density` normalization (in +any order) is equivalent to `:pdf` normalization. """ normalize(h::Histogram{T,N}; mode::Symbol=:pdf) where {T,N} = normalize!(deepcopy(float(h)), mode = mode) diff --git a/test/hist.jl b/test/hist.jl index 8996da988..f28649080 100644 --- a/test/hist.jl +++ b/test/hist.jl @@ -160,6 +160,7 @@ end @test @inferred(norm(h_pdf)) ≈ 1 @test @inferred(normalize(h_pdf, mode = :pdf)) == h_pdf @test @inferred(normalize(h_pdf, mode = :density)) == h_pdf + @test @inferred(normalize(h_pdf, mode = :probability)) == h_pdf h_density = normalize(h, mode = :density) @test h_density.weights ≈ h.weights ./ bin_vols @@ -169,6 +170,14 @@ end Histogram(h_density.edges, h_density.weights .* (1/norm(h_density)), h_density.closed, true) @test normalize(h_density, mode = :pdf).weights ≈ h_pdf.weights @test normalize(h_density, mode = :density) == h_density + @test normalize(h_density, mode = :probability).weights ≈ h_pdf.weights + + h_fraction = normalize(h, mode = :probability) + @test sum(h_fraction.weights) ≈ 1 + @test h_fraction.isdensity == false + @test normalize(h_fraction, mode = :pdf).weights ≈ h_pdf.weights + @test normalize(h_fraction, mode = :density).weights ≈ h_pdf.weights + @test normalize(h_fraction, mode = :probability).weights ≈ h_fraction.weights h_copy = deepcopy(float(h)) @test @inferred(normalize!(h_copy, mode = :density)) == h_copy