Skip to content

Commit 291e673

Browse files
committed
Improve histogram performance with equal bin sizes
1 parent ee507e2 commit 291e673

2 files changed

Lines changed: 105 additions & 51 deletions

File tree

include/xtensor/xhistogram.hpp

Lines changed: 93 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,79 @@ namespace xt
4444
return xt::searchsorted(std::forward<E2>(bin_edges), std::forward<E1>(data), right);
4545
}
4646

47+
namespace detail
48+
{
49+
template <class R = double, class E1, class E2, class E3>
50+
inline auto histogram_imp(E1&& data, E2&& bin_edges, E3&& weights, bool density, bool equal_bins)
51+
{
52+
using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>, std::decay_t<E3>>;
53+
using value_type = typename std::decay_t<E3>::value_type;
54+
55+
XTENSOR_ASSERT(data.dimension() == 1);
56+
XTENSOR_ASSERT(weights.dimension() == 1);
57+
XTENSOR_ASSERT(bin_edges.dimension() == 1);
58+
XTENSOR_ASSERT(weights.size() == data.size());
59+
XTENSOR_ASSERT(bin_edges.size() >= 2);
60+
XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
61+
XTENSOR_ASSERT(xt::amin(data)[0] >= bin_edges[0]);
62+
XTENSOR_ASSERT(xt::amax(data)[0] <= bin_edges[bin_edges.size() - 1]);
63+
64+
size_t n_bins = bin_edges.size() - 1;
65+
xt::xtensor<value_type, 1> count = xt::zeros<value_type>({ n_bins });
66+
67+
if (equal_bins)
68+
{
69+
std::array<typename std::decay_t<E2>::value_type, 2> bounds = xt::minmax(bin_edges)();
70+
auto left = static_cast<double>(bounds[0]);
71+
auto right = static_cast<double>(bounds[1]);
72+
double norm = 1. / (right - left);
73+
for (size_t i = 0; i < data.size(); ++i)
74+
{
75+
auto v = static_cast<double>(data(i));
76+
// left and right are not bounds of data
77+
if ( v >= left & v < right )
78+
{
79+
auto i_bin = static_cast<size_t>(static_cast<double>(n_bins) * (static_cast<double>(data(i)) - left) * norm);
80+
count(i_bin) += weights(i);
81+
}
82+
else if ( v == right )
83+
{
84+
count(n_bins - 1) += weights(i);
85+
}
86+
}
87+
}
88+
else
89+
{
90+
auto sorter = xt::argsort(data);
91+
92+
size_type ibin = 0;
93+
94+
for (auto& idx : sorter)
95+
{
96+
while (data[idx] >= bin_edges[ibin + 1] && ibin < bin_edges.size() - 2)
97+
{
98+
++ibin;
99+
}
100+
count[ibin] += weights[idx];
101+
}
102+
}
103+
104+
xt::xtensor<R, 1> prob = xt::cast<R>(count);
105+
106+
if (density)
107+
{
108+
R n = static_cast<R>(data.size());
109+
for (size_type i = 0; i < bin_edges.size() - 1; ++i)
110+
{
111+
prob[i] /= (static_cast<R>(bin_edges[i + 1] - bin_edges[i]) * n);
112+
}
113+
}
114+
115+
return prob;
116+
}
117+
118+
} //detail
119+
47120
/**
48121
* @ingroup histogram
49122
* @brief Compute the histogram of a set of data.
@@ -57,45 +130,11 @@ namespace xt
57130
template <class R = double, class E1, class E2, class E3>
58131
inline auto histogram(E1&& data, E2&& bin_edges, E3&& weights, bool density = false)
59132
{
60-
using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>, std::decay_t<E3>>;
61-
using value_type = typename std::decay_t<E3>::value_type;
62-
63-
XTENSOR_ASSERT(data.dimension() == 1);
64-
XTENSOR_ASSERT(weights.dimension() == 1);
65-
XTENSOR_ASSERT(bin_edges.dimension() == 1);
66-
XTENSOR_ASSERT(weights.size() == data.size());
67-
XTENSOR_ASSERT(bin_edges.size() >= 2);
68-
XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
69-
XTENSOR_ASSERT(xt::amin(data)[0] >= bin_edges[0]);
70-
XTENSOR_ASSERT(xt::amax(data)[0] <= bin_edges[bin_edges.size() - 1]);
71-
72-
xt::xtensor<value_type, 1> count = xt::zeros<value_type>({ bin_edges.size() - 1 });
73-
74-
auto sorter = xt::argsort(data);
75-
76-
size_type ibin = 0;
77-
78-
for (auto& idx : sorter)
79-
{
80-
while (data[idx] >= bin_edges[ibin + 1] && ibin < bin_edges.size() - 2)
81-
{
82-
++ibin;
83-
}
84-
count[ibin] += weights[idx];
85-
}
86-
87-
xt::xtensor<R, 1> prob = xt::cast<R>(count);
88-
89-
if (density)
90-
{
91-
R n = static_cast<R>(data.size());
92-
for (size_type i = 0; i < bin_edges.size() - 1; ++i)
93-
{
94-
prob[i] /= (static_cast<R>(bin_edges[i + 1] - bin_edges[i]) * n);
95-
}
96-
}
97-
98-
return prob;
133+
return detail::histogram_imp<R>(std::forward<E1>(data),
134+
std::forward<E2>(bin_edges),
135+
std::forward<E3>(weights),
136+
density,
137+
false);
99138
}
100139

101140
/**
@@ -114,10 +153,11 @@ namespace xt
114153

115154
auto n = data.size();
116155

117-
return histogram(std::forward<E1>(data),
118-
std::forward<E2>(bin_edges),
119-
xt::ones<value_type>({ n }),
120-
density);
156+
return detail::histogram_imp(std::forward<E1>(data),
157+
std::forward<E2>(bin_edges),
158+
xt::ones<value_type>({ n }),
159+
density,
160+
false);
121161
}
122162

123163
/**
@@ -138,10 +178,11 @@ namespace xt
138178

139179
auto bin_edges = histogram_bin_edges(data, xt::ones<value_type>({ n }), bins);
140180

141-
return histogram(std::forward<E1>(data),
142-
std::forward<E1>(bin_edges),
143-
xt::ones<value_type>({ n }),
144-
density);
181+
return detail::histogram_imp(std::forward<E1>(data),
182+
std::forward<E1>(bin_edges),
183+
xt::ones<value_type>({ n }),
184+
density,
185+
true);
145186
}
146187

147188
/**
@@ -159,10 +200,11 @@ namespace xt
159200
{
160201
auto bin_edges = histogram_bin_edges(data, weights, bins);
161202

162-
return histogram(std::forward<E1>(data),
163-
std::forward<E1>(bin_edges),
164-
std::forward<E2>(weights),
165-
density);
203+
return detail::histogram_imp(std::forward<E1>(data),
204+
std::forward<E1>(bin_edges),
205+
std::forward<E2>(weights),
206+
density,
207+
true);
166208
}
167209

168210
/**

test/test_xhistogram.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "xtensor/xtensor.hpp"
1515
#include "xtensor/xhistogram.hpp"
1616
#include "xtensor/xrandom.hpp"
17+
#include "xtensor/xio.hpp"
18+
1719

1820
namespace xt
1921
{
@@ -29,6 +31,16 @@ namespace xt
2931
EXPECT_EQ(count(1), 2.);
3032
}
3133

34+
{
35+
xt::xtensor<double, 1> edges {0, 1.5, 3};
36+
xt::xtensor<double, 1> weights {1, 1, 1, 1};
37+
auto count = xt::histogram<int>(data, edges, weights, true);
38+
// This is indeed a bug. If the count is the density, an integral return
39+
// type should not be allowed.
40+
EXPECT_EQ(count(0), 0.);
41+
EXPECT_EQ(count(1), 0.);
42+
}
43+
3244
{
3345
xt::xtensor<double, 1> count = xt::histogram(data,
3446
xt::histogram_bin_edges(data, std::size_t(2), xt::histogram_algorithm::uniform)

0 commit comments

Comments
 (0)