histogram.hh 8.03 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
2
// graph-tool -- a general graph modification and manipulation thingy
//
Tiago Peixoto's avatar
Tiago Peixoto committed
3
// Copyright (C) 2006-2020 Tiago de Paula Peixoto <tiago@skewed.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
4
//
5
6
7
8
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option) any
// later version.
Tiago Peixoto's avatar
Tiago Peixoto committed
9
//
10
11
12
13
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
Tiago Peixoto's avatar
Tiago Peixoto committed
14
//
15
// You should have received a copy of the GNU Lesser General Public License
16
// along with this program. If not, see <http://www.gnu.org/licenses/>.
Tiago Peixoto's avatar
Tiago Peixoto committed
17
18
19
20

#ifndef HISTOGRAM_HH
#define HISTOGRAM_HH

21
22
#include <vector>
#include <utility>
23
#include <algorithm>
Tiago Peixoto's avatar
Tiago Peixoto committed
24
#include <array>
25
26
#define BOOST_DISABLE_ASSERTS
#include <boost/multi_array.hpp>
Tiago Peixoto's avatar
Tiago Peixoto committed
27

28
29
30
#include <boost/mpl/if.hpp>
#include <boost/mpl/int.hpp>

31
//
32
// This is a generic multidimensional histogram type
33
34
//

35
36
template <class ValueType, class CountType, size_t Dim>
class Histogram
Tiago Peixoto's avatar
Tiago Peixoto committed
37
{
38
public:
Tiago Peixoto's avatar
Tiago Peixoto committed
39
40
41
    typedef std::array<ValueType,Dim> point_t; // point type to be
                                               // histogrammed
    typedef std::array<size_t,Dim> bin_t;      // bin type
Tiago Peixoto's avatar
Tiago Peixoto committed
42

43
    typedef boost::multi_array<CountType,Dim> count_t; // the histogram itself
Tiago Peixoto's avatar
Tiago Peixoto committed
44

45
46
    typedef boost::mpl::int_<Dim> dim;
    typedef CountType count_type;
47
    typedef ValueType value_type;
Tiago Peixoto's avatar
Tiago Peixoto committed
48

49
50
51
52
    // floating point type to calculate the mean
    typedef typename boost::mpl::if_<boost::is_floating_point<ValueType>,
                                     ValueType, double>::type mean_t;

53
54
    Histogram(const std::array<std::vector<ValueType>, Dim>& bins)
        : _bins(bins)
55
56
57
    {
        bin_t new_shape;
        for (size_t j = 0; j < Dim; ++j)
Tiago Peixoto's avatar
Tiago Peixoto committed
58
        {
59
60
            if (_bins[j].size() < 1)
                throw std::range_error("invalid bin edge number < 1!");
61
62
63

            _data_range[j] = std::make_pair(0, 0);
            value_type delta = _bins[j][1] - _bins[j][0];
64
65

            if (_bins[j].size() == 2)
Tiago Peixoto's avatar
Tiago Peixoto committed
66
            {
67
68
69
                _data_range[j] = std::make_pair(_bins[j][0], _bins[j][0]);
                delta = _bins[j][1];
                _const_width[j] = true;
Tiago Peixoto's avatar
Tiago Peixoto committed
70
            }
71
            else
Tiago Peixoto's avatar
Tiago Peixoto committed
72
            {
73
74
75
76
                // detect whether the given bins are of constant width, for faster
                // binning
                _const_width[j] = true;
                for (size_t i = 2; i < _bins[j].size(); ++i)
77
                {
78
79
80
                    value_type d = _bins[j][i] - _bins[j][i-1];
                    if (delta != d)
                        _const_width[j] = false;
81
                }
82
83

                if (_const_width[j])
84
                    _data_range[j] = std::make_pair(_bins[j].front(),
85
                                                    _bins[j].back());
86
            }
87
88
89
90
            if (delta == 0)
                throw std::range_error("invalid bin size of zero!");

            new_shape[j] = _bins[j].size() - 1;
Tiago Peixoto's avatar
Tiago Peixoto committed
91
        }
92
93
94
        _counts.resize(new_shape);
    }

95
    void put_value(const point_t& v, const CountType& weight = 1)
96
97
98
99
    {
        bin_t bin;
        for (size_t i = 0; i < Dim; ++i)
        {
100
            if (_const_width[i])
101
            {
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
                value_type delta;

                if (_data_range[i].first == _data_range[i].second)
                {
                    delta = _bins[i][1];

                    if (v[i] < _data_range[i].first)
                        return; // out of bounds
                }
                else
                {
                    delta = _bins[i][1] - _bins[i][0];

                    if (v[i] < _data_range[i].first ||
                        v[i] >= _data_range[i].second)
                        return; // out of bounds
                }
119

120
                bin[i] = size_t((v[i] - _data_range[i].first) / delta);
121
122
123
124
125
126
127
128
129
130
                if (bin[i] >= _counts.shape()[i]) // modify shape
                {
                    bin_t new_shape;
                    for (size_t j = 0; j < Dim; ++j)
                        new_shape[j] = _counts.shape()[j];
                    new_shape[i] = bin[i] + 1;
                    _counts.resize(new_shape);
                    while (_bins[i].size() < new_shape[i] + 1)
                        _bins[i].push_back(_bins[i].back() + delta);
                }
131
            }
Tiago Peixoto's avatar
Tiago Peixoto committed
132
            else // arbitrary bins widths. do a binary search
133
            {
134
                std::vector<ValueType>& bins = _bins[i];
135
                auto iter = upper_bound(bins.begin(), bins.end(), v[i]);
136
137
138
                if (iter == bins.end())
                {
                    return;  // falls off from last bin, do not count
139
140
141
142
143
                }
                else
                {
                    bin[i] = iter - bins.begin();
                    if (bin[i] == 0)
144
                        return; // falls off from fist bin, do not count
145
146
147
148
149
150
151
152
                    else
                       --bin[i];
                }
            }
        }
        _counts(bin) += weight;
    }

153
    boost::multi_array<CountType,Dim>& get_array() { return _counts; }
154

155
    std::array<std::pair<ValueType,ValueType>,Dim>& get_data_range()
156
157
    { return _data_range; }

158
    std::array<std::vector<ValueType>, Dim>& get_bins() { return _bins; }
159

160
161
protected:
    boost::multi_array<CountType,Dim> _counts;
Tiago Peixoto's avatar
Tiago Peixoto committed
162
163
164
    std::array<std::vector<ValueType>, Dim> _bins;
    std::array<std::pair<ValueType,ValueType>,Dim> _data_range;
    std::array<bool,Dim> _const_width;
165
166
167
168
169
};


// This class will encapsulate a histogram, and atomically sum it to a given
// resulting histogram (which is shared among all copies) after it is
170
// destructed, or when the gather() member function is called. This enables, for
171
// instance, a histogram to be built in parallel.
Tiago Peixoto's avatar
Tiago Peixoto committed
172
173

template <class Histogram>
174
175
176
class SharedHistogram: public Histogram
{
public:
177

Tiago Peixoto's avatar
Tiago Peixoto committed
178
    SharedHistogram(Histogram& hist): Histogram(hist), _sum(&hist) {}
179
180
    ~SharedHistogram()
    {
181
        gather();
182
183
    }

184
    void gather()
185
186
187
188
189
    {
        if (_sum != 0)
        {
            #pragma omp critical
            {
Tiago Peixoto's avatar
Tiago Peixoto committed
190
191
                typename Histogram::bin_t idx;

192
                typename Histogram::bin_t shape;
193
                for (size_t i = 0; i <  this->_counts.num_dimensions(); ++i)
194
                    shape[i] = std::max(this->_counts.shape()[i],
195
196
                                        _sum->get_array().shape()[i]);
                _sum->get_array().resize(shape);
197
                for (size_t i = 0; i < this->_counts.num_elements(); ++i)
Tiago Peixoto's avatar
Tiago Peixoto committed
198
199
200
201
202
203
204
205
                {
                    size_t offset = 1;
                    for (size_t j = 0; j < this->_counts.num_dimensions(); ++j)
                    {
                        size_t L = this->_counts.shape()[j];
                        idx[j] = ((i / offset) % L);
                        offset *= L;
                    }
206
                    _sum->get_array()(idx) += this->_counts(idx);
Tiago Peixoto's avatar
Tiago Peixoto committed
207
                }
208
                for (int i = 0; i < Histogram::dim::value; ++i)
209
                {
210
211
                    if (_sum->get_bins()[i].size() < this->_bins[i].size())
                        _sum->get_bins()[i] = this->_bins[i];
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
                }
            }
            _sum = 0;
        }
    }
private:
    Histogram* _sum;
};


//
// useful functions to get the the mean and standard deviations from simple
// map-based, non-binned histograms. Not to be used with the above type.
//

// gets the mean value of a histogram
template <class Map>
229
double get_map_mean(const Map &m)
Tiago Peixoto's avatar
Tiago Peixoto committed
230
231
232
{
    int total = 0;
    double mean = 0;
233
    for (auto& vc : m)
234
    {
235
236
        mean += double(vc.first * vc.second);
        total += vc.second;
237
    }
238

239
    return (total > 0)?mean/total:0.0;
Tiago Peixoto's avatar
Tiago Peixoto committed
240
241
}

242
// gets the standard deviation of a histogram
243
template <class Map>
244
double get_map_deviation(const Map &m, double avg)
245
{
246
247
    double dev = 0.0;
    int total = 0;
248
    for (auto& vc : m)
249
    {
250
251
        dev += double((vc.first - avg) * (vc.first - avg) * vc.second);
        total += vc.second;
252
253
254
255
    }
    return (total > 1)?sqrt(dev/(total-1)):0.0;
}

Tiago Peixoto's avatar
Tiago Peixoto committed
256
#endif //HISTOGRAM_HH