histogram.hh 8.14 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
2
// graph-tool -- a general graph modification and manipulation thingy
//
3
// Copyright (C) 2007-2010 Tiago de Paula Peixoto <tiago@forked.de>
Tiago Peixoto's avatar
Tiago Peixoto committed
4
5
6
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
Tiago Peixoto's avatar
Tiago Peixoto committed
7
// as published by the Free Software Foundation; either version 3
Tiago Peixoto's avatar
Tiago Peixoto committed
8
9
10
11
12
13
14
15
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
16
// along with this program. If not, see <http://www.gnu.org/licenses/>.
Tiago Peixoto's avatar
Tiago Peixoto committed
17
18
19
20

#ifndef HISTOGRAM_HH
#define HISTOGRAM_HH

21
22
23
24
25
#include <vector>
#include <utility>
#include <boost/array.hpp>
#define BOOST_DISABLE_ASSERTS
#include <boost/multi_array.hpp>
Tiago Peixoto's avatar
Tiago Peixoto committed
26

27
28
29
30
31
#include <boost/type_traits.hpp>
#include <boost/mpl/if.hpp>
#include <boost/mpl/int.hpp>

#include <boost/python/object.hpp>
32
33

//
34
// This is a generic multidimensional histogram type
35
36
//

37
38
template <class ValueType, class CountType, size_t Dim>
class Histogram
Tiago Peixoto's avatar
Tiago Peixoto committed
39
{
40
41
42
43
public:
    typedef boost::array<ValueType,Dim> point_t; // point type to be
                                                 // histogrammed
    typedef boost::array<size_t,Dim> bin_t;      // bin type
Tiago Peixoto's avatar
Tiago Peixoto committed
44

45
    typedef boost::multi_array<CountType,Dim> count_t; // the histogram itself
Tiago Peixoto's avatar
Tiago Peixoto committed
46

47
48
    typedef boost::mpl::int_<Dim> dim;
    typedef CountType count_type;
49
    typedef ValueType value_type;
Tiago Peixoto's avatar
Tiago Peixoto committed
50

51
52
53
54
    // floating point type to calculate the mean
    typedef typename boost::mpl::if_<boost::is_floating_point<ValueType>,
                                     ValueType, double>::type mean_t;

55
56
    Histogram(const boost::array<std::vector<ValueType>, Dim>& bins):
        _bins(bins)
57
58
59
    {
        bin_t new_shape;
        for (size_t j = 0; j < Dim; ++j)
Tiago Peixoto's avatar
Tiago Peixoto committed
60
        {
61
62
63
64
65
            if (_bins[j].size() < 2)
                throw std::range_error("invalid bin edge number < 2!");

            _data_range[j] = std::make_pair(0, 0);

Tiago Peixoto's avatar
Tiago Peixoto committed
66
67
            // detect whether the given bins are of constant width, for faster
            // binning
68
69
70
            _const_width[j] = true;
            value_type delta = _bins[j][1] - _bins[j][0];
            for (size_t i = 2; i < _bins[j].size(); ++i)
Tiago Peixoto's avatar
Tiago Peixoto committed
71
72
73
            {
                value_type d = _bins[j][i] - _bins[j][i-1];
                if (delta != d)
74
                    _const_width[j] = false;
Tiago Peixoto's avatar
Tiago Peixoto committed
75
76
            }

77
            if (_const_width[j])
Tiago Peixoto's avatar
Tiago Peixoto committed
78
            {
79
80
81
82
83
84
85
86
87
88
89
90
91
92
                if (_bins[j].size() > 2)
                {
                    _data_range[j] = std::make_pair(_bins[j].front(),
                                                    _bins[j].back());
                    new_shape[j] = _bins[j].size() - 1;
                }
                else
                {
                    _data_range[j] = std::make_pair(_bins[j].front(),
                                                    _bins[j].front());
                    new_shape[j] = 1;
                }
                if (delta == 0)
                    throw std::range_error("invalid bin size of zero!");
Tiago Peixoto's avatar
Tiago Peixoto committed
93
94
            }
            else
95
96
97
            {
                new_shape[j] = _bins[j].size() - 1;
            }
Tiago Peixoto's avatar
Tiago Peixoto committed
98
        }
99
100
101
102
103
104
105
106
        _counts.resize(new_shape);
    }

    void PutValue(const point_t& v, const CountType& weight = 1)
    {
        bin_t bin;
        for (size_t i = 0; i < Dim; ++i)
        {
107
            if (_const_width[i])
108
            {
109
110
111
                if (_data_range[i].first != _data_range[i].second &&
                    (v[i] < _data_range[i].first ||
                     v[i] > _data_range[i].second))
Tiago Peixoto's avatar
Tiago Peixoto committed
112
                    return; // out of bounds
113
114
115
116
117
118
119
120
121
122
123
124
                value_type delta = _bins[i][1] - _bins[i][0];
                bin[i] = (v[i] - _data_range[i].first) / delta;
                if (bin[i] >= _counts.shape()[i]) // modify shape
                {
                    bin_t new_shape;
                    for (size_t j = 0; j < Dim; ++j)
                        new_shape[j] = _counts.shape()[j];
                    new_shape[i] = bin[i] + 1;
                    _counts.resize(new_shape);
                    while (_bins[i].size() < new_shape[i] + 1)
                        _bins[i].push_back(_bins[i].back() + delta);
                }
125
            }
Tiago Peixoto's avatar
Tiago Peixoto committed
126
            else // arbitrary bins widths. do a binary search
127
            {
128
                std::vector<ValueType>& bins = _bins[i];
129
130
                typeof(bins.begin()) iter = upper_bound(bins.begin(),
                                                        bins.end(), v[i]);
131
132
133
                if (iter == bins.end())
                {
                    return;  // falls off from last bin, do not count
134
135
136
137
138
                }
                else
                {
                    bin[i] = iter - bins.begin();
                    if (bin[i] == 0)
139
                        return; // falls off from fist bin, do not count
140
141
142
143
144
145
146
147
148
149
                    else
                       --bin[i];
                }
            }
        }
        _counts(bin) += weight;
    }

    boost::multi_array<CountType,Dim>& GetArray() { return _counts; }

150
151
152
    boost::array<std::pair<ValueType,ValueType>,Dim>& GetDataRange()
    { return _data_range; }

153
    boost::array<std::vector<ValueType>, Dim>& GetBins() { return _bins; }
154

155
156
protected:
    boost::multi_array<CountType,Dim> _counts;
157
    boost::array<std::vector<ValueType>, Dim> _bins;
158
    boost::array<std::pair<ValueType,ValueType>,Dim> _data_range;
159
    boost::array<bool,Dim> _const_width;
160
161
162
163
164
165
166
};


// This class will encapsulate a histogram, and atomically sum it to a given
// resulting histogram (which is shared among all copies) after it is
// destructed, or when the Gather() member function is called. This enables, for
// instance, a histogram to be built in parallel.
Tiago Peixoto's avatar
Tiago Peixoto committed
167
168

template <class Histogram>
169
170
171
class SharedHistogram: public Histogram
{
public:
172

Tiago Peixoto's avatar
Tiago Peixoto committed
173
    SharedHistogram(Histogram& hist): Histogram(hist), _sum(&hist) {}
174
175
176
177
178
179
180
181
182
183
184
    ~SharedHistogram()
    {
        Gather();
    }

    void Gather()
    {
        if (_sum != 0)
        {
            #pragma omp critical
            {
Tiago Peixoto's avatar
Tiago Peixoto committed
185
186
                typename Histogram::bin_t idx;

187
                typename Histogram::bin_t shape;
188
189
190
                for (size_t i = 0; i <  this->_counts.num_dimensions(); ++i)
                    shape[i] = max(this->_counts.shape()[i],
                                   _sum->GetArray().shape()[i]);
191
192
                _sum->GetArray().resize(shape);
                for (size_t i = 0; i < this->_counts.num_elements(); ++i)
Tiago Peixoto's avatar
Tiago Peixoto committed
193
194
195
196
197
198
199
200
201
202
                {
                    size_t offset = 1;
                    for (size_t j = 0; j < this->_counts.num_dimensions(); ++j)
                    {
                        size_t L = this->_counts.shape()[j];
                        idx[j] = ((i / offset) % L);
                        offset *= L;
                    }
                    _sum->GetArray()(idx) += this->_counts(idx);
                }
203
                for (int i = 0; i < Histogram::dim::value; ++i)
204
                {
205
206
                    if (_sum->GetBins()[i].size() < this->_bins[i].size())
                        _sum->GetBins()[i] = this->_bins[i];
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
                }
            }
            _sum = 0;
        }
    }
private:
    Histogram* _sum;
};


//
// useful functions to get the the mean and standard deviations from simple
// map-based, non-binned histograms. Not to be used with the above type.
//

// gets the mean value of a histogram
template <class Map>
double GetMapMean (const Map &m)
Tiago Peixoto's avatar
Tiago Peixoto committed
225
226
227
228
{
    int total = 0;
    double mean = 0;
    for (typeof(m.begin()) iter = m.begin(); iter != m.end(); iter++)
229
230
    {
        mean += double(iter->first * iter->second);
231
        total += iter->second;
232
    }
233

234
    return (total > 0)?mean/total:0.0;
Tiago Peixoto's avatar
Tiago Peixoto committed
235
236
}

237
// gets the standard deviation of a histogram
238
239
template <class Map>
double GetMapDeviation (const Map &m, double avg)
240
{
241
242
243
244
    double dev = 0.0;
    int total = 0;
    for (typeof(m.begin()) iter = m.begin(); iter != m.end(); iter++)
    {
245
246
        dev += double( (iter->first - avg) *
                       (iter->first - avg) * iter->second);
247
        total += iter->second;
248
249
250
251
    }
    return (total > 1)?sqrt(dev/(total-1)):0.0;
}

Tiago Peixoto's avatar
Tiago Peixoto committed
252
#endif //HISTOGRAM_HH