graph_blockmodel_multiflip_mcmc.hh 20.7 KB
Newer Older
1
2
// graph-tool -- a general graph modification and manipulation thingy
//
Tiago Peixoto's avatar
Tiago Peixoto committed
3
// Copyright (C) 2006-2018 Tiago de Paula Peixoto <tiago@skewed.de>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#ifndef GRAPH_BLOCKMODEL_MULTIFLIP_MCMC_HH
#define GRAPH_BLOCKMODEL_MULTIFLIP_MCMC_HH

#include "config.h"

#include <vector>
#include <algorithm>
25
#include <queue>
26
27

#include "graph_tool.hh"
28
#include "../support/graph_state.hh"
29
30
31
32
33
34
35
36
37
38
39
40
41
#include "graph_blockmodel_util.hh"
#include <boost/mpl/vector.hpp>

namespace graph_tool
{
using namespace boost;
using namespace std;

#define MCMC_BLOCK_STATE_params(State)                                         \
    ((__class__,&, mpl::vector<python::object>, 1))                            \
    ((state, &, State&, 0))                                                    \
    ((beta,, double, 0))                                                       \
    ((c,, double, 0))                                                          \
42
    ((a1,, double, 0))                                                         \
43
44
45
    ((d,, double, 0))                                                          \
    ((psplit,, double, 0))                                                     \
    ((gibbs_sweeps,, size_t, 0))                                               \
46
47
48
49
    ((entropy_args,, entropy_args_t, 0))                                       \
    ((verbose,, bool, 0))                                                      \
    ((niter,, size_t, 0))

50
enum class move_t { single_node = 0, split, merge, null };
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

template <class State>
struct MCMC
{
    GEN_STATE_BASE(MCMCBlockStateBase, MCMC_BLOCK_STATE_params(State))

    template <class... Ts>
    class MCMCBlockState
        : public MCMCBlockStateBase<Ts...>
    {
    public:
        GET_PARAMS_USING(MCMCBlockStateBase<Ts...>,
                         MCMC_BLOCK_STATE_params(State))
        GET_PARAMS_TYPEDEF(Ts, MCMC_BLOCK_STATE_params(State))

        template <class... ATs,
                  typename std::enable_if_t<sizeof...(ATs) ==
                                            sizeof...(Ts)>* = nullptr>
        MCMCBlockState(ATs&&... as)
           : MCMCBlockStateBase<Ts...>(as...),
            _g(_state._g),
72
73
74
75
            _groups(num_vertices(_state._bg)),
            _vpos(get(vertex_index_t(), _state._g),
                  num_vertices(_state._g)),
            _rpos(get(vertex_index_t(), _state._bg),
76
77
78
79
80
81
82
83
84
                  num_vertices(_state._bg)),
            _btemp(get(vertex_index_t(), _state._g),
                   num_vertices(_state._g)),
            _btemp2(get(vertex_index_t(), _state._g),
                    num_vertices(_state._g)),
            _bprev(get(vertex_index_t(), _state._g),
                   num_vertices(_state._g)),
            _bnext(get(vertex_index_t(), _state._g),
                   num_vertices(_state._g))
85
86
87
88
89
90
91
        {
            _state.init_mcmc(_c,
                             (_entropy_args.partition_dl ||
                              _entropy_args.degree_dl ||
                              _entropy_args.edges_dl));
            for (auto v : vertices_range(_state._g))
            {
92
93
94
95
                if (_state._vweight[v] == 0)
                    continue;
                add_element(_groups[_state._b[v]], _vpos, v);
                _N++;
96
            }
97

98
            for (auto r : vertices_range(_state._bg))
99
            {
100
101
102
                if (_state._wr[r] == 0)
                    continue;
                add_element(_vlist, _rpos, r);
103
            }
104
105
106
107
        }

        typename state_t::g_t& _g;

108
109
110
        std::vector<std::vector<size_t>> _groups;
        typename vprop_map_t<size_t>::type::unchecked_t _vpos;
        typename vprop_map_t<size_t>::type::unchecked_t _rpos;
111

112
113
114
115
116
        typename vprop_map_t<int>::type::unchecked_t _btemp;
        typename vprop_map_t<int>::type::unchecked_t _btemp2;
        typename vprop_map_t<int>::type::unchecked_t _bprev;
        typename vprop_map_t<int>::type::unchecked_t _bnext;

117
118
        std::vector<size_t> _vlist;
        std::vector<size_t> _vs;
119
        std::vector<size_t> _vs_temp;
120

121
        constexpr static move_t _null_move = move_t::null;
122

123
124
125
        size_t _nproposals = 0;
        size_t _N = 0;

126
127
128
129
130
131
132
        double _dS;
        double _a;
        size_t _s = null_group;
        size_t _t = null_group;
        size_t _u = null_group;
        size_t _v = null_group;

133
        size_t node_state(size_t r)
134
        {
135
            return r;
136
137
        }

138
        bool skip_node(size_t r)
139
        {
140
            return _groups[r].empty();
141
142
        }

143
        size_t node_weight(size_t)
144
        {
145
            return _vs.size();
146
147
        }

148
        template <class RNG>
149
        size_t sample_new_group(size_t v, RNG& rng)
150
        {
151
152
153
154
155
156
            size_t t = _state.sample_block(v, _c, 1, rng);
            if (t >= _groups.size())
            {
                _groups.resize(t + 1);
                _rpos.resize(t + 1);
            }
157

158
159
160
            assert(_state._wr[t] == 0);
            return t;
        }
161

162
163
        void move_vertex(size_t v, size_t r)
        {
164
165
166
            size_t s = _state._b[v];
            if (s == r)
                return;
167
168
169
170
            remove_element(_groups[s], _vpos, v);
            _state.move_vertex(v, r);
            add_element(_groups[r], _vpos, v);
        }
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
        template <class RNG, bool forward=true>
        std::tuple<size_t, size_t, double, double> split(size_t r, RNG& rng)
        {
            if (forward)
                _vs = _groups[r];
            std::shuffle(_vs.begin(), _vs.end(), rng);

            std::array<size_t, 2> rt = {null_group, null_group};
            std::array<double, 2> ps;
            double lp = -log(2);
            double dS = 0;
            for (auto v : _vs)
            {
                if (forward)
                    _bprev[v] = r;
187

188
189
190
191
192
193
194
195
196
197
198
                if (rt[0] == null_group)
                {
                    rt[0] = sample_new_group(v, rng);
                    dS += _state.virtual_move(v, _state._b[v], rt[0],
                                              _entropy_args);
                    if (forward)
                        move_vertex(v, rt[0]);
                    else
                        _state.move_vertex(v, rt[0]);
                    continue;
                }
199

200
201
                if (rt[1] == null_group)
                {
202
203
204
205
206
                    if (forward)
                        rt[1] = sample_new_group(v, rng);
                    else
                        rt[1] = (_state.virtual_remove_size(v) == 0) ?
                            r : sample_new_group(v, rng);
207
208
209
210
211
212
213
214
                    dS += _state.virtual_move(v, _state._b[v], rt[1],
                                              _entropy_args);
                    if (forward)
                        move_vertex(v, rt[1]);
                    else
                        _state.move_vertex(v, rt[1]);
                    continue;
                }
215

216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
                ps[0] = -_state.virtual_move(v, _state._b[v], rt[0],
                                             _entropy_args);
                ps[1] = -_state.virtual_move(v, _state._b[v], rt[1],
                                             _entropy_args);

                double Z = 0, p0 = 0;
                if (!std::isinf(_beta))
                {
                    Z = log_sum(_beta * ps[0], _beta * ps[1]);
                    p0 = _beta * ps[0] - Z;
                }
                else
                {
                    p0 =  (ps[0] < ps[1]) ? 0 : -numeric_limits<double>::infinity();
                }

                std::bernoulli_distribution sample(exp(p0));
                if (sample(rng))
                {
                    if (forward)
                        move_vertex(v, rt[0]);
                    else
                        _state.move_vertex(v, rt[0]);
                    lp += p0;
                    dS -= ps[0];
                }
                else
                {
                    if (forward)
                        move_vertex(v, rt[1]);
                    else
                        _state.move_vertex(v, rt[1]);
                    if (!std::isinf(_beta))
                        lp += _beta * ps[1] - Z;
                    dS -= ps[1];
                }
            }

            // gibbs sweep
            for (size_t i = 0; i < (forward ? _gibbs_sweeps : _gibbs_sweeps - 1); ++i)
256
            {
257
258
259
260
261
262
263
264
265
266
267
268
                lp = 0;
                std::array<double,2> p = {0,0};
                for (auto v : _vs)
                {
                    size_t bv = _state._b[v];
                    size_t nbv = (bv == rt[0]) ? rt[1] : rt[0];
                    double ddS;
                    if (_state.virtual_remove_size(v) > 0)
                        ddS = _state.virtual_move(v, bv, nbv, _entropy_args);
                    else
                        ddS = std::numeric_limits<double>::infinity();

269
                    if (!std::isinf(_beta) && !std::isinf(ddS))
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
                    {
                        double Z = log_sum(0., -ddS * _beta);
                        p[0] = -ddS * _beta - Z;
                        p[1] = -Z;
                    }
                    else
                    {
                        if (ddS < 0)
                        {
                            p[0] = 0;
                            p[1] = -std::numeric_limits<double>::infinity();
                        }
                        else
                        {
                            p[0] = -std::numeric_limits<double>::infinity();;
                            p[1] = 0;
                        }
                    }

                    std::bernoulli_distribution sample(exp(p[0]));
                    if (sample(rng))
                    {
                        if (forward)
                            move_vertex(v, nbv);
                        else
                            _state.move_vertex(v, nbv);
                        lp += p[0];
                        dS += ddS;
                    }
                    else
                    {
                        lp += p[1];
                    }
                }
304
            }
305
306

            return {rt[0], rt[1], dS, lp};
307
308
309
        }

        template <class RNG>
310
        double split_prob(size_t r, size_t s, RNG& rng)
311
        {
312
313
            size_t t = (_state._wr[r] == 0) ? r : s;

314
315
316
317
318
319
320
321
322
323
            for (auto v : _vs)
            {
                _btemp[v] = _state._b[v];
                _state.move_vertex(v, t);
            }

            double lp = 0;
            if (_gibbs_sweeps == 0)
            {
                std::shuffle(_vs.begin(), _vs.end(), rng);
324

325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
                lp = -log(2);
                std::array<double, 2> ps;
                for (auto v : _vs)
                {
                    if (v == _vs[0] || v == _vs[1])
                    {
                        _state.move_vertex(v, _bprev[v]);
                        continue;
                    }

                    ps[0] = -_state.virtual_move(v, _state._b[v], _bprev[v],
                                                 _entropy_args);
                    ps[1] = -_state.virtual_move(v, _state._b[v],
                                                 (size_t(_bprev[v]) == r) ? s : r,
                                                 _entropy_args);

                    assert(_bprev[v] == r || _bprev[v] == s);
342

343
344
                    double Z = log_sum(_beta * ps[0], _beta * ps[1]);
                    double p0 = _beta * ps[0] - Z;
345

346
                    lp += p0;
347

348
349
350
351
                    _state.move_vertex(v, _bprev[v]);
                }
            }
            else
352
            {
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
                auto ret = split<RNG, false>(t, rng);

                auto r_ = get<0>(ret);
                auto s_ = get<1>(ret);

                for (auto v : _vs)
                    _btemp2[v] = _state._b[v];

                double lp1 = 0, lp2 = 0;
                for (bool swap : std::array<bool,2>({false, true}))
                {
                    if (swap)
                        std::swap(r_, s_);
                    for (auto v : _vs)
                    {
                        size_t bv = _state._b[v];
                        size_t nbv = (bv == r_) ? s_ : r_;
                        double ddS;
                        if (_state.virtual_remove_size(v) > 0)
                            ddS = _state.virtual_move(v, bv, nbv, _entropy_args);
                        else
                            ddS = std::numeric_limits<double>::infinity();

376
377
378
379
                        if (!std::isinf(ddS))
                            ddS *= _beta;

                        double Z = log_sum(0., -ddS);
380
381
382
383
384

                        double p;
                        if ((size_t(_bprev[v]) == r) == (nbv == r_))
                        {
                            _state.move_vertex(v, nbv);
385
                            p = -ddS - Z;
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
                        }
                        else
                        {
                            p = -Z;
                        }

                        if (swap)
                            lp2 += p;
                        else
                            lp1 += p;
                    }

                    if (!swap)
                    {
                        for (auto v : _vs)
                            _state.move_vertex(v, _btemp2[v]);
                    }
                }

                lp = log_sum(lp1, lp2) - log(2);
406
            }
407

408
            for (auto v : _vs)
409
410
411
412
413
                _state.move_vertex(v, _btemp[v]);

            return lp;
        }

414
415
416
417
418
419
420
421
422
423
424
        bool allow_merge(size_t r, size_t s)
        {
            if (_state._coupled_state != nullptr)
            {
                auto& bh = _state._coupled_state->get_b();
                if (bh[r] != bh[s])
                    return false;
            }
            return _state._bclabel[r] == _state._bclabel[s];
        }

425
426
427
428
429
430
431
432

        template <class RNG, bool symmetric=true>
        std::tuple<size_t, double> merge(size_t r, size_t s, RNG& rng)
        {
            double dS = 0;

            _vs = _groups[r];
            _vs.insert(_vs.end(), _groups[s].begin(), _groups[s].end());
433

434
435
436
437
438
439
440
441
442
443
            size_t t;
            if (symmetric)
            {
                std::bernoulli_distribution flip(.5);
                t = flip(rng) ? r : s;
            }
            else
            {
                t = s;
            }
444

445
            for (auto v : _vs)
446
            {
447
448
449
450
451
452
453
                _bprev[v] = _state._b[v];
                if (size_t(_state._b[v]) != t)
                {
                    dS +=_state.virtual_move(v, _state._b[v], t,
                                             _entropy_args);
                    move_vertex(v, t);
                }
454
455
            }

456
457
            return {t, dS};
        }
458

459
460
461
462
463
464
465
466
467
468
469
        double get_move_prob(size_t r, size_t s)
        {
            double prs = 0, prr = 0;
            for (auto v : _groups[r])
            {
                prs += _state.get_move_prob(v, r, s, _c, 0, false);
                prr += _state.get_move_prob(v, r, r, _c, 0, false);
            }
            prs /= _groups[r].size();
            prr /= _groups[r].size();
            return prs/(1-prr);
470
471
        }

472
473
        template <bool symmetric=true>
        double merge_prob(size_t r, size_t s)
474
        {
475
            if (symmetric)
476
            {
477
478
479
                double pr = get_move_prob(r, s);
                double ps = get_move_prob(s, r);
                return log(pr + ps) - log(2);
480
481
482
            }
            else
            {
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
                return log(get_move_prob(r, s));
            }
        }

        template <class RNG>
        move_t move_proposal(size_t r, RNG& rng)
        {
            move_t move;
            double pf = 0, pb = 0;

            std::bernoulli_distribution single(_a1);

            if (single(rng))
            {
                move = move_t::single_node;
                auto v = uniform_sample(_groups[r], rng);
                _s = _state.sample_block(v, _c, _d, rng);
                if (_s >= _groups.size())
501
                {
502
503
504
                    _groups.resize(_s + 1);
                    _rpos.resize(_s + 1);
                }
505
                if (r == _s || !_state.allow_move(v, r, _s))
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
                    return _null_move;
                _dS = _state.virtual_move(v, r, _s, _entropy_args);
                if (!std::isinf(_beta))
                {
                    pf = log(_state.get_move_prob(v, r, _s, _c, _d, false));
                    pb = log(_state.get_move_prob(v, _s, r, _c, _d, true));

                    pf += -safelog_fast(_vlist.size());
                    pf += -safelog_fast(_groups[r].size());
                    int dB = 0;
                    if (_groups[_s].empty())
                        dB++;
                    if (_groups[r].size() == 1)
                        dB--;
                    pb += -safelog_fast(_vlist.size() + dB);
                    pb += -safelog_fast(_groups[_s].size() + 1);
                }
                _vs.clear();
                _vs.push_back(v);
                _bprev[v] = r;
                _bnext[v] = _s;
            }
            else
            {
                std::bernoulli_distribution ssplit((_vlist.size() > 1) ? _psplit : 1);
                if (ssplit(rng))
                {
                    move = move_t::split;
                    if (_state._wr[r] < 2)
                        return _null_move;
                    std::tie(_s, _t, _dS, pf) = split(r, rng);
                    if (_vlist.size() > 1)
                        pf += log(_psplit);
                    if (!std::isinf(_beta))
                        pb = merge_prob(_s, _t) + log1p(-_psplit);
                }
                else
                {
                    move = move_t::merge;
                    _s = r;
                    while (_s == r)
                    {
                        size_t v = uniform_sample(_groups[r], rng);
                        _s = _state.sample_block(v, _c, 0, rng);
                    }
551
                    if (!allow_merge(r, _s))
552
553
554
555
556
557
558
559
560
                    {
                        _nproposals += _groups[r].size() + _groups[_s].size();
                        return _null_move;
                    }
                    if (!std::isinf(_beta))
                        pf = merge_prob(r, _s) + log1p(-_psplit);
                    std::tie(_t, _dS) = merge(r, _s, rng);
                    if (!std::isinf(_beta))
                        pb = split_prob(r, _s, rng) + log(_psplit);
561
                }
562
                for (auto v : _vs)
563
564
565
566
                {
                    _bnext[v] = _state._b[v];
                    move_vertex(v, _bprev[v]);
                }
567
            }
568
569
570
571
572
573
574

            if (!std::isinf(_beta))
                _a = pb - pf;
            else
                _a = 0;

            _nproposals += _vs.size();
575

576
577
578
579
580
581
582
            return move;
        }

        std::tuple<double, double>
        virtual_move_dS(size_t, move_t)
        {
            return {_dS, _a};
583
584
        }

585
        void perform_move(size_t r, move_t move)
586
        {
587
            for (auto v : _vs)
588
589
590
                move_vertex(v, _bnext[v]);

            switch (move)
591
            {
592
593
594
595
596
597
598
599
600
601
602
603
            case move_t::single_node:
                if (_groups[r].empty())
                    remove_element(_vlist, _rpos, r);
                if (!has_element(_vlist, _rpos, _s))
                    add_element(_vlist, _rpos, _s);
                break;
            case move_t::split:
                remove_element(_vlist, _rpos, r);
                add_element(_vlist, _rpos, _s);
                add_element(_vlist, _rpos, _t);
                break;
            case move_t::merge:
604
                remove_element(_vlist, _rpos, r);
605
606
607
608
609
610
                remove_element(_vlist, _rpos, _s);
                add_element(_vlist, _rpos, _t);
                break;
            default:
                break;
            }
611
        }
612
613
614

        bool is_deterministic()
        {
615
            return false;
616
617
618
619
        }

        bool is_sequential()
        {
620
            return false;
621
622
623
624
625
626
627
628
629
630
631
632
633
634
        }

        auto& get_vlist()
        {
            return _vlist;
        }

        double get_beta()
        {
            return _beta;
        }

        size_t get_niter()
        {
635
            if (_N > 1 && _nproposals < _niter * _N)
636
637
                return std::numeric_limits<size_t>::max();
            return 0;
638
639
        }

640
        void step(size_t, move_t)
641
642
        {
        }
643
644
645
    };
};

646
std::ostream& operator<<(std::ostream& os, move_t move);
647
648
649
650

} // graph_tool namespace

#endif //GRAPH_BLOCKMODEL_MCMC_HH