graph_blockmodel_overlap.hh 58 KB
Newer Older
1
2
// graph-tool -- a general graph modification and manipulation thingy
//
Tiago Peixoto's avatar
Tiago Peixoto committed
3
// Copyright (C) 2006-2015 Tiago de Paula Peixoto <tiago@skewed.de>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#ifndef GRAPH_BLOCKMODEL_OVERLAP_HH
#define GRAPH_BLOCKMODEL_OVERLAP_HH

#include "config.h"
#include <tuple>

#include "graph_blockmodel.hh"

namespace graph_tool
{

using namespace boost;

//===============
// Overlap stats
//===============

class overlap_stats_t
{
public:
    typedef pair<size_t, size_t> deg_t;

    typedef property_map_type::apply<int32_t,
                                     GraphInterface::vertex_index_map_t>::type::unchecked_t
        vmap_t;
43
44
45
46
    typedef property_map_type::apply<int64_t,
                                     GraphInterface::vertex_index_map_t>::type::unchecked_t
        vimap_t;
    typedef property_map_type::apply<vector<int64_t>,
47
48
49
50
51
52
                                     GraphInterface::vertex_index_map_t>::type::unchecked_t
        vvmap_t;

    overlap_stats_t(): _enabled(false) {}

    template <class Graph>
53
    overlap_stats_t(Graph& g, vmap_t b, vvmap_t half_edges, vimap_t node_index,
54
55
                    size_t B)
        : _half_edges(half_edges), _node_index(node_index),
56
57
          _out_neighbours(num_vertices(g), -1),
          _in_neighbours(num_vertices(g), -1)
58
59
60
61
62
63
64
65
66
67
68
69
    {
        _enabled = true;
        _block_nodes.resize(B);

#ifdef HAVE_SPARSEHASH
        for (auto& bnodes : _block_nodes)
        {
            bnodes.set_empty_key(numeric_limits<size_t>::max());
            bnodes.set_deleted_key(numeric_limits<size_t>::max() - 1);
        }
#endif

70
        size_t N = 0;
71
72
73
        for (auto v : vertices_range(g))
        {
            size_t vi = node_index[v];
74
            N = std::max(N, vi + 1);
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
            size_t kin = in_degreeS()(v, g);
            size_t kout = out_degreeS()(v, g);

            size_t r = b[v];
            auto& bnodes = _block_nodes[r];
            auto& k = bnodes[vi];
            k.first += kin;
            k.second += kout;

            for (auto e : out_edges_range(v, g))
                _out_neighbours[v] = target(e, g);
            for (auto e : in_edges_range(v, g))
                _in_neighbours[v] = source(e, g);
        }

        // parallel edges
        _mi.resize(num_vertices(g), -1);

93
94
        for (size_t i = 0; i < N; ++i)
        {
95
96
            auto& he = half_edges[i];

97
            gt_hash_map<size_t, vector<size_t>> out_us;
98
99
100
101
102
            for (auto u : he)
            {
                auto w = _out_neighbours[u];
                if (w == -1)
                    continue;
103
104
                if (!is_directed::apply<Graph>::type::value && size_t(node_index[w]) < i)
                    continue;
105
106
107
108
109
110
111
                out_us[node_index[w]].push_back(u);
            }

            for (auto& uc : out_us)
            {
                if (uc.second.size() > 1)
                {
112
113
114
115
116
117
118
119
                    _parallel_bundles.resize(_parallel_bundles.size() + 1);
                    auto& h = _parallel_bundles.back();
#ifdef HAVE_SPARSEHASH
                    h.set_empty_key(make_pair(numeric_limits<size_t>::max(),
                                              numeric_limits<size_t>::max()));
                    h.set_deleted_key(make_pair(numeric_limits<size_t>::max() - 1,
                                                numeric_limits<size_t>::max() - 1));
#endif
120
                    for (auto u : uc.second)
121
122
123
124
125
126
127
128
129
130
                    {
                        auto w = _out_neighbours[u];
                        assert(w != -1);
                        _mi[u] = _mi[w] = _parallel_bundles.size() - 1;
                        size_t r = b[u];
                        size_t s = b[w];
                        if (!is_directed::apply<Graph>::type::value && r > s)
                            std::swap(r, s);
                        h[make_pair(r, s)]++;
                    }
131
132
133
134
135
                }
            }
        }
    }

136
    template <class Graph, class VProp>
137
    void add_half_edge(size_t v, size_t v_r, VProp& b, Graph&)
138
139
    {
        size_t u = _node_index[v];
140
141
142
143
        size_t kin = (_in_neighbours[v] == -1) ? 0 : 1;
        size_t kout = (_out_neighbours[v] == -1) ? 0 : 1;
        assert(kin + kout == 1);
        auto& k = _block_nodes[v_r][u];
144
145
        k.first += kin;
        k.second += kout;
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

        int m = _mi[v];
        if (m != -1)
        {
            size_t r, s;
            int u = _out_neighbours[v];
            if (u == -1)
            {
                u = _in_neighbours[v];
                r = b[u];
                s = v_r;
            }
            else
            {
                r = v_r;
                s = b[u];
            }
            auto& h = _parallel_bundles[m];
            if (!is_directed::apply<Graph>::type::value && r > s)
                std::swap(r, s);
            h[make_pair(r, s)]++;
        }
168
169
    }

170
    template <class Graph, class VProp>
171
    void remove_half_edge(size_t v, size_t v_r, VProp& b, Graph&)
172
173
    {
        size_t u = _node_index[v];
174
175
176
177
        size_t kin = (_in_neighbours[v] == -1) ? 0 : 1;
        size_t kout = (_out_neighbours[v] == -1) ? 0 : 1;
        assert(kin + kout == 1);
        auto& k = _block_nodes[v_r][u];
178
179
180
181
        k.first -= kin;
        k.second -= kout;

        if (k.first + k.second == 0)
182
            _block_nodes[v_r].erase(u);
183

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
        int m = _mi[v];
        if (m != -1)
        {
            size_t r, s;
            int u = _out_neighbours[v];
            if (u == -1)
            {
                u = _in_neighbours[v];
                r = b[u];
                s = v_r;
            }
            else
            {
                r = v_r;
                s = b[u];
            }
            auto& h = _parallel_bundles[m];
            if (!is_directed::apply<Graph>::type::value && r > s)
                std::swap(r, s);
            auto iter = h.find(make_pair(r, s));
            assert(iter->second > 0);
            iter->second--;
            if (iter->second == 0)
                h.erase(iter);
        }
209
210
    }

211
    size_t get_block_size(size_t r) const
212
213
214
215
    {
        return _block_nodes[r].size();
    }

216
217
    size_t virtual_remove_size(size_t v, size_t r, size_t in_deg = 0,
                               size_t out_deg = 0) const
218
219
220
    {
        size_t nr = _block_nodes[r].size();
        size_t u = _node_index[v];
221
222
223
224
225
226
        size_t kin = (in_deg + out_deg) > 0 ?
            in_deg : ((_in_neighbours[v] == -1) ? 0 : 1);
        size_t kout = (in_deg + out_deg) > 0 ?
            out_deg : ((_out_neighbours[v] == -1) ? 0 : 1);
        const auto iter = _block_nodes[r].find(u);
        const auto& deg = iter->second;
227
228
229
230
231
        if (deg.first == kin && deg.second == kout)
            nr--;
        return nr;
    }

232
    size_t virtual_add_size(size_t v, size_t r) const
233
234
235
    {
        size_t nr = _block_nodes[r].size();
        size_t u = _node_index[v];
236
        const auto& bnodes = _block_nodes[r];
237
238
239
240
241
242
        if (bnodes.find(u) == bnodes.end())
            nr++;
        return nr;
    }

    template <class Graph>
243
244
    double virtual_move_dS(size_t v, size_t r, size_t nr, Graph& g,
                           size_t in_deg = 0, size_t out_deg = 0) const
245
246
247
248
    {
        double dS = 0;

        size_t u = _node_index[v];
249
250
        size_t u_kin = ((in_deg + out_deg) > 0) ? in_deg : in_degreeS()(v, g);
        size_t u_kout = ((in_deg + out_deg) > 0) ? out_deg : out_degreeS()(v, g);
251

252
        auto deg =  _block_nodes[r].find(u)->second;
253
254
255
256
257
258
259
        auto ndeg = deg;
        ndeg.first -= u_kin;
        ndeg.second -= u_kout;

        dS -= lgamma_fast(ndeg.first + 1) + lgamma_fast(ndeg.second + 1);
        dS += lgamma_fast(deg.first + 1) + lgamma_fast(deg.second + 1);

260
        const auto iter = _block_nodes[nr].find(u);
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
        if (iter != _block_nodes[nr].end())
            deg = iter->second;
        else
            deg = make_pair(0, 0);
        ndeg = deg;
        ndeg.first += u_kin;
        ndeg.second += u_kout;

        dS -= lgamma_fast(ndeg.first + 1) + lgamma_fast(ndeg.second + 1);
        dS += lgamma_fast(deg.first + 1) + lgamma_fast(deg.second + 1);

        return dS;
    }

    template <class Graph, class VProp>
276
    double virtual_move_parallel_dS(size_t v, size_t v_r, size_t v_nr, VProp& b,
277
                                    Graph&, bool coherent=false) const
278
279
280
281
282
    {
        int m = _mi[v];
        if (m == -1)
            return 0;

283
284
285
        size_t r, s, nr, ns;
        int u = _out_neighbours[v];
        if (u == -1)
286
        {
287
288
289
290
291
            u = _in_neighbours[v];
            r = b[u];
            s = v_r;
            nr = r;
            ns = v_nr;
292
        }
293
        else
294
        {
295
296
297
298
299
300
301
302
303
            r = v_r;
            s = b[u];
            nr = v_nr;
            ns = s;
        }
        if (!is_directed::apply<Graph>::type::value && r > s)
            std::swap(r, s);
        if (!is_directed::apply<Graph>::type::value && nr > ns)
            std::swap(nr, ns);
304

305
        auto& h = _parallel_bundles[m];
306

307
308
309
310
311
312
313
        auto get_h = [&](const pair<size_t, size_t>& k) -> int
            {
                const auto iter = h.find(k);
                if (iter == h.end())
                    return 0;
                return iter->second;
            };
314

315
316
        int c  = get_h(make_pair(r,  s));
        int nc = get_h(make_pair(nr, ns));
317

318
319
320
321
        assert(c > 0);
        assert(nc >= 0);
        assert(v_r != v_nr);
        assert(make_pair(r, s) != make_pair(nr, ns));
322

323
324
325
326
327
328
        double S = 0;
        S -= lgamma_fast(c + 1) + lgamma_fast(nc + 1);
        if (!coherent)
            S += lgamma_fast(c) + lgamma_fast(nc + 2);
        else
            S += lgamma_fast(c + nc + 1);
329
330
331
332

        return S;
    }

333
    // sample another half-edge adjacent to the node w
334
    template <class RNG>
335
    size_t sample_half_edge(size_t w, RNG& rng) const
336
    {
337
        auto& half_edges = _half_edges[w];
338
339
340
        return uniform_sample(half_edges, rng);
    }

341
342
343
344
345
    size_t get_node(size_t v) const { return _node_index[v]; }
    const vector<int64_t>& get_half_edges(size_t v) const { return _half_edges[v]; }

    int64_t get_out_neighbour(size_t v) const { return _out_neighbours[v]; }
    int64_t get_in_neighbour(size_t v) const { return _in_neighbours[v]; }
346
347


348
    bool is_enabled() const { return _enabled; }
349

350

351
    typedef gt_hash_map<pair<size_t, size_t>, int> phist_t;
352
353
354

    const vector<phist_t>& get_parallel_bundles() { return _parallel_bundles; }
    const vector<int>& get_mi() { return _mi; }
355
356
357
358

private:
    bool _enabled;
    vvmap_t _half_edges;     // half-edges to each node
359
    vimap_t _node_index;     // node to each half edges
360

361
    typedef gt_hash_map<size_t, deg_t> node_map_t;
362
363
364

    vector<node_map_t> _block_nodes; // nodes (and degrees) in each block

365
366
367
    vector<int64_t> _out_neighbours;
    vector<int64_t> _in_neighbours;

368
369

    vector<int> _mi;
370
    vector<phist_t> _parallel_bundles; // parallel edge bundles
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
};


//=============================
// Partition Description length
//=============================

extern double lbinom(double N, double k);
extern double xlogx(size_t x);

struct overlap_partition_stats_t
{
    typedef std::tuple<int, int> deg_t;
    typedef vector<deg_t> cdeg_t;

    typedef vector<int> bv_t;

388
389
    typedef gt_hash_map<bv_t, size_t> bhist_t;
    typedef gt_hash_map<cdeg_t, size_t, std::hash<cdeg_t>> cdeg_hist_t;
390

391
    typedef gt_hash_map<bv_t, cdeg_hist_t> deg_hist_t;
392

393
    typedef gt_hash_map<bv_t, vector<size_t>> ebhist_t;
394

395
    typedef gt_hash_map<int, int> dmap_t;
396
397
398
399
400

    overlap_partition_stats_t() : _enabled(false) {}

    template <class Graph, class Vprop, class Eprop>
    overlap_partition_stats_t(Graph& g, Vprop b, overlap_stats_t& overlap_stats,
401
                              Eprop eweight, size_t N, size_t B, bool edges_dl)
402
403
404
405
        : _enabled(true), _N(N), _B(B), _D(0),
          _dhist(B + 1), _r_count(B), _bhist(N), _emhist(B), _ephist(B),
          _embhist(N), _epbhist(N), _deg_hist(N), _bvs(N), _nbvs(N), _degs(N),
          _ndegs(N), _deg_delta(B), _edges_dl(edges_dl)
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
    {
        dmap_t in_hist, out_hist;
        for (size_t v = 0; v < N; ++v)
        {
            dmap_t in_hist, out_hist;
            set<size_t> rs;

            get_bv_deg(v, b, eweight, overlap_stats, g, rs, in_hist, out_hist);

            cdeg_t cdeg;
            for (auto r : rs)
            {
                deg_t deg = std::make_tuple(in_hist[r], out_hist[r]);
                cdeg.push_back(deg);
            }

            bv_t bv(rs.begin(), rs.end());

            _bvs[v].reserve(bv.size() * 2);
425
            _bvs[v] = bv;
426
            _nbvs[v].reserve(bv.size() * 2);
427
            _nbvs[v] = bv;
428
            _degs[v].reserve(cdeg.size() * 2);
429
            _degs[v] = cdeg;
430
            _ndegs[v].reserve(cdeg.size() * 2);
431
            _ndegs[v] = cdeg;
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460

            auto & cdh = _deg_hist[bv];
            cdh[cdeg]++;

            size_t d = bv.size();
            _D = max(_D, d);
            _dhist[d]++;
            _bhist[bv]++;

            auto& bmh = _embhist[bv];
            auto& bph = _epbhist[bv];
            bmh.resize(bv.size());
            bph.resize(bv.size());

            for (size_t i = 0; i < bv.size(); ++i)
            {
                size_t r = bv[i];
                _emhist[r] += get<0>(cdeg[i]);
                _ephist[r] += get<1>(cdeg[i]);
                bmh[i] += get<0>(cdeg[i]);
                bph[i] += get<1>(cdeg[i]);
            }

        }

        for (auto& bv_c : _bhist)
        {
            assert(bv_c.second > 0);
            for (auto r : bv_c.first)
461
                _r_count[r] += 1;
462
463
        }

464
465
466
467
468
469
        _actual_B = 0;
        for (size_t r = 0; r < _B; ++r)
        {
            if (overlap_stats.get_block_size(r) > 0)
                _actual_B++;
        }
470
471
472
473
474
    }

    template <class Graph, class Vprop, class Eprop>
    void get_bv_deg(size_t v, Vprop& b, Eprop& eweight,
                    overlap_stats_t& overlap_stats, Graph& g, set<size_t>& rs,
475
                    dmap_t& in_hist, dmap_t& out_hist) const
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
    {
        if (!overlap_stats.is_enabled())
        {
            in_hist[b[v]] += in_degreeS()(v, g, eweight);
            out_hist[b[v]] += out_degreeS()(v, g, eweight);
        }
        else
        {
            auto& half_edges = overlap_stats.get_half_edges(v);
            for (size_t u : half_edges)
            {
                size_t kin = in_degreeS()(u, g);
                size_t kout = out_degreeS()(u, g);

                in_hist[b[u]] += kin;
                out_hist[b[u]] += kout;
            }
        }

        for (auto& rk : in_hist)
            rs.insert(rk.first);
    }


500
    double get_partition_dl() const
501
502
503
504
505
506
507
    {
        double S = 0;
        for (size_t d = 1; d < _dhist.size(); ++d)
        {
            size_t nd = _dhist[d];
            if (nd == 0)
                continue;
508
            double x = lbinom_fast(_actual_B, d);
509
            double ss = lbinom_careful((exp(x) + nd) - 1, nd); // not fast
510
511
512
513
514
515
516
            if (std::isinf(ss) || std::isnan(ss))
                ss = nd * x - lgamma_fast(nd + 1);
            assert(!std::isinf(ss));
            assert(!std::isnan(ss));
            S += ss;
        }

517
        S += lbinom_fast(_D + _N - 1, _N) + lgamma_fast(_N + 1);
518
519

        for (auto& bh : _bhist)
520
            S -= lgamma_fast(bh.second + 1);
521
522
523
524

        return S;
    }

525
    double get_deg_dl(bool ent, bool deg_alt, bool xi_fast) const
526
527
528
529
530
531
532
533
534
    {
        double S = 0;
        if (ent)
        {
            for (auto& ch : _deg_hist)
            {
                auto& bv = ch.first;
                auto& cdeg_hist = ch.second;

535
                size_t n_bv = _bhist.find(bv)->second;
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550

                S += xlogx(n_bv);
                for (auto& dh : cdeg_hist)
                    S -= xlogx(dh.second);
            }
        }
        else
        {
            S = 0;

            for (auto& ch : _deg_hist)
            {
                auto& bv = ch.first;
                auto& cdeg_hist = ch.second;

551
                size_t n_bv = _bhist.find(bv)->second;
552
553
554
555

                if (n_bv == 0)
                    continue;

556
557
                const auto& bmh = _embhist.find(bv)->second;
                const auto& bph = _epbhist.find(bv)->second;
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573

                double S1 = 0;
                for (size_t i = 0; i < bv.size(); ++i)
                {
                    if (xi_fast)
                    {
                        S1 += get_xi_fast(n_bv, bmh[i]);
                        S1 += get_xi_fast(n_bv, bph[i]);
                    }
                    else
                    {
                        S1 += get_xi(n_bv, bmh[i]);
                        S1 += get_xi(n_bv, bph[i]);
                    }
                }

574
                S1 += lgamma_fast(n_bv + 1);
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607

                for (auto& dh : cdeg_hist)
                    S1 -= lgamma_fast(dh.second + 1);

                if (deg_alt)
                {
                    double S2 = 0;
                    for (size_t i = 0; i < bv.size(); ++i)
                    {
                        S2 += lbinom(n_bv + bmh[i] - 1, bmh[i]);
                        S2 += lbinom(n_bv + bph[i] - 1, bph[i]);
                    }
                    S += min(S1, S2);
                }
                else
                {
                    S += S1;
                }
            }

            for (size_t r = 0; r < _B; ++r)
            {
                if (_r_count[r] == 0)
                    continue;
                S += lbinom(_r_count[r] + _emhist[r] - 1,  _emhist[r]);
                S += lbinom(_r_count[r] + _ephist[r] - 1,  _ephist[r]);
            }
        }
        return S;
    }


    template <class Graph>
608
609
610
    bool get_n_bv(size_t v, size_t r, size_t nr, const bv_t& bv,
                  const cdeg_t& deg, bv_t& n_bv, cdeg_t& n_deg, Graph& g,
                  size_t in_deg = 0, size_t out_deg = 0)
611
    {
612
613
        size_t kin = (in_deg + out_deg == 0) ? in_degreeS()(v, g) : in_deg;
        size_t kout = (in_deg + out_deg == 0) ? out_degreeS()(v, g) : out_deg;
614

615
616
617
        auto& d_r = _deg_delta[r];
        d_r.first -= kin;
        d_r.second -= kout;
618

619
620
621
        auto& d_nr = _deg_delta[nr];
        d_nr.first += kin;
        d_nr.second += kout;
622
623
624
625

        n_deg.clear();
        n_bv.clear();
        bool is_same_bv = true;
626
        bool has_r = false, has_nr = false;
627
628
629
630
631
        for (size_t i = 0; i < bv.size(); ++i)
        {
            size_t s = bv[i];
            auto k_s = deg[i];

632
633
634
635
636
637
638
639
640
641
642
            auto& d_s = _deg_delta[s];
            get<0>(k_s) += d_s.first;
            get<1>(k_s) += d_s.second;

            d_s.first = d_s.second = 0;

            if (s == r)
                has_r = true;

            if (s == nr)
                has_nr = true;
643
644
645
646
647
648
649
650
651
652
653
654

            if ((get<0>(k_s) + get<1>(k_s)) > 0)
            {
                n_bv.push_back(s);
                n_deg.push_back(k_s);
            }
            else
            {
                is_same_bv = false;
            }
        }

655
        if (!has_r || !has_nr)
656
657
        {
            is_same_bv = false;
658
659
            std::array<size_t, 2> ss = {r, nr};
            for (auto s : ss)
660
            {
661
662
663
664
665
666
                auto& d_s = _deg_delta[s];
                if (d_s.first + d_s.second == 0)
                    continue;
                size_t kin = d_s.first;
                size_t kout = d_s.second;
                auto pos = std::lower_bound(n_bv.begin(), n_bv.end(), s);
667
668
                auto dpos = n_deg.begin();
                std::advance(dpos, pos - n_bv.begin());
669
                n_bv.insert(pos, s);
670
                n_deg.insert(dpos, make_pair(kin, kout));
671
                d_s.first = d_s.second = 0;
672
673
674
675
676
677
            }
        }
        return is_same_bv;
    }

    // get deg counts without increasing the container
678
    size_t get_deg_count(const bv_t& bv, const cdeg_t& deg) const
679
680
681
682
683
684
685
686
687
688
689
690
691
692
    {
        auto iter = _deg_hist.find(bv);
        if (iter == _deg_hist.end())
            return 0;
        auto& hist = iter->second;
        if (hist.empty())
            return 0;
        auto diter = hist.find(deg);
        if (diter == hist.end())
            return 0;
        return diter->second;
    }

    // get bv counts without increasing the container
693
    size_t get_bv_count(const bv_t& bv) const
694
695
696
697
698
699
700
701
    {
        auto iter = _bhist.find(bv);
        if (iter == _bhist.end())
            return 0;
        return iter->second;
    }

    template <class Graph>
702
703
704
    double get_delta_dl(size_t v, size_t r, size_t nr,
                        overlap_stats_t& overlap_stats, Graph& g,
                        size_t in_deg = 0, size_t out_deg = 0)
705
    {
706
        if (r == nr)
707
708
            return 0;

709
        size_t u = overlap_stats.get_node(v);
710
711
712
713
714
715
        auto& bv = _bvs[u];
        auto& n_bv = _nbvs[u];
        size_t d = bv.size();
        cdeg_t& deg = _degs[u];
        cdeg_t& n_deg = _ndegs[u];

716
717
        bool is_same_bv = get_n_bv(v, r, nr, bv, deg, n_bv, n_deg, g, in_deg,
                                   out_deg);
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734

        size_t n_d = n_bv.size();
        size_t n_D = _D;

        if (d == _D && _dhist[d] == 1)
        {
            n_D = 1;
            for (auto& bc : _bhist)
            {
                if (bc.first.size() == d || bc.second == 0)
                    continue;
                n_D = max(n_D, bc.first.size());
            }
        }

        n_D = max(n_D, n_d);

735
736
        double S_a = 0, S_b = 0;

737
738
        if (n_D != _D)
        {
739
740
            S_b += lbinom(_D  + _N - 1, _N);
            S_a += lbinom(n_D + _N - 1, _N);
741
742
        }

743
744
745
746
747
        int dB = 0;
        if (overlap_stats.virtual_remove_size(v, r, in_deg, out_deg) == 0)
            dB--;
        if (overlap_stats.get_block_size(nr) == 0)
            dB++;
748

749
        auto get_S_d = [&] (size_t d_i, int delta, int dB) -> double
750
751
752
753
            {
                int nd = int(_dhist[d_i]) + delta;
                if (nd == 0)
                    return 0.;
754
755
                double x = lbinom_fast(_actual_B + dB, d_i);
                double S = lbinom(exp(x) + nd - 1, nd); // not fast
756
757
758
759
760
                if (std::isinf(S) || std::isnan(S))
                    S = nd * x - lgamma_fast(nd + 1);
                return S;
            };

761
762
763
764
765
766
767
768
769
        if (dB == 0)
        {
            if (n_d != d)
            {
                S_b += get_S_d(d,  0, 0) + get_S_d(n_d, 0, 0);
                S_a += get_S_d(d, -1, 0) + get_S_d(n_d, 1, 0);
            }
        }
        else
770
        {
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
            for (size_t di = 0; di < min(_actual_B + abs(dB) + 1, _dhist.size()); ++di)
            {
                if (d != n_d)
                {
                    if (di == d)
                    {
                        S_b += get_S_d(d,  0, 0);
                        S_a += get_S_d(d, -1, dB);
                        continue;
                    }
                    if (di == n_d)
                    {
                        S_b += get_S_d(n_d, 0, 0);
                        S_a += get_S_d(n_d, 1, dB);
                        continue;
                    }
                }
                if (_dhist[di] == 0)
                    continue;
                S_b += get_S_d(di, 0, 0);
                S_a += get_S_d(di, 0, dB);
            }

            if (_edges_dl)
            {
                auto get_x = [](size_t B) -> size_t
                {
                    if (is_directed::apply<Graph>::type::value)
                        return B * B;
                    else
                        return (B * (B + 1)) / 2;
                };

                size_t E = num_vertices(g) / 2;
                S_b += lbinom(get_x(_actual_B) + E - 1, E);
                S_a += lbinom(get_x(_actual_B + dB) + E - 1, E);
            }

809
810
        }

811
        size_t bv_count, n_bv_count;
812
813
814
815
816
817
818
819
820
821

        auto get_S_b = [&] (bool is_bv, int delta) -> double
            {
                if (is_bv)
                    return -lgamma_fast(bv_count + delta + 1);
                return -lgamma_fast(n_bv_count + delta + 1);
            };

        if (!is_same_bv)
        {
822
823
            bv_count = get_bv_count(bv);
            n_bv_count = get_bv_count(n_bv);
824
825
826
827
            S_b += get_S_b(true,  0) + get_S_b(false, 0);
            S_a += get_S_b(true, -1) + get_S_b(false, 1);
        }

828
829
        return S_a - S_b;
    }
830
831


832
833
834
835
836
837
838
    template <class Graph, class EWeight>
    double get_delta_deg_dl(size_t v, size_t r, size_t nr, EWeight&,
                            overlap_stats_t& overlap_stats, Graph& g,
                            size_t in_deg = 0, size_t out_deg = 0)
    {
        if (r == nr)
            return 0;
839

840
        double S_b = 0, S_a = 0;
841

842
843
844
        size_t u = overlap_stats.get_node(v);
        auto& bv = _bvs[u];
        auto& n_bv = _nbvs[u];
845

846
847
        cdeg_t& deg = _degs[u];
        cdeg_t& n_deg = _ndegs[u];
848

849
850
        bool is_same_bv = get_n_bv(v, r, nr, bv, deg, n_bv, n_deg, g, in_deg,
                                   out_deg);
851

852
853
        size_t bv_count = get_bv_count(bv);
        size_t n_bv_count = get_bv_count(n_bv);
854

855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
        auto get_S_bv = [&] (bool is_bv, int delta) -> double
            {
                if (is_bv)
                    return lgamma_fast(bv_count + delta + 1);
                return lgamma_fast(n_bv_count + delta + 1);
            };

        auto get_S_e = [&] (bool is_bv, int bdelta, int deg_delta) -> double
            {
                size_t bv_c = ((is_bv) ? bv_count : n_bv_count) + bdelta;
                if (bv_c == 0)
                    return 0.;

                const cdeg_t& deg_i = (is_bv) ? deg : n_deg;
                const auto& bv_i = (is_bv) ? bv : n_bv;
870

871
872
                double S = 0;
                if (((is_bv) ? bv_count : n_bv_count) > 0)
873
                {
874
875
                    const auto& bmh = _embhist.find(bv_i)->second;
                    const auto& bph = _epbhist.find(bv_i)->second;
876

877
878
879
880
                    assert(bmh.size() == bv_i.size());
                    assert(bph.size() == bv_i.size());

                    for (size_t i = 0; i < bv_i.size(); ++i)
881
                    {
882
883
                        S += get_xi_fast(bv_c, bmh[i] + deg_delta * int(get<0>(deg_i[i])));
                        S += get_xi_fast(bv_c, bph[i] + deg_delta * int(get<1>(deg_i[i])));
884
                    }
885
886
887
888
889
890
891
892
893
                }
                else
                {
                    for (size_t i = 0; i < bv_i.size(); ++i)
                    {
                        S += get_xi_fast(bv_c, deg_delta * int(get<0>(deg_i[i])));
                        S += get_xi_fast(bv_c, deg_delta * int(get<1>(deg_i[i])));
                    }
                }
894

895
896
                return S;
            };
897

898
        auto get_S_e2 = [&] (int deg_delta, int ndeg_delta) -> double
899
            {
900
901
902
                double S = 0;
                const auto& bmh = _embhist.find(bv)->second;
                const auto& bph = _epbhist.find(bv)->second;
903

904
                for (size_t i = 0; i < bv.size(); ++i)
905
                {
906
907
908
909
910
911
912
913
914
                    S += get_xi_fast(bv_count, bmh[i] +
                                     deg_delta * int(get<0>(deg[i])) +
                                     ndeg_delta * int(get<0>(n_deg[i])));
                    S += get_xi_fast(bv_count, bph[i] +
                                     deg_delta * int(get<1>(deg[i])) +
                                     ndeg_delta * int(get<1>(n_deg[i])));
                }
                return S;
            };
915

916
917
918
919
        if (!is_same_bv)
        {
            S_b += get_S_bv(true,  0) + get_S_bv(false, 0);
            S_a += get_S_bv(true, -1) + get_S_bv(false, 1);
920

921
922
923
924
925
926
927
928
929
930
931
            S_b += get_S_e(true,  0,  0) + get_S_e(false, 0, 0);
            S_a += get_S_e(true, -1, -1) + get_S_e(false, 1, 1);
        }
        else
        {
            S_b += get_S_e2( 0, 0);
            S_a += get_S_e2(-1, 1);
        }

        size_t deg_count = get_deg_count(bv, deg);
        size_t n_deg_count = get_deg_count(n_bv, n_deg);
932

933
        auto get_S_deg = [&] (bool is_deg, int delta) -> double
934
            {
935
936
937
938
                if (is_deg)
                    return -lgamma_fast(deg_count + delta + 1);
                return -lgamma_fast(n_deg_count + delta + 1);
            };
939

940
941
942
943
        S_b += get_S_deg(true,  0) + get_S_deg(false, 0);
        S_a += get_S_deg(true, -1) + get_S_deg(false, 1);

        auto is_in = [&] (const bv_t& bv, size_t r) -> bool
944
            {
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
                auto iter = lower_bound(bv.begin(), bv.end(), r);
                if (iter == bv.end())
                    return false;
                if (size_t(*iter) != r)
                    return false;
                return true;
            };

        for (size_t s : bv)
        {
            S_b += lbinom_fast(_r_count[s] + _emhist[s] - 1, _emhist[s]);
            S_b += lbinom_fast(_r_count[s] + _ephist[s] - 1, _ephist[s]);
        }

        for (size_t s : n_bv)
        {
            if (is_in(bv, s))
                continue;
            S_b += lbinom_fast(_r_count[s] + _emhist[s] - 1, _emhist[s]);
            S_b += lbinom_fast(_r_count[s] + _ephist[s] - 1, _ephist[s]);
        }
966

967
968
        gt_hash_map<size_t, pair<int, int>> deg_delta;
        gt_hash_map<size_t, int> r_count_delta;
969

970
971
972
        if (bv != n_bv)
        {
            if (n_bv_count == 0)
973
            {
974
975
                for (auto s : n_bv)
                    r_count_delta[s] += 1;
976
977
            }

978
            if (bv_count == 1)
979
            {
980
981
982
983
                for (auto s : bv)
                   r_count_delta[s] -= 1;
            }
        }
984

985
986
987
988
        if (r != nr)
        {
            size_t kin = (in_deg + out_deg == 0) ? in_degreeS()(v, g) : in_deg;
            size_t kout = (in_deg + out_deg == 0) ? out_degreeS()(v, g) : out_deg;
989

990
991
992
            auto& d_r = deg_delta[r];
            d_r.first -= kin;
            d_r.second -= kout;
993

994
995
996
997
998
999
1000
            auto& d_nr = deg_delta[nr];
            d_nr.first += kin;
            d_nr.second += kout;
        }

        for (size_t s : bv)
        {
For faster browsing, not all history is shown. View entire blame