Commit 8a35de17 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

generate_sbm(): Implement microcanonical model

parent fbb1d5c0
......@@ -48,4 +48,5 @@ libgraph_tool_generation_la_include_HEADERS = \
graph_sbm.hh \
graph_triangulation.hh \
graph_union.hh \
sampler.hh
sampler.hh \
urn_sampler.hh
......@@ -70,7 +70,8 @@ void generate_graph(GraphInterface& gi, size_t N,
void generate_sbm(GraphInterface& gi, boost::any ab, boost::python::object ors,
boost::python::object oss, boost::python::object oprobs,
boost::any ain_deg, boost::any aout_deg, rng_t& rng);
boost::any ain_deg, boost::any aout_deg, bool micro_ers,
bool micro_degs, rng_t& rng);
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
......
......@@ -24,19 +24,33 @@ using namespace graph_tool;
void generate_sbm(GraphInterface& gi, boost::any ab, boost::python::object ors,
boost::python::object oss, boost::python::object oprobs,
boost::any ain_deg, boost::any aout_deg, rng_t& rng)
boost::any ain_deg, boost::any aout_deg, bool micro_ers,
bool micro_degs, rng_t& rng)
{
auto rs = get_array<int64_t, 1>(ors);
auto ss = get_array<int64_t, 1>(oss);
auto probs = get_array<double, 1>(oprobs);
typedef vprop_map_t<int32_t>::type bmap_t;
auto b = any_cast<bmap_t>(ab).get_unchecked();
typedef vprop_map_t<double>::type dmap_t;
auto in_deg = any_cast<dmap_t>(ain_deg).get_unchecked();
auto out_deg = any_cast<dmap_t>(aout_deg).get_unchecked();
run_action<>()
(gi, [&](auto& g) { gen_sbm(g, b, rs, ss, probs, in_deg, out_deg, rng); })();
if (micro_degs)
{
auto probs = get_array<uint64_t, 1>(oprobs);
typedef vprop_map_t<int64_t>::type dmap_t;
auto in_deg = any_cast<dmap_t>(ain_deg).get_unchecked();
auto out_deg = any_cast<dmap_t>(aout_deg).get_unchecked();
run_action<>()
(gi, [&](auto& g) { gen_sbm<true>(g, b, rs, ss, probs, in_deg,
out_deg, true, rng); })();
}
else
{
auto probs = get_array<double, 1>(oprobs);
typedef vprop_map_t<double>::type dmap_t;
auto in_deg = any_cast<dmap_t>(ain_deg).get_unchecked();
auto out_deg = any_cast<dmap_t>(aout_deg).get_unchecked();
run_action<>()
(gi, [&](auto& g) { gen_sbm<false>(g, b, rs, ss, probs, in_deg,
out_deg, micro_ers, rng); })();
}
}
......@@ -26,6 +26,7 @@
#include "graph_filtering.hh"
#include "graph_util.hh"
#include "sampler.hh"
#include "urn_sampler.hh"
#include "random.hh"
......@@ -36,55 +37,78 @@ namespace graph_tool
using namespace std;
using namespace boost;
template <class Graph, class VProp, class IVec, class FVec, class VDProp,
class RNG>
template <bool micro_deg, class Graph, class VProp, class IVec, class FVec,
class VDProp, class RNG>
void gen_sbm(Graph& g, VProp b, IVec& rs, IVec& ss, FVec probs, VDProp in_deg,
VDProp out_deg, RNG& rng)
VDProp out_deg, bool micro_ers, RNG& rng)
{
constexpr bool is_dir = is_directed::apply<Graph>::type::value;
typedef typename std::conditional_t<micro_deg,size_t,double> dtype;
vector<vector<size_t>> rvs;
vector<vector<double>> v_in_probs, v_out_probs;
vector<vector<dtype>> v_in_probs, v_out_probs;
for (auto v : vertices_range(g))
{
size_t r = b[v];
if (r >= v_in_probs.size())
if (r >= v_out_probs.size())
{
v_in_probs.resize(r+1);
if (is_dir)
v_in_probs.resize(r+1);
v_out_probs.resize(r+1);
rvs.resize(r+1);
}
rvs[r].push_back(v);
v_in_probs[r].push_back(in_deg[v]);
if (is_dir)
v_in_probs[r].push_back(in_deg[v]);
v_out_probs[r].push_back(out_deg[v]);
}
vector<Sampler<size_t>> v_in_sampler, v_out_sampler;
typedef typename std::conditional_t<micro_deg,
UrnSampler<size_t, false>,
Sampler<size_t>> vsampler_t;
vector<vsampler_t> v_in_sampler_, v_out_sampler;
for (size_t r = 0; r < rvs.size(); ++r)
{
v_in_sampler.emplace_back(rvs[r], v_in_probs[r]);
if (is_dir)
v_in_sampler_.emplace_back(rvs[r], v_in_probs[r]);
v_out_sampler.emplace_back(rvs[r], v_out_probs[r]);
}
auto& v_in_sampler = (is_dir) ? v_in_sampler_ : v_out_sampler;
for (size_t i = 0; i < rs.shape()[0]; ++i)
{
size_t r = rs[i];
size_t s = ss[i];
double p = probs[i];
auto p = probs[i];
if (!is_directed::apply<Graph>::type::value && r == s)
if (!is_dir && r == s)
p /= 2;
if (r >= v_out_sampler.size() || v_out_sampler[r].prob_sum() == 0 ||
s >= v_in_sampler.size() || v_in_sampler[s].prob_sum() == 0)
throw GraphException("Inconsistent SBM parameters: edge probabilities given for empty groups");
if (p > 0 && (r >= v_out_sampler.size() || v_out_sampler[r].empty() ||
s >= v_in_sampler.size() || v_in_sampler[s].empty()))
throw GraphException("Inconsistent SBM parameters: nonzero edge probabilities given for empty groups");
auto& r_sampler = v_out_sampler[r];
auto& s_sampler = v_in_sampler[s];
std::poisson_distribution<> poi(p);
size_t ers = poi(rng);
size_t ers;
if (micro_ers)
{
ers = p;
}
else
{
std::poisson_distribution<> poi(p);
ers = poi(rng);
}
for (size_t j = 0; j < ers; ++j)
{
if (r_sampler.empty())
throw GraphException("Inconsistent SBM parameters: node degrees do not agree with matrix of edge counts between groups");
size_t u = r_sampler.sample(rng);
if (s_sampler.empty())
throw GraphException("Inconsistent SBM parameters: node degrees do not agree with matrix of edge counts between groups");
size_t v = s_sampler.sample(rng);
add_edge(u, v, g);
}
......
......@@ -94,8 +94,8 @@ public:
}
size_t size() const { return _items.size(); }
bool empty() const { return _items.empty(); }
double prob_sum() const { return _S; }
bool empty() const { return _S == 0; }
const Value& operator[](size_t i) const
{
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2017 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef URN_SAMPLER_HH
#define URN_SAMPLER_HH
#include "random.hh"
namespace graph_tool
{
using namespace std;
using namespace boost;
// Sampling with and without replacement from an urn
template <class Value, bool replacement>
class UrnSampler
{
public:
UrnSampler() {}
template <class Int>
UrnSampler(const vector<Value>& items,
const vector<Int>& counts)
{
for (size_t i = 0; i < items.size(); ++i)
{
const auto& x = items[i];
size_t n = counts[i];
for (size_t j = 0; j < n; ++j)
_urn.push_back(x);
}
}
template <class RNG>
typename std::conditional<replacement,
const Value&,
Value>::type
sample(RNG& rng)
{
uniform_int_distribution<size_t> sample(0, _urn.size() - 1);
size_t i = sample(rng);
if (replacement)
{
return _urn[i];
}
else
{
std::swap(_urn[i], _urn.back());
Value temp = _urn.back();
_urn.pop_back();
return temp;
}
}
bool empty()
{
return _urn.empty();
}
private:
vector<Value> _urn;
};
} // namespace graph_tool
#endif // URN_SAMPLER_HH
......@@ -836,8 +836,10 @@ def random_rewire(g, model="configuration", n_iter=1, edge_sweep=True,
_get_rng(), verbose)
return pcount
def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
r"""Generate a random graph by sampling from the Poisson stochastic block model.
def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False,
micro_ers=False, micro_degs=False):
r"""Generate a random graph by sampling from the Poisson or microcanonical
stochastic block model.
Parameters
----------
......@@ -858,6 +860,17 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
if they are not already so.
directed : ``bool`` (optional, default: ``False``)
Whether the graph is directed.
micro_ers : ``bool`` (optional, default: ``False``)
If true, the `microcanonical` version of the model will be evoked, where
the numbers of edges between groups will be given `exactly` by the
parameter ``probs``, and this will not fluctuate between samples.
micro_degs : ``bool`` (optional, default: ``False``)
If true, the `microcanonical` version of the degree-corrected model will
be evoked, where the degrees of nodes will be given `exactly` by the
parameters ``out_degs`` and ``in_degs``, and they will not fluctuate
between samples. (If ``micro_degs == True`` it implies ``micro_ers ==
True``.)
Returns
-------
......@@ -875,7 +888,7 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
.. math::
P({\boldsymbol A}|{\boldsymbol \lambda},{\boldsymbol \theta},{\boldsymbol b})
P({\boldsymbol A}|{\boldsymbol \theta},{\boldsymbol \lambda},{\boldsymbol b})
= \prod_{i<j}\frac{e^{-\lambda_{b_ib_j}\theta_i\theta_j}(\lambda_{b_ib_j}\theta_i\theta_j)^{A_{ij}}}{A_{ij}!}
\times\prod_i\frac{e^{-\lambda_{b_ib_i}\theta_i^2/2}(\lambda_{b_ib_i}\theta_i^2/2)^{A_{ij}/2}}{(A_{ij}/2)!},
......@@ -900,7 +913,7 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
.. math::
P({\boldsymbol A}|{\boldsymbol \lambda},{\boldsymbol \theta},{\boldsymbol b})
P({\boldsymbol A}|{\boldsymbol \theta},{\boldsymbol \lambda},{\boldsymbol b})
= \prod_{ij}\frac{e^{-\lambda_{b_ib_j}\theta^+_i\theta^-_j}(\lambda_{b_ib_j}\theta^+_i\theta^-_j)^{A_{ij}}}{A_{ij}!}.
Again, the same normalization is assumed:
......@@ -912,8 +925,47 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
such that the value :math:`\lambda_{rs}` will correspond to the average
number of directed edges between groups :math:`r` and :math:`s`.
The graph is generated in time :math:`O(V + E + B)`, where :math:`B` is the
number of groups.
In case the parameter ``micro_degs == True`` is passed, a `microcanical
<https://en.wikipedia.org/wiki/Microcanonical_ensemble>`_ model is used
instead, where both the number of edges between groups as well as the
degrees of the nodes are preserved `exactly`, instead of only on
expectation. In this case, the parameters are interpreted as
:math:`{\boldsymbol\lambda}\equiv{\boldsymbol e}` and
:math:`{\boldsymbol\theta}\equiv{\boldsymbol k}`, where :math:`e_{rs}` is
the number of edges between groups :math:`r` and :math:`s` (or twice that if
:math:`r=s` in the undirected case), and :math:`k_i` is the degree of node
:math:`i`. The multigraphs are then sampled with probability
.. math::
P({\boldsymbol A}|{\boldsymbol k},{\boldsymbol e},{\boldsymbol b}) =
\frac{\prod_{r<s}e_{rs}!\prod_re_{rr}!!\prod_ik_i!}{\prod_re_r!\prod_{i<j}A_{ij}!\prod_iA_{ii}!!}.
and in the directed case with probability
.. math::
P({\boldsymbol A}|{\boldsymbol k}^+,{\boldsymbol k}^-,{\boldsymbol e},{\boldsymbol b}) =
\frac{\prod_{rs}e_{rs}!\prod_ik^+_i!k^-_i!}{\prod_re^+_r!e^-_r!\prod_{ij}A_{ij}!}.
In the non-degree-corrected case, if ``micro_ers == True``, the
microcanonical model corresponds to
.. math::
P({\boldsymbol A}|{\boldsymbol e},{\boldsymbol b}) =
\frac{\prod_{r<s}e_{rs}!\prod_re_{rr}!!}{\prod_rn_r^{e_r}\prod_{i<j}A_{ij}!\prod_iA_{ii}!!},
and in the directed case to
.. math::
P({\boldsymbol A}|{\boldsymbol e},{\boldsymbol b}) =
\frac{\prod_{rs}e_{rs}!}{\prod_rn_r^{e_r^+ + e_r^-}\prod_{ij}A_{ij}!}.
In every case above, the final graph is generated in time :math:`O(V + E +
B)`, where :math:`B` is the number of groups.
Examples
--------
......@@ -923,7 +975,7 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
>>> g = gt.Graph(g, prune=True)
>>> state = gt.minimize_blockmodel_dl(g)
>>> u = gt.generate_sbm(state.b.a, gt.adjacency(state.get_bg(),
... state.get_ers()),
... state.get_ers()).T,
... g.degree_property_map("out").a,
... g.degree_property_map("in").a, directed=True)
>>> gt.graph_draw(g, g.vp.pos, output="polblogs-sbm.png")
......@@ -944,6 +996,9 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
.. [karrer-stochastic-2011] Brian Karrer and M. E. J. Newman, "Stochastic
blockmodels and community structure in networks," Physical Review E 83,
no. 1: 016107 (2011) :doi:`10.1103/PhysRevE.83.016107` :arxiv:`1008.3926`
.. [peixoto-nonparametric-2017] Tiago P. Peixoto, "Nonparametric Bayesian
inference of the microcanonical stochastic block model", Phys. Rev. E 95
012317 (2017). :doi:`10.1103/PhysRevE.95.012317`, :arxiv:`1610.02703`
"""
......@@ -951,20 +1006,22 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
g.add_vertex(len(b))
b = g.new_vp("int", b)
deg_type = "double" if not micro_degs else "int64_t"
p_type = "double" if not micro_degs else "uint64"
if not directed:
if out_degs is None:
out_degs = in_degs = g.new_vp("double", 1)
out_degs = in_degs = g.new_vp(deg_type, 1)
else:
out_degs = in_degs = g.new_vp("double", out_degs)
out_degs = in_degs = g.new_vp(deg_type, out_degs)
else:
if out_degs is None:
out_degs = g.new_vp("double", 1)
out_degs = g.new_vp(deg_type, 1)
else:
out_degs = g.new_vp("double", out_degs)
out_degs = g.new_vp(deg_type, out_degs)
if in_degs is None:
in_degs = g.new_vp("double", 1)
in_degs = g.new_vp(deg_type, 1)
else:
in_degs = g.new_vp("double", in_degs)
in_degs = g.new_vp(deg_type, in_degs)
r, s = probs.nonzero()
if not directed:
......@@ -980,11 +1037,13 @@ def generate_sbm(b, probs, out_degs=None, in_degs=None, directed=False):
libgraph_tool_generation.gen_sbm(g._Graph__graph,
_prop("v", g, b),
numpy.array(r, dtype="int64"),
numpy.array(s, dtype="int64"),
numpy.array(p, dtype="double"),
numpy.asarray(r, dtype="int64"),
numpy.asarray(s, dtype="int64"),
numpy.asarray(p, dtype=p_type),
_prop("v", g, in_degs),
_prop("v", g, out_degs),
micro_ers,
micro_degs,
_get_rng())
return g
......
......@@ -2201,15 +2201,15 @@ class BlockState(object):
"vertex_color",
"edge_gradient"]))
def sample_graph(self, canonical=False, niter=100, multigraph=True,
self_loops=True):
def sample_graph(self, canonical=False, niter=100, poisson=True,
multigraph=True, self_loops=True):
r"""Sample a new graph from the fitted model.
Parameters
----------
canonical : ``bool`` (optional, default: ``False``)
If ``canonical == True``, the graph will be sampled from the
maximum-likelihood estimate of the Poisson stochastic block
maximum-likelihood estimate of the canonical stochastic block
model. Otherwise, it will be sampled from the microcanonical model.
niter : ``int`` (optional, default: ``None``)
Number of MCMC steps. This value is ignored if ``canonical ==
......@@ -2218,6 +2218,9 @@ class BlockState(object):
If ``True``, parallel edges will be allowed.
self-loops : ``bool`` (optional, default: ``True``)
If ``True``, self-loops will be allowed.
poisson : ``bool`` (optional, default: ``True``)
If ``True``, the Poisson model will be used, otherwise the graph
will be sampled from the corresponding maximum entropy ensemble.
Returns
-------
......@@ -2227,8 +2230,8 @@ class BlockState(object):
Notes
-----
This function is just a convenience wrapper to
:func:`~graph_tool.generation.random_rewire` (if ``canonical == False``)
and :func:`~graph_tool.generation.generate_sbm` (if ``canonical ==
:func:`~graph_tool.generation.random_rewire` (if ``poisson == False``)
and :func:`~graph_tool.generation.generate_sbm` (if ``poisson ==
True``).
Examples
......@@ -2253,7 +2256,7 @@ class BlockState(object):
SBM fitted to the original network.
"""
if canonical:
if poisson:
in_degs = out_degs = None
if self.deg_corr:
out_degs = self.g.degree_property_map("out").fa
......@@ -2261,10 +2264,12 @@ class BlockState(object):
in_degs = self.g.degree_property_map("in").fa
else:
in_degs = None
probs = adjacency(self.bg, weight=self.mrs)
probs = adjacency(self.bg, weight=self.mrs).T
g = generate_sbm(b=self.b.fa, probs=probs,
in_degs=in_degs, out_degs=out_degs,
directed=self.g.is_directed())
directed=self.g.is_directed(),
micro_ers=not canonical,
micro_degs=not canonical and self.deg_corr)
if not multigraph:
remove_parallel_edges(g)
if not self_loops:
......@@ -2272,7 +2277,7 @@ class BlockState(object):
else:
g = self.g.copy()
if self.deg_corr:
model = "constrained-configuration"
model = "constrained-configuration" if not canonical else "blockmodel-degree"
probs = None
else:
model = "blockmodel"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment