Commit da179ff0 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Fix sampling of stochastic blockmodel to guarantee correct asymptotic probability

parent ca503515
......@@ -800,6 +800,60 @@ void do_collect_vertex_marginals(GraphInterface& gi, boost::any ob,
vertex_scalar_vector_properties())(op);
}
struct get_deg_entropy
{
template <class Graph, class Vprop>
void operator()(Graph& g, Vprop b, size_t B, double& S) const
{
#ifdef HAVE_SPARSEHASH
typedef dense_hash_map<pair<size_t,size_t>, int, boost::hash<pair<size_t,size_t>>> map_t;
#else
typedef unordered_map<pair<size_t,size_t>, int, boost::hash<pair<size_t,size_t>>> map_t;
#endif
vector<map_t> hist(B);
vector<int> total(B);
#ifdef HAVE_SPARSEHASH
for (size_t r = 0; r < B; ++r)
hist[r].set_empty_key(make_pair(numeric_limits<size_t>::max(),
numeric_limits<size_t>::max()));
#endif
typename graph_traits<Graph>::vertex_iterator v, v_end;
for (tie(v, v_end) = vertices(g); v != v_end; ++v)
{
hist[b[*v]][make_pair(in_degreeS()(*v, g), out_degree(*v, g))]++;
total[b[*v]]++;
}
S = 0;
for (size_t r = 0; r < B; ++r)
{
for (auto iter = hist[r].begin(); iter != hist[r].end(); ++iter)
{
double p = iter->second / double(total[r]);
S -= p * log(p) * total[r];
}
}
}
};
// Entry point exported to Python as "deg_entropy": unwraps the block
// property map from `ob`, dispatches get_deg_entropy on the graph held by
// `gi`, and returns the resulting value.
// `ob` must hold an int32_t vertex property map; any_cast throws otherwise.
double do_get_deg_entropy(GraphInterface& gi, boost::any ob, size_t B)
{
    typedef property_map_type::apply<int32_t,
                                     GraphInterface::vertex_index_map_t>::type
        block_map_t;

    double entropy = 0;
    block_map_t blocks = any_cast<block_map_t>(ob);

    // The functor writes its result through the reference wrapper.
    run_action<>()
        (gi, std::bind(get_deg_entropy(),
                       placeholders::_1, blocks, B, std::ref(entropy)))();
    return entropy;
}
vector<int32_t> get_vector(size_t n)
{
return vector<int32_t>(n);
......@@ -862,6 +916,7 @@ void export_blockmodel()
def("entropy", do_get_ent);
def("entropy_dense", do_get_ent_dense);
def("deg_entropy", do_get_deg_entropy);
def("edge_marginals", do_collect_edge_marginals);
def("bethe_entropy", do_bethe_entropy);
......
......@@ -19,6 +19,7 @@
#include "graph_util.hh"
#include "graph_filtering.hh"
#include "graph_generation.hh"
#include "sampler.hh"
#include <boost/python.hpp>
using namespace std;
......@@ -109,4 +110,9 @@ BOOST_PYTHON_MODULE(libgraph_tool_generation)
def("price", &price);
def("complete", &complete);
def("circular", &circular);
class_<Sampler<int, boost::mpl::false_>>("Sampler",
init<const vector<int>&, const vector<double>&>())
.def("sample", &Sampler<int, boost::mpl::false_>::sample<rng_t>,
return_value_policy<copy_const_reference>());
}
......@@ -33,7 +33,9 @@ class PythonFuncWrap
public:
PythonFuncWrap(boost::python::object o): _o(o) {}
double operator()(pair<size_t, size_t> deg1, pair<size_t, size_t> deg2)
typedef pair<size_t, size_t> deg_t;
double operator()(deg_t deg1, deg_t deg2)
const
{
boost::python::object ret = _o(boost::python::make_tuple(deg1.first, deg1.second),
......@@ -48,6 +50,25 @@ public:
return boost::python::extract<double>(ret);
}
// Fills `probs` from the wrapped Python object when it supports indexing
// (i.e. has a "__getitem__" attribute); otherwise does nothing.
// Each element is read as a triple (source key, target key, p). Entries
// whose p is NaN, infinite, zero or negative are skipped; the remaining
// values are added to probs[(source, target)], so repeated keys accumulate.
template <class ProbMap>
void get_probs(ProbMap& probs) const
{
    typedef typename ProbMap::key_type::first_type block_t;

    if (!PyObject_HasAttrString(_o.ptr(), "__getitem__"))
        return;

    const int count = boost::python::len(_o);
    for (int idx = 0; idx < count; ++idx)
    {
        block_t src = boost::python::extract<block_t>(_o[idx][0])();
        block_t tgt = boost::python::extract<block_t>(_o[idx][1])();
        double weight = boost::python::extract<double>(_o[idx][2])();

        // Only finite, strictly positive weights contribute.
        const bool usable = std::isfinite(weight) && weight > 0;
        if (usable)
            probs[make_pair(src, tgt)] += weight;
    }
}
private:
boost::python::object _o;
};
......@@ -86,6 +107,22 @@ struct graph_rewire_block
};
struct graph_rewire_correlated
{
template <class Graph, class EdgeIndexMap, class CorrProb, class BlockProp>
void operator()(Graph& g, EdgeIndexMap edge_index, CorrProb corr_prob,
bool self_loops, bool parallel_edges,
pair<size_t, bool> iter_sweep,
std::tuple<bool, bool, bool> cache_verbose,
size_t& pcount, rng_t& rng, BlockProp block_prop) const
{
graph_rewire<CorrelatedRewireStrategy>()
(g, edge_index, corr_prob, self_loops, parallel_edges, iter_sweep,
cache_verbose, pcount, rng, PropertyBlock<BlockProp>(block_prop));
}
};
size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
bool no_sweep, bool self_loops, bool parallel_edges,
bool alias, bool traditional, bool persist,
......@@ -96,6 +133,7 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
size_t pcount = 0;
if (strat == "erdos")
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<ErdosRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(),
......@@ -104,7 +142,9 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else if (strat == "uncorrelated")
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<RandomRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
......@@ -112,15 +152,34 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else if (strat == "correlated")
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<CorrelatedRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
{
if (block.empty())
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire<CorrelatedRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else
{
run_action<graph_tool::detail::never_reversed>()
(gi, std::bind(graph_rewire_correlated(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
self_loops, parallel_edges,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng),
placeholders::_2),
vertex_properties())(block);
}
}
else if (strat == "probabilistic")
{
run_action<>()
(gi, std::bind(graph_rewire<ProbabilisticRewireStrategy>(),
placeholders::_1, gi.GetEdgeIndex(), std::ref(corr),
......@@ -128,7 +187,9 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
make_pair(niter, no_sweep),
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)))();
}
else if (strat == "blockmodel")
{
run_action<>()
(gi, std::bind(graph_rewire_block(alias, traditional),
placeholders::_1, gi.GetEdgeIndex(),
......@@ -139,7 +200,10 @@ size_t random_rewire(GraphInterface& gi, string strat, size_t niter,
std::make_tuple(persist, cache, verbose),
std::ref(pcount), std::ref(rng)),
vertex_properties())(block);
}
else
{
throw ValueException("invalid random rewire strategy: " + strat);
}
return pcount;
}
This diff is collapsed.
......@@ -94,18 +94,6 @@ public:
private:
// Strict weak ordering over item indices: index x sorts before index y when
// its probability is smaller; equal probabilities are ordered by index so
// the result is deterministic.
//
// The comparator only keeps a reference to the probability vector, so the
// vector must outlive the comparator.
//
// std::binary_function (deprecated in C++11, removed in C++17) is no longer
// used as a base; the nested typedefs it supplied are declared directly so
// code relying on them keeps working.
struct _cmp
{
    typedef size_t first_argument_type;
    typedef size_t second_argument_type;
    typedef bool result_type;

    _cmp(const std::vector<double>& prob):_prob(prob) {}
    const std::vector<double>& _prob;

    // Returns true when index x must be ordered before index y.
    bool operator() (const size_t& x, const size_t& y) const
    {
        // Exact equality is intended: it only selects the index tie-break.
        if (_prob[x] == _prob[y])
            return x < y;
        return _prob[x] < _prob[y];
    }
};
typedef typename mpl::if_<KeepReference,
const vector<Value>&,
vector<Value> >::type items_t;
......
......@@ -379,14 +379,9 @@ class BlockState(object):
S += model_entropy(self.B, N, E, directed=self.g.is_directed(), nr=self.wr.a) * E
if self.deg_corr:
S_seq = 0
hist = [defaultdict(int) for r in range(self.B)]
for v in self.g.vertices():
hist[self.b[v]][(v.in_degree(), v.out_degree())] += 1
for r in range(self.B):
for k, v in hist[r].items():
p = v / float(self.wr.a[r])
S_seq -= p * log(p) * self.wr.a[r]
S_seq = libcommunity.deg_entropy(self.g._Graph__graph,
_prop("v", self.g, self.b),
self.B)
S += S_seq
return S / E
......
......@@ -49,7 +49,7 @@ from __future__ import division, absolute_import, print_function
from .. dl_import import dl_import
dl_import("from . import libgraph_tool_generation")
from .. import Graph, GraphView, _check_prop_scalar, _prop, _limit_args, _gt_type, _get_rng
from .. import Graph, GraphView, _check_prop_scalar, _prop, _limit_args, _gt_type, _get_rng, libcore
from .. stats import label_parallel_edges, label_self_loops
import inspect
import types
......@@ -109,6 +109,10 @@ def random_graph(N, deg_sampler, directed=True,
types. It must be callable either with no arguments or with a single
argument which will be the vertex index. In either case it must return
a type compatible with the ``block_type`` parameter.
See the documentation for the ``vertex_corr`` parameter of the
:func:`~graph_tool.generation.random_rewire` function which specifies
the correlation matrix.
block_type : string (optional, default: ``"int"``)
Value type of block labels. Valid only if ``block_membership != None``.
degree_block : bool (optional, default: ``False``)
......@@ -189,7 +193,7 @@ def random_graph(N, deg_sampler, directed=True,
... vertex_corr=lambda i, k: 1.0 / (1 + abs(i - k)), directed=False,
... n_iter=100)
>>> gt.scalar_assortativity(g, "out")
(0.6197157767573332, 0.010781011616659146)
(0.6321636468713748, 0.01082292099309249)
The following samples an in,out-degree pair from the joint distribution:
......@@ -449,8 +453,11 @@ def random_rewire(g, model="uncorrelated", n_iter=1, edge_sweep=True,
graph will remain unmodified.
``correlated``
The edges will be rewired randomly, but both the degree sequence of
the graph and the *vertex-vertex degree correlations* will remain
unmodified.
the graph and the *vertex-vertex (in,out)-degree correlations* will
remain exactly preserved. If the ``block_membership`` parameter is
passed, the block variables at the endpoints of the edges will be
preserved (instead of the degrees), in addition to the degree
sequence.
``probabilistic``
This is similar to the ``correlated`` option, but the vertex-vertex
correlations are not kept unmodified, but instead are sampled from an
......@@ -477,21 +484,39 @@ def random_rewire(g, model="uncorrelated", n_iter=1, edge_sweep=True,
If ``True``, parallel edges are allowed.
self_loops : bool (optional, default: ``False``)
If ``True``, self-loops are allowed.
vertex_corr : function (optional, default: ``None``)
A function which gives the vertex-vertex correlation of the graph.
vertex_corr : function or sequence of triples (optional, default: ``None``)
A function which gives the vertex-vertex correlation of the edges in the
graph. In general it should have the following signature:
.. code::
def vertex_corr(r, s):
...
return p
where the return value should be a scalar.
If ``model == probabilistic`` it should be callable with two parameters:
the (in, out)-degree pair of the source vertex an edge, and the
(in,out)-degree pair of the target of the same edge (for undirected
graphs, both parameters are single values). The function should return a
number proportional to the probability of such an edge existing in the
generated graph.
Alternatively, this parameter can be a list of triples of the form
``(r, s, p)``, with the same meaning as the ``r``, ``s`` and ``p``
values above. If a given ``(r, s)`` combination is not present in this
list, the corresponding value of ``p`` is assumed to be zero. If the same
``(r, s)`` combination appears more than once, their ``p`` values will
be summed together. This is useful when the correlation matrix is sparse,
i.e. most entries are zero.
If ``model == probabilistic`` the parameters ``r`` and ``s`` correspond
respectively to the (in, out)-degree pair of the source vertex of an edge,
and the (in,out)-degree pair of the target of the same edge (for
undirected graphs, both parameters are scalars instead). The value of
``p`` should be a number proportional to the probability of such an
edge existing in the generated graph.
If ``model == blockmodel`` or ``model == blockmodel-traditional``, the
values passed to the function will be the block value of the respective
vertices, as specified via the ``block_membership``. The function should
also return a number proportional to the probability of such an edge
existing in the generated graph.
``r`` and ``s`` values passed to the function will be the block values
of the respective vertices, as specified via the ``block_membership``
parameter. The value of ``p`` should be a number proportional to the
probability of such an edge existing in the generated graph.
block_membership : :class:`~graph_tool.PropertyMap` (optional, default: ``None``)
If supplied, the graph will be rewired to conform to a blockmodel
ensemble. The value must be a vertex property map which defines the
......@@ -770,10 +795,15 @@ def random_rewire(g, model="uncorrelated", n_iter=1, edge_sweep=True,
if model not in ["probabilistic", "blockmodel", "blockmodel-traditional"]:
g = GraphView(g, reversed=False)
if model == "blockmodel" and alias and edge_sweep:
edge_sweep = False
n_iter *= g.num_edges()
traditional = False
if model == "blockmodel-traditional":
model = "blockmodel"
traditional = True
pcount = libgraph_tool_generation.random_rewire(g._Graph__graph, model,
n_iter, not edge_sweep,
self_loops, parallel_edges,
......@@ -1554,3 +1584,10 @@ def price_network(N, m=1, c=None, gamma=1, directed=True, seed_graph=None):
g = seed_graph
libgraph_tool_generation.price(g._Graph__graph, N, gamma, c, m, _get_rng())
return g
class Sampler(libgraph_tool_generation.Sampler):
    """Python wrapper around the compiled ``Sampler`` class.

    The compiled class is constructed from a vector of integer values and a
    vector of floating-point probabilities; this wrapper supplies the random
    number generator on each draw.
    """

    def __init__(self, values, probs):
        """Forward ``values`` and ``probs`` to the compiled constructor."""
        base = libgraph_tool_generation.Sampler
        base.__init__(self, values, probs)

    def sample(self):
        """Draw one value, using the generator returned by ``_get_rng()``."""
        rng = _get_rng()
        return libgraph_tool_generation.Sampler.sample(self, rng)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment