
Commit 413c28a1 authored by Tiago Peixoto

Implement semi-Bayesian block model inference

This includes code for expectation maximization with belief propagation
for the stochastic block model.
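A minimal sketch of the user-facing workflow this commit enables, mirroring
the doctest added in blockmodel_em.py below (the "polbooks" graph and B=3 are
just the example values used there):

    # Sketch of the new EM + BP workflow; assumes graph_tool is importable.
    import graph_tool.all as gt

    g = gt.collection.data["polbooks"]  # example dataset from the doctest
    state = gt.EMBlockState(g, B=3)     # randomly initialized parametric SBM
    delta, niter = gt.em_infer(state)   # alternate BP 'E' and 'M' steps
    b = state.get_MAP()                 # maximum a posteriori partition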
parent 93b1d393
Pipeline #147 failed
@@ -17,6 +17,7 @@ libgraph_tool_inference_la_LDFLAGS = $(MOD_LDFLAGS)
libgraph_tool_inference_la_SOURCES = \
cache.cc \
graph_blockmodel.cc \
graph_blockmodel_em.cc \
graph_blockmodel_gibbs.cc \
graph_blockmodel_layers.cc \
graph_blockmodel_layers_gibbs.cc \
@@ -46,6 +47,7 @@ libgraph_tool_inference_la_include_HEADERS = \
cache.hh \
gibbs_loop.hh \
graph_blockmodel.hh \
graph_blockmodel_em.hh \
graph_blockmodel_gibbs.hh \
graph_blockmodel_layers.hh \
graph_blockmodel_layers_util.hh \
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_tool.hh"
#include "random.hh"
#include <boost/python.hpp>
#include "graph_blockmodel_em.hh"
#include "graph_state.hh"
using namespace boost;
using namespace graph_tool;
GEN_DISPATCH(em_block_state, EMBlockState, EM_BLOCK_STATE_params)
python::object make_em_block_state(boost::python::object ostate, rng_t& rng)
{
python::object state;
em_block_state::make_dispatch(ostate,
[&](auto& s){state = python::object(s);},
rng);
return state;
}
void export_em_blockmodel_state()
{
using namespace boost::python;
em_block_state::dispatch
([&](auto* s)
{
typedef typename std::remove_reference<decltype(*s)>::type state_t;
class_<state_t> c(name_demangle(typeid(state_t).name()).c_str(),
no_init);
c.def("learn_iter", &state_t::learn_iter)
.def("bp_iter", &state_t::bp_iter)
.def("bethe_fe", &state_t::bethe_fe)
.def("get_MAP", &state_t::get_MAP_any);
});
def("make_em_block_state", make_em_block_state);
}
(This file's diff is collapsed and not shown.)
@@ -148,6 +148,7 @@ extern void export_layered_overlap_blockmodel_bundled_mcmc();
extern void export_layered_overlap_blockmodel_gibbs();
extern void export_layered_overlap_blockmodel_multicanonical();
extern void export_layered_overlap_blockmodel_vacate();
extern void export_em_blockmodel_state();
BOOST_PYTHON_MODULE(libgraph_tool_inference)
{
@@ -175,6 +176,7 @@ BOOST_PYTHON_MODULE(libgraph_tool_inference)
export_layered_overlap_blockmodel_gibbs();
export_layered_overlap_blockmodel_multicanonical();
export_layered_overlap_blockmodel_vacate();
export_em_blockmodel_state();
def("vertex_marginals", collect_vertex_marginals);
def("edge_marginals", collect_edge_marginals);
......
@@ -42,6 +42,8 @@
#include "graph_filtering.hh"
#include "graph_util.hh"
#include "numpy_bind.hh"
#include "config.h"
namespace graph_tool
@@ -204,7 +206,7 @@ struct StateWrap
             auto partial_f = [&](auto&&... args)
                 {
                     full_f(std::forward<decltype(args)>(args)...,
-                           uncheck(this->extract<Ts>
+                           uncheck(Extract<Ts>()
                                (ostate,
                                 names[FIdx + sizeof...(Idx)]))...);
                 };
@@ -213,51 +215,76 @@ struct StateWrap
     }
 
     template <class T>
-    T extract(python::object mobj, std::string name) const
-    {
-        python::object obj = mobj.attr(name.c_str());
-        python::extract<T> extract(obj);
-        if (extract.check())
-        {
-            T val = extract();
-            return val;
-        }
-        else
-        {
-            python::object aobj;
-            if (PyObject_HasAttrString(obj.ptr(), "_get_any"))
-                aobj = obj.attr("_get_any")();
-            else
-                aobj = obj;
-            python::extract<boost::any&> extract(aobj);
-            try
-            {
-                if (!extract.check())
-                    throw boost::bad_any_cast();
-                boost::any& aval = extract();
-                T val = any_cast<T>(aval);
-                return val;
-            }
-            catch (boost::bad_any_cast)
-            {
-                try
-                {
-                    typedef std::reference_wrapper
-                        <typename std::remove_reference<T>::type>
-                        ref_wrap_t;
-                    boost::any& aval = extract();
-                    auto val = any_cast<ref_wrap_t>(aval);
-                    return val.get();
-                }
-                catch (boost::bad_any_cast)
-                {
-                    throw ValueException("Cannot extract parameter '" + name +
-                                         "' of desired type: " +
-                                         name_demangle(typeid(T).name()));
-                }
-            }
-        }
-    }
+    struct Extract
+    {
+        T operator()(python::object mobj, std::string name) const
+        {
+            python::object obj = mobj.attr(name.c_str());
+            python::extract<T> extract(obj);
+            if (extract.check())
+            {
+                T val = extract();
+                return val;
+            }
+            else
+            {
+                python::object aobj;
+                if (PyObject_HasAttrString(obj.ptr(), "_get_any"))
+                    aobj = obj.attr("_get_any")();
+                else
+                    aobj = obj;
+                python::extract<boost::any&> extract(aobj);
+                try
+                {
+                    if (!extract.check())
+                        throw boost::bad_any_cast();
+                    boost::any& aval = extract();
+                    T val = any_cast<T>(aval);
+                    return val;
+                }
+                catch (boost::bad_any_cast)
+                {
+                    try
+                    {
+                        typedef std::reference_wrapper
+                            <typename std::remove_reference<T>::type>
+                            ref_wrap_t;
+                        boost::any& aval = extract();
+                        auto val = any_cast<ref_wrap_t>(aval);
+                        return val.get();
+                    }
+                    catch (boost::bad_any_cast)
+                    {
+                        throw ValueException("Cannot extract parameter '" + name +
+                                             "' of desired type: " +
+                                             name_demangle(typeid(T).name()));
+                    }
+                }
+            }
+        }
+    };
+
+    template <class Type, size_t Dim>
+    struct Extract<multi_array_ref<Type, Dim>>
+    {
+        multi_array_ref<Type, Dim> operator()(python::object mobj,
+                                              std::string name) const
+        {
+            python::object obj = mobj.attr(name.c_str());
+            try
+            {
+                return get_array<Type, Dim>(obj);
+            }
+            catch (InvalidNumpyConversion& e)
+            {
+                throw ValueException("Cannot extract parameter '" + name +
+                                     "' of desired type: " +
+                                     name_demangle(typeid(multi_array_ref<Type, Dim>).name())
+                                     + ", reason: " + std::string(e.what()));
+            }
+        }
+    };
 };
template <class TR>
......
@@ -75,6 +75,7 @@ graph_tool_inference_PYTHON = \
inference/__init__.py \
inference/bisection.py \
inference/blockmodel.py \
inference/blockmodel_em.py \
inference/layered_blockmodel.py \
inference/nested_blockmodel.py \
inference/overlap_blockmodel.py \
......
@@ -25,8 +25,8 @@
 This module contains algorithms for the identification of large-scale network
 structure via the statistical inference of generative models.
 
-Stochastic block model inference
-++++++++++++++++++++++++++++++++
+Bayesian stochastic block model inference
++++++++++++++++++++++++++++++++++++++++++
High-level functions
====================
@@ -73,6 +73,27 @@ Auxiliary functions
half_edge_graph
get_block_edge_gradient
Semi-Bayesian stochastic block model inference
++++++++++++++++++++++++++++++++++++++++++++++
State classes
=============
.. autosummary::
:nosignatures:
EMBlockState
Expectation-maximization Inference
==================================
.. autosummary::
:nosignatures:
em_infer
Contents
++++++++
@@ -96,6 +117,8 @@ __all__ = ["minimize_blockmodel_dl",
"MulticanonicalState",
"bisection_minimize",
"hierarchy_minimize",
"EMBlockState",
"em_infer",
"mf_entropy",
"bethe_entropy",
"half_edge_graph",
@@ -109,4 +132,5 @@ from . nested_blockmodel import *
from . mcmc import *
from . bisection import *
from . minimize import *
from . blockmodel_em import *
from . util import *
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# graph_tool -- a general graph manipulation python module
#
# Copyright (C) 2006-2016 Tiago de Paula Peixoto <tiago@skewed.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, absolute_import, print_function
import sys
if sys.version_info < (3,):
range = xrange
from .. import _degree, _prop, Graph, GraphView, libcore, _get_rng, PropertyMap, \
conv_pickle_state, Vector_size_t, Vector_double, group_vector_property
from .. generation import condensation_graph
from .. stats import label_self_loops
from .. spectral import adjacency
import random
from numpy import *
import numpy
import copy
import collections
from . blockmodel import *
from . util import *
from .. dl_import import dl_import
dl_import("from . import libgraph_tool_inference as libinference")
class EMBlockState(object):
r"""The parametric, undirected stochastic block model state of a given graph.
Parameters
----------
g : :class:`~graph_tool.Graph`
Graph to be modelled.
B : ``int``
Number of blocks (or vertex groups).
init_state : :class:`~graph_tool.inference.BlockState` (optional, default: ``None``)
Optional block state used for initialization.
Notes
-----
This class is intended to be used with :func:`em_infer()` to perform
expectation maximization with belief propagation. See
[decelle_asymptotic_2011]_ for more details.
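    Schematically (a sketch following [decelle_asymptotic_2011]_; here the
    group fractions :math:`\gamma_r` and affinities :math:`c_{rs}` are assumed
    to play the roles of the internal parameters ``wr`` and ``prs``), the
    'expectation' step iterates the belief-propagation messages

    .. math::

        \psi^{i\to j}_r \propto \gamma_r e^{-h_r}
        \prod_{k\in\partial i\setminus j}\sum_s c_{rs}\psi^{k\to i}_s,
        \qquad
        h_r = \frac{1}{N}\sum_k\sum_s c_{rs}\psi^k_s,

    until convergence, and the 'maximization' step re-estimates
    :math:`\gamma_r` and :math:`c_{rs}` from the resulting vertex and edge
    marginals.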
References
----------
.. [decelle_asymptotic_2011] Aurelien Decelle, Florent Krzakala, Cristopher
Moore, and Lenka Zdeborová, "Asymptotic analysis of the stochastic block
model for modular networks and its algorithmic applications",
Phys. Rev. E 84, 066106 (2011), :doi:`10.1103/PhysRevE.84.066106`,
:arxiv:`1109.3041` """
def __init__(self, g, B, init_state=None):
self.g = g
self.N = g.num_vertices()
self.B = B
self.wr = random.random(B)
self.wr /= self.wr.sum()
ak = 2 * g.num_edges() / g.num_vertices()
self.prs = random.random((B, B))
for r in range(B):
for s in range(r, B):
self.prs[r,s] = self.prs[s,r] = random.random()
self.em_s = g.new_edge_property("vector<double>")
self.em_t = g.new_edge_property("vector<double>")
self.vm = g.new_vertex_property("vector<double>")
self.Z = g.new_edge_property("double")
self.max_E = self.g._get_edge_index_range()
self.oprs = self.prs
self.owr = self.wr
self._state = libinference.make_em_block_state(self, _get_rng())
del self.oprs
del self.owr
# fix average degree
self.prs[:,:] /= self.get_ak() / ak
if init_state is not None:
# init marginals and messages
for v in g.vertices():
r = init_state.b[v]
self.vm[v].a = 1e-6
self.vm[v][r] = 1
self.vm[v].a /= self.vm[v].a.sum()
for e in g.edges():
u, v = e
if u > v:
u, v = v, u
self.em_s[e] = self.vm[u]
self.em_t[e] = self.vm[v]
#init parameters
self.wr[:] = init_state.wr.a
self.wr[:] /= self.wr.sum()
# m includes _twice_ the amount of edges in the diagonal
m = init_state.get_matrix()
for r in range(self.B):
for s in range(r, self.B):
self.prs[r, s] = self.N * m[r, s] / (init_state.wr[r] * init_state.wr[s])
self.prs[s, r] = self.prs[r, s]
def get_vertex_marginals(self):
"""Return the vertex marginals."""
return self.vm
def get_group_sizes(self):
"""Return the group sizes."""
return self.wr
def get_matrix(self):
"""Return probability matrix."""
return self.prs
def get_MAP(self):
"""Return the maximum a posteriori (MAP) estimate of the node partition."""
b = self.g.new_vertex_property("int")
self._state.get_MAP(_prop("v", self.g, b))
return b
def get_fe(self):
"""Return the Bethe free energy."""
return self._state.bethe_fe()
def get_ak(self):
"""Return the model's average degree."""
ak = 0
for r in range(self.B):
for s in range(self.B):
ak += self.prs[r][s] * self.wr[r] * self.wr[s]
return ak
def e_iter(self, max_iter=1000, epsilon=1e-3, verbose=False):
"""Perform 'expectation' iterations, using belief propagation, where the vertex
marginals and edge messages are updated, until convergence according to
``epsilon`` or the maximum number of iterations given by
``max_iter``. If ``verbose == True``, convergence information is
displayed.
The last update delta is returned.
"""
return self._state.bp_iter(epsilon, max_iter, verbose, _get_rng())
def m_iter(self):
"""Perform a single 'maximization' iteration, where the group sizes and
connection probability matrix are updated.
The update delta is returned.
"""
return self._state.learn_iter()
    def learn(self, epsilon=1e-3):
        """Perform 'maximization' iterations until convergence according to
        ``epsilon``.

        The last update delta is returned.
        """
        delta = epsilon + 1
        while delta > epsilon:
            delta = self.m_iter()
        return delta
def draw(self, **kwargs):
r"""Convenience wrapper to :func:`~graph_tool.draw.graph_draw` that
draws the state of the graph as colors on the vertices and edges."""
b = self.get_MAP()
bv = self.g.new_vertex_property("vector<int32_t>", val=range(self.B))
gradient = self.g.new_ep("double")
gradient = group_vector_property([gradient])
from graph_tool.draw import graph_draw
return graph_draw(self.g,
vertex_fill_color=kwargs.get("vertex_fill_color", b),
vertex_shape=kwargs.get("vertex_shape", "pie"),
vertex_pie_colors=kwargs.get("vertex_pie_colors", bv),
vertex_pie_fractions=kwargs.get("vertex_pie_fractions",
self.vm),
edge_gradient=kwargs.get("edge_gradient", gradient),
**dmask(kwargs, ["vertex_shape", "vertex_pie_colors",
"vertex_pie_fractions",
"vertex_fill_color",
"edge_gradient"]))
def em_infer(state, max_iter=1000, max_e_iter=1, epsilon=1e-3,
learn_first=False, verbose=False):
"""Infer the model parameters and latent variables using the
expectation-maximization (EM) algorithm with initial state given by
``state``.
Parameters
----------
state : model state
State object, e.g. of type :class:`graph_tool.inference.EMBlockState`.
max_iter : ``int`` (optional, default: ``1000``)
Maximum number of iterations.
max_e_iter : ``int`` (optional, default: ``1``)
Maximum number of 'expectation' iterations inside the main loop.
epsilon : ``float`` (optional, default: ``1e-3``)
Convergence criterion.
    learn_first : ``bool`` (optional, default: ``False``)
        If ``True``, the maximization (a.k.a. parameter learning) is converged
        before the main loop is run.
    verbose : ``bool`` (optional, default: ``False``)
        If ``True``, convergence information is displayed.
Returns
-------
delta : ``float``
The last update delta.
niter : ``int``
The total number of iterations.
Examples
--------
.. testsetup:: em_infer
gt.seed_rng(42)
np.random.seed(42)
.. doctest:: em_infer
>>> g = gt.collection.data["polbooks"]
>>> state = gt.EMBlockState(g, B=3)
>>> delta, niter = gt.em_infer(state)
>>> state.draw(pos=g.vp["pos"], output="polbooks_EM_B3.pdf")
<...>
.. testcleanup:: em_infer
state.draw(pos=g.vp["pos"], output="polbooks_EM_B3.png")
.. figure:: polbooks_EM_B3.*
:align: center
"Soft" block partition of a political books network with :math:`B=3`.
References
----------
.. [wiki-EM] "Expectation–maximization algorithm",
https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm
"""
    if learn_first:
        state.learn(epsilon=epsilon)
niter = 0
delta = epsilon + 1
while delta > epsilon:
delta = state.e_iter(max_iter=max_e_iter, epsilon=epsilon,
verbose=verbose)
delta += state.m_iter()
niter += 1
if niter > max_iter and max_iter > 0:
break
if verbose:
print(niter, delta)
return delta, niter
\ No newline at end of file
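For reference, a minimal sketch of the core loop that em_infer runs above,
written out by hand using only the EMBlockState methods defined in this file
(g and B=3 as in the doctest):

    # Sketch: manual EM loop equivalent to em_infer's core.
    state = gt.EMBlockState(g, B=3)
    delta = 1e-3 + 1
    while delta > 1e-3:
        delta = state.e_iter(max_iter=1)  # BP 'expectation' sweep
        delta += state.m_iter()           # parameter 'maximization' step
    print(state.get_fe())                 # Bethe free energy of the fit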
@@ -87,6 +87,17 @@ def mcmc_equilibrate(state, wait=10, nbreaks=2, max_niter=numpy.inf,
the value of ``state.entropy(**args)`` with ``args`` corresponding to
``mcmc_args["entropy_args"]``.
Returns
-------
history : list of tuples of the form ``(iteration, entropy)``
Summary of the MCMC run. This is returned only if ``history == True``.
entropy : ``float``
Current entropy value after run. This is returned only if ``history ==
False``.
nmoves : ``int``
Number of node moves.
References
----------
@@ -212,6 +223,17 @@ def mcmc_anneal(state, beta_range=(1., 10.), niter=100, history=False,
:func:`~graph_tool.inference.mcmc_equilibrate` is called with the current
value of `beta` (via the ``mcmc_args`` parameter).
Returns
-------
history : list of tuples of the form ``(iteration, beta, entropy)``
Summary of the MCMC run. This is returned only if ``history == True``.
entropy : ``float``
Current entropy value after run. This is returned only if ``history ==
False``.
nmoves : ``int``
Number of node moves.
References
----------
@@ -302,6 +324,12 @@ def mcmc_multilevel(state, B, r=2, b_cache=None, anneal=False,
the starting point if far away from equilibrium, as discussed in
[peixoto-efficient-2014]_.
Returns
-------
state : The same type as parameter ``state``
This is the final state after the MCMC run.
References
----------
@@ -483,6 +511,12 @@ def multicanonical_equilibrate(state, m_state, f_range=(1., 1e-6), r=2,
detail, and ``prefix`` is a string that is prepended to the all output
messages.
Returns
-------
niter : ``int``
Number of iterations required for convergence.
References
----------
@@ -492,6 +526,7 @@ def multicanonical_equilibrate(state, m_state, f_range=(1., 1e-6), r=2,
        :arxiv:`cond-mat/0011174`
     """
 
+    count = 0
     f = f_range[0]
     while f >= f_range[1]:
         state.multicanonical_sweep(m_state, **overlay(multicanonical_args, f=f))
@@ -504,7 +539,9 @@ def multicanonical_equilibrate(state, m_state, f_range=(1., 1e-6), r=2,
         if callback is not None:
             callback(state, m_state)
 
+        count += 1
         if check_verbose(verbose):
-            print(verbose_pad(verbose) + "f: %g flatness: %g" % (f, hf))
+            print(verbose_pad(verbose) +
+                  "iter: %d f: %g flatness: %g" % (count, f, hf))
 
-    return m_state
\ No newline at end of file
+    return count
\ No newline at end of file
@@ -297,6 +297,16 @@ def minimize_blockmodel_dl(g, B_min=None, B_max=None, b_min=None, b_max=None,
state_args=state_args,
mcmc_multilevel_args=mcmc_multilevel_args)
if B_min is None:
B_min = 1
if B_max is None:
B_max = numpy.inf
Bs = list(b_cache.keys())
for B in Bs:
if B > B_max or B < B_min:
del b_cache[B]
state = bisection_minimize([min_state, max_state], verbose=verbose,
**bisection_args)
......