Commit 918ae899 authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Initial implementation of partition modes

parent 2d134e6d
......@@ -31,6 +31,10 @@ libgraph_tool_inference_la_SOURCES = \
partition_centroid/graph_partition_centroid.cc \
partition_centroid/graph_partition_centroid_mcmc.cc \
partition_centroid/graph_partition_centroid_multiflip_mcmc.cc \
partition_modes/graph_partition_mode.cc \
partition_modes/graph_partition_mode_clustering.cc \
partition_modes/graph_partition_mode_clustering_mcmc.cc \
partition_modes/graph_partition_mode_clustering_multiflip_mcmc.cc \
overlap/graph_blockmodel_overlap.cc \
overlap/graph_blockmodel_overlap_exhaustive.cc \
overlap/graph_blockmodel_overlap_gibbs.cc \
......@@ -100,6 +104,8 @@ libgraph_tool_inference_la_include_HEADERS = \
blockmodel/graph_blockmodel_util.hh \
blockmodel/graph_blockmodel_weights.hh \
partition_centroid/graph_partition_centroid.hh \
partition_modes/graph_partition_mode.hh \
partition_modes/graph_partition_mode_clustering.hh \
overlap/graph_blockmodel_overlap.hh \
overlap/graph_blockmodel_overlap_mcmc_bundled.hh \
overlap/graph_blockmodel_overlap_util.hh \
......
......@@ -2584,6 +2584,12 @@ public:
assert(size_t(_wr[r]) == wr[r]);
}
template <class V>
void push_state(V&) {}
void pop_state() {}
void store_next_state(size_t) {}
void clear_next_state() {}
//private:
typedef typename
std::conditional<is_directed_::apply<g_t>::type::value,
......
......@@ -130,6 +130,7 @@ struct MCMC
auto& back = _bstack.back();
for (auto v : vs)
back.emplace_back(v, _state._b[v]);
_state.push_state(vs);
_push_b_dispatch(std::forward<Vs>(vvs)...);
}
......@@ -150,6 +151,7 @@ struct MCMC
move_vertex(v, s);
}
_bstack.pop_back();
_state.pop_state();
}
std::vector<size_t> _rlist;
......@@ -214,10 +216,10 @@ struct MCMC
void move_vertex(size_t v, size_t r)
{
size_t s = _state._b[v];
_state.move_vertex(v, r);
if (s == r)
return;
remove_element(_groups[s], _vpos, v);
_state.move_vertex(v, r);
add_element(_groups[r], _vpos, v);
_nmoves++;
}
......@@ -281,11 +283,11 @@ struct MCMC
stage_split_random(std::vector<size_t>& vs, size_t r, size_t s, RNG& rng)
{
std::array<size_t, 2> rt = {null_group, null_group};
std::array<double, 2> ps;
double dS = 0;
std::uniform_real_distribution<> unit(0, 1);
double p = unit(rng);
double p0 = unit(rng);
std::bernoulli_distribution sample(p0);
std::shuffle(vs.begin(), vs.end(), rng);
for (auto v : vs)
......@@ -311,12 +313,6 @@ struct MCMC
continue;
}
ps[0] = log(p);
ps[1] = log1p(-p);
double Z = log_sum(ps[0], ps[1]);
double p0 = ps[0] - Z;
std::bernoulli_distribution sample(exp(p0));
if (sample(rng))
{
dS += _state.virtual_move(v, _state._b[v], rt[0],
......@@ -417,8 +413,7 @@ struct MCMC
}
template <class RNG, bool forward=true>
std::tuple<size_t, double, double> split(size_t r, size_t s,
RNG& rng)
std::tuple<size_t, double, double> split(size_t r, size_t s, RNG& rng)
{
auto vs = _groups[r];
......@@ -602,6 +597,7 @@ struct MCMC
_dS = _a = 0;
_vs.clear();
_nmoves = 0;
_state.clear_next_state();
move_t move = _move_sampler.sample(rng);
......@@ -668,7 +664,10 @@ struct MCMC
<< " " << -_dS + pb - pf << endl;
for (auto v : _vs)
{
_bnext[v] = _state._b[v];
_state.store_next_state(v);
}
pop_b();
_state._egroups_update = true;
......@@ -699,7 +698,10 @@ struct MCMC
_dS = merge(r, s);
for (auto v : _vs)
{
_bnext[v] = _state._b[v];
_state.store_next_state(v);
}
pop_b();
_state._egroups_update = true;
......@@ -748,6 +750,7 @@ struct MCMC
auto v = get<0>(vb);
_vs.push_back(v);
_bnext[v] = _state._b[v];
_state.store_next_state(v);
}
while (!_bstack.empty())
......
......@@ -127,6 +127,10 @@ extern void export_pseudo_ising_mcmc_h();
extern void export_vi_center_state();
extern void export_vi_center_mcmc();
extern void export_vi_multiflip_mcmc();
extern void export_partition_mode();
extern void export_mode_cluster_state();
extern void export_mode_cluster_mcmc();
extern void export_mode_cluster_multiflip_mcmc();
BOOST_PYTHON_MODULE(libgraph_tool_inference)
{
......@@ -190,6 +194,10 @@ BOOST_PYTHON_MODULE(libgraph_tool_inference)
export_vi_center_state();
export_vi_center_mcmc();
export_vi_multiflip_mcmc();
export_partition_mode();
export_mode_cluster_state();
export_mode_cluster_mcmc();
export_mode_cluster_multiflip_mcmc();
def("vector_map", vector_map<int32_t>);
def("vector_map64", vector_map<int64_t>);
......
......@@ -1262,6 +1262,11 @@ public:
void coupled_resize_vertex(size_t) { }
void update_block_edge(const GraphInterface::edge_t&,
const std::vector<double>&) { }
template <class V>
void push_state(V&) {}
void pop_state() {}
void store_next_state(size_t) {}
void clear_next_state() {}
//private:
typedef typename
......
......@@ -36,7 +36,7 @@ typedef multi_array_ref<int32_t,2> bs_t;
typedef multi_array_ref<int32_t,1> b_t;
#define BLOCK_STATE_params \
((g, &, all_graph_views, 1)) \
((g, &, always_directed_never_reversed, 1)) \
((_abg, &, boost::any&, 0)) \
((bs,, bs_t, 0)) \
((b,, b_t, 0))
......@@ -283,6 +283,12 @@ public:
return true;
}
template <class V>
void push_state(V&) {}
void pop_state() {}
void store_next_state(size_t) {}
void clear_next_state() {}
};
} // graph_tool namespace
......
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2019 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "graph_tool.hh"
#include "random.hh"
#include "numpy_bind.hh"
#include <boost/python.hpp>
#include "graph_partition_mode.hh"
using namespace boost;
using namespace graph_tool;
void export_partition_mode()
{
using namespace boost::python;
class_<PartitionModeState>
("PartitionModeState", init<size_t>())
.def("add_partition",
+[](PartitionModeState& state, object ob, bool relabel)
{
auto b = get_array<int32_t, 1>(ob);
state.add_partition(b, relabel);
})
.def("remove_partition",
+[](PartitionModeState& state, size_t i)
{
state.remove_partition(i);
})
.def("replace_partitions", &PartitionModeState::replace_partitions)
.def("get_marginal",
+[](PartitionModeState& state,
GraphInterface& gi, boost::any obm)
{
run_action<>()
(gi, [&](auto& g, auto bm)
{
state.get_marginal(g, bm);
}, vertex_scalar_vector_properties())(obm);
})
.def("get_partition",
+[](PartitionModeState& state, size_t i)
{
auto b = state.get_partition(i);
return wrap_multi_array_not_owned(b);
})
.def("get_partitions",
+[](PartitionModeState& state)
{
python::dict obs;
auto& bs = state.get_partitions();
for (auto& kb : bs)
{
auto b = state.get_partition(kb.first);
obs[kb.first] = wrap_multi_array_not_owned(b);
}
return obs;
})
.def("relabel", &PartitionModeState::relabel)
.def("entropy", &PartitionModeState::entropy)
.def("posterior_entropy", &PartitionModeState::posterior_entropy);
}
// graph-tool -- a general graph modification and manipulation thingy
//
// Copyright (C) 2006-2019 Tiago de Paula Peixoto <tiago@skewed.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef GRAPH_PARTITION_MODE_HH
#define GRAPH_PARTITION_MODE_HH
#include "config.h"
#include <vector>
#include "../blockmodel/graph_blockmodel_util.hh"
#include <boost/graph/maximum_weighted_matching.hpp>
#include "idx_map.hh"
namespace graph_tool
{
template <class Graph, class VMap, class EMap, class VX, class VY>
void get_contingency_graph(Graph& g, VMap label, EMap mrs, VX& x, VY& y)
{
typedef typename graph_traits<Graph>::vertex_descriptor vertex_t;
typedef typename VX::value_type xv_t;
typedef typename VY::value_type yv_t;
idx_map<int, vertex_t> x_vertices, y_vertices;
auto get_v =
[&](auto& vs, auto val)
{
auto iter = vs.find(val);
if (iter == vs.end())
{
auto v = add_vertex(g);
vs[val] = v;
return v;
}
return iter->second;
};
for (auto&& r : x)
{
if constexpr (std::is_scalar_v<xv_t>)
{
label[get_v(x_vertices, r)] = r;
}
else
{
for (auto& rn : r)
label[get_v(x_vertices, rn.first)] = rn.first;
}
}
for (auto&& s : y)
{
if constexpr (std::is_scalar_v<yv_t>)
{
label[get_v(y_vertices, s)] = s;
}
else
{
for (auto& sn : s)
label[get_v(y_vertices, sn.first)] = sn.first;
}
}
auto add_mrs
= [&](auto i, auto u)
{
if constexpr (std::is_scalar_v<yv_t>)
{
auto s = y[i];
auto v = get_v(y_vertices, s);
auto e = edge(u, v, g);
if (!e.second)
e = add_edge(u, v, g);
mrs[e.first]++;
}
else
{
for (auto& sn : y[i])
{
auto v = get_v(y_vertices, sn.first);
auto e = edge(u, v, g);
if (!e.second)
e = add_edge(u, v, g);
mrs[e.first] += safelog_fast(sn.second + 1);
}
}
};
for (size_t i = 0; i < x.size(); ++i)
{
if constexpr (std::is_scalar_v<xv_t>)
{
auto r = x[i];
auto u = get_v(x_vertices, r);
add_mrs(i, u);
}
else
{
for (auto& rn : x[i])
{
auto u = get_v(x_vertices, rn.first);
add_mrs(i, u);
}
}
}
}
class PartitionModeState
{
public:
PartitionModeState(size_t N)
: _N(N), _nr(N), _count(N) {}
typedef multi_array_ref<int32_t,1> b_t;
size_t add_partition(b_t& b, bool relabel)
{
if (relabel)
relabel_partition(b);
for (size_t i = 0; i < b.size(); ++i)
{
size_t r = b[i];
_nr[i][r]++;
if (r >= _count.size())
_count.resize(r + 1);
_count[r]++;
if (_count[r] == 1)
{
_B++;
_free_idxs.erase(r);
}
if (r > _rmax)
_rmax = r;
}
size_t j;
if (_free_pos.empty())
{
j = _max_pos++;
}
else
{
j = _free_pos.back();
_free_pos.pop_back();
}
_bs[j] = b.data();
return j;
}
void remove_partition(size_t j)
{
assert(_bs.find(j) != _bs.end());
auto b = b_t(_bs[j], extents[_N]);
for (size_t i = 0; i < b.size(); ++i)
{
auto r = b[i];
auto iter = _nr[i].find(r);
iter->second--;
if (iter->second == 0)
_nr[i].erase(iter);
_count[r]--;
if (_count[r] == 0)
{
_B--;
_free_idxs.insert(r);
}
}
_bs.erase(j);
_free_pos.push_back(j);
}
void replace_partitions()
{
std::vector<size_t> pos;
for (auto ib : _bs)
pos.push_back(ib.first);
for (auto j : pos)
{
auto b = get_partition(j);
double dS = virtual_remove_partition(b);
remove_partition(j);
dS += virtual_add_partition(b);
add_partition(b, dS < 0);
}
}
bool has_partition(size_t j, b_t& b)
{
if (_bs.find(j) == _bs.end())
return false;
return (b == get_partition(j));
}
void rebuild_nr()
{
_B = 0;
_rmax = 0;
for (auto& x : _nr)
x.clear();
std::fill(_count.begin(), _count.end(), 0);
for (auto jb : _bs)
{
auto b = get_partition(jb.first);
for (size_t i = 0; i < b.size(); ++i)
{
size_t r = b[i];
_nr[i][r]++;
_count[r]++;
if (_count[r] == 1)
{
_B++;
_free_idxs.erase(r);
}
if (r > _rmax)
_rmax = r;
}
}
for (size_t r = 0; r < _rmax; ++r)
{
if (_count[r] == 0)
_free_idxs.insert(r);
}
}
void relabel()
{
std::vector<int> labels(_N), map(_N);
std::iota(labels.begin(), labels.end(), 0);
std::sort(labels.begin(), labels.end(),
[&](auto r, auto s) { return _count[r] > _count[s]; });
for (size_t i = 0; i < _N; ++i)
map[labels[i]] = i;
for (auto jb : _bs)
{
auto b = get_partition(jb.first);
for (size_t i = 0; i < _N; ++i)
b[i] = map[b[i]];
}
rebuild_nr();
}
template <class BT>
void relabel_partition(BT& b)
{
idx_map<int32_t, size_t> rpos;
for (size_t i = 0; i < b.size(); ++i)
{
auto r = b[i];
auto iter = rpos.find(r);
if (iter == rpos.end())
b[i] = rpos[r] = rpos.size();
else
b[i] = iter->second;
}
if (_bs.empty())
return;
adj_list<> g;
typename vprop_map_t<int32_t>::type label(get(vertex_index_t(), g));
typename eprop_map_t<long double>::type mrs(get(edge_index_t(), g));
get_contingency_graph(g, label, mrs, b, _nr);
typedef typename graph_traits<adj_list<>>::vertex_descriptor vertex_t;
typename vprop_map_t<vertex_t>::type match(get(vertex_index_t(), g));
auto u = undirected_adaptor<adj_list<>>(g);
maximum_weighted_matching(u, mrs, match);
idx_map<int32_t, size_t> b_vertices;
for (auto v : vertices_range(g))
{
if (v >= rpos.size())
break;
b_vertices[label[v]] = v;
}
auto ipos = _free_idxs.begin();
rpos.clear();
for (size_t i = 0; i < b.size(); ++i)
{
auto r = b[i];
auto v = match[b_vertices[r]];
if (v != graph_traits<adj_list<>>::null_vertex())
{
b[i] = label[v];
}
else
{
auto iter = rpos.find(r);
if (iter == rpos.end())
{
if (ipos == _free_idxs.end())
ipos = _free_idxs.insert(++_rmax).first;
rpos[r] = b[i] = *(ipos++);
}
else
{
b[i] = iter->second;