Commit b9eac005 authored by Tiago Peixoto

Switch PRNG from std::mt19937 to PCG

parent 0d86003c
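PCG engines expose multiple independent streams via set_stream(), so per-thread generators can be derived by copying the already-seeded generator and selecting a different stream, instead of re-seeding each copy through a std::seed_seq as the old init_rngs() did. A minimal sketch of that idea, assuming plain pcg64 (the rng_t typedef actually used in the code may differ); the snippet is illustrative and not part of this commit:

// Sketch: derive per-thread generators from one seeded pcg64 by switching
// streams (pcg64 stands in for graph-tool's rng_t).
#include <iostream>
#include <vector>
#include "pcg_random.hpp"

int main()
{
    pcg64 base(42);                    // one generator, seeded once

    std::vector<pcg64> rngs;
    for (size_t i = 0; i < 4; ++i)
    {
        rngs.emplace_back(base);       // copy the current state...
        rngs.back().set_stream(i + 1); // ...but draw from a distinct stream
    }

    for (auto& r : rngs)
        std::cout << r() << "\n";      // each stream yields its own sequence
}

The new parallel_rng helper further down encapsulates exactly this copy-and-set_stream pattern for OpenMP worker threads.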
@@ -357,7 +357,7 @@ AC_SUBST(CPPFLAGS)
AC_SUBST(CXXFLAGS)
# extra CPP flags for submodules
[MOD_CPPFLAGS="-I\$(top_srcdir)/src/boost-workaround -DHAVE_CONFIG_H -I\$(top_srcdir)/src/graph -I\$(top_builddir) ${PYTHON_CPPFLAGS} ${BOOST_CPPFLAGS} ${NUMPY_CPPFLAGS} ${EXPAT_CFLAGS}"]
[MOD_CPPFLAGS="-I\$(top_srcdir)/src/boost-workaround -I\$(top_srcdir)/src/pcg-cpp/include -DHAVE_CONFIG_H -I\$(top_srcdir)/src/graph -I\$(top_builddir) ${PYTHON_CPPFLAGS} ${BOOST_CPPFLAGS} ${NUMPY_CPPFLAGS} ${EXPAT_CFLAGS}"]
AC_SUBST(MOD_CPPFLAGS)
# extra LIBADD flags for submodules
@@ -378,7 +378,7 @@ AC_SUBST(MOD_LDFLAGS)
AX_CREATE_PKGCONFIG_INFO([graph-tool-py${PYTHON_VERSION}.pc], [],
[${PYTHON_LIBS} -l${BOOST_PYTHON_LIB}],
[graph-tool Python library],
[-I${MOD_DIR}/include -I${MOD_DIR}/include/boost-workaround ${PYTHON_CPPFLAGS} ${BOOST_CPPFLAGS} ${SPARSEHASH_CFLAGS} ${NUMPY_CPPFLAGS}],
[-I${MOD_DIR}/include -I${MOD_DIR}/include/boost-workaround -I${MOD_DIR}/include/pcg-cpp ${PYTHON_CPPFLAGS} ${BOOST_CPPFLAGS} ${SPARSEHASH_CFLAGS} ${NUMPY_CPPFLAGS}],
[])
AC_CONFIG_FILES([
......
@@ -71,6 +71,7 @@ libgraph_tool_core_la_include_HEADERS = \
mpl_nested_loop.hh \
numpy_bind.hh \
openmp_lock.hh \
parallel_rng.hh \
random.hh \
str_repr.hh \
shared_map.hh \
@@ -91,3 +92,9 @@ libgraph_tool_core_la_workaround_HEADERS = \
../boost-workaround/boost/graph/copy_alt.hpp \
../boost-workaround/boost/graph/stoer_wagner_min_cut.hpp
libgraph_tool_core_la_pcg_cppdir = $(MOD_DIR)/include/pcg-cpp
libgraph_tool_core_la_pcg_cpp_HEADERS = \
../pcg-cpp/include/pcg_extras.hpp \
../pcg-cpp/include/pcg_random.hpp \
../pcg-cpp/include/pcg_uint128.hpp
@@ -134,18 +134,50 @@ private:
// uniform sampling from containers
template <class Iter, class RNG>
auto&& uniform_sample(Iter begin, const Iter& end, RNG& rng)
auto uniform_sample_iter(Iter begin, const Iter& end, RNG& rng)
{
auto N = end - begin;
auto N = std::distance(begin, end);
std::uniform_int_distribution<size_t> i_rand(0, N - 1);
std::advance(begin, i_rand(rng));
return *begin;
return begin;
}
template <class Container, class RNG>
auto uniform_sample_iter(Container& v, RNG& rng)
{
return uniform_sample_iter(v.begin(), v.end(), rng);
}
template <class Iter, class RNG>
auto&& uniform_sample(Iter&& begin, const Iter& end, RNG& rng)
{
return *uniform_sample_iter(begin, end, rng);
}
template <class Container, class RNG>
auto&& uniform_sample(Container& v, RNG& rng)
{
return uniform_sample(v.begin(), v.end(), rng);
return *uniform_sample_iter(v, rng);
}
template <class Graph, class RNG>
typename boost::graph_traits<Graph>::vertex_descriptor
random_out_neighbor(typename boost::graph_traits<Graph>::vertex_descriptor v,
const Graph& g,
RNG& rng)
{
auto iter = out_edges(v, g);
return target(*uniform_sample_iter(iter.first, iter.second, rng), g);
}
template <class Graph, class RNG>
typename boost::graph_traits<Graph>::vertex_descriptor
random_in_neighbor(typename boost::graph_traits<Graph>::vertex_descriptor v,
const Graph& g,
RNG& rng)
{
auto iter = in_edge_iteratorS<Graph>::get_edges(v, g);
return source(*uniform_sample_iter(iter.first, iter.second, rng), g);
}
} // namespace graph_tool
......
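The refactored samplers above return an iterator (uniform_sample_iter), which the dereferencing overloads and the new random_out_neighbor()/random_in_neighbor() helpers build on. A self-contained sketch of the same pattern, re-implementing the iterator sampler locally (as sample_iter) so it compiles without graph-tool's headers, with a plain Boost graph and std::mt19937 standing in for rng_t:

// Illustration only: mirrors uniform_sample_iter() and random_out_neighbor()
// without depending on graph-tool headers.
#include <iostream>
#include <iterator>
#include <random>
#include <boost/graph/adjacency_list.hpp>

template <class Iter, class RNG>
Iter sample_iter(Iter begin, const Iter& end, RNG& rng)
{
    auto N = std::distance(begin, end);
    std::uniform_int_distribution<size_t> idx(0, N - 1);
    std::advance(begin, idx(rng));
    return begin;
}

int main()
{
    using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                        boost::bidirectionalS>;
    Graph g(4);
    boost::add_edge(0, 1, g);
    boost::add_edge(0, 2, g);
    boost::add_edge(0, 3, g);

    std::mt19937 rng(42);

    // Random out-neighbor of vertex 0, as random_out_neighbor() does.
    auto es = boost::out_edges(0, g);
    auto u = boost::target(*sample_iter(es.first, es.second, rng), g);
    std::cout << "sampled neighbor: " << u << std::endl;
}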
@@ -103,6 +103,5 @@ libgraph_tool_inference_la_include_HEADERS = \
support/graph_neighbor_sampler.hh \
support/graph_state.hh \
support/int_part.hh \
support/parallel_rng.hh \
support/util.hh \
graph_modularity.hh
@@ -102,15 +102,14 @@ python::object do_gibbs_sweep_parallel(python::object ogibbs_states,
block_state::dispatch(oblock_states[i], dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -110,15 +110,14 @@ python::object do_mcmc_sweep_parallel(python::object omcmc_states,
block_state::dispatch(oblock_states[i], dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -102,15 +102,14 @@ python::object do_multiflip_mcmc_sweep_parallel(python::object omcmc_states,
block_state::dispatch(oblock_states[i], dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -130,15 +130,14 @@ python::object gibbs_layered_sweep_parallel(python::object ogibbs_states,
block_state::dispatch(dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -138,15 +138,14 @@ python::object mcmc_layered_sweep_parallel(python::object omcmc_states,
block_state::dispatch(dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -129,15 +129,14 @@ python::object multiflip_mcmc_layered_sweep_parallel(python::object omcmc_states
block_state::dispatch(dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -128,15 +128,14 @@ python::object gibbs_layered_overlap_sweep_parallel(python::object ogibbs_states
overlap_block_state::dispatch(dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -137,15 +137,14 @@ python::object mcmc_layered_overlap_sweep_parallel(python::object omcmc_states,
overlap_block_state::dispatch(dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -128,15 +128,14 @@ python::object multiflip_mcmc_layered_overlap_sweep_parallel(python::object omcm
overlap_block_state::dispatch(dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -26,7 +26,7 @@
#include <tuple>
#include "hash_map_wrap.hh"
#include "../support/parallel_rng.hh"
#include "parallel_rng.hh"
#ifdef _OPENMP
#include <omp.h>
@@ -39,12 +39,12 @@ auto gibbs_sweep(GibbsState state, RNG& rng_)
{
auto& g = state._g;
vector<std::shared_ptr<RNG>> rngs;
std::vector<std::pair<size_t, double>> best_move;
vector<RNG> rngs;
vector<std::pair<size_t, double>> best_move;
if (state._parallel)
{
init_rngs(rngs, rng_);
parallel_rng<RNG>::init(rng_);
init_cache(state._E);
best_move.resize(num_vertices(g));
}
@@ -84,7 +84,7 @@ auto gibbs_sweep(GibbsState state, RNG& rng_)
(vlist,
[&](size_t, auto v)
{
auto& rng = get_rng(rngs, rng_);
auto& rng = parallel_rng<RNG>::get(rng_);
if (!state._sequential)
v = uniform_sample(vlist, rng);
......
@@ -26,7 +26,7 @@
#include <tuple>
#include "hash_map_wrap.hh"
#include "../support/parallel_rng.hh"
#include "parallel_rng.hh"
#ifdef _OPENMP
#include <omp.h>
@@ -117,10 +117,9 @@ auto mcmc_sweep_parallel(MCMCState state, RNG& rng_)
{
auto& g = state._g;
vector<std::shared_ptr<RNG>> rngs;
std::vector<std::pair<size_t, double>> best_move;
init_rngs(rngs, rng_);
parallel_rng<RNG>::init(rng_);
init_cache(state._E);
best_move.resize(num_vertices(g));
@@ -146,7 +145,7 @@ auto mcmc_sweep_parallel(MCMCState state, RNG& rng_)
(vlist,
[&](size_t, auto v)
{
auto& rng = get_rng(rngs, rng_);
auto& rng = parallel_rng<RNG>::get(rng_);
if (state.node_weight(v) == 0)
return;
......
@@ -26,7 +26,7 @@
#include <tuple>
#include "hash_map_wrap.hh"
#include "../support/parallel_rng.hh"
#include "parallel_rng.hh"
#ifdef _OPENMP
#include <omp.h>
@@ -37,10 +37,9 @@ namespace graph_tool
template <class MergeState, class RNG>
auto merge_sweep(MergeState state, RNG& rng_)
{
vector<std::shared_ptr<RNG>> rngs;
if (state._parallel)
{
init_rngs(rngs, rng_);
parallel_rng<RNG>::init(rng_);
init_cache(state._E);
}
@@ -60,7 +59,7 @@ auto merge_sweep(MergeState state, RNG& rng_)
(state._available,
[&](size_t, auto v)
{
auto& rng = get_rng(rngs, rng_);
auto& rng = parallel_rng<RNG>::get(rng_);
if (state.node_weight(v) == 0)
return;
......
@@ -105,15 +105,13 @@ python::object gibbs_overlap_sweep_parallel(python::object ogibbs_states,
overlap_block_state::dispatch(oblock_states[i], dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -106,15 +106,14 @@ python::object overlap_mcmc_sweep_parallel(python::object omcmc_states,
overlap_block_state::dispatch(oblock_states[i], dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -106,15 +106,14 @@ python::object overlap_multiflip_mcmc_sweep_parallel(python::object omcmc_states
overlap_block_state::dispatch(oblock_states[i], dispatch);
}
std::vector<std::shared_ptr<rng_t>> rngs;
init_rngs(rngs, rng);
parallel_rng<rng_t>::init(rng);
std::vector<std::tuple<double, size_t, size_t>> rets(N);
#pragma omp parallel for schedule(runtime)
for (size_t i = 0; i < N; ++i)
{
auto& rng_ = get_rng(rngs, rng);
auto& rng_ = parallel_rng<rng_t>::get(rng);
rets[i] = sweeps[i]->run(rng_);
}
......
@@ -18,34 +18,56 @@
#ifndef PARALLEL_RNG_HH
#define PARALLEL_RNG_HH
#include "config.h"
#include <vector>
template <class RNG>
void init_rngs(std::vector<std::shared_ptr<RNG>>& rngs, RNG& rng)
{
size_t num_threads = 1;
#ifdef _OPENMP
num_threads = omp_get_max_threads();
# include <omp.h>
#endif
for (size_t i = 0; i < num_threads; ++i)
{
std::array<int, RNG::state_size> seed_data;
std::generate_n(seed_data.data(), seed_data.size(), std::ref(rng));
std::seed_seq seq(std::begin(seed_data), std::end(seed_data));
rngs.push_back(std::make_shared<rng_t>(seq));
}
}
template <class RNG>
RNG& get_rng(std::vector<std::shared_ptr<RNG>>& rngs, RNG& rng)
class parallel_rng
{
if (rngs.empty())
return rng;
size_t tid = 0;
public:
static void init(RNG& rng)
{
size_t num_threads = 1;
#ifdef _OPENMP
num_threads = omp_get_max_threads();
#endif
for (size_t i = _rngs.size(); i < num_threads - 1; ++i)
{
// std::array<int, RNG::state_size> seed_data;
// std::generate_n(seed_data.data(), seed_data.size(), std::ref(rng));
// std::seed_seq seq(std::begin(seed_data), std::end(seed_data));
// rngs.emplace_back(seq);
_rngs.emplace_back(rng);
_rngs.back().set_stream(i + 1);
}
}
static void clear()
{
_rngs.clear();
}
static RNG& get(RNG& rng)
{
size_t tid = 0;
#ifdef _OPENMP
tid = omp_get_thread_num();
tid = omp_get_thread_num();
#endif
return *rngs[tid];
if (tid == 0)
return rng;
return _rngs[tid - 1];
}
private:
static std::vector<RNG> _rngs;
};
template <class RNG>
std::vector<RNG> parallel_rng<RNG>::_rngs;
#endif // PARALLEL_RNG_HH
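Typical use of the new helper follows the pattern in the sweep files above: init() is called once before the parallel region, get() hands thread 0 the caller's generator and every other thread its own stream copy, and clear() discards the copies. A sketch, assuming it is built inside the graph-tool tree (so that config.h and the pcg-cpp headers are on the include path), with OpenMP enabled and pcg64 standing in for rng_t:

// Illustration only; not part of this commit.
#include <cstdio>
#include "pcg_random.hpp"
#include "parallel_rng.hh"

int main()
{
    pcg64 rng(42);

    // One extra generator per worker thread, each on its own PCG stream.
    parallel_rng<pcg64>::init(rng);

    #pragma omp parallel for schedule(runtime)
    for (int i = 0; i < 8; ++i)
    {
        // Thread 0 uses `rng` itself; other threads use their stream copy.
        auto& rng_ = parallel_rng<pcg64>::get(rng);
        std::printf("i=%d -> %llu\n", i, (unsigned long long) rng_());
    }

    parallel_rng<pcg64>::clear();
}

Because thread 0 keeps drawing from the caller's generator and init() only copies its state, a single-threaded run produces the same sequence it would without the helper; only the additional worker threads receive stream copies.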