Commit d0d247ea authored by Tiago Peixoto's avatar Tiago Peixoto
Browse files

Implement new binary graph format "gt"

The new format fully supports all property map types present in
graph-tool and should be much faster and produce smaller files than the
other text-based formats.
parent 531add6b
......@@ -430,18 +430,19 @@ dictionaries:
Graph I/O
---------
Graphs can be saved and loaded in three formats: `graphml
Graphs can be saved and loaded in four formats: `graphml
<http://graphml.graphdrawing.org/>`_, `dot
<http://www.graphviz.org/doc/info/lang.html>`_ and `gml
<http://www.fim.uni-passau.de/en/fim/faculty/chairs/theoretische-informatik/projects.html>`_.
``Graphml`` is the default and preferred format, since it is by far the
most complete. The ``dot`` and ``gml`` formats are fully supported, but
since they contain no precise type information, all properties are read
as strings (or also as double, in the case of ``gml``), and must be
converted by hand. Therefore you should always use graphml, since it
performs an exact bit-for-bit representation of all supported
:ref:`sec_property_maps`, except when interfacing with other software, or
existing data, which uses ``dot`` or ``gml``.
<http://www.graphviz.org/doc/info/lang.html>`_, `gml
<http://www.fim.uni-passau.de/en/fim/faculty/chairs/theoretische-informatik/projects.html>`_
and a custom binary format ``gt``. The binary format ``gt`` and
``graphml`` are the preferred formats, since they are by far the most
complete. The ``dot`` and ``gml`` formats are fully supported, but since
they contain no precise type information, all properties are read as
strings (or also as double, in the case of ``gml``), and must be
converted by hand. Therefore you should always use either ``gt`` or
``graphml``, since they perform an exact bit-for-bit representation of
all supported :ref:`sec_property_maps`, except when interfacing with
other software, or existing data, which uses ``dot`` or ``gml``.
A graph can be saved or loaded to a file with the :attr:`~graph_tool.Graph.save`
and :attr:`~graph_tool.Graph.load` methods, which take either a file name or a
......
......@@ -106,9 +106,9 @@ bool read_graphviz(const std::string& str,
MutableGraph& graph, boost::dynamic_properties& dp,
std::string const& node_id = "node_id",
bool ignore_directedness = false,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>()) {
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>()) {
boost::detail::graph::mutate_graph_impl<MutableGraph> mg(graph, dp, node_id,
ignore_vp, ignore_ep,
ignore_gp);
......@@ -120,9 +120,9 @@ bool read_graphviz(InputIter begin, InputIter end,
MutableGraph& graph, boost::dynamic_properties& dp,
std::string const& node_id = "node_id",
bool ignore_directedness = false,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>()) {
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>()) {
return read_graphviz(std::string(begin, end), graph, dp, node_id,
ignore_directedness, ignore_vp, ignore_ep, ignore_gp);
}
......
......@@ -24,12 +24,20 @@
#include <boost/mpl/for_each.hpp>
#include <boost/python/object.hpp>
#include <boost/bind.hpp>
#include <string>
#include <exception>
#include <set>
#include <unordered_set>
namespace boost
{
// Base64 Encoding
std::string base64_encode(const std::string& s);
std::string base64_decode(const std::string& s);
/////////////////////////////////////////////////////////////////////////////
// Graph reader exceptions
/////////////////////////////////////////////////////////////////////////////
......@@ -86,9 +94,9 @@ class mutate_graph_impl : public mutate_graph
public:
mutate_graph_impl(MutableGraph& g, dynamic_properties& dp,
bool ignore_directedness,
std::set<std::string> ignore_vp,
std::set<std::string> ignore_ep,
std::set<std::string> ignore_gp)
std::unordered_set<std::string> ignore_vp,
std::unordered_set<std::string> ignore_ep,
std::unordered_set<std::string> ignore_gp)
: m_g(g), m_dp(dp), m_ignore_directedness(ignore_directedness),
m_is_directed(false), m_ignore_vp(ignore_vp),
m_ignore_ep(ignore_ep), m_ignore_gp(ignore_gp) { }
......@@ -232,7 +240,15 @@ public:
}
else
{
put(m_name, m_dp, m_key, lexical_cast<Value>(m_value));
if (is_same<Value, boost::python::object>::value)
{
std::string val = base64_decode(m_value);
put(m_name, m_dp, m_key, lexical_cast<Value>(val));
}
else
{
put(m_name, m_dp, m_key, lexical_cast<Value>(m_value));
}
}
m_type_found = true;
}
......@@ -252,9 +268,9 @@ protected:
dynamic_properties& m_dp;
bool m_ignore_directedness;
bool m_is_directed;
std::set<std::string> m_ignore_vp;
std::set<std::string> m_ignore_ep;
std::set<std::string> m_ignore_gp;
std::unordered_set<std::string> m_ignore_vp;
std::unordered_set<std::string> m_ignore_ep;
std::unordered_set<std::string> m_ignore_gp;
typedef mpl::vector<uint8_t, int16_t, int32_t, int64_t, double, long double,
std::vector<uint8_t>, std::vector<int32_t>,
std::vector<int64_t>, std::vector<double>,
......@@ -278,9 +294,9 @@ template<typename MutableGraph>
bool
read_graphml(std::istream& in, MutableGraph& g, dynamic_properties& dp,
bool store_ids, bool integer_vertices, bool ignore_directedness,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
std::unordered_set<std::string> ignore_vp = std::unordered_set<std::string>(),
std::unordered_set<std::string> ignore_ep = std::unordered_set<std::string>(),
std::unordered_set<std::string> ignore_gp = std::unordered_set<std::string>())
{
mutate_graph_impl<MutableGraph> mg(g, dp, ignore_directedness, ignore_vp,
ignore_ep, ignore_gp);
......@@ -319,6 +335,8 @@ struct get_string
if (v != 0)
{
sval = lexical_cast<std::string>(*v);
if (is_same<ValueType, boost::python::object>::value)
sval = base64_encode(sval);
}
}
};
......
......@@ -13,6 +13,7 @@
#include <boost/config.hpp>
#include <string>
#include <map>
#include <unordered_set>
#include <iostream>
#include <fstream>
#include <stdio.h> // for FILE
......@@ -724,9 +725,9 @@ class mutate_graph_impl : public mutate_graph
public:
mutate_graph_impl(MutableGraph& graph, dynamic_properties& dp,
std::string node_id_prop,
std::set<std::string> ignore_vp,
std::set<std::string> ignore_ep,
std::set<std::string> ignore_gp)
const std::unordered_set<std::string>& ignore_vp,
const std::unordered_set<std::string>& ignore_ep,
const std::unordered_set<std::string>& ignore_gp)
: graph_(graph), dp_(dp), node_id_prop_(node_id_prop),
m_ignore_vp(ignore_vp), m_ignore_ep(ignore_ep),
m_ignore_gp(ignore_gp) { }
......@@ -799,9 +800,9 @@ class mutate_graph_impl : public mutate_graph
std::string node_id_prop_;
std::map<node_t, bgl_vertex_t> bgl_nodes;
std::map<edge_t, bgl_edge_t> bgl_edges;
std::set<std::string> m_ignore_vp;
std::set<std::string> m_ignore_ep;
std::set<std::string> m_ignore_gp;
const std::unordered_set<std::string>& m_ignore_vp;
const std::unordered_set<std::string>& m_ignore_ep;
const std::unordered_set<std::string>& m_ignore_gp;
};
BOOST_GRAPH_DECL
......@@ -815,17 +816,17 @@ bool read_graphviz(std::istream& in, MutableGraph& graph,
dynamic_properties& dp,
std::string const& node_id = "node_id",
bool ignore_directedness = false,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>())
{
std::string data;
in >> std::noskipws;
std::copy(std::istream_iterator<char>(in),
std::istream_iterator<char>(),
std::back_inserter(data));
return read_graphviz(data,graph,dp,node_id,ignore_directedness,ignore_vp,
ignore_ep,ignore_gp);
return read_graphviz(data, graph, dp, node_id, ignore_directedness, ignore_vp,
ignore_ep, ignore_gp);
}
} // namespace boost
......
......@@ -46,6 +46,7 @@ libgraph_tool_core_la_include_HEADERS = \
graph_adaptor.hh \
graph_exceptions.hh \
graph_filtering.hh \
graph_io_binary.hh \
graph_properties.hh \
graph_properties_group.hh \
graph_python_interface.hh \
......
......@@ -70,9 +70,9 @@ class gml_state
{
public:
gml_state(Graph& g, dynamic_properties& dp,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>())
: _g(g), _dp(dp), _directed(false), _ignore_vp(ignore_vp),
_ignore_ep(ignore_ep), _ignore_gp(ignore_gp) {}
......@@ -227,9 +227,9 @@ private:
typedef std::unordered_map<std::string, val_t> prop_list_t;
vector<pair<std::string, prop_list_t> > _stack;
std::set<std::string> _ignore_vp;
std::set<std::string> _ignore_ep;
std::set<std::string> _ignore_gp;
const std::unordered_set<std::string>& _ignore_vp;
const std::unordered_set<std::string>& _ignore_ep;
const std::unordered_set<std::string>& _ignore_gp;
};
......@@ -237,9 +237,9 @@ template <class Iterator, class Graph, class Skipper>
struct gml : spirit::qi::grammar<Iterator, void(), Skipper>
{
gml(Graph& g, dynamic_properties& dp,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>())
: gml::base_type(start), _state(g, dp, ignore_vp, ignore_ep, ignore_gp)
{
using namespace spirit;
......@@ -276,9 +276,9 @@ struct gml : spirit::qi::grammar<Iterator, void(), Skipper>
template <class Iterator, class Graph, class Skipper>
bool parse_grammar(Iterator begin, Iterator end, Graph& g,
dynamic_properties& dp, Skipper skip,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>())
{
using namespace spirit;
gml<spirit::istream_iterator, Graph, Skipper> parser(g, dp, ignore_vp,
......@@ -292,9 +292,9 @@ bool parse_grammar(Iterator begin, Iterator end, Graph& g,
template <class Graph>
bool read_gml(istream& in, Graph& g, dynamic_properties& dp,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
const std::unordered_set<std::string>& ignore_vp = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_ep = std::unordered_set<std::string>(),
const std::unordered_set<std::string>& ignore_gp = std::unordered_set<std::string>())
{
using namespace spirit;
......
......@@ -81,6 +81,8 @@ python::object lexical_cast<python::object,string>(const string& ps)
}
}
#include "graph_io_binary.hh"
// the following source & sink provide iostream access to python file-like
// objects
......@@ -146,6 +148,21 @@ struct get_python_property
.base()));
} catch (bad_cast&) {}
}
// Tries to interpret the type-erased `map` as the property map type produced
// by property_map_type::apply<ValueType, IndexMap>. On a match, the map is
// wrapped in a PythonPropertyMap and the resulting python object is stored
// in `pmap`; on a mismatch `pmap` is left untouched.
//
// The pointer form of any_cast is used on purpose: it returns a null pointer
// for a type mismatch instead of throwing bad_any_cast. This functor is
// applied to every candidate value type in turn, so a mismatch is the common
// case and should not go through exception handling.
template <class ValueType, class IndexMap>
void operator()(ValueType, IndexMap, boost::any& map,
                boost::python::object& pmap) const
{
    typedef typename property_map_type::apply<ValueType, IndexMap>::type
        map_t;

    const map_t* held = any_cast<map_t>(&map);
    if (held == nullptr)
        return; // `map` stores some other type; nothing to do

    PythonPropertyMap<map_t> opmap(*held);
    pmap = boost::python::object(opmap);
}
};
template <class IndexMap>
......@@ -158,6 +175,16 @@ boost::python::object find_property_map(dynamic_property_map& map, IndexMap)
return pmap;
}
// Returns the python wrapper for the property map stored in the type-erased
// `map`. Every type in value_types is tried through get_python_property; if
// none of them matches, the returned object is left default-constructed.
template <class IndexMap>
boost::python::object find_property_map(boost::any& map, IndexMap)
{
    boost::python::object result;

    // get_python_property receives (value type tag, index map, any, output);
    // only the value type tag varies across the for_each iterations.
    auto probe = std::bind(get_python_property(), std::placeholders::_1,
                           IndexMap(), std::ref(map), std::ref(result));
    boost::mpl::for_each<value_types>(probe);

    return result;
}
// this functor will check whether a value is of a specific type, create a
// corresponding vector_property_map and add the value to it
......@@ -284,9 +311,9 @@ struct graph_traits<FakeUndirGraph<Graph> >
// ReadFromFile(file, pfile, format)
//==============================================================================
void build_stream
(boost::iostreams::filtering_stream<boost::iostreams::input>& stream,
const string& file, boost::python::object& pfile, std::ifstream& file_stream)
void build_stream(boost::iostreams::filtering_stream<boost::iostreams::input>& stream,
const string& file, boost::python::object& pfile,
std::ifstream& file_stream)
{
stream.reset();
if (file == "-")
......@@ -321,7 +348,7 @@ boost::python::tuple GraphInterface::ReadFromFile(string file,
boost::python::list ignore_ep,
boost::python::list ignore_gp)
{
if (format != "dot" && format != "xml" && format != "gml")
if (format != "gt" && format != "dot" && format != "xml" && format != "gml")
throw ValueException("error reading from file '" + file +
"': requested invalid format '" + format + "'");
try
......@@ -331,7 +358,7 @@ boost::python::tuple GraphInterface::ReadFromFile(string file,
std::ifstream file_stream;
build_stream(stream, file, pfile, file_stream);
set<string> ivp, iep, igp;
std::unordered_set<std::string> ivp, iep, igp;
for (int i = 0; i < len(ignore_vp); ++i)
ivp.insert(boost::python::extract<string>(ignore_vp[i]));
for (int i = 0; i < len(ignore_ep); ++i)
......@@ -353,18 +380,34 @@ boost::python::tuple GraphInterface::ReadFromFile(string file,
else if (format == "gml")
_directed = read_gml(stream, *_mg, dp, ivp, iep, igp);
boost::python::dict vprops, eprops, gprops;
for(typeof(dp.begin()) iter = dp.begin(); iter != dp.end(); ++iter)
if (format == "gt")
{
if (iter->second->key() == typeid(vertex_t))
vprops[iter->first] = find_property_map(*iter->second,
_vertex_index);
else if (iter->second->key() == typeid(edge_t))
eprops[iter->first] = find_property_map(*iter->second,
_edge_index);
else
gprops[iter->first] = find_property_map(*iter->second,
_graph_index);
vector<pair<string, boost::any>> agprops, avprops, aeprops;
_directed = read_graph(stream, *_mg, agprops, avprops, aeprops, igp,
ivp, iep);
for (auto& p : agprops)
gprops[p.first] = find_property_map(p.second, _graph_index);
for (auto& p : avprops)
vprops[p.first] = find_property_map(p.second, _vertex_index);
for (auto& p : aeprops)
eprops[p.first] = find_property_map(p.second, _edge_index);
}
else
{
for(typeof(dp.begin()) iter = dp.begin(); iter != dp.end(); ++iter)
{
if (iter->second->key() == typeid(vertex_t))
vprops[iter->first] = find_property_map(*iter->second,
_vertex_index);
else if (iter->second->key() == typeid(edge_t))
eprops[iter->first] = find_property_map(*iter->second,
_edge_index);
else
gprops[iter->first] = find_property_map(*iter->second,
_graph_index);
}
}
return boost::python::make_tuple(vprops, eprops, gprops);
}
......@@ -439,6 +482,18 @@ struct write_to_file_fake_undir: public write_to_file
}
};
struct write_to_binary_file
{
template <class Graph, class IndexMap>
void operator()(ostream& stream, Graph& g, IndexMap index_map, size_t N,
bool directed, vector<pair<string, boost::any >> & gprops,
vector<pair<string, boost::any >> & vprops,
vector<pair<string, boost::any >> & eprops) const
{
write_graph(g, index_map, N, directed, gprops, vprops, eprops, stream);
}
};
struct generate_index
{
template <class Graph, class IndexMap>
......@@ -454,7 +509,7 @@ struct generate_index
void GraphInterface::WriteToFile(string file, boost::python::object pfile,
string format, boost::python::list props)
{
if (format != "xml" && format != "dot" && format != "gml")
if (format != "gt" && format != "xml" && format != "dot" && format != "gml")
throw ValueException("error writing to file '" + file +
"': requested invalid format '" + format + "'");
try
......@@ -484,57 +539,112 @@ void GraphInterface::WriteToFile(string file, boost::python::object pfile,
}
stream.exceptions(ios_base::badbit | ios_base::failbit);
dynamic_properties dp;
for (int i = 0; i < len(props); ++i)
if (format == "gt")
{
dynamic_property_map* pmap =
any_cast<dynamic_property_map*>
(boost::python::extract<boost::any>
(props[i][1].attr("get_dynamic_map")()));
dp.insert(boost::python::extract<string>(props[i][0]),
DP_SMART_PTR<dynamic_property_map>(pmap));
}
typedef property_map_types::apply<value_types,
GraphInterface::graph_index_map_t>::type
graph_properties;
if (IsVertexFilterActive())
{
// vertex indexes must be between the [0, HardNumVertices(g)] range
typedef std::unordered_map<vertex_t, size_t> map_t;
map_t vertex_to_index;
associative_property_map<map_t> index_map(vertex_to_index);
run_action<>()(*this, boost::bind<void>(generate_index(),
_1, index_map))();
if (format == "dot")
graphviz_insert_index(dp, index_map);
if (GetDirected())
run_action<detail::always_directed>()
(*this, boost::bind<void>(write_to_file(),
boost::ref(stream), _1,
index_map, boost::ref(dp),
format))();
vector<pair<string, boost::any>> agprops, avprops, aeprops;
for (int i = 0; i < python::len(props); ++i)
{
string name = python::extract<string>(props[i][0])();
boost::any p = python::extract<boost::any>(props[i][1].attr("get_map")())();
if (belongs<graph_properties>()(p))
agprops.push_back(make_pair(name, p));
if (belongs<vertex_properties>()(p))
avprops.push_back(make_pair(name, p));
if (belongs<edge_properties>()(p))
aeprops.push_back(make_pair(name, p));
}
bool directed = _directed;
_directed = true;
if (IsVertexFilterActive())
{
// vertex indexes must be between the [0, HardNumVertices(g)] range
vector_property_map<size_t> index_map(_vertex_index);
run_action<>()(*this, std::bind(generate_index(),
std::placeholders::_1,
index_map))();
run_action<>()(*this, std::bind(write_to_binary_file(),
std::ref(stream),
std::placeholders::_1,
index_map,
GetNumberOfVertices(),
directed,
std::ref(agprops),
std::ref(avprops),
std::ref(aeprops)))();
}
else
run_action<detail::never_directed>()
(*this,boost::bind<void>(write_to_file_fake_undir(),
boost::ref(stream), _1, index_map,
boost::ref(dp), format))();
{
run_action<>()(*this, std::bind(write_to_binary_file(),
std::ref(stream),
std::placeholders::_1,
_vertex_index,
GetNumberOfVertices(),
directed,
std::ref(agprops),
std::ref(avprops),
std::ref(aeprops)))();
}
_directed = directed;
}
else
{
if (format == "dot")
graphviz_insert_index(dp, _vertex_index);
if (GetDirected())
run_action<detail::always_directed>()
(*this, boost::bind<void>(write_to_file(),
boost::ref(stream), _1,
_vertex_index, boost::ref(dp),
format))();
dynamic_properties dp;
for (int i = 0; i < len(props); ++i)
{
dynamic_property_map* pmap =
any_cast<dynamic_property_map*>
(boost::python::extract<boost::any>
(props[i][1].attr("get_dynamic_map")()));
dp.insert(boost::python::extract<string>(props[i][0]),
DP_SMART_PTR<dynamic_property_map>(pmap));
}
if (IsVertexFilterActive())
{
// vertex indexes must be between the [0, HardNumVertices(g)] range
vector_property_map<size_t> index_map(_vertex_index);
run_action<>()(*this, boost::bind<void>(generate_index(),
_1, index_map))();
if (format == "dot")
graphviz_insert_index(dp, index_map);
if (GetDirected())
run_action<detail::always_directed>()
(*this, boost::bind<void>(write_to_file(),
boost::ref(stream), _1,
index_map, boost::ref(dp),
format))();
else
run_action<detail::never_directed>()
(*this,boost::bind<void>(write_to_file_fake_undir(),
boost::ref(stream), _1, index_map,
boost::ref(dp), format))();
}
else
run_action<detail::never_directed>()
(*this,boost::bind<void>(write_to_file_fake_undir(),
boost::ref(stream), _1,
_vertex_index, boost::ref(dp),
format))();
{
if (format == "dot")
graphviz_insert_index(dp, _vertex_index);
if (GetDirected())
run_action<detail::always_directed>()
(*this, boost::bind<void>(write_to_file(),
boost::ref(stream), _1,
_vertex_index, boost::ref(dp),
format))();
else
run_action<detail::never_directed>()
(*this,boost::bind<void>(write_to_file_fake_undir(),
boost::ref(stream), _1,
_vertex_index, boost::ref(dp),
format))();
}
}
stream.reset();
}
......
// graph-tool -- a general graph modification and manipulation thingy