Commit 2583f906 authored by Tiago Peixoto

Improve memory usage of graphml reader and include support for ignoring properties

When the graph uses canonical names, the GraphML reader no longer builds
a list of edges/vertices during parsing.
parent 655107df
......@@ -105,8 +105,13 @@ template <typename MutableGraph>
bool read_graphviz(const std::string& str,
MutableGraph& graph, boost::dynamic_properties& dp,
std::string const& node_id = "node_id",
bool ignore_directedness = false) {
boost::detail::graph::mutate_graph_impl<MutableGraph> mg(graph, dp, node_id);
bool ignore_directedness = false,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>()) {
boost::detail::graph::mutate_graph_impl<MutableGraph> mg(graph, dp, node_id,
ignore_vp, ignore_ep,
ignore_gp);
return detail::graph::read_graphviz(str, &mg, ignore_directedness);
}
......@@ -114,9 +119,12 @@ template <typename InputIter, typename MutableGraph>
bool read_graphviz(InputIter begin, InputIter end,
MutableGraph& graph, boost::dynamic_properties& dp,
std::string const& node_id = "node_id",
bool ignore_directedness = false) {
bool ignore_directedness = false,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>()) {
return read_graphviz(std::string(begin, end), graph, dp, node_id,
ignore_directedness);
ignore_directedness, ignore_vp, ignore_ep, ignore_gp);
}
} // namespace boost
......
......@@ -25,6 +25,7 @@
#include <boost/python/object.hpp>
#include <boost/bind.hpp>
#include <exception>
#include <set>
namespace boost
{
......@@ -57,6 +58,8 @@ public:
virtual std::pair<boost::any,bool> do_add_edge(boost::any source,
boost::any target) = 0;
virtual size_t n_vertices() const = 0;
virtual void
set_graph_property(const std::string& name, const std::string& value,
const std::string& value_type) = 0;
......@@ -82,9 +85,13 @@ class mutate_graph_impl : public mutate_graph
public:
mutate_graph_impl(MutableGraph& g, dynamic_properties& dp,
bool ignore_directedness)
bool ignore_directedness,
std::set<std::string> ignore_vp,
std::set<std::string> ignore_ep,
std::set<std::string> ignore_gp)
: m_g(g), m_dp(dp), m_ignore_directedness(ignore_directedness),
m_is_directed(false) { }
m_is_directed(false), m_ignore_vp(ignore_vp),
m_ignore_ep(ignore_ep), m_ignore_gp(ignore_gp) { }
virtual int is_directed() const
{
......@@ -119,10 +126,18 @@ public:
return std::make_pair(any(retval.first), retval.second);
}
virtual size_t n_vertices() const
{
    // Report how many vertices the wrapped graph currently holds.
    const size_t vertex_count = num_vertices(m_g);
    return vertex_count;
}
virtual void
set_graph_property(const std::string& name,
const std::string& value, const std::string& value_type)
{
if (m_ignore_gp.find(name) != m_ignore_gp.end())
return;
bool type_found = false;
try
{
......@@ -146,6 +161,9 @@ public:
set_vertex_property(const std::string& name, any vertex,
const std::string& value, const std::string& value_type)
{
if (m_ignore_vp.find(name) != m_ignore_vp.end())
return;
bool type_found = false;
try
{
......@@ -169,6 +187,9 @@ public:
set_edge_property(const std::string& name, any edge,
const std::string& value, const std::string& value_type)
{
if (m_ignore_ep.find(name) != m_ignore_ep.end())
return;
bool type_found = false;
try
{
......@@ -231,6 +252,9 @@ protected:
dynamic_properties& m_dp;
bool m_ignore_directedness;
bool m_is_directed;
std::set<std::string> m_ignore_vp;
std::set<std::string> m_ignore_ep;
std::set<std::string> m_ignore_gp;
typedef mpl::vector<uint8_t, int16_t, int32_t, int64_t, double, long double,
std::vector<uint8_t>, std::vector<int32_t>,
std::vector<int64_t>, std::vector<double>,
......@@ -247,15 +271,20 @@ const char* mutate_graph_impl<MutableGraph>::m_type_names[] =
"python_object"};
void
read_graphml(std::istream& in, mutate_graph& g, bool store_ids);
read_graphml(std::istream& in, mutate_graph& g, bool integer_vertices,
bool store_ids);
template<typename MutableGraph>
bool
read_graphml(std::istream& in, MutableGraph& g, dynamic_properties& dp,
bool store_ids, bool ignore_directedness)
bool store_ids, bool integer_vertices, bool ignore_directedness,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
{
mutate_graph_impl<MutableGraph> mg(g,dp,ignore_directedness);
read_graphml(in, mg, store_ids);
mutate_graph_impl<MutableGraph> mg(g, dp, ignore_directedness, ignore_vp,
ignore_ep, ignore_gp);
read_graphml(in, mg, integer_vertices, store_ids);
return mg.get_directed();
}
......
......@@ -723,8 +723,13 @@ class mutate_graph_impl : public mutate_graph
public:
mutate_graph_impl(MutableGraph& graph, dynamic_properties& dp,
std::string node_id_prop)
: graph_(graph), dp_(dp), node_id_prop_(node_id_prop) { }
std::string node_id_prop,
std::set<std::string> ignore_vp,
std::set<std::string> ignore_ep,
std::set<std::string> ignore_gp)
: graph_(graph), dp_(dp), node_id_prop_(node_id_prop),
m_ignore_vp(ignore_vp), m_ignore_ep(ignore_ep),
m_ignore_gp(ignore_gp) { }
~mutate_graph_impl() {}
......@@ -765,18 +770,24 @@ class mutate_graph_impl : public mutate_graph
void
set_node_property(const id_t& key, const node_t& node, const id_t& value)
{
    // A key listed in the vertex ignore set is dropped before dp_ or
    // bgl_nodes is touched; any other key is stored on the node's vertex.
    const bool ignored = m_ignore_vp.count(key) != 0;
    if (!ignored)
        put(key, dp_, bgl_nodes[node], value);
}
void
set_edge_property(const id_t& key, const edge_t& edge, const id_t& value)
{
    // A key listed in the edge ignore set is dropped before dp_ or
    // bgl_edges is touched; any other key is stored on the mapped edge.
    const bool ignored = m_ignore_ep.count(key) != 0;
    if (!ignored)
        put(key, dp_, bgl_edges[edge], value);
}
void
set_graph_property(const id_t& key, const id_t& value)
{
    // A key listed in the graph ignore set is dropped without touching dp_.
    const bool ignored = m_ignore_gp.count(key) != 0;
    if (!ignored)
    {
        /* RG: pointer to graph prevents copying */
        put(key, dp_, &graph_, value);
    }
}
......@@ -788,6 +799,9 @@ class mutate_graph_impl : public mutate_graph
std::string node_id_prop_;
std::map<node_t, bgl_vertex_t> bgl_nodes;
std::map<edge_t, bgl_edge_t> bgl_edges;
std::set<std::string> m_ignore_vp;
std::set<std::string> m_ignore_ep;
std::set<std::string> m_ignore_gp;
};
BOOST_GRAPH_DECL
......@@ -800,14 +814,18 @@ template <typename MutableGraph>
bool read_graphviz(std::istream& in, MutableGraph& graph,
dynamic_properties& dp,
std::string const& node_id = "node_id",
bool ignore_directedness = false)
bool ignore_directedness = false,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
{
std::string data;
in >> std::noskipws;
std::copy(std::istream_iterator<char>(in),
std::istream_iterator<char>(),
std::back_inserter(data));
return read_graphviz(data,graph,dp,node_id,ignore_directedness);
return read_graphviz(data,graph,dp,node_id,ignore_directedness,ignore_vp,
ignore_ep,ignore_gp);
}
} // namespace boost
......
......@@ -70,8 +70,12 @@ template <class Graph>
class gml_state
{
public:
gml_state(Graph& g, dynamic_properties& dp)
: _g(g), _dp(dp), _directed(false) {}
gml_state(Graph& g, dynamic_properties& dp,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
: _g(g), _dp(dp), _directed(false), _ignore_vp(ignore_vp),
_ignore_ep(ignore_ep), _ignore_gp(ignore_gp) {}
typedef boost::variant<std::string, int, double> val_t;
......@@ -120,6 +124,8 @@ public:
{
if (iter->first == "id")
continue;
if (_ignore_vp.find(iter->first) != _ignore_vp.end())
continue;
try
{
put(iter->first, _dp, v, boost::get<string>(iter->second));
......@@ -161,6 +167,8 @@ public:
{
if (iter->first == "id" || iter->first == "source" || iter->first == "target")
continue;
if (_ignore_ep.find(iter->first) != _ignore_ep.end())
continue;
try
{
put(iter->first, _dp, e, boost::get<string>(iter->second));
......@@ -180,6 +188,8 @@ public:
{
if (iter->first == "directed")
_directed = boost::get<double>(iter->second);
if (_ignore_gp.find(iter->first) != _ignore_gp.end())
continue;
try
{
put(iter->first, _dp, graph_property_tag(), boost::get<string>(iter->second));
......@@ -217,13 +227,21 @@ private:
// the stack holds the keys, and its properties (but omits nested lists)
typedef tr1::unordered_map<std::string, val_t> prop_list_t;
vector<pair<std::string, prop_list_t> > _stack;
std::set<std::string> _ignore_vp;
std::set<std::string> _ignore_ep;
std::set<std::string> _ignore_gp;
};
template <class Iterator, class Graph, class Skipper>
struct gml : spirit::qi::grammar<Iterator, void(), Skipper>
{
gml(Graph& g, dynamic_properties& dp) : gml::base_type(start), _state(g, dp)
gml(Graph& g, dynamic_properties& dp,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
: gml::base_type(start), _state(g, dp, ignore_vp, ignore_ep, ignore_gp)
{
using namespace spirit;
using spirit::ascii::char_;
......@@ -258,10 +276,14 @@ struct gml : spirit::qi::grammar<Iterator, void(), Skipper>
template <class Iterator, class Graph, class Skipper>
bool parse_grammar(Iterator begin, Iterator end, Graph& g,
dynamic_properties& dp, Skipper skip)
dynamic_properties& dp, Skipper skip,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
{
using namespace spirit;
gml<spirit::istream_iterator, Graph, Skipper> parser(g, dp);
gml<spirit::istream_iterator, Graph, Skipper> parser(g, dp, ignore_vp,
ignore_ep, ignore_gp);
bool ok = qi::phrase_parse(begin, end, parser, skip);
if (!ok)
throw gml_parse_error("invalid syntax");
......@@ -270,7 +292,10 @@ bool parse_grammar(Iterator begin, Iterator end, Graph& g,
template <class Graph>
bool read_gml(istream& in, Graph& g, dynamic_properties& dp)
bool read_gml(istream& in, Graph& g, dynamic_properties& dp,
std::set<std::string> ignore_vp = std::set<std::string>(),
std::set<std::string> ignore_ep = std::set<std::string>(),
std::set<std::string> ignore_gp = std::set<std::string>())
{
using namespace spirit;
......@@ -280,11 +305,9 @@ bool read_gml(istream& in, Graph& g, dynamic_properties& dp)
bool directed =
parse_grammar(begin, end, g, dp,
(ascii::space |'#' >> *(ascii::char_ - qi::eol) >> qi::eol));
(ascii::space |'#' >> *(ascii::char_ - qi::eol) >> qi::eol),
ignore_vp, ignore_ep, ignore_gp);
in >> std::noskipws;
std::stringstream input;
input << in.rdbuf();
return directed;
}
......
......@@ -119,7 +119,9 @@ public:
// I/O
void WriteToFile(string s, python::object pf, string format,
python::list properties);
python::tuple ReadFromFile(string s, python::object pf, string format);
python::tuple ReadFromFile(string s, python::object pf, string format,
python::list ignore_vp, python::list ignore_ep,
python::list ignore_gp);
//
// Internal types
......
......@@ -308,7 +308,10 @@ void build_stream
python::tuple GraphInterface::ReadFromFile(string file, python::object pfile,
string format)
string format,
python::list ignore_vp,
python::list ignore_ep,
python::list ignore_gp)
{
if (format != "dot" && format != "xml" && format != "gml")
throw ValueException("error reading from file '" + file +
......@@ -320,17 +323,27 @@ python::tuple GraphInterface::ReadFromFile(string file, python::object pfile,
std::ifstream file_stream;
build_stream(stream, file, pfile, file_stream);
set<string> ivp, iep, igp;
for (int i = 0; i < len(ignore_vp); ++i)
ivp.insert(python::extract<string>(ignore_vp[i]));
for (int i = 0; i < len(ignore_ep); ++i)
iep.insert(python::extract<string>(ignore_ep[i]));
for (int i = 0; i < len(ignore_gp); ++i)
igp.insert(python::extract<string>(ignore_gp[i]));
create_dynamic_map<vertex_index_map_t,edge_index_map_t>
map_creator(_vertex_index, _edge_index);
dynamic_properties dp(map_creator);
*_mg = multigraph_t();
if (format == "dot")
_directed = read_graphviz(stream, *_mg, dp, "vertex_name", true);
_directed = read_graphviz(stream, *_mg, dp, "vertex_name", true,
ivp, iep, igp);
else if (format == "xml")
_directed = read_graphml(stream, *_mg, dp, true, true);
_directed = read_graphml(stream, *_mg, dp, true, true, true,
ivp, iep, igp);
else if (format == "gml")
_directed = read_gml(stream, *_mg, dp);
_directed = read_gml(stream, *_mg, dp, ivp, iep, igp);
python::dict vprops, eprops, gprops;
for(typeof(dp.begin()) iter = dp.begin(); iter != dp.end(); ++iter)
......
......@@ -34,8 +34,9 @@ std::string protect_xml_string(const std::string& os)
class graphml_reader
{
public:
graphml_reader(mutate_graph& g, bool store_ids)
: m_g(g), m_canonical_vertices(false), m_store_ids(store_ids) { }
graphml_reader(mutate_graph& g, bool integer_vertices, bool store_ids)
: m_g(g), m_canonical_vertices(false),
m_integer_vertices(integer_vertices), m_store_ids(store_ids) { }
void run(std::istream& in)
{
......@@ -77,6 +78,14 @@ private:
all_key
};
// Records which kind of element the currently active descriptor refers to,
// so that a closing "data" element can be routed to the matching
// handle_vertex_property / handle_edge_property / handle_graph_property call.
enum desc_kind
{
// set after a "node" element has been handled
M_VERTEX_DESCRIPTOR,
// set after an "edge" element has been handled
M_EDGE_DESCRIPTOR,
// set together with an empty descriptor (any())
// NOTE(review): presumably on the "graph" element -- confirm against the full handler
M_GRAPH_DESCRIPTOR
};
static void
on_start_element(void* user_data, const XML_Char *c_name,
const XML_Char **atts)
......@@ -113,8 +122,8 @@ private:
}
}
self->m_active_descriptor = self->m_edge.size();
self->handle_edge(id, source, target);
self->m_active_descriptor = self->handle_edge(id, source, target);
self->m_descriptor_kind = M_EDGE_DESCRIPTOR;
}
else if (name == "node")
{
......@@ -128,8 +137,8 @@ private:
if (name == "id") id = value;
}
self->handle_vertex(id);
self->m_active_descriptor = id;
self->m_active_descriptor = self->handle_vertex(id);
self->m_descriptor_kind = M_VERTEX_DESCRIPTOR;
}
else if (name == "data")
{
......@@ -212,7 +221,8 @@ private:
self->m_canonical_edges = (value == "canonical");
}
}
self->m_active_descriptor = "";
self->m_active_descriptor = any();
self->m_descriptor_kind = M_GRAPH_DESCRIPTOR;
}
self->m_character_data.clear();
......@@ -228,8 +238,21 @@ private:
if (name == "data")
{
self->handle_property(self->m_active_key, self->m_active_descriptor,
self->m_character_data);
switch (self->m_descriptor_kind)
{
case M_VERTEX_DESCRIPTOR:
self->handle_vertex_property(self->m_active_key, self->m_active_descriptor,
self->m_character_data);
break;
case M_EDGE_DESCRIPTOR:
self->handle_edge_property(self->m_active_key, self->m_active_descriptor,
self->m_character_data);
break;
case M_GRAPH_DESCRIPTOR:
self->handle_graph_property(self->m_active_key, self->m_active_descriptor,
self->m_character_data);
break;
}
}
else if (name == "default")
{
......@@ -244,7 +267,7 @@ private:
self->m_character_data.append(s, len);
}
void
any
handle_vertex(const std::string& v)
{
bool is_new = false;
......@@ -267,10 +290,19 @@ private:
throw parse_error(s.str());
}
while(id >= m_canonical_vertex.size())
if (m_integer_vertices)
{
m_canonical_vertex.push_back(m_g.do_add_vertex());
is_new = true;
is_new = (m_g.n_vertices() <= id);
for (size_t i = m_g.n_vertices(); i <= id; ++i)
m_g.do_add_vertex();
}
else
{
while(id >= m_canonical_vertex.size())
{
m_canonical_vertex.push_back(m_g.do_add_vertex());
is_new = true;
}
}
}
else
......@@ -282,6 +314,7 @@ private:
}
}
any vd = get_vertex_descriptor(v);
if (is_new)
{
std::map<std::string, std::string>::iterator iter;
......@@ -289,13 +322,13 @@ private:
++iter)
{
if (m_keys[iter->first] == node_key)
handle_property(iter->first, v, iter->second);
handle_vertex_property(iter->first, vd, iter->second);
}
if (m_store_ids && !m_canonical_vertices)
m_g.set_vertex_property("_graphml_vertex_id",
get_vertex_descriptor(v),
v, "string");
vd, v, "string");
}
return vd;
}
any
......@@ -305,6 +338,8 @@ private:
{
//strip leading "n" from name
size_t id = lexical_cast<size_t>(std::string(v,1));
if (m_integer_vertices)
return id;
return m_canonical_vertex[id];
}
else
......@@ -313,7 +348,7 @@ private:
}
}
void
any
handle_edge(const std::string& id, const std::string& u,
const std::string& v)
{
......@@ -330,46 +365,27 @@ private:
if (!added)
throw bad_parallel_edge(u, v);
size_t e = m_edge.size();
m_edge.push_back(edge);
std::map<std::string, std::string>::iterator iter;
for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
{
if (m_keys[iter->first] == edge_key)
handle_property(iter->first, e, iter->second);
handle_edge_property(iter->first, edge, iter->second);
}
if (m_store_ids && !m_canonical_edges)
m_g.set_edge_property("_graphml_edge_id", get_edge_descriptor(e),
id, "string");
m_g.set_edge_property("_graphml_edge_id", edge, id, "string");
return edge;
}
void handle_property(const std::string& key_id,
const variant<std::string,size_t>& descriptor,
const std::string& value)
void handle_edge_property(const std::string& key_id,
const any& descriptor,
const std::string& value)
{
try
{
if (get<std::string>(&descriptor))
{
if (get<std::string>(descriptor) == "")
m_g.set_graph_property(m_key_name[key_id], value,
m_key_type[key_id]);
else
m_g.set_vertex_property(m_key_name[key_id],
get_vertex_descriptor
(get<std::string>(descriptor)),
value, m_key_type[key_id]);
}
else
{
m_g.set_edge_property(m_key_name[key_id],
get_edge_descriptor
(get<size_t>(descriptor)),
value, m_key_type[key_id]);
}
m_g.set_edge_property(m_key_name[key_id], descriptor,
value, m_key_type[key_id]);
}
catch (parse_error &e)
{
......@@ -381,10 +397,42 @@ private:
}
}
any
get_edge_descriptor(size_t e)
void handle_vertex_property(const std::string& key_id,
const any& descriptor,
const std::string& value)
{
try
{
m_g.set_vertex_property(m_key_name[key_id], descriptor,
value, m_key_type[key_id]);
}
catch (parse_error &e)
{
std::stringstream s;
s << "on line " << XML_GetCurrentLineNumber(m_parser)
<< ", column " << XML_GetCurrentColumnNumber(m_parser)
<< ": " << e.error;
throw parse_error(s.str());
}
}
void handle_graph_property(const std::string& key_id,
const any&,
const std::string& value)
{