graphml.cpp 12.8 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
// Copyright (C) 2006  Tiago de Paula Peixoto <tiago@skewed.de>
2
// Copyright (C) 2004  The Trustees of Indiana University.
Tiago Peixoto's avatar
Tiago Peixoto committed
3
//
4
5
6
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11

//  Authors: Douglas Gregor
//           Andrew Lumsdaine
//           Tiago de Paula Peixoto

12
#include <boost/variant.hpp>
13
14
#include <expat.h>
#include <boost/graph/graphml.hpp>
15
#include <boost/algorithm/string/replace.hpp>
16
17
#include <boost/archive/iterators/xml_escape.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
Tiago Peixoto's avatar
Tiago Peixoto committed
18

19
using namespace boost;
20
21
22
23
namespace boost
{
std::string protect_xml_string(const std::string& os)
{
24
25
26
27
28
29
    using namespace boost::archive::iterators;
    std::stringstream s;
    std::copy(xml_escape<const char*>(os.c_str()),
              xml_escape<const char*>(os.c_str()+os.size()),
              ostream_iterator<char>(s));
    return s.str();
30
31
}
}
Tiago Peixoto's avatar
Tiago Peixoto committed
32
33
34
35

class graphml_reader
{
public:
36
37
    graphml_reader(mutate_graph& g, bool store_ids)
        : m_g(g), m_canonical_vertices(false), m_store_ids(store_ids) { }
38

Tiago Peixoto's avatar
Tiago Peixoto committed
39
40
    void run(std::istream& in)
    {
41
        const int buffer_size = 4096;
42
43
44
45
        m_parser = XML_ParserCreateNS(0,'|');
        XML_SetElementHandler(m_parser, &on_start_element, &on_end_element);
        XML_SetCharacterDataHandler(m_parser, &on_character_data);
        XML_SetUserData(m_parser, this);
46
        char buffer[buffer_size];
47
48

        bool okay = true;
49
        do
50
51
        {
            in.read(buffer, buffer_size);
52
            okay = XML_Parse(m_parser, buffer, in.gcount(), in.gcount() == 0);
53
        }
54
        while (okay && in.good());
55

56
        if (!okay)
57
58
        {
            std::stringstream s;
59
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
60
61
              <<", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << XML_ErrorString(XML_GetErrorCode(m_parser));
62
63
            throw parse_error(s.str());
        }
64
        XML_ParserFree(m_parser);
Tiago Peixoto's avatar
Tiago Peixoto committed
65
66
67
68
    }

private:
    /// The kinds of keys. Not all of these are supported
69
70
71
    enum key_kind {
        graph_key,
        node_key,
72
73
74
        edge_key,
        hyperedge_key,
        port_key,
75
        endpoint_key,
76
        all_key
Tiago Peixoto's avatar
Tiago Peixoto committed
77
78
    };

79
    static void
Tiago Peixoto's avatar
Tiago Peixoto committed
80
    on_start_element(void* user_data, const XML_Char *c_name,
81
                     const XML_Char **atts)
Tiago Peixoto's avatar
Tiago Peixoto committed
82
    {
83
84
85
        graphml_reader* self = static_cast<graphml_reader*>(user_data);

        std::string name(c_name);
86
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
87
88

        if (name == "edge")
89
90
        {
            std::string id;
91
            std::string source, target;
92
            while (*atts)
93
            {
94
95
96
97
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
98
99
                else if (name == "source") source = value;
                else if (name == "target") target = value;
100
                else if (name == "directed")
101
                {
102
                    bool edge_is_directed = (value == "directed");
103
104
                    if (self->m_g.is_directed() != 2 &&
                        edge_is_directed != self->m_g.is_directed())
105
                    {
106
                        if (edge_is_directed)
107
108
109
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
110
                    }
111
                    self->m_g.flip_directed(edge_is_directed);
112
                }
113
114
            }

115
            self->m_active_descriptor = self->m_edge.size();
116
            self->handle_edge(id, source, target);
117
        }
118
        else if (name == "node")
119
120
121
        {
            std::string id;

122
            while (*atts)
123
            {
124
125
                std::string name = *atts++;
                std::string value = *atts++;
126

127
                if (name == "id") id = value;
128
129
130
131
            }

            self->handle_vertex(id);
            self->m_active_descriptor = id;
132
133
        }
        else if (name == "data")
134
        {
135
            while (*atts)
136
137
138
139
140
141
142
            {
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "key") self->m_active_key = value;
            }
        }
143
        else if (name == "key")
144
145
        {
            std::string id;
146
147
148
149
            std::string key_name;
            std::string key_type;
            key_kind kind = all_key;

150
            while (*atts)
151
            {
152
153
154
155
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
156
157
                else if (name == "attr.name") key_name = value;
                else if (name == "attr.type") key_type = value;
158
                else if (name == "for")
159
                {
160
161
162
163
164
165
166
                    if (value == "graph") kind = graph_key;
                    else if (value == "node") kind = node_key;
                    else if (value == "edge") kind = edge_key;
                    else if (value == "hyperedge") kind = hyperedge_key;
                    else if (value == "port") kind = port_key;
                    else if (value == "endpoint") kind = endpoint_key;
                    else if (value == "all") kind = all_key;
167
                    else
168
                    {
169
                        std::stringstream s;
170
171
172
                        s << "on line "
                          << XML_GetCurrentLineNumber(self->m_parser)
                          << ", column "
173
                          << XML_GetCurrentColumnNumber(self->m_parser)
174
175
                          << ": unrecognized key kind '" << value << "'";
                        throw parse_error(s.str());
176
177
                    }
                }
178
179
            }

180
181
182
183
            self->m_keys[id] = kind;
            self->m_key_name[id] = key_name;
            self->m_key_type[id] = key_type;
            self->m_active_key = id;
184
185
        }
        else if (name == "graph")
186
        {
187
            while (*atts)
188
            {
189
190
                std::string name = *atts++;
                std::string value = *atts++;
191
192

                if (name == "edgedefault")
193
194
                {
                    bool edge_is_directed = (value == "directed");
195
196
                    if ( self->m_g.is_directed() != 2 &&
                         edge_is_directed != self->m_g.is_directed())
197
                    {
198
                        if (edge_is_directed)
199
200
201
202
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
                    }
203
                    self->m_g.flip_directed(edge_is_directed);
204
205
206
207
208
                }
                else if (name == "parse.nodeids")
                {
                    self->m_canonical_vertices = (value == "canonical");
                }
209
210
211
212
                else if (name == "parse.edgeids")
                {
                    self->m_canonical_edges = (value == "canonical");
                }
213
214
            }
            self->m_active_descriptor = "";
215
        }
216
217

        self->m_character_data.clear();
Tiago Peixoto's avatar
Tiago Peixoto committed
218
    }
219

Tiago Peixoto's avatar
Tiago Peixoto committed
220
221
222
    static void
    on_end_element(void* user_data, const XML_Char *c_name)
    {
223
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
224

225
        std::string name(c_name);
226
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
227

228
229
        if (name == "data")
        {
230
            self->handle_property(self->m_active_key, self->m_active_descriptor,
231
                                  self->m_character_data);
232
        }
233
234
235
236
        else if (name == "default")
        {
            self->m_key_default[self->m_active_key] = self->m_character_data;
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
237
238
239
240
241
    }

    static void
    on_character_data(void* user_data, const XML_Char* s, int len)
    {
242
243
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
        self->m_character_data.append(s, len);
Tiago Peixoto's avatar
Tiago Peixoto committed
244
245
    }

246
    void
Tiago Peixoto's avatar
Tiago Peixoto committed
247
248
    handle_vertex(const std::string& v)
    {
249
250
251
252
253
254
255
        bool is_new = false;

        if (m_canonical_vertices)
        {
            size_t id;

            //strip leading "n" from name
256
            try
257
            {
258
                id = lexical_cast<size_t>(std::string(v,1));
259
260
261
            }
            catch (bad_lexical_cast)
            {
262
                std::stringstream s;
263
                s << "on line " << XML_GetCurrentLineNumber(m_parser)
264
265
266
                  << ", column " << XML_GetCurrentColumnNumber(m_parser)
                  << ": invalid vertex: " << v;
                throw parse_error(s.str());
267
            }
268

269
270
            while(id >= m_canonical_vertex.size())
            {
271
272
                m_canonical_vertex.push_back(m_g.do_add_vertex());
                is_new = true;
273
274
275
276
277
278
            }
        }
        else
        {
            if (m_vertex.find(v) == m_vertex.end())
            {
279
280
                m_vertex[v] = m_g.do_add_vertex();
                is_new = true;
281
282
283
284
285
286
            }
        }

        if (is_new)
        {
            std::map<std::string, std::string>::iterator iter;
287
            for (iter = m_key_default.begin(); iter != m_key_default.end();
288
                 ++iter)
289
            {
290
                if (m_keys[iter->first] == node_key)
291
                    handle_property(iter->first, v, iter->second);
292
            }
293
294
295
296
            if (m_store_ids && !m_canonical_vertices)
                m_g.set_vertex_property("_graphml_vertex_id",
                                        get_vertex_descriptor(v),
                                        v, "string");
297
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
298
299
    }

300
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
301
302
    get_vertex_descriptor(const std::string& v)
    {
303
304
305
306
307
308
309
        if (m_canonical_vertices)
        {
            //strip leading "n" from name
            size_t id = lexical_cast<size_t>(std::string(v,1));
            return m_canonical_vertex[id];
        }
        else
310
        {
311
312
            return m_vertex[v];
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
313
314
    }

315
    void
316
317
    handle_edge(const std::string& id, const std::string& u,
                const std::string& v)
Tiago Peixoto's avatar
Tiago Peixoto committed
318
    {
319
320
        handle_vertex(u);
        handle_vertex(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
321

322
323
324
        any source, target;
        source = get_vertex_descriptor(u);
        target = get_vertex_descriptor(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
325

326
327
328
329
330
        any edge;
        bool added;
        tie(edge, added) = m_g.do_add_edge(source, target);
        if (!added)
            throw bad_parallel_edge(u, v);
Tiago Peixoto's avatar
Tiago Peixoto committed
331

332
333
        size_t e = m_edge.size();
        m_edge.push_back(edge);
334

335
336
337
338
        std::map<std::string, std::string>::iterator iter;
        for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
        {
            if (m_keys[iter->first] == edge_key)
339
                handle_property(iter->first, e, iter->second);
340
        }
341
342
343
344
345

        if (m_store_ids && !m_canonical_edges)
            m_g.set_edge_property("_graphml_edge_id", get_edge_descriptor(e),
                                  id, "string");

346
    }
Tiago Peixoto's avatar
Tiago Peixoto committed
347

348
349
    void handle_property(const std::string& key_id,
                         const variant<std::string,size_t>& descriptor,
350
                         const std::string& value)
Tiago Peixoto's avatar
Tiago Peixoto committed
351
    {
352
        try
353
        {
354
355
356
            if (get<std::string>(&descriptor))
            {
                if (get<std::string>(descriptor) == "")
357
                    m_g.set_graph_property(m_key_name[key_id], value,
358
                                           m_key_type[key_id]);
359
                else
360
                    m_g.set_vertex_property(m_key_name[key_id],
361
                                            get_vertex_descriptor
362
                                                (get<std::string>(descriptor)),
363
                                            value, m_key_type[key_id]);
364
            }
365
            else
366
            {
367
                m_g.set_edge_property(m_key_name[key_id],
368
                                      get_edge_descriptor
369
                                          (get<size_t>(descriptor)),
370
                                      value, m_key_type[key_id]);
371
            }
372
        }
373
        catch (parse_error &e)
374
        {
375
            std::stringstream s;
376
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
377
378
379
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
380
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
381
382
    }

383
384
385
386
387
388
    any
    get_edge_descriptor(size_t e)
    {
        return m_edge[e];
    }

389
    mutate_graph& m_g;
Tiago Peixoto's avatar
Tiago Peixoto committed
390
391
392
    std::map<std::string, key_kind> m_keys;
    std::map<std::string, std::string> m_key_name;
    std::map<std::string, std::string> m_key_type;
393
394
395
    std::map<std::string, std::string> m_key_default;
    std::map<std::string, any> m_vertex;
    std::vector<any> m_canonical_vertex;
396
397
    std::vector<any> m_edge;
    variant<std::string, size_t> m_active_descriptor;
Tiago Peixoto's avatar
Tiago Peixoto committed
398
399
400
401
    std::string m_active_key;
    std::string m_character_data;
    bool m_canonical_vertices;
    bool m_canonical_edges;
402
    bool m_store_ids;
403
    bool m_ignore_directedness;
404
    XML_Parser m_parser;
Tiago Peixoto's avatar
Tiago Peixoto committed
405
406
};

407
namespace boost
Tiago Peixoto's avatar
Tiago Peixoto committed
408
409
{
void
410
read_graphml(std::istream& in, mutate_graph& g, bool store_ids)
411
{
412
    graphml_reader reader(g, store_ids);
413
    reader.run(in);
Tiago Peixoto's avatar
Tiago Peixoto committed
414
415
}
}