graphml.cpp 14.4 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
// Copyright (C) 2006  Tiago de Paula Peixoto <tiago@skewed.de>
2
// Copyright (C) 2004  The Trustees of Indiana University.
Tiago Peixoto's avatar
Tiago Peixoto committed
3
//
4
5
6
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11

//  Authors: Douglas Gregor
//           Andrew Lumsdaine
//           Tiago de Paula Peixoto

12
#include <boost/python.hpp>
13
#include <boost/variant.hpp>
14
15
#include <expat.h>
#include <boost/graph/graphml.hpp>
16
#include <boost/algorithm/string/replace.hpp>
17
18
#include <boost/archive/iterators/xml_escape.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
Tiago Peixoto's avatar
Tiago Peixoto committed
19

20
using namespace boost;
21
22
23
24
namespace boost
{
std::string protect_xml_string(const std::string& os)
{
25
26
27
28
29
30
    using namespace boost::archive::iterators;
    std::stringstream s;
    std::copy(xml_escape<const char*>(os.c_str()),
              xml_escape<const char*>(os.c_str()+os.size()),
              ostream_iterator<char>(s));
    return s.str();
31
32
}
}
Tiago Peixoto's avatar
Tiago Peixoto committed
33
34
35
36

class graphml_reader
{
public:
37
38
39
    graphml_reader(mutate_graph& g, bool integer_vertices, bool store_ids)
        : m_g(g), m_canonical_vertices(false),
          m_integer_vertices(integer_vertices), m_store_ids(store_ids) { }
40

Tiago Peixoto's avatar
Tiago Peixoto committed
41
42
    void run(std::istream& in)
    {
43
        const int buffer_size = 4096;
44
45
46
47
        m_parser = XML_ParserCreateNS(0,'|');
        XML_SetElementHandler(m_parser, &on_start_element, &on_end_element);
        XML_SetCharacterDataHandler(m_parser, &on_character_data);
        XML_SetUserData(m_parser, this);
48
        char buffer[buffer_size];
49
50

        bool okay = true;
51
        do
52
53
        {
            in.read(buffer, buffer_size);
54
            okay = XML_Parse(m_parser, buffer, in.gcount(), in.gcount() == 0);
55
        }
56
        while (okay && in.good());
57

58
        if (!okay)
59
60
        {
            std::stringstream s;
61
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
62
63
              <<", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << XML_ErrorString(XML_GetErrorCode(m_parser));
64
65
            throw parse_error(s.str());
        }
66
        XML_ParserFree(m_parser);
Tiago Peixoto's avatar
Tiago Peixoto committed
67
68
69
70
    }

private:
    /// The kinds of keys. Not all of these are supported
71
72
73
    enum key_kind {
        graph_key,
        node_key,
74
75
76
        edge_key,
        hyperedge_key,
        port_key,
77
        endpoint_key,
78
        all_key
Tiago Peixoto's avatar
Tiago Peixoto committed
79
80
    };

81
82
83
84
85
86
87
88
    enum desc_kind
    {
        M_VERTEX_DESCRIPTOR,
        M_EDGE_DESCRIPTOR,
        M_GRAPH_DESCRIPTOR
    };


89
    static void
Tiago Peixoto's avatar
Tiago Peixoto committed
90
    on_start_element(void* user_data, const XML_Char *c_name,
91
                     const XML_Char **atts)
Tiago Peixoto's avatar
Tiago Peixoto committed
92
    {
93
94
95
        graphml_reader* self = static_cast<graphml_reader*>(user_data);

        std::string name(c_name);
96
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
97
98

        if (name == "edge")
99
100
        {
            std::string id;
101
            std::string source, target;
102
            while (*atts)
103
            {
104
105
106
107
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
108
109
                else if (name == "source") source = value;
                else if (name == "target") target = value;
110
                else if (name == "directed")
111
                {
112
                    bool edge_is_directed = (value == "directed");
113
114
                    if (self->m_g.is_directed() != 2 &&
                        edge_is_directed != self->m_g.is_directed())
115
                    {
116
                        if (edge_is_directed)
117
118
119
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
120
                    }
121
                    self->m_g.flip_directed(edge_is_directed);
122
                }
123
124
            }

125
126
            self->m_active_descriptor = self->handle_edge(id, source, target);
            self->m_descriptor_kind = M_EDGE_DESCRIPTOR;
127
        }
128
        else if (name == "node")
129
130
131
        {
            std::string id;

132
            while (*atts)
133
            {
134
135
                std::string name = *atts++;
                std::string value = *atts++;
136

137
                if (name == "id") id = value;
138
139
            }

140
141
            self->m_active_descriptor = self->handle_vertex(id);
            self->m_descriptor_kind = M_VERTEX_DESCRIPTOR;
142
143
        }
        else if (name == "data")
144
        {
145
            while (*atts)
146
147
148
149
150
151
152
            {
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "key") self->m_active_key = value;
            }
        }
153
        else if (name == "key")
154
155
        {
            std::string id;
156
157
158
159
            std::string key_name;
            std::string key_type;
            key_kind kind = all_key;

160
            while (*atts)
161
            {
162
163
164
165
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
166
167
                else if (name == "attr.name") key_name = value;
                else if (name == "attr.type") key_type = value;
168
                else if (name == "for")
169
                {
170
171
172
173
174
175
176
                    if (value == "graph") kind = graph_key;
                    else if (value == "node") kind = node_key;
                    else if (value == "edge") kind = edge_key;
                    else if (value == "hyperedge") kind = hyperedge_key;
                    else if (value == "port") kind = port_key;
                    else if (value == "endpoint") kind = endpoint_key;
                    else if (value == "all") kind = all_key;
177
                    else
178
                    {
179
                        std::stringstream s;
180
181
182
                        s << "on line "
                          << XML_GetCurrentLineNumber(self->m_parser)
                          << ", column "
183
                          << XML_GetCurrentColumnNumber(self->m_parser)
184
185
                          << ": unrecognized key kind '" << value << "'";
                        throw parse_error(s.str());
186
187
                    }
                }
188
189
            }

190
191
192
193
            self->m_keys[id] = kind;
            self->m_key_name[id] = key_name;
            self->m_key_type[id] = key_type;
            self->m_active_key = id;
194
195
        }
        else if (name == "graph")
196
        {
197
            while (*atts)
198
            {
199
200
                std::string name = *atts++;
                std::string value = *atts++;
201
202

                if (name == "edgedefault")
203
204
                {
                    bool edge_is_directed = (value == "directed");
205
206
                    if ( self->m_g.is_directed() != 2 &&
                         edge_is_directed != self->m_g.is_directed())
207
                    {
208
                        if (edge_is_directed)
209
210
211
212
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
                    }
213
                    self->m_g.flip_directed(edge_is_directed);
214
215
216
217
218
                }
                else if (name == "parse.nodeids")
                {
                    self->m_canonical_vertices = (value == "canonical");
                }
219
220
221
222
                else if (name == "parse.edgeids")
                {
                    self->m_canonical_edges = (value == "canonical");
                }
223
            }
224
225
            self->m_active_descriptor = any();
            self->m_descriptor_kind = M_GRAPH_DESCRIPTOR;
226
        }
227
228

        self->m_character_data.clear();
Tiago Peixoto's avatar
Tiago Peixoto committed
229
    }
230

Tiago Peixoto's avatar
Tiago Peixoto committed
231
232
233
    static void
    on_end_element(void* user_data, const XML_Char *c_name)
    {
234
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
235

236
        std::string name(c_name);
237
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
238

239
240
        if (name == "data")
        {
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
            switch (self->m_descriptor_kind)
            {
            case M_VERTEX_DESCRIPTOR:
                self->handle_vertex_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            case M_EDGE_DESCRIPTOR:
                self->handle_edge_property(self->m_active_key, self->m_active_descriptor,
                                           self->m_character_data);
                break;
            case M_GRAPH_DESCRIPTOR:
                self->handle_graph_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            }
256
        }
257
258
259
260
        else if (name == "default")
        {
            self->m_key_default[self->m_active_key] = self->m_character_data;
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
261
262
263
264
265
    }

    static void
    on_character_data(void* user_data, const XML_Char* s, int len)
    {
266
267
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
        self->m_character_data.append(s, len);
Tiago Peixoto's avatar
Tiago Peixoto committed
268
269
    }

270
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
271
272
    handle_vertex(const std::string& v)
    {
273
274
275
276
277
278
279
        bool is_new = false;

        if (m_canonical_vertices)
        {
            size_t id;

            //strip leading "n" from name
280
            try
281
            {
282
                id = lexical_cast<size_t>(std::string(v,1));
283
284
285
            }
            catch (bad_lexical_cast)
            {
286
                std::stringstream s;
287
                s << "on line " << XML_GetCurrentLineNumber(m_parser)
288
289
290
                  << ", column " << XML_GetCurrentColumnNumber(m_parser)
                  << ": invalid vertex: " << v;
                throw parse_error(s.str());
291
            }
292

293
            if (m_integer_vertices)
294
            {
295
296
297
298
299
300
301
302
303
304
305
                is_new = (m_g.n_vertices() <= id);
                for (size_t i = m_g.n_vertices(); i <= id; ++i)
                    m_g.do_add_vertex();
            }
            else
            {
                while(id >= m_canonical_vertex.size())
                {
                    m_canonical_vertex.push_back(m_g.do_add_vertex());
                    is_new = true;
                }
306
307
308
309
310
311
            }
        }
        else
        {
            if (m_vertex.find(v) == m_vertex.end())
            {
312
313
                m_vertex[v] = m_g.do_add_vertex();
                is_new = true;
314
315
316
            }
        }

317
        any vd = get_vertex_descriptor(v);
318
319
320
        if (is_new)
        {
            std::map<std::string, std::string>::iterator iter;
321
            for (iter = m_key_default.begin(); iter != m_key_default.end();
322
                 ++iter)
323
            {
324
                if (m_keys[iter->first] == node_key)
325
                    handle_vertex_property(iter->first, vd, iter->second);
326
            }
327
328
            if (m_store_ids && !m_canonical_vertices)
                m_g.set_vertex_property("_graphml_vertex_id",
329
                                        vd, v, "string");
330
        }
331
        return vd;
Tiago Peixoto's avatar
Tiago Peixoto committed
332
333
    }

334
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
335
336
    get_vertex_descriptor(const std::string& v)
    {
337
338
339
340
        if (m_canonical_vertices)
        {
            //strip leading "n" from name
            size_t id = lexical_cast<size_t>(std::string(v,1));
341
342
            if (m_integer_vertices)
                return id;
343
344
345
            return m_canonical_vertex[id];
        }
        else
346
        {
347
348
            return m_vertex[v];
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
349
350
    }

351
    any
352
353
    handle_edge(const std::string& id, const std::string& u,
                const std::string& v)
Tiago Peixoto's avatar
Tiago Peixoto committed
354
    {
355
356
        handle_vertex(u);
        handle_vertex(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
357

358
359
360
        any source, target;
        source = get_vertex_descriptor(u);
        target = get_vertex_descriptor(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
361

362
363
364
365
366
        any edge;
        bool added;
        tie(edge, added) = m_g.do_add_edge(source, target);
        if (!added)
            throw bad_parallel_edge(u, v);
Tiago Peixoto's avatar
Tiago Peixoto committed
367

368
369
370
371
        std::map<std::string, std::string>::iterator iter;
        for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
        {
            if (m_keys[iter->first] == edge_key)
372
                handle_edge_property(iter->first, edge, iter->second);
373
        }
374
375

        if (m_store_ids && !m_canonical_edges)
376
            m_g.set_edge_property("_graphml_edge_id", edge, id, "string");
377

378
        return edge;
379
    }
Tiago Peixoto's avatar
Tiago Peixoto committed
380

381
382
383
    void handle_edge_property(const std::string& key_id,
                              const any& descriptor,
                              const std::string& value)
Tiago Peixoto's avatar
Tiago Peixoto committed
384
    {
385
        try
386
        {
387
388
            m_g.set_edge_property(m_key_name[key_id], descriptor,
                              value, m_key_type[key_id]);
389
        }
390
        catch (parse_error &e)
391
        {
392
            std::stringstream s;
393
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
394
395
396
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
397
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
398
399
    }

400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
    void handle_vertex_property(const std::string& key_id,
                                const any& descriptor,
                                const std::string& value)
    {
        try
        {
            m_g.set_vertex_property(m_key_name[key_id], descriptor,
                                    value, m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
    }

    void handle_graph_property(const std::string& key_id,
                               const any&,
                               const std::string& value)
422
    {
423
424
425
426
427
428
429
430
431
432
433
434
435
        try
        {
            m_g.set_graph_property(m_key_name[key_id], value,
                                   m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
436
437
    }

438
    mutate_graph& m_g;
Tiago Peixoto's avatar
Tiago Peixoto committed
439
440
441
    std::map<std::string, key_kind> m_keys;
    std::map<std::string, std::string> m_key_name;
    std::map<std::string, std::string> m_key_type;
442
443
444
    std::map<std::string, std::string> m_key_default;
    std::map<std::string, any> m_vertex;
    std::vector<any> m_canonical_vertex;
445
446
447
448

    any m_active_descriptor;
    desc_kind m_descriptor_kind;

Tiago Peixoto's avatar
Tiago Peixoto committed
449
450
451
452
    std::string m_active_key;
    std::string m_character_data;
    bool m_canonical_vertices;
    bool m_canonical_edges;
453
    bool m_integer_vertices;
454
    bool m_store_ids;
455
    bool m_ignore_directedness;
456
    XML_Parser m_parser;
Tiago Peixoto's avatar
Tiago Peixoto committed
457
458
};

459
namespace boost
Tiago Peixoto's avatar
Tiago Peixoto committed
460
461
{
void
462
read_graphml(std::istream& in, mutate_graph& g, bool integer_vertices, bool store_ids)
463
{
464
    graphml_reader reader(g, integer_vertices, store_ids);
465
    reader.run(in);
Tiago Peixoto's avatar
Tiago Peixoto committed
466
467
}
}