graphml.cpp 14.5 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
// Copyright (C) 2006  Tiago de Paula Peixoto <tiago@skewed.de>
2
// Copyright (C) 2004  The Trustees of Indiana University.
Tiago Peixoto's avatar
Tiago Peixoto committed
3
//
4
5
6
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11

//  Authors: Douglas Gregor
//           Andrew Lumsdaine
//           Tiago de Paula Peixoto

12
#include <boost/python.hpp>
13
#include <boost/variant.hpp>
14
15
#include <expat.h>
#include <boost/graph/graphml.hpp>
16
#include <boost/algorithm/string/replace.hpp>
17
18
#include <boost/archive/iterators/xml_escape.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
Tiago Peixoto's avatar
Tiago Peixoto committed
19

20
using namespace boost;
21
22
23
24
namespace boost
{
std::string protect_xml_string(const std::string& os)
{
25
26
27
28
29
30
    using namespace boost::archive::iterators;
    std::stringstream s;
    std::copy(xml_escape<const char*>(os.c_str()),
              xml_escape<const char*>(os.c_str()+os.size()),
              ostream_iterator<char>(s));
    return s.str();
31
32
}
}
Tiago Peixoto's avatar
Tiago Peixoto committed
33
34
35
36

class graphml_reader
{
public:
37
    graphml_reader(mutate_graph& g, bool integer_vertices, bool store_ids)
38
39
40
41
42
        : m_g(g),
          m_canonical_vertices(false),
          m_canonical_edges(false),
          m_integer_vertices(integer_vertices),
          m_store_ids(store_ids) { }
43

Tiago Peixoto's avatar
Tiago Peixoto committed
44
45
    void run(std::istream& in)
    {
46
        const int buffer_size = 4096;
47
48
49
50
        m_parser = XML_ParserCreateNS(0,'|');
        XML_SetElementHandler(m_parser, &on_start_element, &on_end_element);
        XML_SetCharacterDataHandler(m_parser, &on_character_data);
        XML_SetUserData(m_parser, this);
51
        char buffer[buffer_size];
52
53

        bool okay = true;
54
        do
55
56
        {
            in.read(buffer, buffer_size);
57
            okay = XML_Parse(m_parser, buffer, in.gcount(), in.gcount() == 0);
58
        }
59
        while (okay && in.good());
60

61
        if (!okay)
62
63
        {
            std::stringstream s;
64
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
65
66
              <<", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << XML_ErrorString(XML_GetErrorCode(m_parser));
67
68
            throw parse_error(s.str());
        }
69
        XML_ParserFree(m_parser);
Tiago Peixoto's avatar
Tiago Peixoto committed
70
71
72
73
    }

private:
    /// The kinds of keys. Not all of these are supported
74
75
76
    enum key_kind {
        graph_key,
        node_key,
77
78
79
        edge_key,
        hyperedge_key,
        port_key,
80
        endpoint_key,
81
        all_key
Tiago Peixoto's avatar
Tiago Peixoto committed
82
83
    };

84
85
86
87
88
89
90
91
    enum desc_kind
    {
        M_VERTEX_DESCRIPTOR,
        M_EDGE_DESCRIPTOR,
        M_GRAPH_DESCRIPTOR
    };


92
    static void
Tiago Peixoto's avatar
Tiago Peixoto committed
93
    on_start_element(void* user_data, const XML_Char *c_name,
94
                     const XML_Char **atts)
Tiago Peixoto's avatar
Tiago Peixoto committed
95
    {
96
97
98
        graphml_reader* self = static_cast<graphml_reader*>(user_data);

        std::string name(c_name);
99
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
100
101

        if (name == "edge")
102
103
        {
            std::string id;
104
            std::string source, target;
105
            while (*atts)
106
            {
107
108
109
110
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
111
112
                else if (name == "source") source = value;
                else if (name == "target") target = value;
113
                else if (name == "directed")
114
                {
115
                    bool edge_is_directed = (value == "directed");
116
117
                    if (self->m_g.is_directed() != 2 &&
                        edge_is_directed != self->m_g.is_directed())
118
                    {
119
                        if (edge_is_directed)
120
121
122
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
123
                    }
124
                    self->m_g.flip_directed(edge_is_directed);
125
                }
126
127
            }

128
129
            self->m_active_descriptor = self->handle_edge(id, source, target);
            self->m_descriptor_kind = M_EDGE_DESCRIPTOR;
130
        }
131
        else if (name == "node")
132
133
134
        {
            std::string id;

135
            while (*atts)
136
            {
137
138
                std::string name = *atts++;
                std::string value = *atts++;
139

140
                if (name == "id") id = value;
141
142
            }

143
144
            self->m_active_descriptor = self->handle_vertex(id);
            self->m_descriptor_kind = M_VERTEX_DESCRIPTOR;
145
146
        }
        else if (name == "data")
147
        {
148
            while (*atts)
149
150
151
152
153
154
155
            {
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "key") self->m_active_key = value;
            }
        }
156
        else if (name == "key")
157
158
        {
            std::string id;
159
160
161
162
            std::string key_name;
            std::string key_type;
            key_kind kind = all_key;

163
            while (*atts)
164
            {
165
166
167
168
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
169
170
                else if (name == "attr.name") key_name = value;
                else if (name == "attr.type") key_type = value;
171
                else if (name == "for")
172
                {
173
174
175
176
177
178
179
                    if (value == "graph") kind = graph_key;
                    else if (value == "node") kind = node_key;
                    else if (value == "edge") kind = edge_key;
                    else if (value == "hyperedge") kind = hyperedge_key;
                    else if (value == "port") kind = port_key;
                    else if (value == "endpoint") kind = endpoint_key;
                    else if (value == "all") kind = all_key;
180
                    else
181
                    {
182
                        std::stringstream s;
183
184
185
                        s << "on line "
                          << XML_GetCurrentLineNumber(self->m_parser)
                          << ", column "
186
                          << XML_GetCurrentColumnNumber(self->m_parser)
187
188
                          << ": unrecognized key kind '" << value << "'";
                        throw parse_error(s.str());
189
190
                    }
                }
191
192
            }

193
194
195
196
            self->m_keys[id] = kind;
            self->m_key_name[id] = key_name;
            self->m_key_type[id] = key_type;
            self->m_active_key = id;
197
198
        }
        else if (name == "graph")
199
        {
200
            while (*atts)
201
            {
202
203
                std::string name = *atts++;
                std::string value = *atts++;
204
205

                if (name == "edgedefault")
206
207
                {
                    bool edge_is_directed = (value == "directed");
208
209
                    if ( self->m_g.is_directed() != 2 &&
                         edge_is_directed != self->m_g.is_directed())
210
                    {
211
                        if (edge_is_directed)
212
213
214
215
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
                    }
216
                    self->m_g.flip_directed(edge_is_directed);
217
218
219
220
221
                }
                else if (name == "parse.nodeids")
                {
                    self->m_canonical_vertices = (value == "canonical");
                }
222
223
224
225
                else if (name == "parse.edgeids")
                {
                    self->m_canonical_edges = (value == "canonical");
                }
226
            }
227
228
            self->m_active_descriptor = any();
            self->m_descriptor_kind = M_GRAPH_DESCRIPTOR;
229
        }
230
231

        self->m_character_data.clear();
Tiago Peixoto's avatar
Tiago Peixoto committed
232
    }
233

Tiago Peixoto's avatar
Tiago Peixoto committed
234
235
236
    static void
    on_end_element(void* user_data, const XML_Char *c_name)
    {
237
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
238

239
        std::string name(c_name);
240
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
241

242
243
        if (name == "data")
        {
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
            switch (self->m_descriptor_kind)
            {
            case M_VERTEX_DESCRIPTOR:
                self->handle_vertex_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            case M_EDGE_DESCRIPTOR:
                self->handle_edge_property(self->m_active_key, self->m_active_descriptor,
                                           self->m_character_data);
                break;
            case M_GRAPH_DESCRIPTOR:
                self->handle_graph_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            }
259
        }
260
261
262
263
        else if (name == "default")
        {
            self->m_key_default[self->m_active_key] = self->m_character_data;
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
264
265
266
267
268
    }

    static void
    on_character_data(void* user_data, const XML_Char* s, int len)
    {
269
270
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
        self->m_character_data.append(s, len);
Tiago Peixoto's avatar
Tiago Peixoto committed
271
272
    }

273
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
274
275
    handle_vertex(const std::string& v)
    {
276
277
278
279
280
281
282
        bool is_new = false;

        if (m_canonical_vertices)
        {
            size_t id;

            //strip leading "n" from name
283
            try
284
            {
285
                id = lexical_cast<size_t>(std::string(v,1));
286
287
288
            }
            catch (bad_lexical_cast)
            {
289
                std::stringstream s;
290
                s << "on line " << XML_GetCurrentLineNumber(m_parser)
291
292
293
                  << ", column " << XML_GetCurrentColumnNumber(m_parser)
                  << ": invalid vertex: " << v;
                throw parse_error(s.str());
294
            }
295

296
            if (m_integer_vertices)
297
            {
298
299
300
301
302
303
304
305
306
307
308
                is_new = (m_g.n_vertices() <= id);
                for (size_t i = m_g.n_vertices(); i <= id; ++i)
                    m_g.do_add_vertex();
            }
            else
            {
                while(id >= m_canonical_vertex.size())
                {
                    m_canonical_vertex.push_back(m_g.do_add_vertex());
                    is_new = true;
                }
309
310
311
312
313
314
            }
        }
        else
        {
            if (m_vertex.find(v) == m_vertex.end())
            {
315
316
                m_vertex[v] = m_g.do_add_vertex();
                is_new = true;
317
318
319
            }
        }

320
        any vd = get_vertex_descriptor(v);
321
322
323
        if (is_new)
        {
            std::map<std::string, std::string>::iterator iter;
324
            for (iter = m_key_default.begin(); iter != m_key_default.end();
325
                 ++iter)
326
            {
327
                if (m_keys[iter->first] == node_key)
328
                    handle_vertex_property(iter->first, vd, iter->second);
329
            }
330
331
            if (m_store_ids && !m_canonical_vertices)
                m_g.set_vertex_property("_graphml_vertex_id",
332
                                        vd, v, "string");
333
        }
334
        return vd;
Tiago Peixoto's avatar
Tiago Peixoto committed
335
336
    }

337
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
338
339
    get_vertex_descriptor(const std::string& v)
    {
340
341
342
343
        if (m_canonical_vertices)
        {
            //strip leading "n" from name
            size_t id = lexical_cast<size_t>(std::string(v,1));
344
345
            if (m_integer_vertices)
                return id;
346
347
348
            return m_canonical_vertex[id];
        }
        else
349
        {
350
351
            return m_vertex[v];
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
352
353
    }

354
    any
355
356
    handle_edge(const std::string& id, const std::string& u,
                const std::string& v)
Tiago Peixoto's avatar
Tiago Peixoto committed
357
    {
358
359
        handle_vertex(u);
        handle_vertex(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
360

361
362
363
        any source, target;
        source = get_vertex_descriptor(u);
        target = get_vertex_descriptor(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
364

365
366
367
368
369
        any edge;
        bool added;
        tie(edge, added) = m_g.do_add_edge(source, target);
        if (!added)
            throw bad_parallel_edge(u, v);
Tiago Peixoto's avatar
Tiago Peixoto committed
370

371
372
373
374
        std::map<std::string, std::string>::iterator iter;
        for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
        {
            if (m_keys[iter->first] == edge_key)
375
                handle_edge_property(iter->first, edge, iter->second);
376
        }
377
378

        if (m_store_ids && !m_canonical_edges)
379
            m_g.set_edge_property("_graphml_edge_id", edge, id, "string");
380

381
        return edge;
382
    }
Tiago Peixoto's avatar
Tiago Peixoto committed
383

384
385
386
    void handle_edge_property(const std::string& key_id,
                              const any& descriptor,
                              const std::string& value)
Tiago Peixoto's avatar
Tiago Peixoto committed
387
    {
388
        try
389
        {
390
391
            m_g.set_edge_property(m_key_name[key_id], descriptor,
                              value, m_key_type[key_id]);
392
        }
393
        catch (parse_error &e)
394
        {
395
            std::stringstream s;
396
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
397
398
399
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
400
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
401
402
    }

403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
    void handle_vertex_property(const std::string& key_id,
                                const any& descriptor,
                                const std::string& value)
    {
        try
        {
            m_g.set_vertex_property(m_key_name[key_id], descriptor,
                                    value, m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
    }

    void handle_graph_property(const std::string& key_id,
                               const any&,
                               const std::string& value)
425
    {
426
427
428
429
430
431
432
433
434
435
436
437
438
        try
        {
            m_g.set_graph_property(m_key_name[key_id], value,
                                   m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
439
440
    }

441
    mutate_graph& m_g;
Tiago Peixoto's avatar
Tiago Peixoto committed
442
443
444
    std::map<std::string, key_kind> m_keys;
    std::map<std::string, std::string> m_key_name;
    std::map<std::string, std::string> m_key_type;
445
446
447
    std::map<std::string, std::string> m_key_default;
    std::map<std::string, any> m_vertex;
    std::vector<any> m_canonical_vertex;
448
449
450
451

    any m_active_descriptor;
    desc_kind m_descriptor_kind;

Tiago Peixoto's avatar
Tiago Peixoto committed
452
453
454
455
    std::string m_active_key;
    std::string m_character_data;
    bool m_canonical_vertices;
    bool m_canonical_edges;
456
    bool m_integer_vertices;
457
    bool m_store_ids;
458
    bool m_ignore_directedness;
459
    XML_Parser m_parser;
Tiago Peixoto's avatar
Tiago Peixoto committed
460
461
};

462
namespace boost
Tiago Peixoto's avatar
Tiago Peixoto committed
463
464
{
void
465
read_graphml(std::istream& in, mutate_graph& g, bool integer_vertices, bool store_ids)
466
{
467
    graphml_reader reader(g, integer_vertices, store_ids);
468
    reader.run(in);
Tiago Peixoto's avatar
Tiago Peixoto committed
469
470
}
}