graphml.cpp 14.6 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
// Copyright (C) 2006  Tiago de Paula Peixoto <tiago@skewed.de>
2
// Copyright (C) 2004  The Trustees of Indiana University.
Tiago Peixoto's avatar
Tiago Peixoto committed
3
//
4
5
6
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11

//  Authors: Douglas Gregor
//           Andrew Lumsdaine
//           Tiago de Paula Peixoto

12
#include <boost/python.hpp>
13
#include <boost/variant.hpp>
14
15
#include <expat.h>
#include <boost/graph/graphml.hpp>
16
#include <boost/algorithm/string/replace.hpp>
17
18
#include <boost/archive/iterators/xml_escape.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
19
20
#include <boost/archive/iterators/insert_linebreaks.hpp>
#include <sstream>
Tiago Peixoto's avatar
Tiago Peixoto committed
21

22
23
#include "base64.hh"

24
using namespace boost;
25
26
namespace boost
{
27

28
29
std::string protect_xml_string(const std::string& os)
{
30
31
32
33
34
35
    using namespace boost::archive::iterators;
    std::stringstream s;
    std::copy(xml_escape<const char*>(os.c_str()),
              xml_escape<const char*>(os.c_str()+os.size()),
              ostream_iterator<char>(s));
    return s.str();
36
37
}
}
Tiago Peixoto's avatar
Tiago Peixoto committed
38
39
40
41

class graphml_reader
{
public:
42
    graphml_reader(mutate_graph& g, bool integer_vertices, bool store_ids)
43
44
45
46
47
        : m_g(g),
          m_canonical_vertices(false),
          m_canonical_edges(false),
          m_integer_vertices(integer_vertices),
          m_store_ids(store_ids) { }
48

Tiago Peixoto's avatar
Tiago Peixoto committed
49
50
    void run(std::istream& in)
    {
51
        const int buffer_size = 4096;
52
53
54
55
        m_parser = XML_ParserCreateNS(0,'|');
        XML_SetElementHandler(m_parser, &on_start_element, &on_end_element);
        XML_SetCharacterDataHandler(m_parser, &on_character_data);
        XML_SetUserData(m_parser, this);
56
        char buffer[buffer_size];
57
58

        bool okay = true;
59
        do
60
61
        {
            in.read(buffer, buffer_size);
62
            okay = XML_Parse(m_parser, buffer, in.gcount(), in.gcount() == 0);
63
        }
64
        while (okay && in.good());
65

66
        if (!okay)
67
68
        {
            std::stringstream s;
69
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
70
71
              <<", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << XML_ErrorString(XML_GetErrorCode(m_parser));
72
73
            throw parse_error(s.str());
        }
74
        XML_ParserFree(m_parser);
Tiago Peixoto's avatar
Tiago Peixoto committed
75
76
77
78
    }

private:
    /// The kinds of keys. Not all of these are supported
79
80
81
    enum key_kind {
        graph_key,
        node_key,
82
83
84
        edge_key,
        hyperedge_key,
        port_key,
85
        endpoint_key,
86
        all_key
Tiago Peixoto's avatar
Tiago Peixoto committed
87
88
    };

89
90
91
92
93
94
95
96
    enum desc_kind
    {
        M_VERTEX_DESCRIPTOR,
        M_EDGE_DESCRIPTOR,
        M_GRAPH_DESCRIPTOR
    };


97
    static void
Tiago Peixoto's avatar
Tiago Peixoto committed
98
    on_start_element(void* user_data, const XML_Char *c_name,
99
                     const XML_Char **atts)
Tiago Peixoto's avatar
Tiago Peixoto committed
100
    {
101
102
103
        graphml_reader* self = static_cast<graphml_reader*>(user_data);

        std::string name(c_name);
104
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
105
106

        if (name == "edge")
107
108
        {
            std::string id;
109
            std::string source, target;
110
            while (*atts)
111
            {
112
113
114
115
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
116
117
                else if (name == "source") source = value;
                else if (name == "target") target = value;
118
                else if (name == "directed")
119
                {
120
                    bool edge_is_directed = (value == "directed");
121
122
                    if (self->m_g.is_directed() != 2 &&
                        edge_is_directed != self->m_g.is_directed())
123
                    {
124
                        if (edge_is_directed)
125
126
127
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
128
                    }
129
                    self->m_g.flip_directed(edge_is_directed);
130
                }
131
132
            }

133
134
            self->m_active_descriptor = self->handle_edge(id, source, target);
            self->m_descriptor_kind = M_EDGE_DESCRIPTOR;
135
        }
136
        else if (name == "node")
137
138
139
        {
            std::string id;

140
            while (*atts)
141
            {
142
143
                std::string name = *atts++;
                std::string value = *atts++;
144

145
                if (name == "id") id = value;
146
147
            }

148
149
            self->m_active_descriptor = self->handle_vertex(id);
            self->m_descriptor_kind = M_VERTEX_DESCRIPTOR;
150
151
        }
        else if (name == "data")
152
        {
153
            while (*atts)
154
155
156
157
158
159
160
            {
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "key") self->m_active_key = value;
            }
        }
161
        else if (name == "key")
162
163
        {
            std::string id;
164
165
166
167
            std::string key_name;
            std::string key_type;
            key_kind kind = all_key;

168
            while (*atts)
169
            {
170
171
172
173
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
174
175
                else if (name == "attr.name") key_name = value;
                else if (name == "attr.type") key_type = value;
176
                else if (name == "for")
177
                {
178
179
180
181
182
183
184
                    if (value == "graph") kind = graph_key;
                    else if (value == "node") kind = node_key;
                    else if (value == "edge") kind = edge_key;
                    else if (value == "hyperedge") kind = hyperedge_key;
                    else if (value == "port") kind = port_key;
                    else if (value == "endpoint") kind = endpoint_key;
                    else if (value == "all") kind = all_key;
185
                    else
186
                    {
187
                        std::stringstream s;
188
189
190
                        s << "on line "
                          << XML_GetCurrentLineNumber(self->m_parser)
                          << ", column "
191
                          << XML_GetCurrentColumnNumber(self->m_parser)
192
193
                          << ": unrecognized key kind '" << value << "'";
                        throw parse_error(s.str());
194
195
                    }
                }
196
197
            }

198
199
200
201
            self->m_keys[id] = kind;
            self->m_key_name[id] = key_name;
            self->m_key_type[id] = key_type;
            self->m_active_key = id;
202
203
        }
        else if (name == "graph")
204
        {
205
            while (*atts)
206
            {
207
208
                std::string name = *atts++;
                std::string value = *atts++;
209
210

                if (name == "edgedefault")
211
212
                {
                    bool edge_is_directed = (value == "directed");
213
214
                    if ( self->m_g.is_directed() != 2 &&
                         edge_is_directed != self->m_g.is_directed())
215
                    {
216
                        if (edge_is_directed)
217
218
219
220
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
                    }
221
                    self->m_g.flip_directed(edge_is_directed);
222
223
224
225
226
                }
                else if (name == "parse.nodeids")
                {
                    self->m_canonical_vertices = (value == "canonical");
                }
227
228
229
230
                else if (name == "parse.edgeids")
                {
                    self->m_canonical_edges = (value == "canonical");
                }
231
            }
232
233
            self->m_active_descriptor = any();
            self->m_descriptor_kind = M_GRAPH_DESCRIPTOR;
234
        }
235
236

        self->m_character_data.clear();
Tiago Peixoto's avatar
Tiago Peixoto committed
237
    }
238

Tiago Peixoto's avatar
Tiago Peixoto committed
239
240
241
    static void
    on_end_element(void* user_data, const XML_Char *c_name)
    {
242
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
243

244
        std::string name(c_name);
245
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
246

247
248
        if (name == "data")
        {
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
            switch (self->m_descriptor_kind)
            {
            case M_VERTEX_DESCRIPTOR:
                self->handle_vertex_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            case M_EDGE_DESCRIPTOR:
                self->handle_edge_property(self->m_active_key, self->m_active_descriptor,
                                           self->m_character_data);
                break;
            case M_GRAPH_DESCRIPTOR:
                self->handle_graph_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            }
264
        }
265
266
267
268
        else if (name == "default")
        {
            self->m_key_default[self->m_active_key] = self->m_character_data;
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
269
270
271
272
273
    }

    static void
    on_character_data(void* user_data, const XML_Char* s, int len)
    {
274
275
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
        self->m_character_data.append(s, len);
Tiago Peixoto's avatar
Tiago Peixoto committed
276
277
    }

278
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
279
280
    handle_vertex(const std::string& v)
    {
281
282
283
284
285
286
287
        bool is_new = false;

        if (m_canonical_vertices)
        {
            size_t id;

            //strip leading "n" from name
288
            try
289
            {
290
                id = lexical_cast<size_t>(std::string(v,1));
291
292
293
            }
            catch (bad_lexical_cast)
            {
294
                std::stringstream s;
295
                s << "on line " << XML_GetCurrentLineNumber(m_parser)
296
297
298
                  << ", column " << XML_GetCurrentColumnNumber(m_parser)
                  << ": invalid vertex: " << v;
                throw parse_error(s.str());
299
            }
300

301
            if (m_integer_vertices)
302
            {
303
304
305
306
307
308
309
310
311
312
313
                is_new = (m_g.n_vertices() <= id);
                for (size_t i = m_g.n_vertices(); i <= id; ++i)
                    m_g.do_add_vertex();
            }
            else
            {
                while(id >= m_canonical_vertex.size())
                {
                    m_canonical_vertex.push_back(m_g.do_add_vertex());
                    is_new = true;
                }
314
315
316
317
318
319
            }
        }
        else
        {
            if (m_vertex.find(v) == m_vertex.end())
            {
320
321
                m_vertex[v] = m_g.do_add_vertex();
                is_new = true;
322
323
324
            }
        }

325
        any vd = get_vertex_descriptor(v);
326
327
328
        if (is_new)
        {
            std::map<std::string, std::string>::iterator iter;
329
            for (iter = m_key_default.begin(); iter != m_key_default.end();
330
                 ++iter)
331
            {
332
                if (m_keys[iter->first] == node_key)
333
                    handle_vertex_property(iter->first, vd, iter->second);
334
            }
335
336
            if (m_store_ids && !m_canonical_vertices)
                m_g.set_vertex_property("_graphml_vertex_id",
337
                                        vd, v, "string");
338
        }
339
        return vd;
Tiago Peixoto's avatar
Tiago Peixoto committed
340
341
    }

342
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
343
344
    get_vertex_descriptor(const std::string& v)
    {
345
346
347
348
        if (m_canonical_vertices)
        {
            //strip leading "n" from name
            size_t id = lexical_cast<size_t>(std::string(v,1));
349
350
            if (m_integer_vertices)
                return id;
351
352
353
            return m_canonical_vertex[id];
        }
        else
354
        {
355
356
            return m_vertex[v];
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
357
358
    }

359
    any
360
361
    handle_edge(const std::string& id, const std::string& u,
                const std::string& v)
Tiago Peixoto's avatar
Tiago Peixoto committed
362
    {
363
364
        handle_vertex(u);
        handle_vertex(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
365

366
367
368
        any source, target;
        source = get_vertex_descriptor(u);
        target = get_vertex_descriptor(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
369

370
371
372
373
374
        any edge;
        bool added;
        tie(edge, added) = m_g.do_add_edge(source, target);
        if (!added)
            throw bad_parallel_edge(u, v);
Tiago Peixoto's avatar
Tiago Peixoto committed
375

376
377
378
379
        std::map<std::string, std::string>::iterator iter;
        for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
        {
            if (m_keys[iter->first] == edge_key)
380
                handle_edge_property(iter->first, edge, iter->second);
381
        }
382
383

        if (m_store_ids && !m_canonical_edges)
384
            m_g.set_edge_property("_graphml_edge_id", edge, id, "string");
385

386
        return edge;
387
    }
Tiago Peixoto's avatar
Tiago Peixoto committed
388

389
390
391
    void handle_edge_property(const std::string& key_id,
                              const any& descriptor,
                              const std::string& value)
Tiago Peixoto's avatar
Tiago Peixoto committed
392
    {
393
        try
394
        {
395
396
            m_g.set_edge_property(m_key_name[key_id], descriptor,
                              value, m_key_type[key_id]);
397
        }
398
        catch (parse_error &e)
399
        {
400
            std::stringstream s;
401
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
402
403
404
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
405
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
406
407
    }

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
    void handle_vertex_property(const std::string& key_id,
                                const any& descriptor,
                                const std::string& value)
    {
        try
        {
            m_g.set_vertex_property(m_key_name[key_id], descriptor,
                                    value, m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
    }

    void handle_graph_property(const std::string& key_id,
                               const any&,
                               const std::string& value)
430
    {
431
432
433
434
435
436
437
438
439
440
441
442
443
        try
        {
            m_g.set_graph_property(m_key_name[key_id], value,
                                   m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
444
445
    }

446
    mutate_graph& m_g;
Tiago Peixoto's avatar
Tiago Peixoto committed
447
448
449
    std::map<std::string, key_kind> m_keys;
    std::map<std::string, std::string> m_key_name;
    std::map<std::string, std::string> m_key_type;
450
451
452
    std::map<std::string, std::string> m_key_default;
    std::map<std::string, any> m_vertex;
    std::vector<any> m_canonical_vertex;
453
454
455
456

    any m_active_descriptor;
    desc_kind m_descriptor_kind;

Tiago Peixoto's avatar
Tiago Peixoto committed
457
458
459
460
    std::string m_active_key;
    std::string m_character_data;
    bool m_canonical_vertices;
    bool m_canonical_edges;
461
    bool m_integer_vertices;
462
    bool m_store_ids;
463
    bool m_ignore_directedness;
464
    XML_Parser m_parser;
Tiago Peixoto's avatar
Tiago Peixoto committed
465
466
};

467
namespace boost
Tiago Peixoto's avatar
Tiago Peixoto committed
468
469
{
void
470
read_graphml(std::istream& in, mutate_graph& g, bool integer_vertices, bool store_ids)
471
{
472
    graphml_reader reader(g, integer_vertices, store_ids);
473
    reader.run(in);
Tiago Peixoto's avatar
Tiago Peixoto committed
474
475
}
}